Find this model in the Qwen2.5-small model summary

id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 8960 | 1536 | 5.833333 | 6.192732 | 0.053414 | 10.346296 | 337 | under-trained |
2 | dense | 8960 | 1536 | 5.833333 | 3.988269 | 0.014106 | 8.540256 | 176 | |
3 | dense | 8960 | 1536 | 5.833333 | 11.216515 | 0.074838 | 15.578239 | 61 | under-trained |
4 | dense | 1536 | 256 | 6.000000 | 6.601371 | 0.053442 | 8.175576 | 35 | under-trained |
5 | dense | 1536 | 1536 | 1.000000 | 3.857758 | 0.078567 | 4.199012 | 380 | |
6 | dense | 1536 | 1536 | 1.000000 | 2.491033 | 0.016290 | 7.162338 | 187 | |
7 | dense | 1536 | 256 | 6.000000 | 9.249598 | 0.117407 | 0.807286 | 54 | under-trained |
8 | dense | 1536 | 256 | 6.000000 | 8.659670 | 0.057693 | 1.346619 | 38 | under-trained |
9 | dense | 1536 | 1536 | 1.000000 | 2.946589 | 0.039966 | 4.751012 | 87 | |
10 | dense | 1536 | 1536 | 1.000000 | 3.799894 | 0.031738 | 4.510958 | 129 | |
11 | dense | 8960 | 1536 | 5.833333 | 5.766564 | 0.020838 | 11.512075 | 173 | |
12 | dense | 8960 | 1536 | 5.833333 | 4.295077 | 0.017742 | 9.836010 | 255 | |
13 | dense | 8960 | 1536 | 5.833333 | 4.188555 | 0.038459 | 7.440409 | 44 | |
14 | dense | 1536 | 256 | 6.000000 | 1.948153 | 0.138447 | 2.265253 | 180 | over-trained |
15 | dense | 1536 | 256 | 6.000000 | 5.640992 | 0.104665 | 1.722965 | 72 | |
16 | dense | 8960 | 1536 | 5.833333 | 6.425356 | 0.037866 | 10.678067 | 187 | under-trained |
17 | dense | 1536 | 256 | 6.000000 | 2.809418 | 0.073513 | 2.514817 | 110 | |
18 | dense | 1536 | 1536 | 1.000000 | 3.982889 | 0.057416 | 4.401463 | 128 | |
19 | dense | 1536 | 1536 | 1.000000 | 2.463721 | 0.044057 | 3.413705 | 240 | |
20 | dense | 8960 | 1536 | 5.833333 | 3.791164 | 0.014593 | 8.803422 | 346 | |
21 | dense | 8960 | 1536 | 5.833333 | 5.177057 | 0.026783 | 8.574793 | 52 | |
22 | dense | 8960 | 1536 | 5.833333 | 6.661750 | 0.012078 | 10.975939 | 124 | under-trained |
23 | dense | 8960 | 1536 | 5.833333 | 3.807793 | 0.021621 | 8.607070 | 395 | |
24 | dense | 8960 | 1536 | 5.833333 | 11.687328 | 0.111947 | 14.980680 | 150 | under-trained |
25 | dense | 1536 | 256 | 6.000000 | 4.633157 | 0.122331 | 4.279891 | 77 | |
26 | dense | 1536 | 1536 | 1.000000 | 4.689202 | 0.073950 | 5.028190 | 115 | |
27 | dense | 1536 | 1536 | 1.000000 | 3.221272 | 0.037350 | 4.581188 | 121 | |
28 | dense | 1536 | 256 | 6.000000 | 6.763702 | 0.052068 | 2.697367 | 46 | under-trained |
29 | dense | 8960 | 1536 | 5.833333 | 13.868327 | 0.048306 | 18.761084 | 75 | under-trained |
30 | dense | 8960 | 1536 | 5.833333 | 4.198176 | 0.043489 | 7.517608 | 385 | |
31 | dense | 8960 | 1536 | 5.833333 | 4.162315 | 0.011687 | 9.223583 | 286 | |
32 | dense | 1536 | 256 | 6.000000 | 5.732522 | 0.141886 | 4.100718 | 63 | |
33 | dense | 1536 | 1536 | 1.000000 | 4.991207 | 0.031678 | 5.431063 | 89 | |
34 | dense | 1536 | 1536 | 1.000000 | 4.631432 | 0.050596 | 6.066159 | 55 | |
35 | dense | 1536 | 256 | 6.000000 | 12.222344 | 0.047042 | 4.711628 | 31 | under-trained |
36 | dense | 1536 | 1536 | 1.000000 | 3.399360 | 0.052377 | 4.696224 | 98 | |
37 | dense | 1536 | 256 | 6.000000 | 5.607214 | 0.134396 | 3.884405 | 54 | |
38 | dense | 8960 | 1536 | 5.833333 | 15.307873 | 0.064693 | 19.818747 | 76 | under-trained |
39 | dense | 8960 | 1536 | 5.833333 | 5.206945 | 0.029760 | 8.469914 | 233 | |
40 | dense | 1536 | 256 | 6.000000 | 5.100932 | 0.087394 | 2.921472 | 73 | |
41 | dense | 8960 | 1536 | 5.833333 | 3.699872 | 0.018507 | 9.005504 | 409 | |
42 | dense | 1536 | 1536 | 1.000000 | 2.230894 | 0.105703 | 2.976226 | 346 | |
43 | dense | 1536 | 1536 | 1.000000 | 2.490843 | 0.078598 | 3.396442 | 190 | |
44 | dense | 8960 | 1536 | 5.833333 | 3.872802 | 0.020060 | 8.484631 | 375 | |
45 | dense | 8960 | 1536 | 5.833333 | 10.010717 | 0.118530 | 14.129108 | 209 | under-trained |
46 | dense | 1536 | 256 | 6.000000 | 3.359665 | 0.118421 | 3.170409 | 78 | |
47 | dense | 1536 | 1536 | 1.000000 | 5.200839 | 0.022721 | 6.194827 | 74 | |
48 | dense | 8960 | 1536 | 5.833333 | 4.944765 | 0.063119 | 8.517043 | 348 | |
49 | dense | 1536 | 256 | 6.000000 | 8.301245 | 0.051683 | 4.037567 | 30 | under-trained |
50 | dense | 1536 | 256 | 6.000000 | 9.330274 | 0.054402 | 4.280315 | 37 | under-trained |
51 | dense | 1536 | 1536 | 1.000000 | 4.598839 | 0.061739 | 6.011422 | 101 | |
52 | dense | 1536 | 256 | 6.000000 | 2.097507 | 0.149502 | 1.830708 | 159 | |
53 | dense | 1536 | 1536 | 1.000000 | 2.300247 | 0.089473 | 3.046359 | 329 | |
54 | dense | 8960 | 1536 | 5.833333 | 3.988181 | 0.016082 | 8.459346 | 309 | |
55 | dense | 8960 | 1536 | 5.833333 | 6.006131 | 0.017519 | 10.636528 | 103 | under-trained |
56 | dense | 8960 | 1536 | 5.833333 | 10.912916 | 0.047292 | 16.559290 | 105 | under-trained |
57 | dense | 1536 | 256 | 6.000000 | 7.069447 | 0.088015 | 2.744517 | 65 | under-trained |
58 | dense | 1536 | 1536 | 1.000000 | 2.342271 | 0.080442 | 3.149462 | 217 | |
59 | dense | 8960 | 1536 | 5.833333 | 5.102557 | 0.046228 | 10.102247 | 198 | |
60 | dense | 8960 | 1536 | 5.833333 | 4.134582 | 0.014998 | 8.758208 | 263 | |
61 | dense | 8960 | 1536 | 5.833333 | 11.177652 | 0.023147 | 15.806928 | 60 | under-trained |
62 | dense | 1536 | 256 | 6.000000 | 3.003304 | 0.059474 | 2.579017 | 73 | |
63 | dense | 1536 | 1536 | 1.000000 | 4.653224 | 0.045670 | 4.577975 | 76 | |
64 | dense | 1536 | 1536 | 1.000000 | 2.194472 | 0.085262 | 2.836288 | 290 | |
65 | dense | 8960 | 1536 | 5.833333 | 3.786920 | 0.072466 | 8.053799 | 471 | |
66 | dense | 8960 | 1536 | 5.833333 | 4.151295 | 0.016712 | 8.445251 | 228 | |
67 | dense | 8960 | 1536 | 5.833333 | 9.015780 | 0.019240 | 13.012854 | 81 | under-trained |
68 | dense | 1536 | 256 | 6.000000 | 7.474136 | 0.053709 | 6.141252 | 30 | under-trained |
69 | dense | 1536 | 1536 | 1.000000 | 3.681284 | 0.028524 | 4.153183 | 135 | |
70 | dense | 1536 | 256 | 6.000000 | 6.536903 | 0.038494 | 3.163638 | 53 | under-trained |
71 | dense | 8960 | 1536 | 5.833333 | 3.642787 | 0.022077 | 7.547328 | 296 | |
72 | dense | 1536 | 256 | 6.000000 | 11.068272 | 0.036770 | 4.138400 | 35 | under-trained |
73 | dense | 1536 | 1536 | 1.000000 | 2.286889 | 0.075278 | 3.030090 | 280 | |
74 | dense | 1536 | 1536 | 1.000000 | 4.720976 | 0.119084 | 6.068936 | 134 | |
75 | dense | 1536 | 256 | 6.000000 | 5.349618 | 0.077302 | 4.411973 | 28 | |
76 | dense | 8960 | 1536 | 5.833333 | 7.660972 | 0.027495 | 10.951116 | 104 | under-trained |
77 | dense | 8960 | 1536 | 5.833333 | 3.717588 | 0.057197 | 8.118905 | 448 | |
78 | dense | 1536 | 256 | 6.000000 | 8.965046 | 0.041249 | 2.988253 | 40 | under-trained |
79 | dense | 1536 | 1536 | 1.000000 | 3.717166 | 0.047234 | 5.094646 | 188 | |
80 | dense | 1536 | 256 | 6.000000 | 4.253726 | 0.130903 | 4.158571 | 55 | |
81 | dense | 8960 | 1536 | 5.833333 | 6.292470 | 0.039819 | 8.807026 | 138 | under-trained |
82 | dense | 8960 | 1536 | 5.833333 | 3.452131 | 0.038413 | 7.488068 | 298 | |
83 | dense | 8960 | 1536 | 5.833333 | 5.040999 | 0.016244 | 10.865299 | 199 | |
84 | dense | 1536 | 1536 | 1.000000 | 1.888479 | 0.102641 | 2.559120 | 490 | over-trained |
85 | dense | 1536 | 256 | 6.000000 | 5.940743 | 0.117987 | 2.228796 | 74 | |
86 | dense | 1536 | 1536 | 1.000000 | 2.042567 | 0.099624 | 2.703976 | 370 | |
87 | dense | 1536 | 1536 | 1.000000 | 3.663782 | 0.056835 | 4.413034 | 93 | |
88 | dense | 1536 | 256 | 6.000000 | 3.696612 | 0.144186 | 2.904817 | 62 | |
89 | dense | 8960 | 1536 | 5.833333 | 6.138574 | 0.031224 | 8.564404 | 157 | under-trained |
90 | dense | 8960 | 1536 | 5.833333 | 4.061060 | 0.024548 | 8.469624 | 170 | |
91 | dense | 8960 | 1536 | 5.833333 | 4.559149 | 0.020936 | 9.606735 | 171 | |
92 | dense | 8960 | 1536 | 5.833333 | 4.226422 | 0.020800 | 8.605664 | 167 | |
93 | dense | 1536 | 256 | 6.000000 | 5.002470 | 0.074783 | 2.065032 | 91 | |
94 | dense | 1536 | 1536 | 1.000000 | 3.508407 | 0.066514 | 4.916390 | 69 | |
95 | dense | 1536 | 1536 | 1.000000 | 4.182174 | 0.032282 | 5.054072 | 120 | |
96 | dense | 8960 | 1536 | 5.833333 | 4.142703 | 0.031644 | 8.487528 | 231 | |
97 | dense | 8960 | 1536 | 5.833333 | 6.120409 | 0.029749 | 8.720710 | 146 | under-trained |
98 | dense | 1536 | 256 | 6.000000 | 5.300690 | 0.075661 | 4.990967 | 25 | |
99 | dense | 1536 | 1536 | 1.000000 | 3.070896 | 0.104453 | 3.527150 | 168 | |
100 | dense | 1536 | 256 | 6.000000 | 1.749499 | 0.121859 | 1.627044 | 140 | over-trained |
101 | dense | 1536 | 256 | 6.000000 | 6.135047 | 0.093281 | 2.390938 | 46 | under-trained |
102 | dense | 1536 | 1536 | 1.000000 | 1.923431 | 0.079839 | 2.749048 | 318 | over-trained |
103 | dense | 8960 | 1536 | 5.833333 | 3.510089 | 0.075406 | 7.377644 | 427 | |
104 | dense | 8960 | 1536 | 5.833333 | 4.271436 | 0.016885 | 8.563691 | 143 | |
105 | dense | 8960 | 1536 | 5.833333 | 5.807736 | 0.019748 | 8.421993 | 153 | |
106 | dense | 1536 | 1536 | 1.000000 | 3.527058 | 0.044948 | 8.014304 | 27 | |
107 | dense | 1536 | 256 | 6.000000 | 9.188657 | 0.122880 | 3.087044 | 74 | under-trained |
108 | dense | 8960 | 1536 | 5.833333 | 6.537858 | 0.048491 | 9.416969 | 69 | under-trained |
109 | dense | 8960 | 1536 | 5.833333 | 4.046365 | 0.033967 | 8.135159 | 135 | |
110 | dense | 8960 | 1536 | 5.833333 | 4.875848 | 0.012841 | 10.508988 | 154 | |
111 | dense | 1536 | 256 | 6.000000 | 3.850850 | 0.079676 | 3.816533 | 23 | |
112 | dense | 1536 | 1536 | 1.000000 | 5.295649 | 0.102790 | 6.596783 | 87 | |
113 | dense | 8960 | 1536 | 5.833333 | 4.343250 | 0.020733 | 8.904405 | 204 | |
114 | dense | 8960 | 1536 | 5.833333 | 4.332672 | 0.041877 | 8.449353 | 136 | |
115 | dense | 8960 | 1536 | 5.833333 | 7.312091 | 0.043358 | 10.202291 | 83 | under-trained |
116 | dense | 1536 | 1536 | 1.000000 | 4.793134 | 0.062272 | 6.492009 | 44 | |
117 | dense | 1536 | 1536 | 1.000000 | 2.066577 | 0.071377 | 2.963435 | 280 | |
118 | dense | 1536 | 256 | 6.000000 | 7.331428 | 0.127659 | 2.524294 | 65 | under-trained |
119 | dense | 1536 | 256 | 6.000000 | 2.082208 | 0.153395 | 1.702594 | 115 | |
120 | dense | 8960 | 1536 | 5.833333 | 4.349535 | 0.015914 | 8.149673 | 185 | |
121 | dense | 8960 | 1536 | 5.833333 | 4.291488 | 0.032393 | 8.466334 | 149 | |
122 | dense | 8960 | 1536 | 5.833333 | 6.249738 | 0.046397 | 8.951590 | 158 | under-trained |
123 | dense | 1536 | 256 | 6.000000 | 4.211558 | 0.058996 | 2.912169 | 40 | |
124 | dense | 1536 | 1536 | 1.000000 | 2.946031 | 0.085670 | 3.728661 | 174 | |
125 | dense | 1536 | 1536 | 1.000000 | 1.968943 | 0.102475 | 2.511965 | 424 | over-trained |
126 | dense | 1536 | 256 | 6.000000 | 5.534511 | 0.121136 | 3.343237 | 46 | |
127 | dense | 8960 | 1536 | 5.833333 | 5.271300 | 0.031121 | 10.276681 | 30 | |
128 | dense | 8960 | 1536 | 5.833333 | 6.829580 | 0.033925 | 10.100236 | 104 | under-trained |
129 | dense | 1536 | 256 | 6.000000 | 6.711048 | 0.094003 | 4.948870 | 17 | under-trained |
130 | dense | 1536 | 1536 | 1.000000 | 2.911070 | 0.038801 | 3.451668 | 149 | |
131 | dense | 1536 | 1536 | 1.000000 | 2.149709 | 0.080178 | 2.821936 | 293 | |
132 | dense | 8960 | 1536 | 5.833333 | 4.145997 | 0.019223 | 7.388965 | 243 | |
133 | dense | 1536 | 256 | 6.000000 | 6.582742 | 0.110474 | 2.693819 | 76 | under-trained |
134 | dense | 8960 | 1536 | 5.833333 | 5.822343 | 0.029026 | 10.817062 | 172 | |
135 | dense | 8960 | 1536 | 5.833333 | 4.610228 | 0.031049 | 9.324222 | 108 | |
136 | dense | 8960 | 1536 | 5.833333 | 6.666224 | 0.022174 | 12.088863 | 138 | under-trained |
137 | dense | 1536 | 1536 | 1.000000 | 7.986119 | 0.058497 | 9.752339 | 43 | under-trained |
138 | dense | 1536 | 1536 | 1.000000 | 2.135236 | 0.071866 | 3.185385 | 258 | |
139 | dense | 1536 | 256 | 6.000000 | 8.081471 | 0.076485 | 3.296480 | 42 | under-trained |
140 | dense | 1536 | 256 | 6.000000 | 2.031798 | 0.099713 | 1.266101 | 121 | |
141 | dense | 1536 | 256 | 6.000000 | 5.745552 | 0.114150 | 3.202652 | 107 | |
142 | dense | 8960 | 1536 | 5.833333 | 7.178888 | 0.024819 | 12.139773 | 126 | under-trained |
143 | dense | 8960 | 1536 | 5.833333 | 5.131118 | 0.018190 | 10.478010 | 123 | |
144 | dense | 8960 | 1536 | 5.833333 | 8.218023 | 0.030217 | 12.486075 | 85 | under-trained |
145 | dense | 1536 | 256 | 6.000000 | 1.878093 | 0.145534 | 1.449685 | 138 | over-trained |
146 | dense | 1536 | 1536 | 1.000000 | 5.154539 | 0.024396 | 7.526251 | 81 | |
147 | dense | 1536 | 1536 | 1.000000 | 1.872556 | 0.091615 | 2.497908 | 442 | over-trained |
148 | dense | 8960 | 1536 | 5.833333 | 5.442999 | 0.015276 | 11.488695 | 129 | |
149 | dense | 8960 | 1536 | 5.833333 | 5.885151 | 0.032589 | 9.738623 | 28 | |
150 | dense | 8960 | 1536 | 5.833333 | 8.841463 | 0.027298 | 13.505339 | 96 | under-trained |
151 | dense | 1536 | 256 | 6.000000 | 5.834123 | 0.058242 | 4.475806 | 15 | |
152 | dense | 1536 | 1536 | 1.000000 | 3.268522 | 0.052785 | 3.984235 | 130 | |
153 | dense | 1536 | 1536 | 1.000000 | 2.091116 | 0.082548 | 2.808717 | 358 | |
154 | dense | 1536 | 256 | 6.000000 | 4.988411 | 0.070037 | 3.364337 | 70 | |
155 | dense | 1536 | 1536 | 1.000000 | 2.387682 | 0.083307 | 3.392633 | 259 | |
156 | dense | 8960 | 1536 | 5.833333 | 8.823411 | 0.047814 | 14.090040 | 121 | under-trained |
157 | dense | 1536 | 1536 | 1.000000 | 4.751384 | 0.108364 | 5.686731 | 120 | |
158 | dense | 1536 | 256 | 6.000000 | 2.562504 | 0.093931 | 1.954115 | 95 | |
159 | dense | 1536 | 256 | 6.000000 | 8.818753 | 0.136663 | 4.531508 | 58 | under-trained |
160 | dense | 8960 | 1536 | 5.833333 | 6.025584 | 0.029758 | 12.763737 | 144 | under-trained |
161 | dense | 8960 | 1536 | 5.833333 | 7.168961 | 0.058317 | 12.039945 | 214 | under-trained |
162 | dense | 1536 | 256 | 6.000000 | 2.819453 | 0.098939 | 1.848671 | 198 | |
163 | dense | 1536 | 1536 | 1.000000 | 5.010596 | 0.040776 | 7.078160 | 78 | |
164 | dense | 1536 | 256 | 6.000000 | 2.670785 | 0.066310 | 2.266284 | 44 | |
165 | dense | 8960 | 1536 | 5.833333 | 8.429790 | 0.041717 | 13.912143 | 104 | under-trained |
166 | dense | 8960 | 1536 | 5.833333 | 5.269237 | 0.024233 | 11.309271 | 149 | |
167 | dense | 8960 | 1536 | 5.833333 | 10.859550 | 0.074042 | 17.533613 | 119 | under-trained |
168 | dense | 1536 | 1536 | 1.000000 | 2.261389 | 0.062543 | 3.286702 | 219 | |
169 | dense | 1536 | 256 | 6.000000 | 6.479276 | 0.052193 | 4.468302 | 59 | under-trained |
170 | dense | 1536 | 1536 | 1.000000 | 2.528988 | 0.063315 | 3.744746 | 231 | |
171 | dense | 1536 | 1536 | 1.000000 | 8.120524 | 0.047558 | 10.181946 | 52 | under-trained |
172 | dense | 1536 | 256 | 6.000000 | 4.171582 | 0.054418 | 3.403425 | 32 | |
173 | dense | 8960 | 1536 | 5.833333 | 8.931620 | 0.057484 | 14.679871 | 114 | under-trained |
174 | dense | 8960 | 1536 | 5.833333 | 5.906404 | 0.026772 | 12.384088 | 135 | |
175 | dense | 8960 | 1536 | 5.833333 | 11.296101 | 0.048852 | 17.401210 | 90 | under-trained |
176 | dense | 1536 | 256 | 6.000000 | 6.021168 | 0.035875 | 4.901322 | 42 | under-trained |
177 | dense | 8960 | 1536 | 5.833333 | 7.045234 | 0.061599 | 10.716275 | 187 | under-trained |
178 | dense | 8960 | 1536 | 5.833333 | 5.936938 | 0.024073 | 11.820454 | 135 | |
179 | dense | 8960 | 1536 | 5.833333 | 9.431707 | 0.052813 | 15.379787 | 106 | under-trained |
180 | dense | 1536 | 256 | 6.000000 | 1.999358 | 0.060977 | 1.449005 | 155 | over-trained |
181 | dense | 1536 | 1536 | 1.000000 | 3.583733 | 0.016739 | 5.704252 | 159 | |
182 | dense | 1536 | 1536 | 1.000000 | 3.008343 | 0.027258 | 4.704169 | 116 | |
183 | dense | 1536 | 256 | 6.000000 | 6.119400 | 0.053731 | 6.727109 | 37 | under-trained |
184 | dense | 1536 | 1536 | 1.000000 | 3.041233 | 0.065331 | 4.264973 | 70 | |
185 | dense | 1536 | 1536 | 1.000000 | 6.541357 | 0.059740 | 11.305734 | 91 | under-trained |
186 | dense | 1536 | 256 | 6.000000 | 1.816577 | 0.074641 | 1.534134 | 139 | over-trained |
187 | dense | 8960 | 1536 | 5.833333 | 6.768009 | 0.054838 | 15.718645 | 182 | under-trained |
188 | dense | 8960 | 1536 | 5.833333 | 4.737632 | 0.015891 | 10.142019 | 209 | |
189 | dense | 8960 | 1536 | 5.833333 | 12.682194 | 0.035168 | 18.843943 | 48 | under-trained |
190 | dense | 1536 | 256 | 6.000000 | 6.032333 | 0.118819 | 6.288753 | 84 | under-trained |
191 | dense | 1536 | 1536 | 1.000000 | 2.187914 | 0.060869 | 3.056463 | 216 | |
192 | dense | 1536 | 1536 | 1.000000 | 2.884349 | 0.046297 | 5.053284 | 227 | |
193 | dense | 1536 | 256 | 6.000000 | 1.886137 | 0.051696 | 1.467891 | 124 | over-trained |
194 | dense | 8960 | 1536 | 5.833333 | 5.459954 | 0.063053 | 11.643075 | 289 | |
195 | dense | 8960 | 1536 | 5.833333 | 4.602602 | 0.017919 | 8.044780 | 187 | |
196 | dense | 8960 | 1536 | 5.833333 | 4.534571 | 0.034116 | 10.790930 | 344 | |