Find this model in the Qwen2.5-small model summary
id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 8960 | 1536 | 5.833333 | 3.856309 | 0.015087 | -6.922298 | 149 | |
2 | dense | 8960 | 1536 | 5.833333 | 3.743784 | 0.021251 | -5.776101 | 123 | |
3 | dense | 8960 | 1536 | 5.833333 | 3.601378 | 0.022098 | -5.283770 | 117 | |
4 | dense | 1536 | 256 | 6.000000 | 2.329503 | 0.048585 | -4.716284 | 118 | |
5 | dense | 1536 | 1536 | 1.000000 | 3.207409 | 0.019578 | -6.709039 | 72 | |
6 | dense | 1536 | 1536 | 1.000000 | 2.374293 | 0.012538 | -3.815103 | 181 | |
7 | dense | 1536 | 256 | 6.000000 | 4.070437 | 0.033233 | -12.210399 | 37 | |
8 | dense | 8960 | 1536 | 5.833333 | 3.643620 | 0.015701 | -7.000235 | 148 | |
9 | dense | 8960 | 1536 | 5.833333 | 3.618663 | 0.023702 | -6.227721 | 151 | |
10 | dense | 8960 | 1536 | 5.833333 | 3.303550 | 0.020356 | -5.740900 | 146 | |
11 | dense | 1536 | 256 | 6.000000 | 2.158130 | 0.037712 | -4.755109 | 55 | |
12 | dense | 1536 | 1536 | 1.000000 | 2.742111 | 0.028759 | -5.235784 | 56 | |
13 | dense | 1536 | 1536 | 1.000000 | 2.214799 | 0.043204 | -4.267885 | 172 | |
14 | dense | 1536 | 256 | 6.000000 | 3.056191 | 0.020996 | -8.010254 | 47 | |
15 | dense | 1536 | 1536 | 1.000000 | 2.837669 | 0.026461 | -5.994994 | 77 | |
16 | dense | 8960 | 1536 | 5.833333 | 3.939383 | 0.015827 | -7.664268 | 142 | |
17 | dense | 1536 | 1536 | 1.000000 | 2.360472 | 0.021150 | -4.809516 | 185 | |
18 | dense | 1536 | 256 | 6.000000 | 2.592906 | 0.032159 | -6.740856 | 66 | |
19 | dense | 8960 | 1536 | 5.833333 | 3.449801 | 0.016099 | -5.788574 | 219 | |
20 | dense | 8960 | 1536 | 5.833333 | 3.413945 | 0.022404 | -5.959189 | 90 | |
21 | dense | 1536 | 256 | 6.000000 | 2.986545 | 0.022941 | -8.248993 | 57 | |
22 | dense | 1536 | 1536 | 1.000000 | 2.392885 | 0.025445 | -4.928147 | 128 | |
23 | dense | 1536 | 256 | 6.000000 | 3.056169 | 0.031555 | -8.730122 | 57 | |
24 | dense | 8960 | 1536 | 5.833333 | 3.689988 | 0.012140 | -6.201592 | 222 | |
25 | dense | 1536 | 1536 | 1.000000 | 2.734007 | 0.037600 | -5.941450 | 86 | |
26 | dense | 8960 | 1536 | 5.833333 | 4.100697 | 0.015941 | -8.067601 | 123 | |
27 | dense | 8960 | 1536 | 5.833333 | 3.546918 | 0.019590 | -6.194234 | 116 | |
28 | dense | 1536 | 256 | 6.000000 | 2.467319 | 0.039567 | -6.253182 | 63 | |
29 | dense | 8960 | 1536 | 5.833333 | 3.771016 | 0.016121 | -7.328037 | 161 | |
30 | dense | 1536 | 256 | 6.000000 | 2.882366 | 0.044115 | -7.408065 | 31 | |
31 | dense | 1536 | 1536 | 1.000000 | 3.186670 | 0.039566 | -7.174576 | 66 | |
32 | dense | 8960 | 1536 | 5.833333 | 3.616405 | 0.018606 | -5.906482 | 254 | |
33 | dense | 1536 | 1536 | 1.000000 | 2.510337 | 0.025356 | -5.670443 | 146 | |
34 | dense | 8960 | 1536 | 5.833333 | 3.243073 | 0.012982 | -5.444295 | 185 | |
35 | dense | 1536 | 256 | 6.000000 | 3.849792 | 0.044923 | -11.494306 | 37 | |
36 | dense | 8960 | 1536 | 5.833333 | 3.971919 | 0.015168 | -7.521452 | 87 | |
37 | dense | 8960 | 1536 | 5.833333 | 3.703736 | 0.021710 | -5.674391 | 284 | |
38 | dense | 1536 | 1536 | 1.000000 | 2.676626 | 0.040481 | -5.625588 | 83 | |
39 | dense | 1536 | 1536 | 1.000000 | 2.349052 | 0.019763 | -4.873136 | 137 | |
40 | dense | 1536 | 256 | 6.000000 | 3.303417 | 0.046786 | -9.602016 | 40 | |
41 | dense | 1536 | 256 | 6.000000 | 2.341072 | 0.035304 | -6.044737 | 93 | |
42 | dense | 8960 | 1536 | 5.833333 | 3.365744 | 0.019135 | -5.572396 | 153 | |
43 | dense | 8960 | 1536 | 5.833333 | 3.268205 | 0.013783 | -5.081680 | 127 | |
44 | dense | 8960 | 1536 | 5.833333 | 3.359975 | 0.015272 | -4.818328 | 258 | |
45 | dense | 8960 | 1536 | 5.833333 | 3.841595 | 0.025812 | -7.487191 | 118 | |
46 | dense | 1536 | 1536 | 1.000000 | 2.314226 | 0.014754 | -4.783687 | 194 | |
47 | dense | 1536 | 256 | 6.000000 | 3.040272 | 0.028901 | -8.097393 | 58 | |
48 | dense | 1536 | 256 | 6.000000 | 2.420181 | 0.024083 | -6.206648 | 83 | |
49 | dense | 1536 | 1536 | 1.000000 | 2.791125 | 0.032335 | -6.014475 | 79 | |
50 | dense | 8960 | 1536 | 5.833333 | 3.261887 | 0.019542 | -4.849144 | 123 | |
51 | dense | 1536 | 256 | 6.000000 | 2.385233 | 0.035165 | -5.952318 | 70 | |
52 | dense | 8960 | 1536 | 5.833333 | 3.995235 | 0.023161 | -7.688950 | 87 | |
53 | dense | 8960 | 1536 | 5.833333 | 3.235104 | 0.010128 | -4.482078 | 129 | |
54 | dense | 1536 | 1536 | 1.000000 | 1.748483 | 0.089185 | -3.785074 | 575 | over-trained |
55 | dense | 1536 | 256 | 6.000000 | 3.637142 | 0.054586 | -10.741814 | 37 | |
56 | dense | 1536 | 1536 | 1.000000 | 2.289729 | 0.030457 | -4.748295 | 139 | |
57 | dense | 8960 | 1536 | 5.833333 | 3.199811 | 0.012261 | -4.415784 | 169 | |
58 | dense | 8960 | 1536 | 5.833333 | 3.929373 | 0.035576 | -7.314764 | 90 | |
59 | dense | 1536 | 256 | 6.000000 | 2.608383 | 0.038440 | -6.899949 | 52 | |
60 | dense | 8960 | 1536 | 5.833333 | 3.417972 | 0.026268 | -5.143827 | 52 | |
61 | dense | 1536 | 1536 | 1.000000 | 1.881846 | 0.093037 | -4.421131 | 517 | over-trained |
62 | dense | 1536 | 256 | 6.000000 | 3.904610 | 0.068532 | -11.758886 | 42 | |
63 | dense | 1536 | 1536 | 1.000000 | 2.392600 | 0.029528 | -5.080652 | 104 | |
64 | dense | 8960 | 1536 | 5.833333 | 3.089123 | 0.017281 | -4.132979 | 117 | |
65 | dense | 1536 | 256 | 6.000000 | 2.463616 | 0.047246 | -6.505332 | 82 | |
66 | dense | 1536 | 1536 | 1.000000 | 1.913385 | 0.089304 | -4.320203 | 451 | over-trained |
67 | dense | 1536 | 1536 | 1.000000 | 2.395005 | 0.037913 | -5.065468 | 134 | |
68 | dense | 8960 | 1536 | 5.833333 | 3.176786 | 0.027293 | -4.666143 | 96 | |
69 | dense | 8960 | 1536 | 5.833333 | 3.894346 | 0.024192 | -7.494784 | 83 | |
70 | dense | 1536 | 256 | 6.000000 | 3.980775 | 0.040522 | -11.513235 | 26 | |
71 | dense | 8960 | 1536 | 5.833333 | 3.710672 | 0.024004 | -6.933979 | 93 | |
72 | dense | 8960 | 1536 | 5.833333 | 3.130031 | 0.020588 | -4.477046 | 81 | |
73 | dense | 1536 | 256 | 6.000000 | 4.088501 | 0.058826 | -12.242331 | 30 | |
74 | dense | 1536 | 256 | 6.000000 | 2.635073 | 0.057820 | -7.133862 | 38 | |
75 | dense | 1536 | 1536 | 1.000000 | 1.789471 | 0.084963 | -3.997355 | 528 | over-trained |
76 | dense | 1536 | 1536 | 1.000000 | 2.188790 | 0.049417 | -4.487058 | 182 | |
77 | dense | 8960 | 1536 | 5.833333 | 2.995201 | 0.015070 | -3.820054 | 83 | |
78 | dense | 1536 | 256 | 6.000000 | 2.514097 | 0.046245 | -6.463808 | 40 | |
79 | dense | 8960 | 1536 | 5.833333 | 2.871071 | 0.015217 | -3.505837 | 138 | |
80 | dense | 8960 | 1536 | 5.833333 | 3.518902 | 0.037459 | -6.538902 | 110 | |
81 | dense | 1536 | 1536 | 1.000000 | 2.286947 | 0.041188 | -4.730208 | 142 | |
82 | dense | 1536 | 1536 | 1.000000 | 3.331623 | 0.052543 | -7.507385 | 37 | |
83 | dense | 1536 | 256 | 6.000000 | 3.551839 | 0.060391 | -10.450941 | 52 | |
84 | dense | 8960 | 1536 | 5.833333 | 2.988545 | 0.024117 | -4.182762 | 120 | |
85 | dense | 8960 | 1536 | 5.833333 | 3.881256 | 0.040819 | -7.424648 | 81 | |
86 | dense | 8960 | 1536 | 5.833333 | 2.811351 | 0.018191 | -3.471001 | 151 | |
87 | dense | 8960 | 1536 | 5.833333 | 3.007684 | 0.024635 | -4.219667 | 99 | |
88 | dense | 1536 | 1536 | 1.000000 | 1.806657 | 0.095912 | -3.847614 | 518 | over-trained |
89 | dense | 1536 | 256 | 6.000000 | 3.313018 | 0.045428 | -8.750946 | 46 | |
90 | dense | 1536 | 1536 | 1.000000 | 2.402853 | 0.040029 | -4.906898 | 86 | |
91 | dense | 1536 | 256 | 6.000000 | 2.530821 | 0.051268 | -6.783945 | 62 | |
92 | dense | 1536 | 1536 | 1.000000 | 2.371554 | 0.051534 | -4.885779 | 112 | |
93 | dense | 8960 | 1536 | 5.833333 | 2.849343 | 0.019470 | -3.552184 | 152 | |
94 | dense | 8960 | 1536 | 5.833333 | 3.147705 | 0.029491 | -4.377477 | 65 | |
95 | dense | 1536 | 256 | 6.000000 | 2.517936 | 0.044197 | -6.545969 | 58 | |
96 | dense | 8960 | 1536 | 5.833333 | 3.856020 | 0.035402 | -7.316891 | 75 | |
97 | dense | 1536 | 256 | 6.000000 | 3.022545 | 0.103821 | -9.144131 | 103 | |
98 | dense | 1536 | 1536 | 1.000000 | 1.738071 | 0.075016 | -3.739393 | 552 | over-trained |
99 | dense | 8960 | 1536 | 5.833333 | 3.848511 | 0.035942 | -7.198791 | 74 | |
100 | dense | 1536 | 256 | 6.000000 | 3.450536 | 0.105530 | -10.169896 | 81 | |
101 | dense | 8960 | 1536 | 5.833333 | 2.997865 | 0.031055 | -4.204478 | 108 | |
102 | dense | 1536 | 256 | 6.000000 | 2.687411 | 0.054125 | -7.366657 | 53 | |
103 | dense | 1536 | 1536 | 1.000000 | 3.164046 | 0.069060 | -7.188852 | 71 | |
104 | dense | 8960 | 1536 | 5.833333 | 2.870652 | 0.016512 | -3.635988 | 152 | |
105 | dense | 1536 | 1536 | 1.000000 | 2.434678 | 0.047435 | -5.095435 | 112 | |
106 | dense | 1536 | 256 | 6.000000 | 2.536480 | 0.066538 | -6.760973 | 52 | |
107 | dense | 8960 | 1536 | 5.833333 | 2.715309 | 0.022609 | -3.301947 | 225 | |
108 | dense | 8960 | 1536 | 5.833333 | 3.603392 | 0.034071 | -6.602458 | 89 | |
109 | dense | 1536 | 1536 | 1.000000 | 2.290230 | 0.062350 | -4.879178 | 128 | |
110 | dense | 1536 | 1536 | 1.000000 | 4.064616 | 0.054741 | -8.974894 | 21 | |
111 | dense | 1536 | 256 | 6.000000 | 4.449219 | 0.054491 | -13.211337 | 32 | |
112 | dense | 8960 | 1536 | 5.833333 | 2.972653 | 0.024351 | -4.037014 | 115 | |
113 | dense | 1536 | 256 | 6.000000 | 2.405191 | 0.067438 | -6.526859 | 77 | |
114 | dense | 1536 | 256 | 6.000000 | 3.632548 | 0.050552 | -10.294729 | 33 | |
115 | dense | 1536 | 1536 | 1.000000 | 2.398825 | 0.055263 | -4.723327 | 102 | |
116 | dense | 1536 | 1536 | 1.000000 | 1.679212 | 0.074565 | -3.675280 | 597 | over-trained |
117 | dense | 8960 | 1536 | 5.833333 | 2.789708 | 0.019576 | -3.342072 | 106 | |
118 | dense | 8960 | 1536 | 5.833333 | 2.870329 | 0.019506 | -3.850653 | 143 | |
119 | dense | 8960 | 1536 | 5.833333 | 3.477775 | 0.036544 | -5.845135 | 71 | |
120 | dense | 8960 | 1536 | 5.833333 | 3.024434 | 0.073006 | -5.146931 | 307 | |
121 | dense | 1536 | 1536 | 1.000000 | 2.153823 | 0.049359 | -4.195467 | 180 | |
122 | dense | 8960 | 1536 | 5.833333 | 2.858436 | 0.024397 | -3.894409 | 97 | |
123 | dense | 1536 | 256 | 6.000000 | 2.410745 | 0.057275 | -6.418013 | 65 | |
124 | dense | 1536 | 1536 | 1.000000 | 1.699709 | 0.079781 | -3.079747 | 572 | over-trained |
125 | dense | 1536 | 256 | 6.000000 | 3.041210 | 0.061722 | -8.394266 | 44 | |
126 | dense | 8960 | 1536 | 5.833333 | 2.716991 | 0.018952 | -3.349308 | 118 | |
127 | dense | 8960 | 1536 | 5.833333 | 3.777718 | 0.034646 | -6.849075 | 89 | |
128 | dense | 8960 | 1536 | 5.833333 | 2.977955 | 0.017075 | -4.162147 | 120 | |
129 | dense | 8960 | 1536 | 5.833333 | 2.871623 | 0.022021 | -3.705877 | 95 | |
130 | dense | 1536 | 256 | 6.000000 | 2.659712 | 0.064330 | -7.282250 | 57 | |
131 | dense | 1536 | 1536 | 1.000000 | 1.729231 | 0.096014 | -4.015875 | 579 | over-trained |
132 | dense | 1536 | 1536 | 1.000000 | 2.631728 | 0.055071 | -5.674085 | 70 | |
133 | dense | 1536 | 256 | 6.000000 | 4.482802 | 0.065186 | -13.718454 | 31 | |
134 | dense | 8960 | 1536 | 5.833333 | 4.407760 | 0.034173 | -7.912508 | 52 | |
135 | dense | 8960 | 1536 | 5.833333 | 2.907301 | 0.019696 | -3.892646 | 143 | |
136 | dense | 8960 | 1536 | 5.833333 | 3.077625 | 0.017884 | -4.526383 | 127 | |
137 | dense | 1536 | 256 | 6.000000 | 3.646487 | 0.070733 | -10.552933 | 36 | |
138 | dense | 1536 | 1536 | 1.000000 | 2.352498 | 0.052847 | -4.926632 | 143 | |
139 | dense | 1536 | 1536 | 1.000000 | 1.722539 | 0.099456 | -3.617362 | 627 | over-trained |
140 | dense | 1536 | 256 | 6.000000 | 2.450018 | 0.061727 | -6.633658 | 73 | |
141 | dense | 8960 | 1536 | 5.833333 | 5.181114 | 0.048083 | -9.917528 | 23 | |
142 | dense | 1536 | 256 | 6.000000 | 3.251248 | 0.059352 | -8.850716 | 26 | |
143 | dense | 8960 | 1536 | 5.833333 | 3.264578 | 0.017786 | -4.884891 | 105 | |
144 | dense | 8960 | 1536 | 5.833333 | 3.082755 | 0.020470 | -4.218652 | 106 | |
145 | dense | 1536 | 1536 | 1.000000 | 1.832920 | 0.088847 | -4.092188 | 529 | over-trained |
146 | dense | 1536 | 1536 | 1.000000 | 2.435973 | 0.052679 | -5.011213 | 125 | |
147 | dense | 1536 | 256 | 6.000000 | 3.134922 | 0.114366 | -9.119937 | 111 | |
148 | dense | 1536 | 1536 | 1.000000 | 1.683513 | 0.091014 | -3.848171 | 636 | over-trained |
149 | dense | 8960 | 1536 | 5.833333 | 3.136402 | 0.016088 | -4.341796 | 129 | |
150 | dense | 8960 | 1536 | 5.833333 | 4.137617 | 0.080320 | -7.584144 | 166 | |
151 | dense | 1536 | 256 | 6.000000 | 2.634745 | 0.071866 | -6.932633 | 63 | |
152 | dense | 8960 | 1536 | 5.833333 | 3.298773 | 0.018430 | -4.952596 | 126 | |
153 | dense | 1536 | 1536 | 1.000000 | 2.865390 | 0.058986 | -6.147461 | 51 | |
154 | dense | 1536 | 256 | 6.000000 | 2.862314 | 0.116882 | -8.347426 | 118 | |
155 | dense | 1536 | 1536 | 1.000000 | 1.868239 | 0.101143 | -3.984157 | 533 | over-trained |
156 | dense | 1536 | 1536 | 1.000000 | 2.420034 | 0.041288 | -4.919487 | 130 | |
157 | dense | 8960 | 1536 | 5.833333 | 3.320297 | 0.015408 | -5.055573 | 138 | |
158 | dense | 8960 | 1536 | 5.833333 | 3.169060 | 0.014066 | -4.535855 | 139 | |
159 | dense | 8960 | 1536 | 5.833333 | 4.401398 | 0.022717 | -7.924927 | 61 | |
160 | dense | 1536 | 256 | 6.000000 | 2.702137 | 0.038601 | -6.760528 | 51 | |
161 | dense | 1536 | 256 | 6.000000 | 3.069261 | 0.122749 | -8.778122 | 115 | |
162 | dense | 1536 | 256 | 6.000000 | 3.857261 | 0.037830 | -10.523723 | 23 | |
163 | dense | 1536 | 1536 | 1.000000 | 2.267898 | 0.039282 | -4.493526 | 166 | |
164 | dense | 1536 | 1536 | 1.000000 | 3.442088 | 0.032681 | -6.000331 | 61 | |
165 | dense | 1536 | 256 | 6.000000 | 2.669437 | 0.051791 | -7.158458 | 54 | |
166 | dense | 8960 | 1536 | 5.833333 | 3.297155 | 0.018206 | -4.342044 | 158 | |
167 | dense | 8960 | 1536 | 5.833333 | 3.156638 | 0.012648 | -4.472148 | 172 | |
168 | dense | 8960 | 1536 | 5.833333 | 4.265355 | 0.024035 | -6.933075 | 107 | |
169 | dense | 8960 | 1536 | 5.833333 | 4.122975 | 0.018597 | -6.403676 | 118 | |
170 | dense | 1536 | 256 | 6.000000 | 2.886788 | 0.052071 | -7.362856 | 31 | |
171 | dense | 1536 | 256 | 6.000000 | 3.974314 | 0.041844 | -10.533901 | 33 | |
172 | dense | 1536 | 1536 | 1.000000 | 2.414712 | 0.039080 | -4.846511 | 108 | |
173 | dense | 1536 | 1536 | 1.000000 | 3.397787 | 0.034410 | -6.292933 | 42 | |
174 | dense | 8960 | 1536 | 5.833333 | 3.100160 | 0.011946 | -4.390722 | 149 | |
175 | dense | 8960 | 1536 | 5.833333 | 3.264132 | 0.017707 | -4.205403 | 138 | |
176 | dense | 8960 | 1536 | 5.833333 | 4.176850 | 0.018789 | -6.114907 | 113 | |
177 | dense | 8960 | 1536 | 5.833333 | 2.966668 | 0.012749 | -3.726815 | 184 | |
178 | dense | 1536 | 1536 | 1.000000 | 2.196494 | 0.035805 | -3.899535 | 174 | |
179 | dense | 8960 | 1536 | 5.833333 | 3.107045 | 0.013472 | -3.617999 | 176 | |
180 | dense | 1536 | 1536 | 1.000000 | 2.540673 | 0.023420 | -3.573314 | 119 | |
181 | dense | 1536 | 256 | 6.000000 | 2.693405 | 0.044325 | -6.307772 | 38 | |
182 | dense | 1536 | 256 | 6.000000 | 2.370255 | 0.042774 | -5.819136 | 54 | |
183 | dense | 1536 | 256 | 6.000000 | 2.587717 | 0.040303 | -6.632256 | 47 | |
184 | dense | 1536 | 1536 | 1.000000 | 1.966008 | 0.087288 | -2.674684 | 472 | over-trained |
185 | dense | 1536 | 256 | 6.000000 | 3.009345 | 0.022560 | -6.597735 | 46 | |
186 | dense | 8960 | 1536 | 5.833333 | 3.822843 | 0.028604 | -4.691234 | 150 | |
187 | dense | 1536 | 1536 | 1.000000 | 2.269076 | 0.036367 | -4.398178 | 195 | |
188 | dense | 8960 | 1536 | 5.833333 | 3.144322 | 0.012729 | -3.200354 | 173 | |
189 | dense | 8960 | 1536 | 5.833333 | 2.969492 | 0.011452 | -3.009386 | 176 | |
190 | dense | 1536 | 1536 | 1.000000 | 2.727325 | 0.036923 | -2.955769 | 75 | |
191 | dense | 1536 | 1536 | 1.000000 | 2.396094 | 0.033507 | -4.046921 | 124 | |
192 | dense | 1536 | 256 | 6.000000 | 2.532206 | 0.024619 | -4.823638 | 54 | |
193 | dense | 1536 | 256 | 6.000000 | 2.481382 | 0.030259 | -6.016692 | 52 | |
194 | dense | 8960 | 1536 | 5.833333 | 3.295819 | 0.024309 | -3.017168 | 250 | |
195 | dense | 8960 | 1536 | 5.833333 | 3.135125 | 0.016665 | -3.072388 | 275 | |
196 | dense | 8960 | 1536 | 5.833333 | 3.204400 | 0.021877 | -4.001899 | 236 | |