Find this model in the Qwen2-small model summary
id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 8960 | 1536 | 5.833333 | 3.946585 | 0.025674 | -8.180127 | 134 | |
2 | dense | 8960 | 1536 | 5.833333 | 3.369453 | 0.022620 | -5.670709 | 138 | |
3 | dense | 8960 | 1536 | 5.833333 | 3.323439 | 0.024311 | -5.617861 | 111 | |
4 | dense | 1536 | 256 | 6.000000 | 2.214636 | 0.053901 | -4.351398 | 129 | |
5 | dense | 1536 | 1536 | 1.000000 | 2.903404 | 0.022758 | -6.561956 | 92 | |
6 | dense | 1536 | 1536 | 1.000000 | 2.396283 | 0.019349 | -4.434157 | 191 | |
7 | dense | 1536 | 256 | 6.000000 | 3.921332 | 0.032622 | -12.639108 | 42 | |
8 | dense | 1536 | 256 | 6.000000 | 3.243600 | 0.038631 | -9.501149 | 33 | |
9 | dense | 1536 | 1536 | 1.000000 | 2.507574 | 0.041121 | -5.576311 | 63 | |
10 | dense | 1536 | 1536 | 1.000000 | 1.818169 | 0.075363 | -4.029272 | 479 | over-trained |
11 | dense | 8960 | 1536 | 5.833333 | 2.846972 | 0.035738 | -5.535076 | 275 | |
12 | dense | 8960 | 1536 | 5.833333 | 2.967766 | 0.031433 | -5.710125 | 227 | |
13 | dense | 8960 | 1536 | 5.833333 | 4.132232 | 0.018966 | -9.265908 | 159 | |
14 | dense | 1536 | 256 | 6.000000 | 2.179972 | 0.027022 | -5.642216 | 79 | |
15 | dense | 1536 | 256 | 6.000000 | 3.344693 | 0.036737 | -10.720617 | 49 | |
16 | dense | 8960 | 1536 | 5.833333 | 3.498778 | 0.017381 | -6.572748 | 170 | |
17 | dense | 1536 | 256 | 6.000000 | 2.511361 | 0.033311 | -6.701355 | 57 | |
18 | dense | 1536 | 1536 | 1.000000 | 2.834270 | 0.025444 | -6.522429 | 76 | |
19 | dense | 1536 | 1536 | 1.000000 | 2.359409 | 0.022668 | -5.313370 | 179 | |
20 | dense | 8960 | 1536 | 5.833333 | 3.384987 | 0.023776 | -6.422398 | 53 | |
21 | dense | 8960 | 1536 | 5.833333 | 3.863233 | 0.016504 | -8.421248 | 143 | |
22 | dense | 8960 | 1536 | 5.833333 | 3.587523 | 0.014228 | -7.875166 | 151 | |
23 | dense | 8960 | 1536 | 5.833333 | 3.440668 | 0.022183 | -6.357759 | 295 | |
24 | dense | 8960 | 1536 | 5.833333 | 3.180983 | 0.010792 | -5.920294 | 225 | |
25 | dense | 1536 | 256 | 6.000000 | 2.264312 | 0.032647 | -6.170786 | 91 | |
26 | dense | 1536 | 1536 | 1.000000 | 2.354013 | 0.034528 | -5.195165 | 87 | |
27 | dense | 1536 | 1536 | 1.000000 | 2.207460 | 0.015942 | -5.006212 | 208 | |
28 | dense | 1536 | 256 | 6.000000 | 2.668291 | 0.045721 | -7.696089 | 62 | |
29 | dense | 8960 | 1536 | 5.833333 | 2.958780 | 0.010200 | -5.131709 | 219 | |
30 | dense | 8960 | 1536 | 5.833333 | 3.455215 | 0.016289 | -7.411496 | 159 | |
31 | dense | 8960 | 1536 | 5.833333 | 3.190601 | 0.021628 | -5.374738 | 341 | |
32 | dense | 1536 | 256 | 6.000000 | 2.143924 | 0.040788 | -5.816542 | 85 | |
33 | dense | 1536 | 1536 | 1.000000 | 2.742641 | 0.037269 | -6.355096 | 44 | |
34 | dense | 1536 | 1536 | 1.000000 | 2.195740 | 0.025201 | -4.847403 | 149 | |
35 | dense | 1536 | 256 | 6.000000 | 2.962299 | 0.044661 | -9.215166 | 56 | |
36 | dense | 1536 | 1536 | 1.000000 | 2.643512 | 0.036232 | -6.229244 | 75 | |
37 | dense | 1536 | 256 | 6.000000 | 2.353697 | 0.026825 | -6.693001 | 82 | |
38 | dense | 8960 | 1536 | 5.833333 | 2.943914 | 0.012329 | -5.060660 | 171 | |
39 | dense | 8960 | 1536 | 5.833333 | 3.443019 | 0.020194 | -7.013294 | 109 | |
40 | dense | 1536 | 256 | 6.000000 | 2.883049 | 0.044629 | -8.969828 | 55 | |
41 | dense | 8960 | 1536 | 5.833333 | 3.116271 | 0.017497 | -5.147964 | 321 | |
42 | dense | 1536 | 1536 | 1.000000 | 2.286424 | 0.018248 | -5.022663 | 155 | |
43 | dense | 1536 | 1536 | 1.000000 | 2.227299 | 0.025447 | -4.646233 | 162 | |
44 | dense | 8960 | 1536 | 5.833333 | 3.159564 | 0.012302 | -5.127829 | 267 | |
45 | dense | 8960 | 1536 | 5.833333 | 3.126033 | 0.015585 | -5.351352 | 124 | |
46 | dense | 1536 | 256 | 6.000000 | 2.436507 | 0.038007 | -6.910518 | 56 | |
47 | dense | 1536 | 1536 | 1.000000 | 2.646517 | 0.032890 | -6.315810 | 89 | |
48 | dense | 8960 | 1536 | 5.833333 | 3.575298 | 0.017033 | -7.314001 | 124 | |
49 | dense | 1536 | 256 | 6.000000 | 2.753723 | 0.045967 | -8.633265 | 78 | |
50 | dense | 1536 | 256 | 6.000000 | 2.705553 | 0.024787 | -7.261405 | 61 | |
51 | dense | 1536 | 1536 | 1.000000 | 2.765509 | 0.042174 | -6.545842 | 66 | |
52 | dense | 1536 | 256 | 6.000000 | 2.494468 | 0.025650 | -7.027119 | 74 | |
53 | dense | 1536 | 1536 | 1.000000 | 2.363552 | 0.019092 | -5.575012 | 136 | |
54 | dense | 8960 | 1536 | 5.833333 | 3.031629 | 0.012782 | -4.833263 | 205 | |
55 | dense | 8960 | 1536 | 5.833333 | 3.839722 | 0.022347 | -8.080428 | 68 | |
56 | dense | 8960 | 1536 | 5.833333 | 3.037668 | 0.017173 | -5.225155 | 121 | |
57 | dense | 1536 | 256 | 6.000000 | 3.805272 | 0.062200 | -12.556106 | 35 | |
58 | dense | 1536 | 1536 | 1.000000 | 2.404791 | 0.026691 | -5.679233 | 111 | |
59 | dense | 8960 | 1536 | 5.833333 | 3.801872 | 0.026277 | -8.295202 | 99 | |
60 | dense | 8960 | 1536 | 5.833333 | 3.110169 | 0.014164 | -5.171261 | 133 | |
61 | dense | 8960 | 1536 | 5.833333 | 3.136779 | 0.017348 | -5.584127 | 107 | |
62 | dense | 1536 | 256 | 6.000000 | 2.454509 | 0.041653 | -6.825018 | 52 | |
63 | dense | 1536 | 1536 | 1.000000 | 1.895017 | 0.084995 | -4.736834 | 476 | over-trained |
64 | dense | 1536 | 1536 | 1.000000 | 2.418266 | 0.022400 | -5.243413 | 125 | |
65 | dense | 8960 | 1536 | 5.833333 | 3.698975 | 0.040734 | -7.912564 | 118 | |
66 | dense | 8960 | 1536 | 5.833333 | 2.972519 | 0.014674 | -4.939804 | 176 | |
67 | dense | 8960 | 1536 | 5.833333 | 3.157116 | 0.026513 | -5.656694 | 66 | |
68 | dense | 1536 | 256 | 6.000000 | 2.420345 | 0.049197 | -7.069293 | 80 | |
69 | dense | 1536 | 1536 | 1.000000 | 1.787283 | 0.098679 | -4.693446 | 557 | over-trained |
70 | dense | 1536 | 256 | 6.000000 | 3.837846 | 0.046830 | -12.141753 | 27 | |
71 | dense | 8960 | 1536 | 5.833333 | 2.872830 | 0.018664 | -4.588078 | 185 | |
72 | dense | 1536 | 256 | 6.000000 | 3.172172 | 0.074884 | -10.456415 | 66 | |
73 | dense | 1536 | 1536 | 1.000000 | 2.457330 | 0.039010 | -5.578133 | 93 | |
74 | dense | 1536 | 1536 | 1.000000 | 1.863314 | 0.074621 | -4.738359 | 452 | over-trained |
75 | dense | 1536 | 256 | 6.000000 | 2.691749 | 0.044285 | -7.719557 | 36 | |
76 | dense | 8960 | 1536 | 5.833333 | 3.213295 | 0.034175 | -5.513514 | 50 | |
77 | dense | 8960 | 1536 | 5.833333 | 4.057246 | 0.047560 | -8.858590 | 57 | |
78 | dense | 1536 | 256 | 6.000000 | 3.652105 | 0.056468 | -11.569036 | 28 | |
79 | dense | 1536 | 1536 | 1.000000 | 1.718922 | 0.080682 | -4.322148 | 600 | over-trained |
80 | dense | 1536 | 256 | 6.000000 | 2.547644 | 0.057646 | -7.342823 | 46 | |
81 | dense | 8960 | 1536 | 5.833333 | 2.958570 | 0.026809 | -4.956644 | 89 | |
82 | dense | 8960 | 1536 | 5.833333 | 2.748683 | 0.021071 | -4.299220 | 224 | |
83 | dense | 8960 | 1536 | 5.833333 | 3.730420 | 0.037108 | -7.855194 | 74 | |
84 | dense | 1536 | 1536 | 1.000000 | 2.403188 | 0.039054 | -5.552876 | 105 | |
85 | dense | 1536 | 256 | 6.000000 | 3.733114 | 0.072965 | -12.467631 | 43 | |
86 | dense | 1536 | 1536 | 1.000000 | 2.368437 | 0.042190 | -5.309900 | 95 | |
87 | dense | 1536 | 1536 | 1.000000 | 1.765972 | 0.090946 | -4.553192 | 562 | over-trained |
88 | dense | 1536 | 256 | 6.000000 | 2.446173 | 0.053513 | -7.143075 | 50 | |
89 | dense | 8960 | 1536 | 5.833333 | 3.020323 | 0.028426 | -4.944563 | 82 | |
90 | dense | 8960 | 1536 | 5.833333 | 2.843163 | 0.013934 | -4.279895 | 123 | |
91 | dense | 8960 | 1536 | 5.833333 | 3.468413 | 0.049930 | -7.354438 | 130 | |
92 | dense | 8960 | 1536 | 5.833333 | 2.810103 | 0.012748 | -4.190390 | 124 | |
93 | dense | 1536 | 256 | 6.000000 | 3.342018 | 0.063597 | -10.386256 | 41 | |
94 | dense | 1536 | 1536 | 1.000000 | 2.250848 | 0.047170 | -5.108809 | 157 | |
95 | dense | 1536 | 1536 | 1.000000 | 1.778057 | 0.088564 | -4.421531 | 494 | over-trained |
96 | dense | 8960 | 1536 | 5.833333 | 3.738604 | 0.045215 | -8.000458 | 75 | |
97 | dense | 8960 | 1536 | 5.833333 | 2.905718 | 0.020283 | -4.687132 | 105 | |
98 | dense | 1536 | 256 | 6.000000 | 2.496261 | 0.048922 | -7.101386 | 51 | |
99 | dense | 1536 | 1536 | 1.000000 | 1.818275 | 0.096934 | -4.687370 | 490 | over-trained |
100 | dense | 1536 | 256 | 6.000000 | 2.541399 | 0.050407 | -7.315346 | 66 | |
101 | dense | 1536 | 256 | 6.000000 | 3.839950 | 0.096671 | -12.501697 | 45 | |
102 | dense | 1536 | 1536 | 1.000000 | 2.288757 | 0.042472 | -5.284815 | 170 | |
103 | dense | 8960 | 1536 | 5.833333 | 2.723376 | 0.068885 | -5.492604 | 384 | |
104 | dense | 8960 | 1536 | 5.833333 | 2.656388 | 0.020017 | -3.873158 | 210 | |
105 | dense | 8960 | 1536 | 5.833333 | 2.629986 | 0.032512 | -4.136898 | 208 | |
106 | dense | 1536 | 1536 | 1.000000 | 2.103423 | 0.038945 | -4.368914 | 182 | |
107 | dense | 1536 | 256 | 6.000000 | 3.572504 | 0.050140 | -10.850027 | 23 | |
108 | dense | 8960 | 1536 | 5.833333 | 2.815813 | 0.023850 | -4.285279 | 83 | |
109 | dense | 8960 | 1536 | 5.833333 | 2.703491 | 0.021443 | -3.765790 | 89 | |
110 | dense | 8960 | 1536 | 5.833333 | 3.404799 | 0.046356 | -6.806645 | 76 | |
111 | dense | 1536 | 256 | 6.000000 | 2.187847 | 0.062738 | -6.129413 | 80 | |
112 | dense | 1536 | 1536 | 1.000000 | 2.859202 | 0.049288 | -6.171813 | 45 | |
113 | dense | 8960 | 1536 | 5.833333 | 3.334447 | 0.040928 | -6.663061 | 103 | |
114 | dense | 8960 | 1536 | 5.833333 | 2.688547 | 0.014142 | -3.835499 | 189 | |
115 | dense | 8960 | 1536 | 5.833333 | 2.860354 | 0.018899 | -4.444119 | 98 | |
116 | dense | 1536 | 1536 | 1.000000 | 1.665704 | 0.072267 | -3.951385 | 594 | over-trained |
117 | dense | 1536 | 1536 | 1.000000 | 2.149105 | 0.054924 | -4.842301 | 185 | |
118 | dense | 1536 | 256 | 6.000000 | 3.226673 | 0.048528 | -10.134217 | 53 | |
119 | dense | 1536 | 256 | 6.000000 | 2.359587 | 0.060096 | -6.455861 | 62 | |
120 | dense | 8960 | 1536 | 5.833333 | 3.823516 | 0.057139 | -8.239188 | 74 | |
121 | dense | 8960 | 1536 | 5.833333 | 2.760801 | 0.011410 | -4.115630 | 167 | |
122 | dense | 8960 | 1536 | 5.833333 | 2.927882 | 0.019860 | -4.720068 | 96 | |
123 | dense | 1536 | 256 | 6.000000 | 2.746145 | 0.047171 | -7.950061 | 38 | |
124 | dense | 1536 | 1536 | 1.000000 | 1.712550 | 0.087261 | -4.389532 | 600 | over-trained |
125 | dense | 1536 | 1536 | 1.000000 | 2.484694 | 0.042178 | -5.499578 | 68 | |
126 | dense | 1536 | 256 | 6.000000 | 3.730320 | 0.057419 | -11.952727 | 31 | |
127 | dense | 8960 | 1536 | 5.833333 | 2.813164 | 0.016195 | -4.282926 | 165 | |
128 | dense | 8960 | 1536 | 5.833333 | 3.023339 | 0.025862 | -4.968489 | 92 | |
129 | dense | 1536 | 256 | 6.000000 | 2.552684 | 0.050043 | -7.184979 | 40 | |
130 | dense | 1536 | 1536 | 1.000000 | 1.760599 | 0.094502 | -4.712427 | 610 | over-trained |
131 | dense | 1536 | 1536 | 1.000000 | 2.271233 | 0.043185 | -5.090607 | 147 | |
132 | dense | 8960 | 1536 | 5.833333 | 4.363655 | 0.042383 | -9.511713 | 49 | |
133 | dense | 1536 | 256 | 6.000000 | 3.195096 | 0.069198 | -10.200005 | 52 | |
134 | dense | 8960 | 1536 | 5.833333 | 3.687511 | 0.052688 | -8.167788 | 131 | |
135 | dense | 8960 | 1536 | 5.833333 | 2.862702 | 0.021765 | -4.450374 | 139 | |
136 | dense | 8960 | 1536 | 5.833333 | 3.014434 | 0.018566 | -5.058950 | 146 | |
137 | dense | 1536 | 1536 | 1.000000 | 1.766708 | 0.096430 | -4.602663 | 588 | over-trained |
138 | dense | 1536 | 1536 | 1.000000 | 2.473024 | 0.041632 | -5.652957 | 77 | |
139 | dense | 1536 | 256 | 6.000000 | 3.275191 | 0.074838 | -10.723051 | 63 | |
140 | dense | 1536 | 256 | 6.000000 | 2.927491 | 0.056447 | -8.422401 | 27 | |
141 | dense | 1536 | 256 | 6.000000 | 3.248360 | 0.071860 | -10.618621 | 57 | |
142 | dense | 8960 | 1536 | 5.833333 | 4.611304 | 0.046657 | -10.120784 | 38 | |
143 | dense | 8960 | 1536 | 5.833333 | 2.943543 | 0.013496 | -4.607271 | 143 | |
144 | dense | 8960 | 1536 | 5.833333 | 3.092565 | 0.017410 | -5.252749 | 133 | |
145 | dense | 1536 | 256 | 6.000000 | 2.552291 | 0.057914 | -7.438893 | 64 | |
146 | dense | 1536 | 1536 | 1.000000 | 1.753779 | 0.082048 | -4.068816 | 549 | over-trained |
147 | dense | 1536 | 1536 | 1.000000 | 2.329618 | 0.047783 | -5.257816 | 148 | |
148 | dense | 8960 | 1536 | 5.833333 | 3.068278 | 0.012583 | -4.983853 | 137 | |
149 | dense | 8960 | 1536 | 5.833333 | 4.300421 | 0.020120 | -9.167001 | 64 | |
150 | dense | 8960 | 1536 | 5.833333 | 3.324627 | 0.019772 | -5.962279 | 94 | |
151 | dense | 1536 | 256 | 6.000000 | 2.637891 | 0.037262 | -7.587359 | 48 | |
152 | dense | 1536 | 1536 | 1.000000 | 1.733697 | 0.086191 | -4.502663 | 630 | over-trained |
153 | dense | 1536 | 1536 | 1.000000 | 2.340950 | 0.036992 | -5.476165 | 118 | |
154 | dense | 1536 | 256 | 6.000000 | 3.842059 | 0.061080 | -12.814195 | 37 | |
155 | dense | 1536 | 1536 | 1.000000 | 2.341036 | 0.037599 | -5.391667 | 127 | |
156 | dense | 8960 | 1536 | 5.833333 | 3.324615 | 0.014039 | -6.187933 | 120 | |
157 | dense | 1536 | 1536 | 1.000000 | 2.195885 | 0.084206 | -5.286181 | 359 | |
158 | dense | 1536 | 256 | 6.000000 | 2.707916 | 0.042663 | -7.833815 | 50 | |
159 | dense | 1536 | 256 | 6.000000 | 3.658299 | 0.044171 | -11.770211 | 40 | |
160 | dense | 8960 | 1536 | 5.833333 | 3.099093 | 0.013584 | -5.238215 | 181 | |
161 | dense | 8960 | 1536 | 5.833333 | 4.314217 | 0.027959 | -9.728337 | 79 | |
162 | dense | 1536 | 256 | 6.000000 | 2.866093 | 0.057047 | -8.925692 | 73 | |
163 | dense | 1536 | 1536 | 1.000000 | 3.052732 | 0.032621 | -7.289159 | 61 | |
164 | dense | 1536 | 256 | 6.000000 | 2.351459 | 0.030580 | -6.350608 | 78 | |
165 | dense | 8960 | 1536 | 5.833333 | 3.139986 | 0.016875 | -5.491259 | 143 | |
166 | dense | 8960 | 1536 | 5.833333 | 3.003096 | 0.014010 | -4.733643 | 199 | |
167 | dense | 8960 | 1536 | 5.833333 | 4.262120 | 0.023886 | -9.238597 | 85 | |
168 | dense | 1536 | 1536 | 1.000000 | 2.418463 | 0.032550 | -5.477471 | 97 | |
169 | dense | 1536 | 256 | 6.000000 | 3.329014 | 0.051378 | -10.641511 | 54 | |
170 | dense | 1536 | 1536 | 1.000000 | 2.366266 | 0.038633 | -5.346077 | 132 | |
171 | dense | 1536 | 1536 | 1.000000 | 1.838695 | 0.085943 | -4.228065 | 551 | over-trained |
172 | dense | 1536 | 256 | 6.000000 | 2.560223 | 0.034989 | -7.041007 | 54 | |
173 | dense | 8960 | 1536 | 5.833333 | 3.112499 | 0.020799 | -5.100115 | 161 | |
174 | dense | 8960 | 1536 | 5.833333 | 2.944043 | 0.016975 | -4.349164 | 170 | |
175 | dense | 8960 | 1536 | 5.833333 | 4.080455 | 0.026511 | -8.517786 | 104 | |
176 | dense | 1536 | 256 | 6.000000 | 2.187489 | 0.058054 | -6.188554 | 127 | |
177 | dense | 8960 | 1536 | 5.833333 | 3.763524 | 0.034598 | -7.401155 | 121 | |
178 | dense | 8960 | 1536 | 5.833333 | 2.829684 | 0.020385 | -4.196156 | 216 | |
179 | dense | 8960 | 1536 | 5.833333 | 2.973223 | 0.019121 | -4.770315 | 205 | |
180 | dense | 1536 | 256 | 6.000000 | 2.340776 | 0.045887 | -6.596748 | 73 | |
181 | dense | 1536 | 1536 | 1.000000 | 2.714775 | 0.023920 | -5.240794 | 87 | |
182 | dense | 1536 | 1536 | 1.000000 | 2.218960 | 0.030173 | -4.801275 | 157 | |
183 | dense | 1536 | 256 | 6.000000 | 2.977091 | 0.028496 | -8.642511 | 58 | |
184 | dense | 1536 | 1536 | 1.000000 | 2.258907 | 0.021315 | -4.845966 | 160 | |
185 | dense | 1536 | 1536 | 1.000000 | 3.140733 | 0.025371 | -6.406874 | 66 | |
186 | dense | 1536 | 256 | 6.000000 | 2.402237 | 0.033208 | -6.516434 | 79 | |
187 | dense | 8960 | 1536 | 5.833333 | 3.013280 | 0.013575 | -4.896492 | 172 | |
188 | dense | 8960 | 1536 | 5.833333 | 2.833382 | 0.014542 | -4.250228 | 172 | |
189 | dense | 8960 | 1536 | 5.833333 | 3.724171 | 0.024703 | -6.894345 | 87 | |
190 | dense | 1536 | 256 | 6.000000 | 2.636329 | 0.028146 | -7.396435 | 69 | |
191 | dense | 1536 | 1536 | 1.000000 | 2.336489 | 0.029157 | -5.183202 | 143 | |
192 | dense | 1536 | 1536 | 1.000000 | 2.843934 | 0.031320 | -5.390442 | 52 | |
193 | dense | 1536 | 256 | 6.000000 | 2.524560 | 0.031581 | -7.129858 | 71 | |
194 | dense | 8960 | 1536 | 5.833333 | 3.176877 | 0.022283 | -5.242412 | 182 | |
195 | dense | 8960 | 1536 | 5.833333 | 3.186846 | 0.019499 | -4.847810 | 167 | |
196 | dense | 8960 | 1536 | 5.833333 | 2.991844 | 0.016356 | -4.636410 | 213 |