Find this model in the Qwen2-small model summary.

id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 4864 | 896 | 5.428571 | 4.177994 | 0.030186 | -9.950490 | 96 | |
2 | dense | 4864 | 896 | 5.428571 | 3.284196 | 0.030658 | -7.488194 | 219 | |
3 | dense | 4864 | 896 | 5.428571 | 3.398281 | 0.023066 | -7.694890 | 141 | |
4 | dense | 896 | 128 | 7.000000 | 1.600108 | 0.030827 | -2.553886 | 63 | over-trained |
5 | dense | 896 | 896 | 1.000000 | 2.332083 | 0.057667 | -5.830049 | 71 | |
6 | dense | 896 | 896 | 1.000000 | 1.862757 | 0.024075 | -3.319944 | 118 | over-trained |
7 | dense | 896 | 128 | 7.000000 | 2.961588 | 0.070639 | -9.716971 | 17 | |
8 | dense | 896 | 128 | 7.000000 | 3.117095 | 0.048378 | -10.931163 | 40 | |
9 | dense | 896 | 896 | 1.000000 | 2.334966 | 0.025728 | -5.787904 | 101 | |
10 | dense | 896 | 128 | 7.000000 | 2.308286 | 0.031771 | -6.431068 | 52 | |
11 | dense | 896 | 896 | 1.000000 | 1.830311 | 0.098912 | -4.672539 | 331 | over-trained |
12 | dense | 4864 | 896 | 5.428571 | 3.800813 | 0.036753 | -9.139468 | 56 | |
13 | dense | 4864 | 896 | 5.428571 | 4.257669 | 0.020109 | -11.169360 | 81 | |
14 | dense | 4864 | 896 | 5.428571 | 3.865935 | 0.045643 | -9.460968 | 60 | |
15 | dense | 4864 | 896 | 5.428571 | 3.608971 | 0.023823 | -8.158189 | 168 | |
16 | dense | 4864 | 896 | 5.428571 | 3.447224 | 0.018500 | -7.978667 | 109 | |
17 | dense | 896 | 128 | 7.000000 | 2.058226 | 0.063677 | -6.109872 | 51 | |
18 | dense | 896 | 896 | 1.000000 | 2.611719 | 0.060201 | -6.998451 | 47 | |
19 | dense | 896 | 896 | 1.000000 | 2.286552 | 0.060107 | -6.090500 | 119 | |
20 | dense | 896 | 128 | 7.000000 | 3.055825 | 0.054253 | -10.726808 | 34 | |
21 | dense | 4864 | 896 | 5.428571 | 4.022185 | 0.033033 | -10.641833 | 100 | |
22 | dense | 896 | 896 | 1.000000 | 2.369833 | 0.029698 | -6.598143 | 112 | |
23 | dense | 896 | 128 | 7.000000 | 3.309767 | 0.058092 | -11.920186 | 39 | |
24 | dense | 896 | 896 | 1.000000 | 3.266287 | 0.052569 | -9.317259 | 37 | |
25 | dense | 4864 | 896 | 5.428571 | 4.002582 | 0.032017 | -10.265278 | 90 | |
26 | dense | 4864 | 896 | 5.428571 | 3.354214 | 0.030715 | -7.790193 | 104 | |
27 | dense | 4864 | 896 | 5.428571 | 3.481795 | 0.020086 | -7.638162 | 152 | |
28 | dense | 896 | 128 | 7.000000 | 2.447007 | 0.040572 | -7.334249 | 44 | |
29 | dense | 896 | 896 | 1.000000 | 2.387374 | 0.031019 | -6.555942 | 89 | |
30 | dense | 896 | 896 | 1.000000 | 2.579403 | 0.064372 | -7.096283 | 68 | |
31 | dense | 896 | 128 | 7.000000 | 2.885869 | 0.079330 | -10.010041 | 37 | |
32 | dense | 4864 | 896 | 5.428571 | 3.374505 | 0.027211 | -7.737207 | 84 | |
33 | dense | 4864 | 896 | 5.428571 | 3.259634 | 0.022248 | -7.112577 | 134 | |
34 | dense | 4864 | 896 | 5.428571 | 4.149188 | 0.038207 | -10.654776 | 52 | |
35 | dense | 896 | 128 | 7.000000 | 2.370550 | 0.050807 | -7.469258 | 43 | |
36 | dense | 896 | 128 | 7.000000 | 3.419094 | 0.099868 | -12.580634 | 41 | |
37 | dense | 4864 | 896 | 5.428571 | 4.278108 | 0.035804 | -10.797598 | 58 | |
38 | dense | 4864 | 896 | 5.428571 | 3.232637 | 0.026390 | -7.068164 | 127 | |
39 | dense | 4864 | 896 | 5.428571 | 3.244980 | 0.044001 | -7.282073 | 106 | |
40 | dense | 896 | 128 | 7.000000 | 2.571916 | 0.046362 | -8.373305 | 39 | |
41 | dense | 896 | 896 | 1.000000 | 2.404915 | 0.100514 | -6.858395 | 173 | |
42 | dense | 896 | 896 | 1.000000 | 2.293481 | 0.029730 | -6.100787 | 115 | |
43 | dense | 896 | 896 | 1.000000 | 2.344099 | 0.044717 | -6.370752 | 81 | |
44 | dense | 4864 | 896 | 5.428571 | 3.148239 | 0.020250 | -6.478133 | 100 | |
45 | dense | 4864 | 896 | 5.428571 | 3.221253 | 0.042969 | -6.966845 | 71 | |
46 | dense | 896 | 128 | 7.000000 | 2.361505 | 0.078962 | -7.427829 | 34 | |
47 | dense | 896 | 896 | 1.000000 | 1.884621 | 0.085333 | -5.289881 | 292 | over-trained |
48 | dense | 896 | 128 | 7.000000 | 3.816716 | 0.086184 | -13.583558 | 27 | |
49 | dense | 4864 | 896 | 5.428571 | 4.046819 | 0.048272 | -10.020889 | 45 | |
50 | dense | 4864 | 896 | 5.428571 | 3.679218 | 0.050464 | -8.923267 | 73 | |
51 | dense | 4864 | 896 | 5.428571 | 3.062322 | 0.019578 | -5.919952 | 86 | |
52 | dense | 4864 | 896 | 5.428571 | 3.164374 | 0.046068 | -6.612883 | 69 | |
53 | dense | 896 | 128 | 7.000000 | 2.176472 | 0.066552 | -6.888514 | 55 | |
54 | dense | 896 | 896 | 1.000000 | 1.840124 | 0.092464 | -5.290776 | 300 | over-trained |
55 | dense | 896 | 896 | 1.000000 | 2.186119 | 0.041905 | -5.617147 | 119 | |
56 | dense | 896 | 128 | 7.000000 | 3.147174 | 0.081121 | -11.223136 | 44 | |
57 | dense | 896 | 128 | 7.000000 | 2.230977 | 0.139818 | -7.508302 | 102 | |
58 | dense | 896 | 896 | 1.000000 | 2.026433 | 0.049250 | -5.190675 | 143 | |
59 | dense | 896 | 896 | 1.000000 | 1.791912 | 0.091741 | -4.954655 | 302 | over-trained |
60 | dense | 4864 | 896 | 5.428571 | 4.086721 | 0.048609 | -10.168944 | 35 | |
61 | dense | 4864 | 896 | 5.428571 | 3.063242 | 0.035961 | -6.408923 | 64 | |
62 | dense | 4864 | 896 | 5.428571 | 3.017384 | 0.025265 | -5.884712 | 60 | |
63 | dense | 896 | 128 | 7.000000 | 2.044918 | 0.072820 | -6.124150 | 43 | |
64 | dense | 4864 | 896 | 5.428571 | 3.568411 | 0.048549 | -8.226492 | 64 | |
65 | dense | 4864 | 896 | 5.428571 | 2.589431 | 0.035185 | -4.910100 | 190 | |
66 | dense | 4864 | 896 | 5.428571 | 3.034477 | 0.043578 | -6.184601 | 45 | |
67 | dense | 896 | 128 | 7.000000 | 2.441043 | 0.049473 | -8.087394 | 42 | |
68 | dense | 896 | 896 | 1.000000 | 2.562946 | 0.081154 | -6.911137 | 67 | |
69 | dense | 896 | 896 | 1.000000 | 2.338905 | 0.049493 | -5.246167 | 88 | |
70 | dense | 896 | 128 | 7.000000 | 3.291499 | 0.072626 | -11.211090 | 29 | |
71 | dense | 4864 | 896 | 5.428571 | 2.930862 | 0.036701 | -5.753826 | 63 | |
72 | dense | 896 | 128 | 7.000000 | 3.519300 | 0.075847 | -11.963167 | 29 | |
73 | dense | 4864 | 896 | 5.428571 | 3.635878 | 0.055162 | -8.869794 | 50 | |
74 | dense | 4864 | 896 | 5.428571 | 2.724805 | 0.022336 | -4.961984 | 115 | |
75 | dense | 896 | 128 | 7.000000 | 2.346343 | 0.083979 | -7.028885 | 22 | |
76 | dense | 896 | 896 | 1.000000 | 2.679290 | 0.066178 | -7.105145 | 49 | |
77 | dense | 896 | 896 | 1.000000 | 1.913276 | 0.054194 | -4.712425 | 161 | over-trained |
78 | dense | 4864 | 896 | 5.428571 | 3.703295 | 0.054081 | -8.892402 | 51 | |
79 | dense | 896 | 128 | 7.000000 | 3.414098 | 0.097267 | -12.221869 | 37 | |
80 | dense | 896 | 896 | 1.000000 | 2.762209 | 0.055225 | -6.591284 | 21 | |
81 | dense | 896 | 896 | 1.000000 | 1.749677 | 0.098898 | -5.151141 | 344 | over-trained |
82 | dense | 896 | 128 | 7.000000 | 3.245439 | 0.072510 | -10.639414 | 14 | |
83 | dense | 4864 | 896 | 5.428571 | 2.821304 | 0.027522 | -5.060222 | 78 | |
84 | dense | 4864 | 896 | 5.428571 | 3.043930 | 0.032173 | -5.952307 | 55 | |
85 | dense | 4864 | 896 | 5.428571 | 3.037799 | 0.027279 | -6.151628 | 66 | |
86 | dense | 4864 | 896 | 5.428571 | 3.146127 | 0.073595 | -7.860060 | 152 | |
87 | dense | 4864 | 896 | 5.428571 | 2.746864 | 0.020257 | -5.091483 | 136 | |
88 | dense | 896 | 896 | 1.000000 | 2.096417 | 0.038635 | -4.930686 | 112 | |
89 | dense | 896 | 128 | 7.000000 | 2.931388 | 0.088497 | -9.974809 | 36 | |
90 | dense | 896 | 128 | 7.000000 | 2.707644 | 0.062333 | -8.476781 | 22 | |
91 | dense | 896 | 896 | 1.000000 | 2.209704 | 0.095990 | -5.856626 | 162 | |
92 | dense | 4864 | 896 | 5.428571 | 2.902581 | 0.027598 | -5.537920 | 75 | |
93 | dense | 896 | 128 | 7.000000 | 2.669577 | 0.067743 | -8.581964 | 27 | |
94 | dense | 896 | 896 | 1.000000 | 1.771540 | 0.087678 | -5.220472 | 307 | over-trained |
95 | dense | 896 | 896 | 1.000000 | 2.395104 | 0.058658 | -6.331350 | 70 | |
96 | dense | 896 | 128 | 7.000000 | 4.626768 | 0.060560 | -16.715929 | 22 | |
97 | dense | 4864 | 896 | 5.428571 | 3.082324 | 0.089248 | -7.764293 | 201 | |
98 | dense | 4864 | 896 | 5.428571 | 3.099663 | 0.032304 | -6.384726 | 59 | |
99 | dense | 4864 | 896 | 5.428571 | 3.222263 | 0.077710 | -8.044112 | 150 | |
100 | dense | 4864 | 896 | 5.428571 | 2.869719 | 0.029316 | -5.513084 | 91 | |
101 | dense | 4864 | 896 | 5.428571 | 3.092896 | 0.027134 | -6.356179 | 85 | |
102 | dense | 896 | 128 | 7.000000 | 2.997037 | 0.078092 | -9.717990 | 18 | |
103 | dense | 896 | 896 | 1.000000 | 1.778028 | 0.093756 | -5.229795 | 327 | over-trained |
104 | dense | 896 | 896 | 1.000000 | 2.367445 | 0.055682 | -5.386320 | 74 | |
105 | dense | 896 | 128 | 7.000000 | 3.656707 | 0.106529 | -13.379717 | 40 | |
106 | dense | 4864 | 896 | 5.428571 | 5.010752 | 0.041818 | -12.072853 | 27 | |
107 | dense | 4864 | 896 | 5.428571 | 3.009152 | 0.026002 | -5.822299 | 76 | |
108 | dense | 4864 | 896 | 5.428571 | 3.142499 | 0.033429 | -6.548253 | 86 | |
109 | dense | 896 | 128 | 7.000000 | 3.040661 | 0.075590 | -9.737400 | 18 | |
110 | dense | 896 | 896 | 1.000000 | 1.722857 | 0.075239 | -5.049437 | 320 | over-trained |
111 | dense | 896 | 896 | 1.000000 | 2.260836 | 0.056748 | -5.814553 | 88 | |
112 | dense | 896 | 128 | 7.000000 | 3.715160 | 0.102719 | -13.598099 | 38 | |
113 | dense | 896 | 896 | 1.000000 | 2.285821 | 0.057792 | -6.030239 | 98 | |
114 | dense | 896 | 128 | 7.000000 | 3.488355 | 0.107896 | -12.666496 | 39 | |
115 | dense | 4864 | 896 | 5.428571 | 3.376764 | 0.091311 | -8.554779 | 204 | |
116 | dense | 4864 | 896 | 5.428571 | 2.935291 | 0.025045 | -5.797821 | 124 | |
117 | dense | 4864 | 896 | 5.428571 | 3.289895 | 0.029075 | -6.971130 | 69 | |
118 | dense | 896 | 128 | 7.000000 | 2.580823 | 0.070227 | -8.329274 | 30 | |
119 | dense | 896 | 896 | 1.000000 | 1.967399 | 0.106999 | -5.667800 | 269 | over-trained |
120 | dense | 896 | 128 | 7.000000 | 3.598706 | 0.099779 | -12.760902 | 39 | |
121 | dense | 896 | 896 | 1.000000 | 2.238211 | 0.053503 | -5.492266 | 94 | |
122 | dense | 896 | 896 | 1.000000 | 1.866915 | 0.083485 | -5.525884 | 276 | over-trained |
123 | dense | 896 | 128 | 7.000000 | 2.517365 | 0.069969 | -7.828006 | 39 | |
124 | dense | 4864 | 896 | 5.428571 | 3.401377 | 0.033005 | -7.344728 | 74 | |
125 | dense | 4864 | 896 | 5.428571 | 3.079834 | 0.023655 | -6.167474 | 87 | |
126 | dense | 4864 | 896 | 5.428571 | 4.811148 | 0.055098 | -12.590621 | 69 | |
127 | dense | 4864 | 896 | 5.428571 | 3.160065 | 0.024415 | -6.760801 | 115 | |
128 | dense | 896 | 128 | 7.000000 | 2.755130 | 0.064462 | -8.709923 | 23 | |
129 | dense | 896 | 896 | 1.000000 | 1.914318 | 0.098845 | -5.733279 | 309 | over-trained |
130 | dense | 896 | 896 | 1.000000 | 2.387440 | 0.045225 | -5.320288 | 76 | |
131 | dense | 896 | 128 | 7.000000 | 2.959965 | 0.140720 | -10.679026 | 82 | |
132 | dense | 4864 | 896 | 5.428571 | 2.997170 | 0.017802 | -5.865010 | 116 | |
133 | dense | 4864 | 896 | 5.428571 | 3.623482 | 0.092834 | -9.293163 | 195 | |
134 | dense | 896 | 128 | 7.000000 | 4.406342 | 0.087604 | -15.441151 | 23 | |
135 | dense | 896 | 896 | 1.000000 | 2.416433 | 0.048519 | -6.302825 | 66 | |
136 | dense | 4864 | 896 | 5.428571 | 4.846622 | 0.037483 | -11.806066 | 40 | |
137 | dense | 4864 | 896 | 5.428571 | 3.012479 | 0.020838 | -5.875385 | 100 | |
138 | dense | 4864 | 896 | 5.428571 | 3.229220 | 0.026354 | -6.878794 | 84 | |
139 | dense | 896 | 128 | 7.000000 | 2.442422 | 0.057185 | -7.334962 | 38 | |
140 | dense | 896 | 896 | 1.000000 | 1.887253 | 0.082476 | -5.371658 | 290 | over-trained |
141 | dense | 4864 | 896 | 5.428571 | 2.989611 | 0.016000 | -5.739192 | 102 | |
142 | dense | 4864 | 896 | 5.428571 | 3.116684 | 0.020581 | -6.344778 | 96 | |
143 | dense | 896 | 128 | 7.000000 | 2.387475 | 0.056828 | -7.298941 | 35 | |
144 | dense | 896 | 896 | 1.000000 | 2.916253 | 0.034894 | -6.896422 | 29 | |
145 | dense | 896 | 896 | 1.000000 | 2.084446 | 0.048974 | -5.121791 | 107 | |
146 | dense | 896 | 128 | 7.000000 | 2.959790 | 0.091943 | -10.061628 | 35 | |
147 | dense | 4864 | 896 | 5.428571 | 4.341463 | 0.020848 | -9.689112 | 56 | |
148 | dense | 4864 | 896 | 5.428571 | 4.353438 | 0.022215 | -9.554578 | 59 | |
149 | dense | 4864 | 896 | 5.428571 | 2.968762 | 0.014568 | -5.702145 | 122 | |
150 | dense | 4864 | 896 | 5.428571 | 3.119448 | 0.025311 | -6.398776 | 99 | |
151 | dense | 896 | 896 | 1.000000 | 3.233164 | 0.031089 | -8.131878 | 40 | |
152 | dense | 896 | 128 | 7.000000 | 2.949770 | 0.074279 | -10.141138 | 45 | |
153 | dense | 896 | 128 | 7.000000 | 2.516131 | 0.054953 | -8.045087 | 37 | |
154 | dense | 896 | 896 | 1.000000 | 2.144255 | 0.042308 | -5.458247 | 121 | |
155 | dense | 4864 | 896 | 5.428571 | 4.098630 | 0.020129 | -8.519784 | 65 | |
156 | dense | 4864 | 896 | 5.428571 | 2.924464 | 0.018966 | -5.488628 | 110 | |
157 | dense | 4864 | 896 | 5.428571 | 3.057491 | 0.019935 | -6.076150 | 113 | |
158 | dense | 896 | 128 | 7.000000 | 2.406192 | 0.043131 | -7.537406 | 43 | |
159 | dense | 896 | 896 | 1.000000 | 2.091623 | 0.092446 | -4.495540 | 233 | |
160 | dense | 896 | 896 | 1.000000 | 2.149879 | 0.039030 | -5.055811 | 114 | |
161 | dense | 896 | 128 | 7.000000 | 2.662525 | 0.049660 | -8.576957 | 49 | |
162 | dense | 896 | 896 | 1.000000 | 1.593903 | 0.072146 | -3.959932 | 399 | over-trained |
163 | dense | 4864 | 896 | 5.428571 | 3.384567 | 0.024472 | -6.454213 | 113 | |
164 | dense | 4864 | 896 | 5.428571 | 3.084546 | 0.013723 | -5.923442 | 115 | |
165 | dense | 4864 | 896 | 5.428571 | 3.250504 | 0.015328 | -6.509149 | 112 | |
166 | dense | 896 | 128 | 7.000000 | 2.531337 | 0.046163 | -7.379721 | 32 | |
167 | dense | 896 | 896 | 1.000000 | 2.571646 | 0.041662 | -6.328030 | 59 | |
168 | dense | 896 | 128 | 7.000000 | 2.813799 | 0.051179 | -9.099811 | 35 | |