Find this model in the Qwen2.5-small model summary
id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 4864 | 896 | 5.428571 | 8.040342 | 0.054421 | 11.211399 | 93 | under-trained |
2 | dense | 4864 | 896 | 5.428571 | 3.265221 | 0.052069 | 5.718243 | 374 | |
3 | dense | 4864 | 896 | 5.428571 | 13.858658 | 0.081286 | 12.357890 | 58 | under-trained |
4 | dense | 896 | 128 | 7.000000 | 1.705504 | 0.080801 | 3.839151 | 50 | over-trained |
5 | dense | 896 | 896 | 1.000000 | 1.753546 | 0.098443 | 1.549687 | 333 | over-trained |
6 | dense | 896 | 896 | 1.000000 | 1.941109 | 0.044646 | 6.008260 | 72 | over-trained |
7 | dense | 896 | 128 | 7.000000 | 3.788853 | 0.135768 | -1.038288 | 40 | |
8 | dense | 896 | 128 | 7.000000 | 9.259229 | 0.083157 | -2.977757 | 22 | under-trained |
9 | dense | 896 | 896 | 1.000000 | 3.506172 | 0.053103 | 4.418250 | 68 | |
10 | dense | 896 | 128 | 7.000000 | 4.612226 | 0.095308 | 4.250618 | 31 | |
11 | dense | 896 | 896 | 1.000000 | 4.748578 | 0.033730 | 4.257064 | 36 | |
12 | dense | 4864 | 896 | 5.428571 | 3.893259 | 0.054545 | 6.519901 | 236 | |
13 | dense | 4864 | 896 | 5.428571 | 7.166078 | 0.042981 | 9.934975 | 91 | under-trained |
14 | dense | 4864 | 896 | 5.428571 | 13.029151 | 0.126487 | 10.187965 | 85 | under-trained |
15 | dense | 4864 | 896 | 5.428571 | 4.719416 | 0.032387 | 8.123322 | 139 | |
16 | dense | 4864 | 896 | 5.428571 | 6.908085 | 0.090775 | 10.516904 | 161 | under-trained |
17 | dense | 896 | 128 | 7.000000 | 4.347263 | 0.159526 | 3.361843 | 48 | |
18 | dense | 896 | 896 | 1.000000 | 2.546359 | 0.123995 | 2.433509 | 234 | |
19 | dense | 896 | 896 | 1.000000 | 3.784185 | 0.111680 | 4.024313 | 91 | |
20 | dense | 896 | 128 | 7.000000 | 5.274816 | 0.136336 | -1.362573 | 48 | |
21 | dense | 4864 | 896 | 5.428571 | 7.537951 | 0.046343 | 9.785726 | 76 | under-trained |
22 | dense | 896 | 896 | 1.000000 | 3.896605 | 0.080914 | 3.597993 | 79 | |
23 | dense | 896 | 128 | 7.000000 | 8.704572 | 0.154128 | -0.904002 | 32 | under-trained |
24 | dense | 896 | 896 | 1.000000 | 5.545392 | 0.053260 | 3.805568 | 45 | |
25 | dense | 4864 | 896 | 5.428571 | 5.998621 | 0.033834 | 7.733741 | 87 | |
26 | dense | 4864 | 896 | 5.428571 | 18.585633 | 0.126233 | 14.931464 | 53 | under-trained |
27 | dense | 4864 | 896 | 5.428571 | 6.316034 | 0.032099 | 11.059773 | 93 | under-trained |
28 | dense | 896 | 128 | 7.000000 | 8.210047 | 0.107130 | 5.597431 | 13 | under-trained |
29 | dense | 896 | 896 | 1.000000 | 2.504616 | 0.116583 | 2.271184 | 201 | |
30 | dense | 896 | 896 | 1.000000 | 4.700882 | 0.098050 | 3.501952 | 54 | |
31 | dense | 896 | 128 | 7.000000 | 7.040867 | 0.119289 | -0.109531 | 32 | under-trained |
32 | dense | 4864 | 896 | 5.428571 | 10.451870 | 0.071741 | 10.982097 | 87 | under-trained |
33 | dense | 4864 | 896 | 5.428571 | 4.935176 | 0.027320 | 8.847282 | 125 | |
34 | dense | 4864 | 896 | 5.428571 | 7.076002 | 0.042569 | 10.347336 | 73 | under-trained |
35 | dense | 896 | 128 | 7.000000 | 5.839008 | 0.121620 | 3.393528 | 29 | |
36 | dense | 896 | 128 | 7.000000 | 7.466681 | 0.147102 | -0.174624 | 42 | under-trained |
37 | dense | 4864 | 896 | 5.428571 | 7.116280 | 0.045311 | 9.741597 | 70 | under-trained |
38 | dense | 4864 | 896 | 5.428571 | 5.049429 | 0.035409 | 9.535447 | 49 | |
39 | dense | 4864 | 896 | 5.428571 | 7.549522 | 0.123157 | 6.440213 | 167 | under-trained |
40 | dense | 896 | 128 | 7.000000 | 6.243987 | 0.115955 | 3.710162 | 21 | under-trained |
41 | dense | 896 | 896 | 1.000000 | 7.337058 | 0.060886 | 5.719521 | 35 | under-trained |
42 | dense | 896 | 896 | 1.000000 | 2.753109 | 0.104305 | 2.425995 | 154 | |
43 | dense | 896 | 896 | 1.000000 | 2.514152 | 0.112039 | 2.243148 | 200 | |
44 | dense | 4864 | 896 | 5.428571 | 5.657994 | 0.046279 | 9.877973 | 106 | |
45 | dense | 4864 | 896 | 5.428571 | 15.300423 | 0.124648 | 13.355082 | 60 | under-trained |
46 | dense | 896 | 128 | 7.000000 | 4.443214 | 0.153069 | 3.112632 | 48 | |
47 | dense | 896 | 896 | 1.000000 | 4.278949 | 0.030678 | 3.322294 | 55 | |
48 | dense | 896 | 128 | 7.000000 | 8.148426 | 0.135110 | -0.110242 | 31 | under-trained |
49 | dense | 4864 | 896 | 5.428571 | 7.179381 | 0.049754 | 9.858648 | 72 | under-trained |
50 | dense | 4864 | 896 | 5.428571 | 5.650973 | 0.081246 | 7.601290 | 145 | |
51 | dense | 4864 | 896 | 5.428571 | 5.138363 | 0.027620 | 9.244165 | 51 | |
52 | dense | 4864 | 896 | 5.428571 | 10.677734 | 0.121782 | 9.331619 | 100 | under-trained |
53 | dense | 896 | 128 | 7.000000 | 4.094685 | 0.158592 | 2.251377 | 45 | |
54 | dense | 896 | 896 | 1.000000 | 9.579512 | 0.121301 | 5.640510 | 35 | under-trained |
55 | dense | 896 | 896 | 1.000000 | 2.770068 | 0.103430 | 2.488193 | 144 | |
56 | dense | 896 | 128 | 7.000000 | 7.368676 | 0.155450 | -0.077994 | 42 | under-trained |
57 | dense | 896 | 128 | 7.000000 | 5.963473 | 0.124741 | -0.325195 | 44 | |
58 | dense | 896 | 896 | 1.000000 | 2.932539 | 0.106932 | 4.077917 | 158 | |
59 | dense | 896 | 896 | 1.000000 | 4.293945 | 0.130296 | 3.242153 | 116 | |
60 | dense | 4864 | 896 | 5.428571 | 6.238356 | 0.029236 | 9.272006 | 57 | under-trained |
61 | dense | 4864 | 896 | 5.428571 | 16.978627 | 0.110543 | 15.404228 | 38 | under-trained |
62 | dense | 4864 | 896 | 5.428571 | 5.369935 | 0.044810 | 9.280906 | 78 | |
63 | dense | 896 | 128 | 7.000000 | 4.130552 | 0.104780 | 3.392419 | 25 | |
64 | dense | 4864 | 896 | 5.428571 | 5.106457 | 0.055582 | 7.885743 | 94 | |
65 | dense | 4864 | 896 | 5.428571 | 5.695533 | 0.047542 | 9.191383 | 61 | |
66 | dense | 4864 | 896 | 5.428571 | 8.333649 | 0.099378 | 7.543141 | 117 | under-trained |
67 | dense | 896 | 128 | 7.000000 | 5.011675 | 0.124815 | 2.697092 | 26 | |
68 | dense | 896 | 896 | 1.000000 | 2.916346 | 0.115069 | 1.694782 | 151 | |
69 | dense | 896 | 896 | 1.000000 | 1.998083 | 0.116933 | 1.973538 | 259 | over-trained |
70 | dense | 896 | 128 | 7.000000 | 4.965142 | 0.142116 | -0.034488 | 50 | |
71 | dense | 4864 | 896 | 5.428571 | 6.002941 | 0.106294 | 5.637438 | 176 | under-trained |
72 | dense | 896 | 128 | 7.000000 | 9.433002 | 0.107955 | 0.946462 | 22 | under-trained |
73 | dense | 4864 | 896 | 5.428571 | 5.852468 | 0.042849 | 9.275457 | 85 | |
74 | dense | 4864 | 896 | 5.428571 | 4.391382 | 0.083457 | 7.438902 | 165 | |
75 | dense | 896 | 128 | 7.000000 | 2.668288 | 0.142165 | 1.663138 | 57 | |
76 | dense | 896 | 896 | 1.000000 | 3.270324 | 0.127600 | 2.280287 | 169 | |
77 | dense | 896 | 896 | 1.000000 | 3.589951 | 0.089913 | 3.851545 | 74 | |
78 | dense | 4864 | 896 | 5.428571 | 4.025262 | 0.082214 | 6.194480 | 226 | |
79 | dense | 896 | 128 | 7.000000 | 6.356385 | 0.113886 | 0.463906 | 37 | under-trained |
80 | dense | 896 | 896 | 1.000000 | 2.439577 | 0.119388 | 2.886963 | 161 | |
81 | dense | 896 | 896 | 1.000000 | 3.871033 | 0.036450 | 3.943120 | 49 | |
82 | dense | 896 | 128 | 7.000000 | 7.304252 | 0.154307 | 3.734054 | 18 | under-trained |
83 | dense | 4864 | 896 | 5.428571 | 5.627771 | 0.039644 | 9.317909 | 48 | |
84 | dense | 4864 | 896 | 5.428571 | 5.882456 | 0.121028 | 7.937960 | 203 | |
85 | dense | 4864 | 896 | 5.428571 | 8.572833 | 0.095292 | 7.927053 | 92 | under-trained |
86 | dense | 4864 | 896 | 5.428571 | 5.769841 | 0.042735 | 8.746235 | 63 | |
87 | dense | 4864 | 896 | 5.428571 | 5.709195 | 0.036244 | 9.535468 | 62 | |
88 | dense | 896 | 896 | 1.000000 | 3.064031 | 0.100022 | 2.879952 | 98 | |
89 | dense | 896 | 128 | 7.000000 | 6.161392 | 0.118143 | 0.287017 | 42 | under-trained |
90 | dense | 896 | 128 | 7.000000 | 4.881164 | 0.145917 | 2.968580 | 30 | |
91 | dense | 896 | 896 | 1.000000 | 5.894110 | 0.112254 | 4.319557 | 53 | |
92 | dense | 4864 | 896 | 5.428571 | 4.302835 | 0.098392 | 6.717023 | 237 | |
93 | dense | 896 | 128 | 7.000000 | 6.450270 | 0.142982 | 3.446783 | 21 | under-trained |
94 | dense | 896 | 896 | 1.000000 | 3.703193 | 0.121237 | 2.340584 | 119 | |
95 | dense | 896 | 896 | 1.000000 | 2.264856 | 0.104320 | 2.167589 | 199 | |
96 | dense | 896 | 128 | 7.000000 | 5.698374 | 0.154687 | 0.526115 | 49 | |
97 | dense | 4864 | 896 | 5.428571 | 6.059626 | 0.024868 | 8.935609 | 58 | under-trained |
98 | dense | 4864 | 896 | 5.428571 | 9.116327 | 0.094115 | 8.967249 | 83 | under-trained |
99 | dense | 4864 | 896 | 5.428571 | 6.866573 | 0.022450 | 9.194867 | 63 | under-trained |
100 | dense | 4864 | 896 | 5.428571 | 5.201582 | 0.093304 | 8.216190 | 153 | |
101 | dense | 4864 | 896 | 5.428571 | 10.379960 | 0.099741 | 10.154033 | 67 | under-trained |
102 | dense | 896 | 128 | 7.000000 | 4.464800 | 0.158862 | 2.581389 | 38 | |
103 | dense | 896 | 896 | 1.000000 | 5.040300 | 0.081282 | 3.440180 | 47 | |
104 | dense | 896 | 896 | 1.000000 | 2.472648 | 0.093339 | 2.273189 | 161 | |
105 | dense | 896 | 128 | 7.000000 | 4.832291 | 0.132157 | 0.458495 | 64 | |
106 | dense | 4864 | 896 | 5.428571 | 8.215156 | 0.040717 | 10.145047 | 59 | under-trained |
107 | dense | 4864 | 896 | 5.428571 | 5.911166 | 0.050139 | 9.862516 | 90 | |
108 | dense | 4864 | 896 | 5.428571 | 12.829300 | 0.118830 | 12.688231 | 65 | under-trained |
109 | dense | 896 | 128 | 7.000000 | 3.982223 | 0.155055 | 2.210965 | 35 | |
110 | dense | 896 | 896 | 1.000000 | 3.124897 | 0.069863 | 2.470788 | 95 | |
111 | dense | 896 | 896 | 1.000000 | 2.500863 | 0.090518 | 2.302558 | 170 | |
112 | dense | 896 | 128 | 7.000000 | 5.909795 | 0.133200 | 0.830423 | 38 | |
113 | dense | 896 | 896 | 1.000000 | 2.467650 | 0.089664 | 2.838671 | 123 | |
114 | dense | 896 | 128 | 7.000000 | 7.224839 | 0.155089 | 0.953820 | 34 | under-trained |
115 | dense | 4864 | 896 | 5.428571 | 8.476228 | 0.061991 | 11.432287 | 90 | under-trained |
116 | dense | 4864 | 896 | 5.428571 | 6.582619 | 0.046518 | 11.045835 | 98 | under-trained |
117 | dense | 4864 | 896 | 5.428571 | 9.162498 | 0.061427 | 11.311176 | 73 | under-trained |
118 | dense | 896 | 128 | 7.000000 | 2.925996 | 0.096023 | 1.796481 | 41 | |
119 | dense | 896 | 896 | 1.000000 | 7.567919 | 0.060736 | 5.572995 | 29 | under-trained |
120 | dense | 896 | 128 | 7.000000 | 8.331153 | 0.137307 | 1.418412 | 28 | under-trained |
121 | dense | 896 | 896 | 1.000000 | 2.861464 | 0.088065 | 3.003921 | 98 | |
122 | dense | 896 | 896 | 1.000000 | 5.370602 | 0.117772 | 5.339380 | 65 | |
123 | dense | 896 | 128 | 7.000000 | 3.994296 | 0.131943 | 2.400430 | 27 | |
124 | dense | 4864 | 896 | 5.428571 | 8.845118 | 0.121033 | 8.543534 | 117 | under-trained |
125 | dense | 4864 | 896 | 5.428571 | 6.431614 | 0.048163 | 11.214119 | 96 | under-trained |
126 | dense | 4864 | 896 | 5.428571 | 9.512996 | 0.066366 | 12.316115 | 77 | under-trained |
127 | dense | 4864 | 896 | 5.428571 | 7.229188 | 0.120315 | 7.198850 | 154 | under-trained |
128 | dense | 896 | 128 | 7.000000 | 4.246674 | 0.136454 | 2.106592 | 35 | |
129 | dense | 896 | 896 | 1.000000 | 2.266186 | 0.131268 | 1.540139 | 268 | |
130 | dense | 896 | 896 | 1.000000 | 2.810649 | 0.098799 | 2.769977 | 136 | |
131 | dense | 896 | 128 | 7.000000 | 4.353987 | 0.154524 | 1.205799 | 74 | |
132 | dense | 4864 | 896 | 5.428571 | 7.376154 | 0.058358 | 11.857703 | 72 | under-trained |
133 | dense | 4864 | 896 | 5.428571 | 9.739678 | 0.040235 | 11.787124 | 58 | under-trained |
134 | dense | 896 | 128 | 7.000000 | 5.521502 | 0.134602 | 1.302506 | 51 | |
135 | dense | 896 | 896 | 1.000000 | 2.890111 | 0.090928 | 2.815623 | 150 | |
136 | dense | 4864 | 896 | 5.428571 | 11.376712 | 0.048066 | 12.441277 | 42 | under-trained |
137 | dense | 4864 | 896 | 5.428571 | 6.637477 | 0.059823 | 11.310353 | 117 | under-trained |
138 | dense | 4864 | 896 | 5.428571 | 7.739570 | 0.126378 | 8.258122 | 145 | under-trained |
139 | dense | 896 | 128 | 7.000000 | 2.625498 | 0.152226 | 1.244735 | 78 | |
140 | dense | 896 | 896 | 1.000000 | 2.025645 | 0.123803 | 1.453554 | 318 | |
141 | dense | 4864 | 896 | 5.428571 | 6.676192 | 0.046666 | 10.951633 | 97 | under-trained |
142 | dense | 4864 | 896 | 5.428571 | 10.459114 | 0.044769 | 11.503720 | 58 | under-trained |
143 | dense | 896 | 128 | 7.000000 | 3.903641 | 0.131119 | 2.336452 | 26 | |
144 | dense | 896 | 896 | 1.000000 | 7.708240 | 0.122041 | 6.165900 | 37 | under-trained |
145 | dense | 896 | 896 | 1.000000 | 2.444367 | 0.094987 | 2.560513 | 155 | |
146 | dense | 896 | 128 | 7.000000 | 3.505638 | 0.145670 | 1.515202 | 82 | |
147 | dense | 4864 | 896 | 5.428571 | 14.457317 | 0.053852 | 15.470645 | 38 | under-trained |
148 | dense | 4864 | 896 | 5.428571 | 8.479177 | 0.125430 | 8.909933 | 124 | under-trained |
149 | dense | 4864 | 896 | 5.428571 | 5.866510 | 0.048950 | 9.826199 | 111 | |
150 | dense | 4864 | 896 | 5.428571 | 6.666003 | 0.083036 | 11.228750 | 129 | under-trained |
151 | dense | 896 | 896 | 1.000000 | 4.532203 | 0.105107 | 4.233377 | 104 | |
152 | dense | 896 | 128 | 7.000000 | 5.733378 | 0.135498 | 3.315494 | 33 | |
153 | dense | 896 | 128 | 7.000000 | 2.060075 | 0.108704 | 1.171895 | 67 | |
154 | dense | 896 | 896 | 1.000000 | 2.462721 | 0.098281 | 2.546883 | 130 | |
155 | dense | 4864 | 896 | 5.428571 | 7.198595 | 0.119965 | 7.850524 | 159 | under-trained |
156 | dense | 4864 | 896 | 5.428571 | 5.284220 | 0.033388 | 8.498887 | 110 | |
157 | dense | 4864 | 896 | 5.428571 | 6.560228 | 0.081838 | 9.991615 | 130 | under-trained |
158 | dense | 896 | 128 | 7.000000 | 2.937743 | 0.089889 | 1.651481 | 29 | |
159 | dense | 896 | 896 | 1.000000 | 5.723969 | 0.036925 | 5.573910 | 32 | |
160 | dense | 896 | 896 | 1.000000 | 1.862237 | 0.111898 | 1.971595 | 311 | over-trained |
161 | dense | 896 | 128 | 7.000000 | 3.939963 | 0.158659 | 2.318404 | 53 | |
162 | dense | 896 | 896 | 1.000000 | 4.105360 | 0.023106 | 5.035519 | 56 | |
163 | dense | 4864 | 896 | 5.428571 | 7.795438 | 0.098367 | 9.859751 | 95 | under-trained |
164 | dense | 4864 | 896 | 5.428571 | 4.666909 | 0.051854 | 9.084435 | 197 | |
165 | dense | 4864 | 896 | 5.428571 | 5.470770 | 0.071539 | 9.262204 | 174 | |
166 | dense | 896 | 128 | 7.000000 | 3.274775 | 0.091676 | 1.613782 | 35 | |
167 | dense | 896 | 896 | 1.000000 | 3.717732 | 0.061811 | 4.619922 | 41 | |
168 | dense | 896 | 128 | 7.000000 | 3.951470 | 0.149750 | 2.878433 | 59 | |