Find this model in the Qwen2.5-small model summary
id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 4864 | 896 | 5.428571 | 6.796745 | 0.078478 | -6.422834 | 146 | under-trained |
2 | dense | 4864 | 896 | 5.428571 | 8.316737 | 0.034049 | -10.357131 | 70 | under-trained |
3 | dense | 4864 | 896 | 5.428571 | 8.853733 | 0.046482 | -13.624902 | 77 | under-trained |
4 | dense | 896 | 128 | 7.000000 | 1.517866 | 0.095835 | -0.678882 | 80 | over-trained |
5 | dense | 896 | 896 | 1.000000 | 2.278895 | 0.054089 | -3.214078 | 178 | |
6 | dense | 896 | 896 | 1.000000 | 2.026583 | 0.030949 | 1.170068 | 46 | |
7 | dense | 896 | 128 | 7.000000 | 2.390231 | 0.050512 | -4.874272 | 58 | |
8 | dense | 4864 | 896 | 5.428571 | 8.436567 | 0.093251 | -11.622591 | 119 | under-trained |
9 | dense | 4864 | 896 | 5.428571 | 13.968728 | 0.112844 | -21.803785 | 63 | under-trained |
10 | dense | 4864 | 896 | 5.428571 | 12.317036 | 0.124970 | -21.138026 | 87 | under-trained |
11 | dense | 896 | 128 | 7.000000 | 3.758939 | 0.057685 | -7.129252 | 27 | |
12 | dense | 896 | 896 | 1.000000 | 5.499931 | 0.045913 | -9.131922 | 55 | |
13 | dense | 896 | 896 | 1.000000 | 3.605790 | 0.035022 | -2.358173 | 87 | |
14 | dense | 896 | 128 | 7.000000 | 4.867235 | 0.049082 | -13.852550 | 26 | |
15 | dense | 896 | 896 | 1.000000 | 7.463426 | 0.045636 | -15.031191 | 34 | under-trained |
16 | dense | 4864 | 896 | 5.428571 | 9.174856 | 0.125862 | -16.235339 | 135 | under-trained |
17 | dense | 896 | 896 | 1.000000 | 5.324761 | 0.124291 | -7.619276 | 91 | |
18 | dense | 896 | 128 | 7.000000 | 5.928357 | 0.057553 | -12.080519 | 14 | |
19 | dense | 4864 | 896 | 5.428571 | 17.990498 | 0.120369 | -27.892761 | 48 | under-trained |
20 | dense | 4864 | 896 | 5.428571 | 12.730782 | 0.127014 | -19.560987 | 81 | under-trained |
21 | dense | 896 | 128 | 7.000000 | 4.141989 | 0.146188 | -10.824969 | 64 | |
22 | dense | 896 | 896 | 1.000000 | 4.628873 | 0.032714 | -6.807687 | 51 | |
23 | dense | 4864 | 896 | 5.428571 | 14.231436 | 0.126026 | -24.278822 | 72 | under-trained |
24 | dense | 896 | 896 | 1.000000 | 5.795670 | 0.114330 | -12.168166 | 84 | |
25 | dense | 896 | 128 | 7.000000 | 6.084989 | 0.060952 | -13.396551 | 16 | under-trained |
26 | dense | 896 | 128 | 7.000000 | 4.143126 | 0.115240 | -11.713774 | 57 | |
27 | dense | 4864 | 896 | 5.428571 | 13.525137 | 0.122785 | -21.047561 | 71 | under-trained |
28 | dense | 4864 | 896 | 5.428571 | 13.876587 | 0.125140 | -23.665439 | 71 | under-trained |
29 | dense | 4864 | 896 | 5.428571 | 15.160811 | 0.119741 | -22.237111 | 55 | under-trained |
30 | dense | 4864 | 896 | 5.428571 | 13.952509 | 0.112783 | -22.970321 | 67 | under-trained |
31 | dense | 896 | 128 | 7.000000 | 5.256219 | 0.109425 | -11.769231 | 28 | |
32 | dense | 4864 | 896 | 5.428571 | 8.569976 | 0.128322 | -13.863878 | 139 | under-trained |
33 | dense | 896 | 896 | 1.000000 | 4.099738 | 0.122064 | -6.500038 | 125 | |
34 | dense | 896 | 896 | 1.000000 | 7.048787 | 0.114590 | -14.794884 | 53 | under-trained |
35 | dense | 896 | 128 | 7.000000 | 6.925805 | 0.138149 | -19.816902 | 33 | under-trained |
36 | dense | 4864 | 896 | 5.428571 | 11.480030 | 0.128519 | -19.375238 | 90 | under-trained |
37 | dense | 4864 | 896 | 5.428571 | 8.178580 | 0.119545 | -13.569624 | 141 | under-trained |
38 | dense | 4864 | 896 | 5.428571 | 15.906301 | 0.095362 | -22.036520 | 44 | under-trained |
39 | dense | 896 | 896 | 1.000000 | 7.036708 | 0.116930 | -14.170529 | 54 | under-trained |
40 | dense | 896 | 128 | 7.000000 | 4.027207 | 0.131893 | -9.336471 | 52 | |
41 | dense | 896 | 128 | 7.000000 | 2.982682 | 0.156023 | -8.427896 | 100 | |
42 | dense | 896 | 896 | 1.000000 | 5.328060 | 0.121002 | -8.479472 | 85 | |
43 | dense | 4864 | 896 | 5.428571 | 15.250774 | 0.110842 | -22.246926 | 57 | under-trained |
44 | dense | 4864 | 896 | 5.428571 | 22.284393 | 0.112657 | -37.051797 | 37 | under-trained |
45 | dense | 896 | 128 | 7.000000 | 6.040139 | 0.132554 | -13.776601 | 32 | under-trained |
46 | dense | 4864 | 896 | 5.428571 | 13.553654 | 0.118340 | -21.856142 | 70 | under-trained |
47 | dense | 896 | 896 | 1.000000 | 6.987323 | 0.079717 | -11.659409 | 32 | under-trained |
48 | dense | 896 | 896 | 1.000000 | 5.933998 | 0.099147 | -12.428595 | 74 | |
49 | dense | 896 | 128 | 7.000000 | 5.883831 | 0.141319 | -15.870875 | 38 | |
50 | dense | 896 | 896 | 1.000000 | 10.497154 | 0.107094 | -23.624780 | 38 | under-trained |
51 | dense | 4864 | 896 | 5.428571 | 17.316267 | 0.109272 | -27.793870 | 40 | under-trained |
52 | dense | 4864 | 896 | 5.428571 | 8.746994 | 0.120539 | -12.720863 | 123 | under-trained |
53 | dense | 4864 | 896 | 5.428571 | 16.383636 | 0.063451 | -26.046924 | 38 | under-trained |
54 | dense | 896 | 128 | 7.000000 | 4.619440 | 0.125661 | -10.603045 | 43 | |
55 | dense | 896 | 128 | 7.000000 | 3.736973 | 0.142307 | -10.567390 | 72 | |
56 | dense | 896 | 896 | 1.000000 | 5.758271 | 0.120266 | -9.858747 | 73 | |
57 | dense | 896 | 896 | 1.000000 | 4.263914 | 0.049953 | -4.705403 | 75 | |
58 | dense | 896 | 896 | 1.000000 | 6.503825 | 0.126500 | -13.767809 | 74 | under-trained |
59 | dense | 896 | 128 | 7.000000 | 4.675507 | 0.070593 | -9.514311 | 21 | |
60 | dense | 4864 | 896 | 5.428571 | 7.205014 | 0.090412 | -10.869181 | 110 | under-trained |
61 | dense | 4864 | 896 | 5.428571 | 4.893721 | 0.103890 | -6.595804 | 220 | |
62 | dense | 4864 | 896 | 5.428571 | 13.744268 | 0.120381 | -21.774416 | 70 | under-trained |
63 | dense | 896 | 128 | 7.000000 | 4.070689 | 0.137954 | -11.993508 | 62 | |
64 | dense | 4864 | 896 | 5.428571 | 16.905307 | 0.071374 | -25.092699 | 32 | under-trained |
65 | dense | 896 | 128 | 7.000000 | 4.649035 | 0.138426 | -12.762854 | 52 | |
66 | dense | 896 | 896 | 1.000000 | 3.748636 | 0.030642 | -4.878690 | 68 | |
67 | dense | 896 | 896 | 1.000000 | 3.112469 | 0.064355 | -5.892548 | 142 | |
68 | dense | 896 | 128 | 7.000000 | 3.328691 | 0.140789 | -7.269070 | 58 | |
69 | dense | 4864 | 896 | 5.428571 | 4.770617 | 0.095207 | -7.041350 | 216 | |
70 | dense | 4864 | 896 | 5.428571 | 4.159408 | 0.095707 | -5.513785 | 273 | |
71 | dense | 4864 | 896 | 5.428571 | 13.334280 | 0.123483 | -19.797393 | 67 | under-trained |
72 | dense | 4864 | 896 | 5.428571 | 4.014544 | 0.089871 | -5.164000 | 278 | |
73 | dense | 4864 | 896 | 5.428571 | 4.405522 | 0.086526 | -6.314324 | 242 | |
74 | dense | 896 | 128 | 7.000000 | 4.995542 | 0.054818 | -10.647225 | 26 | |
75 | dense | 896 | 896 | 1.000000 | 5.043939 | 0.078702 | -11.030999 | 66 | |
76 | dense | 896 | 896 | 1.000000 | 4.774552 | 0.116577 | -7.949736 | 80 | |
77 | dense | 896 | 128 | 7.000000 | 3.554780 | 0.149162 | -9.869027 | 74 | |
78 | dense | 4864 | 896 | 5.428571 | 10.527661 | 0.060360 | -15.009948 | 59 | under-trained |
79 | dense | 4864 | 896 | 5.428571 | 4.252179 | 0.081813 | -5.664890 | 239 | |
80 | dense | 4864 | 896 | 5.428571 | 4.397980 | 0.085622 | -6.302206 | 237 | |
81 | dense | 896 | 128 | 7.000000 | 3.494081 | 0.124822 | -6.864912 | 45 | |
82 | dense | 896 | 896 | 1.000000 | 1.991110 | 0.044552 | -2.564996 | 284 | over-trained |
83 | dense | 896 | 896 | 1.000000 | 2.776342 | 0.045961 | -3.005566 | 111 | |
84 | dense | 896 | 128 | 7.000000 | 6.337409 | 0.108001 | -16.861401 | 26 | under-trained |
85 | dense | 4864 | 896 | 5.428571 | 15.940175 | 0.116922 | -24.056329 | 51 | under-trained |
86 | dense | 4864 | 896 | 5.428571 | 4.704182 | 0.092916 | -6.128113 | 193 | |
87 | dense | 4864 | 896 | 5.428571 | 4.732166 | 0.090507 | -6.723654 | 219 | |
88 | dense | 896 | 128 | 7.000000 | 4.703176 | 0.122326 | -9.779464 | 32 | |
89 | dense | 896 | 896 | 1.000000 | 4.681276 | 0.108688 | -9.821942 | 99 | |
90 | dense | 896 | 896 | 1.000000 | 5.348728 | 0.086452 | -8.766297 | 56 | |
91 | dense | 896 | 128 | 7.000000 | 4.763588 | 0.124007 | -13.285397 | 43 | |
92 | dense | 4864 | 896 | 5.428571 | 9.648480 | 0.121589 | -14.869907 | 113 | under-trained |
93 | dense | 4864 | 896 | 5.428571 | 4.959857 | 0.091704 | -6.735108 | 190 | |
94 | dense | 4864 | 896 | 5.428571 | 6.116606 | 0.091354 | -8.828412 | 130 | under-trained |
95 | dense | 896 | 128 | 7.000000 | 4.442438 | 0.052099 | -8.690130 | 18 | |
96 | dense | 896 | 896 | 1.000000 | 7.228789 | 0.057914 | -15.389104 | 41 | under-trained |
97 | dense | 896 | 896 | 1.000000 | 4.751183 | 0.041896 | -6.714040 | 41 | |
98 | dense | 896 | 128 | 7.000000 | 3.976080 | 0.147326 | -11.149317 | 57 | |
99 | dense | 4864 | 896 | 5.428571 | 16.789417 | 0.126958 | -26.266165 | 56 | under-trained |
100 | dense | 4864 | 896 | 5.428571 | 10.966986 | 0.117615 | -14.530600 | 67 | under-trained |
101 | dense | 4864 | 896 | 5.428571 | 7.890956 | 0.122957 | -11.575099 | 126 | under-trained |
102 | dense | 896 | 128 | 7.000000 | 4.089455 | 0.057016 | -8.604156 | 23 | |
103 | dense | 896 | 896 | 1.000000 | 4.063564 | 0.126469 | -9.742553 | 149 | |
104 | dense | 896 | 896 | 1.000000 | 3.651090 | 0.114489 | -5.280280 | 136 | |
105 | dense | 896 | 128 | 7.000000 | 3.819968 | 0.133043 | -11.057980 | 73 | |
106 | dense | 896 | 128 | 7.000000 | 3.512591 | 0.143528 | -9.307302 | 65 | |
107 | dense | 896 | 896 | 1.000000 | 4.750183 | 0.042829 | -8.787033 | 65 | |
108 | dense | 896 | 896 | 1.000000 | 5.842904 | 0.059945 | -9.502087 | 31 | |
109 | dense | 4864 | 896 | 5.428571 | 8.332588 | 0.112812 | -12.332355 | 105 | under-trained |
110 | dense | 4864 | 896 | 5.428571 | 7.632361 | 0.110774 | -10.455610 | 109 | under-trained |
111 | dense | 4864 | 896 | 5.428571 | 15.229441 | 0.113365 | -23.988148 | 53 | under-trained |
112 | dense | 896 | 128 | 7.000000 | 4.777900 | 0.119076 | -10.349861 | 34 | |
113 | dense | 896 | 128 | 7.000000 | 3.830776 | 0.055042 | -7.429132 | 22 | |
114 | dense | 896 | 128 | 7.000000 | 7.508182 | 0.140259 | -21.018639 | 26 | under-trained |
115 | dense | 896 | 896 | 1.000000 | 3.252692 | 0.051419 | -4.489591 | 80 | |
116 | dense | 896 | 896 | 1.000000 | 5.424869 | 0.111328 | -10.570277 | 71 | |
117 | dense | 4864 | 896 | 5.428571 | 15.748722 | 0.099606 | -21.737209 | 41 | under-trained |
118 | dense | 4864 | 896 | 5.428571 | 8.175791 | 0.116508 | -12.184532 | 122 | under-trained |
119 | dense | 4864 | 896 | 5.428571 | 6.507064 | 0.127308 | -10.518767 | 194 | under-trained |
120 | dense | 4864 | 896 | 5.428571 | 7.261394 | 0.125168 | -11.479688 | 170 | under-trained |
121 | dense | 896 | 896 | 1.000000 | 4.676467 | 0.126947 | -6.013720 | 104 | |
122 | dense | 4864 | 896 | 5.428571 | 9.667512 | 0.027838 | -13.412925 | 53 | under-trained |
123 | dense | 896 | 128 | 7.000000 | 5.389544 | 0.065868 | -11.496761 | 24 | |
124 | dense | 896 | 896 | 1.000000 | 9.003658 | 0.118130 | -18.378351 | 43 | under-trained |
125 | dense | 896 | 128 | 7.000000 | 5.030359 | 0.139349 | -13.959944 | 51 | |
126 | dense | 4864 | 896 | 5.428571 | 8.266755 | 0.027437 | -10.580772 | 68 | under-trained |
127 | dense | 4864 | 896 | 5.428571 | 9.652396 | 0.123365 | -15.304679 | 114 | under-trained |
128 | dense | 4864 | 896 | 5.428571 | 9.081788 | 0.040326 | -12.560520 | 53 | under-trained |
129 | dense | 896 | 128 | 7.000000 | 6.319227 | 0.148281 | -16.938471 | 37 | under-trained |
130 | dense | 896 | 128 | 7.000000 | 6.181004 | 0.138639 | -14.411620 | 30 | under-trained |
131 | dense | 896 | 896 | 1.000000 | 3.974285 | 0.127526 | -8.172765 | 158 | |
132 | dense | 4864 | 896 | 5.428571 | 7.613635 | 0.029647 | -9.939777 | 72 | under-trained |
133 | dense | 896 | 896 | 1.000000 | 5.934841 | 0.126967 | -9.878657 | 77 | |
134 | dense | 4864 | 896 | 5.428571 | 13.653499 | 0.050987 | -20.076928 | 32 | under-trained |
135 | dense | 4864 | 896 | 5.428571 | 12.202735 | 0.119717 | -18.565598 | 74 | under-trained |
136 | dense | 4864 | 896 | 5.428571 | 12.079121 | 0.127417 | -20.005503 | 81 | under-trained |
137 | dense | 896 | 128 | 7.000000 | 3.370591 | 0.140444 | -7.586391 | 62 | |
138 | dense | 896 | 896 | 1.000000 | 9.340216 | 0.113969 | -18.792646 | 44 | under-trained |
139 | dense | 896 | 896 | 1.000000 | 8.629328 | 0.114849 | -14.200137 | 37 | under-trained |
140 | dense | 896 | 128 | 7.000000 | 5.096226 | 0.144998 | -13.612413 | 52 | |
141 | dense | 4864 | 896 | 5.428571 | 10.672546 | 0.029227 | -14.977332 | 42 | under-trained |
142 | dense | 4864 | 896 | 5.428571 | 8.848785 | 0.115656 | -12.704385 | 116 | under-trained |
143 | dense | 896 | 128 | 7.000000 | 4.189168 | 0.133208 | -8.944850 | 38 | |
144 | dense | 4864 | 896 | 5.428571 | 9.794930 | 0.122175 | -16.460265 | 105 | under-trained |
145 | dense | 896 | 896 | 1.000000 | 5.339765 | 0.055483 | -7.875163 | 39 | |
146 | dense | 896 | 128 | 7.000000 | 4.036845 | 0.129426 | -10.240644 | 50 | |
147 | dense | 896 | 896 | 1.000000 | 6.176339 | 0.121968 | -12.628653 | 83 | under-trained |
148 | dense | 4864 | 896 | 5.428571 | 7.072069 | 0.029277 | -9.079815 | 83 | under-trained |
149 | dense | 4864 | 896 | 5.428571 | 7.091165 | 0.123241 | -11.302136 | 171 | under-trained |
150 | dense | 896 | 896 | 1.000000 | 7.177379 | 0.080770 | -12.330570 | 34 | under-trained |
151 | dense | 896 | 128 | 7.000000 | 5.426312 | 0.087072 | -11.208065 | 17 | |
152 | dense | 4864 | 896 | 5.428571 | 7.811014 | 0.034848 | -10.296763 | 80 | under-trained |
153 | dense | 896 | 128 | 7.000000 | 4.881081 | 0.143873 | -11.515850 | 44 | |
154 | dense | 896 | 896 | 1.000000 | 3.667389 | 0.105508 | -5.429983 | 110 | |
155 | dense | 896 | 128 | 7.000000 | 3.710427 | 0.077902 | -7.313113 | 32 | |
156 | dense | 4864 | 896 | 5.428571 | 12.694372 | 0.048181 | -19.203661 | 27 | under-trained |
157 | dense | 896 | 896 | 1.000000 | 5.281452 | 0.082414 | -7.366407 | 32 | |
158 | dense | 896 | 896 | 1.000000 | 4.452298 | 0.127083 | -8.254509 | 127 | |
159 | dense | 4864 | 896 | 5.428571 | 7.957367 | 0.042233 | -10.477434 | 82 | under-trained |
160 | dense | 4864 | 896 | 5.428571 | 9.537416 | 0.049759 | -12.593315 | 63 | under-trained |
161 | dense | 896 | 128 | 7.000000 | 5.625792 | 0.138816 | -12.796147 | 40 | |
162 | dense | 896 | 128 | 7.000000 | 3.974621 | 0.119519 | -8.211341 | 43 | |
163 | dense | 4864 | 896 | 5.428571 | 9.865053 | 0.074795 | -12.171816 | 66 | under-trained |
164 | dense | 4864 | 896 | 5.428571 | 5.276874 | 0.073253 | -6.014796 | 183 | |
165 | dense | 896 | 896 | 1.000000 | 4.206936 | 0.104870 | -5.682458 | 67 | |
166 | dense | 896 | 896 | 1.000000 | 5.359288 | 0.033217 | -9.707489 | 58 | |
167 | dense | 896 | 128 | 7.000000 | 5.824821 | 0.139401 | -13.003427 | 28 | |
168 | dense | 4864 | 896 | 5.428571 | 9.848768 | 0.072226 | -12.696973 | 65 | under-trained |