Find this model in the Qwen2-0.5B model summary. The table below lists the metrics for each layer.

id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 4864 | 896 | 5.428571 | 6.551682 | 0.045625 | 7.189667 | 127 | under-trained |
2 | dense | 4864 | 896 | 5.428571 | 2.477034 | 0.062614 | 3.609712 | 46 | |
3 | dense | 4864 | 896 | 5.428571 | 9.295573 | 0.051292 | 6.764146 | 81 | under-trained |
4 | dense | 896 | 128 | 7.000000 | 1.731639 | 0.070003 | 3.461420 | 39 | over-trained |
5 | dense | 896 | 896 | 1.000000 | 1.846480 | 0.061579 | 1.503464 | 223 | over-trained |
6 | dense | 896 | 896 | 1.000000 | 1.781065 | 0.051017 | 4.414714 | 94 | over-trained |
7 | dense | 896 | 128 | 7.000000 | 3.494410 | 0.127872 | -2.099096 | 46 | |
8 | dense | 896 | 128 | 7.000000 | 5.856395 | 0.128464 | -3.208385 | 45 | |
9 | dense | 896 | 896 | 1.000000 | 1.955930 | 0.105005 | 2.373389 | 309 | over-trained |
10 | dense | 896 | 128 | 7.000000 | 5.101411 | 0.080345 | 3.667084 | 27 | |
11 | dense | 896 | 896 | 1.000000 | 3.448435 | 0.021520 | 2.647063 | 106 | |
12 | dense | 4864 | 896 | 5.428571 | 3.111879 | 0.045377 | 4.530211 | 397 | |
13 | dense | 4864 | 896 | 5.428571 | 5.474139 | 0.017875 | 6.178977 | 132 | |
14 | dense | 4864 | 896 | 5.428571 | 9.343803 | 0.097953 | 5.210929 | 100 | under-trained |
15 | dense | 4864 | 896 | 5.428571 | 4.046666 | 0.013278 | 6.445025 | 187 | |
16 | dense | 4864 | 896 | 5.428571 | 7.492965 | 0.093584 | 8.546784 | 148 | under-trained |
17 | dense | 896 | 128 | 7.000000 | 2.724918 | 0.146408 | 1.477889 | 54 | |
18 | dense | 896 | 896 | 1.000000 | 1.800615 | 0.110988 | 1.033993 | 314 | over-trained |
19 | dense | 896 | 896 | 1.000000 | 2.545709 | 0.066109 | 2.543839 | 109 | |
20 | dense | 896 | 128 | 7.000000 | 6.254124 | 0.120555 | -2.791456 | 41 | under-trained |
21 | dense | 4864 | 896 | 5.428571 | 5.878816 | 0.028601 | 6.436952 | 89 | |
22 | dense | 896 | 896 | 1.000000 | 4.656673 | 0.058927 | 3.591491 | 37 | |
23 | dense | 896 | 128 | 7.000000 | 12.099408 | 0.143900 | -5.671090 | 33 | under-trained |
24 | dense | 896 | 896 | 1.000000 | 4.544381 | 0.086375 | 2.040400 | 80 | |
25 | dense | 4864 | 896 | 5.428571 | 5.070794 | 0.019350 | 5.090147 | 167 | |
26 | dense | 4864 | 896 | 5.428571 | 8.688068 | 0.118539 | 4.370961 | 147 | under-trained |
27 | dense | 4864 | 896 | 5.428571 | 4.816545 | 0.016976 | 8.114628 | 110 | |
28 | dense | 896 | 128 | 7.000000 | 6.529906 | 0.062064 | 3.502218 | 22 | under-trained |
29 | dense | 896 | 896 | 1.000000 | 1.994859 | 0.084188 | 1.574632 | 261 | over-trained |
30 | dense | 896 | 896 | 1.000000 | 3.637590 | 0.070202 | 2.110511 | 39 | |
31 | dense | 896 | 128 | 7.000000 | 6.518658 | 0.064340 | -1.855353 | 25 | under-trained |
32 | dense | 4864 | 896 | 5.428571 | 8.572913 | 0.073164 | 8.321569 | 102 | under-trained |
33 | dense | 4864 | 896 | 5.428571 | 4.665241 | 0.030826 | 7.413914 | 161 | |
34 | dense | 4864 | 896 | 5.428571 | 5.366362 | 0.018827 | 5.941470 | 67 | |
35 | dense | 896 | 128 | 7.000000 | 8.234554 | 0.055263 | 3.313591 | 19 | under-trained |
36 | dense | 896 | 128 | 7.000000 | 11.327167 | 0.147954 | -4.164334 | 36 | under-trained |
37 | dense | 4864 | 896 | 5.428571 | 5.950887 | 0.025476 | 6.357763 | 49 | |
38 | dense | 4864 | 896 | 5.428571 | 4.957804 | 0.019596 | 7.510027 | 106 | |
39 | dense | 4864 | 896 | 5.428571 | 10.869029 | 0.048265 | 7.483677 | 65 | under-trained |
40 | dense | 896 | 128 | 7.000000 | 3.625612 | 0.157168 | 1.665904 | 48 | |
41 | dense | 896 | 896 | 1.000000 | 4.117461 | 0.063896 | 1.423450 | 84 | |
42 | dense | 896 | 896 | 1.000000 | 2.032596 | 0.109796 | 1.487562 | 257 | |
43 | dense | 896 | 896 | 1.000000 | 2.892166 | 0.070259 | 2.282066 | 92 | |
44 | dense | 4864 | 896 | 5.428571 | 4.337170 | 0.021905 | 6.801796 | 99 | |
45 | dense | 4864 | 896 | 5.428571 | 10.534726 | 0.089720 | 6.405026 | 69 | under-trained |
46 | dense | 896 | 128 | 7.000000 | 4.593972 | 0.074904 | 2.642709 | 29 | |
47 | dense | 896 | 896 | 1.000000 | 6.301978 | 0.060054 | 2.357586 | 32 | under-trained |
48 | dense | 896 | 128 | 7.000000 | 14.252745 | 0.141666 | -5.257399 | 29 | under-trained |
49 | dense | 4864 | 896 | 5.428571 | 4.828216 | 0.074845 | 5.665256 | 179 | |
50 | dense | 4864 | 896 | 5.428571 | 5.377725 | 0.036074 | 5.508954 | 39 | |
51 | dense | 4864 | 896 | 5.428571 | 3.336236 | 0.023103 | 5.697603 | 207 | |
52 | dense | 4864 | 896 | 5.428571 | 11.451881 | 0.058433 | 6.575077 | 49 | under-trained |
53 | dense | 896 | 128 | 7.000000 | 2.660692 | 0.159475 | 0.944688 | 59 | |
54 | dense | 896 | 896 | 1.000000 | 7.363508 | 0.107553 | 2.279651 | 43 | under-trained |
55 | dense | 896 | 896 | 1.000000 | 2.225137 | 0.101174 | 1.694558 | 198 | |
56 | dense | 896 | 128 | 7.000000 | 9.457107 | 0.135761 | -3.187421 | 40 | under-trained |
57 | dense | 896 | 128 | 7.000000 | 8.901237 | 0.094971 | -2.952971 | 23 | under-trained |
58 | dense | 896 | 896 | 1.000000 | 2.460180 | 0.087275 | 2.093203 | 166 | |
59 | dense | 896 | 896 | 1.000000 | 4.031756 | 0.093680 | 1.891023 | 73 | |
60 | dense | 4864 | 896 | 5.428571 | 4.738663 | 0.031748 | 5.805706 | 113 | |
61 | dense | 4864 | 896 | 5.428571 | 8.246358 | 0.050691 | 6.036240 | 55 | under-trained |
62 | dense | 4864 | 896 | 5.428571 | 4.090682 | 0.030264 | 6.382751 | 82 | |
63 | dense | 896 | 128 | 7.000000 | 2.500412 | 0.161264 | 1.514754 | 61 | |
64 | dense | 4864 | 896 | 5.428571 | 3.083200 | 0.052027 | 3.767428 | 301 | |
65 | dense | 4864 | 896 | 5.428571 | 4.238064 | 0.029938 | 6.028983 | 104 | |
66 | dense | 4864 | 896 | 5.428571 | 7.122652 | 0.041506 | 5.055999 | 80 | under-trained |
67 | dense | 896 | 128 | 7.000000 | 7.618087 | 0.114418 | 2.178255 | 17 | under-trained |
68 | dense | 896 | 896 | 1.000000 | 2.947953 | 0.089517 | 1.204532 | 76 | |
69 | dense | 896 | 896 | 1.000000 | 2.106217 | 0.109936 | 1.737665 | 132 | |
70 | dense | 896 | 128 | 7.000000 | 5.978483 | 0.127069 | -1.488796 | 34 | |
71 | dense | 4864 | 896 | 5.428571 | 5.786648 | 0.055686 | 4.422509 | 110 | |
72 | dense | 896 | 128 | 7.000000 | 6.093273 | 0.073488 | -1.097738 | 37 | under-trained |
73 | dense | 4864 | 896 | 5.428571 | 4.870504 | 0.023176 | 5.856598 | 83 | |
74 | dense | 4864 | 896 | 5.428571 | 3.271460 | 0.046813 | 5.130322 | 196 | |
75 | dense | 896 | 128 | 7.000000 | 2.316239 | 0.136970 | 1.142604 | 59 | |
76 | dense | 896 | 896 | 1.000000 | 2.678878 | 0.112620 | 1.420278 | 171 | |
77 | dense | 896 | 896 | 1.000000 | 2.348399 | 0.082574 | 2.432170 | 151 | |
78 | dense | 4864 | 896 | 5.428571 | 4.863859 | 0.022917 | 5.474661 | 84 | |
79 | dense | 896 | 128 | 7.000000 | 7.114348 | 0.147785 | -1.093198 | 47 | under-trained |
80 | dense | 896 | 896 | 1.000000 | 1.787603 | 0.103613 | 1.408808 | 285 | over-trained |
81 | dense | 896 | 896 | 1.000000 | 1.669254 | 0.101583 | 0.790135 | 357 | over-trained |
82 | dense | 896 | 128 | 7.000000 | 2.544097 | 0.139608 | 0.894625 | 61 | |
83 | dense | 4864 | 896 | 5.428571 | 4.129937 | 0.036842 | 6.531194 | 47 | |
84 | dense | 4864 | 896 | 5.428571 | 6.316289 | 0.044035 | 8.232170 | 93 | under-trained |
85 | dense | 4864 | 896 | 5.428571 | 6.974273 | 0.077050 | 4.768455 | 112 | under-trained |
86 | dense | 4864 | 896 | 5.428571 | 4.573292 | 0.039868 | 5.181602 | 150 | |
87 | dense | 4864 | 896 | 5.428571 | 4.474291 | 0.042220 | 7.072582 | 65 | |
88 | dense | 896 | 896 | 1.000000 | 2.888406 | 0.089386 | 2.393050 | 66 | |
89 | dense | 896 | 128 | 7.000000 | 6.920799 | 0.089502 | -1.609497 | 27 | under-trained |
90 | dense | 896 | 128 | 7.000000 | 3.747185 | 0.103816 | 1.193957 | 39 | |
91 | dense | 896 | 896 | 1.000000 | 3.793565 | 0.085599 | 1.730381 | 69 | |
92 | dense | 4864 | 896 | 5.428571 | 4.992632 | 0.034751 | 7.348378 | 48 | |
93 | dense | 896 | 128 | 7.000000 | 3.994687 | 0.135144 | 1.610569 | 40 | |
94 | dense | 896 | 896 | 1.000000 | 2.509753 | 0.114709 | 0.923426 | 182 | |
95 | dense | 896 | 896 | 1.000000 | 2.107190 | 0.089914 | 1.687826 | 231 | |
96 | dense | 896 | 128 | 7.000000 | 9.882666 | 0.132505 | -2.253880 | 29 | under-trained |
97 | dense | 4864 | 896 | 5.428571 | 5.242043 | 0.029877 | 6.142883 | 85 | |
98 | dense | 4864 | 896 | 5.428571 | 7.835424 | 0.055665 | 6.001147 | 55 | under-trained |
99 | dense | 4864 | 896 | 5.428571 | 4.931079 | 0.021403 | 5.305393 | 100 | |
100 | dense | 4864 | 896 | 5.428571 | 4.849560 | 0.020830 | 6.907342 | 70 | |
101 | dense | 4864 | 896 | 5.428571 | 7.381815 | 0.048210 | 5.619805 | 78 | under-trained |
102 | dense | 896 | 128 | 7.000000 | 8.987024 | 0.083022 | 3.535601 | 10 | under-trained |
103 | dense | 896 | 896 | 1.000000 | 3.542919 | 0.075543 | 1.860845 | 46 | |
104 | dense | 896 | 896 | 1.000000 | 2.249265 | 0.080292 | 1.716786 | 150 | |
105 | dense | 896 | 128 | 7.000000 | 7.623454 | 0.136574 | -1.917623 | 40 | under-trained |
106 | dense | 4864 | 896 | 5.428571 | 5.544092 | 0.023014 | 5.175012 | 95 | |
107 | dense | 4864 | 896 | 5.428571 | 4.193220 | 0.084269 | 6.272879 | 215 | |
108 | dense | 4864 | 896 | 5.428571 | 8.875426 | 0.050283 | 7.424631 | 62 | under-trained |
109 | dense | 896 | 128 | 7.000000 | 3.305651 | 0.148436 | 1.200509 | 47 | |
110 | dense | 896 | 896 | 1.000000 | 3.011099 | 0.051067 | 1.950813 | 78 | |
111 | dense | 896 | 896 | 1.000000 | 2.219114 | 0.081814 | 1.742492 | 198 | |
112 | dense | 896 | 128 | 7.000000 | 10.958066 | 0.141882 | -2.519328 | 27 | under-trained |
113 | dense | 896 | 896 | 1.000000 | 2.428779 | 0.085681 | 1.908523 | 117 | |
114 | dense | 896 | 128 | 7.000000 | 4.958115 | 0.106717 | -0.537622 | 72 | |
115 | dense | 4864 | 896 | 5.428571 | 7.254198 | 0.062595 | 7.724561 | 108 | under-trained |
116 | dense | 4864 | 896 | 5.428571 | 5.351503 | 0.032900 | 8.145359 | 105 | |
117 | dense | 4864 | 896 | 5.428571 | 8.048571 | 0.038665 | 7.606499 | 80 | under-trained |
118 | dense | 896 | 128 | 7.000000 | 2.987790 | 0.106552 | 0.846361 | 44 | |
119 | dense | 896 | 896 | 1.000000 | 2.772384 | 0.126121 | 1.297394 | 122 | |
120 | dense | 896 | 128 | 7.000000 | 6.910116 | 0.134829 | -1.345953 | 57 | under-trained |
121 | dense | 896 | 896 | 1.000000 | 2.268949 | 0.061887 | 2.049461 | 161 | |
122 | dense | 896 | 896 | 1.000000 | 1.599532 | 0.116860 | 1.317562 | 441 | over-trained |
123 | dense | 896 | 128 | 7.000000 | 1.800353 | 0.147981 | 0.929600 | 80 | over-trained |
124 | dense | 4864 | 896 | 5.428571 | 8.338617 | 0.043126 | 6.848175 | 68 | under-trained |
125 | dense | 4864 | 896 | 5.428571 | 4.922129 | 0.025230 | 7.965501 | 81 | |
126 | dense | 4864 | 896 | 5.428571 | 8.320988 | 0.074419 | 7.977300 | 99 | under-trained |
127 | dense | 4864 | 896 | 5.428571 | 9.313145 | 0.086565 | 7.814912 | 79 | under-trained |
128 | dense | 896 | 128 | 7.000000 | 3.534275 | 0.108965 | 1.218404 | 32 | |
129 | dense | 896 | 896 | 1.000000 | 2.212815 | 0.106859 | 1.082324 | 226 | |
130 | dense | 896 | 896 | 1.000000 | 2.192197 | 0.061232 | 1.769493 | 202 | |
131 | dense | 896 | 128 | 7.000000 | 9.294324 | 0.097001 | -0.091092 | 26 | under-trained |
132 | dense | 4864 | 896 | 5.428571 | 5.448246 | 0.017289 | 7.999442 | 103 | |
133 | dense | 4864 | 896 | 5.428571 | 9.555281 | 0.031427 | 8.507078 | 41 | under-trained |
134 | dense | 896 | 128 | 7.000000 | 5.199333 | 0.116627 | -0.526316 | 57 | |
135 | dense | 896 | 896 | 1.000000 | 3.069689 | 0.029849 | 2.646768 | 73 | |
136 | dense | 4864 | 896 | 5.428571 | 9.615848 | 0.038617 | 7.386968 | 54 | under-trained |
137 | dense | 4864 | 896 | 5.428571 | 6.205421 | 0.036035 | 9.780683 | 89 | under-trained |
138 | dense | 4864 | 896 | 5.428571 | 11.059703 | 0.048901 | 9.822908 | 43 | under-trained |
139 | dense | 896 | 128 | 7.000000 | 4.548526 | 0.067883 | 1.684384 | 22 | |
140 | dense | 896 | 896 | 1.000000 | 1.933760 | 0.106442 | 0.838005 | 330 | over-trained |
141 | dense | 4864 | 896 | 5.428571 | 5.823895 | 0.031057 | 8.536806 | 101 | |
142 | dense | 4864 | 896 | 5.428571 | 8.490363 | 0.059503 | 7.628719 | 73 | under-trained |
143 | dense | 896 | 128 | 7.000000 | 2.609740 | 0.113059 | 1.145504 | 41 | |
144 | dense | 896 | 896 | 1.000000 | 2.923837 | 0.100007 | 1.599325 | 100 | |
145 | dense | 896 | 896 | 1.000000 | 2.948913 | 0.058473 | 2.671888 | 55 | |
146 | dense | 896 | 128 | 7.000000 | 4.303299 | 0.058979 | 1.001239 | 31 | |
147 | dense | 4864 | 896 | 5.428571 | 9.140988 | 0.104958 | 6.934989 | 93 | under-trained |
148 | dense | 4864 | 896 | 5.428571 | 9.179011 | 0.072718 | 7.727907 | 65 | under-trained |
149 | dense | 4864 | 896 | 5.428571 | 5.343617 | 0.028830 | 7.804938 | 103 | |
150 | dense | 4864 | 896 | 5.428571 | 8.176947 | 0.050093 | 8.309394 | 66 | under-trained |
151 | dense | 896 | 896 | 1.000000 | 4.215312 | 0.094687 | 2.372875 | 123 | |
152 | dense | 896 | 128 | 7.000000 | 4.675246 | 0.131119 | 1.580299 | 44 | |
153 | dense | 896 | 128 | 7.000000 | 1.629343 | 0.112875 | 0.707981 | 84 | over-trained |
154 | dense | 896 | 896 | 1.000000 | 1.820711 | 0.081895 | 1.694022 | 242 | over-trained |
155 | dense | 4864 | 896 | 5.428571 | 7.687709 | 0.030461 | 7.155423 | 33 | under-trained |
156 | dense | 4864 | 896 | 5.428571 | 4.361557 | 0.028092 | 6.456797 | 142 | |
157 | dense | 4864 | 896 | 5.428571 | 5.980487 | 0.055017 | 8.884806 | 111 | |
158 | dense | 896 | 128 | 7.000000 | 2.319724 | 0.075231 | 0.819343 | 54 | |
159 | dense | 896 | 896 | 1.000000 | 4.715978 | 0.035083 | 3.642226 | 44 | |
160 | dense | 896 | 896 | 1.000000 | 1.845976 | 0.106947 | 1.472087 | 273 | over-trained |
161 | dense | 896 | 128 | 7.000000 | 6.370638 | 0.101938 | 1.977601 | 32 | under-trained |
162 | dense | 896 | 896 | 1.000000 | 3.846706 | 0.046289 | 3.389676 | 62 | |
163 | dense | 4864 | 896 | 5.428571 | 5.519099 | 0.059040 | 5.064966 | 112 | |
164 | dense | 4864 | 896 | 5.428571 | 4.416165 | 0.034220 | 7.842929 | 177 | |
165 | dense | 4864 | 896 | 5.428571 | 5.241711 | 0.063260 | 7.754452 | 167 | |
166 | dense | 896 | 128 | 7.000000 | 4.073497 | 0.083413 | 1.237533 | 23 | |
167 | dense | 896 | 896 | 1.000000 | 2.905258 | 0.052974 | 3.525628 | 54 | |
168 | dense | 896 | 128 | 7.000000 | 5.688361 | 0.071589 | 3.505463 | 37 | |