Find this model in the Llama3.2 model summary

id | layer_type | N | M | Q | alpha | D | alpha-hat | num_spikes | warning |
---|---|---|---|---|---|---|---|---|---|
1 | dense | 8192 | 3072 | 2.666667 | 8.264839 | 0.030377 | -4.589123 | 123 | under-trained |
2 | dense | 8192 | 3072 | 2.666667 | 5.427392 | 0.019027 | -2.402859 | 95 | |
3 | dense | 8192 | 3072 | 2.666667 | 6.029718 | 0.027023 | -2.954579 | 101 | under-trained |
4 | dense | 3072 | 1024 | 3.000000 | 1.910244 | 0.022000 | 1.444429 | 148 | over-trained |
5 | dense | 3072 | 3072 | 1.000000 | 4.333730 | 0.036602 | -2.600130 | 82 | |
6 | dense | 3072 | 3072 | 1.000000 | 1.927324 | 0.020446 | 1.934747 | 243 | over-trained |
7 | dense | 3072 | 1024 | 3.000000 | 3.807295 | 0.029937 | -3.314137 | 74 | |
8 | dense | 3072 | 3072 | 1.000000 | 2.648597 | 0.034925 | 0.342856 | 160 | |
9 | dense | 3072 | 3072 | 1.000000 | 4.729355 | 0.021629 | -2.796744 | 90 | |
10 | dense | 3072 | 1024 | 3.000000 | 2.808248 | 0.043723 | -0.926874 | 74 | |
11 | dense | 3072 | 1024 | 3.000000 | 5.690921 | 0.036735 | -5.487120 | 48 | |
12 | dense | 8192 | 3072 | 2.666667 | 6.731403 | 0.019521 | -2.175872 | 84 | under-trained |
13 | dense | 8192 | 3072 | 2.666667 | 8.547087 | 0.020030 | -4.944839 | 104 | under-trained |
14 | dense | 8192 | 3072 | 2.666667 | 7.711370 | 0.021197 | -3.372511 | 72 | under-trained |
15 | dense | 8192 | 3072 | 2.666667 | 5.904502 | 0.022263 | -1.591275 | 130 | |
16 | dense | 3072 | 1024 | 3.000000 | 7.618950 | 0.035025 | -7.679263 | 25 | under-trained |
17 | dense | 3072 | 3072 | 1.000000 | 4.549074 | 0.030948 | -0.345708 | 38 | |
18 | dense | 3072 | 3072 | 1.000000 | 6.208726 | 0.085655 | -3.271215 | 115 | under-trained |
19 | dense | 3072 | 1024 | 3.000000 | 3.544139 | 0.050380 | -1.458445 | 93 | |
20 | dense | 8192 | 3072 | 2.666667 | 7.184942 | 0.022499 | -2.556238 | 62 | under-trained |
21 | dense | 8192 | 3072 | 2.666667 | 9.086916 | 0.023779 | -5.604345 | 76 | under-trained |
22 | dense | 3072 | 1024 | 3.000000 | 7.122589 | 0.094340 | -8.186353 | 79 | under-trained |
23 | dense | 3072 | 3072 | 1.000000 | 3.614073 | 0.024263 | -0.171875 | 77 | |
24 | dense | 3072 | 3072 | 1.000000 | 5.142811 | 0.020526 | -3.049696 | 117 | |
25 | dense | 3072 | 1024 | 3.000000 | 3.733194 | 0.044643 | -1.321285 | 57 | |
26 | dense | 8192 | 3072 | 2.666667 | 6.501665 | 0.022653 | -1.991742 | 78 | under-trained |
27 | dense | 8192 | 3072 | 2.666667 | 5.664677 | 0.023492 | -0.935428 | 54 | |
28 | dense | 8192 | 3072 | 2.666667 | 8.526405 | 0.017026 | -4.632564 | 93 | under-trained |
29 | dense | 8192 | 3072 | 2.666667 | 9.556273 | 0.026890 | -5.615144 | 58 | under-trained |
30 | dense | 8192 | 3072 | 2.666667 | 5.516295 | 0.022439 | -0.994037 | 87 | |
31 | dense | 8192 | 3072 | 2.666667 | 6.987621 | 0.022587 | -2.845342 | 68 | under-trained |
32 | dense | 3072 | 1024 | 3.000000 | 3.590479 | 0.039800 | -0.993560 | 35 | |
33 | dense | 3072 | 3072 | 1.000000 | 4.404401 | 0.058301 | -2.902641 | 155 | |
34 | dense | 3072 | 3072 | 1.000000 | 3.478386 | 0.026929 | 0.234005 | 60 | |
35 | dense | 3072 | 1024 | 3.000000 | 5.432409 | 0.024825 | -5.265191 | 58 | |
36 | dense | 8192 | 3072 | 2.666667 | 10.110294 | 0.030994 | -6.162674 | 70 | under-trained |
37 | dense | 3072 | 1024 | 3.000000 | 6.755524 | 0.061907 | -7.565215 | 55 | under-trained |
38 | dense | 3072 | 3072 | 1.000000 | 3.391487 | 0.060168 | -0.136335 | 160 | |
39 | dense | 3072 | 3072 | 1.000000 | 5.057540 | 0.093630 | -3.346338 | 183 | |
40 | dense | 8192 | 3072 | 2.666667 | 6.763696 | 0.029802 | -2.420135 | 43 | under-trained |
41 | dense | 8192 | 3072 | 2.666667 | 4.734504 | 0.055742 | -0.625182 | 226 | |
42 | dense | 3072 | 1024 | 3.000000 | 2.776399 | 0.077997 | -0.982277 | 211 | |
43 | dense | 3072 | 3072 | 1.000000 | 3.875615 | 0.045758 | 0.025864 | 86 | |
44 | dense | 8192 | 3072 | 2.666667 | 7.358754 | 0.093769 | -4.214166 | 201 | under-trained |
45 | dense | 8192 | 3072 | 2.666667 | 6.798716 | 0.031775 | -2.405412 | 40 | under-trained |
46 | dense | 3072 | 1024 | 3.000000 | 4.347303 | 0.038639 | -1.495709 | 26 | |
47 | dense | 3072 | 3072 | 1.000000 | 5.532252 | 0.100236 | -3.662962 | 166 | |
48 | dense | 3072 | 1024 | 3.000000 | 5.720510 | 0.102531 | -5.778248 | 127 | |
49 | dense | 8192 | 3072 | 2.666667 | 5.507796 | 0.033077 | -0.586292 | 65 | |
50 | dense | 8192 | 3072 | 2.666667 | 8.631938 | 0.033260 | -4.482566 | 55 | under-trained |
51 | dense | 3072 | 1024 | 3.000000 | 5.952008 | 0.101501 | -6.944707 | 117 | |
52 | dense | 3072 | 3072 | 1.000000 | 4.301208 | 0.024631 | 0.236861 | 43 | |
53 | dense | 8192 | 3072 | 2.666667 | 5.532444 | 0.035529 | -0.242229 | 61 | |
54 | dense | 3072 | 1024 | 3.000000 | 2.744294 | 0.087061 | -0.897100 | 248 | |
55 | dense | 8192 | 3072 | 2.666667 | 6.115520 | 0.022318 | -1.718034 | 73 | under-trained |
56 | dense | 3072 | 3072 | 1.000000 | 7.522208 | 0.033112 | -5.930586 | 45 | under-trained |
57 | dense | 8192 | 3072 | 2.666667 | 7.267693 | 0.026346 | -2.556404 | 98 | under-trained |
58 | dense | 8192 | 3072 | 2.666667 | 5.753881 | 0.024230 | -1.495162 | 81 | |
59 | dense | 3072 | 1024 | 3.000000 | 3.012027 | 0.088892 | -0.888047 | 195 | |
60 | dense | 3072 | 3072 | 1.000000 | 8.958462 | 0.041867 | -7.807350 | 25 | under-trained |
61 | dense | 3072 | 3072 | 1.000000 | 4.139258 | 0.034595 | -0.042304 | 63 | |
62 | dense | 3072 | 1024 | 3.000000 | 7.944905 | 0.114593 | -9.875511 | 86 | under-trained |
63 | dense | 8192 | 3072 | 2.666667 | 5.315628 | 0.032064 | -0.278696 | 73 | |
64 | dense | 3072 | 1024 | 3.000000 | 7.407059 | 0.040245 | -7.348673 | 44 | under-trained |
65 | dense | 3072 | 1024 | 3.000000 | 3.006987 | 0.097543 | -0.937154 | 218 | |
66 | dense | 3072 | 3072 | 1.000000 | 3.964925 | 0.038657 | -0.195011 | 79 | |
67 | dense | 3072 | 3072 | 1.000000 | 5.091320 | 0.026000 | -2.952071 | 99 | |
68 | dense | 8192 | 3072 | 2.666667 | 5.412970 | 0.024791 | -1.220361 | 97 | |
69 | dense | 8192 | 3072 | 2.666667 | 7.185206 | 0.022925 | -3.073511 | 78 | under-trained |
70 | dense | 8192 | 3072 | 2.666667 | 5.052900 | 0.029897 | -0.045710 | 63 | |
71 | dense | 8192 | 3072 | 2.666667 | 6.493500 | 0.038445 | -2.556522 | 80 | under-trained |
72 | dense | 8192 | 3072 | 2.666667 | 4.717411 | 0.019863 | 0.317115 | 83 | |
73 | dense | 8192 | 3072 | 2.666667 | 5.139345 | 0.026459 | -0.552209 | 80 | |
74 | dense | 3072 | 1024 | 3.000000 | 3.188272 | 0.081218 | -0.819267 | 159 | |
75 | dense | 3072 | 3072 | 1.000000 | 5.037666 | 0.049692 | -3.792928 | 90 | |
76 | dense | 3072 | 3072 | 1.000000 | 4.795733 | 0.045455 | -0.309341 | 33 | |
77 | dense | 3072 | 1024 | 3.000000 | 6.138529 | 0.088793 | -7.218179 | 90 | under-trained |
78 | dense | 3072 | 1024 | 3.000000 | 5.427275 | 0.095177 | -6.699990 | 132 | |
79 | dense | 3072 | 3072 | 1.000000 | 6.554191 | 0.033631 | -5.554564 | 65 | under-trained |
80 | dense | 8192 | 3072 | 2.666667 | 5.952573 | 0.067177 | -2.166174 | 170 | |
81 | dense | 8192 | 3072 | 2.666667 | 5.058885 | 0.023361 | 0.135069 | 63 | |
82 | dense | 8192 | 3072 | 2.666667 | 5.309048 | 0.025181 | -0.792477 | 60 | |
83 | dense | 3072 | 1024 | 3.000000 | 3.175213 | 0.105834 | -1.216840 | 190 | |
84 | dense | 3072 | 3072 | 1.000000 | 4.722322 | 0.052215 | -0.413471 | 44 | |
85 | dense | 8192 | 3072 | 2.666667 | 6.654460 | 0.031731 | -2.255034 | 79 | under-trained |
86 | dense | 3072 | 1024 | 3.000000 | 5.566587 | 0.114020 | -7.112098 | 146 | |
87 | dense | 8192 | 3072 | 2.666667 | 5.207274 | 0.017908 | 0.244679 | 77 | |
88 | dense | 8192 | 3072 | 2.666667 | 5.487356 | 0.023645 | -0.351474 | 56 | |
89 | dense | 3072 | 1024 | 3.000000 | 3.532797 | 0.095202 | -0.827521 | 148 | |
90 | dense | 3072 | 3072 | 1.000000 | 5.133543 | 0.097435 | -4.201509 | 181 | |
91 | dense | 3072 | 3072 | 1.000000 | 4.430546 | 0.044070 | -0.348987 | 61 | |
92 | dense | 3072 | 3072 | 1.000000 | 4.910760 | 0.038128 | -0.497659 | 37 | |
93 | dense | 3072 | 3072 | 1.000000 | 8.334429 | 0.043026 | -7.538385 | 44 | under-trained |
94 | dense | 3072 | 1024 | 3.000000 | 4.901953 | 0.027700 | -0.948545 | 34 | |
95 | dense | 3072 | 1024 | 3.000000 | 5.738744 | 0.112249 | -7.328643 | 134 | |
96 | dense | 8192 | 3072 | 2.666667 | 5.721652 | 0.021788 | -0.044854 | 59 | |
97 | dense | 8192 | 3072 | 2.666667 | 5.912942 | 0.027695 | -0.931761 | 71 | |
98 | dense | 8192 | 3072 | 2.666667 | 5.886546 | 0.083802 | -2.006790 | 215 | |
99 | dense | 8192 | 3072 | 2.666667 | 6.219474 | 0.025183 | -0.259615 | 66 | under-trained |
100 | dense | 8192 | 3072 | 2.666667 | 6.440292 | 0.022573 | -1.287689 | 60 | under-trained |
101 | dense | 3072 | 1024 | 3.000000 | 4.555946 | 0.036389 | -0.766451 | 41 | |
102 | dense | 3072 | 3072 | 1.000000 | 5.565154 | 0.020158 | -3.088433 | 64 | |
103 | dense | 3072 | 3072 | 1.000000 | 4.141186 | 0.034665 | 0.083568 | 80 | |
104 | dense | 3072 | 1024 | 3.000000 | 7.575715 | 0.035376 | -8.101897 | 45 | under-trained |
105 | dense | 8192 | 3072 | 2.666667 | 8.885557 | 0.040561 | -3.719243 | 36 | under-trained |
106 | dense | 3072 | 1024 | 3.000000 | 7.645046 | 0.108143 | -9.180988 | 75 | under-trained |
107 | dense | 3072 | 3072 | 1.000000 | 4.849263 | 0.040118 | 0.335063 | 30 | |
108 | dense | 8192 | 3072 | 2.666667 | 6.857132 | 0.024924 | -1.478075 | 46 | under-trained |
109 | dense | 3072 | 1024 | 3.000000 | 3.624050 | 0.102802 | -0.908616 | 159 | |
110 | dense | 8192 | 3072 | 2.666667 | 8.258966 | 0.084968 | -4.276262 | 129 | under-trained |
111 | dense | 3072 | 3072 | 1.000000 | 5.271566 | 0.070601 | -3.131146 | 102 | |
112 | dense | 8192 | 3072 | 2.666667 | 6.185030 | 0.020739 | -0.160873 | 66 | under-trained |
113 | dense | 8192 | 3072 | 2.666667 | 6.849085 | 0.031994 | -1.579294 | 67 | under-trained |
114 | dense | 8192 | 3072 | 2.666667 | 6.706749 | 0.094155 | -3.515423 | 221 | under-trained |
115 | dense | 8192 | 3072 | 2.666667 | 6.276559 | 0.027149 | -0.233640 | 71 | under-trained |
116 | dense | 3072 | 3072 | 1.000000 | 4.600202 | 0.023081 | -2.186406 | 119 | |
117 | dense | 3072 | 3072 | 1.000000 | 4.008723 | 0.078144 | -0.155087 | 145 | |
118 | dense | 3072 | 1024 | 3.000000 | 7.453955 | 0.092573 | -8.294528 | 72 | under-trained |
119 | dense | 3072 | 1024 | 3.000000 | 3.926190 | 0.091026 | -1.171796 | 122 | |
120 | dense | 3072 | 1024 | 3.000000 | 6.009933 | 0.102167 | -6.466365 | 108 | under-trained |
121 | dense | 3072 | 3072 | 1.000000 | 4.026371 | 0.024068 | 0.425802 | 55 | |
122 | dense | 8192 | 3072 | 2.666667 | 6.849526 | 0.024650 | -1.516523 | 69 | under-trained |
123 | dense | 3072 | 1024 | 3.000000 | 3.627929 | 0.081328 | -0.601244 | 116 | |
124 | dense | 8192 | 3072 | 2.666667 | 5.987901 | 0.032022 | -0.237384 | 80 | |
125 | dense | 8192 | 3072 | 2.666667 | 7.570369 | 0.090089 | -3.694327 | 141 | under-trained |
126 | dense | 3072 | 3072 | 1.000000 | 4.508092 | 0.025265 | -1.440112 | 111 | |
127 | dense | 8192 | 3072 | 2.666667 | 6.202949 | 0.031211 | -0.284259 | 74 | under-trained |
128 | dense | 8192 | 3072 | 2.666667 | 7.019441 | 0.026270 | -1.608652 | 57 | under-trained |
129 | dense | 3072 | 1024 | 3.000000 | 4.653248 | 0.040481 | -1.485704 | 42 | |
130 | dense | 3072 | 3072 | 1.000000 | 5.529526 | 0.084469 | -3.394428 | 131 | |
131 | dense | 3072 | 3072 | 1.000000 | 4.748300 | 0.034885 | 0.028604 | 46 | |
132 | dense | 3072 | 1024 | 3.000000 | 6.364392 | 0.025299 | -5.726780 | 48 | under-trained |
133 | dense | 8192 | 3072 | 2.666667 | 6.636970 | 0.079429 | -3.230008 | 183 | under-trained |
134 | dense | 3072 | 1024 | 3.000000 | 8.477756 | 0.065978 | -9.406626 | 44 | under-trained |
135 | dense | 3072 | 3072 | 1.000000 | 4.453446 | 0.024194 | 0.658736 | 55 | |
136 | dense | 8192 | 3072 | 2.666667 | 6.936479 | 0.072845 | -3.438040 | 176 | under-trained |
137 | dense | 8192 | 3072 | 2.666667 | 4.468272 | 0.085501 | -0.185257 | 361 | |
138 | dense | 8192 | 3072 | 2.666667 | 7.147755 | 0.026431 | -1.798608 | 50 | under-trained |
139 | dense | 3072 | 1024 | 3.000000 | 4.300101 | 0.036633 | -0.689486 | 35 | |
140 | dense | 3072 | 3072 | 1.000000 | 5.672154 | 0.063032 | -3.412428 | 99 | |
141 | dense | 3072 | 1024 | 3.000000 | 6.390775 | 0.111749 | -6.455160 | 121 | under-trained |
142 | dense | 3072 | 3072 | 1.000000 | 3.689466 | 0.090642 | 0.158537 | 217 | |
143 | dense | 3072 | 3072 | 1.000000 | 8.339880 | 0.040442 | -4.803492 | 20 | under-trained |
144 | dense | 3072 | 1024 | 3.000000 | 3.855413 | 0.077769 | -1.207930 | 111 | |
145 | dense | 8192 | 3072 | 2.666667 | 5.760600 | 0.077158 | -1.405165 | 205 | |
146 | dense | 8192 | 3072 | 2.666667 | 5.009717 | 0.084357 | -0.443438 | 262 | |
147 | dense | 8192 | 3072 | 2.666667 | 7.608954 | 0.057242 | -3.790640 | 132 | under-trained |
148 | dense | 3072 | 1024 | 3.000000 | 5.483925 | 0.028605 | -4.158159 | 74 | |
149 | dense | 3072 | 3072 | 1.000000 | 4.185415 | 0.028851 | -0.041434 | 53 | |
150 | dense | 3072 | 3072 | 1.000000 | 4.852770 | 0.020355 | -1.690690 | 96 | |
151 | dense | 8192 | 3072 | 2.666667 | 5.698082 | 0.079403 | -1.415803 | 218 | |
152 | dense | 8192 | 3072 | 2.666667 | 5.440340 | 0.082032 | -0.620181 | 204 | |
153 | dense | 8192 | 3072 | 2.666667 | 8.020194 | 0.059518 | -3.798777 | 116 | under-trained |
154 | dense | 3072 | 1024 | 3.000000 | 3.868746 | 0.046848 | -1.177952 | 64 | |
155 | dense | 8192 | 3072 | 2.666667 | 8.141527 | 0.069997 | -3.357192 | 118 | under-trained |
156 | dense | 8192 | 3072 | 2.666667 | 4.735686 | 0.081625 | -0.605928 | 301 | |
157 | dense | 8192 | 3072 | 2.666667 | 6.029901 | 0.083501 | -1.590201 | 190 | under-trained |
158 | dense | 3072 | 1024 | 3.000000 | 4.197407 | 0.049121 | -1.524693 | 60 | |
159 | dense | 3072 | 3072 | 1.000000 | 5.201413 | 0.032870 | -1.518818 | 92 | |
160 | dense | 3072 | 3072 | 1.000000 | 3.520016 | 0.062146 | -0.136534 | 168 | |
161 | dense | 3072 | 1024 | 3.000000 | 6.681214 | 0.046805 | -6.258060 | 40 | under-trained |
162 | dense | 3072 | 3072 | 1.000000 | 4.274271 | 0.042114 | 0.541092 | 35 | |
163 | dense | 3072 | 1024 | 3.000000 | 5.741477 | 0.056254 | -5.274330 | 52 | |
164 | dense | 3072 | 1024 | 3.000000 | 3.119910 | 0.077013 | -1.038360 | 147 | |
165 | dense | 3072 | 3072 | 1.000000 | 4.901584 | 0.019173 | -1.975715 | 94 | |
166 | dense | 8192 | 3072 | 2.666667 | 5.097782 | 0.069167 | -0.286624 | 232 | |
167 | dense | 8192 | 3072 | 2.666667 | 7.904297 | 0.089004 | -3.022863 | 160 | under-trained |
168 | dense | 8192 | 3072 | 2.666667 | 6.845739 | 0.071624 | -1.692234 | 111 | under-trained |
169 | dense | 8192 | 3072 | 2.666667 | 6.614367 | 0.076231 | -1.867252 | 175 | under-trained |
170 | dense | 8192 | 3072 | 2.666667 | 6.558527 | 0.030339 | -0.028086 | 48 | under-trained |
171 | dense | 8192 | 3072 | 2.666667 | 6.835269 | 0.026048 | -1.435628 | 54 | under-trained |
172 | dense | 3072 | 1024 | 3.000000 | 3.775873 | 0.050260 | -1.242890 | 74 | |
173 | dense | 3072 | 3072 | 1.000000 | 4.644010 | 0.033895 | -1.387461 | 140 | |
174 | dense | 3072 | 3072 | 1.000000 | 4.077691 | 0.034343 | -0.082271 | 47 | |
175 | dense | 3072 | 1024 | 3.000000 | 6.517423 | 0.041256 | -5.443046 | 51 | under-trained |
176 | dense | 3072 | 3072 | 1.000000 | 3.008584 | 0.072483 | -0.106340 | 252 | |
177 | dense | 3072 | 1024 | 3.000000 | 4.918856 | 0.042334 | -4.037775 | 70 | |
178 | dense | 3072 | 3072 | 1.000000 | 4.538220 | 0.044565 | -1.916185 | 121 | |
179 | dense | 8192 | 3072 | 2.666667 | 6.022281 | 0.027644 | -0.353195 | 61 | under-trained |
180 | dense | 8192 | 3072 | 2.666667 | 4.544539 | 0.075303 | 0.426160 | 309 | |
181 | dense | 8192 | 3072 | 2.666667 | 5.651510 | 0.085010 | -1.274866 | 260 | |
182 | dense | 3072 | 1024 | 3.000000 | 3.563683 | 0.057146 | -1.328501 | 73 | |
183 | dense | 8192 | 3072 | 2.666667 | 4.869988 | 0.080899 | -1.193470 | 250 | |
184 | dense | 8192 | 3072 | 2.666667 | 5.202246 | 0.026691 | 1.057774 | 73 | |
185 | dense | 8192 | 3072 | 2.666667 | 5.196762 | 0.027069 | 0.557480 | 99 | |
186 | dense | 3072 | 1024 | 3.000000 | 3.622290 | 0.041992 | -0.560113 | 42 | |
187 | dense | 3072 | 3072 | 1.000000 | 4.146193 | 0.027388 | 0.108201 | 154 | |
188 | dense | 3072 | 3072 | 1.000000 | 3.209385 | 0.029470 | 0.864039 | 82 | |
189 | dense | 3072 | 1024 | 3.000000 | 5.010277 | 0.025048 | -2.821888 | 72 | |
190 | dense | 3072 | 3072 | 1.000000 | 4.327806 | 0.023233 | -0.730448 | 86 | |
191 | dense | 3072 | 1024 | 3.000000 | 3.098688 | 0.029395 | -0.390327 | 63 | |
192 | dense | 3072 | 3072 | 1.000000 | 3.070410 | 0.034157 | 0.843136 | 67 | |
193 | dense | 8192 | 3072 | 2.666667 | 4.431375 | 0.025254 | 1.097357 | 146 | |
194 | dense | 8192 | 3072 | 2.666667 | 4.418619 | 0.030192 | 0.535987 | 162 | |
195 | dense | 8192 | 3072 | 2.666667 | 4.132474 | 0.017018 | 1.453998 | 175 | |
196 | dense | 3072 | 1024 | 3.000000 | 5.376847 | 0.027423 | -4.527333 | 42 | |