bert-base-cased


Find this model in the BertXLNet model summary
bert-base-cased Model Summary Plots





bert-base-cased Model Selected Details
layer_id layer_type N M Q alpha D alpha-hat log_SN % Rand num_traps num_fingers rank_loss
2 EMBEDDING 28996 768 37.76 3.47 0.04 13.59 3.92 78.64 1 0 0
3 EMBEDDING 768 512 1.50 1.83 0.09 1.67 0.91 50.42 2 0 0
12 DENSE 768 768 1.00 2.71 0.03 3.58 1.32 80.72 0 0 1
13 DENSE 768 768 1.00 3.11 0.03 4.19 1.35 83.36 0 0 1
14 DENSE 768 768 1.00 4.74 0.11 2.89 0.61 89.81 0 1 1
17 DENSE 768 768 1.00 2.46 0.04 2.20 0.89 81.87 0 0 2
21 DENSE 3072 768 4.00 3.36 0.07 7.35 2.19 81.58 0 0 0
24 DENSE 3072 768 4.00 4.66 0.05 7.70 1.65 91.46 1 0 0
30 DENSE 768 768 1.00 2.48 0.06 3.17 1.28 78.00 0 0 2
31 DENSE 768 768 1.00 2.53 0.07 3.81 1.50 75.43 0 0 1
32 DENSE 768 768 1.00 3.39 0.07 2.42 0.72 87.36 0 0 1
35 DENSE 768 768 1.00 2.82 0.08 2.01 0.71 86.61 0 0 2
39 DENSE 3072 768 4.00 3.13 0.04 6.64 2.12 81.44 0 0 0
42 DENSE 3072 768 4.00 3.86 0.02 7.58 1.96 88.47 3 0 0
48 DENSE 768 768 1.00 2.57 0.04 3.76 1.46 78.74 0 0 0
49 DENSE 768 768 1.00 2.54 0.05 3.89 1.53 76.73 0 0 1
50 DENSE 768 768 1.00 3.15 0.08 2.48 0.79 87.54 0 0 0
53 DENSE 768 768 1.00 2.90 0.08 2.08 0.72 88.59 0 0 1
57 DENSE 3072 768 4.00 2.91 0.03 6.25 2.15 81.90 0 0 0
60 DENSE 3072 768 4.00 3.58 0.01 6.38 1.78 88.85 2 0 0
66 DENSE 768 768 1.00 2.85 0.03 3.87 1.36 82.45 0 0 0
67 DENSE 768 768 1.00 2.84 0.04 4.46 1.57 80.48 0 0 1
68 DENSE 768 768 1.00 2.57 0.09 2.07 0.80 86.79 0 0 2
71 DENSE 768 768 1.00 3.31 0.10 2.41 0.73 88.81 0 1 1
75 DENSE 3072 768 4.00 3.13 0.02 6.58 2.10 84.11 0 0 0
78 DENSE 3072 768 4.00 3.57 0.02 6.27 1.76 89.47 2 0 0
84 DENSE 768 768 1.00 3.09 0.04 4.08 1.32 86.25 0 0 0
85 DENSE 768 768 1.00 3.02 0.03 4.53 1.50 83.57 0 0 1
86 DENSE 768 768 1.00 3.22 0.09 2.81 0.87 87.15 0 0 2
89 DENSE 768 768 1.00 5.59 0.10 4.50 0.81 91.15 0 1 1
93 DENSE 3072 768 4.00 3.26 0.02 6.79 2.08 85.07 0 0 0
96 DENSE 3072 768 4.00 3.79 0.05 6.60 1.74 89.06 1 0 0
102 DENSE 768 768 1.00 2.96 0.04 3.94 1.33 83.24 0 0 0
103 DENSE 768 768 1.00 2.71 0.05 4.36 1.61 78.22 0 0 1
104 DENSE 768 768 1.00 3.67 0.08 3.04 0.83 89.47 0 0 1
107 DENSE 768 768 1.00 6.43 0.08 4.34 0.68 93.62 0 0 2
111 DENSE 3072 768 4.00 3.33 0.02 6.77 2.03 86.14 0 0 0
114 DENSE 3072 768 4.00 3.33 0.03 6.13 1.84 87.42 3 0 0
120 DENSE 768 768 1.00 3.01 0.05 4.06 1.35 82.82 0 0 1
121 DENSE 768 768 1.00 2.67 0.03 4.10 1.54 77.82 0 0 1
122 DENSE 768 768 1.00 2.97 0.07 2.25 0.76 90.39 0 0 2
125 DENSE 768 768 1.00 5.15 0.10 3.64 0.71 94.67 0 1 2
129 DENSE 3072 768 4.00 3.47 0.02 7.10 2.04 86.22 0 0 0
132 DENSE 3072 768 4.00 3.87 0.03 6.83 1.76 87.51 2 0 0
138 DENSE 768 768 1.00 3.37 0.06 4.33 1.28 83.91 0 0 1
139 DENSE 768 768 1.00 2.85 0.03 4.22 1.48 81.03 0 0 2
140 DENSE 768 768 1.00 3.74 0.09 2.64 0.71 94.14 0 0 2
143 DENSE 768 768 1.00 4.62 0.07 3.06 0.66 96.44 0 0 1
147 DENSE 3072 768 4.00 3.37 0.02 6.96 2.06 85.57 0 0 0
150 DENSE 3072 768 4.00 3.24 0.05 4.68 1.44 90.87 2 0 0
156 DENSE 768 768 1.00 3.29 0.04 4.21 1.28 86.30 0 0 0
157 DENSE 768 768 1.00 2.94 0.03 4.26 1.45 83.60 0 0 1
158 DENSE 768 768 1.00 2.82 0.10 2.24 0.80 92.00 0 0 2
161 DENSE 768 768 1.00 3.54 0.11 2.70 0.76 93.86 0 6 2
165 DENSE 3072 768 4.00 3.50 0.02 7.25 2.07 85.35 0 0 0
168 DENSE 3072 768 4.00 3.94 0.04 6.78 1.72 92.58 3 0 0
174 DENSE 768 768 1.00 3.20 0.06 4.01 1.25 86.02 0 0 0
175 DENSE 768 768 1.00 3.15 0.03 4.66 1.48 83.99 0 0 1
176 DENSE 768 768 1.00 5.19 0.10 3.91 0.75 92.91 0 1 1
179 DENSE 768 768 1.00 3.39 0.11 2.80 0.83 94.71 0 1 1
183 DENSE 3072 768 4.00 3.76 0.03 7.87 2.10 84.67 0 0 0
186 DENSE 3072 768 4.00 5.11 0.04 9.14 1.79 93.60 4 0 0
192 DENSE 768 768 1.00 4.19 0.03 5.70 1.36 87.56 0 0 0
193 DENSE 768 768 1.00 4.15 0.05 5.46 1.32 87.88 0 0 1
194 DENSE 768 768 1.00 6.48 0.05 5.61 0.87 95.20 0 0 1
197 DENSE 768 768 1.00 5.64 0.03 4.97 0.88 94.80 0 0 2
201 DENSE 3072 768 4.00 3.70 0.04 7.50 2.03 85.79 0 0 0
204 DENSE 3072 768 4.00 5.13 0.06 10.67 2.08 90.40 5 0 0
210 DENSE 768 768 1.00 3.14 0.04 4.69 1.50 84.05 0 0 1
211 DENSE 768 768 1.00 3.77 0.05 5.42 1.44 85.24 0 0 1
212 DENSE 768 768 1.00 8.67 0.07 8.65 1.00 92.17 0 0 1
215 DENSE 768 768 1.00 8.61 0.07 7.46 0.87 94.04 0 0 1
219 DENSE 3072 768 4.00 3.69 0.04 6.88 1.87 88.01 0 0 0
222 DENSE 3072 768 4.00 7.67 0.06 10.71 1.40 85.90 0 0 0
226 DENSE 768 768 1.00 1.88 0.03 4.18 2.23 46.04 0 0 1

bert-base-cased Layer Plots
Layer 2
   Layer=2  |  N=28996  |  M=768  |  Q=37.76  |  alpha=3.47  |  D_ks=0.04  |  alpha-hat=13.59  |  num traps=1









Layer 3
   Layer=3  |  N=768  |  M=512  |  Q=1.50  |  alpha=1.83  |  D_ks=0.09  |  alpha-hat=1.67  |  num traps=2









Layer 12
   Layer=12  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.71  |  D_ks=0.03  |  alpha-hat=3.58  |  num traps=0









Layer 13
   Layer=13  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.11  |  D_ks=0.03  |  alpha-hat=4.19  |  num traps=0









Layer 14
   Layer=14  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.74  |  D_ks=0.11  |  alpha-hat=2.89  |  num traps=0









Layer 17
   Layer=17  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.46  |  D_ks=0.04  |  alpha-hat=2.20  |  num traps=0









Layer 21
   Layer=21  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.36  |  D_ks=0.07  |  alpha-hat=7.35  |  num traps=0









Layer 24
   Layer=24  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.66  |  D_ks=0.05  |  alpha-hat=7.70  |  num traps=1









Layer 30
   Layer=30  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.48  |  D_ks=0.06  |  alpha-hat=3.17  |  num traps=0









Layer 31
   Layer=31  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.53  |  D_ks=0.07  |  alpha-hat=3.81  |  num traps=0









Layer 32
   Layer=32  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.39  |  D_ks=0.07  |  alpha-hat=2.42  |  num traps=0









Layer 35
   Layer=35  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.82  |  D_ks=0.08  |  alpha-hat=2.01  |  num traps=0









Layer 39
   Layer=39  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.13  |  D_ks=0.04  |  alpha-hat=6.64  |  num traps=0









Layer 42
   Layer=42  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.86  |  D_ks=0.02  |  alpha-hat=7.58  |  num traps=3









Layer 48
   Layer=48  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.57  |  D_ks=0.04  |  alpha-hat=3.76  |  num traps=0









Layer 49
   Layer=49  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.54  |  D_ks=0.05  |  alpha-hat=3.89  |  num traps=0









Layer 50
   Layer=50  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.15  |  D_ks=0.08  |  alpha-hat=2.48  |  num traps=0









Layer 53
   Layer=53  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.90  |  D_ks=0.08  |  alpha-hat=2.08  |  num traps=0









Layer 57
   Layer=57  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.91  |  D_ks=0.03  |  alpha-hat=6.25  |  num traps=0









Layer 60
   Layer=60  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.58  |  D_ks=0.01  |  alpha-hat=6.38  |  num traps=2









Layer 66
   Layer=66  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.85  |  D_ks=0.03  |  alpha-hat=3.87  |  num traps=0









Layer 67
   Layer=67  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.84  |  D_ks=0.04  |  alpha-hat=4.46  |  num traps=0









Layer 68
   Layer=68  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.57  |  D_ks=0.09  |  alpha-hat=2.07  |  num traps=0









Layer 71
   Layer=71  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.31  |  D_ks=0.10  |  alpha-hat=2.41  |  num traps=0









Layer 75
   Layer=75  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.13  |  D_ks=0.02  |  alpha-hat=6.58  |  num traps=0









Layer 78
   Layer=78  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.57  |  D_ks=0.02  |  alpha-hat=6.27  |  num traps=2









Layer 84
   Layer=84  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.09  |  D_ks=0.04  |  alpha-hat=4.08  |  num traps=0









Layer 85
   Layer=85  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.02  |  D_ks=0.03  |  alpha-hat=4.53  |  num traps=0









Layer 86
   Layer=86  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.22  |  D_ks=0.09  |  alpha-hat=2.81  |  num traps=0









Layer 89
   Layer=89  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.59  |  D_ks=0.10  |  alpha-hat=4.50  |  num traps=0









Layer 93
   Layer=93  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.26  |  D_ks=0.02  |  alpha-hat=6.79  |  num traps=0









Layer 96
   Layer=96  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.79  |  D_ks=0.05  |  alpha-hat=6.60  |  num traps=1









Layer 102
   Layer=102  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.96  |  D_ks=0.04  |  alpha-hat=3.94  |  num traps=0









Layer 103
   Layer=103  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.71  |  D_ks=0.05  |  alpha-hat=4.36  |  num traps=0









Layer 104
   Layer=104  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.67  |  D_ks=0.08  |  alpha-hat=3.04  |  num traps=0









Layer 107
   Layer=107  |  N=768  |  M=768  |  Q=1.00  |  alpha=6.43  |  D_ks=0.08  |  alpha-hat=4.34  |  num traps=0









Layer 111
   Layer=111  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.33  |  D_ks=0.02  |  alpha-hat=6.77  |  num traps=0









Layer 114
   Layer=114  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.33  |  D_ks=0.03  |  alpha-hat=6.13  |  num traps=3









Layer 120
   Layer=120  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.01  |  D_ks=0.05  |  alpha-hat=4.06  |  num traps=0









Layer 121
   Layer=121  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.67  |  D_ks=0.03  |  alpha-hat=4.10  |  num traps=0









Layer 122
   Layer=122  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.97  |  D_ks=0.07  |  alpha-hat=2.25  |  num traps=0









Layer 125
   Layer=125  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.15  |  D_ks=0.10  |  alpha-hat=3.64  |  num traps=0









Layer 129
   Layer=129  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.47  |  D_ks=0.02  |  alpha-hat=7.10  |  num traps=0









Layer 132
   Layer=132  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.87  |  D_ks=0.03  |  alpha-hat=6.83  |  num traps=2









Layer 138
   Layer=138  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.37  |  D_ks=0.06  |  alpha-hat=4.33  |  num traps=0









Layer 139
   Layer=139  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.85  |  D_ks=0.03  |  alpha-hat=4.22  |  num traps=0









Layer 140
   Layer=140  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.74  |  D_ks=0.09  |  alpha-hat=2.64  |  num traps=0









Layer 143
   Layer=143  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.62  |  D_ks=0.07  |  alpha-hat=3.06  |  num traps=0









Layer 147
   Layer=147  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.37  |  D_ks=0.02  |  alpha-hat=6.96  |  num traps=0









Layer 150
   Layer=150  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.24  |  D_ks=0.05  |  alpha-hat=4.68  |  num traps=2









Layer 156
   Layer=156  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.29  |  D_ks=0.04  |  alpha-hat=4.21  |  num traps=0









Layer 157
   Layer=157  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.94  |  D_ks=0.03  |  alpha-hat=4.26  |  num traps=0









Layer 158
   Layer=158  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.82  |  D_ks=0.10  |  alpha-hat=2.24  |  num traps=0









Layer 161
   Layer=161  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.54  |  D_ks=0.11  |  alpha-hat=2.70  |  num traps=0









Layer 165
   Layer=165  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.50  |  D_ks=0.02  |  alpha-hat=7.25  |  num traps=0









Layer 168
   Layer=168  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.94  |  D_ks=0.04  |  alpha-hat=6.78  |  num traps=3









Layer 174
   Layer=174  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.20  |  D_ks=0.06  |  alpha-hat=4.01  |  num traps=0









Layer 175
   Layer=175  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.15  |  D_ks=0.03  |  alpha-hat=4.66  |  num traps=0









Layer 176
   Layer=176  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.19  |  D_ks=0.10  |  alpha-hat=3.91  |  num traps=0









Layer 179
   Layer=179  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.39  |  D_ks=0.11  |  alpha-hat=2.80  |  num traps=0









Layer 183
   Layer=183  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.76  |  D_ks=0.03  |  alpha-hat=7.87  |  num traps=0









Layer 186
   Layer=186  |  N=3072  |  M=768  |  Q=4.00  |  alpha=5.11  |  D_ks=0.04  |  alpha-hat=9.14  |  num traps=4









Layer 192
   Layer=192  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.19  |  D_ks=0.03  |  alpha-hat=5.70  |  num traps=0









Layer 193
   Layer=193  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.15  |  D_ks=0.05  |  alpha-hat=5.46  |  num traps=0









Layer 194
   Layer=194  |  N=768  |  M=768  |  Q=1.00  |  alpha=6.48  |  D_ks=0.05  |  alpha-hat=5.61  |  num traps=0









Layer 197
   Layer=197  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.64  |  D_ks=0.03  |  alpha-hat=4.97  |  num traps=0









Layer 201
   Layer=201  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.70  |  D_ks=0.04  |  alpha-hat=7.50  |  num traps=0









Layer 204
   Layer=204  |  N=3072  |  M=768  |  Q=4.00  |  alpha=5.13  |  D_ks=0.06  |  alpha-hat=10.67  |  num traps=5









Layer 210
   Layer=210  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.14  |  D_ks=0.04  |  alpha-hat=4.69  |  num traps=0









Layer 211
   Layer=211  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.77  |  D_ks=0.05  |  alpha-hat=5.42  |  num traps=0









Layer 212
   Layer=212  |  N=768  |  M=768  |  Q=1.00  |  alpha=8.67  |  D_ks=0.07  |  alpha-hat=8.65  |  num traps=0









Layer 215
   Layer=215  |  N=768  |  M=768  |  Q=1.00  |  alpha=8.61  |  D_ks=0.07  |  alpha-hat=7.46  |  num traps=0









Layer 219
   Layer=219  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.69  |  D_ks=0.04  |  alpha-hat=6.88  |  num traps=0









Layer 222
   Layer=222  |  N=3072  |  M=768  |  Q=4.00  |  alpha=7.67  |  D_ks=0.06  |  alpha-hat=10.71  |  num traps=0









Layer 226
   Layer=226  |  N=768  |  M=768  |  Q=1.00  |  alpha=1.88  |  D_ks=0.03  |  alpha-hat=4.18  |  num traps=0