xlm-roberta-base


Find this model in the RoBERTa model summary
xlm-roberta-base Model Summary Plots





xlm-roberta-base Model Selected Details
  layer_type N M Q alpha D alpha-hat log_SN % Rand num_traps num_fingers rank_loss
layer_id                        
3 EMBEDDING 768 514 1.49 1.44 0.09 4.60 3.20 27.65 1 1 1
12 DENSE 768 768 1.00 3.58 0.04 7.48 2.09 89.63 0 0 1
13 DENSE 768 768 1.00 1.93 0.09 4.50 2.33 76.24 0 1 1
14 DENSE 768 768 1.00 2.08 0.12 2.57 1.23 82.23 0 3 2
17 DENSE 768 768 1.00 2.85 0.10 4.77 1.67 83.01 1 3 2
21 DENSE 3072 768 4.00 2.53 0.04 7.94 3.14 70.53 1 0 0
24 DENSE 3072 768 4.00 6.28 0.06 11.59 1.85 92.51 0 0 0
30 DENSE 768 768 1.00 2.49 0.04 5.70 2.28 81.60 0 0 2
31 DENSE 768 768 1.00 2.60 0.06 5.30 2.04 80.64 0 0 2
32 DENSE 768 768 1.00 4.88 0.07 5.31 1.09 85.41 0 0 1
35 DENSE 768 768 1.00 4.74 0.06 4.73 1.00 89.31 0 0 2
39 DENSE 3072 768 4.00 2.62 0.03 8.08 3.09 79.71 1 0 0
42 DENSE 3072 768 4.00 5.09 0.02 9.01 1.77 89.11 0 0 0
48 DENSE 768 768 1.00 2.46 0.04 4.82 1.96 81.14 0 0 2
49 DENSE 768 768 1.00 2.65 0.04 5.26 1.98 81.19 0 0 2
50 DENSE 768 768 1.00 4.19 0.06 4.37 1.04 88.10 0 0 1
53 DENSE 768 768 1.00 3.33 0.03 4.49 1.35 83.44 0 0 3
57 DENSE 3072 768 4.00 2.67 0.03 8.09 3.03 80.72 1 0 0
60 DENSE 3072 768 4.00 3.26 0.04 5.90 1.81 82.68 0 0 0
66 DENSE 768 768 1.00 3.86 0.07 6.17 1.60 85.45 0 0 2
67 DENSE 768 768 1.00 3.78 0.05 5.88 1.56 88.39 0 0 2
68 DENSE 768 768 1.00 5.03 0.10 5.82 1.16 87.26 0 0 1
71 DENSE 768 768 1.00 2.48 0.04 3.49 1.41 80.46 0 0 1
75 DENSE 3072 768 4.00 3.22 0.03 10.09 3.14 81.50 1 0 0
78 DENSE 3072 768 4.00 4.77 0.03 10.40 2.18 83.01 0 0 0
84 DENSE 768 768 1.00 3.18 0.07 5.37 1.69 86.87 0 0 1
85 DENSE 768 768 1.00 3.31 0.05 5.58 1.68 87.48 0 0 1
86 DENSE 768 768 1.00 4.87 0.09 6.01 1.23 85.69 0 0 0
89 DENSE 768 768 1.00 2.07 0.06 3.66 1.77 74.93 0 0 3
93 DENSE 3072 768 4.00 3.59 0.04 11.75 3.27 80.12 1 0 0
96 DENSE 3072 768 4.00 4.56 0.03 10.96 2.40 80.24 0 0 0
102 DENSE 768 768 1.00 4.09 0.09 5.83 1.43 86.79 0 0 3
103 DENSE 768 768 1.00 3.34 0.04 5.43 1.62 84.94 0 0 4
104 DENSE 768 768 1.00 5.07 0.11 6.47 1.28 87.82 0 1 1
107 DENSE 768 768 1.00 1.92 0.08 3.05 1.59 76.75 0 0 2
111 DENSE 3072 768 4.00 3.75 0.04 12.24 3.27 80.06 1 0 0
114 DENSE 3072 768 4.00 4.04 0.03 9.77 2.42 76.19 0 0 0
120 DENSE 768 768 1.00 3.44 0.08 6.67 1.94 85.57 0 0 3
121 DENSE 768 768 1.00 3.35 0.07 5.24 1.56 86.31 0 0 4
122 DENSE 768 768 1.00 2.71 0.11 3.77 1.39 85.27 0 1 2
125 DENSE 768 768 1.00 2.00 0.07 3.27 1.64 75.55 0 0 2
129 DENSE 3072 768 4.00 3.62 0.05 11.68 3.23 80.14 1 0 0
132 DENSE 3072 768 4.00 3.56 0.03 9.40 2.64 73.93 0 0 0
138 DENSE 768 768 1.00 3.87 0.05 6.79 1.75 85.10 0 0 3
139 DENSE 768 768 1.00 3.54 0.06 5.64 1.59 83.54 0 0 4
140 DENSE 768 768 1.00 1.73 0.11 1.98 1.14 89.95 0 0 2
143 DENSE 768 768 1.00 2.14 0.07 3.56 1.66 80.01 0 0 2
147 DENSE 3072 768 4.00 3.58 0.05 11.24 3.14 81.26 1 0 0
150 DENSE 3072 768 4.00 3.77 0.03 9.70 2.57 75.25 0 0 0
156 DENSE 768 768 1.00 3.30 0.12 6.10 1.85 83.45 0 4 3
157 DENSE 768 768 1.00 4.24 0.08 6.54 1.54 83.35 0 0 4
158 DENSE 768 768 1.00 3.48 0.13 3.91 1.13 90.93 0 4 4
161 DENSE 768 768 1.00 1.94 0.05 3.31 1.71 71.71 0 0 1
165 DENSE 3072 768 4.00 3.61 0.04 11.02 3.06 81.24 1 0 0
168 DENSE 3072 768 4.00 3.61 0.02 8.90 2.47 76.94 0 0 0
174 DENSE 768 768 1.00 3.36 0.10 7.07 2.10 83.64 0 1 3
175 DENSE 768 768 1.00 3.82 0.07 6.17 1.62 84.06 0 0 4
176 DENSE 768 768 1.00 5.80 0.13 6.43 1.11 93.04 0 6 3
179 DENSE 768 768 1.00 2.67 0.09 4.41 1.65 85.08 0 1 1
183 DENSE 3072 768 4.00 3.54 0.05 10.49 2.96 82.81 1 0 0
186 DENSE 3072 768 4.00 5.24 0.06 12.78 2.44 82.71 0 0 0
192 DENSE 768 768 1.00 3.37 0.09 7.50 2.23 81.90 0 0 1
193 DENSE 768 768 1.00 2.73 0.10 4.31 1.58 84.44 0 1 3
194 DENSE 768 768 1.00 6.86 0.05 7.94 1.16 95.63 0 0 3
197 DENSE 768 768 1.00 5.25 0.07 9.13 1.74 85.83 0 0 1
201 DENSE 3072 768 4.00 3.50 0.02 9.33 2.66 83.45 1 0 0
204 DENSE 3072 768 4.00 4.69 0.08 12.43 2.65 79.06 0 0 0
210 DENSE 768 768 1.00 2.46 0.08 5.00 2.03 82.68 0 0 2
211 DENSE 768 768 1.00 2.98 0.09 5.12 1.72 87.99 0 1 2
212 DENSE 768 768 1.00 8.50 0.13 7.59 0.89 97.11 0 1 3
215 DENSE 768 768 1.00 4.22 0.08 7.85 1.86 84.36 2 0 1
219 DENSE 3072 768 4.00 2.67 0.02 6.32 2.37 79.35 1 0 0
222 DENSE 3072 768 4.00 3.71 0.09 10.35 2.79 70.00 4 0 0
226 DENSE 768 768 1.00 7.22 0.14 0.61 0.08 99.80 0 7 2

xlm-roberta-base Layer Plots
Layer 3
   Layer=3  |  N=768  |  M=514  |  Q=1.49  |  alpha=1.44  |  D_ks=0.09  |  alpha-hat=4.60  |  num traps=1









Layer 12
   Layer=12  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.58  |  D_ks=0.04  |  alpha-hat=7.48  |  num traps=0









Layer 13
   Layer=13  |  N=768  |  M=768  |  Q=1.00  |  alpha=1.93  |  D_ks=0.09  |  alpha-hat=4.50  |  num traps=0









Layer 14
   Layer=14  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.08  |  D_ks=0.12  |  alpha-hat=2.57  |  num traps=0









Layer 17
   Layer=17  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.85  |  D_ks=0.10  |  alpha-hat=4.77  |  num traps=1









Layer 21
   Layer=21  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.53  |  D_ks=0.04  |  alpha-hat=7.94  |  num traps=1









Layer 24
   Layer=24  |  N=3072  |  M=768  |  Q=4.00  |  alpha=6.28  |  D_ks=0.06  |  alpha-hat=11.59  |  num traps=0









Layer 30
   Layer=30  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.49  |  D_ks=0.04  |  alpha-hat=5.70  |  num traps=0









Layer 31
   Layer=31  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.60  |  D_ks=0.06  |  alpha-hat=5.30  |  num traps=0









Layer 32
   Layer=32  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.88  |  D_ks=0.07  |  alpha-hat=5.31  |  num traps=0









Layer 35
   Layer=35  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.74  |  D_ks=0.06  |  alpha-hat=4.73  |  num traps=0









Layer 39
   Layer=39  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.62  |  D_ks=0.03  |  alpha-hat=8.08  |  num traps=1









Layer 42
   Layer=42  |  N=3072  |  M=768  |  Q=4.00  |  alpha=5.09  |  D_ks=0.02  |  alpha-hat=9.01  |  num traps=0









Layer 48
   Layer=48  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.46  |  D_ks=0.04  |  alpha-hat=4.82  |  num traps=0









Layer 49
   Layer=49  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.65  |  D_ks=0.04  |  alpha-hat=5.26  |  num traps=0









Layer 50
   Layer=50  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.19  |  D_ks=0.06  |  alpha-hat=4.37  |  num traps=0









Layer 53
   Layer=53  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.33  |  D_ks=0.03  |  alpha-hat=4.49  |  num traps=0









Layer 57
   Layer=57  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.67  |  D_ks=0.03  |  alpha-hat=8.09  |  num traps=1









Layer 60
   Layer=60  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.26  |  D_ks=0.04  |  alpha-hat=5.90  |  num traps=0









Layer 66
   Layer=66  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.86  |  D_ks=0.07  |  alpha-hat=6.17  |  num traps=0









Layer 67
   Layer=67  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.78  |  D_ks=0.05  |  alpha-hat=5.88  |  num traps=0









Layer 68
   Layer=68  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.03  |  D_ks=0.10  |  alpha-hat=5.82  |  num traps=0









Layer 71
   Layer=71  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.48  |  D_ks=0.04  |  alpha-hat=3.49  |  num traps=0









Layer 75
   Layer=75  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.22  |  D_ks=0.03  |  alpha-hat=10.09  |  num traps=1









Layer 78
   Layer=78  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.77  |  D_ks=0.03  |  alpha-hat=10.40  |  num traps=0









Layer 84
   Layer=84  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.18  |  D_ks=0.07  |  alpha-hat=5.37  |  num traps=0









Layer 85
   Layer=85  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.31  |  D_ks=0.05  |  alpha-hat=5.58  |  num traps=0









Layer 86
   Layer=86  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.87  |  D_ks=0.09  |  alpha-hat=6.01  |  num traps=0









Layer 89
   Layer=89  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.07  |  D_ks=0.06  |  alpha-hat=3.66  |  num traps=0









Layer 93
   Layer=93  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.59  |  D_ks=0.04  |  alpha-hat=11.75  |  num traps=1









Layer 96
   Layer=96  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.56  |  D_ks=0.03  |  alpha-hat=10.96  |  num traps=0









Layer 102
   Layer=102  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.09  |  D_ks=0.09  |  alpha-hat=5.83  |  num traps=0









Layer 103
   Layer=103  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.34  |  D_ks=0.04  |  alpha-hat=5.43  |  num traps=0









Layer 104
   Layer=104  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.07  |  D_ks=0.11  |  alpha-hat=6.47  |  num traps=0









Layer 107
   Layer=107  |  N=768  |  M=768  |  Q=1.00  |  alpha=1.92  |  D_ks=0.08  |  alpha-hat=3.05  |  num traps=0









Layer 111
   Layer=111  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.75  |  D_ks=0.04  |  alpha-hat=12.24  |  num traps=1









Layer 114
   Layer=114  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.04  |  D_ks=0.03  |  alpha-hat=9.77  |  num traps=0









Layer 120
   Layer=120  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.44  |  D_ks=0.08  |  alpha-hat=6.67  |  num traps=0









Layer 121
   Layer=121  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.35  |  D_ks=0.07  |  alpha-hat=5.24  |  num traps=0









Layer 122
   Layer=122  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.71  |  D_ks=0.11  |  alpha-hat=3.77  |  num traps=0









Layer 125
   Layer=125  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.00  |  D_ks=0.07  |  alpha-hat=3.27  |  num traps=0









Layer 129
   Layer=129  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.62  |  D_ks=0.05  |  alpha-hat=11.68  |  num traps=1









Layer 132
   Layer=132  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.56  |  D_ks=0.03  |  alpha-hat=9.40  |  num traps=0









Layer 138
   Layer=138  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.87  |  D_ks=0.05  |  alpha-hat=6.79  |  num traps=0









Layer 139
   Layer=139  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.54  |  D_ks=0.06  |  alpha-hat=5.64  |  num traps=0









Layer 140
   Layer=140  |  N=768  |  M=768  |  Q=1.00  |  alpha=1.73  |  D_ks=0.11  |  alpha-hat=1.98  |  num traps=0









Layer 143
   Layer=143  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.14  |  D_ks=0.07  |  alpha-hat=3.56  |  num traps=0









Layer 147
   Layer=147  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.58  |  D_ks=0.05  |  alpha-hat=11.24  |  num traps=1









Layer 150
   Layer=150  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.77  |  D_ks=0.03  |  alpha-hat=9.70  |  num traps=0









Layer 156
   Layer=156  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.30  |  D_ks=0.12  |  alpha-hat=6.10  |  num traps=0









Layer 157
   Layer=157  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.24  |  D_ks=0.08  |  alpha-hat=6.54  |  num traps=0









Layer 158
   Layer=158  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.48  |  D_ks=0.13  |  alpha-hat=3.91  |  num traps=0









Layer 161
   Layer=161  |  N=768  |  M=768  |  Q=1.00  |  alpha=1.94  |  D_ks=0.05  |  alpha-hat=3.31  |  num traps=0









Layer 165
   Layer=165  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.61  |  D_ks=0.04  |  alpha-hat=11.02  |  num traps=1









Layer 168
   Layer=168  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.61  |  D_ks=0.02  |  alpha-hat=8.90  |  num traps=0









Layer 174
   Layer=174  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.36  |  D_ks=0.10  |  alpha-hat=7.07  |  num traps=0









Layer 175
   Layer=175  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.82  |  D_ks=0.07  |  alpha-hat=6.17  |  num traps=0









Layer 176
   Layer=176  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.80  |  D_ks=0.13  |  alpha-hat=6.43  |  num traps=0









Layer 179
   Layer=179  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.67  |  D_ks=0.09  |  alpha-hat=4.41  |  num traps=0









Layer 183
   Layer=183  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.54  |  D_ks=0.05  |  alpha-hat=10.49  |  num traps=1









Layer 186
   Layer=186  |  N=3072  |  M=768  |  Q=4.00  |  alpha=5.24  |  D_ks=0.06  |  alpha-hat=12.78  |  num traps=0









Layer 192
   Layer=192  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.37  |  D_ks=0.09  |  alpha-hat=7.50  |  num traps=0









Layer 193
   Layer=193  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.73  |  D_ks=0.10  |  alpha-hat=4.31  |  num traps=0









Layer 194
   Layer=194  |  N=768  |  M=768  |  Q=1.00  |  alpha=6.86  |  D_ks=0.05  |  alpha-hat=7.94  |  num traps=0









Layer 197
   Layer=197  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.25  |  D_ks=0.07  |  alpha-hat=9.13  |  num traps=0









Layer 201
   Layer=201  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.50  |  D_ks=0.02  |  alpha-hat=9.33  |  num traps=1









Layer 204
   Layer=204  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.69  |  D_ks=0.08  |  alpha-hat=12.43  |  num traps=0









Layer 210
   Layer=210  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.46  |  D_ks=0.08  |  alpha-hat=5.00  |  num traps=0









Layer 211
   Layer=211  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.98  |  D_ks=0.09  |  alpha-hat=5.12  |  num traps=0









Layer 212
   Layer=212  |  N=768  |  M=768  |  Q=1.00  |  alpha=8.50  |  D_ks=0.13  |  alpha-hat=7.59  |  num traps=0









Layer 215
   Layer=215  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.22  |  D_ks=0.08  |  alpha-hat=7.85  |  num traps=2









Layer 219
   Layer=219  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.67  |  D_ks=0.02  |  alpha-hat=6.32  |  num traps=1









Layer 222
   Layer=222  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.71  |  D_ks=0.09  |  alpha-hat=10.35  |  num traps=4









Layer 226
   Layer=226  |  N=768  |  M=768  |  Q=1.00  |  alpha=7.22  |  D_ks=0.14  |  alpha-hat=0.61  |  num traps=0