distilroberta-base


Find this model in the RoBERTa model summary
distilroberta-base Model Summary Plots





distilroberta-base Model Selected Details
layer_id layer_type N M Q alpha D alpha-hat log_SN % Rand num_traps num_fingers rank_loss
3 EMBEDDING 768 514 1.49 1.50 0.10 4.29 2.86 27.70 0 0 1
12 DENSE 768 768 1.00 3.35 0.03 7.06 2.11 86.78 0 0 1
13 DENSE 768 768 1.00 2.75 0.07 5.94 2.16 80.44 0 0 1
14 DENSE 768 768 1.00 2.78 0.09 3.02 1.09 86.78 0 0 1
17 DENSE 768 768 1.00 2.72 0.08 4.48 1.65 83.32 0 1 1
21 DENSE 3072 768 4.00 2.80 0.02 7.46 2.66 81.35 1 0 0
24 DENSE 3072 768 4.00 3.31 0.03 6.56 1.98 84.67 0 0 0
30 DENSE 768 768 1.00 3.76 0.05 8.30 2.21 82.67 0 0 2
31 DENSE 768 768 1.00 2.91 0.04 5.19 1.78 83.24 0 0 1
32 DENSE 768 768 1.00 2.33 0.10 3.28 1.41 84.84 0 1 0
35 DENSE 768 768 1.00 3.36 0.08 4.45 1.32 86.58 0 0 2
39 DENSE 3072 768 4.00 3.58 0.02 10.05 2.81 85.33 1 0 0
42 DENSE 3072 768 4.00 3.27 0.07 6.60 2.02 83.28 0 1 0
48 DENSE 768 768 1.00 3.92 0.04 8.85 2.25 83.24 0 0 1
49 DENSE 768 768 1.00 2.87 0.09 4.84 1.69 86.10 0 3 2
50 DENSE 768 768 1.00 4.15 0.11 6.57 1.58 83.93 0 1 1
53 DENSE 768 768 1.00 3.59 0.08 4.64 1.29 86.50 0 0 1
57 DENSE 3072 768 4.00 3.69 0.02 9.97 2.70 84.30 1 0 0
60 DENSE 3072 768 4.00 3.23 0.07 6.56 2.03 82.79 0 1 0
66 DENSE 768 768 1.00 4.19 0.08 7.32 1.75 85.63 0 0 1
67 DENSE 768 768 1.00 3.72 0.05 6.46 1.74 83.44 0 0 2
68 DENSE 768 768 1.00 2.65 0.10 3.43 1.29 86.43 0 1 1
71 DENSE 768 768 1.00 5.06 0.05 6.63 1.31 89.73 0 0 1
75 DENSE 3072 768 4.00 3.56 0.04 9.53 2.68 84.05 1 0 0
78 DENSE 3072 768 4.00 3.63 0.02 8.08 2.23 81.48 0 0 0
84 DENSE 768 768 1.00 3.54 0.08 7.26 2.05 79.88 0 0 2
85 DENSE 768 768 1.00 3.26 0.09 5.47 1.68 81.09 0 0 2
86 DENSE 768 768 1.00 3.87 0.10 6.13 1.59 92.20 0 1 2
89 DENSE 768 768 1.00 3.48 0.06 4.87 1.40 92.88 0 0 1
93 DENSE 3072 768 4.00 3.39 0.03 9.21 2.72 83.49 1 0 0
96 DENSE 3072 768 4.00 4.43 0.04 9.36 2.12 87.36 0 0 0
102 DENSE 768 768 1.00 2.29 0.11 4.92 2.15 80.09 0 1 1
103 DENSE 768 768 1.00 2.61 0.10 4.17 1.60 83.35 0 1 2
104 DENSE 768 768 1.00 7.08 0.09 9.95 1.41 94.71 0 0 2
107 DENSE 768 768 1.00 4.90 0.10 7.08 1.44 92.84 0 0 1
111 DENSE 3072 768 4.00 3.34 0.02 8.77 2.62 82.85 1 0 0
114 DENSE 3072 768 4.00 4.69 0.08 10.82 2.31 82.21 0 0 0
118 DENSE 768 768 1.00 9.58 0.14 0.81 0.08 99.82 0 1 1

distilroberta-base Layer Plots
Layer 3
   Layer=3  |  N=768  |  M=514  |  Q=1.49  |  alpha=1.50  |  D_ks=0.10  |  alpha-hat=4.29  |  num traps=0









Layer 12
   Layer=12  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.35  |  D_ks=0.03  |  alpha-hat=7.06  |  num traps=0









Layer 13
   Layer=13  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.75  |  D_ks=0.07  |  alpha-hat=5.94  |  num traps=0









Layer 14
   Layer=14  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.78  |  D_ks=0.09  |  alpha-hat=3.02  |  num traps=0









Layer 17
   Layer=17  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.72  |  D_ks=0.08  |  alpha-hat=4.48  |  num traps=0









Layer 21
   Layer=21  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.80  |  D_ks=0.02  |  alpha-hat=7.46  |  num traps=1









Layer 24
   Layer=24  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.31  |  D_ks=0.03  |  alpha-hat=6.56  |  num traps=0









Layer 30
   Layer=30  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.76  |  D_ks=0.05  |  alpha-hat=8.30  |  num traps=0









Layer 31
   Layer=31  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.91  |  D_ks=0.04  |  alpha-hat=5.19  |  num traps=0









Layer 32
   Layer=32  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.33  |  D_ks=0.10  |  alpha-hat=3.28  |  num traps=0









Layer 35
   Layer=35  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.36  |  D_ks=0.08  |  alpha-hat=4.45  |  num traps=0









Layer 39
   Layer=39  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.58  |  D_ks=0.02  |  alpha-hat=10.05  |  num traps=1









Layer 42
   Layer=42  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.27  |  D_ks=0.07  |  alpha-hat=6.60  |  num traps=0









Layer 48
   Layer=48  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.92  |  D_ks=0.04  |  alpha-hat=8.85  |  num traps=0









Layer 49
   Layer=49  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.87  |  D_ks=0.09  |  alpha-hat=4.84  |  num traps=0









Layer 50
   Layer=50  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.15  |  D_ks=0.11  |  alpha-hat=6.57  |  num traps=0









Layer 53
   Layer=53  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.59  |  D_ks=0.08  |  alpha-hat=4.64  |  num traps=0









Layer 57
   Layer=57  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.69  |  D_ks=0.02  |  alpha-hat=9.97  |  num traps=1









Layer 60
   Layer=60  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.23  |  D_ks=0.07  |  alpha-hat=6.56  |  num traps=0









Layer 66
   Layer=66  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.19  |  D_ks=0.08  |  alpha-hat=7.32  |  num traps=0









Layer 67
   Layer=67  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.72  |  D_ks=0.05  |  alpha-hat=6.46  |  num traps=0









Layer 68
   Layer=68  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.65  |  D_ks=0.10  |  alpha-hat=3.43  |  num traps=0









Layer 71
   Layer=71  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.06  |  D_ks=0.05  |  alpha-hat=6.63  |  num traps=0









Layer 75
   Layer=75  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.56  |  D_ks=0.04  |  alpha-hat=9.53  |  num traps=1









Layer 78
   Layer=78  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.63  |  D_ks=0.02  |  alpha-hat=8.08  |  num traps=0









Layer 84
   Layer=84  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.54  |  D_ks=0.08  |  alpha-hat=7.26  |  num traps=0









Layer 85
   Layer=85  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.26  |  D_ks=0.09  |  alpha-hat=5.47  |  num traps=0









Layer 86
   Layer=86  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.87  |  D_ks=0.10  |  alpha-hat=6.13  |  num traps=0









Layer 89
   Layer=89  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.48  |  D_ks=0.06  |  alpha-hat=4.87  |  num traps=0









Layer 93
   Layer=93  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.39  |  D_ks=0.03  |  alpha-hat=9.21  |  num traps=1









Layer 96
   Layer=96  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.43  |  D_ks=0.04  |  alpha-hat=9.36  |  num traps=0









Layer 102
   Layer=102  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.29  |  D_ks=0.11  |  alpha-hat=4.92  |  num traps=0









Layer 103
   Layer=103  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.61  |  D_ks=0.10  |  alpha-hat=4.17  |  num traps=0









Layer 104
   Layer=104  |  N=768  |  M=768  |  Q=1.00  |  alpha=7.08  |  D_ks=0.09  |  alpha-hat=9.95  |  num traps=0









Layer 107
   Layer=107  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.90  |  D_ks=0.10  |  alpha-hat=7.08  |  num traps=0









Layer 111
   Layer=111  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.34  |  D_ks=0.02  |  alpha-hat=8.77  |  num traps=1









Layer 114
   Layer=114  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.69  |  D_ks=0.08  |  alpha-hat=10.82  |  num traps=0









Layer 118
   Layer=118  |  N=768  |  M=768  |  Q=1.00  |  alpha=9.58  |  D_ks=0.14  |  alpha-hat=0.81  |  num traps=0