distilroberta-base


Find this model in the RoBERTa model summary
distilroberta-base Model Summary Plots





distilroberta-base Model Selected Details
layer_id layer_type N M Q alpha D alpha-hat log_SN rand_D num_traps stable_rank rank_loss
2 EMBEDDING 50265 768 65.45 3.23 0.05 14.17 4.39 0.24 1 26.63 0.00
3 EMBEDDING 768 514 1.49 1.51 0.10 4.31 2.86 0.72 0 2.38 1.00
12 DENSE 768 768 1.00 3.31 0.04 6.99 2.11 0.13 0 49.61 1.00
13 DENSE 768 768 1.00 2.81 0.07 6.06 2.16 0.20 0 43.82 1.00
14 DENSE 768 768 1.00 2.71 0.12 2.95 1.09 0.13 0 76.31 1.00
17 DENSE 768 768 1.00 4.13 0.05 6.80 1.65 0.17 0 21.35 1.00
21 DENSE 3072 768 4.00 2.80 0.02 7.46 2.66 0.19 1 16.80 0.00
24 DENSE 3072 768 4.00 3.31 0.04 6.56 1.98 0.15 0 53.94 0.00
30 DENSE 768 768 1.00 3.76 0.05 8.30 2.21 0.17 0 18.81 2.00
31 DENSE 768 768 1.00 2.87 0.06 5.11 1.78 0.17 0 48.85 1.00
32 DENSE 768 768 1.00 7.00 0.08 9.86 1.41 0.15 0 53.78 0.00
35 DENSE 768 768 1.00 4.98 0.09 6.60 1.32 0.13 0 50.69 2.00
39 DENSE 3072 768 4.00 3.58 0.02 10.05 2.81 0.15 1 14.68 0.00
42 DENSE 3072 768 4.00 5.14 0.05 10.36 2.02 0.17 0 56.41 0.00
48 DENSE 768 768 1.00 3.92 0.04 8.85 2.25 0.17 0 18.28 1.00
49 DENSE 768 768 1.00 4.22 0.09 7.12 1.69 0.14 0 62.65 2.00
50 DENSE 768 768 1.00 5.85 0.06 9.25 1.58 0.16 0 41.63 1.00
53 DENSE 768 768 1.00 3.59 0.10 4.64 1.29 0.13 0 64.32 1.00
57 DENSE 3072 768 4.00 3.69 0.02 9.97 2.70 0.16 1 16.31 0.00
60 DENSE 3072 768 4.00 4.67 0.04 9.49 2.03 0.17 0 45.09 0.00
66 DENSE 768 768 1.00 4.04 0.09 7.06 1.75 0.14 0 60.62 1.00
67 DENSE 768 768 1.00 3.66 0.07 6.36 1.74 0.17 0 58.23 2.00
68 DENSE 768 768 1.00 8.71 0.08 11.27 1.29 0.13 0 69.27 1.00
71 DENSE 768 768 1.00 5.00 0.06 6.56 1.31 0.10 0 50.53 1.00
75 DENSE 3072 768 4.00 3.56 0.03 9.53 2.68 0.16 1 16.42 0.00
78 DENSE 3072 768 4.00 3.59 0.02 8.00 2.23 0.19 0 29.84 0.00
84 DENSE 768 768 1.00 3.54 0.08 7.26 2.05 0.20 0 26.40 2.00
85 DENSE 768 768 1.00 4.32 0.11 7.26 1.68 0.19 0 58.21 2.00
86 DENSE 768 768 1.00 5.18 0.06 8.21 1.59 0.08 0 35.18 2.00
89 DENSE 768 768 1.00 3.48 0.06 4.87 1.40 0.07 0 41.38 1.00
93 DENSE 3072 768 4.00 3.39 0.02 9.21 2.72 0.16 1 11.23 0.00
96 DENSE 3072 768 4.00 4.43 0.04 9.36 2.12 0.13 0 36.19 0.00
102 DENSE 768 768 1.00 4.33 0.07 9.31 2.15 0.20 0 19.56 1.00
103 DENSE 768 768 1.00 8.99 0.08 14.38 1.60 0.17 0 66.55 2.00
104 DENSE 768 768 1.00 7.08 0.09 9.95 1.41 0.05 0 63.27 2.00
107 DENSE 768 768 1.00 4.90 0.10 7.08 1.44 0.07 0 43.07 1.00
111 DENSE 3072 768 4.00 3.34 0.02 8.77 2.62 0.17 1 11.97 0.00
114 DENSE 3072 768 4.00 4.69 0.08 10.82 2.31 0.18 0 14.94 0.00
118 DENSE 768 768 1.00 9.92 0.18 0.84 0.08 0.00 0 193.83 1.00

distilroberta-base Layer Plots
Layer 2
   Layer=2  |  N=50265  |  M=768  |  Q=65.45  |  alpha=3.23  |  D_ks=0.05  |  alpha-hat=14.17  |  num traps=1









Layer 3
   Layer=3  |  N=768  |  M=514  |  Q=1.49  |  alpha=1.51  |  D_ks=0.10  |  alpha-hat=4.31  |  num traps=0









Layer 12
   Layer=12  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.31  |  D_ks=0.04  |  alpha-hat=6.99  |  num traps=0









Layer 13
   Layer=13  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.81  |  D_ks=0.07  |  alpha-hat=6.06  |  num traps=0









Layer 14
   Layer=14  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.71  |  D_ks=0.12  |  alpha-hat=2.95  |  num traps=0









Layer 17
   Layer=17  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.13  |  D_ks=0.05  |  alpha-hat=6.80  |  num traps=0









Layer 21
   Layer=21  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.80  |  D_ks=0.02  |  alpha-hat=7.46  |  num traps=1









Layer 24
   Layer=24  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.31  |  D_ks=0.04  |  alpha-hat=6.56  |  num traps=0









Layer 30
   Layer=30  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.76  |  D_ks=0.05  |  alpha-hat=8.30  |  num traps=0









Layer 31
   Layer=31  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.87  |  D_ks=0.06  |  alpha-hat=5.11  |  num traps=0









Layer 32
   Layer=32  |  N=768  |  M=768  |  Q=1.00  |  alpha=7.00  |  D_ks=0.08  |  alpha-hat=9.86  |  num traps=0









Layer 35
   Layer=35  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.98  |  D_ks=0.09  |  alpha-hat=6.60  |  num traps=0









Layer 39
   Layer=39  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.58  |  D_ks=0.02  |  alpha-hat=10.05  |  num traps=1









Layer 42
   Layer=42  |  N=3072  |  M=768  |  Q=4.00  |  alpha=5.14  |  D_ks=0.05  |  alpha-hat=10.36  |  num traps=0









Layer 48
   Layer=48  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.92  |  D_ks=0.04  |  alpha-hat=8.85  |  num traps=0









Layer 49
   Layer=49  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.22  |  D_ks=0.09  |  alpha-hat=7.12  |  num traps=0









Layer 50
   Layer=50  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.85  |  D_ks=0.06  |  alpha-hat=9.25  |  num traps=0









Layer 53
   Layer=53  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.59  |  D_ks=0.10  |  alpha-hat=4.64  |  num traps=0









Layer 57
   Layer=57  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.69  |  D_ks=0.02  |  alpha-hat=9.97  |  num traps=1









Layer 60
   Layer=60  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.67  |  D_ks=0.04  |  alpha-hat=9.49  |  num traps=0









Layer 66
   Layer=66  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.04  |  D_ks=0.09  |  alpha-hat=7.06  |  num traps=0









Layer 67
   Layer=67  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.66  |  D_ks=0.07  |  alpha-hat=6.36  |  num traps=0









Layer 68
   Layer=68  |  N=768  |  M=768  |  Q=1.00  |  alpha=8.71  |  D_ks=0.08  |  alpha-hat=11.27  |  num traps=0









Layer 71
   Layer=71  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.00  |  D_ks=0.06  |  alpha-hat=6.56  |  num traps=0









Layer 75
   Layer=75  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.56  |  D_ks=0.03  |  alpha-hat=9.53  |  num traps=1









Layer 78
   Layer=78  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.59  |  D_ks=0.02  |  alpha-hat=8.00  |  num traps=0









Layer 84
   Layer=84  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.54  |  D_ks=0.08  |  alpha-hat=7.26  |  num traps=0









Layer 85
   Layer=85  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.32  |  D_ks=0.11  |  alpha-hat=7.26  |  num traps=0









Layer 86
   Layer=86  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.18  |  D_ks=0.06  |  alpha-hat=8.21  |  num traps=0









Layer 89
   Layer=89  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.48  |  D_ks=0.06  |  alpha-hat=4.87  |  num traps=0









Layer 93
   Layer=93  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.39  |  D_ks=0.02  |  alpha-hat=9.21  |  num traps=1









Layer 96
   Layer=96  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.43  |  D_ks=0.04  |  alpha-hat=9.36  |  num traps=0









Layer 102
   Layer=102  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.33  |  D_ks=0.07  |  alpha-hat=9.31  |  num traps=0









Layer 103
   Layer=103  |  N=768  |  M=768  |  Q=1.00  |  alpha=8.99  |  D_ks=0.08  |  alpha-hat=14.38  |  num traps=0









Layer 104
   Layer=104  |  N=768  |  M=768  |  Q=1.00  |  alpha=7.08  |  D_ks=0.09  |  alpha-hat=9.95  |  num traps=0









Layer 107
   Layer=107  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.90  |  D_ks=0.10  |  alpha-hat=7.08  |  num traps=0









Layer 111
   Layer=111  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.34  |  D_ks=0.02  |  alpha-hat=8.77  |  num traps=1









Layer 114
   Layer=114  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.69  |  D_ks=0.08  |  alpha-hat=10.82  |  num traps=0









Layer 118
   Layer=118  |  N=768  |  M=768  |  Q=1.00  |  alpha=9.92  |  D_ks=0.18  |  alpha-hat=0.84  |  num traps=0