xlnet-base-cased


Find this model in the BertXLNet model summary
xlnet-base-cased Model Summary Plots





xlnet-base-cased Model Selected Details
layer_id layer_type N M Q alpha D alpha-hat log_SN %Rand num_traps num_fingers rank_loss
1 EMBEDDING 32000 768 41.67 2.85 0.03 10.90 3.83 71.70 0 0 0
9 DENSE 3072 768 4.00 2.33 0.04 5.62 2.42 74.84 0 0 0
10 DENSE 3072 768 4.00 3.19 0.02 6.94 2.17 81.95 7 0 0
20 DENSE 3072 768 4.00 2.84 0.02 6.77 2.39 78.37 0 0 0
21 DENSE 3072 768 4.00 3.39 0.04 6.84 2.02 84.23 4 0 0
31 DENSE 3072 768 4.00 3.04 0.02 7.44 2.44 79.05 0 0 0
32 DENSE 3072 768 4.00 3.99 0.05 7.16 1.79 86.25 1 0 0
42 DENSE 3072 768 4.00 3.32 0.02 8.47 2.55 78.18 0 0 0
43 DENSE 3072 768 4.00 4.37 0.05 9.55 2.18 87.51 8 0 0
53 DENSE 3072 768 4.00 3.68 0.03 8.57 2.33 82.21 0 0 0
54 DENSE 3072 768 4.00 4.29 0.03 9.11 2.12 87.90 3 0 0
64 DENSE 3072 768 4.00 3.98 0.03 9.02 2.27 82.75 0 0 0
65 DENSE 3072 768 4.00 4.56 0.03 8.73 1.91 87.30 3 0 0
75 DENSE 3072 768 4.00 3.57 0.04 8.17 2.29 82.96 0 0 0
76 DENSE 3072 768 4.00 3.27 0.10 5.91 1.81 88.13 5 7 0
86 DENSE 3072 768 4.00 3.76 0.03 8.55 2.27 83.34 0 0 0
87 DENSE 3072 768 4.00 3.95 0.03 8.94 2.26 86.53 5 0 0
97 DENSE 3072 768 4.00 3.57 0.02 8.88 2.49 82.24 0 0 0
98 DENSE 3072 768 4.00 3.63 0.04 9.85 2.71 81.03 9 0 0
108 DENSE 3072 768 4.00 3.81 0.02 9.36 2.46 82.89 0 0 0
109 DENSE 3072 768 4.00 3.54 0.02 9.48 2.67 81.95 6 0 0
119 DENSE 3072 768 4.00 3.61 0.02 8.72 2.41 83.00 0 0 0
120 DENSE 3072 768 4.00 3.80 0.02 8.89 2.34 84.51 14 0 0
130 DENSE 3072 768 4.00 3.39 0.02 7.18 2.12 84.16 0 0 0
131 DENSE 3072 768 4.00 3.43 0.06 7.70 2.25 87.94 23 0 0

xlnet-base-cased Layer Plots
Layer 1
   Layer=1  |  N=32000  |  M=768  |  Q=41.67  |  alpha=2.85  |  D_ks=0.03  |  alpha-hat=10.90  |  num traps=0









Layer 9
   Layer=9  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.33  |  D_ks=0.04  |  alpha-hat=5.62  |  num traps=0









Layer 10
   Layer=10  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.19  |  D_ks=0.02  |  alpha-hat=6.94  |  num traps=7









Layer 20
   Layer=20  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.84  |  D_ks=0.02  |  alpha-hat=6.77  |  num traps=0









Layer 21
   Layer=21  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.39  |  D_ks=0.04  |  alpha-hat=6.84  |  num traps=4









Layer 31
   Layer=31  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.04  |  D_ks=0.02  |  alpha-hat=7.44  |  num traps=0









Layer 32
   Layer=32  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.99  |  D_ks=0.05  |  alpha-hat=7.16  |  num traps=1









Layer 42
   Layer=42  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.32  |  D_ks=0.02  |  alpha-hat=8.47  |  num traps=0









Layer 43
   Layer=43  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.37  |  D_ks=0.05  |  alpha-hat=9.55  |  num traps=8









Layer 53
   Layer=53  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.68  |  D_ks=0.03  |  alpha-hat=8.57  |  num traps=0









Layer 54
   Layer=54  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.29  |  D_ks=0.03  |  alpha-hat=9.11  |  num traps=3









Layer 64
   Layer=64  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.98  |  D_ks=0.03  |  alpha-hat=9.02  |  num traps=0









Layer 65
   Layer=65  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.56  |  D_ks=0.03  |  alpha-hat=8.73  |  num traps=3









Layer 75
   Layer=75  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.57  |  D_ks=0.04  |  alpha-hat=8.17  |  num traps=0









Layer 76
   Layer=76  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.27  |  D_ks=0.10  |  alpha-hat=5.91  |  num traps=5









Layer 86
   Layer=86  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.76  |  D_ks=0.03  |  alpha-hat=8.55  |  num traps=0









Layer 87
   Layer=87  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.95  |  D_ks=0.03  |  alpha-hat=8.94  |  num traps=5









Layer 97
   Layer=97  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.57  |  D_ks=0.02  |  alpha-hat=8.88  |  num traps=0









Layer 98
   Layer=98  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.63  |  D_ks=0.04  |  alpha-hat=9.85  |  num traps=9









Layer 108
   Layer=108  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.81  |  D_ks=0.02  |  alpha-hat=9.36  |  num traps=0









Layer 109
   Layer=109  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.54  |  D_ks=0.02  |  alpha-hat=9.48  |  num traps=6









Layer 119
   Layer=119  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.61  |  D_ks=0.02  |  alpha-hat=8.72  |  num traps=0









Layer 120
   Layer=120  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.80  |  D_ks=0.02  |  alpha-hat=8.89  |  num traps=14









Layer 130
   Layer=130  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.39  |  D_ks=0.02  |  alpha-hat=7.18  |  num traps=0









Layer 131
   Layer=131  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.43  |  D_ks=0.06  |  alpha-hat=7.70  |  num traps=23