openai-gpt


Find this model in the GPT model summary
openai-gpt Model Summary Plots





openai-gpt Model Selected Details
  layer_type N M Q alpha D alpha-hat log_SN % Rand num_traps num_fingers rank_loss
layer_id                        
1 EMBEDDING 40478 768 52.71 3.00 0.04 15.02 5.01 63.52 0 0 0
2 EMBEDDING 768 512 1.50 1.38 0.07 3.14 2.27 26.70 0 0 0
7 CONV1D 2304 768 3.00 2.04 0.05 6.52 3.20 50.78 0 0 0
8 CONV1D 768 768 1.00 2.32 0.06 2.69 1.16 81.02 0 0 1
13 CONV1D 3072 768 4.00 4.01 0.06 10.67 2.66 84.48 0 0 0
14 CONV1D 3072 768 4.00 2.65 0.04 6.77 2.56 87.08 20 0 0
20 CONV1D 2304 768 3.00 4.77 0.05 9.48 1.99 90.13 0 0 0
21 CONV1D 768 768 1.00 4.51 0.04 4.99 1.11 90.66 0 0 1
26 CONV1D 3072 768 4.00 3.47 0.04 9.52 2.74 82.90 1 0 0
27 CONV1D 3072 768 4.00 3.37 0.05 10.29 3.05 83.44 24 0 0
32 CONV1D 2304 768 3.00 4.23 0.05 8.94 2.11 89.68 0 0 0
33 CONV1D 768 768 1.00 5.88 0.07 5.79 0.98 92.53 0 0 1
38 CONV1D 3072 768 4.00 3.76 0.04 9.93 2.64 84.72 1 0 0
39 CONV1D 3072 768 4.00 3.95 0.04 10.60 2.69 90.90 5 0 0
44 CONV1D 2304 768 3.00 4.98 0.04 9.33 1.87 91.46 0 0 0
45 CONV1D 768 768 1.00 7.09 0.04 6.49 0.92 94.37 0 0 1
50 CONV1D 3072 768 4.00 4.18 0.04 10.81 2.59 86.25 1 0 0
51 CONV1D 3072 768 4.00 3.81 0.05 11.61 3.05 87.68 4 0 0
56 CONV1D 2304 768 3.00 4.34 0.03 8.17 1.88 88.29 0 0 0
57 CONV1D 768 768 1.00 6.03 0.07 4.91 0.81 93.59 0 0 1
62 CONV1D 3072 768 4.00 4.19 0.03 10.85 2.59 86.52 1 0 0
63 CONV1D 3072 768 4.00 3.93 0.03 9.76 2.48 88.55 2 0 0
68 CONV1D 2304 768 3.00 4.16 0.04 7.64 1.84 87.33 0 0 0
69 CONV1D 768 768 1.00 6.06 0.09 4.56 0.75 92.31 0 0 1
74 CONV1D 3072 768 4.00 4.00 0.04 9.96 2.49 88.22 1 0 0
75 CONV1D 3072 768 4.00 3.89 0.03 9.58 2.46 87.21 5 0 0
80 CONV1D 2304 768 3.00 3.56 0.04 6.96 1.96 86.41 0 0 0
81 CONV1D 768 768 1.00 5.48 0.11 4.25 0.77 93.56 0 1 1
86 CONV1D 3072 768 4.00 4.07 0.03 9.88 2.42 87.79 1 0 0
87 CONV1D 3072 768 4.00 3.55 0.04 8.05 2.27 86.89 4 0 0
92 CONV1D 2304 768 3.00 3.82 0.03 7.03 1.84 88.14 0 0 0
93 CONV1D 768 768 1.00 3.50 0.13 3.24 0.93 92.69 0 4 1
98 CONV1D 3072 768 4.00 4.19 0.02 9.83 2.35 87.93 1 0 0
99 CONV1D 3072 768 4.00 3.89 0.04 10.86 2.79 83.15 3 0 0
104 CONV1D 2304 768 3.00 4.96 0.05 9.59 1.93 88.59 0 0 0
105 CONV1D 768 768 1.00 3.16 0.12 2.72 0.86 93.14 0 1 1
110 CONV1D 3072 768 4.00 4.09 0.02 9.69 2.37 90.29 1 0 0
111 CONV1D 3072 768 4.00 3.81 0.04 11.10 2.92 79.48 4 0 0
116 CONV1D 2304 768 3.00 4.50 0.03 8.71 1.94 89.28 0 0 0
117 CONV1D 768 768 1.00 8.26 0.13 6.61 0.80 95.02 0 1 1
122 CONV1D 3072 768 4.00 4.06 0.02 9.38 2.31 91.01 1 0 0
123 CONV1D 3072 768 4.00 4.16 0.04 9.69 2.33 91.10 1 0 0
128 CONV1D 2304 768 3.00 4.33 0.04 8.19 1.89 89.22 0 0 0
129 CONV1D 768 768 1.00 6.23 0.12 5.48 0.88 96.21 0 3 1
134 CONV1D 3072 768 4.00 4.10 0.02 8.90 2.17 90.36 1 0 0
135 CONV1D 3072 768 4.00 4.18 0.05 9.80 2.34 90.88 3 0 0
140 CONV1D 2304 768 3.00 3.55 0.03 7.17 2.02 87.02 0 0 0
141 CONV1D 768 768 1.00 4.17 0.11 6.75 1.62 88.45 1 0 0
146 CONV1D 3072 768 4.00 3.19 0.02 7.12 2.23 83.94 1 0 0
147 CONV1D 3072 768 4.00 4.13 0.02 8.82 2.14 88.94 11 0 0

openai-gpt Layer Plots
Layer 1
   Layer=1  |  N=40478  |  M=768  |  Q=52.71  |  alpha=3.00  |  D_ks=0.04  |  alpha-hat=15.02  |  num traps=0









Layer 2
   Layer=2  |  N=768  |  M=512  |  Q=1.50  |  alpha=1.38  |  D_ks=0.07  |  alpha-hat=3.14  |  num traps=0









Layer 7
   Layer=7  |  N=2304  |  M=768  |  Q=3.00  |  alpha=2.04  |  D_ks=0.05  |  alpha-hat=6.52  |  num traps=0









Layer 8
   Layer=8  |  N=768  |  M=768  |  Q=1.00  |  alpha=2.32  |  D_ks=0.06  |  alpha-hat=2.69  |  num traps=0









Layer 13
   Layer=13  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.01  |  D_ks=0.06  |  alpha-hat=10.67  |  num traps=0









Layer 14
   Layer=14  |  N=3072  |  M=768  |  Q=4.00  |  alpha=2.65  |  D_ks=0.04  |  alpha-hat=6.77  |  num traps=20









Layer 20
   Layer=20  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.77  |  D_ks=0.05  |  alpha-hat=9.48  |  num traps=0









Layer 21
   Layer=21  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.51  |  D_ks=0.04  |  alpha-hat=4.99  |  num traps=0









Layer 26
   Layer=26  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.47  |  D_ks=0.04  |  alpha-hat=9.52  |  num traps=1









Layer 27
   Layer=27  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.37  |  D_ks=0.05  |  alpha-hat=10.29  |  num traps=24









Layer 32
   Layer=32  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.23  |  D_ks=0.05  |  alpha-hat=8.94  |  num traps=0









Layer 33
   Layer=33  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.88  |  D_ks=0.07  |  alpha-hat=5.79  |  num traps=0









Layer 38
   Layer=38  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.76  |  D_ks=0.04  |  alpha-hat=9.93  |  num traps=1









Layer 39
   Layer=39  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.95  |  D_ks=0.04  |  alpha-hat=10.60  |  num traps=5









Layer 44
   Layer=44  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.98  |  D_ks=0.04  |  alpha-hat=9.33  |  num traps=0









Layer 45
   Layer=45  |  N=768  |  M=768  |  Q=1.00  |  alpha=7.09  |  D_ks=0.04  |  alpha-hat=6.49  |  num traps=0









Layer 50
   Layer=50  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.18  |  D_ks=0.04  |  alpha-hat=10.81  |  num traps=1









Layer 51
   Layer=51  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.81  |  D_ks=0.05  |  alpha-hat=11.61  |  num traps=4









Layer 56
   Layer=56  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.34  |  D_ks=0.03  |  alpha-hat=8.17  |  num traps=0









Layer 57
   Layer=57  |  N=768  |  M=768  |  Q=1.00  |  alpha=6.03  |  D_ks=0.07  |  alpha-hat=4.91  |  num traps=0









Layer 62
   Layer=62  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.19  |  D_ks=0.03  |  alpha-hat=10.85  |  num traps=1









Layer 63
   Layer=63  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.93  |  D_ks=0.03  |  alpha-hat=9.76  |  num traps=2









Layer 68
   Layer=68  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.16  |  D_ks=0.04  |  alpha-hat=7.64  |  num traps=0









Layer 69
   Layer=69  |  N=768  |  M=768  |  Q=1.00  |  alpha=6.06  |  D_ks=0.09  |  alpha-hat=4.56  |  num traps=0









Layer 74
   Layer=74  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.00  |  D_ks=0.04  |  alpha-hat=9.96  |  num traps=1









Layer 75
   Layer=75  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.89  |  D_ks=0.03  |  alpha-hat=9.58  |  num traps=5









Layer 80
   Layer=80  |  N=2304  |  M=768  |  Q=3.00  |  alpha=3.56  |  D_ks=0.04  |  alpha-hat=6.96  |  num traps=0









Layer 81
   Layer=81  |  N=768  |  M=768  |  Q=1.00  |  alpha=5.48  |  D_ks=0.11  |  alpha-hat=4.25  |  num traps=0









Layer 86
   Layer=86  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.07  |  D_ks=0.03  |  alpha-hat=9.88  |  num traps=1









Layer 87
   Layer=87  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.55  |  D_ks=0.04  |  alpha-hat=8.05  |  num traps=4









Layer 92
   Layer=92  |  N=2304  |  M=768  |  Q=3.00  |  alpha=3.82  |  D_ks=0.03  |  alpha-hat=7.03  |  num traps=0









Layer 93
   Layer=93  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.50  |  D_ks=0.13  |  alpha-hat=3.24  |  num traps=0









Layer 98
   Layer=98  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.19  |  D_ks=0.02  |  alpha-hat=9.83  |  num traps=1









Layer 99
   Layer=99  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.89  |  D_ks=0.04  |  alpha-hat=10.86  |  num traps=3









Layer 104
   Layer=104  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.96  |  D_ks=0.05  |  alpha-hat=9.59  |  num traps=0









Layer 105
   Layer=105  |  N=768  |  M=768  |  Q=1.00  |  alpha=3.16  |  D_ks=0.12  |  alpha-hat=2.72  |  num traps=0









Layer 110
   Layer=110  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.09  |  D_ks=0.02  |  alpha-hat=9.69  |  num traps=1









Layer 111
   Layer=111  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.81  |  D_ks=0.04  |  alpha-hat=11.10  |  num traps=4









Layer 116
   Layer=116  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.50  |  D_ks=0.03  |  alpha-hat=8.71  |  num traps=0









Layer 117
   Layer=117  |  N=768  |  M=768  |  Q=1.00  |  alpha=8.26  |  D_ks=0.13  |  alpha-hat=6.61  |  num traps=0









Layer 122
   Layer=122  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.06  |  D_ks=0.02  |  alpha-hat=9.38  |  num traps=1









Layer 123
   Layer=123  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.16  |  D_ks=0.04  |  alpha-hat=9.69  |  num traps=1









Layer 128
   Layer=128  |  N=2304  |  M=768  |  Q=3.00  |  alpha=4.33  |  D_ks=0.04  |  alpha-hat=8.19  |  num traps=0









Layer 129
   Layer=129  |  N=768  |  M=768  |  Q=1.00  |  alpha=6.23  |  D_ks=0.12  |  alpha-hat=5.48  |  num traps=0









Layer 134
   Layer=134  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.10  |  D_ks=0.02  |  alpha-hat=8.90  |  num traps=1









Layer 135
   Layer=135  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.18  |  D_ks=0.05  |  alpha-hat=9.80  |  num traps=3









Layer 140
   Layer=140  |  N=2304  |  M=768  |  Q=3.00  |  alpha=3.55  |  D_ks=0.03  |  alpha-hat=7.17  |  num traps=0









Layer 141
   Layer=141  |  N=768  |  M=768  |  Q=1.00  |  alpha=4.17  |  D_ks=0.11  |  alpha-hat=6.75  |  num traps=1









Layer 146
   Layer=146  |  N=3072  |  M=768  |  Q=4.00  |  alpha=3.19  |  D_ks=0.02  |  alpha-hat=7.12  |  num traps=1









Layer 147
   Layer=147  |  N=3072  |  M=768  |  Q=4.00  |  alpha=4.13  |  D_ks=0.02  |  alpha-hat=8.82  |  num traps=11