Thetaphipsi's workspace
Runs
85
Name
15 visualized
Created
Runtime
GitHub
Notes
Tags
batch_size
dataset_type
lr
model
omit_embeddings
text_embedding_dimension
train_data
train_sz
val_data
val_sz
wandb_notes
warmup
wd
omit_eventsdata
model
step
train/batch_time
train/data_time
train/loss
train/lr
train/scale
val/image_to_text_R@1
val/image_to_text_R@10
val/image_to_text_R@5
val/image_to_text_mean_rank
val/image_to_text_median_rank
val/num_elements
val/text_to_image_R@1
val/text_to_image_R@10
val/text_to_image_R@5
val/text_to_image_mean_rank
val/text_to_image_median_rank
val/val_loss
16m 4s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.014142
LSTMCNN-EMB
false
700
./data/mimic3/new_extended_data_unique_embed_s2v.pickle
17397
./data/mimic3/new_test_data_unique_embed_s2v.pickle
4204
SentVec
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7500, out_features=15000, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15000,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15000, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=700, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.035539
0.00023723
0.017164
0.0000019271
4.6052
0.24667
0.65461
0.52735
30.98906
5
4204
0.25904
0.66817
0.53592
30.14129
5
1.33804
33m 11s
CNEP
128
mimic-emb
0.014142
LSTMCNN-EMB
false
1600
./data/mimic3/new_extended_data_unique_embed_BioGPT_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_BioGPT_chunked_meanpooler_prepro.pickle
4204
Sent2Vec diff model
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=8400, out_features=16800, bias=True)
(enc_relu): ReLU()
(enc_layernorm2): LayerNorm((16800,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=16800, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm3): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=1600, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
13465
0.040609
0.00019789
0.014838
2.8012e-7
4.6052
0.19886
0.5628
0.44196
49.35276
8
4204
0.20052
0.56137
0.44458
49.95599
7
2.03417
15m 18s
CNEP
128
mimic-emb
0.014142
LSTMCNN-EMB
true
700
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_extended_data_unique_embed_s2v.pickle
17397
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_test_data_unique_embed_s2v.pickle
4204
Sent2Vec ablation study chart events vs discharge notes
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=6800, out_features=13600, bias=True)
(enc_relu): ReLU()
(enc_layernorm2): LayerNorm((13600,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=13600, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm3): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=700, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.031641
0.00021601
0.025118
0.0000019271
4.6052
0.18459
0.54068
0.4196
53.05352
8
4204
0.19101
0.55947
0.43744
49.60014
8
1.99335
16m 23s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.014142
LSTMCNN-EMB
false
1280
./data/mimic3/new_extended_data_unique_embed_GPT-2_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_GPT-2_chunked_meanpooler_prepro.pickle
4204
BioBERT chunked meanpooler prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=8080, out_features=16160, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((16160,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=16160, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=1280, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.037821
0.00022602
0.048871
0.0000019271
4.6052
0.16151
0.51522
0.39391
54.38963
10
4204
0.16508
0.5176
0.39058
54.17816
10
2.02438
16m 14s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.014142
LSTMCNN-EMB
false
768
./data/mimic3/new_extended_data_unique_embed_CliBERT_2m_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_CliBERT_2m_chunked_meanpooler_prepro.pickle
4204
CliBERT 2m chunked prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7568, out_features=15136, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15136,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15136, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.040807
0.00026417
0.11089
0.0000019271
4.6052
0.11965
0.43768
0.31422
73.38796
14
4204
0.12464
0.43982
0.31613
71.85609
14
2.32895
11m 39s
CNEP
128
mimic-emb
0.014142
LSTMCNN-EMB
false
700
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_extended_data_unique_embed_s2v.pickle
17397
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_test_data_unique_embed_s2v.pickle
4204
Sent2Vec ablation study chart events vs discharge notes
1500
0.001
true
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=700, out_features=1400, bias=True)
(enc_relu): ReLU()
(enc_layernorm2): LayerNorm((1400,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=1400, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm3): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=700, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.0090945
0.005337
0.016263
0.0000019271
4.6052
0.15081
0.41579
0.33135
191.66127
20
4204
0.15913
0.41817
0.33563
172.53568
19
3.75981
16m 15s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.014142
LSTMCNN-EMB
false
768
./data/mimic3/new_extended_data_unique_embed_BioBERT_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_BioBERT_chunked_meanpooler_prepro.pickle
4204
BioBERT chunked meanpooler prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7568, out_features=15136, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15136,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15136, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.03611
0.0002315
0.11747
0.0000019271
4.6052
0.10871
0.4058
0.29686
85.40961
17
4204
0.10252
0.41365
0.29353
87.42103
16
2.44081
16m 8s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.014142
LSTMCNN-EMB
false
768
./data/mimic3/new_extended_data_unique_embed_PubMedBERT_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_PubMedBERT_chunked_meanpooler_prepro.pickle
4204
PubMedBERT chunked meanpooler prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7568, out_features=15136, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15136,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15136, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.036162
0.00023532
0.16826
0.0000019271
4.6052
0.10228
0.38297
0.27783
96.07041
19
4204
0.097526
0.38368
0.2726
99.06494
19
2.77651
15m 35s
BERT
128
mimic-emb
0.014142
LSTMCNN-EMB
false
768
./data/mimic3/new_extended_data_unique_embed_RoBERTa_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_RoBERTa_chunked_meanpooler_prepro.pickle
4204
RoBERTa chunked meanpooler prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7568, out_features=15136, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15136,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15136, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.037142
0.00026393
0.22047
0.0000019271
4.6052
0.094196
0.37274
0.26665
93.63511
20
4204
0.09372
0.37536
0.26047
99.65366
20
2.46251
15m 48s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.014142
LSTMCNN-EMB
false
768
./data/mimic3/new_extended_data_unique_embed_BERT_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_BERT_chunked_meanpooler_prepro.pickle
4204
BERT chunked meanpooler prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7568, out_features=15136, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15136,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15136, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.033894
0.00029445
0.17763
0.0000019271
4.6052
0.089676
0.34182
0.24215
121.67555
24
4204
0.084443
0.34277
0.23906
121.43601
25
3.04979
15m 34s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.00014142
LSTMCNN-EMB
false
768
./data/mimic3/new_extended_data_unique_embed_Doc2Vec_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_Doc2Vec_prepro.pickle
4204
Doc2Vec chunked prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7568, out_features=15136, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15136,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15136, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.034011
0.00020409
1.05745
1.9271e-8
3.08444
0.072788
0.27973
0.19244
122.04424
34
4204
0.072074
0.30733
0.21813
116.94767
29
2.00899
15m 10s
CNEP
128
mimic-emb
0.014142
LSTMCNN-EMB
true
768
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_extended_data_unique_embed_BERT_chunked_meanpooler_prepro.pickle
17397
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_test_data_unique_embed_BERT_chunked_meanpooler_prepro.pickle
4204
Sent2Vec ablation study chart events vs discharge notes
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=6800, out_features=13600, bias=True)
(enc_relu): ReLU()
(enc_layernorm2): LayerNorm((13600,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=13600, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm3): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.032053
0.00023556
0.29228
0.0000019271
4.4824
0.067079
0.28806
0.19458
160.81827
36
4204
0.061132
0.29044
0.18863
162.55971
35
3.38256
11m 45s
CNEP
128
mimic-emb
0.014142
LSTMCNN-EMB
false
768
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_extended_data_unique_embed_BERT_chunked_meanpooler_prepro.pickle
17397
/home/thetaphipsi/MasterAI/src/CNEP/src/data/mimic3/new_test_data_unique_embed_BERT_chunked_meanpooler_prepro.pickle
4204
Sent2Vec ablation study chart events vs discharge notes
1500
0.001
true
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=768, out_features=1536, bias=True)
(enc_relu): ReLU()
(enc_layernorm2): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=1536, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm3): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.010173
0.0064168
0.88408
0.0000019271
4.21436
0.02236
0.12964
0.081827
436.11893
143
4204
0.022598
0.12821
0.080637
417.84967
146
4.93538
15m 38s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
128
mimic-emb
0.014142
LSTMCNN-EMB
false
768
./data/mimic3/new_extended_data_unique_embed_BioELECTRA_chunked_meanpooler_prepro.pickle
17397
./data/mimic3/new_test_data_unique_embed_BioELECTRA_chunked_meanpooler_prepro.pickle
4204
BioELECTRA chunked meanpooler prepro
1500
0.001
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=7568, out_features=15136, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((15136,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=15136, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): NotesDataEncoder(
(encoder): Sequential(
(0): Sequential(
(0): Linear(in_features=768, out_features=15000, bias=True)
(1): ReLU()
)
(1): Sequential(
(0): Linear(in_features=15000, out_features=1024, bias=True)
(1): ReLU()
)
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
6040
0.036194
0.00022697
1.16937
0.0000019271
4.32808
0.01451
0.10252
0.057802
341.96503
135
4204
0.013559
0.090866
0.051618
368.42364
137
4.39918
45m 22s
45epochs
frozen
fulldata
lstmcnn
overfit
sentence-embedding
64
mimic
0.000014142
LSTMCNN
-
1
./data/mimic3/new_extended_data_unique.pickle
18184
./data/mimic3/new_test_data_unique.pickle
4424
baseline
1500
0.000005
-
CLIP(
(visual): EventsDataEncoder(
(lstm1): LSTMNew(
390, 512, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(do0): Dropout(p=0.3, inplace=False)
(lstm2): LSTMNew(
1024, 1024, batch_first=True, dropout=0.2
(input_drop): VariationalDropout()
(output_drop): VariationalDropout()
)
(cnn1): Sequential(
(cnn1_conv1d): Conv1d(1024, 100, kernel_size=(2,), stride=(1,))
(cnn1_relu): ReLU()
(cnn1_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn1_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn2): Sequential(
(cnn2_conv1d): Conv1d(1024, 100, kernel_size=(3,), stride=(1,))
(cnn2_relu): ReLU()
(cnn2_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn2_flatten): Flatten(start_dim=1, end_dim=-1)
)
(cnn3): Sequential(
(cnn3_conv1d): Conv1d(1024, 100, kernel_size=(4,), stride=(1,))
(cnn3_relu): ReLU()
(cnn3_maxpool1d): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(cnn3_flatten): Flatten(start_dim=1, end_dim=-1)
)
(encoder): Sequential(
(enc_fc1): Linear(in_features=6801, out_features=13602, bias=True)
(enc_relu): ReLU()
(enc_layernorm): LayerNorm((13602,), eps=1e-05, elementwise_affine=True)
(enc_fc2): Linear(in_features=13602, out_features=1024, bias=True)
(enc_relu2): ReLU()
(enc_layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(do2): Dropout(p=0.2, inplace=False)
)
(transformer): Transformer(
(resblocks): Sequential(
(0): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(1): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(2): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(3): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(4): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(5): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(6): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(7): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(8): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(9): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(10): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(11): ResidualAttentionBlock(
(attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(mlp): Sequential(
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
(gelu): QuickGELU()
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
)
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
)
)
(token_embedding): Embedding(49408, 512)
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
12696
0.12425
0.049664
4.16043
1.8892e-9
2.68317
0.00045208
0.0040687
0.0022604
2204.7561
2205
4424
0.00022604
0.0024864
0.0013562
2205.71361
2204
4.84185
1-15
of 15