```bash
docker compose build
docker compose up
```

```bash
git clone https://github.com/megagonlabs/jrte-corpus
python jrte/sentiment.py --model-name nlp-waseda/roberta-base-japanese
```
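
As a rough illustration of how the hyperparameters listed in the tables below (batch_size, initial_learning_rate, gradient_accumulation_steps, max_length) map onto a Hugging Face Transformers training loop, here is a minimal sketch. It is not the repository's jrte/sentiment.py; the dataset paths, label count, and number of epochs are placeholder assumptions.

```python
# Minimal fine-tuning sketch with Hugging Face Transformers. This is NOT the
# repository's jrte/sentiment.py; it only shows how the hyperparameter columns
# below (batch_size, initial_learning_rate, gradient_accumulation_steps,
# max_length) typically fit together.
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

MODEL_NAME = "nlp-waseda/roberta-base-japanese"
MAX_LENGTH = 128       # "max_length" column
BATCH_SIZE = 32        # "batch_size" column (assumed per-device here)
LEARNING_RATE = 5e-05  # "initial_learning_rate" column
GRAD_ACCUM = 1         # "gradient_accumulation_steps" column

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)

# Hypothetical TSV files with "text" and "label" columns; the real file layout
# and label set are defined by the jrte-corpus repository.
dataset = load_dataset(
    "csv",
    data_files={"train": "train.tsv", "validation": "dev.tsv"},
    delimiter="\t",
)
dataset = dataset.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=MAX_LENGTH),
    batched=True,
)

args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    learning_rate=LEARNING_RATE,
    num_train_epochs=3,  # placeholder; not taken from the tables
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    tokenizer=tokenizer,  # enables dynamic padding via the default collator
)
trainer.train()
```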

Hyperparameters (script: jrte/sentiment.py):

| model | batch_size | initial_learning_rate | gradient_accumulation_steps | max_length |
| --- | --- | --- | --- | --- |
| bert-base-japanese-v2 | 32 | 5e-05 | 1 | 128 |
| bert-base-japanese-v2(512) | 32 | 5e-05 | 1 | 512 |
| bert-base-japanese-whole-word-masking | 32 | 5e-05 | 1 | 128 |
| bert-large-japanese | 32 | 5e-05 | 1 | 128 |
| bert-base-japanese-char-v2(512) | 16 | 7e-05 | 1 | 512 |
| transformers-ud-japanese-electra-base-discriminator | 32 | 0.0001 | 1 | 128 |
| transformers-ud-japanese-electra-base-discriminator(512) | 32 | 0.0001 | 1 | 512 |
| roberta-base-japanese | 32 | 5e-05 | 1 | 128 |
| roberta-base-japanese(512) | 32 | 5e-05 | 1 | 512 |
| roberta-large-japanese | 16 | 2e-05 | 1 | 128 |
| roberta-large-japanese(512) | 4 | 1e-05 | 1 | 512 |
| electra-base-japanese-discriminator | 32 | 0.0001 | 1 | 128 |
| chiTra-1.0 | 32 | 5e-05 | 1 | 128 |

Hyperparameters (script: reputation.py):

| model | batch_size | initial_learning_rate | gradient_accumulation_steps | max_length |
| --- | --- | --- | --- | --- |
| bert-base-japanese-v2 | 32 | 5e-05 | 1 | 128 |
| bert-base-japanese-v2(512) | 32 | 5e-05 | 1 | 512 |
| bert-base-japanese-whole-word-masking | 32 | 5e-05 | 1 | 128 |
| bert-large-japanese | 32 | 5e-05 | 1 | 128 |
| bert-large-japanese(512, batch_size=16) | 16 | 5e-05 | 1 | 512 |
| bert-base-japanese-char-v2(512) | 16 | 7e-05 | 1 | 512 |
| transformers-ud-japanese-electra-base-discriminator | 32 | 0.0001 | 1 | 128 |
| transformers-ud-japanese-electra-base-discriminator(512) | 32 | 0.0001 | 1 | 512 |
| roberta-base-japanese | 32 | 5e-05 | 1 | 128 |
| roberta-base-japanese(512) | 32 | 5e-05 | 1 | 512 |
| roberta-large-japanese | 16 | 2e-05 | 1 | 128 |
| roberta-large-japanese(512) | 4 | 1e-05 | 1 | 512 |
| electra-base-japanese-discriminator | 32 | 0.0001 | 1 | 128 |
| chiTra-1.0 | 32 | 5e-05 | 1 | 128 |
| chiTra-1.0(512) | 32 | 5e-05 | 1 | 512 |

Hyperparameters (script: entailment.py):

| model | batch_size | initial_learning_rate | gradient_accumulation_steps | max_length |
| --- | --- | --- | --- | --- |
| bert-base-japanese-v2 | 32 | 5e-05 | 1 | 128 |
| bert-large-japanese | 16 | 2e-05 | 1 | 128 |
| bert-base-japanese-char-v2(512) | 16 | 7e-05 | 1 | 512 |
| transformers-ud-japanese-electra-base-discriminator | 32 | 0.0001 | 1 | 128 |
| transformers-ud-japanese-electra-base-discriminator(512) | 16 | 0.0001 | 2 | 512 |
| roberta-base-japanese | 32 | 1e-05 | 1 | 128 |
| roberta-base-japanese(512) | 16 | 1e-05 | 2 | 512 |
| roberta-large-japanese | 16 | 7e-06 | 1 | 128 |
| roberta-large-japanese(512) | 4 | 5e-06 | 1 | 512 |
| chiTra-1.0 | 32 | 5e-05 | 1 | 128 |
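
A note on reading the gradient_accumulation_steps column: rows that set it above 1 reduce the per-device batch size by the same factor, so the effective batch size is unchanged. A quick check against the two entailment rows that use it:

```python
# Effective batch size = batch_size * gradient_accumulation_steps.
# The two entailment rows with gradient_accumulation_steps=2 halve the
# per-device batch size, so their effective batch size stays at 32,
# matching the max_length=128 rows.
rows = {
    "transformers-ud-japanese-electra-base-discriminator(512)": (16, 2),
    "roberta-base-japanese(512)": (16, 2),
}
for name, (batch_size, grad_accum) in rows.items():
    print(f"{name}: {batch_size * grad_accum}")  # -> 32 for both
```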
script: jrte/sentiment.py

| model (max_len) | macro-f1 | weighted-f1 | acc. |
| --- | --- | --- | --- |
| cl-tohoku/bert-base-japanese-v2 | 0.8173 | 0.8559 | 0.8590 |
| cl-tohoku/bert-base-japanese-v2(512) | 0.8438 | 0.8697 | 0.8716 |
| cl-tohoku/bert-base-japanese-whole-word-masking | 0.8150 | 0.8444 | 0.8499 |
| cl-tohoku/bert-large-japanese | 0.8026 | 0.8424 | 0.8463 |
| cl-tohoku/bert-base-japanese-char-v2(512) | 0.8106 | 0.8543 | 0.8571 |
| megagonlabs/transformers-ud-japanese-electra-base-discriminator | 0.8594 | 0.8889 | 0.8897 |
| megagonlabs/transformers-ud-japanese-electra-base-discriminator(512) | 0.8530 | 0.8793 | 0.8807 |
| nlp-waseda/roberta-base-japanese | 0.8496 | 0.8719 | 0.8770 |
| nlp-waseda/roberta-base-japanese(512) | 0.8576 | 0.8832 | 0.8843 |
| nlp-waseda/roberta-large-japanese | 0.8753 | 0.8928 | 0.8951 |
| nlp-waseda/roberta-large-japanese(512) | 0.8572 | 0.8830 | 0.8843 |
| izumi-lab/electra-base-japanese-discriminator | 0.8116 | 0.8422 | 0.8463 |
| chiTra-1 | 0.8214 | 0.8594 | 0.8608 |
script: reputation.py

| model (max_len) | macro-f1 | weighted-f1 | acc. |
| --- | --- | --- | --- |
| cl-tohoku/bert-base-japanese-v2 | 0.8951 | 0.9023 | 0.9024 |
| cl-tohoku/bert-base-japanese-v2(512) | 0.8959 | 0.9035 | 0.9042 |
| cl-tohoku/bert-base-japanese-whole-word-masking | 0.8938 | 0.9016 | 0.9024 |
| cl-tohoku/bert-large-japanese | 0.9017 | 0.9089 | 0.9096 |
| cl-tohoku/bert-large-japanese(512, batch_size=16) | 0.8846 | 0.8928 | 0.8933 |
| cl-tohoku/bert-base-japanese-char-v2(512) | 0.8816 | 0.8912 | 0.8933 |
| megagonlabs/transformers-ud-japanese-electra-base-discriminator | 0.8915 | 0.8987 | 0.8987 |
| megagonlabs/transformers-ud-japanese-electra-base-discriminator(512) | 0.8936 | 0.9015 | 0.9024 |
| nlp-waseda/roberta-base-japanese | 0.8984 | 0.9056 | 0.9060 |
| nlp-waseda/roberta-base-japanese(512) | 0.8933 | 0.9013 | 0.9024 |
| nlp-waseda/roberta-large-japanese | 0.8918 | 0.9005 | 0.9024 |
| nlp-waseda/roberta-large-japanese(512) | 0.9042 | 0.9110 | 0.9114 |
| izumi-lab/electra-base-japanese-discriminator | 0.8917 | 0.8997 | 0.9005 |
| chiTra-1 | 0.9054 | 0.9124 | 0.9132 |
| chiTra-1(512) | 0.9052 | 0.9123 | 0.9132 |
script: entailment.py

| model (max_len) | macro-f1 | weighted-f1 | acc. |
| --- | --- | --- | --- |
| cl-tohoku/bert-base-japanese-v2 | 0.8847 | 0.8851 | 0.8850 |
| cl-tohoku/bert-large-japanese | 0.8879 | 0.8883 | 0.8882 |
| cl-tohoku/bert-base-japanese-char-v2(512) | 0.8813 | 0.8817 | 0.8815 |
| megagonlabs/transformers-ud-japanese-electra-base-discriminator | 0.8967 | 0.8970 | 0.8969 |
| megagonlabs/transformers-ud-japanese-electra-base-discriminator(512) | 0.8965 | 0.8968 | 0.8967 |
| nlp-waseda/roberta-base-japanese | 0.9078 | 0.9080 | 0.9079 |
| nlp-waseda/roberta-base-japanese(512) | 0.9031 | 0.9034 | 0.9032 |
| nlp-waseda/roberta-large-japanese | 0.9031 | 0.9034 | 0.9032 |
| nlp-waseda/roberta-large-japanese(512) | 0.9047 | 0.9051 | 0.9050 |
| chiTra-1 | 0.8899 | 0.8903 | 0.8902 |
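
The macro-f1, weighted-f1, and acc. columns above are standard multi-class classification metrics. A minimal sketch of how such numbers can be computed with scikit-learn, for reference only; it is not necessarily how the repository's scripts evaluate:

```python
from sklearn.metrics import accuracy_score, f1_score

# y_true / y_pred stand in for gold and predicted labels from a fine-tuned
# classifier; the values below are placeholders, not corpus data.
y_true = [0, 1, 2, 1, 0]
y_pred = [0, 1, 1, 1, 0]

macro_f1 = f1_score(y_true, y_pred, average="macro")        # "macro-f1" column
weighted_f1 = f1_score(y_true, y_pred, average="weighted")  # "weighted-f1" column
accuracy = accuracy_score(y_true, y_pred)                   # "acc." column
print(macro_f1, weighted_f1, accuracy)
```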