Upload 6 files
- .gitattributes +1 -0
- config.json +66 -0
- vocabulary/.lock +0 -0
- vocabulary/labels.txt +53 -0
- vocabulary/non_padded_namespaces.txt +2 -0
- vocabulary/tokens.txt +0 -0
- weights.th +3 -0
.gitattributes
CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+weights.th filter=lfs diff=lfs merge=lfs -text
config.json
ADDED

@@ -0,0 +1,66 @@
+{
+  "dataset_reader": {
+    "type": "text_classification_json_utf8",
+    "token_indexers": {
+      "tokens": {
+        "type": "single_id",
+        "namespace": "tokens",
+        "lowercase_tokens": true,
+        "token_min_padding_length": 5
+      }
+    },
+    "tokenizer": {
+      "type": "pretrained_transformer",
+      "model_name": "/data/tianxing/PycharmProjects/AllenNLP/pretrained_models/huggingface/google-bert/bert-base-multilingual-uncased"
+    },
+    "max_sequence_length": 256
+  },
+  "train_data_path": "train.json",
+  "validation_data_path": "valid.json",
+  "vocabulary": {
+    "directory_path": "vocabulary"
+  },
+  "model": {
+    "type": "basic_classifier",
+    "text_field_embedder": {
+      "token_embedders": {
+        "tokens": {
+          "type": "embedding",
+          "num_embeddings": 105880,
+          "embedding_dim": 64
+        }
+      }
+    },
+    "seq2seq_encoder": {
+      "type": "pass_through",
+      "input_dim": 64
+    },
+    "seq2vec_encoder": {
+      "type": "bag_of_embeddings",
+      "embedding_dim": 64
+    }
+  },
+  "data_loader": {
+    "type": "multiprocess",
+    "batch_size": 64,
+    "shuffle": true
+  },
+  "trainer": {
+    "type": "gradient_descent",
+    "cuda_device": -1,
+    "optimizer": {
+      "type": "bert_adam",
+      "lr": 5e-05,
+      "warmup": 0.1,
+      "t_total": 50000,
+      "schedule": "warmup_linear"
+    },
+    "checkpointer": {
+      "serialization_dir": "serialization_dir",
+      "keep_most_recent_by_count": 10
+    },
+    "patience": 5,
+    "validation_metric": "+accuracy",
+    "num_epochs": 100
+  }
+}
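This is a standard AllenNLP training configuration: a basic_classifier over single-id token embeddings (105880 x 64), with the vocabulary and weights uploaded alongside it in this repo. Below is a minimal sketch, not part of the upload, of how the uploaded files might be loaded with AllenNLP's Model.load. Treating the repo root as the serialization directory is an assumption based on the layout of this commit; the custom text_classification_json_utf8 reader is only needed for data loading, not for this step.

# Minimal sketch (not part of this upload): load the uploaded config, vocabulary
# and weights with AllenNLP. Assumes an AllenNLP 2.x environment and that the
# repo root serves as the serialization directory (it contains vocabulary/).
from allennlp.common.params import Params
from allennlp.models.model import Model

params = Params.from_file("config.json")
model = Model.load(
    params,
    serialization_dir=".",       # directory holding vocabulary/
    weights_file="weights.th",   # the LFS-tracked state dict added below
    cuda_device=-1,              # CPU, matching "cuda_device": -1 in the trainer
)
model.eval()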
vocabulary/.lock
ADDED

File without changes.
vocabulary/labels.txt
ADDED

@@ -0,0 +1,53 @@
+en
+zh
+ar
+bg
+cs
+da
+de
+el
+es
+et
+fi
+fr
+ga
+hi
+is
+it
+ja
+ko
+lt
+lv
+mt
+nl
+no
+pl
+pt
+ro
+ru
+sk
+sl
+sw
+sv
+th
+tn
+tr
+ts
+ur
+vi
+tl
+hr
+eo
+uk
+mr
+bn
+af
+zu
+id
+fo
+bs
+yo
+hi_en
+hu
+gl
+hy
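The 53 labels are ISO-639-style language codes (plus the code-mixed hi_en), so the model is evidently a language-identification classifier. A small sketch, assuming the vocabulary/ directory from this upload, of mapping a predicted class index back to one of these codes:

# Sketch only: recover a language code from a predicted class index using the
# exported vocabulary/ directory. "labels" is a non-padded namespace (see
# non_padded_namespaces.txt below), so index 0 maps straight to "en".
from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary.from_files("vocabulary")
print(vocab.get_vocab_size("labels"))                      # expected: 53
print(vocab.get_token_from_index(0, namespace="labels"))   # expected: en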
vocabulary/non_padded_namespaces.txt
ADDED

@@ -0,0 +1,2 @@
+*tags
+*labels
vocabulary/tokens.txt
ADDED

The diff for this file is too large to render; see the raw file.
weights.th
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e497bc5b8d85a72a9bf97634af0502c6af0c0a6bc792e4d237667a49a33583d8
+size 27120547
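weights.th is stored as a Git LFS pointer; the roughly 27 MB payload is fetched with Git LFS rather than kept in the repo directly. A hedged sketch of inspecting it, assuming it is an AllenNLP-style torch state dict as config.json suggests (the 105880 x 64 embedding table alone is about 27 MB in float32, which lines up with the recorded size):

# Sketch only: inspect the downloaded weights.th after the LFS object is pulled.
# Assumes it is a torch state dict saved by AllenNLP's trainer.
import torch

state_dict = torch.load("weights.th", map_location="cpu")
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape))
# The token embedding should be roughly (105880, 64), per config.json.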