AngelRaychev
/

1.5B-value-iteration_2

Text Generation

text-generation-inference

Model card Files Files and versions

AngelRaychev committed on Jun 9

Commit

1bbdbcd

·

verified ·

1 Parent(s): e9986ba

Upload Qwen2ForCausalLM

Files changed (2) hide show

config.json +1 -1
model.safetensors +1 -1

config.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "intermediate_size": 8960,
   "max_position_embeddings": 131072,
   "max_window_layers": 28,
-  "model_card": "\nFinal Loss: 0.6147\nBatch Size: 256\nLearning Rate: 1e-05\nDataset Size: 15397\n",
   "model_type": "qwen2",
   "num_attention_heads": 12,
   "num_hidden_layers": 28,

   "intermediate_size": 8960,
   "max_position_embeddings": 131072,
   "max_window_layers": 28,
+  "model_card": "\nFinal Loss: 0.1743\nBatch Size: 1024\nLearning Rate: 2e-05\nDataset Size: 59966\n",
   "model_type": "qwen2",
   "num_attention_heads": 12,
   "num_hidden_layers": 28,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8efa318cd0c7810423ba9d9f681e05cab9392d83709b1d367cdac6461a4f7b83
 size 3087467144

 version https://git-lfs.github.com/spec/v1
+oid sha256:445dc17adf8376ba3a2b182fd1a5d8ca2b02b496925586f4fa2e3f80c0ca791b
 size 3087467144