mateoguaman committed on
Commit
7c22a47
·
verified ·
1 Parent(s): 23a9878

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +5 -0
  2. config.json +55 -0
  3. eval_results.json +7 -0
  4. training.log +3 -0
all_results.json CHANGED
@@ -1,4 +1,9 @@
1
  {
 
 
 
 
 
2
  "total_flos": 1.355217425276076e+18,
3
  "train_loss": 2.8033093200847907,
4
  "train_runtime": 20478.1713,
 
1
  {
2
+ "eval_loss": 3.6843419075012207,
3
+ "eval_runtime": 378.7783,
4
+ "eval_samples": 15692,
5
+ "eval_samples_per_second": 41.428,
6
+ "eval_steps_per_second": 1.296,
7
  "total_flos": 1.355217425276076e+18,
8
  "train_loss": 2.8033093200847907,
9
  "train_runtime": 20478.1713,
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "google/paligemma2-3b-pt-224",
4
+ "_vocab_size": 257152,
5
+ "architectures": [
6
+ "PaliGemmaForConditionalGeneration"
7
+ ],
8
+ "bos_token_id": 2,
9
+ "eos_token_id": 1,
10
+ "hidden_size": 2048,
11
+ "image_token_index": 257152,
12
+ "model_type": "paligemma",
13
+ "num_hidden_layers": 26,
14
+ "pad_token_id": 0,
15
+ "projection_dim": 2304,
16
+ "text_config": {
17
+ "architectures": [
18
+ "Gemma2ForCausalLM"
19
+ ],
20
+ "attn_logit_softcapping": 50.0,
21
+ "cache_implementation": "hybrid",
22
+ "eos_token_id": [
23
+ 1,
24
+ 107
25
+ ],
26
+ "final_logit_softcapping": 30.0,
27
+ "hidden_act": "gelu_pytorch_tanh",
28
+ "hidden_activation": "gelu_pytorch_tanh",
29
+ "hidden_size": 2304,
30
+ "intermediate_size": 9216,
31
+ "model_type": "gemma2",
32
+ "num_hidden_layers": 26,
33
+ "num_image_tokens": 256,
34
+ "num_key_value_heads": 4,
35
+ "query_pre_attn_scalar": 256,
36
+ "sliding_window": 4096,
37
+ "torch_dtype": "bfloat16",
38
+ "vocab_size": 257216
39
+ },
40
+ "torch_dtype": "bfloat16",
41
+ "transformers_version": "4.49.0",
42
+ "vision_config": {
43
+ "hidden_size": 1152,
44
+ "intermediate_size": 4304,
45
+ "model_type": "siglip_vision_model",
46
+ "num_attention_heads": 16,
47
+ "num_hidden_layers": 27,
48
+ "num_image_tokens": 256,
49
+ "num_positions": 256,
50
+ "patch_size": 14,
51
+ "projection_dim": 2304,
52
+ "torch_dtype": "bfloat16",
53
+ "vision_use_head": false
54
+ }
55
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_loss": 3.6843419075012207,
3
+ "eval_runtime": 378.7783,
4
+ "eval_samples": 15692,
5
+ "eval_samples_per_second": 41.428,
6
+ "eval_steps_per_second": 1.296
7
+ }
training.log CHANGED
@@ -182,3 +182,6 @@ Navigate to x=<loc0007>, y=<loc0256>
182
  2025-04-26 07:36:05 - INFO - __main__ - *** Train ***
183
  2025-04-26 13:17:27 - WARNING - huggingface_hub.hf_api - No files have been modified since last commit. Skipping to prevent empty commit.
184
  2025-04-26 13:17:27 - INFO - __main__ - *** Save model ***
 
 
 
 
182
  2025-04-26 07:36:05 - INFO - __main__ - *** Train ***
183
  2025-04-26 13:17:27 - WARNING - huggingface_hub.hf_api - No files have been modified since last commit. Skipping to prevent empty commit.
184
  2025-04-26 13:17:27 - INFO - __main__ - *** Save model ***
185
+ 2025-04-26 13:17:33 - INFO - __main__ - Model saved to data/paligemma2-3b-pt-224-sft-lora-iphonecf-only_rdp
186
+ 2025-04-26 13:17:33 - INFO - __main__ - *** Evaluate ***
187
+ 2025-04-26 13:23:52 - INFO - __main__ - Pushing to hub...