KangLiao commited on
Commit
2788c5c
·
1 Parent(s): eeb3295
app.py CHANGED
@@ -135,7 +135,6 @@ def generate_image(prompt_scene,
135
 
136
  prompt = prompt_scene + " " + prompt_camera
137
 
138
-
139
  bsz = 4
140
  with torch.no_grad():
141
  images, output_reasoning = model.generate(
@@ -159,7 +158,6 @@ def generate_image(prompt_scene,
159
  return ret_images
160
 
161
 
162
-
163
  # Gradio interface
164
  css = '''
165
  .gradio-container {max-width: 960px !important}
 
135
 
136
  prompt = prompt_scene + " " + prompt_camera
137
 
 
138
  bsz = 4
139
  with torch.no_grad():
140
  images, output_reasoning = model.generate(
 
158
  return ret_images
159
 
160
 
 
161
  # Gradio interface
162
  css = '''
163
  .gradio-container {max-width: 960px !important}
configs/models/qwen2_5_1_5b_radio_sd3_dynamic_puffin.py CHANGED
@@ -6,8 +6,9 @@ from src.models.radiov3.hf_model import RADIOModel
6
  from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
 
9
- llm_name_or_path = 'Qwen/Qwen2.5-1.5B-Instruct'
10
  sd3_model_name_or_path = "configs/sd3"
 
11
 
12
  prompt_template = dict(
13
  SYSTEM=('<|im_start|>system\n{system}<|im_end|>\n'),
@@ -68,14 +69,14 @@ model = dict(type=Qwen2p5RadioStableDiffusion3HFDynamic,
68
  freeze_visual_encoder=True,
69
  freeze_llm=True,
70
  llm=dict(
71
- type=AutoModelForCausalLM.from_pretrained,
72
  pretrained_model_name_or_path=llm_name_or_path,
73
  torch_dtype=torch.bfloat16,
74
  #local_files_only=True,
75
  #attn_implementation='flash_attention_2',
76
  ),
77
  tokenizer=dict(
78
- type=AutoTokenizer.from_pretrained,
79
  pretrained_model_name_or_path=llm_name_or_path,
80
  #local_files_only=True,
81
  ),
@@ -83,8 +84,8 @@ model = dict(type=Qwen2p5RadioStableDiffusion3HFDynamic,
83
  pretrained_pth=None,
84
  use_activation_checkpointing=False,
85
  visual_encoder=dict(
86
- type=RADIOModel.from_pretrained,
87
- pretrained_model_name_or_path="nvidia/C-RADIOv3-H",
88
  torch_dtype=torch.bfloat16,
89
  #local_files_only=True,
90
  ),
 
6
  from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
 
9
+ llm_name_or_path = 'configs/qwen2.5'
10
  sd3_model_name_or_path = "configs/sd3"
11
+ radiov3_model_name_or_path = "configs/radiov3"
12
 
13
  prompt_template = dict(
14
  SYSTEM=('<|im_start|>system\n{system}<|im_end|>\n'),
 
69
  freeze_visual_encoder=True,
70
  freeze_llm=True,
71
  llm=dict(
72
+ type=AutoModelForCausalLM.from_config,
73
  pretrained_model_name_or_path=llm_name_or_path,
74
  torch_dtype=torch.bfloat16,
75
  #local_files_only=True,
76
  #attn_implementation='flash_attention_2',
77
  ),
78
  tokenizer=dict(
79
+ type=AutoTokenizer.from_config,
80
  pretrained_model_name_or_path=llm_name_or_path,
81
  #local_files_only=True,
82
  ),
 
84
  pretrained_pth=None,
85
  use_activation_checkpointing=False,
86
  visual_encoder=dict(
87
+ type=RADIOModel.from_config,
88
+ pretrained_model_name_or_path=radiov3_model_name_or_path,
89
  torch_dtype=torch.bfloat16,
90
  #local_files_only=True,
91
  ),
configs/qwen2.5/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1536,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 8960,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 21,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 2,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 32768,
21
+ "tie_word_embeddings": true,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.43.1",
24
+ "use_cache": true,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 151936
27
+ }
configs/qwen2.5/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_p": 0.8,
12
+ "top_k": 20,
13
+ "transformers_version": "4.37.0"
14
+ }
configs/qwen2.5/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
configs/qwen2.5/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
configs/qwen2.5/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
configs/radio3/config.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adaptor_configs": {},
3
+ "adaptor_names": null,
4
+ "architectures": [
5
+ "RADIOModel"
6
+ ],
7
+ "args": {
8
+ "aa": null,
9
+ "amp": true,
10
+ "amp_dtype": "bfloat16",
11
+ "amp_impl": "native",
12
+ "aug_repeats": 0,
13
+ "aug_splits": 0,
14
+ "bn_eps": null,
15
+ "bn_momentum": null,
16
+ "cache_dir": null,
17
+ "channels_last": false,
18
+ "checkpoint_hist": 10,
19
+ "chk_keep_forever": 100,
20
+ "class_map": "",
21
+ "clip_grad": null,
22
+ "clip_mode": "norm",
23
+ "cls_token_per_teacher": true,
24
+ "coco_annotations_file": "/datasets/coco2017-adlsa/annotations/captions_val2017.json",
25
+ "coco_image_dir": "/datasets/coco2017-adlsa/val2017",
26
+ "color_jitter": 0.4,
27
+ "cooldown_epochs": 0,
28
+ "cpe_max_size": 2048,
29
+ "cpe_num_registers": 4,
30
+ "crd_loss": false,
31
+ "crd_loss_weight": 0.8,
32
+ "crop_pct": null,
33
+ "cutmix": 0.0,
34
+ "cutmix_minmax": null,
35
+ "dataset_download": false,
36
+ "debug_full_knn": false,
37
+ "decay_epochs": 90,
38
+ "decay_milestones": [
39
+ 90,
40
+ 180,
41
+ 270
42
+ ],
43
+ "decay_rate": 0.1,
44
+ "depchain": true,
45
+ "detect_anomaly": false,
46
+ "dist_bn": "reduce",
47
+ "dist_norm_weight": 0.0,
48
+ "distributed": true,
49
+ "drop": 0.0,
50
+ "drop_block": null,
51
+ "drop_connect": null,
52
+ "drop_path": null,
53
+ "dtype": "float32",
54
+ "epoch_repeats": 0.0,
55
+ "eval": false,
56
+ "eval_metric": "knn_top1",
57
+ "eval_teacher": false,
58
+ "eval_teacher_only": false,
59
+ "eval_throughput": false,
60
+ "fast_norm": false,
61
+ "fd_loss_fn": "MSE",
62
+ "feature_normalization": "PHI_STANDARDIZE",
63
+ "feature_summarizer": "cls_token",
64
+ "feature_upscale_factor": null,
65
+ "force_new_wandb_id": false,
66
+ "force_spectral_reparam": false,
67
+ "freeze_bn": false,
68
+ "fsdp": true,
69
+ "full_equivariance": false,
70
+ "fuser": "",
71
+ "gp": null,
72
+ "grad_accum_steps": 1,
73
+ "grad_checkpointing": false,
74
+ "head_init_bias": null,
75
+ "head_init_scale": null,
76
+ "head_lr": null,
77
+ "head_warmup": 5,
78
+ "head_weight_decay": 0.01,
79
+ "hflip": 0.5,
80
+ "img_size": null,
81
+ "in_chans": null,
82
+ "initial_checkpoint": null,
83
+ "input_size": null,
84
+ "interpolation": "",
85
+ "layer_decay": null,
86
+ "local_rank": 0,
87
+ "log_interval": 50,
88
+ "log_mlflow": false,
89
+ "log_wandb": true,
90
+ "loss_auto_balance": false,
91
+ "lr_base": 0.1,
92
+ "lr_base_scale": "",
93
+ "lr_base_size": 256,
94
+ "lr_cycle_decay": 0.5,
95
+ "lr_cycle_limit": 1,
96
+ "lr_cycle_mul": 1.0,
97
+ "lr_k_decay": 1.0,
98
+ "lr_noise": null,
99
+ "lr_noise_pct": 0.67,
100
+ "lr_noise_std": 1.0,
101
+ "mean": null,
102
+ "mesa": false,
103
+ "min_lr": 0.0001,
104
+ "mixup": 0.0,
105
+ "mixup_mode": "batch",
106
+ "mixup_off_epoch": 0,
107
+ "mixup_prob": 1.0,
108
+ "mixup_switch_prob": 0.5,
109
+ "mlp_hidden_size": 2560,
110
+ "mlp_num_inner": 1,
111
+ "mlp_version": "v2",
112
+ "model": "vit_huge_patch16_224",
113
+ "model_kwargs": {},
114
+ "model_norm": false,
115
+ "momentum": 0.9,
116
+ "no_aug": false,
117
+ "no_custom_validation": false,
118
+ "no_ddp_bb": true,
119
+ "no_knn": false,
120
+ "no_prefetcher": false,
121
+ "no_resume_opt": false,
122
+ "num_classes": null,
123
+ "one_logger_app_tag": "",
124
+ "one_logger_is_baseline": false,
125
+ "one_logger_run_name": "",
126
+ "onelogger": null,
127
+ "opt_betas": null,
128
+ "opt_eps": null,
129
+ "patience_epochs": 10,
130
+ "pin_mem": false,
131
+ "prefetcher": true,
132
+ "pretrained": false,
133
+ "rank": 0,
134
+ "ratio": [
135
+ 0.75,
136
+ 1.3333333333333333
137
+ ],
138
+ "recount": 1,
139
+ "recovery_interval": 0,
140
+ "register_multiple": 0,
141
+ "remode": "pixel",
142
+ "reprob": 0.0,
143
+ "reset_loss_state": true,
144
+ "resplit": false,
145
+ "sample_tracking": false,
146
+ "save_images": false,
147
+ "scale": [
148
+ 0.5,
149
+ 1.0
150
+ ],
151
+ "sched": "cosine",
152
+ "seed": 42,
153
+ "shift_equivariance": true,
154
+ "smoothing": 0.1,
155
+ "spectral_heads": false,
156
+ "spectral_reparam": false,
157
+ "spectral_weight_decay": null,
158
+ "split_bn": false,
159
+ "start_epoch": null,
160
+ "std": null,
161
+ "stream_teachers": true,
162
+ "sync_bn": false,
163
+ "synchronize_step": false,
164
+ "teachers": [
165
+ {
166
+ "fd_normalize": false,
167
+ "feature_distillation": true,
168
+ "input_size": 378,
169
+ "model": "ViT-H-14-378-quickgelu",
170
+ "name": "clip",
171
+ "pretrained": "dfn5b",
172
+ "type": "open_clip",
173
+ "use_summary": true
174
+ },
175
+ {
176
+ "fd_normalize": false,
177
+ "feature_distillation": true,
178
+ "input_size": 384,
179
+ "model": "siglip2-g-384",
180
+ "name": "siglip2-g",
181
+ "type": "siglip2",
182
+ "use_summary": true
183
+ },
184
+ {
185
+ "fd_normalize": false,
186
+ "feature_distillation": true,
187
+ "input_size": 224,
188
+ "model": "dinov2_vitg14_reg",
189
+ "name": "dino_v2",
190
+ "type": "dino_v2",
191
+ "use_summary": true
192
+ },
193
+ {
194
+ "fd_normalize": false,
195
+ "feature_distillation": true,
196
+ "input_size": 1024,
197
+ "model": "vit-h",
198
+ "name": "sam",
199
+ "type": "sam",
200
+ "use_summary": false
201
+ }
202
+ ],
203
+ "torchcompile": null,
204
+ "torchscript": false,
205
+ "train_interpolation": "random",
206
+ "train_split": "train",
207
+ "tta": 0,
208
+ "use_coco": false,
209
+ "use_multi_epochs_loader": false,
210
+ "val_ema_only": false,
211
+ "val_split": "val",
212
+ "vflip": 0.0,
213
+ "vitdet_version": 1,
214
+ "wandb_entity": "",
215
+ "wandb_id": "",
216
+ "wandb_job_type": "",
217
+ "wandb_name": "",
218
+ "wandb_project": "",
219
+ "warmup_lr": 1e-05,
220
+ "warmup_prefix": false,
221
+ "worker_seeding": "all",
222
+ "workers": 8,
223
+ "world_size": 256
224
+ },
225
+ "auto_map": {
226
+ "AutoConfig": "hf_model.RADIOConfig",
227
+ "AutoModel": "hf_model.RADIOModel"
228
+ },
229
+ "feature_normalizer_config": null,
230
+ "inter_feature_normalizer_config": null,
231
+ "max_resolution": 2048,
232
+ "patch_size": 16,
233
+ "preferred_resolution": [
234
+ 512,
235
+ 512
236
+ ],
237
+ "torch_dtype": "float32",
238
+ "transformers_version": "4.51.3",
239
+ "version": "c-radio_v3-h",
240
+ "vitdet_window_size": null
241
+ }