source ~/environments/clearml/bin/activate

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id 0fe5857173ac484a89316214b14fcf96 \
    --clearml-model \
    --queue-name oneshot-a100x2 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-72B-Instruct/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256
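
# The no-op heredoc below (from ": <<'END'" through the closing "END") is read
# and discarded by the shell, so the nine remaining invocations are effectively
# commented out; only the Qwen2.5-72B-Instruct job above is queued when this
# script runs.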
: <<'END'
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id 6a4ecaa68a6e45ea80c62680b0a65aa0 \
    --clearml-model \
    --queue-name oneshot-a100x2 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-72B/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id c67be85bc77f462c93381280019dea1d \
    --clearml-model \
    --queue-name oneshot-a100x4 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-7B/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id effae214cc464181a92d5a57df10f3d6 \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-0.5B/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id 21dd39ef3013401d84b258410647e847 \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-0.5B-Instruct/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id 324acdd4c7c4426dbdfeb29667dc4b53 \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-1.5B/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id 82cbaa6e27c84f08ac10e9f115034b0b \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-1.5B-Instruct/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id 337f5b50610443c7ad2a380dce8e0be8 \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-3B/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id a8ebd9cae5324572906d50f95eeee5dd \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-3B-Instruct/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256

python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
    --model-id 5190911e94a340988dac223c252e72a2 \
    --clearml-model \
    --queue-name oneshot-a100x1 \
    --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
    --task-name "Qwen2.5-7B-Instruct/openllm/vllm" \
    --benchmark-tasks openllm \
    --max-model-len 4096 \
    --add-bos-token \
    --batch-size auto \
    --enable-chunked-prefill \
    --gpu-memory-utilization 0.9 \
    --max-num-batched-tokens 256
END
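
# All ten invocations above are identical except for --model-id, --queue-name,
# and --task-name. Below is a minimal loop-based sketch of the same queuing
# step: the id/queue/name triples are copied verbatim from the commands above,
# while the MODELS array, its layout, and the variable names are illustrative
# assumptions, not part of the original workflow. Note that the original script
# queues only the first job; the other nine sit inside the disabled heredoc.
MODELS=(
    "0fe5857173ac484a89316214b14fcf96 oneshot-a100x2 Qwen2.5-72B-Instruct"
    "6a4ecaa68a6e45ea80c62680b0a65aa0 oneshot-a100x2 Qwen2.5-72B"
    "c67be85bc77f462c93381280019dea1d oneshot-a100x4 Qwen2.5-7B"
    "effae214cc464181a92d5a57df10f3d6 oneshot-a100x1 Qwen2.5-0.5B"
    "21dd39ef3013401d84b258410647e847 oneshot-a100x1 Qwen2.5-0.5B-Instruct"
    "324acdd4c7c4426dbdfeb29667dc4b53 oneshot-a100x1 Qwen2.5-1.5B"
    "82cbaa6e27c84f08ac10e9f115034b0b oneshot-a100x1 Qwen2.5-1.5B-Instruct"
    "337f5b50610443c7ad2a380dce8e0be8 oneshot-a100x1 Qwen2.5-3B"
    "a8ebd9cae5324572906d50f95eeee5dd oneshot-a100x1 Qwen2.5-3B-Instruct"
    "5190911e94a340988dac223c252e72a2 oneshot-a100x1 Qwen2.5-7B-Instruct"
)
for entry in "${MODELS[@]}"; do
    # Split each entry into its three whitespace-separated fields.
    read -r model_id queue model_name <<< "$entry"
    python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
        --model-id "$model_id" \
        --clearml-model \
        --queue-name "$queue" \
        --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
        --task-name "${model_name}/openllm/vllm" \
        --benchmark-tasks openllm \
        --max-model-len 4096 \
        --add-bos-token \
        --batch-size auto \
        --enable-chunked-prefill \
        --gpu-memory-utilization 0.9 \
        --max-num-batched-tokens 256
done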