#!/usr/bin/env bash
# Qwen2.5-0.5B-quantized.w8a8 / evaluate_qwen2.5_w8a16.sh
# Uploaded by alexmarques via huggingface_hub (commit 1a552f6, verified)
# Fail fast: abort on errors, unset variables, and mid-pipeline failures.
set -euo pipefail

# Activate the ClearML virtualenv; abort explicitly if it is missing so we
# never queue jobs with whatever `python` happens to be on PATH.
source ~/environments/clearml/bin/activate \
  || { printf 'error: cannot activate clearml env\n' >&2; exit 1; }

# Queue an lm-evaluation-harness run (vLLM backend, openllm task suite) on
# ClearML for the W8A16-quantized Qwen2.5-72B-Instruct model.
#   --model-id      ClearML model artifact ID (not a HF repo name)
#   --queue-name    a100x2: this 72B model needs two A100s
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
  --model-id 0fe5857173ac484a89316214b14fcf96 \
  --clearml-model \
  --queue-name oneshot-a100x2 \
  --project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
  --task-name "Qwen2.5-72B-Instruct/openllm/vllm" \
  --benchmark-tasks openllm \
  --max-model-len 4096 \
  --add-bos-token \
  --batch-size auto \
  --enable-chunked-prefill \
  --gpu-memory-utilization 0.9 \
  --max-num-batched-tokens 256
# The remaining queue commands are intentionally disabled. Use the explicit
# no-op form `: <<'END'` rather than a bare `<<END`: the `:` makes the intent
# clear, and the *quoted* delimiter prevents bash from performing parameter
# and command substitution on the disabled text (an unquoted delimiter would
# still expand any $var or $(cmd) inside the heredoc).
: <<'END'
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id 6a4ecaa68a6e45ea80c62680b0a65aa0 \
--clearml-model \
--queue-name oneshot-a100x2 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-72B/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id c67be85bc77f462c93381280019dea1d \
--clearml-model \
--queue-name oneshot-a100x4 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-7B/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id effae214cc464181a92d5a57df10f3d6 \
--clearml-model \
--queue-name oneshot-a100x1 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-0.5B/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id 21dd39ef3013401d84b258410647e847 \
--clearml-model \
--queue-name oneshot-a100x1 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-0.5B-Instruct/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id 324acdd4c7c4426dbdfeb29667dc4b53 \
--clearml-model \
--queue-name oneshot-a100x1 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-1.5B/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id 82cbaa6e27c84f08ac10e9f115034b0b \
--clearml-model \
--queue-name oneshot-a100x1 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-1.5B-Instruct/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id 337f5b50610443c7ad2a380dce8e0be8 \
--clearml-model \
--queue-name oneshot-a100x1 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-3B/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id a8ebd9cae5324572906d50f95eeee5dd \
--clearml-model \
--queue-name oneshot-a100x1 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-3B-Instruct/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
python /cache/git/research/automation/evaluation_scripts/queue_lm_evaluation_harness_vllm.py \
--model-id 5190911e94a340988dac223c252e72a2 \
--clearml-model \
--queue-name oneshot-a100x1 \
--project-name "LLM quantization - W8A16/llmcompressor/Qwen2.5" \
--task-name "Qwen2.5-7B-Instruct/openllm/vllm" \
--benchmark-tasks openllm \
--max-model-len 4096 \
--add-bos-token \
--batch-size auto \
--enable-chunked-prefill \
--gpu-memory-utilization 0.9 \
--max-num-batched-tokens 256
END