Upload folder using huggingface_hub
Browse files
- config.yml +8 -14
config.yml
CHANGED
|
@@ -1,15 +1,6 @@
|
|
| 1 |
Image:
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
-
# - USER root
|
| 5 |
-
# - apt-get update
|
| 6 |
-
# - apt-get install -y --no-install-recommends wget gnupg2
|
| 7 |
-
# - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
| 8 |
-
# - dpkg -i cuda-keyring_1.1-1_all.deb
|
| 9 |
-
# - apt-get update
|
| 10 |
-
# - apt-get install -y --no-install-recommends tensorrt python3-libnvinfer-dev
|
| 11 |
-
# - rm -rf /var/lib/apt/lists/*
|
| 12 |
-
# - USER 1000
|
| 13 |
- pip install --upgrade setuptools wheel
|
| 14 |
- pip install huggingface_hub==0.19.4 requests opencv-python-headless pydantic onnxruntime onnxruntime-gpu scikit-learn tensorflow torch-tensorrt==2.7 torch==2.7.1 torchvision==0.22.1 pyyaml
|
| 15 |
|
|
@@ -19,7 +10,11 @@ NodeSelector:
|
|
| 19 |
gpu_count: 1
|
| 20 |
min_vram_gb_per_gpu: 24
|
| 21 |
# include:
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
exclude:
|
| 24 |
- h100
|
| 25 |
- a100
|
|
@@ -41,7 +36,6 @@ NodeSelector:
|
|
| 41 |
|
| 42 |
Chute:
|
| 43 |
timeout_seconds: 300
|
| 44 |
-
concurrency:
|
| 45 |
-
max_instances:
|
| 46 |
-
scaling_threshold: 0.
|
| 47 |
-
allow_external_egress: true
|
|
|
|
| 1 |
Image:
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
- pip install huggingface_hub==0.19.4 requests opencv-python-headless pydantic onnxruntime onnxruntime-gpu scikit-learn tensorflow torch-tensorrt==2.7 torch==2.7.1 torchvision==0.22.1 pyyaml
|
| 6 |
|
|
|
|
| 10 |
gpu_count: 1
|
| 11 |
min_vram_gb_per_gpu: 24
|
| 12 |
# include:
|
| 13 |
+
# - a100
|
| 14 |
+
# - a100_40gb
|
| 15 |
+
# - "3090"
|
| 16 |
+
# - a40
|
| 17 |
+
# - a6000
|
| 18 |
exclude:
|
| 19 |
- h100
|
| 20 |
- a100
|
|
|
|
| 36 |
|
| 37 |
Chute:
|
| 38 |
timeout_seconds: 300
|
| 39 |
+
concurrency: 4 # Reduced concurrency to limit memory usage
|
| 40 |
+
max_instances: 5 # Reduced max instances to limit memory usage
|
| 41 |
+
scaling_threshold: 0.5 # Higher threshold to reduce scaling frequency
|
|
|