Fix CPU issues with device

app.py CHANGED

@@ -45,6 +45,20 @@ This Space demonstrates StyleRemix, a Llama 3 model with 8B parameters fine-tune
 🔨 Looking for an even more powerful model? Check out the [13B version](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat) or the large [70B model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
 """
 
+import subprocess
+
+def print_nvidia_smi():
+    try:
+        # Run the nvidia-smi command
+        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, check=True)
+        print(result.stdout)
+    except subprocess.CalledProcessError as e:
+        # Handle errors in the subprocess
+        print(f"Failed to run nvidia-smi: {e}")
+    except FileNotFoundError:
+        # Handle the case where nvidia-smi is not installed
+        print("nvidia-smi is not installed or not in the PATH.")
+
 # Load models
 if not torch.cuda.is_available():
     device = "cpu"
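The new helper shells out to nvidia-smi to confirm where tensors actually live. If the command is unavailable in the container, roughly the same information can be read from torch directly; the sketch below is illustrative only (not part of this commit) and assumes a CUDA-enabled PyTorch build:

import torch

def print_gpu_memory():
    # Degrade gracefully when no GPU is visible
    if not torch.cuda.is_available():
        print("No CUDA device available.")
        return
    free, total = torch.cuda.mem_get_info()    # bytes free/total on the current device
    allocated = torch.cuda.memory_allocated()  # bytes currently held by torch tensors
    print(f"GPU memory: {(total - free) / 1e9:.2f} GB used of {total / 1e9:.2f} GB "
          f"({allocated / 1e9:.2f} GB allocated by torch)")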
@@ -62,10 +76,12 @@ if torch.cuda.is_available():
 # Load in the first model
 model = PeftModel.from_pretrained(base_model, MODEL_PATHS[FIRST_MODEL], adapter_name=FIRST_MODEL).to(device)
 # Load in the rest of the models
-
-
-
+for cur_adapter in MODEL_PATHS.keys():
+    if cur_adapter != FIRST_MODEL:
+        model.load_adapter(MODEL_PATHS[cur_adapter], adapter_name=cur_adapter)
 
+# print(model.device) # Seems it re-allocates to CPU
+model.to(device)
 model.eval()
 
 # Global variable to store the latest obfuscation result
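This is the actual fix: as the commit's own comment notes, adapters attached with load_adapter() can end up re-allocated to CPU even when the base model sits on GPU, so the whole PeftModel is moved to the target device once, after all adapters are attached. In isolation the pattern looks roughly like the following; the base checkpoint and adapter paths are placeholders, not the Space's actual MODEL_PATHS:

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

device = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical model name and adapter paths, for illustration only
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")
model = PeftModel.from_pretrained(base_model, "adapters/length", adapter_name="length")
model.load_adapter("adapters/formality", adapter_name="formality")

model.to(device)  # move base weights and every attached adapter together
model.eval()

model.set_adapter("formality")  # choose which adapter steers the next generation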
@@ -136,8 +152,11 @@ def greet(input_text, length, function_words, grade_level, sarcasm, formality, v
     input_length = inputs.input_ids.shape[1]
     with torch.no_grad():
         outputs = model.generate(**inputs, max_length=100, top_p = 0.95)
+
     response = tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)
-
+
+    print_nvidia_smi() # Print GPU usage
+
     # Save the new obfuscation result and reset feedback
     latest_obfuscation = {
         "datetime": current_time,
|