Fix CPU issues with device

app.py CHANGED

@@ -45,6 +45,20 @@ This Space demonstrates StyleRemix, a Llama 3 model with 8B parameters fine-tune
 🔨 Looking for an even more powerful model? Check out the [13B version](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat) or the large [70B model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
 """
 
+import subprocess
+
+def print_nvidia_smi():
+    try:
+        # Run the nvidia-smi command
+        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, check=True)
+        print(result.stdout)
+    except subprocess.CalledProcessError as e:
+        # Handle errors in the subprocess
+        print(f"Failed to run nvidia-smi: {e}")
+    except FileNotFoundError:
+        # Handle the case where nvidia-smi is not installed
+        print("nvidia-smi is not installed or not in the PATH.")
+
 # Load models
 if not torch.cuda.is_available():
     device = "cpu"
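The new helper shells out to nvidia-smi to confirm where tensors actually live. If the command is unavailable in the container, roughly the same information can be read from torch directly; the sketch below is illustrative only (not part of this commit) and assumes a CUDA-enabled PyTorch build:

import torch

def print_gpu_memory():
    # Degrade gracefully when no GPU is visible
    if not torch.cuda.is_available():
        print("No CUDA device available.")
        return
    free, total = torch.cuda.mem_get_info()    # bytes free/total on the current device
    allocated = torch.cuda.memory_allocated()  # bytes currently held by torch tensors
    print(f"GPU memory: {(total - free) / 1e9:.2f} GB used of {total / 1e9:.2f} GB "
          f"({allocated / 1e9:.2f} GB allocated by torch)")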
@@ -62,10 +76,12 @@ if torch.cuda.is_available():
 # Load in the first model
 model = PeftModel.from_pretrained(base_model, MODEL_PATHS[FIRST_MODEL], adapter_name=FIRST_MODEL).to(device)
 # Load in the rest of the models
-
-
-
+for cur_adapter in MODEL_PATHS.keys():
+    if cur_adapter != FIRST_MODEL:
+        model.load_adapter(MODEL_PATHS[cur_adapter], adapter_name=cur_adapter)
 
+# print(model.device) # Seems it re-allocates to CPU
+model.to(device)
 model.eval()
 
 # Global variable to store the latest obfuscation result
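This is the actual fix: as the commit's own comment notes, adapters attached with load_adapter() can end up re-allocated to CPU even when the base model sits on GPU, so the whole PeftModel is moved to the target device once, after all adapters are attached. In isolation the pattern looks roughly like the following; the base checkpoint and adapter paths are placeholders, not the Space's actual MODEL_PATHS:

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

device = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical model name and adapter paths, for illustration only
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")
model = PeftModel.from_pretrained(base_model, "adapters/length", adapter_name="length")
model.load_adapter("adapters/formality", adapter_name="formality")

model.to(device)  # move base weights and every attached adapter together
model.eval()

model.set_adapter("formality")  # choose which adapter steers the next generation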
@@ -136,8 +152,11 @@ def greet(input_text, length, function_words, grade_level, sarcasm, formality, v
     input_length = inputs.input_ids.shape[1]
     with torch.no_grad():
         outputs = model.generate(**inputs, max_length=100, top_p = 0.95)
+
     response = tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)
-
+
+    print_nvidia_smi() # Print GPU usage
+
     # Save the new obfuscation result and reset feedback
     latest_obfuscation = {
         "datetime": current_time,
|