hallisky committed
Commit de1b0d5 · 1 Parent(s): 63b741b

Fix CPU issues with device

Files changed (1): app.py +23 -4
app.py CHANGED
@@ -45,6 +45,20 @@ This Space demonstrates StyleRemix, a Llama 3 model with 8B parameters fine-tune
 🔨 Looking for an even more powerful model? Check out the [13B version](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat) or the large [70B model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
 """

+import subprocess
+
+def print_nvidia_smi():
+    try:
+        # Run the nvidia-smi command
+        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, check=True)
+        print(result.stdout)
+    except subprocess.CalledProcessError as e:
+        # Handle errors in the subprocess
+        print(f"Failed to run nvidia-smi: {e}")
+    except FileNotFoundError:
+        # Handle the case where nvidia-smi is not installed
+        print("nvidia-smi is not installed or not in the PATH.")
+
 # Load models
 if not torch.cuda.is_available():
     device = "cpu"
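Note: the print_nvidia_smi helper added above shells out to nvidia-smi so the Space logs can confirm a GPU was actually assigned. As an aside (not part of this commit), torch can report similar information without a subprocess; a minimal sketch using only standard torch.cuda calls:

import torch

def print_gpu_memory():
    # Alternative to shelling out to nvidia-smi: query torch directly.
    if not torch.cuda.is_available():
        print("No CUDA device visible to torch.")
        return
    for i in range(torch.cuda.device_count()):
        allocated_mib = torch.cuda.memory_allocated(i) / 1024**2  # MiB currently allocated by tensors
        reserved_mib = torch.cuda.memory_reserved(i) / 1024**2    # MiB held by the caching allocator
        print(f"cuda:{i} ({torch.cuda.get_device_name(i)}): "
              f"{allocated_mib:.0f} MiB allocated, {reserved_mib:.0f} MiB reserved")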
@@ -62,10 +76,12 @@ if torch.cuda.is_available():
 # Load in the first model
 model = PeftModel.from_pretrained(base_model, MODEL_PATHS[FIRST_MODEL], adapter_name=FIRST_MODEL).to(device)
 # Load in the rest of the models
-# for cur_adapter in MODEL_PATHS.keys():
-#     if cur_adapter != FIRST_MODEL:
-#         model.load_adapter(MODEL_PATHS[cur_adapter], adapter_name=cur_adapter)
+for cur_adapter in MODEL_PATHS.keys():
+    if cur_adapter != FIRST_MODEL:
+        model.load_adapter(MODEL_PATHS[cur_adapter], adapter_name=cur_adapter)

+# print(model.device) # Seems it re-allocates to CPU
+model.to(device)
 model.eval()

 # Global variable to store the latest obfuscation result
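This hunk is the heart of the fix: the previously commented-out adapter loop is enabled, and because PeftModel.load_adapter can leave newly loaded adapter weights on CPU (the "# print(model.device)" comment records that observation), model.to(device) is re-applied after the loop. A minimal sketch of the same pattern, with hypothetical adapter names and paths standing in for the Space's real MODEL_PATHS and FIRST_MODEL, and an assumed base checkpoint:

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

device = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical stand-ins for the Space's MODEL_PATHS / FIRST_MODEL.
ADAPTER_PATHS = {"length": "org/length-adapter", "formality": "org/formality-adapter"}
FIRST = "length"

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")  # assumed base model
model = PeftModel.from_pretrained(base_model, ADAPTER_PATHS[FIRST], adapter_name=FIRST).to(device)
for name, path in ADAPTER_PATHS.items():
    if name != FIRST:
        model.load_adapter(path, adapter_name=name)  # new adapter weights may land on CPU

model.to(device)  # re-apply placement so every loaded adapter lives on `device`
model.eval()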
@@ -136,8 +152,11 @@ def greet(input_text, length, function_words, grade_level, sarcasm, formality, v
     input_length = inputs.input_ids.shape[1]
     with torch.no_grad():
         outputs = model.generate(**inputs, max_length=100, top_p = 0.95)
+
     response = tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)
-
+
+    print_nvidia_smi() # Print GPU usage
+
     # Save the new obfuscation result and reset feedback
     latest_obfuscation = {
         "datetime": current_time,
 
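The print_nvidia_smi() call after generation makes each request log GPU memory, presumably to verify the model stays on the GPU between requests. If greet is meant to apply one remix adapter per request, the matching PEFT call would be set_adapter; a hypothetical sketch (the per-request selection logic is an assumption, not shown in this diff):

def generate_with_adapter(adapter_name, input_text):
    # Hypothetical helper mirroring greet(): activate one LoRA adapter, then generate.
    model.set_adapter(adapter_name)
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_length = inputs.input_ids.shape[1]
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=100, top_p=0.95)
    return tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)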