Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ import spaces
|
|
| 10 |
import subprocess
|
| 11 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 16 |
'qnguyen3/nanoLLaVA',
|
|
@@ -38,7 +38,8 @@ class KeywordsStoppingCriteria(StoppingCriteria):
|
|
| 38 |
self.keyword_ids.append(torch.tensor(cur_keyword_ids))
|
| 39 |
self.tokenizer = tokenizer
|
| 40 |
self.start_len = input_ids.shape[1]
|
| 41 |
-
|
|
|
|
| 42 |
def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
| 43 |
offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len)
|
| 44 |
self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids]
|
|
@@ -51,7 +52,8 @@ class KeywordsStoppingCriteria(StoppingCriteria):
|
|
| 51 |
if keyword in outputs:
|
| 52 |
return True
|
| 53 |
return False
|
| 54 |
-
|
|
|
|
| 55 |
def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
| 56 |
outputs = []
|
| 57 |
for i in range(output_ids.shape[0]):
|
|
|
|
| 10 |
import subprocess
|
| 11 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 12 |
|
| 13 |
+
torch.set_default_device('cuda')
|
| 14 |
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 16 |
'qnguyen3/nanoLLaVA',
|
|
|
|
| 38 |
self.keyword_ids.append(torch.tensor(cur_keyword_ids))
|
| 39 |
self.tokenizer = tokenizer
|
| 40 |
self.start_len = input_ids.shape[1]
|
| 41 |
+
|
| 42 |
+
@spaces.GPU
|
| 43 |
def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
| 44 |
offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len)
|
| 45 |
self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids]
|
|
|
|
| 52 |
if keyword in outputs:
|
| 53 |
return True
|
| 54 |
return False
|
| 55 |
+
|
| 56 |
+
@spaces.GPU
|
| 57 |
def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
|
| 58 |
outputs = []
|
| 59 |
for i in range(output_ids.shape[0]):
|