Spaces:
Sleeping
Sleeping
update
Browse files
utils.py
CHANGED
|
@@ -7,6 +7,9 @@ import io
|
|
| 7 |
from transformers import DynamicCache
|
| 8 |
import os
|
| 9 |
import spaces
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
os.makedirs("tmp", exist_ok=True)
|
| 12 |
|
|
@@ -54,7 +57,7 @@ def generate_answer(
|
|
| 54 |
)
|
| 55 |
new_id = outputs.logits[0, -1].argmax()
|
| 56 |
generated_ids.append(new_id)
|
| 57 |
-
if new_id.item()
|
| 58 |
break
|
| 59 |
|
| 60 |
answer = tokenizer.decode(torch.stack(generated_ids), skip_special_tokens=True)
|
|
@@ -84,6 +87,8 @@ def get_condense_kv_cache(context: str):
|
|
| 84 |
response = requests.post(url, json=payload, headers=headers).json()
|
| 85 |
print(response)
|
| 86 |
numpy_kv_cache, error = load_npy_from_url(response["compressed_kv_url"])
|
|
|
|
|
|
|
| 87 |
kv_cache = DynamicCache.from_legacy_cache(
|
| 88 |
torch.from_numpy(numpy_kv_cache).to("cuda").to(torch.bfloat16)
|
| 89 |
)
|
|
|
|
| 7 |
from transformers import DynamicCache
|
| 8 |
import os
|
| 9 |
import spaces
|
| 10 |
+
import httpx
|
| 11 |
+
import tqdm
|
| 12 |
+
|
| 13 |
|
| 14 |
os.makedirs("tmp", exist_ok=True)
|
| 15 |
|
|
|
|
| 57 |
)
|
| 58 |
new_id = outputs.logits[0, -1].argmax()
|
| 59 |
generated_ids.append(new_id)
|
| 60 |
+
if new_id.item() == model.generation_config.eos_token_id:
|
| 61 |
break
|
| 62 |
|
| 63 |
answer = tokenizer.decode(torch.stack(generated_ids), skip_special_tokens=True)
|
|
|
|
| 87 |
response = requests.post(url, json=payload, headers=headers).json()
|
| 88 |
print(response)
|
| 89 |
numpy_kv_cache, error = load_npy_from_url(response["compressed_kv_url"])
|
| 90 |
+
if error:
|
| 91 |
+
print(error)
|
| 92 |
kv_cache = DynamicCache.from_legacy_cache(
|
| 93 |
torch.from_numpy(numpy_kv_cache).to("cuda").to(torch.bfloat16)
|
| 94 |
)
|