Spaces: Sleeping
update
Browse files
app.py
CHANGED
|
@@ -22,7 +22,7 @@ def get_model_kv_cache(context_ids):
|
|
| 22 |
kv_cache = DynamicCache.from_legacy_cache(
|
| 23 |
past_key_values
|
| 24 |
)
|
| 25 |
- return
|
| 26 |
|
| 27 |
@spaces.GPU
|
| 28 |
def inference(question: str, doc_path: str, use_turbo=True) -> str:
|
|
@@ -43,6 +43,7 @@ def inference(question: str, doc_path: str, use_turbo=True) -> str:
|
|
| 43 |
print("turbo-mode-off")
|
| 44 |
kv_cache = get_model_kv_cache(context_ids)
|
| 45 |
|
|
|
|
| 46 |
|
| 47 |
answer = generate_answer(MODEL, TOKENIZER, prompt_ids, kv_cache, context_length, 128)
|
| 48 |
print(answer)
|
|
|
|
| 22 |
kv_cache = DynamicCache.from_legacy_cache(
|
| 23 |
past_key_values
|
| 24 |
)
|
| 25 |
+ return kv_cache
|
| 26 |
|
| 27 |
@spaces.GPU
|
| 28 |
def inference(question: str, doc_path: str, use_turbo=True) -> str:
|
|
|
|
| 43 |
print("turbo-mode-off")
|
| 44 |
kv_cache = get_model_kv_cache(context_ids)
|
| 45 |
|
| 46 |
+ print("kv-length", kv_cache.get_seq_length())
|
| 47 |
|
| 48 |
answer = generate_answer(MODEL, TOKENIZER, prompt_ids, kv_cache, context_length, 128)
|
| 49 |
print(answer)
|