Update app.py
Browse files
app.py
CHANGED
|
@@ -28,8 +28,8 @@ os.makedirs("models",exist_ok=True)
|
|
| 28 |
|
| 29 |
|
| 30 |
hf_hub_download(
|
| 31 |
-
repo_id="
|
| 32 |
-
filename="
|
| 33 |
local_dir="./models",
|
| 34 |
)
|
| 35 |
|
|
@@ -85,11 +85,10 @@ def respond(
|
|
| 85 |
try:
|
| 86 |
global llama
|
| 87 |
if llama == None:
|
| 88 |
-
model_id = "
|
| 89 |
llama = Llama(f"models/{model_id}",flash_attn=False,
|
| 90 |
n_gpu_layers=0,
|
| 91 |
-
|
| 92 |
-
n_ctx=512,
|
| 93 |
n_threads=2,
|
| 94 |
n_threads_batch=2)
|
| 95 |
|
|
@@ -97,14 +96,14 @@ def respond(
|
|
| 97 |
llama.encode(tokens)
|
| 98 |
tokens = [llama.decoder_start_token()]
|
| 99 |
outputs =""
|
| 100 |
-
iteration =
|
| 101 |
for i in range(iteration):
|
| 102 |
for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
|
| 103 |
outputs+= llama.detokenize([token]).decode()
|
| 104 |
yield outputs
|
| 105 |
if token == llama.token_eos():
|
| 106 |
break
|
| 107 |
-
outputs+="\n"
|
| 108 |
return outputs
|
| 109 |
except Exception as e:
|
| 110 |
# Custom exception handling
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
hf_hub_download(
|
| 31 |
+
repo_id="pszemraj/flan-t5-large-grammar-synthesis",
|
| 32 |
+
filename="ggml-model-Q6_K.gguf",
|
| 33 |
local_dir="./models",
|
| 34 |
)
|
| 35 |
|
|
|
|
| 85 |
try:
|
| 86 |
global llama
|
| 87 |
if llama == None:
|
| 88 |
+
model_id = "ggml-model-Q6_K.gguf"
|
| 89 |
llama = Llama(f"models/{model_id}",flash_attn=False,
|
| 90 |
n_gpu_layers=0,
|
| 91 |
+
|
|
|
|
| 92 |
n_threads=2,
|
| 93 |
n_threads_batch=2)
|
| 94 |
|
|
|
|
| 96 |
llama.encode(tokens)
|
| 97 |
tokens = [llama.decoder_start_token()]
|
| 98 |
outputs =""
|
| 99 |
+
iteration = 1
|
| 100 |
for i in range(iteration):
|
| 101 |
for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
|
| 102 |
outputs+= llama.detokenize([token]).decode()
|
| 103 |
yield outputs
|
| 104 |
if token == llama.token_eos():
|
| 105 |
break
|
| 106 |
+
#outputs+="\n"
|
| 107 |
return outputs
|
| 108 |
except Exception as e:
|
| 109 |
# Custom exception handling
|