Update app.py
app.py
CHANGED
@@ -23,15 +23,11 @@ from exception import CustomExceptionHandling
 
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-
+os.makedirs("models",exist_ok=True)
+#mtsdurica/madlad400-3b-mt-Q8_0-GGUF
 hf_hub_download(
-    repo_id="
-    filename="
-    local_dir="./models",
-)
-hf_hub_download(
-    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
-    filename="google_gemma-3-1b-it-Q5_K_M.gguf",
+    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
+    filename="madlad400-3b-mt-q8_0.gguf",
     local_dir="./models",
 )
 
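This hunk drops the Gemma GGUF downloads, makes sure the `models` directory exists, and fetches the MADLAD-400 3B translation GGUF instead. A minimal sketch of the same step in isolation, assuming only what the hunk shows (the `Llama(...)` arguments here are simplified placeholders, not the ones app.py actually passes):

```python
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

os.makedirs("models", exist_ok=True)
# hf_hub_download returns the local path of the fetched file.
model_path = hf_hub_download(
    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
    filename="madlad400-3b-mt-q8_0.gguf",
    local_dir="./models",
)
# Simplified load; the real app also configures threads, context size, etc.
llm = Llama(model_path=model_path)
```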
@@ -64,6 +60,19 @@ description = """Gemma 3 is a family of lightweight, multimodal open models that
 llm = None
 llm_model = None
 
+def trans(text):
+    llama = llm
+    text = f"<2ja>{text}".encode()
+    tokens = llama.tokenize(text)
+    llama.encode(tokens)
+    tokens = [llama.decoder_start_token()]
+    buf = ""
+    for token in llama.generate(tokens, top_k=0, top_p=0.95, temp=0, repeat_penalty=1.0):
+        buf += llama.detokenize([token]).decode()
+        if token == llama.token_eos():
+            break
+    return buf
+
 def respond(
     message: str,
     history: List[Tuple[str, str]],
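MADLAD-400 is a T5-style encoder-decoder translation model, so the new `trans()` helper cannot use the chat-completion path used elsewhere in app.py: it prefixes the source text with the target-language tag (`<2ja>` = translate into Japanese), runs the encoder, seeds the decoder with `decoder_start_token()`, and decodes greedily until EOS. A standalone sketch of the same loop, assuming the GGUF from the first hunk is already in `./models` and using `<2en>` here instead of the hard-coded `<2ja>`:

```python
from llama_cpp import Llama

llm = Llama(model_path="./models/madlad400-3b-mt-q8_0.gguf")

def translate(text: str, lang_tag: str = "<2en>") -> str:
    # Run the encoder over the tagged source text.
    tokens = llm.tokenize(f"{lang_tag}{text}".encode())
    llm.encode(tokens)
    # Seed the decoder and generate greedily until EOS.
    tokens = [llm.decoder_start_token()]
    out = b""
    for token in llm.generate(tokens, top_k=0, top_p=0.95, temp=0.0, repeat_penalty=1.0):
        if token == llm.token_eos():
            break
        out += llm.detokenize([token])
    # Decode once at the end so multi-byte UTF-8 output is not split mid-character.
    return out.decode(errors="replace")

print(translate("こんにちは、世界"))
```

Collecting raw bytes and decoding once at the end avoids the `UnicodeDecodeError` that `trans()` can hit when a single token carries only part of a multi-byte character, which is likely with Japanese output.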
@@ -97,6 +106,7 @@ def respond(
     global llm
     global llm_model
 
+    #llama = Llama("madlad400-3b-mt-q8_0.gguf")
     # Load the model
     if llm is None or llm_model != model:
         llm = Llama(
@@ -109,6 +119,9 @@
             n_threads_batch=8,
         )
         llm_model = model
+
+    return trans(message)
+
     provider = LlamaCppPythonProvider(llm)
 
     # Create the agent
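Because the new `return trans(message)` executes before `provider = LlamaCppPythonProvider(llm)` (assuming it sits at the top level of `respond()`'s body, which the stripped indentation in this view leaves ambiguous), the agent setup below it is never reached: every chat turn is answered with the MADLAD-400 Japanese translation of the user's message rather than an agent response.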
@@ -172,10 +185,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "
-                "google_gemma-3-1b-it-Q5_K_M.gguf",
+                "madlad400-3b-mt-q8_0.gguf",
             ],
-            value="
+            value="madlad400-3b-mt-q8_0.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),