Update app.py
app.py
CHANGED
@@ -23,15 +23,11 @@ from exception import CustomExceptionHandling
 
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-
+os.makedirs("models",exist_ok=True)
+#mtsdurica/madlad400-3b-mt-Q8_0-GGUF
 hf_hub_download(
-    repo_id="
-    filename="
-    local_dir="./models",
-)
-hf_hub_download(
-    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
-    filename="google_gemma-3-1b-it-Q5_K_M.gguf",
+    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
+    filename="madlad400-3b-mt-q8_0.gguf",
     local_dir="./models",
 )
 
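This hunk drops the Gemma GGUF downloads, makes sure the `models` directory exists, and fetches the MADLAD-400 3B translation GGUF instead. A minimal sketch of the same step in isolation, assuming only what the hunk shows (the `Llama(...)` arguments here are simplified placeholders, not the ones app.py actually passes):

```python
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

os.makedirs("models", exist_ok=True)
# hf_hub_download returns the local path of the fetched file.
model_path = hf_hub_download(
    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
    filename="madlad400-3b-mt-q8_0.gguf",
    local_dir="./models",
)
# Simplified load; the real app also configures threads, context size, etc.
llm = Llama(model_path=model_path)
```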
@@ -64,6 +60,19 @@ description = """Gemma 3 is a family of lightweight, multimodal open models that
 llm = None
 llm_model = None
 
+def trans(text):
+    llama = llm
+    text = f"<2ja>{text}".encode()
+    tokens = llama.tokenize(text)
+    llama.encode(tokens)
+    tokens = [llama.decoder_start_token()]
+    buf = ""
+    for token in llama.generate(tokens, top_k=0, top_p=0.95, temp=0, repeat_penalty=1.0):
+        buf += llama.detokenize([token]).decode()
+        if token == llama.token_eos():
+            break
+    return buf
+
 def respond(
     message: str,
     history: List[Tuple[str, str]],
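MADLAD-400 is a T5-style encoder-decoder translation model, so the new `trans()` helper cannot use the chat-completion path used elsewhere in app.py: it prefixes the source text with the target-language tag (`<2ja>` = translate into Japanese), runs the encoder, seeds the decoder with `decoder_start_token()`, and decodes greedily until EOS. A standalone sketch of the same loop, assuming the GGUF from the first hunk is already in `./models` and using `<2en>` here instead of the hard-coded `<2ja>`:

```python
from llama_cpp import Llama

llm = Llama(model_path="./models/madlad400-3b-mt-q8_0.gguf")

def translate(text: str, lang_tag: str = "<2en>") -> str:
    # Run the encoder over the tagged source text.
    tokens = llm.tokenize(f"{lang_tag}{text}".encode())
    llm.encode(tokens)
    # Seed the decoder and generate greedily until EOS.
    tokens = [llm.decoder_start_token()]
    out = b""
    for token in llm.generate(tokens, top_k=0, top_p=0.95, temp=0.0, repeat_penalty=1.0):
        if token == llm.token_eos():
            break
        out += llm.detokenize([token])
    # Decode once at the end so multi-byte UTF-8 output is not split mid-character.
    return out.decode(errors="replace")

print(translate("こんにちは、世界"))
```

Collecting raw bytes and decoding once at the end avoids the `UnicodeDecodeError` that `trans()` can hit when a single token carries only part of a multi-byte character, which is likely with Japanese output.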
@@ -97,6 +106,7 @@ def respond(
     global llm
     global llm_model
 
+    #llama = Llama("madlad400-3b-mt-q8_0.gguf")
     # Load the model
     if llm is None or llm_model != model:
         llm = Llama(
@@ -109,6 +119,9 @@
             n_threads_batch=8,
         )
         llm_model = model
+
+    return trans(message)
+
     provider = LlamaCppPythonProvider(llm)
 
     # Create the agent
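Because the new `return trans(message)` executes before `provider = LlamaCppPythonProvider(llm)` (assuming it sits at the top level of `respond()`'s body, which the stripped indentation in this view leaves ambiguous), the agent setup below it is never reached: every chat turn is answered with the MADLAD-400 Japanese translation of the user's message rather than an agent response.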
@@ -172,10 +185,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "
-                "google_gemma-3-1b-it-Q5_K_M.gguf",
+                "madlad400-3b-mt-q8_0.gguf",
             ],
-            value="
+            value="madlad400-3b-mt-q8_0.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),