QWEN-2.5-Coder-7B

Sleeping

App Files Community

Leri777 committed on Oct 9, 2024

Commit

8899af8

verified ·

1 Parent(s): e541b11

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -8

app.py CHANGED Viewed

@@ -25,11 +25,18 @@ MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct"
 CONTEXT_LENGTH = 16000
 # Configuration for 4-bit quantization
-quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
 # Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", quantization_config=quantization_config)
 # Create Hugging Face pipeline
 pipe = pipeline(
@@ -59,7 +66,9 @@ template = """<|im_start|>system
 <|im_start|>assistant"""
 # Create LangChain prompt and chain
-prompt = PromptTemplate(template=template, input_variables=["system_prompt", "history", "human_input"])
 chain = LLMChain(llm=chat_model, prompt=prompt)
 # Format the conversation history
@@ -70,11 +79,26 @@ def format_history(history):
     return formatted
 # Prediction function using LangChain and model
-def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
     formatted_history = format_history(history)
     try:
-        result = chain.run({"system_prompt": system_prompt, "history": formatted_history, "human_input": message})
         return result
     except Exception as e:
         logger.exception(f"Error during prediction: {e}")
@@ -91,7 +115,7 @@ gr.Interface(
         gr.Slider(128, 2048, 1024, label="Max new tokens"),
         gr.Slider(1, 80, 40, label="Top K sampling"),
         gr.Slider(0, 2, 1.1, label="Repetition penalty"),
-        gr.Slider(0, 1, 0.95, label="Top P sampling")
     ],
     outputs="text",
     title="Qwen2.5-Coder-7B-Instruct with LangChain",

 CONTEXT_LENGTH = 16000
 # Configuration for 4-bit quantization
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
+)
 # Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="auto",
+    quantization_config=quantization_config,
+    trust_remote_code=True,
+)
 # Create Hugging Face pipeline
 pipe = pipeline(
 <|im_start|>assistant"""
 # Create LangChain prompt and chain
+prompt = PromptTemplate(
+    template=template, input_variables=["system_prompt", "history", "human_input"]
+)
 chain = LLMChain(llm=chat_model, prompt=prompt)
 # Format the conversation history
     return formatted
 # Prediction function using LangChain and model
+def predict(
+    message,
+    history,
+    system_prompt,
+    temperature,
+    max_new_tokens,
+    top_k,
+    repetition_penalty,
+    top_p,
+):
     formatted_history = format_history(history)
     try:
+        result = chain.run(
+            {
+                "system_prompt": system_prompt,
+                "history": formatted_history,
+                "human_input": message,
+            }
+        )
         return result
     except Exception as e:
         logger.exception(f"Error during prediction: {e}")
         gr.Slider(128, 2048, 1024, label="Max new tokens"),
         gr.Slider(1, 80, 40, label="Top K sampling"),
         gr.Slider(0, 2, 1.1, label="Repetition penalty"),
+        gr.Slider(0, 1, 0.95, label="Top P sampling"),
     ],
     outputs="text",
     title="Qwen2.5-Coder-7B-Instruct with LangChain",