Leri777 committed
Commit 8899af8 · verified · 1 Parent(s): e541b11

Update app.py

Files changed (1):
  1. app.py (+32 −8)
app.py CHANGED
@@ -25,11 +25,18 @@ MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct"
 CONTEXT_LENGTH = 16000
 
 # Configuration for 4-bit quantization
-quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
+)
 
 # Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", quantization_config=quantization_config)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="auto",
+    quantization_config=quantization_config,
+    trust_remote_code=True,
+)
 
 # Create Hugging Face pipeline
 pipe = pipeline(
@@ -59,7 +66,9 @@ template = """<|im_start|>system
 <|im_start|>assistant"""
 
 # Create LangChain prompt and chain
-prompt = PromptTemplate(template=template, input_variables=["system_prompt", "history", "human_input"])
+prompt = PromptTemplate(
+    template=template, input_variables=["system_prompt", "history", "human_input"]
+)
 chain = LLMChain(llm=chat_model, prompt=prompt)
 
 # Format the conversation history
@@ -70,11 +79,26 @@ def format_history(history):
     return formatted
 
 # Prediction function using LangChain and model
-def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+def predict(
+    message,
+    history,
+    system_prompt,
+    temperature,
+    max_new_tokens,
+    top_k,
+    repetition_penalty,
+    top_p,
+):
     formatted_history = format_history(history)
-
+
     try:
-        result = chain.run({"system_prompt": system_prompt, "history": formatted_history, "human_input": message})
+        result = chain.run(
+            {
+                "system_prompt": system_prompt,
+                "history": formatted_history,
+                "human_input": message,
+            }
+        )
         return result
     except Exception as e:
         logger.exception(f"Error during prediction: {e}")
@@ -91,7 +115,7 @@ gr.Interface(
         gr.Slider(128, 2048, 1024, label="Max new tokens"),
         gr.Slider(1, 80, 40, label="Top K sampling"),
         gr.Slider(0, 2, 1.1, label="Repetition penalty"),
-        gr.Slider(0, 1, 0.95, label="Top P sampling")
+        gr.Slider(0, 1, 0.95, label="Top P sampling"),
     ],
     outputs="text",
    title="Qwen2.5-Coder-7B-Instruct with LangChain",