Update app.py
app.py
CHANGED
@@ -303,102 +303,107 @@ def transfer_input(inputs):
 ##############################################
 # generate function
 ##############################################
-def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3,):
-    #
-    if (
-
-
-
-
-
-    if (model_option == "HF1"):
-        # request to InferenceEndpoint1 ----------------------------
-        API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
-        print("HF1")
-    else:
-        API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
-        print("HF2")
-
-    if (rag_option == "An"):
-        # only needs to be run once...
-        if not splittet:
-            splits = document_loading_splitting()
-            document_storage_chroma(splits)
-        db = document_retrieval_chroma()
-        # with RAG:
-        neu_text_mit_chunks = rag_chain(text, db, k)
-        # for a chat LLM:
-        #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
-        # as a plain prompt:
-        prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
-    else:
-        # for a chat LLM:
-        #prompt = generate_prompt_with_history_openai(text, history)
-        # as a plain prompt:
-        prompt = generate_prompt_with_history(text, history)
-    print("prompt:....................................")
-    print(prompt)
-    # request to the model (with RAG: with chunks from the vector store; without: only prompt and history)
-    #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
-    # for LLaMA:
-    #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
-    #result = client.text_generation(payload, do_sample=True, return_full_text=False, max_new_tokens=2048, top_p=0.9, temperature=0.6,)
-    # generic inference:
-    data = {
-        "inputs": prompt,
-        "options": {"max_new_tokens": max_new_tokens},
-    }
-    response = requests.post(API_URL, headers=HEADERS, json=data)
-    result = response.json()
-    print("result:------------------")
-    chatbot_response = result[0]['generated_text']
-    print("anzahl tokens gesamt antwort:------------------")
-    print(len(chatbot_response.split()))
-    except Exception as e:
-        raise gr.Error(e)
-
-    chatbot_message = chatbot_response[len(prompt):].strip()
-    print("history/chatbot_rsponse:--------------------------------")
-    print(history)
-    print(chatbot_message)
-
-    """
-    # stream the answer...
-    for i in range(len(chatbot_message)):
-        time.sleep(0.03)
-        yield chatbot_message[: i+1], "Generating"
-        if shared_state.interrupted:
-            shared_state.recover()
-            try:
-                yield chatbot_message[: i+1], "Stop: Success"
-                return
-            except:
-                pass
-    """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
 
 
-
-
-
-
-
-
-
-
 
+def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3, validate=False):
+    # the application may only start once the user has validated
+    if (validate and not text == "" and not text == None):
+        # with RAG
+        if (rag_option is None):
+            raise gr.Error("Retrieval Augmented Generation ist erforderlich.")
+        if (text == ""):
+            raise gr.Error("Prompt ist erforderlich.")
 
+        try:
+            if (model_option == "HF1"):
+                # request to InferenceEndpoint1 ----------------------------
+                API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+                print("HF1")
+            else:
+                API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
+                print("HF2")
+
+            if (rag_option == "An"):
+                # only needs to be run once...
+                if not splittet:
+                    splits = document_loading_splitting()
+                    document_storage_chroma(splits)
+                db = document_retrieval_chroma()
+                # with RAG:
+                neu_text_mit_chunks = rag_chain(text, db, k)
+                # for a chat LLM:
+                #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
+                # as a plain prompt:
+                prompt = generate_prompt_with_history(neu_text_mit_chunks, history)
+            else:
+                # for a chat LLM:
+                #prompt = generate_prompt_with_history_openai(text, history)
+                # as a plain prompt:
+                prompt = generate_prompt_with_history(text, history)
+            print("prompt:....................................")
+            print(prompt)
+            # request to the model (with RAG: with chunks from the vector store; without: only prompt and history)
+            #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
+            # for LLaMA:
+            #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
+            #result = client.text_generation(payload, do_sample=True, return_full_text=False, max_new_tokens=2048, top_p=0.9, temperature=0.6,)
+            # generic inference:
+            data = {
+                "inputs": prompt,
+                "options": {"max_new_tokens": max_new_tokens},
+            }
+            response = requests.post(API_URL, headers=HEADERS, json=data)
+            result = response.json()
+            print("result:------------------")
+            chatbot_response = result[0]['generated_text']
+            print("anzahl tokens gesamt antwort:------------------")
+            print(len(chatbot_response.split()))
+        except Exception as e:
+            raise gr.Error(e)
 
+        chatbot_message = chatbot_response[len(prompt):].strip()
+        print("history/chatbot_rsponse:--------------------------------")
+        print(history)
+        print(chatbot_message)
 
+        """
+        # stream the answer...
+        for i in range(len(chatbot_message)):
+            time.sleep(0.03)
+            yield chatbot_message[: i+1], "Generating"
+            if shared_state.interrupted:
+                shared_state.recover()
+                try:
+                    yield chatbot_message[: i+1], "Stop: Success"
+                    return
+                except:
+                    pass
+        """
+
+        # stream the answer...
+        history[-1][1] = ""
+        for character in chatbot_message:
+            history[-1][1] += character
+            time.sleep(0.03)
+            yield history, "Generating"
+            if shared_state.interrupted:
+                shared_state.recover()
+                try:
+                    yield history, "Stop: Success"
+                    return
+                except:
+                    pass
+
+
+        # for evaluation:
+        # custom eli5 criterion
+        #custom_criterion = {"eli5": "Is the output explained in a way that a 5 year old would understand it?"}
+
+        #eval_result = evaluator.evaluate_strings(prediction=res.strip(), input=text, criteria=custom_criterion, requires_reference=True)
+        #print("eval_result:............ ")
+        #print(eval_result)
+        #return res.strip()
+    else: # not yet validated, or no prompt given
+        return history, "Erst validieren oder einen Prompt eingeben!"
+
 
 ########################################
 #Evaluation
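
The core of both versions is a plain POST to the Hugging Face serverless Inference API. The sketch below isolates that pattern; the helper name `query_inference_api` and the `HF_TOKEN` environment variable are illustrative assumptions (the Space builds `HEADERS` elsewhere in app.py). Note that the serverless API conventionally takes generation arguments such as `max_new_tokens` under `"parameters"`, while the diff sends them under `"options"`, where they may not take effect.

```python
import os
import requests

# Assumed token source; the Space defines HEADERS elsewhere in app.py.
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
HEADERS = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

def query_inference_api(prompt: str, max_new_tokens: int = 512) -> str:
    """Send one text-generation request and return only the newly generated text."""
    data = {
        "inputs": prompt,
        # Generation arguments conventionally go under "parameters".
        "parameters": {"max_new_tokens": max_new_tokens},
        "options": {"wait_for_model": True},
    }
    response = requests.post(API_URL, headers=HEADERS, json=data)
    response.raise_for_status()
    result = response.json()
    # The API echoes the prompt at the start of 'generated_text';
    # slice it off, as the diff does with chatbot_response[len(prompt):].
    return result[0]["generated_text"][len(prompt):].strip()
```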
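The other substantive change, besides the `validate` gate, is the streaming block: instead of yielding a growing string (the commented-out variant), the new code appends the reply character by character to the last [user, bot] pair in the history and yields the whole history, which is the shape a `gr.Chatbot` output expects. Below is a minimal sketch of that pattern, with the `shared_state` interrupt handling omitted and the name `stream_reply` assumed:

```python
import time

def stream_reply(history, chatbot_message, delay=0.03):
    """Append the reply to the last history turn one character at a time,
    yielding the growing history so a gr.Chatbot can re-render live."""
    history[-1][1] = ""                # last turn is a [user_text, bot_text] pair
    for character in chatbot_message:
        history[-1][1] += character
        time.sleep(delay)              # pace the stream for readability
        yield history, "Generating"
    yield history, "Done"

# Usage with history in the [[user, bot], ...] format gr.Chatbot uses:
# for hist, status in stream_reply([["Hallo", None]], "Hi there!"):
#     print(status, hist[-1][1])
```

Yielding the full history rather than a partial string lets one generator drive both the Chatbot component and a status field from the same event handler.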