Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -74,14 +74,16 @@ splittet = False
|
|
| 74 |
print ("Inf.Client")
|
| 75 |
#client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
|
| 76 |
#client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
|
| 79 |
##############################################
|
| 80 |
# tokenizer for generating prompt
|
| 81 |
##############################################
|
| 82 |
print ("Tokenizer")
|
| 83 |
#tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
|
| 84 |
-
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
|
| 85 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
|
| 86 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
| 87 |
|
|
@@ -279,22 +281,21 @@ def generate(text, history, rag_option, model_option, temperature=0.5, max_new_
|
|
| 279 |
|
| 280 |
#Anfrage an Modell (mit RAG: mit chunks aus Vektorstore, ohne: nur prompt und history)
|
| 281 |
#payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
top_p=0.9,
|
| 289 |
-
temperature=0.6,
|
| 290 |
-
)
|
| 291 |
except Exception as e:
|
| 292 |
raise gr.Error(e)
|
| 293 |
|
|
|
|
|
|
|
| 294 |
#Antwort als Stream ausgeben...
|
| 295 |
-
for i in range(len(result)):
|
| 296 |
-
time.sleep(0.05)
|
| 297 |
-
yield result[: i+1]
|
| 298 |
|
| 299 |
|
| 300 |
|
|
|
|
| 74 |
print ("Inf.Client")
|
| 75 |
#client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
|
| 76 |
#client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
|
| 77 |
+
#Inference mit Authorisation:
|
| 78 |
+
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
|
| 79 |
+
HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
|
| 80 |
|
| 81 |
##############################################
|
| 82 |
# tokenizer for generating prompt
|
| 83 |
##############################################
|
| 84 |
print ("Tokenizer")
|
| 85 |
#tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
|
| 86 |
+
#tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
|
| 87 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
|
| 88 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
| 89 |
|
|
|
|
| 281 |
|
| 282 |
#Anfrage an Modell (mit RAG: mit chunks aus Vektorstore, ohne: nur prompt und history)
|
| 283 |
#payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
|
| 284 |
+
#Für LLAMA:
|
| 285 |
+
#payload = tokenizer.apply_chat_template(prompt,tokenize=False)
|
| 286 |
+
#result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
|
| 287 |
+
#inference allg:
|
| 288 |
+
result= requests.post(API_URL, headers=HEADERS, json=prompt)
|
| 289 |
+
|
|
|
|
|
|
|
|
|
|
| 290 |
except Exception as e:
|
| 291 |
raise gr.Error(e)
|
| 292 |
|
| 293 |
+
return result.json()
|
| 294 |
+
|
| 295 |
#Antwort als Stream ausgeben...
|
| 296 |
+
#for i in range(len(result)):
|
| 297 |
+
#time.sleep(0.05)
|
| 298 |
+
#yield result[: i+1]
|
| 299 |
|
| 300 |
|
| 301 |
|