Modified example; added rate-limit warning
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from datetime import datetime
|
|
| 8 |
import json
|
| 9 |
|
| 10 |
retrieve_results = 10
|
|
|
|
| 11 |
|
| 12 |
generate_kwargs = dict(
|
| 13 |
temperature = None,
|
|
@@ -38,10 +39,14 @@ try:
|
|
| 38 |
except:
|
| 39 |
pass
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
|
| 47 |
def rag_cleaner(inp):
|
|
@@ -106,7 +111,11 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
|
|
| 106 |
|
| 107 |
client = InferenceClient(llm_model_picked)
|
| 108 |
#output = client.text_generation(prompt, **generate_kwargs, stream=False, details=False, return_full_text=False)
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
#output = output.lstrip(' \n') if output.lstrip().startswith('\n') else output
|
| 111 |
|
| 112 |
|
|
|
|
| 8 |
import json
|
| 9 |
|
| 10 |
retrieve_results = 10
|
| 11 |
+
show_examples = False
|
| 12 |
|
| 13 |
generate_kwargs = dict(
|
| 14 |
temperature = None,
|
|
|
|
| 39 |
except:
|
| 40 |
pass
|
| 41 |
|
| 42 |
+
if show_examples:
|
| 43 |
+
with open("sample_outputs.json", "r") as f:
|
| 44 |
+
sample_outputs = json.load(f)
|
| 45 |
+
output_placeholder = sample_outputs['output_placeholder']
|
| 46 |
+
md_text_initial = sample_outputs['search_placeholder']
|
| 47 |
+
else:
|
| 48 |
+
output_placeholder = None
|
| 49 |
+
md_text_initial = ''
|
| 50 |
|
| 51 |
|
| 52 |
def rag_cleaner(inp):
|
|
|
|
| 111 |
|
| 112 |
client = InferenceClient(llm_model_picked)
|
| 113 |
#output = client.text_generation(prompt, **generate_kwargs, stream=False, details=False, return_full_text=False)
|
| 114 |
+
try:
|
| 115 |
+
stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
| 116 |
+
except:
|
| 117 |
+
gr.Warning("LLM Inference rate limit reached, try again later!")
|
| 118 |
+
return ""
|
| 119 |
#output = output.lstrip(' \n') if output.lstrip().startswith('\n') else output
|
| 120 |
|
| 121 |
|