Spaces:

AutoRAG
/

AutoRAG-data-creation

Build error

App Files Files Community

jeffrey committed on Oct 13, 2024

Commit

cf0997e

1 Parent(s): 20b5695

init commit

Browse files

Files changed (8) hide show

.gitignore +164 -0
README.md +3 -3
app.py +241 -0
packages.txt +4 -0
requirements.txt +2 -0
src/__init__.py +0 -0
src/create.py +111 -0
src/util.py +62 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,164 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+file_cache/
+data/

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: AutoRAG Data Creation
-emoji: 🏢
-colorFrom: yellow
-colorTo: yellow
 sdk: gradio
 sdk_version: 5.0.2
 app_file: app.py

 ---
 title: AutoRAG Data Creation
+emoji: 🛠️
+colorFrom: green
+colorTo: blue
 sdk: gradio
 sdk_version: 5.0.2
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,241 @@

+import os
+import shutil
+from typing import List
+
+import gradio as gr
+import pandas as pd
+from autorag.data.parse import langchain_parse
+from autorag.data.parse.llamaparse import llama_parse
+from autorag.data.qa.schema import Raw
+from llama_index.llms.openai import OpenAI
+
+from src.create import default_create, fast_create, advanced_create
+from src.util import on_submit_llama_cloud_key, on_submit_openai_key, on_submit_upstage_key
+root_dir = os.path.dirname(os.path.realpath(__file__))
+FILE_DIR = os.path.join(root_dir, "file_cache")
+if not os.path.exists(FILE_DIR):
+	os.makedirs(FILE_DIR)
+DATA_DIR = os.path.join(root_dir, "data")
+if not os.path.exists(DATA_DIR):
+	os.makedirs(DATA_DIR)
+def change_lang_choice(lang: str) -> str:
+	lang_dict = {
+		"English": "en",
+		"한국어": "ko",
+		"日本語": "ja"
+	}
+	return lang_dict[lang]
+def change_visible_status_api_key(parse_method: str):
+	if parse_method == "llama-parse":
+		return gr.update(visible=True), gr.update(visible=False)
+	elif parse_method == "upstage🇰🇷":
+		return gr.update(visible=False), gr.update(visible=True)
+	else:
+		return gr.update(visible=False), gr.update(visible=False)
+def run_parse(file_lists: List[str], parse_method: str, progress=gr.Progress()):
+	# save an input file to a directory
+	for file_path in file_lists:
+		shutil.copy(file_path, FILE_DIR)
+	progress(0.05)
+	if parse_method in ["pdfminer", "pdfplumber", "pypdfium2", "pypdf", "pymupdf"]:
+		raw_df: pd.DataFrame = langchain_parse(data_path_glob=os.path.join(FILE_DIR, "*.pdf"), parse_method=parse_method)
+	elif parse_method == "llama-parse":
+		llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
+		if llama_cloud_api_key is None:
+			return "Please submit your Llama Cloud API key first."
+		raw_df: pd.DataFrame = llama_parse(data_path_glob=os.path.join(FILE_DIR, "*.pdf"))
+	elif parse_method == "upstage🇰🇷":
+		upstage_api_key = os.getenv("UPSTAGE_API_KEY")
+		if upstage_api_key is None:
+			return "Please submit your Upstage API key first."
+		raw_df: pd.DataFrame = langchain_parse(data_path_glob=os.path.join(FILE_DIR, "*.pdf"), parse_method="upstagedocumentparse")
+	else:
+		return "Unsupported parse method."
+	progress(0.8)
+	raw_df.to_parquet(os.path.join(DATA_DIR, "raw.parquet"), index=False)
+	return "Parsing Complete. Download at the bottom button."
+def run_chunk(use_existed_raw: bool, raw_file: str, chunk_method: str, chunk_size: int, chunk_overlap: int,
+			  lang: str = "English", progress=gr.Progress()):
+	lang = change_lang_choice(lang)
+	if use_existed_raw:
+		raw_df_path = os.path.join(DATA_DIR, "raw.parquet")
+	else:
+		raw_df_path = raw_file
+	if not os.path.exists(raw_df_path):
+		return "Please upload raw.parquet file first. Or run the parsing stage first."
+	raw_df = pd.read_parquet(raw_df_path, engine="pyarrow")
+	raw_instance = Raw(raw_df)
+	if chunk_method in ["Token", "Sentence"]:
+		corpus = raw_instance.chunk("llama_index_chunk", chunk_method=chunk_method, chunk_size=chunk_size,
+									chunk_overlap=chunk_overlap, add_file_name=lang)
+	elif chunk_method in ["Semantic"]:
+		corpus = raw_instance.chunk("llama_index_chunk", chunk_method="Semantic_llama_index",
+									embed_model="openai", breakpoint_percnetile_threshold=0.95,
+									add_file_name=lang)
+	elif chunk_method == "Recursive":
+		corpus = raw_instance.chunk("langchain_chunk", chunk_method="recursivecharacter",
+									add_file_name=lang, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+	elif chunk_method == "Konlpy🇰🇷":
+		corpus = raw_instance.chunk("langchain_chunk", chunk_method="konlpy", add_file_name=lang,
+									chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+	else:
+		gr.Error("Unsupported chunk method.")
+		return "Unsupported chunk method."
+	progress(0.8)
+	corpus.to_parquet(os.path.join(DATA_DIR, "corpus.parquet"))
+	return "Chunking Complete. Download at the bottom button."
+def run_qa(use_existed_corpus: bool, corpus_file: str, qa_method: str,
+		   model_name: str, qa_cnt: int, batch_size: int, lang: str = "English", progress=gr.Progress()):
+	lang = change_lang_choice(lang)
+	if use_existed_corpus:
+		corpus_df_path = os.path.join(DATA_DIR, "corpus.parquet")
+	else:
+		corpus_df_path = corpus_file
+	if not os.path.exists(corpus_df_path):
+		gr.Error("Please upload corpus.parquet file first. Or run the chunking stage first.")
+		return "Please upload corpus.parquet file first. Or run the chunking stage first."
+	corpus_df = pd.read_parquet(corpus_df_path, engine="pyarrow")
+	if os.getenv("OPENAI_API_KEY") is None:
+		gr.Error("Please submit your OpenAI API key first.")
+		return "Please submit your OpenAI API key first."
+	llm = OpenAI(model=model_name)
+	if qa_method == "default":
+		qa = default_create(corpus_df, llm=llm, n=qa_cnt, lang=lang, progress=progress, batch_size=batch_size)
+	elif qa_method == "fast":
+		qa = fast_create(corpus_df, llm=llm, n=qa_cnt, lang=lang, progress=progress, batch_size=batch_size)
+	elif qa_method == "advanced":
+		qa = advanced_create(corpus_df, llm=llm, n=qa_cnt, lang=lang, progress=progress, batch_size=batch_size)
+	else:
+		gr.Error("Unsupported QA method.")
+		return "Unsupported QA method."
+	qa.to_parquet(os.path.join(DATA_DIR, "qa.parquet"), os.path.join(DATA_DIR, "corpus.parquet"))
+	return "QA Creation Complete. Download at the bottom button."
+def file_reset() -> str:
+	shutil.rmtree(FILE_DIR)
+	os.makedirs(FILE_DIR)
+	return "Files reset complete."
+with gr.Blocks(theme="earneleh/paris") as demo:
+	gr.HTML("<h1>AutoRAG Data Creation 🛠️</h1>")
+	with gr.Row():
+		openai_key_textbox = gr.Textbox(label="Please input your OpenAI API key and press Enter.", type="password",
+										info="You can get your API key from https://platform.openai.com/account/api-keys\n\n"
+											 "AutoRAG do not store your API key.",
+										autofocus=True)
+		api_key_status_box = gr.Textbox(label="OpenAI API status", value="Not Set", interactive=False)
+		lang_choice = gr.Radio(["English", "한국어", "日本語"], label="Language",
+									   value="English", info="Choose Langauge. En, Ko, Ja are supported.",
+									   interactive=True)
+	with gr.Row(visible=False) as llama_cloud_api_key_row:
+		llama_key_textbox = gr.Textbox(label="Please input your Llama Cloud API key and press Enter.", type="password",
+									   		info="You can get your API key from https://docs.cloud.llamaindex.ai/llamacloud/getting_started/api_key\n\n"
+											 "AutoRAG do not store your API key.",)
+		llama_key_status_box = gr.Textbox(label="Llama Cloud API status", value="Not Set", interactive=False)
+	with gr.Row(visible=False) as upstage_api_key_row:
+		upstage_key_textbox = gr.Textbox(label="Please input your Upstage API key and press Enter.", type="password",
+									   		info="You can get your API key from https://upstage.ai/\n\n"
+											 "AutoRAG do not store your API key.",)
+		upstage_key_status_box = gr.Textbox(label="Upstage API status", value="Not Set", interactive=False)
+	with gr.Row():
+		with gr.Column(scale=1):
+			gr.Markdown("## 1. Parse your PDF files\n\nUpload your pdf files and make it to raw.parquet.")
+			document_file_input = gr.File(label="Upload Files", type="filepath", file_count="multiple")
+			parse_choice = gr.Dropdown(
+				["pdfminer", "pdfplumber", "pypdfium2", "pypdf", "pymupdf", "llama-parse", "upstage🇰🇷"],
+				label="Parsing Method", info="Choose parsing method that you want")
+			parse_button = gr.Button(value="Run Parsing")
+			parse_status = gr.Textbox(value="Not Started", interactive=False)
+			raw_download_button = gr.Button(value="Download raw.parquet",
+											link=f"/file={os.path.join(DATA_DIR, 'raw.parquet')}")
+			file_reset_button = gr.Button(value="Reset uploaded files")
+		with gr.Column(scale=1):
+			gr.Markdown(
+				"## 2. Chunk your raw.parquet\n\nUse parsed raw.parquet or upload your own. It will make a corpus.parquet."
+			)
+			raw_file_input = gr.File(label="Upload raw.parquet", type="filepath", file_count="single", visible=False)
+			use_previous_raw_file = gr.Checkbox(label="Use previous raw.parquet", value=True)
+			chunk_choice = gr.Dropdown(
+				["Token", "Sentence", "Semantic", "Recursive", "Konlpy🇰🇷"],
+				label="Chunking Method", info="Choose chunking method that you want")
+			chunk_size = gr.Slider(minimum=128, maximum=1024, step=128, label="Chunk Size", value=256)
+			chunk_overlap = gr.Slider(minimum=16, maximum=256, step=16, label="Chunk Overlap", value=32)
+			chunk_button = gr.Button(value="Run Chunking")
+			chunk_status = gr.Textbox(value="Not Started", interactive=False)
+			corpus_download_button = gr.Button(value="Download corpus.parquet",
+											   link=f"/file={os.path.join(DATA_DIR, 'corpus.parquet')}")
+		with gr.Column(scale=1):
+			gr.Markdown(
+				"## 3. Create QA dataset from your corpus.parquet\n\nQA dataset is essential to run AutoRAG. Upload corpus.parquet & select QA method and run.")
+			gr.HTML("<b style='color: red; background-color: black; font-weight: bold;'>Warning: QA Creation uses an OpenAI model, which can be costly. Start with a small batch to gauge expenses.</b>")
+			corpus_file_input = gr.File(label="Upload corpus.parquet", type="filepath", file_count="single",
+										visible=False)
+			use_previous_corpus_file = gr.Checkbox(label="Use previous corpus.parquet", value=True)
+			qa_choice = gr.Radio(["default", "fast", "advanced"], label="QA Method",
+									info="Choose QA method that you want")
+			model_choice = gr.Radio(["gpt-4o-mini", "gpt-4o"], label="Select model for data creation",
+									)
+			qa_cnt = gr.Slider(minimum=20, maximum=150, step=5, label="Number of QA pairs", value=80)
+			batch_size = gr.Slider(minimum=1, maximum=16, step=1,
+								   label="Batch Size to OpenAI model. If there is an error, decrease this.", value=16)
+			run_qa_button = gr.Button(value="Run QA Creation")
+			qa_status = gr.Textbox(value="Not Started", interactive=False)
+			qa_download_button = gr.Button(value="Download qa.parquet",
+										   link=f"/file={os.path.join(DATA_DIR, 'qa.parquet')}")
+	#================================================================================================#
+	# Logics
+	use_previous_raw_file.change(lambda x: gr.update(visible=not x), inputs=[use_previous_raw_file],
+								 outputs=[raw_file_input])
+	use_previous_corpus_file.change(lambda x: gr.update(visible=not x), inputs=[use_previous_corpus_file],
+									outputs=[corpus_file_input])
+	openai_key_textbox.submit(on_submit_openai_key, inputs=[openai_key_textbox], outputs=api_key_status_box)
+	# Parsing
+	parse_button.click(run_parse, inputs=[document_file_input, parse_choice], outputs=parse_status)
+	file_reset_button.click(file_reset, outputs=parse_status)
+	# Chunking
+	chunk_button.click(run_chunk, inputs=[use_previous_raw_file, raw_file_input, chunk_choice, chunk_size, chunk_overlap,
+										  lang_choice],
+					   outputs=chunk_status)
+	# QA Creation
+	run_qa_button.click(run_qa, inputs=[use_previous_corpus_file, corpus_file_input, qa_choice, model_choice, qa_cnt,
+										batch_size, lang_choice], outputs=qa_status)
+	# API Key visibility
+	parse_choice.change(change_visible_status_api_key, inputs=[parse_choice],
+						outputs=[llama_cloud_api_key_row, upstage_api_key_row])
+demo.launch(share=False, debug=True, allowed_paths=[FILE_DIR, DATA_DIR])

packages.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gcc
+poppler-utils
+tesseract-ocr
+python3-openssl

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


+AutoRAG[parse,ko,ja]>=0.3.4
+llama-index-llms-upstage

src/__init__.py ADDED Viewed

File without changes

src/create.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import pandas as pd
+import gradio as gr
+from autorag.data.qa.filter.passage_dependency import passage_dependency_filter_llama_index
+from autorag.data.qa.query.llama_gen_query import factoid_query_gen
+from autorag.data.qa.sample import random_single_hop
+from autorag.data.qa.schema import Corpus, QA
+from autorag.data.qa.generation_gt.llama_index_gen_gt import (
+    make_basic_gen_gt,
+    make_concise_gen_gt,
+)
+from autorag.data.qa.filter.dontknow import dontknow_filter_rule_based
+from llama_index.core.base.llms.base import BaseLLM
+from autorag.data.qa.evolve.llama_index_query_evolve import reasoning_evolve_ragas
+from autorag.data.qa.evolve.llama_index_query_evolve import compress_ragas
+def default_create(corpus_df, llm: BaseLLM, n: int = 100, lang: str = "en",
+				   batch_size: int = 32,
+				   progress=gr.Progress()) -> QA:
+	corpus_instance = Corpus(corpus_df)
+	if len(corpus_instance.data) < n:
+		n = len(corpus_instance.data)
+	sampled_corpus = corpus_instance.sample(random_single_hop, n=n)
+	mapped_corpus = sampled_corpus.map(lambda df: df.reset_index(drop=True))
+	retrieval_gt_contents = mapped_corpus.make_retrieval_gt_contents()
+	progress(0.05)
+	query_generated = retrieval_gt_contents.batch_apply(factoid_query_gen, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.2)
+	basic_answers = query_generated.batch_apply(make_basic_gen_gt, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.4)
+	concise_answers = basic_answers.batch_apply(make_concise_gen_gt, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.6)
+	filtered_answers = concise_answers.filter(dontknow_filter_rule_based, lang=lang)
+	progress(0.8)
+	initial_qa = filtered_answers.batch_filter(passage_dependency_filter_llama_index, lang=lang, batch_size=batch_size)
+	progress(0.96)
+	return initial_qa
+def fast_create(corpus_df, llm: BaseLLM, n: int = 100, lang: str = "en",
+				batch_size: int = 32,
+				progress=gr.Progress()) -> QA:
+	corpus_instance = Corpus(corpus_df)
+	progress(0.05)
+	if len(corpus_instance.data) < n:
+		n = len(corpus_instance.data)
+	sampled_corpus = corpus_instance.sample(random_single_hop, n=n)
+	mapped_corpus = sampled_corpus.map(lambda df: df.reset_index(drop=True))
+	progress(0.1)
+	retrieval_gt_contents = mapped_corpus.make_retrieval_gt_contents()
+	progress(0.2)
+	query_generated = retrieval_gt_contents.batch_apply(factoid_query_gen, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.3)
+	basic_answers = query_generated.batch_apply(make_basic_gen_gt, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.5)
+	concise_answers = basic_answers.batch_apply(make_concise_gen_gt, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.75)
+	initial_qa = concise_answers
+	progress(0.9)
+	return initial_qa
+def advanced_create(corpus_df, llm: BaseLLM, n: int = 100, lang: str = "en",
+					batch_size: int = 32,
+					progress=gr.Progress()) -> QA:
+	"""
+	Mix hard and easy question.
+	"""
+	corpus_instance = Corpus(corpus_df)
+	if len(corpus_instance.data) < n:
+		n = len(corpus_instance.data)
+	sampled_corpus = corpus_instance.sample(random_single_hop, n=n)
+	mapped_corpus = sampled_corpus.map(lambda df: df.reset_index(drop=True))
+	retrieval_gt_contents = mapped_corpus.make_retrieval_gt_contents()
+	progress(0.05)
+	query_generated = retrieval_gt_contents.batch_apply(factoid_query_gen, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.15)
+	basic_answers = query_generated.batch_apply(make_basic_gen_gt, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.25)
+	concise_answers = basic_answers.batch_apply(make_concise_gen_gt, llm=llm, lang=lang, batch_size=batch_size)
+	progress(0.35)
+	filtered_answers = concise_answers.filter(dontknow_filter_rule_based, lang=lang)
+	progress(0.45)
+	initial_qa = filtered_answers.batch_filter(passage_dependency_filter_llama_index, lang=lang, batch_size=batch_size)
+	progress(0.55)
+	cut_idx = n // 2
+	reasoning_qa = initial_qa.map(lambda df: df.iloc[:cut_idx]).batch_apply(
+		reasoning_evolve_ragas,
+		llm=llm,
+		lang=lang,
+		batch_size=batch_size,
+	)
+	progress(0.75)
+	compressed_qa = initial_qa.map(lambda df: df.iloc[cut_idx:]).map(lambda df: df.reset_index(drop=True)).batch_apply(
+		compress_ragas,
+		llm=llm,
+		lang=lang,
+		batch_size=batch_size,
+	)
+	progress(0.95)
+	final_qa = QA(pd.concat([reasoning_qa.data, compressed_qa.data], ignore_index=True),
+				  linked_corpus=corpus_instance)
+	return final_qa

src/util.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import openai
+import os
+import gradio as gr
+from llama_index.core.base.llms.types import ChatResponse
+def on_submit_openai_key(openai_key):
+    os.environ["OPENAI_API_KEY"] = openai_key
+    # Test openai key
+    try:
+        client = openai.OpenAI()
+        response = client.chat.completions.create(
+            messages=[
+                {"role": "user", "content": "What is the capital of France?"},
+            ],
+            model="gpt-4o-mini",
+            max_tokens=3,
+        )
+        assert isinstance(response.choices[0].message.content, str)
+        gr.Info("OpenAI API key submitted.", duration=3)
+        return "Setting complete."
+    except openai.AuthenticationError as e:
+        gr.Error("OpenAI API key is invalid.", duration=3)
+        return "Not Set"
+    except AssertionError as e:
+        gr.Error("OpenAI server is not working properly.", duration=3)
+        return "Not Set"
+def on_submit_llama_cloud_key(llama_cloud_key):
+    from llama_parse import LlamaParse
+    os.environ["LLAMA_CLOUD_API_KEY"] = llama_cloud_key
+    # Test llama cloud key
+    try:
+        parser = LlamaParse(
+            result_type="markdown"  # "markdown" and "text" are available
+        )
+        return "Setting complete."
+    except:
+        gr.Error("LLAMA Cloud API key is invalid.", duration=3)
+        return "Not Set"
+def on_submit_upstage_key(upstage_key):
+    os.environ["UPSTAGE_API_KEY"] = upstage_key
+    # Test upstage key
+    try:
+        from llama_index.llms.upstage import Upstage
+        from llama_index.core.llms import ChatMessage
+        llm = Upstage()
+        response: ChatResponse = llm.chat(messages=[
+            ChatMessage(role="system", content="You are a helpful assistant."),
+            ChatMessage(role="user", content="Hi, how are you?")
+        ], max_token=3)
+        assert isinstance(response.message, str)
+        assert bool(response.message)
+        return "Setting complete."
+    except:
+        gr.Error("Upstage API key is invalid.", duration=3)
+        return "Not Set"