import os
from typing import List

import gradio as gr
import requests
from dotenv import load_dotenv
from google import genai
from langchain_cohere import CohereEmbeddings
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# pip install -U langchain-huggingface
from langchain_huggingface import HuggingFaceEmbeddings
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from transformers import pipeline

# Load variables from a .env file (if present) into the process environment.
load_dotenv(verbose=True)

# GEMINI_API_KEY is mandatory: the indexed lookup below raises KeyError at
# startup when it is missing. `google_api_key` is kept as a module-level name.
google_api_key = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

# Embedding model used to vectorize the corporate records (a Hugging Face
# sentence-transformers checkpoint).
embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/msmarco-MiniLM-L12-cos-v5"
)

# In-memory vector store backed by the embedding model above.
vector_store = InMemoryVectorStore(embedding=embed_model)

# Fetch the corporate records as JSON. The endpoint takes the SQL statement in
# the `sqlcmd` query parameter.
url = "https://www.ryhintl.com/dbjson/getjson?sqlcmd=select `法人名`,`本社所在地`,`会計基準`,`事業年度`,`売上高`,`資本金`,`従業員数` from corp_info"
# A timeout keeps startup from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# failure while parsing or iterating the body.
response = requests.get(url, timeout=30)
response.raise_for_status()
data_list = response.json()

documents = []
for idx, data in enumerate(data_list):
    # Pull out the fields that make up the searchable text; missing fields
    # fall back to "不明".
    coname = data.get("法人名", "不明")
    address = data.get("本社所在地", "不明")
    ifrs = data.get("会計基準", "不明")
    bizyear = data.get("事業年度", "不明")
    revenue = data.get("売上高", "不明")
    capital = data.get("資本金", "不明")
    empno = data.get("従業員数", "不明")

    # Sentence that gets embedded and searched.
    page_content = f"{coname} {address} {ifrs}の売上高は({revenue})円です。事業年度は{bizyear}で資本金は{capital}です。従業員数は{empno}です。"

    # Keep the full record as metadata: every key is retained so the record
    # can be filtered by its "法人名" value.
    metadata = data.copy()

    # Document ids are 1-based strings.
    document = Document(id=str(idx + 1), page_content=page_content, metadata=metadata)
    documents.append(document)

# Add documents to the vector store
vector_store.add_documents(documents=documents)


def create_filter(company_name: str):
    """Build a metadata filter that matches one company.

    Args:
        company_name: Exact value to compare against a document's "法人名"
            metadata entry.

    Returns:
        A callable taking a Document and returning True only when its
        "法人名" metadata equals ``company_name``.
    """

    def _filter_function(doc: Document) -> bool:
        return doc.metadata.get("法人名") == company_name

    return _filter_function


def _ask_gemini(prompt: str):
    """Send one prompt to the Gemini model and return the response's text."""
    # NOTE(review): the prompts ask the model to search the web, but no search
    # tool is passed in this call - confirm whether grounding should be enabled.
    gresponse = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[prompt],
    )
    return gresponse.text


# --- Gradio UI Logic ---
def search_documents(query, query_company):
    """Answer a question about a company using the vector store and Gemini.

    The best-matching document for ``query`` among those whose "法人名"
    equals ``query_company`` is passed to Gemini as context. When no document
    matches, Gemini is asked the question without context.

    Args:
        query: The user's question. Surrounding whitespace is ignored.
        query_company: Company name to filter on. Surrounding whitespace is
            ignored so a pasted name still matches the exact-equality filter.

    Returns:
        The model's answer text, a prompt-required message when ``query`` is
        empty, or an error message string if anything raises.
    """
    try:
        # Normalize the inputs: a whitespace-only prompt counts as empty, and
        # stray spaces around the company name must not defeat the filter.
        question = (query or "").strip()
        company = (query_company or "").strip()

        if not question:
            return "プロンプトを入力してください"

        results = vector_store.similarity_search(
            query=question,
            k=1,
            filter=create_filter(company),
        )

        if not results:
            # Nothing indexed for this company: ask the model directly.
            prompt = f"{question}に対する答えを出力してください。ウェブで検索して回答してください。"
            return _ask_gemini(prompt)

        doc = results[0]
        # Format the retrieved record as readable context for the model.
        output_str = f"結果:\n\n* コンテンツ: {doc.page_content}\n* メタデータ: {doc.metadata}"
        prompt = f"{output_str}に基づいて{company}の{question}に対する答えを出力してください。回答が見つからない場合は、ウェブで検索して回答してください。"
        return _ask_gemini(prompt)
    except Exception as e:
        # UI boundary: report the failure in the result box instead of
        # letting the exception propagate out of the click handler.
        return f"エラー: {e}"


# --- Gradio UI setup ---
# NOTE(review): the widget nesting under gr.Row() is reconstructed from a
# whitespace-collapsed source - confirm the intended layout.
with gr.Blocks(
    title="上場会社売上報検索",
    css="""footer {visibility: hidden;} #header {display: flex; justify-content: space-between; align-items: center; font-size: 24px; font-weight: bold;} #logo {width: 50px; height: 50px;}""",
) as dob:
    gr.HTML('')
    gr.Markdown("꧁ \n上場会社売上報検索 ꧂ 社名を入力して情報を検索できます。")

    with gr.Row():
        query_input = gr.Textbox(
            label="プロンプト",
            info="例) 売上高を教えてください。",
            lines=2,
        )
        query_company = gr.Textbox(
            label="企業名",
            info="例)トヨタ自動車株式会社",
            lines=1,
        )

    search_button = gr.Button("検索")
    results_output = gr.Textbox(label="結果", interactive=False)

    search_button.click(
        fn=search_documents,
        inputs=[query_input, query_company],
        outputs=results_output,
    )

dob.launch(favicon_path="favicon.ico", show_api=False)