aiqtech commited on
Commit
eddae4d
ยท
verified ยท
1 Parent(s): 57c5941

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2319
app.py CHANGED
@@ -470,2320 +470,14 @@ class ResponseCleaner:
470
  @staticmethod
471
  def clean_response(response: str) -> str:
472
  """๋ถˆํ•„์š”ํ•œ ๋งˆํฌ์—… ์ œ๊ฑฐ ๊ฐ•ํ™”"""
473
- # ๋งˆํฌ๋‹ค์šด ํ…Œ์ด๋ธ” ์ œ๊ฑฐ
474
- # | ๋กœ ์‹œ์ž‘ํ•˜๋Š” ํ…Œ์ด๋ธ” ํ–‰ ์ œ๊ฑฐ
475
- response = re.sub(r'^\|.*\|.*
476
-
477
-
478
- # ============================================================================
479
- # ํ†ตํ•ฉ ์ตœ์ ํ™” ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
480
- # ============================================================================
481
-
482
- class SpeedOptimizedMultiAgentSystem:
483
- """์†๋„ ์ตœ์ ํ™”๋œ ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์—†์Œ)"""
484
-
485
- def __init__(self):
486
- self.llm = OptimizedFireworksClient()
487
- self.search = AsyncBraveSearch()
488
- self.reasoning = LightweightReasoningChain()
489
- self.quality_checker = QualityChecker()
490
- self.streaming = OptimizedStreaming()
491
- self.language_detector = LanguageDetector()
492
- self.response_cleaner = ResponseCleaner()
493
-
494
- # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํ’€
495
- self.executor = ThreadPoolExecutor(max_workers=4)
496
-
497
- def _init_compact_prompts(self, lang: str = 'ko') -> Dict:
498
- """์••์ถ•๋œ ๊ณ ํšจ์œจ ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)"""
499
- prompts = {
500
- 'ko': {
501
- AgentRole.SUPERVISOR: """[๊ฐ๋…์ž-๊ตฌ์กฐ์„ค๊ณ„]
502
- ์ฆ‰์‹œ๋ถ„์„: ํ•ต์‹ฌ์˜๋„+ํ•„์š”์ •๋ณด+๋‹ต๋ณ€๊ตฌ์กฐ
503
- ์ถœ๋ ฅ: 5๊ฐœ ํ•ต์‹ฌํฌ์ธํŠธ(๊ฐ 1๋ฌธ์žฅ)
504
- ์ถ”๋ก ์ฒด๊ณ„ ๋ช…์‹œ""",
505
-
506
- AgentRole.CREATIVE: """[์ฐฝ์˜์„ฑ์ƒ์„ฑ์ž]
507
- ์ž…๋ ฅ๊ตฌ์กฐ ๋”ฐ๋ผ ์ฐฝ์˜์  ํ™•์žฅ
508
- ์‹ค์šฉ์˜ˆ์‹œ+ํ˜์‹ ์ ‘๊ทผ+๊ตฌ์ฒด์กฐ์–ธ
509
- ๋ถˆํ•„์š”์„ค๋ช… ์ œ๊ฑฐ""",
510
-
511
- AgentRole.CRITIC: """[๋น„ํ‰์ž-๊ฒ€์ฆ]
512
- ์‹ ์†๊ฒ€ํ† : ์ •ํ™•์„ฑ/๋…ผ๋ฆฌ์„ฑ/์‹ค์šฉ์„ฑ
513
- ๊ฐœ์„ ํฌ์ธํŠธ 3๊ฐœ๋งŒ
514
- ๊ฐ 2๋ฌธ์žฅ ์ด๋‚ด""",
515
-
516
- AgentRole.FINALIZER: """[์ตœ์ข…ํ†ตํ•ฉ]
517
- ๋ชจ๋“ ์˜๊ฒฌ ์ข…ํ•ฉโ†’์ตœ์ ๋‹ต๋ณ€
518
- ๋ช…ํ™•๊ตฌ์กฐ+์‹ค์šฉ์ •๋ณด+์ฐฝ์˜๊ท ํ˜•
519
- ๋ฐ”๋กœ ํ•ต์‹ฌ ๋‚ด์šฉ๋ถ€ํ„ฐ ์‹œ์ž‘. ๋ถˆํ•„์š”ํ•œ ํ—ค๋”๋‚˜ ๋งˆํฌ์—… ์—†์ด. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€."""
520
- },
521
- 'en': {
522
- AgentRole.SUPERVISOR: """[Supervisor-Structure]
523
- Immediate analysis: core intent+required info+answer structure
524
- Output: 5 key points (1 sentence each)
525
- Clear reasoning framework""",
526
-
527
- AgentRole.CREATIVE: """[Creative Generator]
528
- Follow structure, expand creatively
529
- Practical examples+innovative approach+specific advice
530
- Remove unnecessary explanations""",
531
-
532
- AgentRole.CRITIC: """[Critic-Verification]
533
- Quick review: accuracy/logic/practicality
534
- Only 3 improvement points
535
- Max 2 sentences each""",
536
-
537
- AgentRole.FINALIZER: """[Final Integration]
538
- Synthesize all inputsโ†’optimal answer
539
- Clear structure+practical info+creative balance
540
- Start with core content directly. No unnecessary headers or markup. No markdown headers (#, ##, ###)."""
541
- },
542
- 'ja': {
543
- AgentRole.SUPERVISOR: """[็›ฃ็ฃ่€…-ๆง‹้€ ่จญ่จˆ]
544
- ๅณๆ™‚ๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›ณ+ๅฟ…่ฆๆƒ…ๅ ฑ+ๅ›ž็ญ”ๆง‹้€ 
545
- ๅ‡บๅŠ›๏ผš5ใคใฎๆ ธๅฟƒใƒใ‚คใƒณใƒˆ๏ผˆๅ„1ๆ–‡๏ผ‰
546
- ๆŽจ่ซ–ไฝ“็ณปๆ˜Ž็คบ""",
547
-
548
- AgentRole.CREATIVE: """[ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…]
549
- ๅ…ฅๅŠ›ๆง‹้€ ใซๅพ“ใฃใฆๅ‰ต้€ ็š„ๆ‹กๅผต
550
- ๅฎŸ็”จไพ‹+้ฉๆ–ฐ็š„ใ‚ขใƒ—ใƒญใƒผใƒ+ๅ…ทไฝ“็š„ใ‚ขใƒ‰ใƒใ‚คใ‚น
551
- ไธ่ฆใช่ชฌๆ˜Žๅ‰Š้™ค""",
552
-
553
- AgentRole.CRITIC: """[ๆ‰น่ฉ•่€…-ๆคœ่จผ]
554
- ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผ๏ผšๆญฃ็ขบๆ€ง/่ซ–็†ๆ€ง/ๅฎŸ็”จๆ€ง
555
- ๆ”นๅ–„ใƒใ‚คใƒณใƒˆ3ใคใฎใฟ
556
- ๅ„2ๆ–‡ไปฅๅ†…""",
557
-
558
- AgentRole.FINALIZER: """[ๆœ€็ต‚็ตฑๅˆ]
559
- ๅ…จๆ„่ฆ‹็ตฑๅˆโ†’ๆœ€้ฉๅ›ž็ญ”
560
- ๆ˜Ž็ขบๆง‹้€ +ๅฎŸ็”จๆƒ…ๅ ฑ+ๅ‰ต้€ ๆ€งใƒใƒฉใƒณใ‚น
561
- ๆ ธๅฟƒๅ†…ๅฎนใ‹ใ‚‰็›ดๆŽฅ้–‹ๅง‹ใ€‚ไธ่ฆใชใƒ˜ใƒƒใƒ€ใƒผใ‚„ใƒžใƒผใ‚ฏใ‚ขใƒƒใƒ—ใชใ—ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚"""
562
- },
563
- 'zh': {
564
- AgentRole.SUPERVISOR: """[ไธป็ฎก-็ป“ๆž„่ฎพ่ฎก]
565
- ็ซ‹ๅณๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›พ+ๆ‰€้œ€ไฟกๆฏ+็ญ”ๆกˆ็ป“ๆž„
566
- ่พ“ๅ‡บ๏ผš5ไธชๆ ธๅฟƒ่ฆ็‚น๏ผˆๆฏไธช1ๅฅ๏ผ‰
567
- ๆŽจ็†ไฝ“็ณปๆ˜Ž็กฎ""",
568
-
569
- AgentRole.CREATIVE: """[ๅˆ›ๆ„็”Ÿๆˆๅ™จ]
570
- ๆŒ‰็ป“ๆž„ๅˆ›้€ ๆ€งๆ‰ฉๅฑ•
571
- ๅฎž็”จ็คบไพ‹+ๅˆ›ๆ–ฐๆ–นๆณ•+ๅ…ทไฝ“ๅปบ่ฎฎ
572
- ๅˆ ้™คไธๅฟ…่ฆ็š„่งฃ้‡Š""",
573
-
574
- AgentRole.CRITIC: """[่ฏ„่ฎบๅฎถ-้ชŒ่ฏ]
575
- ๅฟซ้€ŸๅฎกๆŸฅ๏ผšๅ‡†็กฎๆ€ง/้€ป่พ‘ๆ€ง/ๅฎž็”จๆ€ง
576
- ไป…3ไธชๆ”น่ฟ›็‚น
577
- ๆฏไธชๆœ€ๅคš2ๅฅ""",
578
-
579
- AgentRole.FINALIZER: """[ๆœ€็ปˆๆ•ดๅˆ]
580
- ็ปผๅˆๆ‰€ๆœ‰ๆ„่งโ†’ๆœ€ไฝณ็ญ”ๆกˆ
581
- ๆธ…ๆ™ฐ็ป“ๆž„+ๅฎž็”จไฟกๆฏ+ๅˆ›ๆ„ๅนณ่กก
582
- ็›ดๆŽฅไปŽๆ ธๅฟƒๅ†…ๅฎนๅผ€ๅง‹ใ€‚ๆ— ้œ€ไธๅฟ…่ฆ็š„ๆ ‡้ข˜ๆˆ–ๆ ‡่ฎฐใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
583
- }
584
- }
585
-
586
- return prompts.get(lang, prompts['en'])
587
-
588
- async def parallel_process_agents(
589
- self,
590
- query: str,
591
- search_results: List[Dict],
592
- show_progress: bool = True,
593
- lang: str = None
594
- ) -> AsyncGenerator[Tuple[str, str], None]:
595
- """๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ (์บ์‹ฑ ์—†์Œ)"""
596
-
597
- start_time = time.time()
598
-
599
- # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€
600
- if lang is None:
601
- lang = self.language_detector.detect_language(query)
602
-
603
- # ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
604
- self.compact_prompts = self._init_compact_prompts(lang)
605
-
606
- search_context = self._format_search_results(search_results)
607
- accumulated_response = ""
608
- agent_thoughts = ""
609
-
610
- # ์ถ”๋ก  ํŒจํ„ด ๊ฒฐ์ •
611
- reasoning_pattern = self.reasoning.get_reasoning_pattern(query, lang)
612
-
613
- try:
614
- # === 1๋‹จ๊ณ„: ๊ฐ๋…์ž + ๊ฒ€์ƒ‰ ๋ณ‘๋ ฌ ์‹คํ–‰ ===
615
- if show_progress:
616
- progress_msg = {
617
- 'ko': "๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘\n๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„ + ๐Ÿ” ์ถ”๊ฐ€ ๊ฒ€์ƒ‰ ๋™์‹œ ์ง„ํ–‰...\n\n",
618
- 'en': "๐Ÿš€ Starting parallel processing\n๐Ÿ‘” Supervisor analysis + ๐Ÿ” Additional search in progress...\n\n",
619
- 'ja': "๐Ÿš€ ไธฆๅˆ—ๅ‡ฆ็†้–‹ๅง‹\n๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž + ๐Ÿ” ่ฟฝๅŠ ๆคœ็ดขๅŒๆ™‚้€ฒ่กŒไธญ...\n\n",
620
- 'zh': "๐Ÿš€ ๅผ€ๅง‹ๅนถ่กŒๅค„็†\n๐Ÿ‘” ไธป็ฎกๅˆ†ๆž + ๐Ÿ” ้™„ๅŠ ๆœ็ดขๅŒๆ—ถ่ฟ›่กŒ...\n\n"
621
- }
622
- agent_thoughts = progress_msg.get(lang, progress_msg['en'])
623
- yield accumulated_response, agent_thoughts
624
-
625
- # ๊ฐ๋…์ž ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
626
- supervisor_prompt_templates = {
627
- 'ko': f"""
628
- ์งˆ๋ฌธ: {query}
629
- ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
630
- ์ถ”๋ก ํŒจํ„ด: {reasoning_pattern}
631
- ์ฆ‰์‹œ ํ•ต์‹ฌ๊ตฌ์กฐ 5๊ฐœ ์ œ์‹œ""",
632
- 'en': f"""
633
- Question: {query}
634
- Search results: {search_context}
635
- Reasoning pattern: {reasoning_pattern}
636
- Immediately provide 5 key structures""",
637
- 'ja': f"""
638
- ่ณชๅ•: {query}
639
- ๆคœ็ดข็ตๆžœ: {search_context}
640
- ๆŽจ่ซ–ใƒ‘ใ‚ฟใƒผใƒณ: {reasoning_pattern}
641
- ๅณๅบงใซ5ใคใฎๆ ธๅฟƒๆง‹้€ ใ‚’ๆ็คบ""",
642
- 'zh': f"""
643
- ้—ฎ้ข˜: {query}
644
- ๆœ็ดข็ป“ๆžœ: {search_context}
645
- ๆŽจ็†ๆจกๅผ: {reasoning_pattern}
646
- ็ซ‹ๅณๆไพ›5ไธชๆ ธๅฟƒ็ป“ๆž„"""
647
- }
648
-
649
- supervisor_prompt = supervisor_prompt_templates.get(lang, supervisor_prompt_templates['en'])
650
-
651
- supervisor_response = ""
652
- supervisor_task = self.llm.chat_stream_async(
653
- messages=[
654
- {"role": "system", "content": self.compact_prompts[AgentRole.SUPERVISOR]},
655
- {"role": "user", "content": supervisor_prompt}
656
- ],
657
- temperature=0.3,
658
- max_tokens=500
659
- )
660
-
661
- # ๊ฐ๋…์ž ์ŠคํŠธ๋ฆฌ๋ฐ (๋ฒ„ํผ๋ง)
662
- async for chunk in self.streaming.buffer_and_yield(supervisor_task):
663
- supervisor_response += chunk
664
- if show_progress and len(supervisor_response) < 300:
665
- supervisor_label = {
666
- 'ko': "๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„",
667
- 'en': "๐Ÿ‘” Supervisor Analysis",
668
- 'ja': "๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž",
669
- 'zh': "๐Ÿ‘” ไธป็ฎกๅˆ†ๆž"
670
- }
671
- agent_thoughts = f"{supervisor_label.get(lang, supervisor_label['en'])}\n{supervisor_response[:300]}...\n\n"
672
- yield accumulated_response, agent_thoughts
673
-
674
- # === 2๋‹จ๊ณ„: ์ฐฝ์˜์„ฑ + ๋น„ํ‰ ์ค€๋น„ ๋ณ‘๋ ฌ ===
675
- if show_progress:
676
- creative_msg = {
677
- 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž + ๐Ÿ” ๋น„ํ‰์ž ์ค€๋น„...\n\n",
678
- 'en': "๐ŸŽจ Creative Generator + ๐Ÿ” Critic preparing...\n\n",
679
- 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€… + ๐Ÿ” ๆ‰น่ฉ•่€…ๆบ–ๅ‚™ไธญ...\n\n",
680
- 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ + ๐Ÿ” ่ฏ„่ฎบๅฎถๅ‡†ๅค‡ไธญ...\n\n"
681
- }
682
- agent_thoughts += creative_msg.get(lang, creative_msg['en'])
683
- yield accumulated_response, agent_thoughts
684
-
685
- # ์ฐฝ์˜์„ฑ ์ƒ์„ฑ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
686
- creative_prompt_templates = {
687
- 'ko': f"""
688
- ์งˆ๋ฌธ: {query}
689
- ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
690
- ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
691
- ์ฐฝ์˜์ +์‹ค์šฉ์  ๋‹ต๋ณ€ ์ฆ‰์‹œ์ƒ์„ฑ""",
692
- 'en': f"""
693
- Question: {query}
694
- Supervisor structure: {supervisor_response}
695
- Search results: {search_context}
696
- Generate creative+practical answer immediately""",
697
- 'ja': f"""
698
- ่ณชๅ•: {query}
699
- ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
700
- ๆคœ็ดข็ตๆžœ: {search_context}
701
- ๅ‰ต้€ ็š„+ๅฎŸ็”จ็š„ๅ›ž็ญ”ๅณๅบง็”Ÿๆˆ""",
702
- 'zh': f"""
703
- ้—ฎ้ข˜: {query}
704
- ไธป็ฎก็ป“ๆž„: {supervisor_response}
705
- ๆœ็ดข็ป“ๆžœ: {search_context}
706
- ็ซ‹ๅณ็”Ÿๆˆๅˆ›ๆ„+ๅฎž็”จ็ญ”ๆกˆ"""
707
- }
708
-
709
- creative_prompt = creative_prompt_templates.get(lang, creative_prompt_templates['en'])
710
-
711
- creative_response = ""
712
- creative_partial = ""
713
- critic_started = False
714
- critic_response = ""
715
-
716
- creative_task = self.llm.chat_stream_async(
717
- messages=[
718
- {"role": "system", "content": self.compact_prompts[AgentRole.CREATIVE]},
719
- {"role": "user", "content": creative_prompt}
720
- ],
721
- temperature=0.8,
722
- max_tokens=1500
723
- )
724
-
725
- # ์ฐฝ์˜์„ฑ ์ŠคํŠธ๋ฆฌ๋ฐ + ๋น„ํ‰์ž ์กฐ๊ธฐ ์‹œ์ž‘
726
- async for chunk in self.streaming.buffer_and_yield(creative_task):
727
- creative_response += chunk
728
- creative_partial += chunk
729
-
730
- # ์ฐฝ์˜์„ฑ ์‘๋‹ต์ด 500์ž ๋„˜์œผ๋ฉด ๋น„ํ‰์ž ์‹œ์ž‘
731
- if len(creative_partial) > 500 and not critic_started:
732
- critic_started = True
733
-
734
- # ๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
735
- critic_prompt_templates = {
736
- 'ko': f"""
737
- ์›๋ณธ์งˆ๋ฌธ: {query}
738
- ์ฐฝ์˜์„ฑ๋‹ต๋ณ€(์ผ๋ถ€): {creative_partial}
739
- ์‹ ์†๊ฒ€ํ† โ†’๊ฐœ์„ ์ 3๊ฐœ""",
740
- 'en': f"""
741
- Original question: {query}
742
- Creative answer (partial): {creative_partial}
743
- Quick reviewโ†’3 improvements""",
744
- 'ja': f"""
745
- ๅ…ƒใฎ่ณชๅ•: {query}
746
- ๅ‰ต้€ ็š„ๅ›ž็ญ”๏ผˆไธ€้ƒจ๏ผ‰: {creative_partial}
747
- ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผโ†’ๆ”นๅ–„็‚น3ใค""",
748
- 'zh': f"""
749
- ๅŽŸๅง‹้—ฎ้ข˜: {query}
750
- ๅˆ›ๆ„็ญ”ๆกˆ๏ผˆ้ƒจๅˆ†๏ผ‰: {creative_partial}
751
- ๅฟซ้€ŸๅฎกๆŸฅโ†’3ไธชๆ”น่ฟ›็‚น"""
752
- }
753
-
754
- critic_prompt = critic_prompt_templates.get(lang, critic_prompt_templates['en'])
755
-
756
- critic_task = asyncio.create_task(
757
- self._run_critic_async(critic_prompt)
758
- )
759
-
760
- if show_progress:
761
- display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
762
- creative_label = {
763
- 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž",
764
- 'en': "๐ŸŽจ Creative Generator",
765
- 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…",
766
- 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ"
767
- }
768
- agent_thoughts = f"{creative_label.get(lang, creative_label['en'])}\n{display_creative}\n\n"
769
- yield accumulated_response, agent_thoughts
770
-
771
- # ๋น„ํ‰์ž ๊ฒฐ๊ณผ ๋Œ€๊ธฐ
772
- if critic_started:
773
- critic_response = await critic_task
774
-
775
- if show_progress:
776
- critic_label = {
777
- 'ko': "๐Ÿ” ๋น„ํ‰์ž ๊ฒ€ํ† ",
778
- 'en': "๐Ÿ” Critic Review",
779
- 'ja': "๐Ÿ” ๆ‰น่ฉ•่€…ใƒฌใƒ“ใƒฅใƒผ",
780
- 'zh': "๐Ÿ” ่ฏ„่ฎบๅฎถๅฎกๆŸฅ"
781
- }
782
- agent_thoughts += f"{critic_label.get(lang, critic_label['en'])}\n{critic_response[:200]}...\n\n"
783
- yield accumulated_response, agent_thoughts
784
-
785
- # === 3๋‹จ๊ณ„: ํ’ˆ์งˆ ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข…๋ฃŒ ===
786
- quality_score, need_more = self.quality_checker.evaluate_response(
787
- creative_response, query, lang
788
- )
789
-
790
- if not need_more and quality_score > 0.85:
791
- # ํ’ˆ์งˆ์ด ์ถฉ๋ถ„ํžˆ ๋†’์œผ๋ฉด ๋ฐ”๋กœ ๋ฐ˜ํ™˜
792
- accumulated_response = self.response_cleaner.clean_response(creative_response)
793
-
794
- if show_progress:
795
- quality_msg = {
796
- 'ko': f"โœ… ํ’ˆ์งˆ ์ถฉ์กฑ (์ ์ˆ˜: {quality_score:.2f})\n์กฐ๊ธฐ ์™„๋ฃŒ!\n",
797
- 'en': f"โœ… Quality met (score: {quality_score:.2f})\nEarly completion!\n",
798
- 'ja': f"โœ… ๅ“่ณชๆบ€่ถณ (ใ‚นใ‚ณใ‚ข: {quality_score:.2f})\nๆ—ฉๆœŸๅฎŒไบ†!\n",
799
- 'zh': f"โœ… ่ดจ้‡ๆปก่ถณ (ๅˆ†ๆ•ฐ: {quality_score:.2f})\nๆๅ‰ๅฎŒๆˆ!\n"
800
- }
801
- agent_thoughts += quality_msg.get(lang, quality_msg['en'])
802
-
803
- yield accumulated_response, agent_thoughts
804
- return
805
-
806
- # === 4๋‹จ๊ณ„: ์ตœ์ข… ํ†ตํ•ฉ (์ŠคํŠธ๋ฆฌ๋ฐ) ===
807
- if show_progress:
808
- final_msg = {
809
- 'ko': "โœ… ์ตœ์ข… ํ†ตํ•ฉ ์ค‘...\n\n",
810
- 'en': "โœ… Final integration in progress...\n\n",
811
- 'ja': "โœ… ๆœ€็ต‚็ตฑๅˆไธญ...\n\n",
812
- 'zh': "โœ… ๆœ€็ปˆๆ•ดๅˆไธญ...\n\n"
813
- }
814
- agent_thoughts += final_msg.get(lang, final_msg['en'])
815
- yield accumulated_response, agent_thoughts
816
-
817
- # ์ตœ์ข… ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
818
- final_prompt_templates = {
819
- 'ko': f"""
820
- ์งˆ๋ฌธ: {query}
821
- ์ฐฝ์˜์„ฑ๋‹ต๋ณ€: {creative_response}
822
- ๋น„ํ‰ํ”ผ๋“œ๋ฐฑ: {critic_response}
823
- ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
824
- ์ตœ์ข…ํ†ตํ•ฉโ†’์™„๋ฒฝ๋‹ต๋ณ€. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€.""",
825
- 'en': f"""
826
- Question: {query}
827
- Creative answer: {creative_response}
828
- Critic feedback: {critic_response}
829
- Supervisor structure: {supervisor_response}
830
- Final integrationโ†’perfect answer. No markdown headers (#, ##, ###).""",
831
- 'ja': f"""
832
- ่ณชๅ•: {query}
833
- ๅ‰ต้€ ็š„ๅ›ž็ญ”: {creative_response}
834
- ๆ‰น่ฉ•ใƒ•ใ‚ฃใƒผใƒ‰ใƒใƒƒใ‚ฏ: {critic_response}
835
- ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
836
- ๆœ€็ต‚็ตฑๅˆโ†’ๅฎŒ็’งใชๅ›ž็ญ”ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚""",
837
- 'zh': f"""
838
- ้—ฎ้ข˜: {query}
839
- ๅˆ›ๆ„็ญ”ๆกˆ: {creative_response}
840
- ่ฏ„่ฎบๅ้ฆˆ: {critic_response}
841
- ไธป็ฎก็ป“ๆž„: {supervisor_response}
842
- ๆœ€็ปˆๆ•ดๅˆโ†’ๅฎŒ็พŽ็ญ”ๆกˆใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
843
- }
844
-
845
- final_prompt = final_prompt_templates.get(lang, final_prompt_templates['en'])
846
-
847
- final_task = self.llm.chat_stream_async(
848
- messages=[
849
- {"role": "system", "content": self.compact_prompts[AgentRole.FINALIZER]},
850
- {"role": "user", "content": final_prompt}
851
- ],
852
- temperature=0.5,
853
- max_tokens=2500
854
- )
855
-
856
- # ์ตœ์ข… ๋‹ต๋ณ€ ์ŠคํŠธ๋ฆฌ๋ฐ
857
- accumulated_response = ""
858
-
859
- async for chunk in final_task:
860
- accumulated_response += chunk
861
- # ์‹ค์‹œ๊ฐ„ ์ •๋ฆฌ
862
- cleaned_response = self.response_cleaner.clean_response(accumulated_response)
863
- yield cleaned_response, agent_thoughts
864
-
865
- # ์ตœ์ข… ์ •๋ฆฌ
866
- accumulated_response = self.response_cleaner.clean_response(accumulated_response)
867
-
868
- # ์ฒ˜๋ฆฌ ์‹œ๊ฐ„ ์ถ”๊ฐ€ (์–ธ์–ด๋ณ„)
869
- processing_time = time.time() - start_time
870
- time_msg = {
871
- 'ko': f"\n\n---\nโšก ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {processing_time:.1f}์ดˆ",
872
- 'en': f"\n\n---\nโšก Processing time: {processing_time:.1f} seconds",
873
- 'ja': f"\n\n---\nโšก ๅ‡ฆ็†ๆ™‚้–“: {processing_time:.1f}็ง’",
874
- 'zh': f"\n\n---\nโšก ๅค„็†ๆ—ถ้—ด: {processing_time:.1f}็ง’"
875
- }
876
- accumulated_response += time_msg.get(lang, time_msg['en'])
877
-
878
- yield accumulated_response, agent_thoughts
879
-
880
- except Exception as e:
881
- error_msg = {
882
- 'ko': f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}",
883
- 'en': f"โŒ Error occurred: {str(e)}",
884
- 'ja': f"โŒ ใ‚จใƒฉใƒผ็™บ็”Ÿ: {str(e)}",
885
- 'zh': f"โŒ ๅ‘็”Ÿ้”™่ฏฏ: {str(e)}"
886
- }
887
- yield error_msg.get(lang, error_msg['en']), agent_thoughts
888
-
889
- async def _run_critic_async(self, prompt: str) -> str:
890
- """๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹คํ–‰ with error handling"""
891
- try:
892
- response = ""
893
- async for chunk in self.llm.chat_stream_async(
894
- messages=[
895
- {"role": "system", "content": self.compact_prompts[AgentRole.CRITIC]},
896
- {"role": "user", "content": prompt}
897
- ],
898
- temperature=0.2,
899
- max_tokens=500
900
- ):
901
- response += chunk
902
- return response
903
- except Exception as e:
904
- # ์–ธ์–ด ๊ฐ์ง€
905
- lang = 'ko' if '์งˆ๋ฌธ' in prompt else 'en'
906
- error_msg = {
907
- 'ko': "๋น„ํ‰ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜",
908
- 'en': "Error during critic processing",
909
- 'ja': "ๆ‰น่ฉ•ๅ‡ฆ็†ไธญใฎใ‚จใƒฉใƒผ",
910
- 'zh': "่ฏ„่ฎบๅค„็†ไธญๅ‡บ้”™"
911
- }
912
- return error_msg.get(lang, error_msg['en'])
913
-
914
- def _format_search_results(self, results: List[Dict]) -> str:
915
- """๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์••์ถ• ํฌ๋งท"""
916
- if not results:
917
- return "No search results"
918
-
919
- formatted = []
920
- for i, r in enumerate(results[:3], 1):
921
- title = r.get('title', '')[:50]
922
- desc = r.get('description', '')[:100]
923
- formatted.append(f"[{i}]{title}:{desc}")
924
-
925
- return " | ".join(formatted)
926
-
927
-
928
- # ============================================================================
929
- # Gradio UI (์ตœ์ ํ™” ๋ฒ„์ „ - ์บ์‹ฑ ์ œ๊ฑฐ)
930
- # ============================================================================
931
-
932
- def create_optimized_gradio_interface():
933
- """์ตœ์ ํ™”๋œ Gradio ์ธํ„ฐํŽ˜์ด์Šค (์บ์‹ฑ ์—†์Œ)"""
934
-
935
- # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
936
- system = SpeedOptimizedMultiAgentSystem()
937
-
938
- def process_query_optimized(
939
- message: str,
940
- history: List[Dict],
941
- use_search: bool,
942
- show_agent_thoughts: bool,
943
- search_count: int,
944
- language_mode: str
945
- ):
946
- """์ตœ์ ํ™”๋œ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ - ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„์ „"""
947
-
948
- if not message:
949
- yield history, "", ""
950
- return
951
-
952
- # ์–ธ์–ด ์„ค์ •
953
- if language_mode == "Auto":
954
- lang = None # ์ž๋™ ๊ฐ์ง€
955
- else:
956
- lang_map = {"Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh"}
957
- lang = lang_map.get(language_mode, None)
958
-
959
- # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
960
- try:
961
- import nest_asyncio
962
- nest_asyncio.apply()
963
- except ImportError:
964
- pass
965
-
966
- try:
967
- # ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰ (๋™๊ธฐํ™”)
968
- search_results = []
969
- search_display = ""
970
-
971
- # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€ (ํ•„์š”ํ•œ ๊ฒฝ์šฐ)
972
- detected_lang = lang or system.language_detector.detect_language(message)
973
-
974
- if use_search:
975
- # ๊ฒ€์ƒ‰ ์ƒํƒœ ํ‘œ์‹œ
976
- processing_msg = {
977
- 'ko': "โšก ๊ณ ์† ์ฒ˜๋ฆฌ ์ค‘...",
978
- 'en': "โšก High-speed processing...",
979
- 'ja': "โšก ้ซ˜้€Ÿๅ‡ฆ็†ไธญ...",
980
- 'zh': "โšก ้ซ˜้€Ÿๅค„็†ไธญ..."
981
- }
982
- history_with_message = history + [
983
- {"role": "user", "content": message},
984
- {"role": "assistant", "content": processing_msg.get(detected_lang, processing_msg['en'])}
985
- ]
986
- yield history_with_message, "", ""
987
-
988
- # ๋น„๋™๊ธฐ ๊ฒ€์ƒ‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
989
- async def search_wrapper():
990
- return await system.search.search_async(message, count=search_count, lang=detected_lang)
991
-
992
- loop = asyncio.new_event_loop()
993
- asyncio.set_event_loop(loop)
994
- search_results = loop.run_until_complete(search_wrapper())
995
-
996
- if search_results:
997
- ref_label = {
998
- 'ko': "๐Ÿ“š ์ฐธ๊ณ  ์ž๋ฃŒ",
999
- 'en': "๐Ÿ“š References",
1000
- 'ja': "๐Ÿ“š ๅ‚่€ƒ่ณ‡ๆ–™",
1001
- 'zh': "๐Ÿ“š ๅ‚่€ƒ่ต„ๆ–™"
1002
- }
1003
- search_display = f"{ref_label.get(detected_lang, ref_label['en'])}\n\n"
1004
- for i, result in enumerate(search_results[:3], 1):
1005
- search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
1006
- search_display += f" {result['description'][:100]}...\n\n"
1007
-
1008
- # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
1009
- current_history = history + [{"role": "user", "content": message}]
1010
-
1011
- # ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ์„ ์œ„ํ•œ ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ
1012
- async def stream_responses():
1013
- """์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ"""
1014
- async for response, thoughts in system.parallel_process_agents(
1015
- query=message,
1016
- search_results=search_results,
1017
- show_progress=show_agent_thoughts,
1018
- lang=detected_lang
1019
- ):
1020
- yield response, thoughts
1021
-
1022
- # ์ƒˆ ์ด๋ฒคํŠธ ๋ฃจํ”„์—์„œ ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ
1023
- loop = asyncio.new_event_loop()
1024
- asyncio.set_event_loop(loop)
1025
-
1026
- # ๋น„๋™๊ธฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์ˆœํšŒ
1027
- gen = stream_responses()
1028
-
1029
- while True:
1030
- try:
1031
- # ๋‹ค์Œ ํ•ญ๋ชฉ ๊ฐ€์ ธ์˜ค๊ธฐ
1032
- task = asyncio.ensure_future(gen.__anext__(), loop=loop)
1033
- response, thoughts = loop.run_until_complete(task)
1034
-
1035
- # ์‹ค์‹œ๊ฐ„ ์—…๋ฐ์ดํŠธ
1036
- updated_history = current_history + [
1037
- {"role": "assistant", "content": response}
1038
- ]
1039
- yield updated_history, thoughts, search_display
1040
-
1041
- except StopAsyncIteration:
1042
- break
1043
-
1044
- except Exception as e:
1045
- error_history = history + [
1046
- {"role": "user", "content": message},
1047
- {"role": "assistant", "content": f"โŒ Error: {str(e)}"}
1048
- ]
1049
- yield error_history, "", ""
1050
- finally:
1051
- # ๋ฃจํ”„ ์ •๋ฆฌ
1052
- try:
1053
- loop.close()
1054
- except:
1055
- pass
1056
-
1057
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค
1058
- with gr.Blocks(
1059
- title="โšก Speed-Optimized Multi-Agent System (No Cache)",
1060
- theme=gr.themes.Soft(),
1061
- css="""
1062
- .gradio-container {
1063
- max-width: 1400px !important;
1064
- margin: auto !important;
1065
- }
1066
- """
1067
- ) as demo:
1068
- gr.Markdown("""
1069
- # โšก Enhanced Multi-Agent RAG System (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
1070
- **Complex questions processed within 5-8 seconds | Multi-language support**
1071
-
1072
- **Optimization Features:**
1073
- - ๐Ÿš€ Parallel Processing: Concurrent agent execution
1074
- - โšก Stream Buffering: Network optimization
1075
- - ๐ŸŽฏ Early Termination: Complete immediately when quality is met
1076
- - ๐ŸŒ Multi-language: Auto-detect Korean/English/Japanese/Chinese
1077
- - โŒ **Caching Disabled**: ์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ
1078
- """)
1079
-
1080
- with gr.Row():
1081
- with gr.Column(scale=3):
1082
- chatbot = gr.Chatbot(
1083
- height=500,
1084
- label="๐Ÿ’ฌ Chat",
1085
- type="messages"
1086
- )
1087
-
1088
- msg = gr.Textbox(
1089
- label="Enter complex question",
1090
- placeholder="Enter complex questions requiring analysis, strategy, or creative solutions...",
1091
- lines=3
1092
- )
1093
-
1094
- with gr.Row():
1095
- submit = gr.Button("โšก High-Speed Process", variant="primary")
1096
- clear = gr.Button("๐Ÿ”„ Reset")
1097
-
1098
- with gr.Accordion("๐Ÿค– Agent Processing", open=False):
1099
- agent_thoughts = gr.Markdown()
1100
-
1101
- with gr.Accordion("๐Ÿ“š Search Sources", open=False):
1102
- search_sources = gr.Markdown()
1103
-
1104
- with gr.Column(scale=1):
1105
- gr.Markdown("**โš™๏ธ Settings**")
1106
-
1107
- language_mode = gr.Radio(
1108
- choices=["Auto", "Korean", "English", "Japanese", "Chinese"],
1109
- value="Auto",
1110
- label="๐ŸŒ Language Mode"
1111
- )
1112
-
1113
- use_search = gr.Checkbox(
1114
- label="๐Ÿ” Use Web Search",
1115
- value=True
1116
- )
1117
-
1118
- show_agent_thoughts = gr.Checkbox(
1119
- label="๐Ÿง  Show Processing",
1120
- value=True
1121
- )
1122
-
1123
- search_count = gr.Slider(
1124
- minimum=3,
1125
- maximum=10,
1126
- value=5,
1127
- step=1,
1128
- label="Search Results Count"
1129
- )
1130
-
1131
- gr.Markdown("""
1132
- **โšก Optimization Status**
1133
-
1134
- **Active Optimizations:**
1135
- - โœ… Parallel Processing
1136
- - โŒ ~~Smart Caching~~ (์ œ๊ฑฐ๋จ)
1137
- - โœ… Buffer Streaming
1138
- - โœ… Early Termination
1139
- - โœ… Compressed Prompts
1140
- - โœ… Multi-language Support
1141
- - โœ… Error Recovery
1142
-
1143
- **Expected Processing Time:**
1144
- - Simple Query: 3-5 seconds
1145
- - Complex Query: 5-8 seconds
1146
- - Very Complex: 8-12 seconds
1147
- """)
1148
-
1149
- # ๋ณต์žกํ•œ ์งˆ๋ฌธ ์˜ˆ์ œ (๋‹ค๊ตญ์–ด)
1150
- gr.Examples(
1151
- examples=[
1152
- # Korean
1153
- "AI ๊ธฐ์ˆ ์ด ํ–ฅํ›„ 10๋…„๊ฐ„ ํ•œ๊ตญ ๊ฒฝ์ œ์— ๋ฏธ์น  ์˜ํ–ฅ์„ ๋‹ค๊ฐ๋„๋กœ ๋ถ„์„ํ•˜๊ณ  ๋Œ€์‘ ์ „๋žต์„ ์ œ์‹œํ•ด์ค˜",
1154
- "์Šคํƒ€ํŠธ์—…์ด ๋Œ€๊ธฐ์—…๊ณผ ๊ฒฝ์Ÿํ•˜๊ธฐ ์œ„ํ•œ ํ˜์‹ ์ ์ธ ์ „๋žต์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ˆ˜๋ฆฝํ•ด์ค˜",
1155
- # English
1156
- "Analyze the multifaceted impact of quantum computing on current encryption systems and propose alternatives",
1157
- "Design 5 innovative business models for climate change mitigation with practical implementation details",
1158
- # Japanese
1159
- "ใƒกใ‚ฟใƒใƒผใ‚นๆ™‚ไปฃใฎๆ•™่‚ฒ้ฉๆ–ฐๆ–นๆกˆใ‚’ๅฎŸ่ฃ…ๅฏ่ƒฝใชใƒฌใƒ™ใƒซใงๆๆกˆใ—ใฆใใ ใ•ใ„",
1160
- # Chinese
1161
- "ๅˆ†ๆžไบบๅทฅๆ™บ่ƒฝๅฏนๆœชๆฅๅๅนดๅ…จ็ƒ็ปๆตŽ็š„ๅฝฑๅ“ๅนถๆๅ‡บๅบ”ๅฏน็ญ–็•ฅ"
1162
- ],
1163
- inputs=msg
1164
- )
1165
-
1166
- # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
1167
- submit.click(
1168
- process_query_optimized,
1169
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
1170
- outputs=[chatbot, agent_thoughts, search_sources]
1171
- ).then(
1172
- lambda: "",
1173
- None,
1174
- msg
1175
- )
1176
-
1177
- msg.submit(
1178
- process_query_optimized,
1179
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
1180
- outputs=[chatbot, agent_thoughts, search_sources]
1181
- ).then(
1182
- lambda: "",
1183
- None,
1184
- msg
1185
- )
1186
-
1187
- clear.click(
1188
- lambda: ([], "", ""),
1189
- None,
1190
- [chatbot, agent_thoughts, search_sources]
1191
- )
1192
-
1193
- return demo
1194
-
1195
-
1196
- # ============================================================================
1197
- # ๋ฉ”์ธ ์‹คํ–‰
1198
- # ============================================================================
1199
-
1200
- if __name__ == "__main__":
1201
- print("""
1202
- โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
1203
- โ•‘ โšก Speed-Optimized Multi-Agent System (No Cache) โšก โ•‘
1204
- โ•‘ โ•‘
1205
- โ•‘ High-speed AI system processing complex questions โ•‘
1206
- โ•‘ โ•‘
1207
- โ•‘ Features: โ•‘
1208
- โ•‘ โ€ข Multi-language support (KO/EN/JA/ZH) โ•‘
1209
- โ•‘ โ€ข Improved error recovery โ•‘
1210
- โ•‘ โ€ข NO CACHING (์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ) โ•‘
1211
- โ•‘ โ€ข Adaptive stream buffering โ•‘
1212
- โ•‘ โ€ข Response cleaning & formatting โ•‘
1213
- โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
1214
- """)
1215
-
1216
- # API ํ‚ค ํ™•์ธ
1217
- if not os.getenv("FIREWORKS_API_KEY"):
1218
- print("\nโš ๏ธ FIREWORKS_API_KEY is not set.")
1219
-
1220
- if not os.getenv("BRAVE_SEARCH_API_KEY"):
1221
- print("\nโš ๏ธ BRAVE_SEARCH_API_KEY is not set.")
1222
-
1223
- # Gradio ์•ฑ ์‹คํ–‰
1224
- demo = create_optimized_gradio_interface()
1225
-
1226
- is_hf_spaces = os.getenv("SPACE_ID") is not None
1227
-
1228
- if is_hf_spaces:
1229
- print("\n๐Ÿค— Running in optimized mode on Hugging Face Spaces (No Cache)...")
1230
- demo.launch(server_name="0.0.0.0", server_port=7860)
1231
- else:
1232
- print("\n๐Ÿ’ป Running in optimized mode on local environment (No Cache)...")
1233
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False), '', response, flags=re.MULTILINE)
1234
-
1235
- # ํ…Œ์ด๋ธ” ๊ตฌ๋ถ„์„  ์ œ๊ฑฐ (|---|, |:---|, |---:|, |:---:| ๋“ฑ)
1236
- response = re.sub(r'^\|[\s\-:]+\|.*
1237
-
1238
-
1239
- # ============================================================================
1240
- # ํ†ตํ•ฉ ์ตœ์ ํ™” ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
1241
- # ============================================================================
1242
-
1243
- class SpeedOptimizedMultiAgentSystem:
1244
- """์†๋„ ์ตœ์ ํ™”๋œ ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์—†์Œ)"""
1245
-
1246
- def __init__(self):
1247
- self.llm = OptimizedFireworksClient()
1248
- self.search = AsyncBraveSearch()
1249
- self.reasoning = LightweightReasoningChain()
1250
- self.quality_checker = QualityChecker()
1251
- self.streaming = OptimizedStreaming()
1252
- self.language_detector = LanguageDetector()
1253
- self.response_cleaner = ResponseCleaner()
1254
-
1255
- # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํ’€
1256
- self.executor = ThreadPoolExecutor(max_workers=4)
1257
-
1258
- def _init_compact_prompts(self, lang: str = 'ko') -> Dict:
1259
- """์••์ถ•๋œ ๊ณ ํšจ์œจ ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)"""
1260
- prompts = {
1261
- 'ko': {
1262
- AgentRole.SUPERVISOR: """[๊ฐ๋…์ž-๊ตฌ์กฐ์„ค๊ณ„]
1263
- ์ฆ‰์‹œ๋ถ„์„: ํ•ต์‹ฌ์˜๋„+ํ•„์š”์ •๋ณด+๋‹ต๋ณ€๊ตฌ์กฐ
1264
- ์ถœ๋ ฅ: 5๊ฐœ ํ•ต์‹ฌํฌ์ธํŠธ(๊ฐ 1๋ฌธ์žฅ)
1265
- ์ถ”๋ก ์ฒด๊ณ„ ๋ช…์‹œ""",
1266
-
1267
- AgentRole.CREATIVE: """[์ฐฝ์˜์„ฑ์ƒ์„ฑ์ž]
1268
- ์ž…๋ ฅ๊ตฌ์กฐ ๋”ฐ๋ผ ์ฐฝ์˜์  ํ™•์žฅ
1269
- ์‹ค์šฉ์˜ˆ์‹œ+ํ˜์‹ ์ ‘๊ทผ+๊ตฌ์ฒด์กฐ์–ธ
1270
- ๋ถˆํ•„์š”์„ค๋ช… ์ œ๊ฑฐ""",
1271
-
1272
- AgentRole.CRITIC: """[๋น„ํ‰์ž-๊ฒ€์ฆ]
1273
- ์‹ ์†๊ฒ€ํ† : ์ •ํ™•์„ฑ/๋…ผ๋ฆฌ์„ฑ/์‹ค์šฉ์„ฑ
1274
- ๊ฐœ์„ ํฌ์ธํŠธ 3๊ฐœ๋งŒ
1275
- ๊ฐ 2๋ฌธ์žฅ ์ด๋‚ด""",
1276
-
1277
- AgentRole.FINALIZER: """[์ตœ์ข…ํ†ตํ•ฉ]
1278
- ๋ชจ๋“ ์˜๊ฒฌ ์ข…ํ•ฉโ†’์ตœ์ ๋‹ต๋ณ€
1279
- ๋ช…ํ™•๊ตฌ์กฐ+์‹ค์šฉ์ •๋ณด+์ฐฝ์˜๊ท ํ˜•
1280
- ๋ฐ”๋กœ ํ•ต์‹ฌ ๋‚ด์šฉ๋ถ€ํ„ฐ ์‹œ์ž‘. ๋ถˆํ•„์š”ํ•œ ํ—ค๋”๋‚˜ ๋งˆํฌ์—… ์—†์ด. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€."""
1281
- },
1282
- 'en': {
1283
- AgentRole.SUPERVISOR: """[Supervisor-Structure]
1284
- Immediate analysis: core intent+required info+answer structure
1285
- Output: 5 key points (1 sentence each)
1286
- Clear reasoning framework""",
1287
-
1288
- AgentRole.CREATIVE: """[Creative Generator]
1289
- Follow structure, expand creatively
1290
- Practical examples+innovative approach+specific advice
1291
- Remove unnecessary explanations""",
1292
-
1293
- AgentRole.CRITIC: """[Critic-Verification]
1294
- Quick review: accuracy/logic/practicality
1295
- Only 3 improvement points
1296
- Max 2 sentences each""",
1297
-
1298
- AgentRole.FINALIZER: """[Final Integration]
1299
- Synthesize all inputsโ†’optimal answer
1300
- Clear structure+practical info+creative balance
1301
- Start with core content directly. No unnecessary headers or markup. No markdown headers (#, ##, ###)."""
1302
- },
1303
- 'ja': {
1304
- AgentRole.SUPERVISOR: """[็›ฃ็ฃ่€…-ๆง‹้€ ่จญ่จˆ]
1305
- ๅณๆ™‚ๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›ณ+ๅฟ…่ฆๆƒ…ๅ ฑ+ๅ›ž็ญ”ๆง‹้€ 
1306
- ๅ‡บๅŠ›๏ผš5ใคใฎๆ ธๅฟƒใƒใ‚คใƒณใƒˆ๏ผˆๅ„1ๆ–‡๏ผ‰
1307
- ๆŽจ่ซ–ไฝ“็ณปๆ˜Ž็คบ""",
1308
-
1309
- AgentRole.CREATIVE: """[ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…]
1310
- ๅ…ฅๅŠ›ๆง‹้€ ใซๅพ“ใฃใฆๅ‰ต้€ ็š„ๆ‹กๅผต
1311
- ๅฎŸ็”จไพ‹+้ฉๆ–ฐ็š„ใ‚ขใƒ—ใƒญใƒผใƒ+ๅ…ทไฝ“็š„ใ‚ขใƒ‰ใƒใ‚คใ‚น
1312
- ไธ่ฆใช่ชฌๆ˜Žๅ‰Š้™ค""",
1313
-
1314
- AgentRole.CRITIC: """[ๆ‰น่ฉ•่€…-ๆคœ่จผ]
1315
- ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผ๏ผšๆญฃ็ขบๆ€ง/่ซ–็†ๆ€ง/ๅฎŸ็”จๆ€ง
1316
- ๆ”นๅ–„ใƒใ‚คใƒณใƒˆ3ใคใฎใฟ
1317
- ๅ„2ๆ–‡ไปฅๅ†…""",
1318
-
1319
- AgentRole.FINALIZER: """[ๆœ€็ต‚็ตฑๅˆ]
1320
- ๅ…จๆ„่ฆ‹็ตฑๅˆโ†’ๆœ€้ฉๅ›ž็ญ”
1321
- ๆ˜Ž็ขบๆง‹้€ +ๅฎŸ็”จๆƒ…ๅ ฑ+ๅ‰ต้€ ๆ€ง๏ฟฝ๏ฟฝ๏ฟฝใƒฉใƒณใ‚น
1322
- ๆ ธๅฟƒๅ†…ๅฎนใ‹ใ‚‰็›ดๆŽฅ้–‹ๅง‹ใ€‚ไธ่ฆใชใƒ˜ใƒƒใƒ€ใƒผใ‚„ใƒžใƒผใ‚ฏใ‚ขใƒƒใƒ—ใชใ—ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚"""
1323
- },
1324
- 'zh': {
1325
- AgentRole.SUPERVISOR: """[ไธป็ฎก-็ป“ๆž„่ฎพ่ฎก]
1326
- ็ซ‹ๅณๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›พ+ๆ‰€้œ€ไฟกๆฏ+็ญ”ๆกˆ็ป“ๆž„
1327
- ่พ“ๅ‡บ๏ผš5ไธชๆ ธๅฟƒ่ฆ็‚น๏ผˆๆฏไธช1ๅฅ๏ผ‰
1328
- ๆŽจ็†ไฝ“็ณปๆ˜Ž็กฎ""",
1329
-
1330
- AgentRole.CREATIVE: """[ๅˆ›ๆ„็”Ÿๆˆๅ™จ]
1331
- ๆŒ‰็ป“ๆž„ๅˆ›้€ ๆ€งๆ‰ฉๅฑ•
1332
- ๅฎž็”จ็คบไพ‹+ๅˆ›ๆ–ฐๆ–นๆณ•+ๅ…ทไฝ“ๅปบ่ฎฎ
1333
- ๅˆ ้™คไธๅฟ…่ฆ็š„่งฃ้‡Š""",
1334
-
1335
- AgentRole.CRITIC: """[่ฏ„่ฎบๅฎถ-้ชŒ่ฏ]
1336
- ๅฟซ้€ŸๅฎกๆŸฅ๏ผšๅ‡†็กฎๆ€ง/้€ป่พ‘ๆ€ง/ๅฎž็”จๆ€ง
1337
- ไป…3ไธชๆ”น่ฟ›็‚น
1338
- ๆฏไธชๆœ€ๅคš2ๅฅ""",
1339
-
1340
- AgentRole.FINALIZER: """[ๆœ€็ปˆๆ•ดๅˆ]
1341
- ็ปผๅˆๆ‰€ๆœ‰ๆ„่งโ†’ๆœ€ไฝณ็ญ”ๆกˆ
1342
- ๆธ…ๆ™ฐ็ป“ๆž„+ๅฎž็”จไฟกๆฏ+ๅˆ›ๆ„ๅนณ่กก
1343
- ็›ดๆŽฅไปŽๆ ธๅฟƒๅ†…ๅฎนๅผ€ๅง‹ใ€‚ๆ— ้œ€ไธๅฟ…่ฆ็š„ๆ ‡้ข˜ๆˆ–ๆ ‡่ฎฐใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
1344
- }
1345
- }
1346
-
1347
- return prompts.get(lang, prompts['en'])
1348
-
1349
- async def parallel_process_agents(
1350
- self,
1351
- query: str,
1352
- search_results: List[Dict],
1353
- show_progress: bool = True,
1354
- lang: str = None
1355
- ) -> AsyncGenerator[Tuple[str, str], None]:
1356
- """๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ (์บ์‹ฑ ์—†์Œ)"""
1357
-
1358
- start_time = time.time()
1359
-
1360
- # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€
1361
- if lang is None:
1362
- lang = self.language_detector.detect_language(query)
1363
-
1364
- # ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
1365
- self.compact_prompts = self._init_compact_prompts(lang)
1366
-
1367
- search_context = self._format_search_results(search_results)
1368
- accumulated_response = ""
1369
- agent_thoughts = ""
1370
-
1371
- # ์ถ”๋ก  ํŒจํ„ด ๊ฒฐ์ •
1372
- reasoning_pattern = self.reasoning.get_reasoning_pattern(query, lang)
1373
-
1374
- try:
1375
- # === 1๋‹จ๊ณ„: ๊ฐ๋…์ž + ๊ฒ€์ƒ‰ ๋ณ‘๋ ฌ ์‹คํ–‰ ===
1376
- if show_progress:
1377
- progress_msg = {
1378
- 'ko': "๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘\n๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„ + ๐Ÿ” ์ถ”๊ฐ€ ๊ฒ€์ƒ‰ ๋™์‹œ ์ง„ํ–‰...\n\n",
1379
- 'en': "๐Ÿš€ Starting parallel processing\n๐Ÿ‘” Supervisor analysis + ๐Ÿ” Additional search in progress...\n\n",
1380
- 'ja': "๐Ÿš€ ไธฆๅˆ—ๅ‡ฆ็†้–‹ๅง‹\n๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž + ๐Ÿ” ่ฟฝๅŠ ๆคœ็ดขๅŒๆ™‚้€ฒ่กŒไธญ...\n\n",
1381
- 'zh': "๐Ÿš€ ๅผ€ๅง‹ๅนถ่กŒๅค„็†\n๐Ÿ‘” ไธป็ฎกๅˆ†ๆž + ๐Ÿ” ้™„ๅŠ ๆœ็ดขๅŒๆ—ถ่ฟ›่กŒ...\n\n"
1382
- }
1383
- agent_thoughts = progress_msg.get(lang, progress_msg['en'])
1384
- yield accumulated_response, agent_thoughts
1385
-
1386
- # ๊ฐ๋…์ž ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
1387
- supervisor_prompt_templates = {
1388
- 'ko': f"""
1389
- ์งˆ๋ฌธ: {query}
1390
- ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
1391
- ์ถ”๋ก ํŒจํ„ด: {reasoning_pattern}
1392
- ์ฆ‰์‹œ ํ•ต์‹ฌ๊ตฌ์กฐ 5๊ฐœ ์ œ์‹œ""",
1393
- 'en': f"""
1394
- Question: {query}
1395
- Search results: {search_context}
1396
- Reasoning pattern: {reasoning_pattern}
1397
- Immediately provide 5 key structures""",
1398
- 'ja': f"""
1399
- ่ณชๅ•: {query}
1400
- ๆคœ็ดข็ตๆžœ: {search_context}
1401
- ๆŽจ่ซ–ใƒ‘ใ‚ฟใƒผใƒณ: {reasoning_pattern}
1402
- ๅณๅบงใซ5ใคใฎๆ ธๅฟƒๆง‹้€ ใ‚’ๆ็คบ""",
1403
- 'zh': f"""
1404
- ้—ฎ้ข˜: {query}
1405
- ๆœ็ดข็ป“ๆžœ: {search_context}
1406
- ๆŽจ็†ๆจกๅผ: {reasoning_pattern}
1407
- ็ซ‹ๅณๆไพ›5ไธชๆ ธๅฟƒ็ป“ๆž„"""
1408
- }
1409
-
1410
- supervisor_prompt = supervisor_prompt_templates.get(lang, supervisor_prompt_templates['en'])
1411
-
1412
- supervisor_response = ""
1413
- supervisor_task = self.llm.chat_stream_async(
1414
- messages=[
1415
- {"role": "system", "content": self.compact_prompts[AgentRole.SUPERVISOR]},
1416
- {"role": "user", "content": supervisor_prompt}
1417
- ],
1418
- temperature=0.3,
1419
- max_tokens=500
1420
- )
1421
-
1422
- # ๊ฐ๋…์ž ์ŠคํŠธ๋ฆฌ๋ฐ (๋ฒ„ํผ๋ง)
1423
- async for chunk in self.streaming.buffer_and_yield(supervisor_task):
1424
- supervisor_response += chunk
1425
- if show_progress and len(supervisor_response) < 300:
1426
- supervisor_label = {
1427
- 'ko': "๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„",
1428
- 'en': "๐Ÿ‘” Supervisor Analysis",
1429
- 'ja': "๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž",
1430
- 'zh': "๐Ÿ‘” ไธป็ฎกๅˆ†ๆž"
1431
- }
1432
- agent_thoughts = f"{supervisor_label.get(lang, supervisor_label['en'])}\n{supervisor_response[:300]}...\n\n"
1433
- yield accumulated_response, agent_thoughts
1434
-
1435
- # === 2๋‹จ๊ณ„: ์ฐฝ์˜์„ฑ + ๋น„ํ‰ ์ค€๋น„ ๋ณ‘๋ ฌ ===
1436
- if show_progress:
1437
- creative_msg = {
1438
- 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž + ๐Ÿ” ๋น„ํ‰์ž ์ค€๋น„...\n\n",
1439
- 'en': "๐ŸŽจ Creative Generator + ๐Ÿ” Critic preparing...\n\n",
1440
- 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€… + ๐Ÿ” ๆ‰น่ฉ•่€…ๆบ–ๅ‚™ไธญ...\n\n",
1441
- 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ + ๐Ÿ” ่ฏ„่ฎบๅฎถๅ‡†ๅค‡ไธญ...\n\n"
1442
- }
1443
- agent_thoughts += creative_msg.get(lang, creative_msg['en'])
1444
- yield accumulated_response, agent_thoughts
1445
-
1446
- # ์ฐฝ์˜์„ฑ ์ƒ์„ฑ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
1447
- creative_prompt_templates = {
1448
- 'ko': f"""
1449
- ์งˆ๋ฌธ: {query}
1450
- ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
1451
- ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
1452
- ์ฐฝ์˜์ +์‹ค์šฉ์  ๋‹ต๋ณ€ ์ฆ‰์‹œ์ƒ์„ฑ""",
1453
- 'en': f"""
1454
- Question: {query}
1455
- Supervisor structure: {supervisor_response}
1456
- Search results: {search_context}
1457
- Generate creative+practical answer immediately""",
1458
- 'ja': f"""
1459
- ่ณชๅ•: {query}
1460
- ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
1461
- ๆคœ็ดข็ตๆžœ: {search_context}
1462
- ๅ‰ต้€ ็š„+ๅฎŸ็”จ็š„ๅ›ž็ญ”ๅณๅบง็”Ÿๆˆ""",
1463
- 'zh': f"""
1464
- ้—ฎ้ข˜: {query}
1465
- ไธป็ฎก็ป“ๆž„: {supervisor_response}
1466
- ๆœ็ดข็ป“ๆžœ: {search_context}
1467
- ็ซ‹ๅณ็”Ÿๆˆๅˆ›ๆ„+ๅฎž็”จ็ญ”ๆกˆ"""
1468
- }
1469
-
1470
- creative_prompt = creative_prompt_templates.get(lang, creative_prompt_templates['en'])
1471
-
1472
- creative_response = ""
1473
- creative_partial = ""
1474
- critic_started = False
1475
- critic_response = ""
1476
-
1477
- creative_task = self.llm.chat_stream_async(
1478
- messages=[
1479
- {"role": "system", "content": self.compact_prompts[AgentRole.CREATIVE]},
1480
- {"role": "user", "content": creative_prompt}
1481
- ],
1482
- temperature=0.8,
1483
- max_tokens=1500
1484
- )
1485
-
1486
- # ์ฐฝ์˜์„ฑ ์ŠคํŠธ๋ฆฌ๋ฐ + ๋น„ํ‰์ž ์กฐ๊ธฐ ์‹œ์ž‘
1487
- async for chunk in self.streaming.buffer_and_yield(creative_task):
1488
- creative_response += chunk
1489
- creative_partial += chunk
1490
-
1491
- # ์ฐฝ์˜์„ฑ ์‘๋‹ต์ด 500์ž ๋„˜์œผ๋ฉด ๋น„ํ‰์ž ์‹œ์ž‘
1492
- if len(creative_partial) > 500 and not critic_started:
1493
- critic_started = True
1494
-
1495
- # ๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
1496
- critic_prompt_templates = {
1497
- 'ko': f"""
1498
- ์›๋ณธ์งˆ๋ฌธ: {query}
1499
- ์ฐฝ์˜์„ฑ๋‹ต๋ณ€(์ผ๋ถ€): {creative_partial}
1500
- ์‹ ์†๊ฒ€ํ† โ†’๊ฐœ์„ ์ 3๊ฐœ""",
1501
- 'en': f"""
1502
- Original question: {query}
1503
- Creative answer (partial): {creative_partial}
1504
- Quick reviewโ†’3 improvements""",
1505
- 'ja': f"""
1506
- ๅ…ƒใฎ่ณชๅ•: {query}
1507
- ๅ‰ต้€ ็š„ๅ›ž็ญ”๏ผˆไธ€้ƒจ๏ผ‰: {creative_partial}
1508
- ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผโ†’ๆ”นๅ–„็‚น3ใค""",
1509
- 'zh': f"""
1510
- ๅŽŸๅง‹้—ฎ้ข˜: {query}
1511
- ๅˆ›ๆ„็ญ”ๆกˆ๏ผˆ้ƒจๅˆ†๏ผ‰: {creative_partial}
1512
- ๅฟซ้€ŸๅฎกๆŸฅโ†’3ไธชๆ”น่ฟ›็‚น"""
1513
- }
1514
-
1515
- critic_prompt = critic_prompt_templates.get(lang, critic_prompt_templates['en'])
1516
-
1517
- critic_task = asyncio.create_task(
1518
- self._run_critic_async(critic_prompt)
1519
- )
1520
-
1521
- if show_progress:
1522
- display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
1523
- creative_label = {
1524
- 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž",
1525
- 'en': "๐ŸŽจ Creative Generator",
1526
- 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…",
1527
- 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ"
1528
- }
1529
- agent_thoughts = f"{creative_label.get(lang, creative_label['en'])}\n{display_creative}\n\n"
1530
- yield accumulated_response, agent_thoughts
1531
-
1532
- # ๋น„ํ‰์ž ๊ฒฐ๊ณผ ๋Œ€๊ธฐ
1533
- if critic_started:
1534
- critic_response = await critic_task
1535
-
1536
- if show_progress:
1537
- critic_label = {
1538
- 'ko': "๐Ÿ” ๋น„ํ‰์ž ๊ฒ€ํ† ",
1539
- 'en': "๐Ÿ” Critic Review",
1540
- 'ja': "๐Ÿ” ๆ‰น่ฉ•่€…ใƒฌใƒ“ใƒฅใƒผ",
1541
- 'zh': "๐Ÿ” ่ฏ„่ฎบๅฎถๅฎกๆŸฅ"
1542
- }
1543
- agent_thoughts += f"{critic_label.get(lang, critic_label['en'])}\n{critic_response[:200]}...\n\n"
1544
- yield accumulated_response, agent_thoughts
1545
-
1546
- # === 3๋‹จ๊ณ„: ํ’ˆ์งˆ ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข…๋ฃŒ ===
1547
- quality_score, need_more = self.quality_checker.evaluate_response(
1548
- creative_response, query, lang
1549
- )
1550
-
1551
- if not need_more and quality_score > 0.85:
1552
- # ํ’ˆ์งˆ์ด ์ถฉ๋ถ„ํžˆ ๋†’์œผ๋ฉด ๋ฐ”๋กœ ๋ฐ˜ํ™˜
1553
- accumulated_response = self.response_cleaner.clean_response(creative_response)
1554
-
1555
- if show_progress:
1556
- quality_msg = {
1557
- 'ko': f"โœ… ํ’ˆ์งˆ ์ถฉ์กฑ (์ ์ˆ˜: {quality_score:.2f})\n์กฐ๊ธฐ ์™„๋ฃŒ!\n",
1558
- 'en': f"โœ… Quality met (score: {quality_score:.2f})\nEarly completion!\n",
1559
- 'ja': f"โœ… ๅ“่ณชๆบ€่ถณ (ใ‚นใ‚ณใ‚ข: {quality_score:.2f})\nๆ—ฉๆœŸๅฎŒไบ†!\n",
1560
- 'zh': f"โœ… ่ดจ้‡ๆปก่ถณ (ๅˆ†ๆ•ฐ: {quality_score:.2f})\nๆๅ‰ๅฎŒๆˆ!\n"
1561
- }
1562
- agent_thoughts += quality_msg.get(lang, quality_msg['en'])
1563
-
1564
- yield accumulated_response, agent_thoughts
1565
- return
1566
-
1567
- # === 4๋‹จ๊ณ„: ์ตœ์ข… ํ†ตํ•ฉ (์ŠคํŠธ๋ฆฌ๋ฐ) ===
1568
- if show_progress:
1569
- final_msg = {
1570
- 'ko': "โœ… ์ตœ์ข… ํ†ตํ•ฉ ์ค‘...\n\n",
1571
- 'en': "โœ… Final integration in progress...\n\n",
1572
- 'ja': "โœ… ๆœ€็ต‚็ตฑๅˆไธญ...\n\n",
1573
- 'zh': "โœ… ๆœ€็ปˆๆ•ดๅˆไธญ...\n\n"
1574
- }
1575
- agent_thoughts += final_msg.get(lang, final_msg['en'])
1576
- yield accumulated_response, agent_thoughts
1577
-
1578
- # ์ตœ์ข… ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
1579
- final_prompt_templates = {
1580
- 'ko': f"""
1581
- ์งˆ๋ฌธ: {query}
1582
- ์ฐฝ์˜์„ฑ๋‹ต๋ณ€: {creative_response}
1583
- ๋น„ํ‰ํ”ผ๋“œ๋ฐฑ: {critic_response}
1584
- ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
1585
- ์ตœ์ข…ํ†ตํ•ฉโ†’์™„๋ฒฝ๋‹ต๋ณ€. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€.""",
1586
- 'en': f"""
1587
- Question: {query}
1588
- Creative answer: {creative_response}
1589
- Critic feedback: {critic_response}
1590
- Supervisor structure: {supervisor_response}
1591
- Final integrationโ†’perfect answer. No markdown headers (#, ##, ###).""",
1592
- 'ja': f"""
1593
- ่ณชๅ•: {query}
1594
- ๅ‰ต้€ ็š„ๅ›ž็ญ”: {creative_response}
1595
- ๆ‰น่ฉ•ใƒ•ใ‚ฃใƒผใƒ‰ใƒใƒƒใ‚ฏ: {critic_response}
1596
- ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
1597
- ๆœ€็ต‚็ตฑๅˆโ†’ๅฎŒ็’งใชๅ›ž็ญ”ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚""",
1598
- 'zh': f"""
1599
- ้—ฎ้ข˜: {query}
1600
- ๅˆ›ๆ„็ญ”ๆกˆ: {creative_response}
1601
- ่ฏ„่ฎบๅ้ฆˆ: {critic_response}
1602
- ไธป็ฎก็ป“ๆž„: {supervisor_response}
1603
- ๆœ€็ปˆๆ•ดๅˆโ†’ๅฎŒ็พŽ็ญ”ๆกˆใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
1604
- }
1605
-
1606
- final_prompt = final_prompt_templates.get(lang, final_prompt_templates['en'])
1607
-
1608
- final_task = self.llm.chat_stream_async(
1609
- messages=[
1610
- {"role": "system", "content": self.compact_prompts[AgentRole.FINALIZER]},
1611
- {"role": "user", "content": final_prompt}
1612
- ],
1613
- temperature=0.5,
1614
- max_tokens=2500
1615
- )
1616
-
1617
- # ์ตœ์ข… ๋‹ต๋ณ€ ์ŠคํŠธ๋ฆฌ๋ฐ
1618
- accumulated_response = ""
1619
-
1620
- async for chunk in final_task:
1621
- accumulated_response += chunk
1622
- # ์‹ค์‹œ๊ฐ„ ์ •๋ฆฌ
1623
- cleaned_response = self.response_cleaner.clean_response(accumulated_response)
1624
- yield cleaned_response, agent_thoughts
1625
-
1626
- # ์ตœ์ข… ์ •๋ฆฌ
1627
- accumulated_response = self.response_cleaner.clean_response(accumulated_response)
1628
-
1629
- # ์ฒ˜๋ฆฌ ์‹œ๊ฐ„ ์ถ”๊ฐ€ (์–ธ์–ด๋ณ„)
1630
- processing_time = time.time() - start_time
1631
- time_msg = {
1632
- 'ko': f"\n\n---\nโšก ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {processing_time:.1f}์ดˆ",
1633
- 'en': f"\n\n---\nโšก Processing time: {processing_time:.1f} seconds",
1634
- 'ja': f"\n\n---\nโšก ๅ‡ฆ็†ๆ™‚้–“: {processing_time:.1f}็ง’",
1635
- 'zh': f"\n\n---\nโšก ๅค„็†ๆ—ถ้—ด: {processing_time:.1f}็ง’"
1636
- }
1637
- accumulated_response += time_msg.get(lang, time_msg['en'])
1638
-
1639
- yield accumulated_response, agent_thoughts
1640
-
1641
- except Exception as e:
1642
- error_msg = {
1643
- 'ko': f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}",
1644
- 'en': f"โŒ Error occurred: {str(e)}",
1645
- 'ja': f"โŒ ใ‚จใƒฉใƒผ็™บ็”Ÿ: {str(e)}",
1646
- 'zh': f"โŒ ๅ‘็”Ÿ้”™่ฏฏ: {str(e)}"
1647
- }
1648
- yield error_msg.get(lang, error_msg['en']), agent_thoughts
1649
-
1650
- async def _run_critic_async(self, prompt: str) -> str:
1651
- """๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹คํ–‰ with error handling"""
1652
- try:
1653
- response = ""
1654
- async for chunk in self.llm.chat_stream_async(
1655
- messages=[
1656
- {"role": "system", "content": self.compact_prompts[AgentRole.CRITIC]},
1657
- {"role": "user", "content": prompt}
1658
- ],
1659
- temperature=0.2,
1660
- max_tokens=500
1661
- ):
1662
- response += chunk
1663
- return response
1664
- except Exception as e:
1665
- # ์–ธ์–ด ๊ฐ์ง€
1666
- lang = 'ko' if '์งˆ๋ฌธ' in prompt else 'en'
1667
- error_msg = {
1668
- 'ko': "๋น„ํ‰ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜",
1669
- 'en': "Error during critic processing",
1670
- 'ja': "ๆ‰น่ฉ•ๅ‡ฆ็†ไธญใฎใ‚จใƒฉใƒผ",
1671
- 'zh': "่ฏ„่ฎบๅค„็†ไธญๅ‡บ้”™"
1672
- }
1673
- return error_msg.get(lang, error_msg['en'])
1674
-
1675
- def _format_search_results(self, results: List[Dict]) -> str:
1676
- """๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์••์ถ• ํฌ๋งท"""
1677
- if not results:
1678
- return "No search results"
1679
-
1680
- formatted = []
1681
- for i, r in enumerate(results[:3], 1):
1682
- title = r.get('title', '')[:50]
1683
- desc = r.get('description', '')[:100]
1684
- formatted.append(f"[{i}]{title}:{desc}")
1685
-
1686
- return " | ".join(formatted)
1687
-
1688
-
1689
- # ============================================================================
1690
- # Gradio UI (์ตœ์ ํ™” ๋ฒ„์ „ - ์บ์‹ฑ ์ œ๊ฑฐ)
1691
- # ============================================================================
1692
-
1693
def create_optimized_gradio_interface():
    """Build the optimized Gradio Blocks UI (caching disabled).

    Instantiates one SpeedOptimizedMultiAgentSystem and wires it into a chat
    interface. The inner handler bridges the async pipeline into Gradio's
    synchronous generator protocol using one private event loop per request.
    """

    # Single shared pipeline instance for all requests.
    system = SpeedOptimizedMultiAgentSystem()

    def process_query_optimized(
        message: str,
        history: List[Dict],
        use_search: bool,
        show_agent_thoughts: bool,
        search_count: int,
        language_mode: str
    ):
        """Handle one chat turn, yielding (history, thoughts, sources) updates."""

        if not message:
            yield history, "", ""
            return

        # Resolve language: explicit UI choice, or None for auto-detect.
        if language_mode == "Auto":
            lang = None
        else:
            lang_map = {"Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh"}
            lang = lang_map.get(language_mode, None)

        # Allow nested event loops when Gradio already runs inside one.
        try:
            import nest_asyncio
            nest_asyncio.apply()
        except ImportError:
            pass

        # Fix: the old code created TWO event loops (one for search, one for
        # streaming) and only closed the second, leaking the first. Use a
        # single private loop for the whole request, closed in `finally`.
        loop = None
        try:
            search_results = []
            search_display = ""

            # Detect language once; reused for all localized UI strings.
            detected_lang = lang or system.language_detector.detect_language(message)

            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            if use_search:
                # Show an immediate "processing" placeholder while searching.
                processing_msg = {
                    'ko': "โšก ๊ณ ์† ์ฒ˜๋ฆฌ ์ค‘...",
                    'en': "โšก High-speed processing...",
                    'ja': "โšก ้ซ˜้€Ÿๅ‡ฆ็†ไธญ...",
                    'zh': "โšก ้ซ˜้€Ÿๅค„็†ไธญ..."
                }
                history_with_message = history + [
                    {"role": "user", "content": message},
                    {"role": "assistant", "content": processing_msg.get(detected_lang, processing_msg['en'])}
                ]
                yield history_with_message, "", ""

                # Run the async Brave search synchronously on the private loop.
                async def search_wrapper():
                    return await system.search.search_async(message, count=search_count, lang=detected_lang)

                search_results = loop.run_until_complete(search_wrapper())

                if search_results:
                    ref_label = {
                        'ko': "๐Ÿ“š ์ฐธ๊ณ  ์ž๋ฃŒ",
                        'en': "๐Ÿ“š References",
                        'ja': "๐Ÿ“š ๅ‚่€ƒ่ณ‡ๆ–™",
                        'zh': "๐Ÿ“š ๅ‚่€ƒ่ต„ๆ–™"
                    }
                    search_display = f"{ref_label.get(detected_lang, ref_label['en'])}\n\n"
                    for i, result in enumerate(search_results[:3], 1):
                        search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
                        search_display += f"   {result['description'][:100]}...\n\n"

            # Append the user message before streaming assistant updates.
            current_history = history + [{"role": "user", "content": message}]

            async def stream_responses():
                """Async generator over (answer, thoughts) pipeline updates."""
                async for response, thoughts in system.parallel_process_agents(
                    query=message,
                    search_results=search_results,
                    show_progress=show_agent_thoughts,
                    lang=detected_lang
                ):
                    yield response, thoughts

            gen = stream_responses()

            while True:
                try:
                    # Fix: asyncio.ensure_future() lost its `loop` kwarg in
                    # Python 3.10; drive the coroutine on the private loop
                    # directly instead of ensure_future(..., loop=loop).
                    response, thoughts = loop.run_until_complete(gen.__anext__())

                    updated_history = current_history + [
                        {"role": "assistant", "content": response}
                    ]
                    yield updated_history, thoughts, search_display

                except StopAsyncIteration:
                    break

        except Exception as e:
            error_history = history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": f"โŒ Error: {str(e)}"}
            ]
            yield error_history, "", ""
        finally:
            # Always release the private event loop.
            try:
                if loop is not None:
                    loop.close()
            except Exception:
                pass

    # ------------------------------------------------------------------
    # UI layout
    # ------------------------------------------------------------------
    with gr.Blocks(
        title="โšก Speed-Optimized Multi-Agent System (No Cache)",
        theme=gr.themes.Soft(),
        css="""
.gradio-container {
    max-width: 1400px !important;
    margin: auto !important;
}
"""
    ) as demo:
        gr.Markdown("""
# โšก Enhanced Multi-Agent RAG System (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
**Complex questions processed within 5-8 seconds | Multi-language support**

**Optimization Features:**
- ๐Ÿš€ Parallel Processing: Concurrent agent execution
- โšก Stream Buffering: Network optimization
- ๐ŸŽฏ Early Termination: Complete immediately when quality is met
- ๐ŸŒ Multi-language: Auto-detect Korean/English/Japanese/Chinese
- โŒ **Caching Disabled**: ์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ
""")

        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    height=500,
                    label="๐Ÿ’ฌ Chat",
                    type="messages"
                )

                msg = gr.Textbox(
                    label="Enter complex question",
                    placeholder="Enter complex questions requiring analysis, strategy, or creative solutions...",
                    lines=3
                )

                with gr.Row():
                    submit = gr.Button("โšก High-Speed Process", variant="primary")
                    clear = gr.Button("๐Ÿ”„ Reset")

                with gr.Accordion("๐Ÿค– Agent Processing", open=False):
                    agent_thoughts = gr.Markdown()

                with gr.Accordion("๐Ÿ“š Search Sources", open=False):
                    search_sources = gr.Markdown()

            with gr.Column(scale=1):
                gr.Markdown("**โš™๏ธ Settings**")

                language_mode = gr.Radio(
                    choices=["Auto", "Korean", "English", "Japanese", "Chinese"],
                    value="Auto",
                    label="๐ŸŒ Language Mode"
                )

                use_search = gr.Checkbox(
                    label="๐Ÿ” Use Web Search",
                    value=True
                )

                show_agent_thoughts = gr.Checkbox(
                    label="๐Ÿง  Show Processing",
                    value=True
                )

                search_count = gr.Slider(
                    minimum=3,
                    maximum=10,
                    value=5,
                    step=1,
                    label="Search Results Count"
                )

                gr.Markdown("""
**โšก Optimization Status**

**Active Optimizations:**
- โœ… Parallel Processing
- โŒ ~~Smart Caching~~ (์ œ๊ฑฐ๋จ)
- โœ… Buffer Streaming
- โœ… Early Termination
- โœ… Compressed Prompts
- โœ… Multi-language Support
- โœ… Error Recovery

**Expected Processing Time:**
- Simple Query: 3-5 seconds
- Complex Query: 5-8 seconds
- Very Complex: 8-12 seconds
""")

        # Multilingual example questions.
        gr.Examples(
            examples=[
                # Korean
                "AI ๊ธฐ์ˆ ์ด ํ–ฅํ›„ 10๋…„๊ฐ„ ํ•œ๊ตญ ๊ฒฝ์ œ์— ๋ฏธ์น  ์˜ํ–ฅ์„ ๋‹ค๊ฐ๋„๋กœ ๋ถ„์„ํ•˜๊ณ  ๋Œ€์‘ ์ „๋žต์„ ์ œ์‹œํ•ด์ค˜",
                "์Šคํƒ€ํŠธ์—…์ด ๋Œ€๊ธฐ์—…๊ณผ ๊ฒฝ์Ÿํ•˜๊ธฐ ์œ„ํ•œ ํ˜์‹ ์ ์ธ ์ „๋žต์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ˆ˜๋ฆฝํ•ด์ค˜",
                # English
                "Analyze the multifaceted impact of quantum computing on current encryption systems and propose alternatives",
                "Design 5 innovative business models for climate change mitigation with practical implementation details",
                # Japanese
                "ใƒกใ‚ฟใƒใƒผใ‚นๆ™‚ไปฃใฎๆ•™่‚ฒ้ฉๆ–ฐๆ–นๆกˆใ‚’ๅฎŸ่ฃ…ๅฏ่ƒฝใชใƒฌใƒ™ใƒซใงๆๆกˆใ—ใฆใใ ใ•ใ„",
                # Chinese
                "ๅˆ†ๆžไบบๅทฅๆ™บ่ƒฝๅฏนๆœชๆฅๅๅนดๅ…จ็ƒ็ปๆตŽ็š„ๅฝฑๅ“ๅนถๆๅ‡บๅบ”ๅฏน็ญ–็•ฅ"
            ],
            inputs=msg
        )

        # Event bindings: button click and textbox submit share one handler.
        submit.click(
            process_query_optimized,
            inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
            outputs=[chatbot, agent_thoughts, search_sources]
        ).then(
            lambda: "",
            None,
            msg
        )

        msg.submit(
            process_query_optimized,
            inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
            outputs=[chatbot, agent_thoughts, search_sources]
        ).then(
            lambda: "",
            None,
            msg
        )

        clear.click(
            lambda: ([], "", ""),
            None,
            [chatbot, agent_thoughts, search_sources]
        )

    return demo
1955
-
1956
-
1957
- # ============================================================================
1958
- # ๋ฉ”์ธ ์‹คํ–‰
1959
- # ============================================================================
1960
-
1961
if __name__ == "__main__":
    # Startup banner.
    print("""
    โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
    โ•‘        โšก Speed-Optimized Multi-Agent System (No Cache) โšก           โ•‘
    โ•‘                                                                      โ•‘
    โ•‘  High-speed AI system processing complex questions                   โ•‘
    โ•‘                                                                      โ•‘
    โ•‘  Features:                                                           โ•‘
    โ•‘  โ€ข Multi-language support (KO/EN/JA/ZH)                              โ•‘
    โ•‘  โ€ข Improved error recovery                                           โ•‘
    โ•‘  โ€ข NO CACHING (์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ)                                     โ•‘
    โ•‘  โ€ข Adaptive stream buffering                                         โ•‘
    โ•‘  โ€ข Response cleaning & formatting                                    โ•‘
    โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
    """)

    # Warn (but do not abort) when required API keys are missing.
    if not os.getenv("FIREWORKS_API_KEY"):
        print("\nโš ๏ธ FIREWORKS_API_KEY is not set.")

    if not os.getenv("BRAVE_SEARCH_API_KEY"):
        print("\nโš ๏ธ BRAVE_SEARCH_API_KEY is not set.")

    # Build and launch the Gradio app.
    demo = create_optimized_gradio_interface()

    # SPACE_ID is set by the Hugging Face Spaces runtime.
    is_hf_spaces = os.getenv("SPACE_ID") is not None

    if is_hf_spaces:
        print("\n๐Ÿค— Running in optimized mode on Hugging Face Spaces (No Cache)...")
        demo.launch(server_name="0.0.0.0", server_port=7860)
    else:
        # Fix: the source residue had this line spliced with a fragment of
        # ResponseCleaner.clean_response ("..., '', response, flags=re.MULTILINE)")
        # caused by `$...$` in regex literals being eaten by the diff renderer.
        # Reconstructed to the plain local launch call.
        print("\n๐Ÿ’ป Running in optimized mode on local environment (No Cache)...")
        demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
1995
-
1996
- # ๋งˆํฌ๋‹ค์šด ํ—ค๋” ์ œ๊ฑฐ
1997
- response = re.sub(r'^#{1,6}\s+', '', response, flags=re.MULTILINE)
1998
-
1999
- # ๋ณผ๋“œ ํ…์ŠคํŠธ ์ œ๊ฑฐ (**text** ๋˜๋Š” __text__)
2000
- response = re.sub(r'\*\*(.*?)\*\*', r'\1', response)
2001
- response = re.sub(r'__(.*?)__', r'\1', response)
2002
-
2003
- # ์ดํƒค๋ฆญ ํ…์ŠคํŠธ ์ œ๊ฑฐ (*text* ๋˜๋Š” _text_)
2004
- response = re.sub(r'\*(.*?)\*', r'\1', response)
2005
- response = re.sub(r'_(.*?)_', r'\1', response)
2006
-
2007
- # ์ฝ”๋“œ ๋ธ”๋ก ์ œ๊ฑฐ (```code```)
2008
- response = re.sub(r'```[\s\S]*?```', '', response)
2009
-
2010
- # ์ธ๋ผ์ธ ์ฝ”๋“œ ์ œ๊ฑฐ (`code`)
2011
- response = re.sub(r'`([^`]*)`', r'\1', response)
2012
-
2013
- # ๋ถˆํ•„์š”ํ•œ ๊ตฌ๋ถ„์„  ์ œ๊ฑฐ (---, ***, ___)
2014
- response = re.sub(r'^[\*\-_]{3,}
2015
-
2016
-
2017
- # ============================================================================
2018
- # ํ†ตํ•ฉ ์ตœ์ ํ™” ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
2019
- # ============================================================================
2020
-
2021
- class SpeedOptimizedMultiAgentSystem:
2022
- """์†๋„ ์ตœ์ ํ™”๋œ ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์—†์Œ)"""
2023
-
2024
- def __init__(self):
2025
- self.llm = OptimizedFireworksClient()
2026
- self.search = AsyncBraveSearch()
2027
- self.reasoning = LightweightReasoningChain()
2028
- self.quality_checker = QualityChecker()
2029
- self.streaming = OptimizedStreaming()
2030
- self.language_detector = LanguageDetector()
2031
- self.response_cleaner = ResponseCleaner()
2032
-
2033
- # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํ’€
2034
- self.executor = ThreadPoolExecutor(max_workers=4)
2035
-
2036
- def _init_compact_prompts(self, lang: str = 'ko') -> Dict:
2037
- """์••์ถ•๋œ ๊ณ ํšจ์œจ ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)"""
2038
- prompts = {
2039
- 'ko': {
2040
- AgentRole.SUPERVISOR: """[๊ฐ๋…์ž-๊ตฌ์กฐ์„ค๊ณ„]
2041
- ์ฆ‰์‹œ๋ถ„์„: ํ•ต์‹ฌ์˜๋„+ํ•„์š”์ •๋ณด+๋‹ต๋ณ€๊ตฌ์กฐ
2042
- ์ถœ๋ ฅ: 5๊ฐœ ํ•ต์‹ฌํฌ์ธํŠธ(๊ฐ 1๋ฌธ์žฅ)
2043
- ์ถ”๋ก ์ฒด๊ณ„ ๋ช…์‹œ""",
2044
-
2045
- AgentRole.CREATIVE: """[์ฐฝ์˜์„ฑ์ƒ์„ฑ์ž]
2046
- ์ž…๋ ฅ๊ตฌ์กฐ ๋”ฐ๋ผ ์ฐฝ์˜์  ํ™•์žฅ
2047
- ์‹ค์šฉ์˜ˆ์‹œ+ํ˜์‹ ์ ‘๊ทผ+๊ตฌ์ฒด์กฐ์–ธ
2048
- ๋ถˆํ•„์š”์„ค๋ช… ์ œ๊ฑฐ""",
2049
-
2050
- AgentRole.CRITIC: """[๋น„ํ‰์ž-๊ฒ€์ฆ]
2051
- ์‹ ์†๊ฒ€ํ† : ์ •ํ™•์„ฑ/๋…ผ๋ฆฌ์„ฑ/์‹ค์šฉ์„ฑ
2052
- ๊ฐœ์„ ํฌ์ธํŠธ 3๊ฐœ๋งŒ
2053
- ๊ฐ 2๋ฌธ์žฅ ์ด๋‚ด""",
2054
-
2055
- AgentRole.FINALIZER: """[์ตœ์ข…ํ†ตํ•ฉ]
2056
- ๋ชจ๋“ ์˜๊ฒฌ ์ข…ํ•ฉโ†’์ตœ์ ๋‹ต๋ณ€
2057
- ๋ช…ํ™•๊ตฌ์กฐ+์‹ค์šฉ์ •๋ณด+์ฐฝ์˜๊ท ํ˜•
2058
- ๋ฐ”๋กœ ํ•ต์‹ฌ ๋‚ด์šฉ๋ถ€ํ„ฐ ์‹œ์ž‘. ๋ถˆํ•„์š”ํ•œ ํ—ค๋”๋‚˜ ๋งˆํฌ์—… ์—†์ด. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€."""
2059
- },
2060
- 'en': {
2061
- AgentRole.SUPERVISOR: """[Supervisor-Structure]
2062
- Immediate analysis: core intent+required info+answer structure
2063
- Output: 5 key points (1 sentence each)
2064
- Clear reasoning framework""",
2065
-
2066
- AgentRole.CREATIVE: """[Creative Generator]
2067
- Follow structure, expand creatively
2068
- Practical examples+innovative approach+specific advice
2069
- Remove unnecessary explanations""",
2070
-
2071
- AgentRole.CRITIC: """[Critic-Verification]
2072
- Quick review: accuracy/logic/practicality
2073
- Only 3 improvement points
2074
- Max 2 sentences each""",
2075
-
2076
- AgentRole.FINALIZER: """[Final Integration]
2077
- Synthesize all inputsโ†’optimal answer
2078
- Clear structure+practical info+creative balance
2079
- Start with core content directly. No unnecessary headers or markup. No markdown headers (#, ##, ###)."""
2080
- },
2081
- 'ja': {
2082
- AgentRole.SUPERVISOR: """[็›ฃ็ฃ่€…-ๆง‹้€ ่จญ่จˆ]
2083
- ๅณๆ™‚ๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›ณ+ๅฟ…่ฆๆƒ…ๅ ฑ+ๅ›ž็ญ”ๆง‹้€ 
2084
- ๅ‡บๅŠ›๏ผš5ใคใฎๆ ธๅฟƒใƒใ‚คใƒณใƒˆ๏ผˆๅ„1ๆ–‡๏ผ‰
2085
- ๆŽจ่ซ–ไฝ“็ณปๆ˜Ž็คบ""",
2086
-
2087
- AgentRole.CREATIVE: """[ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…]
2088
- ๅ…ฅๅŠ›ๆง‹้€ ใซๅพ“ใฃใฆๅ‰ต้€ ็š„ๆ‹กๅผต
2089
- ๅฎŸ็”จไพ‹+้ฉๆ–ฐ็š„ใ‚ขใƒ—ใƒญใƒผใƒ+ๅ…ทไฝ“็š„ใ‚ขใƒ‰ใƒใ‚คใ‚น
2090
- ไธ่ฆใช่ชฌๆ˜Žๅ‰Š้™ค""",
2091
-
2092
- AgentRole.CRITIC: """[ๆ‰น่ฉ•่€…-ๆคœ่จผ]
2093
- ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผ๏ผšๆญฃ็ขบๆ€ง/่ซ–็†ๆ€ง/ๅฎŸ็”จๆ€ง
2094
- ๆ”นๅ–„ใƒใ‚คใƒณใƒˆ3ใคใฎใฟ
2095
- ๅ„2ๆ–‡ไปฅๅ†…""",
2096
-
2097
- AgentRole.FINALIZER: """[ๆœ€็ต‚็ตฑๅˆ]
2098
- ๅ…จๆ„่ฆ‹็ตฑๅˆโ†’ๆœ€้ฉๅ›ž็ญ”
2099
- ๆ˜Ž็ขบๆง‹้€ +ๅฎŸ็”จๆƒ…ๅ ฑ+ๅ‰ต้€ ๆ€งใƒใƒฉใƒณใ‚น
2100
- ๆ ธๅฟƒๅ†…ๅฎนใ‹ใ‚‰็›ดๆŽฅ้–‹ๅง‹ใ€‚ไธ่ฆใชใƒ˜ใƒƒใƒ€ใƒผใ‚„ใƒžใƒผใ‚ฏใ‚ขใƒƒใƒ—ใชใ—ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚"""
2101
- },
2102
- 'zh': {
2103
- AgentRole.SUPERVISOR: """[ไธป็ฎก-็ป“ๆž„่ฎพ่ฎก]
2104
- ็ซ‹ๅณๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›พ+ๆ‰€้œ€ไฟกๆฏ+็ญ”ๆกˆ็ป“ๆž„
2105
- ่พ“ๅ‡บ๏ผš5ไธชๆ ธๅฟƒ่ฆ็‚น๏ผˆๆฏไธช1ๅฅ๏ผ‰
2106
- ๆŽจ็†ไฝ“็ณปๆ˜Ž็กฎ""",
2107
-
2108
- AgentRole.CREATIVE: """[ๅˆ›ๆ„็”Ÿๆˆๅ™จ]
2109
- ๆŒ‰็ป“ๆž„ๅˆ›้€ ๆ€งๆ‰ฉๅฑ•
2110
- ๅฎž็”จ็คบไพ‹+ๅˆ›ๆ–ฐๆ–นๆณ•+ๅ…ทไฝ“ๅปบ่ฎฎ
2111
- ๅˆ ้™คไธๅฟ…่ฆ็š„่งฃ้‡Š""",
2112
-
2113
- AgentRole.CRITIC: """[่ฏ„่ฎบๅฎถ-้ชŒ่ฏ]
2114
- ๅฟซ้€ŸๅฎกๆŸฅ๏ผšๅ‡†็กฎๆ€ง/้€ป่พ‘ๆ€ง/ๅฎž็”จๆ€ง
2115
- ไป…3ไธชๆ”น่ฟ›็‚น
2116
- ๆฏไธชๆœ€ๅคš2ๅฅ""",
2117
-
2118
- AgentRole.FINALIZER: """[ๆœ€็ปˆๆ•ดๅˆ]
2119
- ็ปผๅˆๆ‰€ๆœ‰ๆ„่งโ†’ๆœ€ไฝณ็ญ”ๆกˆ
2120
- ๆธ…ๆ™ฐ็ป“ๆž„+ๅฎž็”จไฟกๆฏ+ๅˆ›ๆ„ๅนณ่กก
2121
- ็›ดๆŽฅไปŽๆ ธๅฟƒๅ†…ๅฎนๅผ€ๅง‹ใ€‚ๆ— ้œ€ไธๅฟ…่ฆ็š„ๆ ‡้ข˜ๆˆ–ๆ ‡่ฎฐใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
2122
- }
2123
- }
2124
-
2125
- return prompts.get(lang, prompts['en'])
2126
-
2127
- async def parallel_process_agents(
2128
- self,
2129
- query: str,
2130
- search_results: List[Dict],
2131
- show_progress: bool = True,
2132
- lang: str = None
2133
- ) -> AsyncGenerator[Tuple[str, str], None]:
2134
- """๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ (์บ์‹ฑ ์—†์Œ)"""
2135
-
2136
- start_time = time.time()
2137
-
2138
- # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€
2139
- if lang is None:
2140
- lang = self.language_detector.detect_language(query)
2141
-
2142
- # ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
2143
- self.compact_prompts = self._init_compact_prompts(lang)
2144
-
2145
- search_context = self._format_search_results(search_results)
2146
- accumulated_response = ""
2147
- agent_thoughts = ""
2148
-
2149
- # ์ถ”๋ก  ํŒจํ„ด ๊ฒฐ์ •
2150
- reasoning_pattern = self.reasoning.get_reasoning_pattern(query, lang)
2151
-
2152
- try:
2153
- # === 1๋‹จ๊ณ„: ๊ฐ๋…์ž + ๊ฒ€์ƒ‰ ๋ณ‘๋ ฌ ์‹คํ–‰ ===
2154
- if show_progress:
2155
- progress_msg = {
2156
- 'ko': "๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘\n๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„ + ๐Ÿ” ์ถ”๊ฐ€ ๊ฒ€์ƒ‰ ๋™์‹œ ์ง„ํ–‰...\n\n",
2157
- 'en': "๐Ÿš€ Starting parallel processing\n๐Ÿ‘” Supervisor analysis + ๐Ÿ” Additional search in progress...\n\n",
2158
- 'ja': "๐Ÿš€ ไธฆๅˆ—ๅ‡ฆ็†้–‹ๅง‹\n๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž + ๐Ÿ” ่ฟฝๅŠ ๆคœ็ดขๅŒๆ™‚้€ฒ่กŒไธญ...\n\n",
2159
- 'zh': "๐Ÿš€ ๅผ€ๅง‹ๅนถ่กŒๅค„็†\n๐Ÿ‘” ไธป็ฎกๅˆ†ๆž + ๐Ÿ” ้™„ๅŠ ๆœ็ดขๅŒๆ—ถ่ฟ›่กŒ...\n\n"
2160
- }
2161
- agent_thoughts = progress_msg.get(lang, progress_msg['en'])
2162
- yield accumulated_response, agent_thoughts
2163
-
2164
- # ๊ฐ๋…์ž ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
2165
- supervisor_prompt_templates = {
2166
- 'ko': f"""
2167
- ์งˆ๋ฌธ: {query}
2168
- ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
2169
- ์ถ”๋ก ํŒจํ„ด: {reasoning_pattern}
2170
- ์ฆ‰์‹œ ํ•ต์‹ฌ๊ตฌ์กฐ 5๊ฐœ ์ œ์‹œ""",
2171
- 'en': f"""
2172
- Question: {query}
2173
- Search results: {search_context}
2174
- Reasoning pattern: {reasoning_pattern}
2175
- Immediately provide 5 key structures""",
2176
- 'ja': f"""
2177
- ่ณชๅ•: {query}
2178
- ๆคœ็ดข็ตๆžœ: {search_context}
2179
- ๆŽจ่ซ–ใƒ‘ใ‚ฟใƒผใƒณ: {reasoning_pattern}
2180
- ๅณๅบงใซ5ใคใฎๆ ธๅฟƒๆง‹้€ ใ‚’ๆ็คบ""",
2181
- 'zh': f"""
2182
- ้—ฎ้ข˜: {query}
2183
- ๆœ็ดข็ป“ๆžœ: {search_context}
2184
- ๆŽจ็†ๆจกๅผ: {reasoning_pattern}
2185
- ็ซ‹ๅณๆไพ›5ไธชๆ ธๅฟƒ็ป“ๆž„"""
2186
- }
2187
-
2188
- supervisor_prompt = supervisor_prompt_templates.get(lang, supervisor_prompt_templates['en'])
2189
-
2190
- supervisor_response = ""
2191
- supervisor_task = self.llm.chat_stream_async(
2192
- messages=[
2193
- {"role": "system", "content": self.compact_prompts[AgentRole.SUPERVISOR]},
2194
- {"role": "user", "content": supervisor_prompt}
2195
- ],
2196
- temperature=0.3,
2197
- max_tokens=500
2198
- )
2199
-
2200
- # ๊ฐ๋…์ž ์ŠคํŠธ๋ฆฌ๋ฐ (๋ฒ„ํผ๋ง)
2201
- async for chunk in self.streaming.buffer_and_yield(supervisor_task):
2202
- supervisor_response += chunk
2203
- if show_progress and len(supervisor_response) < 300:
2204
- supervisor_label = {
2205
- 'ko': "๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„",
2206
- 'en': "๐Ÿ‘” Supervisor Analysis",
2207
- 'ja': "๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž",
2208
- 'zh': "๐Ÿ‘” ไธป็ฎกๅˆ†ๆž"
2209
- }
2210
- agent_thoughts = f"{supervisor_label.get(lang, supervisor_label['en'])}\n{supervisor_response[:300]}...\n\n"
2211
- yield accumulated_response, agent_thoughts
2212
-
2213
- # === 2๋‹จ๊ณ„: ์ฐฝ์˜์„ฑ + ๋น„ํ‰ ์ค€๋น„ ๋ณ‘๋ ฌ ===
2214
- if show_progress:
2215
- creative_msg = {
2216
- 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž + ๐Ÿ” ๋น„ํ‰์ž ์ค€๋น„...\n\n",
2217
- 'en': "๐ŸŽจ Creative Generator + ๐Ÿ” Critic preparing...\n\n",
2218
- 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€… + ๐Ÿ” ๆ‰น่ฉ•่€…ๆบ–ๅ‚™ไธญ...\n\n",
2219
- 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ + ๐Ÿ” ่ฏ„่ฎบๅฎถๅ‡†ๅค‡ไธญ...\n\n"
2220
- }
2221
- agent_thoughts += creative_msg.get(lang, creative_msg['en'])
2222
- yield accumulated_response, agent_thoughts
2223
-
2224
- # ์ฐฝ์˜์„ฑ ์ƒ์„ฑ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
2225
- creative_prompt_templates = {
2226
- 'ko': f"""
2227
- ์งˆ๋ฌธ: {query}
2228
- ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
2229
- ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
2230
- ์ฐฝ์˜์ +์‹ค์šฉ์  ๋‹ต๋ณ€ ์ฆ‰์‹œ์ƒ์„ฑ""",
2231
- 'en': f"""
2232
- Question: {query}
2233
- Supervisor structure: {supervisor_response}
2234
- Search results: {search_context}
2235
- Generate creative+practical answer immediately""",
2236
- 'ja': f"""
2237
- ่ณชๅ•: {query}
2238
- ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
2239
- ๆคœ็ดข็ตๆžœ: {search_context}
2240
- ๅ‰ต้€ ็š„+ๅฎŸ็”จ็š„ๅ›ž็ญ”ๅณๅบง็”Ÿๆˆ""",
2241
- 'zh': f"""
2242
- ้—ฎ้ข˜: {query}
2243
- ไธป็ฎก็ป“ๆž„: {supervisor_response}
2244
- ๆœ็ดข็ป“ๆžœ: {search_context}
2245
- ็ซ‹ๅณ็”Ÿๆˆๅˆ›ๆ„+ๅฎž็”จ็ญ”ๆกˆ"""
2246
- }
2247
-
2248
- creative_prompt = creative_prompt_templates.get(lang, creative_prompt_templates['en'])
2249
-
2250
- creative_response = ""
2251
- creative_partial = ""
2252
- critic_started = False
2253
- critic_response = ""
2254
-
2255
- creative_task = self.llm.chat_stream_async(
2256
- messages=[
2257
- {"role": "system", "content": self.compact_prompts[AgentRole.CREATIVE]},
2258
- {"role": "user", "content": creative_prompt}
2259
- ],
2260
- temperature=0.8,
2261
- max_tokens=1500
2262
- )
2263
-
2264
- # ์ฐฝ์˜์„ฑ ์ŠคํŠธ๋ฆฌ๋ฐ + ๋น„ํ‰์ž ์กฐ๊ธฐ ์‹œ์ž‘
2265
- async for chunk in self.streaming.buffer_and_yield(creative_task):
2266
- creative_response += chunk
2267
- creative_partial += chunk
2268
-
2269
- # ์ฐฝ์˜์„ฑ ์‘๋‹ต์ด 500์ž ๋„˜์œผ๋ฉด ๋น„ํ‰์ž ์‹œ์ž‘
2270
- if len(creative_partial) > 500 and not critic_started:
2271
- critic_started = True
2272
-
2273
- # ๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
2274
- critic_prompt_templates = {
2275
- 'ko': f"""
2276
- ์›๋ณธ์งˆ๋ฌธ: {query}
2277
- ์ฐฝ์˜์„ฑ๋‹ต๋ณ€(์ผ๋ถ€): {creative_partial}
2278
- ์‹ ์†๊ฒ€ํ† โ†’๊ฐœ์„ ์ 3๊ฐœ""",
2279
- 'en': f"""
2280
- Original question: {query}
2281
- Creative answer (partial): {creative_partial}
2282
- Quick reviewโ†’3 improvements""",
2283
- 'ja': f"""
2284
- ๅ…ƒใฎ่ณชๅ•: {query}
2285
- ๅ‰ต้€ ็š„ๅ›ž็ญ”๏ผˆไธ€้ƒจ๏ผ‰: {creative_partial}
2286
- ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผโ†’ๆ”นๅ–„็‚น3ใค""",
2287
- 'zh': f"""
2288
- ๅŽŸๅง‹้—ฎ้ข˜: {query}
2289
- ๅˆ›ๆ„็ญ”ๆกˆ๏ผˆ้ƒจๅˆ†๏ผ‰: {creative_partial}
2290
- ๅฟซ้€ŸๅฎกๆŸฅโ†’3ไธชๆ”น่ฟ›็‚น"""
2291
- }
2292
-
2293
- critic_prompt = critic_prompt_templates.get(lang, critic_prompt_templates['en'])
2294
-
2295
- critic_task = asyncio.create_task(
2296
- self._run_critic_async(critic_prompt)
2297
- )
2298
-
2299
- if show_progress:
2300
- display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
2301
- creative_label = {
2302
- 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž",
2303
- 'en': "๐ŸŽจ Creative Generator",
2304
- 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…",
2305
- 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ"
2306
- }
2307
- agent_thoughts = f"{creative_label.get(lang, creative_label['en'])}\n{display_creative}\n\n"
2308
- yield accumulated_response, agent_thoughts
2309
-
2310
- # ๋น„ํ‰์ž ๊ฒฐ๊ณผ ๋Œ€๊ธฐ
2311
- if critic_started:
2312
- critic_response = await critic_task
2313
-
2314
- if show_progress:
2315
- critic_label = {
2316
- 'ko': "๐Ÿ” ๋น„ํ‰์ž ๊ฒ€ํ† ",
2317
- 'en': "๐Ÿ” Critic Review",
2318
- 'ja': "๐Ÿ” ๆ‰น่ฉ•่€…ใƒฌใƒ“ใƒฅใƒผ",
2319
- 'zh': "๐Ÿ” ่ฏ„่ฎบๅฎถๅฎกๆŸฅ"
2320
- }
2321
- agent_thoughts += f"{critic_label.get(lang, critic_label['en'])}\n{critic_response[:200]}...\n\n"
2322
- yield accumulated_response, agent_thoughts
2323
-
2324
- # === 3๋‹จ๊ณ„: ํ’ˆ์งˆ ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข…๋ฃŒ ===
2325
- quality_score, need_more = self.quality_checker.evaluate_response(
2326
- creative_response, query, lang
2327
- )
2328
-
2329
- if not need_more and quality_score > 0.85:
2330
- # ํ’ˆ์งˆ์ด ์ถฉ๋ถ„ํžˆ ๋†’์œผ๋ฉด ๋ฐ”๋กœ ๋ฐ˜ํ™˜
2331
- accumulated_response = self.response_cleaner.clean_response(creative_response)
2332
-
2333
- if show_progress:
2334
- quality_msg = {
2335
- 'ko': f"โœ… ํ’ˆ์งˆ ์ถฉ์กฑ (์ ์ˆ˜: {quality_score:.2f})\n์กฐ๊ธฐ ์™„๋ฃŒ!\n",
2336
- 'en': f"โœ… Quality met (score: {quality_score:.2f})\nEarly completion!\n",
2337
- 'ja': f"โœ… ๅ“่ณชๆบ€่ถณ (ใ‚นใ‚ณใ‚ข: {quality_score:.2f})\nๆ—ฉๆœŸๅฎŒไบ†!\n",
2338
- 'zh': f"โœ… ่ดจ้‡ๆปก่ถณ (ๅˆ†ๆ•ฐ: {quality_score:.2f})\nๆๅ‰ๅฎŒๆˆ!\n"
2339
- }
2340
- agent_thoughts += quality_msg.get(lang, quality_msg['en'])
2341
-
2342
- yield accumulated_response, agent_thoughts
2343
- return
2344
-
2345
- # === 4๋‹จ๊ณ„: ์ตœ์ข… ํ†ตํ•ฉ (์ŠคํŠธ๋ฆฌ๋ฐ) ===
2346
- if show_progress:
2347
- final_msg = {
2348
- 'ko': "โœ… ์ตœ์ข… ํ†ตํ•ฉ ์ค‘...\n\n",
2349
- 'en': "โœ… Final integration in progress...\n\n",
2350
- 'ja': "โœ… ๆœ€็ต‚็ตฑๅˆไธญ...\n\n",
2351
- 'zh': "โœ… ๆœ€็ปˆๆ•ดๅˆไธญ...\n\n"
2352
- }
2353
- agent_thoughts += final_msg.get(lang, final_msg['en'])
2354
- yield accumulated_response, agent_thoughts
2355
-
2356
- # ์ตœ์ข… ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
2357
- final_prompt_templates = {
2358
- 'ko': f"""
2359
- ์งˆ๋ฌธ: {query}
2360
- ์ฐฝ์˜์„ฑ๋‹ต๋ณ€: {creative_response}
2361
- ๋น„ํ‰ํ”ผ๋“œ๋ฐฑ: {critic_response}
2362
- ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
2363
- ์ตœ์ข…ํ†ตํ•ฉโ†’์™„๋ฒฝ๋‹ต๋ณ€. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€.""",
2364
- 'en': f"""
2365
- Question: {query}
2366
- Creative answer: {creative_response}
2367
- Critic feedback: {critic_response}
2368
- Supervisor structure: {supervisor_response}
2369
- Final integrationโ†’perfect answer. No markdown headers (#, ##, ###).""",
2370
- 'ja': f"""
2371
- ่ณชๅ•: {query}
2372
- ๅ‰ต้€ ็š„ๅ›ž็ญ”: {creative_response}
2373
- ๆ‰น่ฉ•ใƒ•ใ‚ฃใƒผใƒ‰ใƒใƒƒใ‚ฏ: {critic_response}
2374
- ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
2375
- ๆœ€็ต‚็ตฑๅˆโ†’ๅฎŒ็’งใชๅ›ž็ญ”ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚""",
2376
- 'zh': f"""
2377
- ้—ฎ้ข˜: {query}
2378
- ๅˆ›ๆ„็ญ”ๆกˆ: {creative_response}
2379
- ่ฏ„่ฎบๅ้ฆˆ: {critic_response}
2380
- ไธป็ฎก็ป“ๆž„: {supervisor_response}
2381
- ๆœ€็ปˆๆ•ดๅˆโ†’ๅฎŒ็พŽ็ญ”ๆกˆใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
2382
- }
2383
-
2384
- final_prompt = final_prompt_templates.get(lang, final_prompt_templates['en'])
2385
-
2386
- final_task = self.llm.chat_stream_async(
2387
- messages=[
2388
- {"role": "system", "content": self.compact_prompts[AgentRole.FINALIZER]},
2389
- {"role": "user", "content": final_prompt}
2390
- ],
2391
- temperature=0.5,
2392
- max_tokens=2500
2393
- )
2394
-
2395
- # ์ตœ์ข… ๋‹ต๋ณ€ ์ŠคํŠธ๋ฆฌ๋ฐ
2396
- accumulated_response = ""
2397
-
2398
- async for chunk in final_task:
2399
- accumulated_response += chunk
2400
- # ์‹ค์‹œ๊ฐ„ ์ •๋ฆฌ
2401
- cleaned_response = self.response_cleaner.clean_response(accumulated_response)
2402
- yield cleaned_response, agent_thoughts
2403
-
2404
- # ์ตœ์ข… ์ •๋ฆฌ
2405
- accumulated_response = self.response_cleaner.clean_response(accumulated_response)
2406
-
2407
- # ์ฒ˜๋ฆฌ ์‹œ๊ฐ„ ์ถ”๊ฐ€ (์–ธ์–ด๋ณ„)
2408
- processing_time = time.time() - start_time
2409
- time_msg = {
2410
- 'ko': f"\n\n---\nโšก ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {processing_time:.1f}์ดˆ",
2411
- 'en': f"\n\n---\nโšก Processing time: {processing_time:.1f} seconds",
2412
- 'ja': f"\n\n---\nโšก ๅ‡ฆ็†ๆ™‚้–“: {processing_time:.1f}็ง’",
2413
- 'zh': f"\n\n---\nโšก ๅค„็†ๆ—ถ้—ด: {processing_time:.1f}็ง’"
2414
- }
2415
- accumulated_response += time_msg.get(lang, time_msg['en'])
2416
-
2417
- yield accumulated_response, agent_thoughts
2418
-
2419
- except Exception as e:
2420
- error_msg = {
2421
- 'ko': f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}",
2422
- 'en': f"โŒ Error occurred: {str(e)}",
2423
- 'ja': f"โŒ ใ‚จใƒฉใƒผ็™บ็”Ÿ: {str(e)}",
2424
- 'zh': f"โŒ ๅ‘็”Ÿ้”™่ฏฏ: {str(e)}"
2425
- }
2426
- yield error_msg.get(lang, error_msg['en']), agent_thoughts
2427
-
2428
- async def _run_critic_async(self, prompt: str) -> str:
2429
- """๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹คํ–‰ with error handling"""
2430
- try:
2431
- response = ""
2432
- async for chunk in self.llm.chat_stream_async(
2433
- messages=[
2434
- {"role": "system", "content": self.compact_prompts[AgentRole.CRITIC]},
2435
- {"role": "user", "content": prompt}
2436
- ],
2437
- temperature=0.2,
2438
- max_tokens=500
2439
- ):
2440
- response += chunk
2441
- return response
2442
- except Exception as e:
2443
- # ์–ธ์–ด ๊ฐ์ง€
2444
- lang = 'ko' if '์งˆ๋ฌธ' in prompt else 'en'
2445
- error_msg = {
2446
- 'ko': "๋น„ํ‰ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜",
2447
- 'en': "Error during critic processing",
2448
- 'ja': "ๆ‰น่ฉ•ๅ‡ฆ็†ไธญใฎใ‚จใƒฉใƒผ",
2449
- 'zh': "่ฏ„่ฎบๅค„็†ไธญๅ‡บ้”™"
2450
- }
2451
- return error_msg.get(lang, error_msg['en'])
2452
-
2453
- def _format_search_results(self, results: List[Dict]) -> str:
2454
- """๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์••์ถ• ํฌ๋งท"""
2455
- if not results:
2456
- return "No search results"
2457
-
2458
- formatted = []
2459
- for i, r in enumerate(results[:3], 1):
2460
- title = r.get('title', '')[:50]
2461
- desc = r.get('description', '')[:100]
2462
- formatted.append(f"[{i}]{title}:{desc}")
2463
-
2464
- return " | ".join(formatted)
2465
-
2466
-
2467
- # ============================================================================
2468
- # Gradio UI (์ตœ์ ํ™” ๋ฒ„์ „ - ์บ์‹ฑ ์ œ๊ฑฐ)
2469
- # ============================================================================
2470
-
2471
- def create_optimized_gradio_interface():
2472
- """์ตœ์ ํ™”๋œ Gradio ์ธํ„ฐํŽ˜์ด์Šค (์บ์‹ฑ ์—†์Œ)"""
2473
-
2474
- # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
2475
- system = SpeedOptimizedMultiAgentSystem()
2476
-
2477
- def process_query_optimized(
2478
- message: str,
2479
- history: List[Dict],
2480
- use_search: bool,
2481
- show_agent_thoughts: bool,
2482
- search_count: int,
2483
- language_mode: str
2484
- ):
2485
- """์ตœ์ ํ™”๋œ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ - ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„์ „"""
2486
-
2487
- if not message:
2488
- yield history, "", ""
2489
- return
2490
-
2491
- # ์–ธ์–ด ์„ค์ •
2492
- if language_mode == "Auto":
2493
- lang = None # ์ž๋™ ๊ฐ์ง€
2494
- else:
2495
- lang_map = {"Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh"}
2496
- lang = lang_map.get(language_mode, None)
2497
-
2498
- # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
2499
- try:
2500
- import nest_asyncio
2501
- nest_asyncio.apply()
2502
- except ImportError:
2503
- pass
2504
-
2505
- try:
2506
- # ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰ (๋™๊ธฐํ™”)
2507
- search_results = []
2508
- search_display = ""
2509
-
2510
- # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€ (ํ•„์š”ํ•œ ๊ฒฝ์šฐ)
2511
- detected_lang = lang or system.language_detector.detect_language(message)
2512
-
2513
- if use_search:
2514
- # ๊ฒ€์ƒ‰ ์ƒํƒœ ํ‘œ์‹œ
2515
- processing_msg = {
2516
- 'ko': "โšก ๊ณ ์† ์ฒ˜๋ฆฌ ์ค‘...",
2517
- 'en': "โšก High-speed processing...",
2518
- 'ja': "โšก ้ซ˜้€Ÿๅ‡ฆ็†ไธญ...",
2519
- 'zh': "โšก ้ซ˜้€Ÿๅค„็†ไธญ..."
2520
- }
2521
- history_with_message = history + [
2522
- {"role": "user", "content": message},
2523
- {"role": "assistant", "content": processing_msg.get(detected_lang, processing_msg['en'])}
2524
- ]
2525
- yield history_with_message, "", ""
2526
-
2527
- # ๋น„๋™๊ธฐ ๊ฒ€์ƒ‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
2528
- async def search_wrapper():
2529
- return await system.search.search_async(message, count=search_count, lang=detected_lang)
2530
-
2531
- loop = asyncio.new_event_loop()
2532
- asyncio.set_event_loop(loop)
2533
- search_results = loop.run_until_complete(search_wrapper())
2534
-
2535
- if search_results:
2536
- ref_label = {
2537
- 'ko': "๐Ÿ“š ์ฐธ๊ณ  ์ž๋ฃŒ",
2538
- 'en': "๐Ÿ“š References",
2539
- 'ja': "๐Ÿ“š ๅ‚่€ƒ่ณ‡ๆ–™",
2540
- 'zh': "๐Ÿ“š ๅ‚่€ƒ่ต„ๆ–™"
2541
- }
2542
- search_display = f"{ref_label.get(detected_lang, ref_label['en'])}\n\n"
2543
- for i, result in enumerate(search_results[:3], 1):
2544
- search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
2545
- search_display += f" {result['description'][:100]}...\n\n"
2546
-
2547
- # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
2548
- current_history = history + [{"role": "user", "content": message}]
2549
-
2550
- # ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ์„ ์œ„ํ•œ ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ
2551
- async def stream_responses():
2552
- """์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ"""
2553
- async for response, thoughts in system.parallel_process_agents(
2554
- query=message,
2555
- search_results=search_results,
2556
- show_progress=show_agent_thoughts,
2557
- lang=detected_lang
2558
- ):
2559
- yield response, thoughts
2560
-
2561
- # ์ƒˆ ์ด๋ฒคํŠธ ๋ฃจํ”„์—์„œ ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ
2562
- loop = asyncio.new_event_loop()
2563
- asyncio.set_event_loop(loop)
2564
-
2565
- # ๋น„๋™๊ธฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์ˆœํšŒ
2566
- gen = stream_responses()
2567
-
2568
- while True:
2569
- try:
2570
- # ๋‹ค์Œ ํ•ญ๋ชฉ ๊ฐ€์ ธ์˜ค๊ธฐ
2571
- task = asyncio.ensure_future(gen.__anext__(), loop=loop)
2572
- response, thoughts = loop.run_until_complete(task)
2573
-
2574
- # ์‹ค์‹œ๊ฐ„ ์—…๋ฐ์ดํŠธ
2575
- updated_history = current_history + [
2576
- {"role": "assistant", "content": response}
2577
- ]
2578
- yield updated_history, thoughts, search_display
2579
-
2580
- except StopAsyncIteration:
2581
- break
2582
-
2583
- except Exception as e:
2584
- error_history = history + [
2585
- {"role": "user", "content": message},
2586
- {"role": "assistant", "content": f"โŒ Error: {str(e)}"}
2587
- ]
2588
- yield error_history, "", ""
2589
- finally:
2590
- # ๋ฃจํ”„ ์ •๋ฆฌ
2591
- try:
2592
- loop.close()
2593
- except:
2594
- pass
2595
-
2596
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค
2597
- with gr.Blocks(
2598
- title="โšก Speed-Optimized Multi-Agent System (No Cache)",
2599
- theme=gr.themes.Soft(),
2600
- css="""
2601
- .gradio-container {
2602
- max-width: 1400px !important;
2603
- margin: auto !important;
2604
- }
2605
- """
2606
- ) as demo:
2607
- gr.Markdown("""
2608
- # โšก Enhanced Multi-Agent RAG System (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
2609
- **Complex questions processed within 5-8 seconds | Multi-language support**
2610
-
2611
- **Optimization Features:**
2612
- - ๐Ÿš€ Parallel Processing: Concurrent agent execution
2613
- - โšก Stream Buffering: Network optimization
2614
- - ๐ŸŽฏ Early Termination: Complete immediately when quality is met
2615
- - ๐ŸŒ Multi-language: Auto-detect Korean/English/Japanese/Chinese
2616
- - โŒ **Caching Disabled**: ์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ
2617
- """)
2618
-
2619
- with gr.Row():
2620
- with gr.Column(scale=3):
2621
- chatbot = gr.Chatbot(
2622
- height=500,
2623
- label="๐Ÿ’ฌ Chat",
2624
- type="messages"
2625
- )
2626
-
2627
- msg = gr.Textbox(
2628
- label="Enter complex question",
2629
- placeholder="Enter complex questions requiring analysis, strategy, or creative solutions...",
2630
- lines=3
2631
- )
2632
-
2633
- with gr.Row():
2634
- submit = gr.Button("โšก High-Speed Process", variant="primary")
2635
- clear = gr.Button("๐Ÿ”„ Reset")
2636
-
2637
- with gr.Accordion("๐Ÿค– Agent Processing", open=False):
2638
- agent_thoughts = gr.Markdown()
2639
-
2640
- with gr.Accordion("๐Ÿ“š Search Sources", open=False):
2641
- search_sources = gr.Markdown()
2642
-
2643
- with gr.Column(scale=1):
2644
- gr.Markdown("**โš™๏ธ Settings**")
2645
-
2646
- language_mode = gr.Radio(
2647
- choices=["Auto", "Korean", "English", "Japanese", "Chinese"],
2648
- value="Auto",
2649
- label="๐ŸŒ Language Mode"
2650
- )
2651
-
2652
- use_search = gr.Checkbox(
2653
- label="๐Ÿ” Use Web Search",
2654
- value=True
2655
- )
2656
-
2657
- show_agent_thoughts = gr.Checkbox(
2658
- label="๐Ÿง  Show Processing",
2659
- value=True
2660
- )
2661
-
2662
- search_count = gr.Slider(
2663
- minimum=3,
2664
- maximum=10,
2665
- value=5,
2666
- step=1,
2667
- label="Search Results Count"
2668
- )
2669
-
2670
- gr.Markdown("""
2671
- **โšก Optimization Status**
2672
-
2673
- **Active Optimizations:**
2674
- - โœ… Parallel Processing
2675
- - โŒ ~~Smart Caching~~ (์ œ๊ฑฐ๋จ)
2676
- - โœ… Buffer Streaming
2677
- - โœ… Early Termination
2678
- - โœ… Compressed Prompts
2679
- - โœ… Multi-language Support
2680
- - โœ… Error Recovery
2681
-
2682
- **Expected Processing Time:**
2683
- - Simple Query: 3-5 seconds
2684
- - Complex Query: 5-8 seconds
2685
- - Very Complex: 8-12 seconds
2686
- """)
2687
-
2688
- # ๋ณต์žกํ•œ ์งˆ๋ฌธ ์˜ˆ์ œ (๋‹ค๊ตญ์–ด)
2689
- gr.Examples(
2690
- examples=[
2691
- # Korean
2692
- "AI ๊ธฐ์ˆ ์ด ํ–ฅํ›„ 10๋…„๊ฐ„ ํ•œ๊ตญ ๊ฒฝ์ œ์— ๋ฏธ์น  ์˜ํ–ฅ์„ ๋‹ค๊ฐ๋„๋กœ ๋ถ„์„ํ•˜๊ณ  ๋Œ€์‘ ์ „๋žต์„ ์ œ์‹œํ•ด์ค˜",
2693
- "์Šคํƒ€ํŠธ์—…์ด ๋Œ€๊ธฐ์—…๊ณผ ๊ฒฝ์Ÿํ•˜๊ธฐ ์œ„ํ•œ ํ˜์‹ ์ ์ธ ์ „๋žต์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ˆ˜๋ฆฝํ•ด์ค˜",
2694
- # English
2695
- "Analyze the multifaceted impact of quantum computing on current encryption systems and propose alternatives",
2696
- "Design 5 innovative business models for climate change mitigation with practical implementation details",
2697
- # Japanese
2698
- "ใƒกใ‚ฟใƒใƒผใ‚นๆ™‚ไปฃใฎๆ•™่‚ฒ้ฉๆ–ฐๆ–นๆกˆใ‚’ๅฎŸ่ฃ…ๅฏ่ƒฝใชใƒฌใƒ™ใƒซใงๆๆกˆใ—ใฆใใ ใ•ใ„",
2699
- # Chinese
2700
- "ๅˆ†ๆžไบบๅทฅๆ™บ่ƒฝๅฏนๆœชๆฅๅๅนดๅ…จ็ƒ็ปๆตŽ็š„ๅฝฑๅ“ๅนถๆๅ‡บๅบ”ๅฏน็ญ–็•ฅ"
2701
- ],
2702
- inputs=msg
2703
- )
2704
-
2705
- # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
2706
- submit.click(
2707
- process_query_optimized,
2708
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
2709
- outputs=[chatbot, agent_thoughts, search_sources]
2710
- ).then(
2711
- lambda: "",
2712
- None,
2713
- msg
2714
- )
2715
-
2716
- msg.submit(
2717
- process_query_optimized,
2718
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
2719
- outputs=[chatbot, agent_thoughts, search_sources]
2720
- ).then(
2721
- lambda: "",
2722
- None,
2723
- msg
2724
- )
2725
-
2726
- clear.click(
2727
- lambda: ([], "", ""),
2728
- None,
2729
- [chatbot, agent_thoughts, search_sources]
2730
- )
2731
-
2732
- return demo
2733
-
2734
-
2735
- # ============================================================================
2736
- # ๋ฉ”์ธ ์‹คํ–‰
2737
- # ============================================================================
2738
-
2739
- if __name__ == "__main__":
2740
- print("""
2741
- โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
2742
- โ•‘ โšก Speed-Optimized Multi-Agent System (No Cache) โšก โ•‘
2743
- โ•‘ โ•‘
2744
- โ•‘ High-speed AI system processing complex questions โ•‘
2745
- โ•‘ โ•‘
2746
- โ•‘ Features: โ•‘
2747
- โ•‘ โ€ข Multi-language support (KO/EN/JA/ZH) โ•‘
2748
- โ•‘ โ€ข Improved error recovery โ•‘
2749
- โ•‘ โ€ข NO CACHING (์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ) โ•‘
2750
- โ•‘ โ€ข Adaptive stream buffering โ•‘
2751
- โ•‘ โ€ข Response cleaning & formatting โ•‘
2752
- โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
2753
- """)
2754
-
2755
- # API ํ‚ค ํ™•์ธ
2756
- if not os.getenv("FIREWORKS_API_KEY"):
2757
- print("\nโš ๏ธ FIREWORKS_API_KEY is not set.")
2758
-
2759
- if not os.getenv("BRAVE_SEARCH_API_KEY"):
2760
- print("\nโš ๏ธ BRAVE_SEARCH_API_KEY is not set.")
2761
-
2762
- # Gradio ์•ฑ ์‹คํ–‰
2763
- demo = create_optimized_gradio_interface()
2764
-
2765
- is_hf_spaces = os.getenv("SPACE_ID") is not None
2766
-
2767
- if is_hf_spaces:
2768
- print("\n๐Ÿค— Running in optimized mode on Hugging Face Spaces (No Cache)...")
2769
- demo.launch(server_name="0.0.0.0", server_port=7860)
2770
- else:
2771
- print("\n๐Ÿ’ป Running in optimized mode on local environment (No Cache)...")
2772
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False), '', response, flags=re.MULTILINE)
2773
-
2774
- # ๋ฆฌ์ŠคํŠธ ๋งˆ์ปค๋ฅผ ์ผ๋ฐ˜ ํ…์ŠคํŠธ๋กœ ๋ณ€๊ฒฝ
2775
- response = re.sub(r'^\s*[\*\-\+]\s+', 'โ€ข ', response, flags=re.MULTILINE)
2776
- response = re.sub(r'^\s*\d+\.\s+', '', response, flags=re.MULTILINE)
2777
-
2778
- # ๋งํฌ ํ…์ŠคํŠธ๋งŒ ๋‚จ๊ธฐ๊ธฐ [text](url) -> text
2779
- response = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', response)
2780
 
2781
- # ์ด๋ฏธ์ง€ ์ œ๊ฑฐ ![alt](url)
2782
- response = re.sub(r'!\[([^\]]*)\]\([^\)]+\)', '', response)
2783
 
2784
  # ์ค‘๋ณต ๊ณต๋ฐฑ ์ œ๊ฑฐ
2785
  response = re.sub(r'\n{3,}', '\n\n', response)
2786
- response = re.sub(r' {2,}', ' ', response)
2787
 
2788
  # ํŠน์ • ํŒจํ„ด ์ œ๊ฑฐ
2789
  unwanted_patterns = [
@@ -2792,18 +486,12 @@ if __name__ == "__main__":
2792
  r'^\s*\*\*\[.*?\]\*\*\s*', # [ํƒœ๊ทธ] ํ˜•์‹ ์ œ๊ฑฐ
2793
  r'^\s*###\s*', # ### ์ œ๊ฑฐ
2794
  r'^\s*##\s*', # ## ์ œ๊ฑฐ
2795
- r'^\s*#\s*', # # ์ œ๊ฑฐ
2796
- r'^\s*>\s+', # ์ธ์šฉ๋ฌธ ๋งˆ์ปค ์ œ๊ฑฐ
2797
  ]
2798
 
2799
  for pattern in unwanted_patterns:
2800
  response = re.sub(pattern, '', response, flags=re.MULTILINE)
2801
 
2802
- # ๋นˆ ์ค„ ์ •๋ฆฌ
2803
- lines = [line.strip() for line in response.split('\n')]
2804
- lines = [line for line in lines if line]
2805
- response = '\n\n'.join(lines)
2806
-
2807
  return response.strip()
2808
 
2809
 
@@ -3562,5 +1250,4 @@ if __name__ == "__main__":
3562
  demo.launch(server_name="0.0.0.0", server_port=7860)
3563
  else:
3564
  print("\n๐Ÿ’ป Running in optimized mode on local environment (No Cache)...")
3565
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
3566
-
 
470
  @staticmethod
471
  def clean_response(response: str) -> str:
472
  """๋ถˆํ•„์š”ํ•œ ๋งˆํฌ์—… ์ œ๊ฑฐ ๊ฐ•ํ™”"""
473
+ # ๋งˆํฌ๋‹ค์šด ํ—ค๋” ์ œ๊ฑฐ
474
+ response = re.sub(r'^#{1,6}\s+', '', response, flags=re.MULTILINE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
+ # ๋ถˆํ•„์š”ํ•œ ๊ตฌ๋ถ„์„  ์ œ๊ฑฐ
477
+ response = re.sub(r'\*{2,}|_{2,}|-{3,}', '', response)
478
 
479
  # ์ค‘๋ณต ๊ณต๋ฐฑ ์ œ๊ฑฐ
480
  response = re.sub(r'\n{3,}', '\n\n', response)
 
481
 
482
  # ํŠน์ • ํŒจํ„ด ์ œ๊ฑฐ
483
  unwanted_patterns = [
 
486
  r'^\s*\*\*\[.*?\]\*\*\s*', # [ํƒœ๊ทธ] ํ˜•์‹ ์ œ๊ฑฐ
487
  r'^\s*###\s*', # ### ์ œ๊ฑฐ
488
  r'^\s*##\s*', # ## ์ œ๊ฑฐ
489
+ r'^\s*#\s*' # # ์ œ๊ฑฐ
 
490
  ]
491
 
492
  for pattern in unwanted_patterns:
493
  response = re.sub(pattern, '', response, flags=re.MULTILINE)
494
 
 
 
 
 
 
495
  return response.strip()
496
 
497
 
 
1250
  demo.launch(server_name="0.0.0.0", server_port=7860)
1251
  else:
1252
  print("\n๐Ÿ’ป Running in optimized mode on local environment (No Cache)...")
1253
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False)