"""
Viz LLM - Gradio App

A RAG-powered assistant for data visualization guidance, powered by
Jina-CLIP-v2 embeddings and research from the field of information graphics.

Now with Datawrapper integration for chart generation!
"""

import asyncio
import html
import io
import os
import re
import time
import traceback
from collections import defaultdict
from datetime import datetime, timedelta

import gradio as gr
import pandas as pd
from dotenv import load_dotenv

from src.rag_pipeline import create_pipeline
from src.datawrapper_client import create_and_publish_chart, get_iframe_html
from src.vanna import VannaComponent
from src.query_intent_classifier import classify_query, IntentClassifier

# Load environment variables
load_dotenv()

# Rate limiting: track requests per user (IP-based).
# Format: {ip: [timestamp1, timestamp2, ...]}
rate_limit_tracker = defaultdict(list)
DAILY_LIMIT = 20

# Matches a "0 rows" report without also matching "10 rows", "200 rows", ...
_ZERO_ROWS_RE = re.compile(r"(?<!\d)0 rows", re.IGNORECASE)

# Matches a leading "csv" language tag that sits alone on the first line
# (what remains of a "```csv" fence once the backticks are stripped).
_CSV_LANG_TAG_RE = re.compile(r"\A\s*csv[ \t]*(?=\r?\n)", re.IGNORECASE)

# Initialize the RAG pipeline
print("Initializing Graphics Design Pipeline...")
try:
    pipeline = create_pipeline(
        retrieval_k=5,
        model=os.getenv("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"),
        temperature=float(os.getenv("LLM_TEMPERATURE", "0.2"))
    )
    print("✓ Pipeline initialized successfully")
except Exception as e:
    print(f"✗ Error initializing pipeline: {e}")
    raise

# Initialize Vanna
print("Initializing Vanna...")
try:
    vanna = VannaComponent(
        hf_model="Qwen/Qwen3-VL-30B-A3B-Instruct",
        hf_token=os.getenv("HF_TOKEN_VANNA"),
        hf_provider="novita",
        connection_string=os.getenv("SUPABASE_CONNECTION")
    )
    print("✓ Vanna initialized successfully")
except Exception as e:
    print(f"✗ Error initializing Vanna: {e}")
    raise


# CSV cleanup function
def cleanup_old_csv_files():
    """Delete CSV files older than 24 hours to prevent accumulation.

    Best-effort: a file that cannot be inspected or removed is reported
    with a warning and skipped.
    """
    folder = "513935c4d2db2d2d"
    if not os.path.exists(folder):
        return

    cleaned = 0
    for file in os.listdir(folder):
        if file.endswith(".csv"):
            file_path = os.path.join(folder, file)
            try:
                # Check if file is older than 24 hours
                if os.path.getmtime(file_path) < time.time() - 86400:
                    os.remove(file_path)
                    cleaned += 1
            except OSError as e:
                print(f"Warning: Could not delete {file_path}: {e}")

    if cleaned > 0:
        print(f"✓ Cleaned up {cleaned} old CSV files")


# Run cleanup on startup
print("Cleaning up old CSV files...")
cleanup_old_csv_files()


def check_rate_limit(request: gr.Request) -> tuple[bool, int]:
    """Check if user has exceeded rate limit.

    Args:
        request: Gradio request object; its client host identifies the user.

    Returns:
        (allowed, remaining): whether this request may proceed and how many
        requests are left in the current 24-hour window after it.
    """
    if request is None:
        return True, DAILY_LIMIT  # Allow if no request object

    user_id = request.client.host
    now = datetime.now()
    cutoff = now - timedelta(days=1)

    # Remove old requests (older than 24 hours)
    rate_limit_tracker[user_id] = [
        ts for ts in rate_limit_tracker[user_id] if ts > cutoff
    ]

    remaining = DAILY_LIMIT - len(rate_limit_tracker[user_id])
    if remaining <= 0:
        return False, 0

    # Add current request
    rate_limit_tracker[user_id].append(now)
    return True, remaining - 1


def recommend_stream(message: str, history: list, request: gr.Request):
    """
    Streaming version of design recommendation function

    Args:
        message: User's design query
        history: Chat history
        request: Gradio request object for rate limiting

    Yields:
        Response chunks (each yield is the full response accumulated so far)
    """
    # Check rate limit
    allowed, remaining = check_rate_limit(request)
    if not allowed:
        # Use DAILY_LIMIT so the message cannot drift from the enforced limit.
        yield f"⚠️ **Rate limit exceeded.** You've reached the maximum of {DAILY_LIMIT} queries per day. Please try again in 24 hours."
        return

    try:
        response_stream = pipeline.generate_recommendations(message, stream=True)
        full_response = ""
        for chunk in response_stream:
            full_response += chunk
            yield full_response

        # Add rate limit info at the end
        if remaining <= 5:
            yield full_response + f"\n\n---\n*You have {remaining} queries remaining today.*"
    except Exception as e:
        yield f"Error generating response: {str(e)}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_URL, SUPABASE_KEY) and try again."


def generate_chart_from_csv(csv_file, user_prompt, api_key):
    """
    Generate a Datawrapper chart from uploaded CSV and user prompt using
    user's API key.

    Args:
        csv_file: Uploaded CSV file
        user_prompt: User's description of the chart
        api_key: User's Datawrapper API key

    Returns:
        HTML string with iframe or error message
    """
    # Validate API key first
    if not api_key or api_key.strip() == "":
        return """

❌ No API Key Provided

Please enter your Datawrapper API key above to generate charts.

Get your API key →

"""

    if not csv_file:
        return "\nPlease upload a CSV file to generate a chart.\n"

    if not user_prompt or user_prompt.strip() == "":
        return "\nPlease describe what chart you want to create.\n"

    # Temporarily set the API key in environment for this request.
    # NOTE(review): os.environ is process-wide, so two chart requests running
    # at the same time would see each other's key. Safe only while this
    # handler is not executed concurrently - confirm the queue settings.
    original_key = os.environ.get("DATAWRAPPER_ACCESS_TOKEN")
    os.environ["DATAWRAPPER_ACCESS_TOKEN"] = api_key

    try:
        # Read CSV file
        df = pd.read_csv(csv_file)

        # create_and_publish_chart is a coroutine; asyncio.run creates a
        # fresh event loop and always closes it, even when the call raises.
        result = asyncio.run(
            create_and_publish_chart(df, user_prompt, pipeline)
        )

        if result.get("success"):
            # Get the iframe HTML
            iframe_html = get_iframe_html(result.get('public_url'), height=500)

            # Create HTML with iframe, reasoning, and edit button
            chart_html = f"""
{iframe_html}
Why this chart?

{result['reasoning']}

✏️ Open in Datawrapper
"""
            return chart_html
        else:
            # Escaped: the message comes from an external service.
            error_msg = html.escape(str(result.get("error", "Unknown error")))
            return f"""

❌ Chart Generation Failed

{error_msg}

Please check your CSV format and try again.

"""
    except Exception as e:
        return f"""

❌ Error

{html.escape(str(e))}

Please ensure your CSV is properly formatted and your API key is correct.

"""
    finally:
        # Restore original API key or remove it
        if original_key is not None:
            os.environ["DATAWRAPPER_ACCESS_TOKEN"] = original_key
        else:
            os.environ.pop("DATAWRAPPER_ACCESS_TOKEN", None)


def _card_field(row, column, default):
    """Return row[column] as HTML-escaped text, or `default` when the column is missing or the cell is empty (NaN)."""
    value = row.get(column, default)
    if pd.isna(value):
        value = default
    return html.escape(str(value))


def csv_to_cards_html(csv_text: str) -> str:
    """
    Turn the raw CSV returned by Vanna into HTML cards.

    Args:
        csv_text: CSV text; the columns read are title, source_url, author,
            published_date and image_url (each optional).

    Returns:
        HTML string with one card per row, or a message when the CSV is
        empty or cannot be parsed.
    """
    try:
        df = pd.read_csv(io.StringIO(csv_text.strip()))
        if df.empty:
            return "\nAucune donnée trouvée.\n"

        cards = []
        for _, row in df.iterrows():
            # Values come from the database, so they are escaped before
            # being placed into markup.
            title = _card_field(row, "title", "Sans titre")
            source_url = _card_field(row, "source_url", "#")
            author = _card_field(row, "author", "Inconnu")
            published_date = _card_field(row, "published_date", "")
            image_url = _card_field(
                row, "image_url", "https://fpoimg.com/800x600?text=Image+not+found"
            )

            cards.append(f"""
{title}

{title}

{author}

{published_date}

🔗 Source
""")

        cards_html = "".join(cards)
        grid_html = f"""
{cards_html}
"""
        return grid_html
    except Exception as e:
        return f"\nErreur lors du parsing du CSV : {html.escape(str(e))}\n"


def _extract_csv_payload(response_text):
    """Strip a surrounding code fence and its leading `csv` language tag, leaving the CSV data itself untouched."""
    payload = response_text.strip("`")
    return _CSV_LANG_TAG_RE.sub("", payload, count=1)


async def search_inspiration_from_database(user_prompt):
    """
    Search inspiration posts from user prompt in database.

    Args:
        user_prompt: User's description of the inspiration query

    Returns:
        HTML string displaying cards or an error message
    """
    if not user_prompt or user_prompt.strip() == "":
        return """
Please describe what kind of inspiration you want to search for.
"""

    try:
        # Classify user intent
        print(f"\n{'='*60}")
        print(f"[SEARCH] User prompt: {user_prompt}")

        classifier = IntentClassifier()
        classification = classifier.classify(user_prompt)

        print(f"[INTENT] Type: {classification['intent'].value}")
        print(f"[INTENT] Keywords: {classification['keywords']}")
        print(f"[INTENT] Inferred tags: {classification['tags']}")
        print(f"[INTENT] Short query: {classification['is_short_query']}")

        # Enhance prompt with intent guidance
        enhanced_prompt = classifier.format_for_vanna(classification)
        full_prompt = f"{user_prompt}\n\n{enhanced_prompt}"

        print("[VANNA] Sending enhanced prompt to Vanna...")
        response = await vanna.ask(full_prompt)
        print(f"[VANNA] Response received: {repr(response)[:200]}...")
        print(f"{'='*60}\n")

        clean_response = response.strip()

        # Check for empty query results (0 rows returned). The pattern rejects
        # a preceding digit so that "10 rows" is not mistaken for "0 rows".
        if "No rows returned" in clean_response or _ZERO_ROWS_RE.search(clean_response):
            return f"""

🔍 No Results Found

Your query was executed successfully, but no posts matched your criteria.

Suggestions:

Note: Most posts are currently being enriched with tags.
Keyword search works for all {classification.get('total_posts', '7,000+')} posts in the database.

"""

        # Check for errors or warnings
        if clean_response.startswith(("⚠️", "❌")) or "Aucun CSV détecté" in clean_response:
            return f"""

❌ Query Error

The AI encountered an issue processing your request.

{html.escape(clean_response[:200])}

Try rephrasing your query or being more specific.

"""

        # Process CSV response: drop the code fence only, never text inside
        # the data (a blanket replace of "csv" would corrupt titles and URLs).
        csv_text = _extract_csv_payload(clean_response)

        # Check if response contains CSV data
        if "," not in csv_text or "id,title" not in csv_text.lower():
            return f"""

❌ Invalid Response Format

The database query didn't return structured data.

This might be a temporary issue. Please try again.

"""

        cards_html = csv_to_cards_html(csv_text)
        return cards_html

    except Exception as e:
        print(f"❌ Exception in search_inspiration_from_database: {str(e)}")
        traceback.print_exc()
        return f"""

❌ System Error

An unexpected error occurred:

{html.escape(str(e))}

Please check the console logs for more details.

"""


# Minimal CSS to fix UI artifacts and style the mode selector
custom_css = """
/* Hide retry/undo buttons that appear as artifacts */
.chatbot button[aria-label="Retry"],
.chatbot button[aria-label="Undo"] {
    display: none !important;
}

/* Remove overflow-y scroll from textarea */
textarea[data-testid="textbox"] {
    overflow-y: hidden !important;
}

/* Mode selector buttons */
.mode-button {
    font-size: 1.1em;
    padding: 12px 24px;
    margin: 5px;
}
"""

# Create Gradio interface with dual-mode layout
with gr.Blocks(
    title="Viz LLM",
    css=custom_css
) as demo:
    gr.Markdown("""
    # 📊 Viz LLM

    Discover inspiring visualizations, refine your design ideas, or generate charts using Datawrapper.
    """)

    # JavaScript for localStorage persistence
    # NOTE(review): the api_key_input.change hook below calls
    # saveApiKeyToStorage(), which is not defined anywhere in this file as
    # seen here - confirm the script that defines it is injected by this
    # element.
    gr.HTML(""" """)

    # Mode selector buttons (reordered: Inspiration, Refinement, Chart)
    with gr.Row():
        inspiration_btn = gr.Button("✨ Inspiration", variant="primary", elem_classes="mode-button")
        ideation_btn = gr.Button("💡 Refinement", variant="secondary", elem_classes="mode-button")
        chart_gen_btn = gr.Button("📊 Chart", variant="secondary", elem_classes="mode-button")

    # Inspiration Mode: Search interface (shown by default)
    with gr.Column(visible=True) as inspiration_container:
        with gr.Row():
            inspiration_prompt_input = gr.Textbox(
                placeholder="Search for inspiration (e.g., 'F1', 'interactive maps')...",
                show_label=False,
                scale=4,
                container=False
            )
            inspiration_search_btn = gr.Button("🔍 Search", variant="primary", scale=1)

        inspiration_cards_html = gr.HTML("")

    # Refinement Mode: Chat interface (hidden by default, wrapped in Column)
    with gr.Column(visible=False) as ideation_container:
        ideation_interface = gr.ChatInterface(
            fn=recommend_stream,
            type="messages",
            examples=[
                "What's the best chart type for showing trends over time?",
                "How do I create an effective infographic for complex data?",
                "What are best practices for data visualization accessibility?",
                "How should I design a dashboard for storytelling?",
                "What visualization works best for comparing categories?"
            ],
            cache_examples=False,
            api_name="recommend"
        )

    # Chart Generation Mode: Chart controls and output (hidden by default)
    with gr.Column(visible=False) as chart_gen_container:
        gr.Markdown("### Chart Generator")

        # API Key Input (collapsible)
        with gr.Accordion("🔑 Datawrapper API Key", open=False):
            gr.Markdown("""
            Enter your Datawrapper API key to generate charts. Your key is stored in your browser and persists across sessions.

            **Get your key**: [Datawrapper Account Settings](https://app.datawrapper.de/account/api-tokens)
            """)

            # Warning about permissions
            gr.HTML("""
⚠️ Important: When creating your API key, toggle ALL permissions (Read & Write for Charts, Tables, Folders, etc.) otherwise chart generation will fail.
""")

            api_key_input = gr.Textbox(
                label="API Key",
                placeholder="Paste your Datawrapper API key here...",
                type="password",
                value=""
            )
            api_key_status = gr.Markdown("⚠️ Status: No API key provided")

        csv_upload = gr.File(
            label="📁 Upload CSV File",
            file_types=[".csv"],
            type="filepath"
        )
        chart_prompt_input = gr.Textbox(
            label="Describe your chart",
            placeholder="E.g., 'Show sales trends over time' or 'Compare revenue by category'",
            lines=2
        )
        generate_chart_btn = gr.Button("Generate Chart", variant="primary", size="lg")
        chart_output = gr.HTML(
            value="\nUpload a CSV file and describe your visualization above, then click Generate Chart.\n",
            label="Generated Chart"
        )

    # API key state management
    api_key_state = gr.State(value="")

    def validate_api_key(api_key: str) -> tuple[str, str]:
        """Validate and store API key.

        Returns:
            (key_for_state, status_markdown); the key is "" when rejected.
        """
        if not api_key or api_key.strip() == "":
            return "", "⚠️ Status: No API key provided"

        # Basic validation (check format)
        if len(api_key) < 20:
            return "", "❌ Status: Invalid API key format (too short)"

        # Key looks valid - it will be saved to localStorage via JavaScript.
        # The length check above guarantees at least six characters to show.
        masked_key = f"...{api_key[-6:]}"
        return api_key, f"✅ Status: API key saved to browser storage (ends with {masked_key})"

    # Mode switching functions (updated for new order: Inspiration, Refinement, Chart)
    def switch_to_inspiration():
        """Highlight the Inspiration button and show only its container."""
        return [
            gr.update(variant="primary"),    # inspiration_btn
            gr.update(variant="secondary"),  # ideation_btn
            gr.update(variant="secondary"),  # chart_gen_btn
            gr.update(visible=True),         # inspiration_container
            gr.update(visible=False),        # ideation_container
            gr.update(visible=False),        # chart_gen_container
        ]

    def switch_to_ideation():
        """Highlight the Refinement button and show only its container."""
        return [
            gr.update(variant="secondary"),  # inspiration_btn
            gr.update(variant="primary"),    # ideation_btn
            gr.update(variant="secondary"),  # chart_gen_btn
            gr.update(visible=False),        # inspiration_container
            gr.update(visible=True),         # ideation_container
            gr.update(visible=False),        # chart_gen_container
        ]

    def switch_to_chart_gen():
        """Highlight the Chart button and show only its container."""
        return [
            gr.update(variant="secondary"),  # inspiration_btn
            gr.update(variant="secondary"),  # ideation_btn
            gr.update(variant="primary"),    # chart_gen_btn
            gr.update(visible=False),        # inspiration_container
            gr.update(visible=False),        # ideation_container
            gr.update(visible=True),         # chart_gen_container
        ]

    # Wire up mode switching (updated order: inspiration, ideation, chart)
    inspiration_btn.click(
        fn=switch_to_inspiration,
        inputs=[],
        outputs=[inspiration_btn, ideation_btn, chart_gen_btn, inspiration_container, ideation_container, chart_gen_container]
    )
    ideation_btn.click(
        fn=switch_to_ideation,
        inputs=[],
        outputs=[inspiration_btn, ideation_btn, chart_gen_btn, inspiration_container, ideation_container, chart_gen_container]
    )
    chart_gen_btn.click(
        fn=switch_to_chart_gen,
        inputs=[],
        outputs=[inspiration_btn, ideation_btn, chart_gen_btn, inspiration_container, ideation_container, chart_gen_container]
    )

    # Connect API key validation and localStorage save
    api_key_input.change(
        fn=validate_api_key,
        inputs=[api_key_input],
        outputs=[api_key_state, api_key_status],
        js="(key) => { saveApiKeyToStorage(key); return key; }"
    )

    # Generate chart when button is clicked (now with API key)
    generate_chart_btn.click(
        fn=generate_chart_from_csv,
        inputs=[csv_upload, chart_prompt_input, api_key_state],
        outputs=[chart_output]
    )

    # Search inspiration with loading state
    def search_with_loading(prompt):
        """Wrapper to show loading state.

        This is a generator: every message, including the empty-query hint,
        must be yielded - a value passed to `return` is never displayed.
        """
        if not prompt or not prompt.strip():
            yield """
Please enter a search query.
"""
            return

        # Show loading immediately (Gradio will display this first)
        yield """
🔍

Searching database...

Analyzing your query and generating SQL...

"""

        # Run the actual search
        result = asyncio.run(search_inspiration_from_database(prompt))
        yield result

    inspiration_search_btn.click(
        fn=search_with_loading,
        inputs=[inspiration_prompt_input],
        outputs=[inspiration_cards_html]
    )

    # Knowledge base section (below both interfaces)
    gr.Markdown("""
    ### About Viz LLM

    **Credits:** Special thanks to the researchers whose work informed this model: Robert Kosara, Edward Segel, Jeffrey Heer, Matthew Conlen, John Maeda, Kennedy Elliott, Scott McCloud, and many others.

    ---

    **Usage Limits:** This service is limited to 20 queries per day per user to manage costs. Responses are optimized for English.
    Embeddings: Jina-CLIP-v2 | Charts: Datawrapper API | Database: Nuanced
    """)

# Launch configuration
if __name__ == "__main__":
    # Check for required environment variables (Datawrapper key now user-provided)
    required_vars = ["SUPABASE_URL", "SUPABASE_KEY", "HF_TOKEN"]
    missing_vars = [var for var in required_vars if not os.getenv(var)]

    if missing_vars:
        print(f"⚠️ Warning: Missing environment variables: {', '.join(missing_vars)}")
        print("Please set these in your .env file or as environment variables")

    # Launch the app
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True
    )