"""
Viz LLM - Gradio App
A RAG-powered assistant for data visualization guidance, powered by Jina-CLIP-v2
embeddings and research from the field of information graphics.
Now with Datawrapper integration for chart generation!
"""
import os
import io
import asyncio
import time
import pandas as pd
import gradio as gr
from dotenv import load_dotenv
from src.rag_pipeline import create_pipeline
from src.datawrapper_client import create_and_publish_chart, get_iframe_html
from datetime import datetime, timedelta
from collections import defaultdict
from src.vanna import VannaComponent
from src.query_intent_classifier import classify_query, IntentClassifier
# Load environment variables (e.g. from a local .env file) before any of the
# os.getenv() lookups further down.
load_dotenv()
# Rate limiting: track accepted requests per user, keyed by client IP.
# Format: {ip: [datetime, datetime, ...]} - one timestamp per accepted request.
# The tracker lives in process memory only.
rate_limit_tracker = defaultdict(list)
# Maximum number of accepted requests per IP within a rolling 24-hour window.
DAILY_LIMIT = 20
# Build the shared RAG pipeline once at import time. The model name and the
# sampling temperature can be overridden through the environment.
print("Initializing Graphics Design Pipeline...")
try:
    pipeline = create_pipeline(
        retrieval_k=5,
        model=os.environ.get("LLM_MODEL", "meta-llama/Llama-3.1-8B-Instruct"),
        temperature=float(os.environ.get("LLM_TEMPERATURE", "0.2")),
    )
    print("✓ Pipeline initialized successfully")
except Exception as init_error:
    # The app cannot work without the pipeline: report the cause, then let
    # the original exception abort startup.
    print(f"✗ Error initializing pipeline: {init_error}")
    raise
# Create the Vanna component used by the inspiration search. The credentials
# and the database connection string are read from the environment.
print("Initializing Vanna...")
try:
    vanna = VannaComponent(
        hf_model="Qwen/Qwen3-VL-30B-A3B-Instruct",
        hf_token=os.environ.get("HF_TOKEN_VANNA"),
        hf_provider="novita",
        connection_string=os.environ.get("SUPABASE_CONNECTION"),
    )
    print("✓ Vanna initialized successfully")
except Exception as init_error:
    # Report the cause, then let the original exception abort startup.
    print(f"✗ Error initializing Vanna: {init_error}")
    raise
# CSV cleanup function
def cleanup_old_csv_files(folder="513935c4d2db2d2d", max_age_seconds=86400):
    """Delete stale CSV files from *folder* so they do not accumulate.

    Only files directly inside *folder* whose name ends with ``.csv`` are
    considered. A file that cannot be inspected or removed is reported with a
    warning and skipped; it never aborts the sweep.

    Args:
        folder: Directory to scan. Defaults to the directory name this
            function has always used.
        max_age_seconds: Files whose modification time is older than this
            many seconds are removed. Defaults to 24 hours.

    Returns:
        The number of files deleted (0 when *folder* is not a directory).
    """
    # isdir (rather than exists) also covers the case where the path is a
    # regular file, which would make os.listdir raise.
    if not os.path.isdir(folder):
        return 0
    cutoff = time.time() - max_age_seconds
    cleaned = 0
    for name in os.listdir(folder):
        if not name.endswith(".csv"):
            continue
        file_path = os.path.join(folder, name)
        try:
            if os.path.getmtime(file_path) < cutoff:
                os.remove(file_path)
                cleaned += 1
        except OSError as e:
            # Best effort: the file may have vanished or be locked.
            print(f"Warning: Could not delete {file_path}: {e}")
    if cleaned > 0:
        print(f"✓ Cleaned up {cleaned} old CSV files")
    return cleaned
# Run the cleanup once at startup (this executes when the module is loaded,
# before the UI below is built).
print("Cleaning up old CSV files...")
cleanup_old_csv_files()
def check_rate_limit(request: gr.Request) -> tuple[bool, int]:
    """Check the caller's rolling 24-hour quota and record the request.

    Callers are identified by ``request.client.host``. Timestamps older than
    24 hours are pruned on every call, so the window is rolling rather than
    aligned to calendar days.

    Args:
        request: Gradio request for the current call, or None.

    Returns:
        ``(allowed, remaining)``. ``allowed`` is False once the caller has
        already used ``DAILY_LIMIT`` requests within the last 24 hours (in
        which case ``remaining`` is 0 and nothing is recorded). Otherwise the
        request is recorded and ``remaining`` is the number left after it.
        When the caller cannot be identified (no request object, or no client
        address on it) the call is allowed and not counted.
    """
    if request is None:
        return True, DAILY_LIMIT  # Allow if no request object
    # The client address may be absent on the underlying request. Without it
    # there is nothing to key the quota on, so treat it like a missing
    # request instead of failing with AttributeError.
    client = getattr(request, "client", None)
    if client is None:
        return True, DAILY_LIMIT
    user_id = client.host
    now = datetime.now()
    cutoff = now - timedelta(days=1)
    # Drop timestamps that have aged out of the 24-hour window
    recent = [ts for ts in rate_limit_tracker[user_id] if ts > cutoff]
    rate_limit_tracker[user_id] = recent
    remaining = DAILY_LIMIT - len(recent)
    if remaining <= 0:
        return False, 0
    # Record the current request
    recent.append(now)
    return True, remaining - 1
def recommend_stream(message: str, history: list, request: gr.Request):
    """Stream a design recommendation for *message*, subject to the daily quota.

    Args:
        message: User's design query.
        history: Chat history (part of the chat callback signature; not read
            by this function).
        request: Gradio request object, used for rate limiting.

    Yields:
        The response text accumulated so far, once per received chunk. When
        five or fewer queries remain, a final item repeats the full response
        with a quota footer appended. If the quota is exhausted, or generation
        fails, a single explanatory message is yielded instead.
    """
    # Check rate limit
    allowed, remaining = check_rate_limit(request)
    if not allowed:
        # Quote the configured limit instead of a hard-coded number so the
        # message cannot drift from DAILY_LIMIT.
        yield (
            f"⚠️ **Rate limit exceeded.** You've reached the maximum of "
            f"{DAILY_LIMIT} queries per day. Please try again in 24 hours."
        )
        return
    try:
        response_stream = pipeline.generate_recommendations(message, stream=True)
        full_response = ""
        for chunk in response_stream:
            # Each yield carries the whole text so far, not just the delta.
            full_response += chunk
            yield full_response
        # Add rate limit info at the end
        if remaining <= 5:
            yield full_response + f"\n\n---\n*You have {remaining} queries remaining today.*"
    except Exception as e:
        yield f"Error generating response: {e}\n\nPlease check your environment variables (HF_TOKEN, SUPABASE_URL, SUPABASE_KEY) and try again."
def generate_chart_from_csv(csv_file, user_prompt, api_key):
    """Generate a Datawrapper chart from an uploaded CSV and a user prompt.

    The user's API key is placed in the ``DATAWRAPPER_ACCESS_TOKEN``
    environment variable for the duration of the call and the previous value
    is restored afterwards, whatever happens.

    Args:
        csv_file: Uploaded CSV file (anything ``pandas.read_csv`` accepts).
        user_prompt: User's description of the chart.
        api_key: User's Datawrapper API key.

    Returns:
        HTML string with the chart iframe and reasoning, or a message
        describing what is missing or what went wrong. Never raises for
        ordinary failures; they are reported in the returned string.
    """
    from html import escape  # local import: only needed by this function

    # Validate API key first
    if not api_key or api_key.strip() == "":
        return """
❌ No API Key Provided
Please enter your Datawrapper API key above to generate charts.
Get your API key →
"""
    if not csv_file:
        return "Please upload a CSV file to generate a chart.\n"
    if not user_prompt or user_prompt.strip() == "":
        return "Please describe what chart you want to create.\n"

    # Temporarily set the API key in environment for this request.
    # NOTE(review): os.environ is process-wide, so two overlapping requests
    # can see each other's key. Fixing that needs a way to pass the key to
    # create_and_publish_chart directly - confirm against that function.
    original_key = os.environ.get("DATAWRAPPER_ACCESS_TOKEN")
    os.environ["DATAWRAPPER_ACCESS_TOKEN"] = api_key
    try:
        # Read CSV file
        df = pd.read_csv(csv_file)
        # create_and_publish_chart is a coroutine; drive it on a private loop
        # and always close that loop, even when the coroutine raises.
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            result = loop.run_until_complete(
                create_and_publish_chart(df, user_prompt, pipeline)
            )
        finally:
            loop.close()
        if result.get("success"):
            # Get the iframe HTML
            iframe_html = get_iframe_html(result.get('public_url'), height=500)
            # The reasoning is generated text; escape it so characters such
            # as "<" or "&" cannot break (or inject into) the page.
            reasoning = escape(str(result['reasoning']))
            return f"""
{iframe_html}
Why this chart?
{reasoning}
"""
        error_msg = escape(str(result.get("error", "Unknown error")))
        return f"""
❌ Chart Generation Failed
{error_msg}
Please check your CSV format and try again.
"""
    except Exception as e:
        return f"""
❌ Error
{escape(str(e))}
Please ensure your CSV is properly formatted and your API key is correct.
"""
    finally:
        # Restore the previous token. "is not None" so that an existing but
        # empty value is put back rather than deleted.
        if original_key is not None:
            os.environ["DATAWRAPPER_ACCESS_TOKEN"] = original_key
        else:
            os.environ.pop("DATAWRAPPER_ACCESS_TOKEN", None)
def csv_to_cards_html(csv_text: str) -> str:
    """Turn the raw CSV text returned by Vanna into a block of HTML cards.

    One card is produced per CSV row. Missing columns and empty cells fall
    back to a placeholder, and every value is HTML-escaped before it is
    placed in the output.

    Args:
        csv_text: CSV document as text (header row followed by data rows).

    Returns:
        The cards markup, a "no data" message when the CSV has no rows, or an
        error message when the text cannot be parsed. Never raises.
    """
    from html import escape  # local import: only needed by this function

    def _field(row, column, default):
        """Return row[column] as escaped text, or *default* if absent/empty."""
        value = row.get(column, default)
        # An empty CSV cell is read as NaN; without this check it would be
        # rendered literally as "nan" because the column itself exists.
        if pd.isna(value):
            value = default
        return escape(str(value))

    try:
        df = pd.read_csv(io.StringIO(csv_text.strip()))
        if df.empty:
            return "Aucune donnée trouvée.\n"
        cards = []
        for _, row in df.iterrows():
            title = _field(row, "title", "Sans titre")
            # NOTE(review): source_url and image_url are read but the card
            # template below does not reference them - presumably the link
            # and image markup is missing from the template; confirm.
            source_url = _field(row, "source_url", "#")
            author = _field(row, "author", "Inconnu")
            published_date = _field(row, "published_date", "")
            image_url = _field(row, "image_url", "https://fpoimg.com/800x600?text=Image+not+found")
            cards.append(f"""
{title}
{author}
{published_date}
🔗 Source
""")
        cards_html = "".join(cards)
        return f"""
{cards_html}
"""
    except Exception as e:
        return f"Erreur lors du parsing du CSV : {escape(str(e))}\n"
async def search_inspiration_from_database(user_prompt):
    """Search inspiration posts in the database from a free-text prompt.

    The prompt is classified, extended with the classifier's guidance, and
    sent to Vanna. The CSV found in Vanna's reply is rendered as HTML cards.

    Args:
        user_prompt: User's description of the inspiration query.

    Returns:
        HTML string displaying cards, or a message explaining that the prompt
        was empty, nothing matched, or an error occurred. Exceptions are
        logged to the console and reported in the returned string.
    """
    import re  # local imports: only needed by this function
    from html import escape

    if not user_prompt or user_prompt.strip() == "":
        return """
Please describe what kind of inspiration you want to search for.
"""
    try:
        # Classify user intent
        print(f"\n{'='*60}")
        print(f"[SEARCH] User prompt: {user_prompt}")
        classifier = IntentClassifier()
        classification = classifier.classify(user_prompt)
        print(f"[INTENT] Type: {classification['intent'].value}")
        print(f"[INTENT] Keywords: {classification['keywords']}")
        print(f"[INTENT] Inferred tags: {classification['tags']}")
        print(f"[INTENT] Short query: {classification['is_short_query']}")
        # Enhance prompt with intent guidance
        enhanced_prompt = classifier.format_for_vanna(classification)
        full_prompt = f"{user_prompt}\n\n{enhanced_prompt}"
        print("[VANNA] Sending enhanced prompt to Vanna...")
        response = await vanna.ask(full_prompt)
        print(f"[VANNA] Response received: {repr(response)[:200]}...")
        print(f"{'='*60}\n")
        clean_response = response.strip()
        # Check for empty query results (0 rows returned). The lookbehind
        # keeps counts such as "10 rows" or "100 rows" from matching.
        if "No rows returned" in clean_response or re.search(r"(?<!\d)0 rows", clean_response.lower()):
            return f"""
🔍 No Results Found
Your query was executed successfully, but no posts matched your criteria.
Suggestions:
- • Try broader keywords (e.g., "visualization" instead of "F1 dataviz")
- • Search by author names (e.g., "New York Times")
- • Use simple terms (e.g., "interactive", "maps")
Note: Most posts are currently being enriched with tags.
Keyword search works for all {classification.get('total_posts', '7,000+')} posts in the database.
"""
        # Check for errors or warnings
        if clean_response.startswith(("⚠️", "❌")) or "Aucun CSV détecté" in clean_response:
            return f"""
❌ Query Error
The AI encountered an issue processing your request.
{escape(clean_response[:200])}
Try rephrasing your query or being more specific.
"""
        # Process CSV response: drop the surrounding code fence, then only
        # the fence's leading "csv" language tag. Removing every "csv"
        # substring would corrupt titles and URLs that contain that text.
        csv_text = clean_response.strip("`")
        csv_text = re.sub(r"\A\s*csv[ \t]*\r?\n", "\n", csv_text, flags=re.IGNORECASE)
        # Check if response contains CSV data
        if "," not in csv_text or "id,title" not in csv_text.lower():
            return """
❌ Invalid Response Format
The database query didn't return structured data.
This might be a temporary issue. Please try again.
"""
        cards_html = csv_to_cards_html(csv_text)
        return cards_html
    except Exception as e:
        print(f"❌ Exception in search_inspiration_from_database: {e}")
        import traceback
        traceback.print_exc()
        return f"""
❌ System Error
An unexpected error occurred:
{escape(str(e))}
Please check the console logs for more details.
"""
# Stylesheet passed to gr.Blocks(css=...) below: hides the chatbot's
# Retry/Undo buttons, disables vertical scrolling on textboxes, and sizes
# the buttons that carry the "mode-button" class.
custom_css = """
/* Hide retry/undo buttons that appear as artifacts */
.chatbot button[aria-label="Retry"],
.chatbot button[aria-label="Undo"] {
display: none !important;
}
/* Remove overflow-y scroll from textarea */
textarea[data-testid="textbox"] {
overflow-y: hidden !important;
}
/* Mode selector buttons */
.mode-button {
font-size: 1.1em;
padding: 12px 24px;
margin: 5px;
}
"""
# Create Gradio interface with a three-mode layout: Inspiration (search),
# Refinement (chat) and Chart (Datawrapper generation). Exactly one mode
# container is visible at a time; the buttons at the top switch between them.
# NOTE(review): the nesting of components inside rows/columns/accordion below
# is reconstructed from the statement order - confirm against the intended
# layout.
with gr.Blocks(
    title="Viz LLM",
    css=custom_css
) as demo:
    gr.Markdown("""
# 📊 Viz LLM
Discover inspiring visualizations, refine your design ideas, or generate charts using Datawrapper.
""")
    # JavaScript for localStorage persistence
    # NOTE(review): this HTML payload contains no script, yet the API-key
    # handler below calls saveApiKeyToStorage() in the browser - confirm
    # where that function is defined.
    gr.HTML("""
""")
    # Mode selector buttons (order: Inspiration, Refinement, Chart)
    with gr.Row():
        inspiration_btn = gr.Button("✨ Inspiration", variant="primary", elem_classes="mode-button")
        ideation_btn = gr.Button("💡 Refinement", variant="secondary", elem_classes="mode-button")
        chart_gen_btn = gr.Button("📊 Chart", variant="secondary", elem_classes="mode-button")
    # Inspiration Mode: Search interface (shown by default)
    with gr.Column(visible=True) as inspiration_container:
        with gr.Row():
            inspiration_prompt_input = gr.Textbox(
                placeholder="Search for inspiration (e.g., 'F1', 'interactive maps')...",
                show_label=False,
                scale=4,
                container=False
            )
            inspiration_search_btn = gr.Button("🔍 Search", variant="primary", scale=1)
        inspiration_cards_html = gr.HTML("")
    # Refinement Mode: Chat interface (hidden by default, wrapped in Column)
    with gr.Column(visible=False) as ideation_container:
        ideation_interface = gr.ChatInterface(
            fn=recommend_stream,
            type="messages",
            examples=[
                "What's the best chart type for showing trends over time?",
                "How do I create an effective infographic for complex data?",
                "What are best practices for data visualization accessibility?",
                "How should I design a dashboard for storytelling?",
                "What visualization works best for comparing categories?"
            ],
            cache_examples=False,
            api_name="recommend"
        )
    # Chart Generation Mode: Chart controls and output (hidden by default)
    with gr.Column(visible=False) as chart_gen_container:
        gr.Markdown("### Chart Generator")
        # API Key Input (collapsible)
        with gr.Accordion("🔑 Datawrapper API Key", open=False):
            gr.Markdown("""
Enter your Datawrapper API key to generate charts. Your key is stored in your browser and persists across sessions.
**Get your key**: [Datawrapper Account Settings](https://app.datawrapper.de/account/api-tokens)
""")
            # Warning about permissions
            gr.HTML("""
⚠️ Important: When creating your API key, toggle ALL permissions (Read & Write for Charts, Tables, Folders, etc.) otherwise chart generation will fail.
""")
            api_key_input = gr.Textbox(
                label="API Key",
                placeholder="Paste your Datawrapper API key here...",
                type="password",
                value=""
            )
            api_key_status = gr.Markdown("⚠️ Status: No API key provided")
        csv_upload = gr.File(
            label="📁 Upload CSV File",
            file_types=[".csv"],
            type="filepath"
        )
        chart_prompt_input = gr.Textbox(
            label="Describe your chart",
            placeholder="E.g., 'Show sales trends over time' or 'Compare revenue by category'",
            lines=2
        )
        generate_chart_btn = gr.Button("Generate Chart", variant="primary", size="lg")
        chart_output = gr.HTML(
            value="Upload a CSV file and describe your visualization above, then click Generate Chart.\n",
            label="Generated Chart"
        )
    # API key state management: holds the validated key for chart generation
    api_key_state = gr.State(value="")

    def validate_api_key(api_key: str) -> tuple[str, str]:
        """Validate the entered API key.

        Returns:
            ``(key, status)``: the whitespace-stripped key to store in state
            (empty string when missing or too short) and the status line to
            display.
        """
        # Strip first so pasted whitespace neither counts towards the length
        # check nor ends up in the stored key.
        api_key = (api_key or "").strip()
        if not api_key:
            return "", "⚠️ Status: No API key provided"
        # Basic validation (check format)
        if len(api_key) < 20:
            return "", "❌ Status: Invalid API key format (too short)"
        # Key looks valid - it will be saved to localStorage via JavaScript.
        # The key is at least 20 characters here, so showing the last 6 is safe.
        masked_key = f"...{api_key[-6:]}"
        return api_key, f"✅ Status: API key saved to browser storage (ends with {masked_key})"

    # Mode switching functions (order: Inspiration, Refinement, Chart)
    def _mode_updates(active: int) -> list:
        """Build the six updates for one mode switch.

        The list matches ``mode_outputs`` below: three button updates followed
        by three container updates. Only position *active* (0, 1 or 2) gets
        the primary button style and a visible container.
        """
        buttons = [
            gr.update(variant="primary" if index == active else "secondary")
            for index in range(3)
        ]
        containers = [gr.update(visible=(index == active)) for index in range(3)]
        return buttons + containers

    def switch_to_inspiration():
        """Show the Inspiration mode."""
        return _mode_updates(0)

    def switch_to_ideation():
        """Show the Refinement mode."""
        return _mode_updates(1)

    def switch_to_chart_gen():
        """Show the Chart mode."""
        return _mode_updates(2)

    # Wire up mode switching (order: inspiration, ideation, chart)
    mode_outputs = [
        inspiration_btn,
        ideation_btn,
        chart_gen_btn,
        inspiration_container,
        ideation_container,
        chart_gen_container,
    ]
    inspiration_btn.click(
        fn=switch_to_inspiration,
        inputs=[],
        outputs=mode_outputs
    )
    ideation_btn.click(
        fn=switch_to_ideation,
        inputs=[],
        outputs=mode_outputs
    )
    chart_gen_btn.click(
        fn=switch_to_chart_gen,
        inputs=[],
        outputs=mode_outputs
    )
    # Connect API key validation and localStorage save
    api_key_input.change(
        fn=validate_api_key,
        inputs=[api_key_input],
        outputs=[api_key_state, api_key_status],
        js="(key) => { saveApiKeyToStorage(key); return key; }"
    )
    # Generate chart when button is clicked (now with API key)
    generate_chart_btn.click(
        fn=generate_chart_from_csv,
        inputs=[csv_upload, chart_prompt_input, api_key_state],
        outputs=[chart_output]
    )

    # Search inspiration with loading state
    def search_with_loading(prompt):
        """Yield a loading placeholder, then the search result HTML.

        For an empty query a single prompt-for-input message is yielded.
        """
        if not prompt or not prompt.strip():
            # This function is a generator, so the message must be yielded;
            # a value passed to `return` would never reach the UI.
            yield """
Please enter a search query.
"""
            return
        # Show loading immediately (Gradio will display this first)
        yield """
🔍
Searching database...
Analyzing your query and generating SQL...
"""
        # Run the actual search (a coroutine) to completion
        yield asyncio.run(search_inspiration_from_database(prompt))

    inspiration_search_btn.click(
        fn=search_with_loading,
        inputs=[inspiration_prompt_input],
        outputs=[inspiration_cards_html]
    )
    # Knowledge base section (below all mode containers)
    gr.Markdown("""
### About Viz LLM
**Credits:** Special thanks to the researchers whose work informed this model: Robert Kosara, Edward Segel, Jeffrey Heer, Matthew Conlen, John Maeda, Kennedy Elliott, Scott McCloud, and many others.
---
**Usage Limits:** This service is limited to 20 queries per day per user to manage costs. Responses are optimized for English.
Embeddings: Jina-CLIP-v2 | Charts: Datawrapper API | Database: Nuanced
""")
# Script entry point: warn about missing configuration, then start the server.
if __name__ == "__main__":
    # Only server-side settings are checked here; the Datawrapper key is
    # entered by each user in the UI.
    required_vars = ("SUPABASE_URL", "SUPABASE_KEY", "HF_TOKEN")
    missing_vars = ", ".join(name for name in required_vars if not os.getenv(name))
    if missing_vars:
        print(f"⚠️ Warning: Missing environment variables: {missing_vars}")
        print("Please set these in your .env file or as environment variables")
    # Listen on every interface at port 7860, without a public share link
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_api": True,
    }
    demo.launch(**launch_options)