Spaces:

iisadia
/

Movie_Buff_QA

Sleeping

App Files Files Community

Movie_Buff_QA / app.py

iisadia

Update app.py

740f48a verified 8 months ago

raw

history blame

6.79 kB

	import streamlit as st
	import pandas as pd
	import faiss
	import numpy as np
	from datasets import load_dataset
	from sentence_transformers import SentenceTransformer
	from groq import Groq
	import os

	# --------------------------
	# Configuration & Styling
	# --------------------------
	# Page-level settings: browser tab title and icon, a wide content layout,
	# and a sidebar that starts expanded.
	st.set_page_config(
	page_title="CineMaster AI - Movie Expert",
	page_icon="🎬",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Inject the app's custom CSS. It defines two colour variables, the
	# `header`, `response-box` and `movie-card` classes that main() uses in its
	# HTML snippets, and a gradient/hover style for `.stButton>button`.
	# `unsafe_allow_html=True` is passed so the <style> tag is emitted as HTML.
	st.markdown("""
	<style>
	:root {
	--primary: #7017ff;
	--secondary: #ff2d55;
	}
	.header {
	background: linear-gradient(135deg, var(--primary), var(--secondary));
	color: white;
	padding: 2rem;
	border-radius: 15px;
	text-align: center;
	box-shadow: 0 4px 6px rgba(0,0,0,0.1);
	margin-bottom: 2rem;
	}
	.response-box {
	background: rgba(255,255,255,0.1);
	border-radius: 10px;
	padding: 1.5rem;
	margin: 1rem 0;
	border: 1px solid rgba(255,255,255,0.2);
	}
	.stButton>button {
	background: linear-gradient(45deg, var(--primary), var(--secondary)) !important;
	color: white !important;
	border-radius: 25px;
	padding: 0.8rem 2rem;
	font-weight: 600;
	transition: transform 0.2s;
	}
	.stButton>button:hover {
	transform: scale(1.05);
	}
	.movie-card {
	background: rgba(0,0,0,0.2);
	border-radius: 10px;
	padding: 1rem;
	margin: 0.5rem 0;
	}
	</style>
	""", unsafe_allow_html=True)

	# --------------------------
	# Data Loading & Processing
	# --------------------------
	@st.cache_resource
	def load_movie_data():
	    """Build the DataFrame of documents that the retriever searches.

	    The primary path reads the first 5000 rows of the English Wikipedia
	    dump (``20220301.en``), normalises underscores in titles, derives a
	    ``context`` column from the title plus the first 500 characters of the
	    article text, and returns a random sample of 1000 rows. Note that these
	    are general Wikipedia articles, not a movie-specific dataset.

	    If anything in that path raises, a warning is shown in the app and a
	    three-row hand-written movie table is returned instead.

	    Returns:
	        pandas.DataFrame: has at least ``title`` and ``context`` columns.
	    """
	    try:
	        wiki_rows = load_dataset("wikipedia", "20220301.en", split="train[:5000]")
	        articles = pd.DataFrame(wiki_rows)

	        # Turn Wikipedia-style titles ("Some_Page") into readable ones.
	        articles['title'] = articles['title'].apply(
	            lambda raw_title: raw_title.replace("_", " ")
	        )

	        # One retrievable text blob per article: title + truncated body.
	        snippet = articles['text'].str.slice(0, 500)
	        articles['context'] = (
	            "Title: " + articles['title'] + "\nContent: " + snippet + "..."
	        )

	        # Keep a random subset of 1000 articles.
	        return articles.sample(1000)

	    except Exception as e:
	        st.warning(f"Couldn't load dataset: {e!s}. Using synthetic data.")

	        # Minimal built-in catalogue so the app still works offline.
	        dark_knight = {
	            "title": "The Dark Knight",
	            "context": "Title: The Dark Knight\nPlot: Batman faces the Joker in a battle for Gotham's soul...\nCast: Christian Bale, Heath Ledger\nYear: 2008\nDirector: Christopher Nolan"
	        }
	        inception = {
	            "title": "Inception",
	            "context": "Title: Inception\nPlot: A thief who enters the dreams of others...\nCast: Leonardo DiCaprio, Tom Hardy\nYear: 2010\nDirector: Christopher Nolan"
	        }
	        pulp_fiction = {
	            "title": "Pulp Fiction",
	            "context": "Title: Pulp Fiction\nPlot: The lives of two mob hitmen, a boxer, and a gangster's wife intertwine...\nCast: John Travolta, Samuel L. Jackson\nYear: 1994\nDirector: Quentin Tarantino"
	        }
	        fallback_rows = [dark_knight, inception, pulp_fiction]
	        return pd.DataFrame(fallback_rows)

	@st.cache_resource
	def setup_retrieval(df):
	    """Embed every ``context`` string in *df* and index the vectors.

	    Args:
	        df: DataFrame with a ``context`` column of text passages.

	    Returns:
	        tuple: ``(embedder, index)`` - the SentenceTransformer model used to
	        encode the passages and a FAISS ``IndexFlatL2`` holding one vector
	        per row of *df*, in row order.
	    """
	    model = SentenceTransformer('all-MiniLM-L6-v2')

	    passages = df['context'].tolist()
	    vectors = model.encode(passages)

	    # The index dimensionality is taken from the encoder's output width.
	    dimension = vectors.shape[1]
	    vector_store = faiss.IndexFlatL2(dimension)
	    vector_store.add(vectors)

	    return model, vector_store

	# --------------------------
	# Groq API Functions
	# --------------------------
	def get_groq_response(query, context):
	    """Ask the Groq-hosted ``llama3-70b-8192`` model to answer *query*.

	    The API key is read exclusively from the ``GROQ_API_KEY`` environment
	    variable. No literal key is embedded in the source: a hard-coded
	    fallback leaks the secret to anyone who can read the file.

	    Args:
	        query: The user's question, inserted verbatim into the prompt.
	        context: Retrieved source text the model is told to rely on.

	    Returns:
	        str: The model's answer, or a message starting with
	        ``"Error getting response: "`` if the key is missing or the API
	        call fails. This function never raises.
	    """
	    api_key = os.getenv("GROQ_API_KEY")
	    if not api_key:
	        # Same error-string convention as the except branch below, so
	        # callers that simply display the return value keep working.
	        return "Error getting response: GROQ_API_KEY environment variable is not set"

	    try:
	        client = Groq(api_key=api_key)

	        prompt = f"""You are a film expert analyzing this question:

	Question: {query}

	Using these verified sources:
	{context}

	Provide a detailed response with:
	1. 🎬 Direct Answer
	2. 📖 Explanation
	3. 🎥 Relevant Scenes
	4. 🏆 Awards/Trivia (if available)
	"""

	        response = client.chat.completions.create(
	            messages=[{"role": "user", "content": prompt}],
	            model="llama3-70b-8192",
	            temperature=0.3
	        )
	        return response.choices[0].message.content
	    except Exception as e:
	        # Best-effort UI helper: surface the failure as text instead of
	        # crashing the Streamlit script.
	        return f"Error getting response: {str(e)}"

	# --------------------------
	# Main Application
	# --------------------------
	def _retrieve_contexts(query, df, embedder, index, top_k=3):
	    """Return the ``context`` strings of the rows of *df* nearest to *query*."""
	    # Never request more neighbours than there are rows in the table.
	    k = min(top_k, len(df))
	    if k <= 0:
	        return []

	    query_embed = embedder.encode([query])
	    _, indices = index.search(query_embed, k)

	    # FAISS pads missing results with -1; df.iloc[-1] would silently return
	    # the last row, so negative labels are dropped.
	    return [df.iloc[i]['context'] for i in indices[0] if i >= 0]


	def main():
	    """Render the CineMaster AI page and answer the user's question.

	    Loads the document table and retrieval index, draws the header and the
	    sidebar, then - when the button is pressed with a non-blank question -
	    retrieves up to three nearest contexts, sends them with the question to
	    ``get_groq_response`` and displays the answer and its sources.

	    The model answer and the retrieved text are HTML-escaped before being
	    placed inside ``unsafe_allow_html`` markup, because both are untrusted
	    text (LLM output and scraped article content).
	    """
	    from html import escape

	    # Load data and models
	    df = load_movie_data()
	    embedder, index = setup_retrieval(df)

	    # Header Section
	    st.markdown("""
	<div class="header">
	<h1>🎞️ CineMaster AI</h1>
	<h3>Your Personal Movie Encyclopedia</h3>
	</div>
	""", unsafe_allow_html=True)

	    # Sidebar
	    with st.sidebar:
	        st.image("https://cdn-icons-png.flaticon.com/512/2598/2598702.png", width=120)
	        st.subheader("Sample Questions")
	        examples = [
	            "Who played the Joker in The Dark Knight?",
	            "Explain the ending of Inception",
	            "List Tarantino's movies",
	            "What's the plot of Pulp Fiction?",
	            "Who directed The Dark Knight?"
	        ]
	        for ex in examples:
	            st.code(ex, language="bash")

	        st.markdown("---")
	        st.markdown("Database Info")
	        st.write(f"📊 {len(df)} movies loaded")
	        st.write("🔍 Using FAISS for vector search")
	        st.write("🤖 Powered by Llama 3 70B")

	    # Main Interface
	    query = st.text_input("🎯 Ask any movie question:",
	                          placeholder="e.g., 'Who played the villain in The Dark Knight?'")

	    if st.button("🚀 Get Expert Analysis", type="primary"):
	        # Treat whitespace-only input the same as no input.
	        query = (query or "").strip()
	        if query:
	            with st.spinner("🔍 Searching through movie database..."):
	                contexts = _retrieve_contexts(query, df, embedder, index, top_k=3)
	                combined_context = "\n\n---\n\n".join(contexts)

	            with st.spinner("🎥 Generating cinematic insights..."):
	                answer = get_groq_response(query, combined_context)

	            st.markdown("---")
	            with st.container():
	                st.markdown("## 🎬 Expert Analysis")
	                # Escape before embedding in raw HTML so tags in the model
	                # output cannot be injected into the page.
	                st.markdown(f'<div class="response-box">{escape(answer)}</div>', unsafe_allow_html=True)

	                st.markdown("## 📚 Reference Materials")
	                for i, ctx in enumerate(contexts, 1):
	                    with st.expander(f"Source {i}", expanded=(i==1)):
	                        st.markdown(f'<div class="movie-card">{escape(ctx)}</div>', unsafe_allow_html=True)
	        else:
	            st.warning("Please enter a movie-related question")

	# Entry point: build and run the UI only when this file is executed as the
	# main script, not when it is imported as a module.
	if __name__ == "__main__":
	main()