"""CineMaster AI — a Streamlit RAG app for movie questions.

Pipeline: load text snippets (Wikipedia sample or a synthetic fallback),
embed them with sentence-transformers, index with FAISS, retrieve the top
matches for a user query, and ask a Groq-hosted Llama 3 model to answer
using the retrieved context.
"""

import os

import faiss
import numpy as np
import pandas as pd
import streamlit as st
from datasets import load_dataset
from groq import Groq
from sentence_transformers import SentenceTransformer

# --------------------------
# Configuration & Styling
# --------------------------
st.set_page_config(
    page_title="CineMaster AI - Movie Expert",
    page_icon="🎬",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): the original CSS block appears to have been stripped during
# extraction — this placeholder keeps the call site; restore the styles here.
st.markdown("""
""", unsafe_allow_html=True)


# --------------------------
# Data Loading & Processing
# --------------------------
@st.cache_resource
def load_movie_data():
    """Return a DataFrame with 'title' and 'context' columns.

    Tries a 5000-row slice of the Wikipedia dataset and samples up to 1000
    rows from it; on any failure (network, missing dataset builder, etc.)
    falls back to a small hand-written synthetic movie table so the app
    stays usable offline.
    """
    try:
        dataset = load_dataset("wikipedia", "20220301.en", split="train[:5000]")
        df = pd.DataFrame(dataset)
        # Create synthetic movie data from Wikipedia snippets.
        df['title'] = df['title'].apply(lambda x: x.replace("_", " "))
        df['context'] = (
            "Title: " + df['title'] + "\nContent: " + df['text'].str[:500] + "..."
        )
        # Guard: sample() raises if asked for more rows than exist.
        return df.sample(min(1000, len(df)))
    except Exception as e:
        st.warning(f"Couldn't load dataset: {str(e)}. Using synthetic data.")
        movies = [
            {
                "title": "The Dark Knight",
                "context": "Title: The Dark Knight\nPlot: Batman faces the Joker in a battle for Gotham's soul...\nCast: Christian Bale, Heath Ledger\nYear: 2008\nDirector: Christopher Nolan"
            },
            {
                "title": "Inception",
                "context": "Title: Inception\nPlot: A thief who enters the dreams of others...\nCast: Leonardo DiCaprio, Tom Hardy\nYear: 2010\nDirector: Christopher Nolan"
            },
            {
                "title": "Pulp Fiction",
                "context": "Title: Pulp Fiction\nPlot: The lives of two mob hitmen, a boxer, and a gangster's wife intertwine...\nCast: John Travolta, Samuel L. Jackson\nYear: 1994\nDirector: Quentin Tarantino"
            }
        ]
        return pd.DataFrame(movies)


@st.cache_resource
def setup_retrieval(df):
    """Embed every 'context' row and build an L2 FAISS index over them.

    Returns (embedder, index); cached so the model loads and the corpus is
    embedded only once per session.
    """
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = embedder.encode(df['context'].tolist())
    # FAISS requires contiguous float32 input; encode() usually returns
    # float32 already, but coerce explicitly to be safe.
    embeddings = np.ascontiguousarray(np.asarray(embeddings, dtype=np.float32))
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return embedder, index


# --------------------------
# Groq API Functions
# --------------------------
def get_groq_response(query, context):
    """Ask the Groq-hosted Llama 3 model to answer `query` using `context`.

    Returns the model's answer text, or an "Error getting response: ..."
    string on any failure (missing key, network error, API error).
    """
    # SECURITY: the original embedded a live API key as a fallback literal.
    # Never hardcode secrets — require the environment variable instead.
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "Error getting response: GROQ_API_KEY environment variable is not set."
    try:
        client = Groq(api_key=api_key)
        prompt = f"""You are a film expert analyzing this question: Question: {query} Using these verified sources: {context} Provide a detailed response with: 1. 🎬 Direct Answer 2. 📖 Explanation 3. 🎥 Relevant Scenes 4. 🏆 Awards/Trivia (if available) """
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192",
            temperature=0.3,  # low temperature: favor factual, grounded answers
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error getting response: {str(e)}"


# --------------------------
# Main Application
# --------------------------
def main():
    """Render the UI: header, sidebar, query box, retrieval + answer panes."""
    # Load data and models (both cached across reruns).
    df = load_movie_data()
    embedder, index = setup_retrieval(df)

    # Header Section.
    # NOTE(review): the original HTML wrapper tags were lost to extraction;
    # this is a minimal well-formed reconstruction of the centered header.
    st.markdown("""
<div style="text-align: center;">
    <h1>🎞️ CineMaster AI</h1>
    <h3>Your Personal Movie Encyclopedia</h3>
</div>
""", unsafe_allow_html=True)

    # Sidebar: sample questions plus a summary of the loaded database.
    with st.sidebar:
        st.image("https://cdn-icons-png.flaticon.com/512/2598/2598702.png", width=120)
        st.subheader("Sample Questions")
        examples = [
            "Who played the Joker in The Dark Knight?",
            "Explain the ending of Inception",
            "List Tarantino's movies",
            "What's the plot of Pulp Fiction?",
            "Who directed The Dark Knight?"
        ]
        for ex in examples:
            st.code(ex, language="bash")
        st.markdown("---")
        st.markdown("**Database Info**")
        st.write(f"📊 {len(df)} movies loaded")
        st.write("🔍 Using FAISS for vector search")
        st.write("🤖 Powered by Llama 3 70B")

    # Main Interface.
    query = st.text_input(
        "🎯 Ask any movie question:",
        placeholder="e.g., 'Who played the villain in The Dark Knight?'"
    )

    if st.button("🚀 Get Expert Analysis", type="primary"):
        if query:
            with st.spinner("🔍 Searching through movie database..."):
                query_embed = np.ascontiguousarray(
                    np.asarray(embedder.encode([query]), dtype=np.float32)
                )
                _, indices = index.search(query_embed, 3)
                # FAISS pads with -1 when the corpus has fewer than k rows
                # (the 3-row synthetic fallback sits exactly at that edge).
                contexts = [df.iloc[i]['context'] for i in indices[0] if i >= 0]
                combined_context = "\n\n---\n\n".join(contexts)

            with st.spinner("🎥 Generating cinematic insights..."):
                answer = get_groq_response(query, combined_context)

            st.markdown("---")
            with st.container():
                st.markdown("## 🎬 Expert Analysis")
                # NOTE(review): original wrapped these in styled <div>s that
                # were stripped; render the markdown directly instead.
                st.markdown(answer)

                st.markdown("## 📚 Reference Materials")
                for i, ctx in enumerate(contexts, 1):
                    with st.expander(f"Source {i}", expanded=(i == 1)):
                        st.markdown(ctx)
        else:
            st.warning("Please enter a movie-related question")


if __name__ == "__main__":
    main()