import streamlit as st import pandas as pd import faiss import numpy as np from datasets import load_dataset from sentence_transformers import SentenceTransformer from groq import Groq import os # -------------------------- # Configuration & Styling # -------------------------- st.set_page_config( page_title="CineMaster AI - Movie Expert", page_icon="🎬", layout="wide", initial_sidebar_state="expanded" ) st.markdown(""" """, unsafe_allow_html=True) # -------------------------- # Data Loading & Processing # -------------------------- @st.cache_resource def load_movie_data(): try: # Try loading wiki_movies dataset dataset = load_dataset("wikipedia", "20220301.en", split="train[:5000]") df = pd.DataFrame(dataset) # Create synthetic movie data from Wikipedia snippets df['title'] = df['title'].apply(lambda x: x.replace("_", " ")) df['context'] = "Title: " + df['title'] + "\nContent: " + df['text'].str[:500] + "..." return df.sample(1000) # Return random 1000 entries except Exception as e: st.warning(f"Couldn't load dataset: {str(e)}. Using synthetic data.") movies = [ { "title": "The Dark Knight", "context": "Title: The Dark Knight\nPlot: Batman faces the Joker in a battle for Gotham's soul...\nCast: Christian Bale, Heath Ledger\nYear: 2008\nDirector: Christopher Nolan" }, { "title": "Inception", "context": "Title: Inception\nPlot: A thief who enters the dreams of others...\nCast: Leonardo DiCaprio, Tom Hardy\nYear: 2010\nDirector: Christopher Nolan" }, { "title": "Pulp Fiction", "context": "Title: Pulp Fiction\nPlot: The lives of two mob hitmen, a boxer, and a gangster's wife intertwine...\nCast: John Travolta, Samuel L. Jackson\nYear: 1994\nDirector: Quentin Tarantino" } ] return pd.DataFrame(movies) @st.cache_resource def setup_retrieval(df): embedder = SentenceTransformer('all-MiniLM-L6-v2') embeddings = embedder.encode(df['context'].tolist()) index = faiss.IndexFlatL2(embeddings.shape[1]) index.add(embeddings) return embedder, index # -------------------------- # Groq API Functions # -------------------------- def get_groq_response(query, context): try: client = Groq(api_key=os.getenv("GROQ_API_KEY", "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")) prompt = f"""You are a film expert analyzing this question: Question: {query} Using these verified sources: {context} Provide a detailed response with: 1. 🎬 Direct Answer 2. 📖 Explanation 3. 🎥 Relevant Scenes 4. 🏆 Awards/Trivia (if available) """ response = client.chat.completions.create( messages=[{"role": "user", "content": prompt}], model="llama3-70b-8192", temperature=0.3 ) return response.choices[0].message.content except Exception as e: return f"Error getting response: {str(e)}" # -------------------------- # Main Application # -------------------------- def main(): # Load data and models df = load_movie_data() embedder, index = setup_retrieval(df) # Header Section st.markdown("""