Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import faiss | |
| import numpy as np | |
| from datasets import load_dataset | |
| from sentence_transformers import SentenceTransformer | |
| from groq import Groq | |
| import os | |
# --------------------------
# Configuration & Styling
# --------------------------
# Page-level settings handed to Streamlit before anything else is rendered.
# NOTE(review): the page_icon value looks like a mis-encoded emoji in this
# copy of the file - confirm against the original source before shipping.
_PAGE_SETTINGS = {
    "page_title": "CineMaster AI - Movie Expert",
    "page_icon": "π¬",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}

# Custom stylesheet: two theme colours plus the classes used by the page
# (.header, .response-box, .movie-card) and a gradient look for buttons.
# The lines are kept flush-left on purpose so the injected markup is exactly
# the text shown here.
_CUSTOM_CSS = """
<style>
:root {
--primary: #7017ff;
--secondary: #ff2d55;
}
.header {
background: linear-gradient(135deg, var(--primary), var(--secondary));
color: white;
padding: 2rem;
border-radius: 15px;
text-align: center;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
margin-bottom: 2rem;
}
.response-box {
background: rgba(255,255,255,0.1);
border-radius: 10px;
padding: 1.5rem;
margin: 1rem 0;
border: 1px solid rgba(255,255,255,0.2);
}
.stButton>button {
background: linear-gradient(45deg, var(--primary), var(--secondary)) !important;
color: white !important;
border-radius: 25px;
padding: 0.8rem 2rem;
font-weight: 600;
transition: transform 0.2s;
}
.stButton>button:hover {
transform: scale(1.05);
}
.movie-card {
background: rgba(0,0,0,0.2);
border-radius: 10px;
padding: 1rem;
margin: 0.5rem 0;
}
</style>
"""

st.set_page_config(**_PAGE_SETTINGS)
# unsafe_allow_html is required for the <style> tag to reach the browser;
# the markup is a fixed literal, so no untrusted text is involved here.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
| # -------------------------- | |
| # Data Loading & Processing | |
| # -------------------------- | |
def load_movie_data():
    """Load the document corpus that the retriever searches.

    The primary path downloads the first 5000 rows of the English Wikipedia
    dump (``"wikipedia"``, config ``"20220301.en"``) and returns a random
    sample of at most 1000 of them.  Each row gets a ``context`` column made
    of the article title plus the first 500 characters of its text.

    If any step of the primary path fails, a warning is shown in the UI and a
    small hand-written dataset of three films is returned instead.

    Returns:
        pandas.DataFrame: one row per document, with at least the columns
        ``title`` and ``context``.
    """
    try:
        # NOTE: this is the general Wikipedia dump, not a movie-specific
        # dataset, so retrieved passages are not guaranteed to be about films.
        dataset = load_dataset("wikipedia", "20220301.en", split="train[:5000]")
        df = pd.DataFrame(dataset)
        # Wikipedia-style titles may use underscores in place of spaces.
        df['title'] = df['title'].apply(lambda x: x.replace("_", " "))
        df['context'] = "Title: " + df['title'] + "\nContent: " + df['text'].str[:500] + "..."
        # Cap the sample size: DataFrame.sample raises ValueError when asked
        # for more rows than exist, which would silently discard a perfectly
        # good (but short) dataset in favour of the fallback below.
        sample_size = min(1000, len(df))
        return df.sample(sample_size)
    except Exception as e:
        # Deliberate best-effort boundary: the app stays usable offline or
        # when the dataset cannot be fetched, and the user is told why.
        st.warning(f"Couldn't load dataset: {str(e)}. Using synthetic data.")
        movies = [
            {
                "title": "The Dark Knight",
                "context": "Title: The Dark Knight\nPlot: Batman faces the Joker in a battle for Gotham's soul...\nCast: Christian Bale, Heath Ledger\nYear: 2008\nDirector: Christopher Nolan",
            },
            {
                "title": "Inception",
                "context": "Title: Inception\nPlot: A thief who enters the dreams of others...\nCast: Leonardo DiCaprio, Tom Hardy\nYear: 2010\nDirector: Christopher Nolan",
            },
            {
                "title": "Pulp Fiction",
                "context": "Title: Pulp Fiction\nPlot: The lives of two mob hitmen, a boxer, and a gangster's wife intertwine...\nCast: John Travolta, Samuel L. Jackson\nYear: 1994\nDirector: Quentin Tarantino",
            },
        ]
        return pd.DataFrame(movies)
def setup_retrieval(df):
    """Create the embedding model and vector index for *df*.

    Every value in ``df['context']`` is encoded with the
    ``all-MiniLM-L6-v2`` sentence-transformer and the resulting vectors are
    stored in a flat L2 FAISS index, in the same order as the rows of *df*.

    Args:
        df: DataFrame with a ``context`` column of text to index.

    Returns:
        tuple: ``(embedder, index)`` - the SentenceTransformer instance and
        the populated ``faiss.IndexFlatL2``.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    documents = df['context'].tolist()
    vectors = model.encode(documents)
    # The index dimensionality must match the width of the embedding matrix.
    dimension = vectors.shape[1]
    search_index = faiss.IndexFlatL2(dimension)
    search_index.add(vectors)
    return model, search_index
| # -------------------------- | |
| # Groq API Functions | |
| # -------------------------- | |
def _build_prompt(query, context):
    """Assemble the film-expert prompt from the question and retrieved sources."""
    # NOTE(review): the symbols after the list numbers look like mis-encoded
    # emoji in this copy of the file - confirm against the original source.
    return (
        "You are a film expert analyzing this question:\n"
        f"Question: {query}\n"
        "Using these verified sources:\n"
        f"{context}\n"
        "Provide a detailed response with:\n"
        "1. π¬ Direct Answer\n"
        "2. π Explanation\n"
        "3. π₯ Relevant Scenes\n"
        "4. π Awards/Trivia (if available)\n"
    )


def get_groq_response(query, context):
    """Ask the Groq-hosted ``llama3-70b-8192`` model to answer *query*.

    The API key is read from the ``GROQ_API_KEY`` environment variable only.
    Credentials must never be embedded in source as a fallback: a key that
    has been committed to a repository has to be treated as compromised and
    rotated.

    Args:
        query: The user's question.
        context: Retrieved source passages to ground the answer.

    Returns:
        str: The model's answer, or a message starting with
        ``"Error getting response:"`` if the key is missing or the request
        fails.  This function does not raise; callers display the returned
        text directly.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "Error getting response: GROQ_API_KEY environment variable is not set"

    try:
        client = Groq(api_key=api_key)
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": _build_prompt(query, context)}],
            model="llama3-70b-8192",
            temperature=0.3,
        )
        return response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface any client/network failure as text rather
        # than crashing the page.
        return f"Error getting response: {str(e)}"
| # -------------------------- | |
| # Main Application | |
| # -------------------------- | |
def _html_text(text):
    """Escape *text* for embedding in raw HTML while keeping its line breaks."""
    import html  # local import: this helper is the only user of the module

    return html.escape(str(text)).replace("\n", "<br>")


def main():
    """Render the CineMaster AI page and answer the user's question.

    Flow: load (and cache) the corpus, embedding model and FAISS index; draw
    the header and sidebar; on button press embed the query, fetch the
    nearest passages, send them with the question to the LLM and display the
    answer together with the passages used.
    """

    # Streamlit re-executes the whole script on every interaction.  Without
    # caching, each button click would reload the dataset and re-embed every
    # document (and draw a different random sample each time).
    @st.cache_resource
    def _load_resources():
        frame = load_movie_data()
        model, vector_index = setup_retrieval(frame)
        return frame, model, vector_index

    df, embedder, index = _load_resources()

    # Header Section
    # NOTE(review): several UI strings below contain what look like
    # mis-encoded emoji in this copy of the file - confirm against the
    # original source.  They are kept exactly as found.
    st.markdown("""
<div class="header">
<h1>ποΈ CineMaster AI</h1>
<h3>Your Personal Movie Encyclopedia</h3>
</div>
""", unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.image("https://cdn-icons-png.flaticon.com/512/2598/2598702.png", width=120)
        st.subheader("Sample Questions")
        examples = [
            "Who played the Joker in The Dark Knight?",
            "Explain the ending of Inception",
            "List Tarantino's movies",
            "What's the plot of Pulp Fiction?",
            "Who directed The Dark Knight?",
        ]
        for ex in examples:
            st.code(ex, language="bash")
        st.markdown("---")
        st.markdown("**Database Info**")
        st.write(f"π {len(df)} movies loaded")
        st.write("π Using FAISS for vector search")
        st.write("π€ Powered by Llama 3 70B")

    # Main Interface
    query = st.text_input("π― Ask any movie question:",
                          placeholder="e.g., 'Who played the villain in The Dark Knight?'")
    if not st.button("π Get Expert Analysis", type="primary"):
        return

    # Treat a whitespace-only entry the same as an empty one.
    if not query or not query.strip():
        st.warning("Please enter a movie-related question")
        return

    with st.spinner("π Searching through movie database..."):
        query_embed = embedder.encode([query])
        # Never ask for more neighbours than the index holds: FAISS pads
        # missing results with -1, and df.iloc[-1] would silently return the
        # last row instead of failing.
        top_k = min(3, len(df))
        _, indices = index.search(query_embed, top_k)
        contexts = [df.iloc[i]['context'] for i in indices[0] if i >= 0]
        combined_context = "\n\n---\n\n".join(contexts)

    with st.spinner("π₯ Generating cinematic insights..."):
        answer = get_groq_response(query, combined_context)

    st.markdown("---")
    with st.container():
        st.markdown("## π¬ Expert Analysis")
        # The answer is model output and the sources are third-party text;
        # both are escaped before being placed inside raw HTML so they
        # cannot inject markup or scripts into the page.
        st.markdown(f'<div class="response-box">{_html_text(answer)}</div>', unsafe_allow_html=True)
        st.markdown("## π Reference Materials")
        for i, ctx in enumerate(contexts, 1):
            with st.expander(f"Source {i}", expanded=(i == 1)):
                st.markdown(f'<div class="movie-card">{_html_text(ctx)}</div>', unsafe_allow_html=True)


if __name__ == "__main__":
    main()