Spaces:

noddysnots
/

Gift-Recommender

Paused

App Files Files Community

noddysnots committed on Jan 31

Commit

e7b9fde

verified ·

1 Parent(s): 5e6436a

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -224

app.py CHANGED Viewed

@@ -1,249 +1,207 @@
 from typing import Dict, List, Optional
-import aiohttp
-import asyncio
-from bs4 import BeautifulSoup
-from transformers import pipeline
-import pandas as pd
-from datetime import datetime
 import json
-import sqlite3
-import re
-import urllib.parse
-class ProductKnowledgeBase:
-    def __init__(self, db_path: str = "product_knowledge.db"):
-        self.db_path = db_path
-        self.setup_database()
-    def setup_database(self):
-        """Initialize the SQLite database with required tables"""
-        conn = sqlite3.connect(self.db_path)
-        cursor = conn.cursor()
-        # Create products table
-        cursor.execute("""
-        CREATE TABLE IF NOT EXISTS products (
-            id INTEGER PRIMARY KEY,
-            name TEXT NOT NULL,
-            category TEXT NOT NULL,
-            subcategory TEXT,
-            features TEXT,
-            target_audience TEXT,
-            price_range TEXT,
-            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-        )
-        """)
-        # Create price history table
-        cursor.execute("""
-        CREATE TABLE IF NOT EXISTS price_history (
-            id INTEGER PRIMARY KEY,
-            product_id INTEGER,
-            platform TEXT NOT NULL,
-            price REAL NOT NULL,
-            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-            FOREIGN KEY (product_id) REFERENCES products (id)
         )
-        """)
-        # Create recommendations table for feedback
-        cursor.execute("""
-        CREATE TABLE IF NOT EXISTS recommendations (
-            id INTEGER PRIMARY KEY,
-            user_input TEXT NOT NULL,
-            product_id INTEGER,
-            success_rating INTEGER,
-            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-            FOREIGN KEY (product_id) REFERENCES products (id)
         )
-        """)
-        conn.commit()
-        conn.close()
-class PriceFetcher:
-    def __init__(self):
-        self.headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-        }
-    async def fetch_price(self, url: str) -> Optional[float]:
-        """Fetch price from a given URL"""
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(url, headers=self.headers) as response:
-                    if response.status == 200:
-                        html = await response.text()
-                        return self._extract_price(html)
-            return None
-        except Exception as e:
-            print(f"Error fetching price: {str(e)}")
-            return None
-    def _extract_price(self, html: str) -> Optional[float]:
-        """Extract price from HTML content"""
-        soup = BeautifulSoup(html, 'html.parser')
-        # Add platform-specific price extraction logic here
-        return None
-class RecommendationEngine:
-    def __init__(self, knowledge_base: ProductKnowledgeBase, price_fetcher: PriceFetcher):
-        self.kb = knowledge_base
-        self.price_fetcher = price_fetcher
-        self.nlp = pipeline("text-generation", model="gpt2", device_map="auto")
-    def analyze_user_input(self, text: str) -> Dict:
-        """Analyze user input for context and requirements"""
-        # Extract age if mentioned
-        age_match = re.search(r'age\s+(?:is\s+)?(\d+)', text.lower())
-        age = age_match.group(1) if age_match else None
-        # Extract budget if mentioned
-        budget_match = re.search(r'(?:budget|cost|price)\s*(?:is|:)?\s*(?:rs|₹)?\s*(\d+)', text.lower())
-        budget = budget_match.group(1) if budget_match else None
-        # Generate categories and context
-        prompt = f"Extract gift categories and context from: {text}\nFormat: category1, category2 | context"
-        response = self.nlp(prompt, max_new_tokens=50)[0]['generated_text']
-        categories, context = response.split('|')
-        categories = [c.strip() for c in categories.split(',')]
         return {
-            "age": age,
-            "budget": budget,
-            "categories": categories,
-            "context": context.strip()
         }
-    def find_matching_products(self, analysis: Dict) -> List[Dict]:
-        """Find products matching the analysis"""
-        conn = sqlite3.connect(self.kb.db_path)
-        cursor = conn.cursor()
-        query = """
-        SELECT p.*, GROUP_CONCAT(DISTINCT ph.price) as prices
-        FROM products p
-        LEFT JOIN price_history ph ON p.id = ph.product_id
-        WHERE p.category IN ({})
-        GROUP BY p.id
-        """.format(','.join('?' * len(analysis['categories'])))
-        cursor.execute(query, analysis['categories'])
-        products = cursor.fetchall()
-        conn.close()
-        return [self._format_product(p, analysis) for p in products]
-    def _format_product(self, product_data: tuple, analysis: Dict) -> Dict:
-        """Format product data with explanation"""
-        return {
-            "name": product_data[1],
-            "category": product_data[2],
-            "features": json.loads(product_data[4]),
-            "why_recommended": self._generate_explanation(product_data, analysis),
-            "price_info": self._process_price_info(product_data[-1]),
-            "target_audience": json.loads(product_data[5])
-        }
-    def _generate_explanation(self, product_data: tuple, analysis: Dict) -> str:
-        """Generate personalized explanation for recommendation"""
-        prompt = f"""
-        Product: {product_data[1]}
-        Category: {product_data[2]}
-        User Context: {analysis['context']}
-        Generate a brief explanation why this product is recommended:
-        """
-        response = self.nlp(prompt, max_new_tokens=100)[0]['generated_text']
-        return response.split('Generate a brief explanation why this product is recommended:')[-1].strip()
-    def _process_price_info(self, prices: str) -> Dict:
-        """Process and format price information"""
-        if not prices:
-            return {"min": None, "max": None, "average": None}
-        price_list = [float(p) for p in prices.split(',')]
-        return {
-            "min": min(price_list),
-            "max": max(price_list),
-            "average": sum(price_list) / len(price_list)
-        }
-class GiftRecommenderAPI:
     def __init__(self):
-        self.kb = ProductKnowledgeBase()
-        self.price_fetcher = PriceFetcher()
-        self.engine = RecommendationEngine(self.kb, self.price_fetcher)
-    async def get_recommendations(self, text: str) -> Dict:
-        """Main method to get gift recommendations"""
-        try:
-            # Analyze user input
-            analysis = self.engine.analyze_user_input(text)
-            # Find matching products
-            recommendations = self.engine.find_matching_products(analysis)
-            # Fetch current prices
-            for rec in recommendations:
-                current_prices = await self._fetch_current_prices(rec['name'])
-                rec['current_prices'] = current_prices
-            return {
-                "analysis": analysis,
-                "recommendations": recommendations
-            }
-        except Exception as e:
-            return {"error": f"An error occurred: {str(e)}"}
-    async def _fetch_current_prices(self, product_name: str) -> Dict:
-        """Fetch current prices from various platforms"""
-        encoded_name = urllib.parse.quote(product_name)
-        urls = {
-            "amazon": f"https://www.amazon.in/s?k={encoded_name}",
-            "flipkart": f"https://www.flipkart.com/search?q={encoded_name}",
-            "igp": f"https://www.igp.com/search?q={encoded_name}"
-        }
-        prices = {}
-        for platform, url in urls.items():
-            price = await self.price_fetcher.fetch_price(url)
-            if price:
-                prices[platform] = price
-        return prices
-# Create Gradio interface
-import gradio as gr
-def create_gradio_interface():
-    recommender = GiftRecommenderAPI()
-    def recommend(text: str) -> Dict:
-        return asyncio.run(recommender.get_recommendations(text))
-    demo = gr.Interface(
-        fn=recommend,
-        inputs=gr.Textbox(
-            lines=3,
-            placeholder="Describe who you're buying a gift for (age, interests, occasion, etc.)"
-        ),
-        outputs=gr.JSON(),
-        title="🎁 Smart Gift Recommender",
-        description="Get personalized gift suggestions with real-time prices and explanations!",
-        examples=[
-            ["need a fifa latest game of EA"],
-            ["a small kid of age 3 want him to have something like toy that teaches alphabets"],
-            ["Looking for a gift for my mom who enjoys gardening and cooking"]
-        ]
-    )
-    return demo
-if __name__ == "__main__":
-    demo = create_gradio_interface()
-    demo.launch()

+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    AutoModelForTokenClassification,
+    TrainingArguments,
+    Trainer
+)
+from sentence_transformers import SentenceTransformer
+from datasets import Dataset
+import torch
+import numpy as np
 from typing import Dict, List, Optional
 import json
class MultiModelAnalyzer:
    """Run three pretrained models over a piece of text.

    The analyzer owns:

    * a sequence-classification model (plus tokenizer) used to predict a
      category id,
    * a ``SentenceTransformer`` used to produce a semantic embedding,
    * a token-classification model (plus tokenizer) used to pick out
      feature words.

    Args:
        category_model_name: Checkpoint loaded for category prediction.
        semantic_model_name: Checkpoint loaded for sentence embeddings.
        feature_model_name: Checkpoint loaded for feature extraction.
    """

    def __init__(
        self,
        category_model_name: str = "EMBEDDIA/sloberta-commerce",
        semantic_model_name: str = "all-mpnet-base-v2",
        feature_model_name: str = "bert-base-multilingual-uncased",
    ):
        # 1. Category understanding model
        # NOTE(review): confirm the default category checkpoint exists on the
        # Hub and ships a trained classification head.
        self.category_model = AutoModelForSequenceClassification.from_pretrained(
            category_model_name
        )
        self.category_tokenizer = AutoTokenizer.from_pretrained(category_model_name)

        # 2. Semantic understanding model
        self.semantic_model = SentenceTransformer(semantic_model_name)

        # 3. Feature extraction model
        self.feature_model = AutoModelForTokenClassification.from_pretrained(
            feature_model_name
        )
        self.feature_tokenizer = AutoTokenizer.from_pretrained(feature_model_name)

    def analyze_text(self, text: str) -> Dict:
        """Analyse ``text`` with all three models.

        Returns:
            A dict with keys ``"category"`` (predicted class index),
            ``"embedding"`` (sentence embedding) and ``"features"`` (list of
            extracted feature words).
        """
        return {
            "category": self._predict_category(text),
            "embedding": self._get_semantic_embedding(text),
            "features": self._extract_features(text),
        }

    def _predict_category(self, text: str) -> int:
        """Return the index of the most probable category for ``text``."""
        inputs = self.category_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )
        # Fine-tuning may have moved the model to another device.
        inputs = inputs.to(self.category_model.device)
        # Trainer.train() leaves the model in training mode; switch back so
        # dropout does not make predictions nondeterministic.
        self.category_model.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            outputs = self.category_model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=1)
        return predictions.argmax().item()

    def _get_semantic_embedding(self, text: str) -> np.ndarray:
        """Return the sentence embedding of ``text``."""
        return self.semantic_model.encode(text)

    def _extract_features(self, text: str) -> List[str]:
        """Return the words of ``text`` tagged as features by the token model."""
        inputs = self.feature_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )
        inputs = inputs.to(self.feature_model.device)
        self.feature_model.eval()
        with torch.no_grad():
            outputs = self.feature_model(**inputs)
        predictions = outputs.logits.argmax(dim=2)
        return self._convert_predictions_to_features(predictions, inputs)

    def _convert_predictions_to_features(self, predictions, inputs) -> List[str]:
        """Turn per-token label ids into a list of feature words.

        Args:
            predictions: Tensor of predicted label ids; only the first row
                (the single encoded text) is read.
            inputs: The tokenizer output that produced ``predictions``; its
                ``"input_ids"`` are mapped back to token strings.
        """
        token_ids = inputs["input_ids"][0].tolist()
        label_ids = predictions[0].tolist()
        tokens = self.feature_tokenizer.convert_ids_to_tokens(token_ids)
        special_tokens = set(self.feature_tokenizer.all_special_tokens)
        return self._merge_wordpieces(tokens, label_ids, special_tokens)

    @staticmethod
    def _merge_wordpieces(tokens, labels, special_tokens=()) -> List[str]:
        """Join WordPiece tokens into words and keep the labelled ones.

        A word is kept when its first piece has a non-zero label and is not a
        special token; ``##`` continuation pieces are appended to the word
        they belong to.

        NOTE(review): assumes label id 0 means "not a feature" and that the
        tokenizer marks continuations with ``##`` (BERT WordPiece) — confirm
        against the fine-tuned label map and tokenizer.
        """
        features: List[str] = []
        current: Optional[str] = None  # word being assembled, if it is kept
        for token, label in zip(tokens, labels):
            if token.startswith("##"):
                # Continuation piece: extend the word only if it is kept.
                if current is not None:
                    current += token[2:]
                continue
            if current is not None:
                features.append(current)
            keep = label != 0 and token not in special_tokens
            current = token if keep else None
        if current is not None:
            features.append(current)
        return features
class ModelTrainer:
    """Fine-tune the models held by a ``MultiModelAnalyzer``."""

    def __init__(self, model_analyzer: MultiModelAnalyzer):
        self.analyzer = model_analyzer

    def prepare_training_data(self, product_data: List[Dict]) -> Dataset:
        """Build a ``Dataset`` from product records.

        Each product must provide the keys ``"description"``, ``"category"``,
        ``"features"`` and ``"price"``; a missing key raises ``KeyError``.
        The description is stored under the column ``"text"``.
        """
        records = [
            {
                "text": product["description"],
                "category": product["category"],
                "features": product["features"],
                "price": product["price"],
            }
            for product in product_data
        ]
        return Dataset.from_list(records)

    def fine_tune_category_model(self, training_data: Dataset):
        """Fine-tune the category prediction model on ``training_data``."""
        self._fine_tune(
            model=self.analyzer.category_model,
            tokenizer=self.analyzer.category_tokenizer,
            training_data=training_data,
            output_dir="./results",
            logging_dir="./logs",
        )

    def fine_tune_feature_model(self, training_data: Dataset):
        """Fine-tune the feature extraction model on ``training_data``."""
        self._fine_tune(
            model=self.analyzer.feature_model,
            tokenizer=self.analyzer.feature_tokenizer,
            training_data=training_data,
            output_dir="./results_feature",
            logging_dir="./logs_feature",
        )

    def _fine_tune(self, model, tokenizer, training_data, output_dir, logging_dir):
        """Run one ``Trainer`` job with the hyper-parameters shared by both models.

        NOTE(review): ``training_data`` as built by ``prepare_training_data``
        holds raw ``text``/``category``/``features``/``price`` columns. It is
        passed through unchanged here; Trainer normally expects tokenized
        inputs and a ``labels`` column — confirm that tokenization and label
        encoding happen before this is called.
        """
        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=3,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir=logging_dir,
            logging_steps=10,
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=training_data,
            tokenizer=tokenizer,
        )
        trainer.train()
class ProductRecommender:
    """Rank products against a free-text query using ``MultiModelAnalyzer``."""

    def __init__(self):
        self.model_analyzer = MultiModelAnalyzer()
        self.trainer = ModelTrainer(self.model_analyzer)

    def train_on_product_data(self, product_data: List[Dict]):
        """Fine-tune the category and feature models on ``product_data``."""
        training_dataset = self.trainer.prepare_training_data(product_data)
        self.trainer.fine_tune_category_model(training_dataset)
        self.trainer.fine_tune_feature_model(training_dataset)

    def get_recommendations(
        self,
        query: str,
        product_database: List[Dict],
        top_k: int = 5,
    ) -> List[Dict]:
        """Return the ``top_k`` products most similar to ``query``.

        Args:
            query: Free-text description of what the user is looking for.
            product_database: Product dicts; each must have a
                ``"description"`` key, which is what gets analysed.
            top_k: Maximum number of products to return (default 5).

        Returns:
            Products ordered from most to least similar. Empty when
            ``product_database`` is empty or ``top_k`` is not positive.
        """
        if top_k <= 0 or not product_database:
            return []

        query_analysis = self.model_analyzer.analyze_text(query)
        # Every product description is analysed on each call.
        scored = [
            (
                self._calculate_similarity(
                    query_analysis,
                    self.model_analyzer.analyze_text(product["description"]),
                ),
                product,
            )
            for product in product_database
        ]
        # Stable sort: products with equal scores keep their database order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [product for _, product in scored[:top_k]]

    def _calculate_similarity(self, query_analysis: Dict, product_analysis: Dict) -> float:
        """Combine category, embedding and feature agreement into one score.

        Each factor is scaled to a bounded range before weighting so that no
        single factor can swamp the others:

        * category match: 1.0 if the categories are equal, else 0.0;
        * embedding similarity: cosine similarity (0.0 if either vector has
          zero norm);
        * feature overlap: Jaccard index of the two feature sets (0.0 if both
          are empty).

        Returns:
            ``0.4 * category + 0.4 * cosine + 0.2 * jaccard`` as a Python float.
        """
        category_match = float(
            query_analysis["category"] == product_analysis["category"]
        )

        query_vec = np.asarray(query_analysis["embedding"], dtype=float)
        product_vec = np.asarray(product_analysis["embedding"], dtype=float)
        norm_product = np.linalg.norm(query_vec) * np.linalg.norm(product_vec)
        if norm_product > 0:
            embedding_similarity = float(np.dot(query_vec, product_vec) / norm_product)
        else:
            embedding_similarity = 0.0

        query_features = set(query_analysis["features"])
        product_features = set(product_analysis["features"])
        all_features = query_features | product_features
        if all_features:
            feature_overlap = len(query_features & product_features) / len(all_features)
        else:
            feature_overlap = 0.0

        # Weight and combine scores
        return (
            0.4 * category_match
            + 0.4 * embedding_similarity
            + 0.2 * feature_overlap
        )