Spaces: Build error
import os
import sys
import logging
import re
import subprocess
from pathlib import Path
from typing import List, Set

import google.generativeai as genai
import gradio as gr
import nltk
import pandas as pd
import spacy
import docx   # python-docx
import fitz   # PyMuPDF
from fuzzywuzzy import fuzz
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
class ResumeAnalyzer:
    def __init__(self):
        self._initialize_logging()
        self._initialize_nltk()
        self._initialize_spacy()
        self._setup_api_key()

    def _initialize_logging(self):
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
    def _initialize_nltk(self) -> None:
        try:
            nltk.data.path.append(os.getcwd())
            # 'stopwords' and 'wordnet' live under corpora/, not tokenizers/,
            # so each resource must be looked up at its actual path or it will
            # be re-downloaded on every start. 'punkt_tab' is what newer NLTK
            # releases require for word_tokenize.
            for resource, path in [('punkt', 'tokenizers/punkt'),
                                   ('punkt_tab', 'tokenizers/punkt_tab'),
                                   ('stopwords', 'corpora/stopwords'),
                                   ('wordnet', 'corpora/wordnet')]:
                try:
                    nltk.data.find(path)
                except LookupError:
                    nltk.download(resource, quiet=True)
            self.stop_words = set(stopwords.words('english'))
            self.lemmatizer = WordNetLemmatizer()
        except Exception as e:
            self.logger.error(f"Failed to initialize NLTK: {str(e)}")
            raise
    def _initialize_spacy(self) -> None:
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            self.logger.info("Downloading spaCy model...")
            # Use the running interpreter rather than whatever 'python' happens
            # to be on PATH inside the container.
            subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
                           check=True)
            self.nlp = spacy.load("en_core_web_sm")
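    # Note (not in the original): on Hugging Face Spaces this runtime download
    # can fail if the build environment restricts network access. A common
    # alternative is to pin the model wheel in requirements.txt instead, e.g.:
    #   en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl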
    def _setup_api_key(self) -> None:
        try:
            self.google_api_key = os.environ.get("GOOGLE_API_KEY")
            if not self.google_api_key:
                raise ValueError("GOOGLE_API_KEY not found in environment variables")
            genai.configure(api_key=self.google_api_key)
        except Exception as e:
            self.logger.error(f"Failed to setup API key: {str(e)}")
            raise
    def extract_text_from_pdf(self, file_path: str) -> str:
        try:
            with fitz.open(file_path) as doc:
                text = " ".join(page.get_text() for page in doc)
            return text
        except Exception as e:
            self.logger.error(f"Error extracting text from PDF: {str(e)}")
            return ""

    def extract_text_from_docx(self, file_path: str) -> str:
        try:
            doc = docx.Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            self.logger.error(f"Error extracting text from DOCX: {str(e)}")
            return ""
    def preprocess_text(self, text: str) -> str:
        try:
            text = text.lower()
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'[^a-z0-9\s]', '', text)
            tokens = word_tokenize(text)
            tokens = [self.lemmatizer.lemmatize(word)
                      for word in tokens
                      if word not in self.stop_words]
            return " ".join(tokens)
        except Exception as e:
            self.logger.error(f"Error in text preprocessing: {str(e)}")
            return text
    def extract_named_entities(self, text: str) -> Set[str]:
        try:
            # Cap input length so spaCy memory use stays bounded on large files.
            doc = self.nlp(text[:100000])
            return {ent.text.lower() for ent in doc.ents}
        except Exception as e:
            self.logger.error(f"Error in named entity extraction: {str(e)}")
            return set()
    def calculate_match_percentage(self, resume_text: str, job_desc_text: str) -> float:
        try:
            resume_text = self.preprocess_text(resume_text)
            job_desc_text = self.preprocess_text(job_desc_text)
            return fuzz.token_set_ratio(resume_text, job_desc_text)
        except Exception as e:
            self.logger.error(f"Error calculating match percentage: {str(e)}")
            return 0.0
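    # Illustrative examples (not from the original) of what token_set_ratio
    # measures: word order and duplicate tokens are ignored, and one side being
    # a token-subset of the other scores 100.
    #   fuzz.token_set_ratio("python sql aws", "aws python")          # -> 100
    #   fuzz.token_set_ratio("java developer", "python developer")    # -> partial score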
    def gemini_analysis(self, text: str) -> str:
        try:
            model = genai.GenerativeModel('gemini-pro')
            prompt = f"""Analyze this resume text and provide a brief summary of key skills and experience:
{text[:1000]}..."""
            response = model.generate_content(prompt)
            return response.text
        except Exception as e:
            self.logger.error(f"Error in Gemini analysis: {str(e)}")
            return "AI analysis failed"
    def process_file(self, file_path: str, job_desc: str) -> dict:
        try:
            # Extract text based on file type
            if file_path.lower().endswith('.pdf'):
                text = self.extract_text_from_pdf(file_path)
            elif file_path.lower().endswith('.docx'):
                text = self.extract_text_from_docx(file_path)
            else:
                return {"Resume": Path(file_path).name, "Match Percentage": "Invalid File Type"}

            if not text.strip():
                return {"Resume": Path(file_path).name, "Match Percentage": "No text extracted"}

            entities = self.extract_named_entities(text)
            job_entities = self.extract_named_entities(job_desc)
            entity_match = (
                len(entities.intersection(job_entities)) / len(job_entities) * 100
                if job_entities else 0
            )
            match_percentage = self.calculate_match_percentage(text, job_desc)
            gemini_analysis = self.gemini_analysis(text)
            return {
                "Resume": Path(file_path).name,
                "Match Percentage": round(match_percentage, 2),
                "Entity Match (%)": round(entity_match, 2),
                "AI Analysis": gemini_analysis
            }
        except Exception as e:
            self.logger.error(f"Error processing file {file_path}: {str(e)}")
            return {"Resume": Path(file_path).name, "Error": str(e)}
    def process_uploaded_resumes(self, resume_files: List[str], job_desc: str) -> pd.DataFrame:
        if not resume_files:
            return pd.DataFrame({"Message": ["Please upload at least one resume."]})
        if not job_desc.strip():
            return pd.DataFrame({"Message": ["Please provide a job description."]})
        results = [self.process_file(file_path, job_desc) for file_path in resume_files]
        return pd.DataFrame(results)
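
# Note (assumption): with file_count="multiple", gr.File passes fn a list of
# temporary file paths, which matches the List[str] signature above.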
# Create the Gradio interface
analyzer = ResumeAnalyzer()
interface = gr.Interface(
    fn=analyzer.process_uploaded_resumes,
    inputs=[
        gr.File(
            label="Upload Resumes (PDF or DOCX)",
            file_types=[".pdf", ".docx"],
            file_count="multiple"   # gr.File has no 'multiple' kwarg; this is the supported spelling
        ),
        gr.Textbox(
            label="Job Description",
            placeholder="Paste the job description here...",
            lines=6
        )
    ],
    outputs=gr.DataFrame(label="Analysis Results"),
    title="AI Resume Analyzer",
    description="""
    Upload resumes (PDF or DOCX) and provide a job description to see how well they match.
    The analysis includes:
    - Overall match percentage
    - Key skills and experience matching
    - AI-powered resume analysis
    """,
    allow_flagging="never",   # renamed to flagging_mode="never" in Gradio 5
    theme=gr.themes.Soft()
)
if __name__ == "__main__":
    interface.launch()
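
On Spaces, a build error with a script like this most often means the dependencies are not declared. A minimal requirements.txt sketch covering the imports above (package names as published on PyPI; versions unpinned, so treat it as a starting point rather than a tested configuration):

gradio
google-generativeai
spacy
nltk
fuzzywuzzy
python-Levenshtein   # optional: silences fuzzywuzzy's slow-mode warning
PyMuPDF              # imported as fitz
python-docx          # imported as docx
pandas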