import os
import logging
import google.generativeai as genai
import spacy
import nltk
import gradio as gr
import pandas as pd
import re
from fuzzywuzzy import fuzz
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import fitz
from typing import List, Set
import docx
from pathlib import Path
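# The Space is assumed to ship a requirements.txt providing: gradio, pandas,
# google-generativeai, spacy, nltk, fuzzywuzzy, PyMuPDF (imported as fitz),
# and python-docx (imported as docx).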
class ResumeAnalyzer:
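    """Score uploaded resumes against a job description using fuzzy text matching,
    spaCy named-entity overlap, and a short Gemini-generated summary."""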
def __init__(self):
self._initialize_logging()
self._initialize_nltk()
self._initialize_spacy()
self._setup_api_key()
def _initialize_logging(self):
self.logger = logging.getLogger(__name__)
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
def _initialize_nltk(self) -> None:
try:
nltk.data.path.append(os.getcwd())
            # punkt lives under tokenizers/; stopwords and wordnet live under corpora/
            # (punkt_tab is required by newer NLTK releases for word_tokenize).
            for resource, path in [('punkt', 'tokenizers/punkt'), ('punkt_tab', 'tokenizers/punkt_tab'),
                                   ('stopwords', 'corpora/stopwords'), ('wordnet', 'corpora/wordnet')]:
                try:
                    nltk.data.find(path)
                except LookupError:
                    nltk.download(resource, quiet=True)
self.stop_words = set(stopwords.words('english'))
self.lemmatizer = WordNetLemmatizer()
except Exception as e:
self.logger.error(f"Failed to initialize NLTK: {str(e)}")
raise
def _initialize_spacy(self) -> None:
try:
self.nlp = spacy.load("en_core_web_sm")
except OSError:
self.logger.info("Downloading spaCy model...")
            import subprocess, sys
            # Use the current interpreter so the model installs into this environment.
            subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
self.nlp = spacy.load("en_core_web_sm")
def _setup_api_key(self) -> None:
try:
self.google_api_key = os.environ.get("GOOGLE_API_KEY")
if not self.google_api_key:
raise ValueError("GOOGLE_API_KEY not found in environment variables")
genai.configure(api_key=self.google_api_key)
except Exception as e:
self.logger.error(f"Failed to setup API key: {str(e)}")
raise
def extract_text_from_pdf(self, file_path: str) -> str:
try:
with fitz.open(file_path) as doc:
text = " ".join(page.get_text() for page in doc)
return text
except Exception as e:
self.logger.error(f"Error extracting text from PDF: {str(e)}")
return ""
def extract_text_from_docx(self, file_path: str) -> str:
try:
doc = docx.Document(file_path)
return "\n".join(para.text for para in doc.paragraphs)
except Exception as e:
self.logger.error(f"Error extracting text from DOCX: {str(e)}")
return ""
def preprocess_text(self, text: str) -> str:
try:
text = text.lower()
text = re.sub(r'\s+', ' ', text)
text = re.sub(r'[^a-z0-9\s]', '', text)
tokens = word_tokenize(text)
tokens = [self.lemmatizer.lemmatize(word)
for word in tokens
if word not in self.stop_words]
return " ".join(tokens)
except Exception as e:
self.logger.error(f"Error in text preprocessing: {str(e)}")
return text
def extract_named_entities(self, text: str) -> Set[str]:
try:
            doc = self.nlp(text[:100000])  # cap very long inputs to keep spaCy responsive
return {ent.text.lower() for ent in doc.ents}
except Exception as e:
self.logger.error(f"Error in named entity extraction: {str(e)}")
return set()
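    # token_set_ratio compares the sets of tokens in the two texts, so shared vocabulary
    # rather than word order or length drives the 0-100 score.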
def calculate_match_percentage(self, resume_text: str, job_desc_text: str) -> float:
try:
resume_text = self.preprocess_text(resume_text)
job_desc_text = self.preprocess_text(job_desc_text)
return fuzz.token_set_ratio(resume_text, job_desc_text)
except Exception as e:
self.logger.error(f"Error calculating match percentage: {str(e)}")
return 0.0
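    # Only the first 1000 characters of the resume are sent to Gemini to keep the prompt short.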
def gemini_analysis(self, text: str) -> str:
try:
model = genai.GenerativeModel('gemini-pro')
prompt = f"""Analyze this resume text and provide a brief summary of key skills and experience:
{text[:1000]}..."""
response = model.generate_content(prompt)
return response.text
except Exception as e:
self.logger.error(f"Error in Gemini analysis: {str(e)}")
return "AI analysis failed"
def process_file(self, file_path: str, job_desc: str) -> dict:
try:
# Extract text based on file type
if file_path.lower().endswith('.pdf'):
text = self.extract_text_from_pdf(file_path)
elif file_path.lower().endswith('.docx'):
text = self.extract_text_from_docx(file_path)
else:
return {"Resume": Path(file_path).name, "Match Percentage": "Invalid File Type"}
if not text.strip():
return {"Resume": Path(file_path).name, "Match Percentage": "No text extracted"}
entities = self.extract_named_entities(text)
job_entities = self.extract_named_entities(job_desc)
entity_match = (
len(entities.intersection(job_entities)) / len(job_entities) * 100
if job_entities else 0
)
match_percentage = self.calculate_match_percentage(text, job_desc)
gemini_analysis = self.gemini_analysis(text)
return {
"Resume": Path(file_path).name,
"Match Percentage": round(match_percentage, 2),
"Entity Match (%)": round(entity_match, 2),
"AI Analysis": gemini_analysis
}
except Exception as e:
self.logger.error(f"Error processing file {file_path}: {str(e)}")
return {"Resume": Path(file_path).name, "Error": str(e)}
def process_uploaded_resumes(self, resume_files: List[str], job_desc: str) -> pd.DataFrame:
if not resume_files:
return pd.DataFrame({"Message": ["Please upload at least one resume."]})
if not job_desc.strip():
return pd.DataFrame({"Message": ["Please provide a job description."]})
results = []
        for file_path in resume_files:
            # Some Gradio versions pass tempfile wrappers rather than plain path strings.
            if not isinstance(file_path, str):
                file_path = file_path.name
            result = self.process_file(file_path, job_desc)
            results.append(result)
return pd.DataFrame(results)
# Create the Gradio interface
analyzer = ResumeAnalyzer()
interface = gr.Interface(
fn=analyzer.process_uploaded_resumes,
inputs=[
gr.File(
label="Upload Resumes (PDF or DOCX)",
file_types=[".pdf", ".docx"],
            file_count="multiple"
),
gr.Textbox(
label="Job Description",
placeholder="Paste the job description here...",
lines=6
)
],
outputs=gr.DataFrame(label="Analysis Results"),
title="AI Resume Analyzer",
description="""
Upload resumes (PDF or DOCX) and provide a job description to see how well they match.
The analysis includes:
- Overall match percentage
- Key skills and experience matching
- AI-powered resume analysis
""",
allow_flagging="never",
theme=gr.themes.Soft()
)
if __name__ == "__main__":
    interface.launch()