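"""AI Resume Analyzer.

A Gradio app that scores uploaded resumes (PDF or DOCX) against a job
description using fuzzy text matching, spaCy named-entity overlap, and an
AI summary generated with Google's Gemini API.
"""
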
import logging
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import List, Set

import docx
import fitz  # PyMuPDF
import google.generativeai as genai
import gradio as gr
import nltk
import pandas as pd
import spacy
from fuzzywuzzy import fuzz
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
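
# Third-party dependencies: google-generativeai, spacy (+ en_core_web_sm),
# nltk, gradio, pandas, fuzzywuzzy, PyMuPDF (imported as fitz), python-docx.
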
class ResumeAnalyzer:
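    """Extracts text from PDF/DOCX resumes and scores it against a job
    description via fuzzy matching, entity overlap, and a Gemini summary."""
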
    def __init__(self):
        self._initialize_logging()
        self._initialize_nltk()
        self._initialize_spacy()
        self._setup_api_key()

    def _initialize_logging(self):
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    def _initialize_nltk(self) -> None:
        try:
            nltk.data.path.append(os.getcwd())
            # NLTK keeps 'punkt' under tokenizers/ but 'stopwords' and
            # 'wordnet' under corpora/, so each resource is looked up in its
            # own category ('punkt_tab' is additionally needed by newer NLTK).
            for resource, path in [('punkt', 'tokenizers/punkt'),
                                   ('punkt_tab', 'tokenizers/punkt_tab'),
                                   ('stopwords', 'corpora/stopwords'),
                                   ('wordnet', 'corpora/wordnet')]:
                try:
                    nltk.data.find(path)
                except LookupError:
                    nltk.download(resource, quiet=True)
            self.stop_words = set(stopwords.words('english'))
            self.lemmatizer = WordNetLemmatizer()
        except Exception as e:
            self.logger.error(f"Failed to initialize NLTK: {str(e)}")
            raise

    def _initialize_spacy(self) -> None:
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            self.logger.info("Downloading spaCy model...")
            # Use the running interpreter so the model installs into the
            # same environment the app is executing in.
            subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
            self.nlp = spacy.load("en_core_web_sm")

    def _setup_api_key(self) -> None:
        try:
            self.google_api_key = os.environ.get("GOOGLE_API_KEY")
            if not self.google_api_key:
                raise ValueError("GOOGLE_API_KEY not found in environment variables")
            genai.configure(api_key=self.google_api_key)
        except Exception as e:
            self.logger.error(f"Failed to set up API key: {str(e)}")
            raise

    def extract_text_from_pdf(self, file_path: str) -> str:
        try:
            with fitz.open(file_path) as doc:
                return " ".join(page.get_text() for page in doc)
        except Exception as e:
            self.logger.error(f"Error extracting text from PDF: {str(e)}")
            return ""

    def extract_text_from_docx(self, file_path: str) -> str:
        try:
            doc = docx.Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            self.logger.error(f"Error extracting text from DOCX: {str(e)}")
            return ""

    def preprocess_text(self, text: str) -> str:
        try:
            text = text.lower()
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'[^a-z0-9\s]', '', text)
            tokens = word_tokenize(text)
            tokens = [self.lemmatizer.lemmatize(word)
                      for word in tokens
                      if word not in self.stop_words]
            return " ".join(tokens)
        except Exception as e:
            self.logger.error(f"Error in text preprocessing: {str(e)}")
            return text

    def extract_named_entities(self, text: str) -> Set[str]:
        try:
            # Cap input length to keep spaCy's memory use bounded.
            doc = self.nlp(text[:100000])
            return {ent.text.lower() for ent in doc.ents}
        except Exception as e:
            self.logger.error(f"Error in named entity extraction: {str(e)}")
            return set()

    def calculate_match_percentage(self, resume_text: str, job_desc_text: str) -> float:
        try:
            resume_text = self.preprocess_text(resume_text)
            job_desc_text = self.preprocess_text(job_desc_text)
            # token_set_ratio returns an int in [0, 100]; cast to match the
            # declared return type.
            return float(fuzz.token_set_ratio(resume_text, job_desc_text))
        except Exception as e:
            self.logger.error(f"Error calculating match percentage: {str(e)}")
            return 0.0

    def gemini_analysis(self, text: str) -> str:
        try:
            model = genai.GenerativeModel('gemini-pro')
            prompt = f"""Analyze this resume text and provide a brief summary of key skills and experience:
{text[:1000]}..."""
            response = model.generate_content(prompt)
            return response.text
        except Exception as e:
            self.logger.error(f"Error in Gemini analysis: {str(e)}")
            return "AI analysis failed"

    def process_file(self, file_path: str, job_desc: str) -> dict:
        try:
            # Extract text based on file type.
            if file_path.lower().endswith('.pdf'):
                text = self.extract_text_from_pdf(file_path)
            elif file_path.lower().endswith('.docx'):
                text = self.extract_text_from_docx(file_path)
            else:
                return {"Resume": Path(file_path).name, "Match Percentage": "Invalid File Type"}
            if not text.strip():
                return {"Resume": Path(file_path).name, "Match Percentage": "No text extracted"}
            # Entity overlap: the fraction of job-description entities that
            # also appear in the resume.
            entities = self.extract_named_entities(text)
            job_entities = self.extract_named_entities(job_desc)
            entity_match = (
                len(entities.intersection(job_entities)) / len(job_entities) * 100
                if job_entities else 0
            )
            match_percentage = self.calculate_match_percentage(text, job_desc)
            analysis = self.gemini_analysis(text)
            return {
                "Resume": Path(file_path).name,
                "Match Percentage": round(match_percentage, 2),
                "Entity Match (%)": round(entity_match, 2),
                "AI Analysis": analysis
            }
        except Exception as e:
            self.logger.error(f"Error processing file {file_path}: {str(e)}")
            return {"Resume": Path(file_path).name, "Error": str(e)}

    def process_uploaded_resumes(self, resume_files: List[str], job_desc: str) -> pd.DataFrame:
        if not resume_files:
            return pd.DataFrame({"Message": ["Please upload at least one resume."]})
        if not job_desc.strip():
            return pd.DataFrame({"Message": ["Please provide a job description."]})
        results = [self.process_file(file_path, job_desc) for file_path in resume_files]
        return pd.DataFrame(results)
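
# The analyzer can also be used programmatically (no UI): process_uploaded_resumes
# returns a DataFrame with "Resume", "Match Percentage", "Entity Match (%)", and
# "AI Analysis" columns (or a "Message"/"Error" column when something goes wrong).
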
# Create the Gradio interface
analyzer = ResumeAnalyzer()
interface = gr.Interface(
    fn=analyzer.process_uploaded_resumes,
    inputs=[
        gr.File(
            label="Upload Resumes (PDF or DOCX)",
            file_types=[".pdf", ".docx"],
            # gr.File takes file_count="multiple" (not multiple=True) to
            # accept several uploads; type="filepath" hands the handler
            # plain path strings.
            file_count="multiple",
            type="filepath"
        ),
        gr.Textbox(
            label="Job Description",
            placeholder="Paste the job description here...",
            lines=6
        )
    ],
    outputs=gr.DataFrame(label="Analysis Results"),
    title="AI Resume Analyzer",
    description="""
    Upload resumes (PDF or DOCX) and provide a job description to see how well they match.
    The analysis includes:
    - Overall match percentage
    - Key skills and experience matching
    - AI-powered resume analysis
    """,
    allow_flagging="never",
    theme=gr.themes.Soft()
)
if __name__ == "__main__":
    interface.launch()