Spaces:
Sleeping
Sleeping
# Standard library
import os
import json
import re

# Third party
import yaml
from openai import OpenAI

# OpenAI client initialisation — API key is read from the environment,
# never hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Load the prompt template that ships alongside this module
# (resolved relative to this file so the CWD does not matter).
current_dir = os.path.dirname(os.path.abspath(__file__))
prompt_path = os.path.join(current_dir, 'prompt.yaml')
with open(prompt_path, 'r', encoding='utf-8') as f:
    prompt_data = yaml.safe_load(f)
prompt_template = prompt_data['prompt']
def _normalize_json_fragment(json_str):
    """Make a lenient JSON fragment parseable.

    Collapses newlines/indentation into single spaces and strips trailing
    commas before closing braces/brackets — the two malformations LLM
    output most commonly exhibits.
    """
    json_str = re.sub(r'\n\s*', ' ', json_str)   # collapse newlines and indentation
    json_str = re.sub(r',\s*}', '}', json_str)   # trailing comma before object close
    json_str = re.sub(r',\s*]', ']', json_str)   # trailing comma before array close
    return json_str


def parse_prediction(content):
    """Extract the list of interview questions from a raw AI response.

    The model is asked for JSON shaped like {"sample_questions": [...]}, but
    may wrap it in markdown fences or emit malformed text, so several
    strategies are tried in order of strictness:
      1. fenced code blocks (```json ... ```),
      2. a brace-delimited object containing "sample_questions",
      3. the bare "sample_questions" array,
      4. the whole response parsed as JSON (after stripping fence markers),
      5. heuristic pattern matching on quoted / numbered sentences.

    Args:
        content: raw text returned by the model.

    Returns:
        A list of question strings (at most 5 from the heuristic fallback);
        [] when nothing could be recovered.
    """
    try:
        print(f"ํ์ฑํ ์ปจํ ์ธ ๊ธธ์ด: {len(content)}")
        print(f"ํ์ฑํ ์ปจํ ์ธ ์ฒซ 200์: {repr(content[:200])}")
        cleaned_content = content.strip()

        # 1. Fenced code blocks (```json ... ```), most to least specific.
        json_patterns = [
            r'```json\s*(\{.*?\})\s*```',
            r'```\s*(\{.*?\})\s*```',
            r'```json\s*(.*?)\s*```',
            r'```\s*(.*?)\s*```'
        ]
        for pattern in json_patterns:
            json_match = re.search(pattern, cleaned_content, re.DOTALL)
            if json_match:
                json_str = json_match.group(1).strip()
                print(f"JSON ๋ธ๋ก ๋ฐ๊ฒฌ: {repr(json_str[:100])}")
                json_str = _normalize_json_fragment(json_str)
                try:
                    parsed_json = json.loads(json_str)
                    if 'sample_questions' in parsed_json:
                        return parsed_json['sample_questions']
                except json.JSONDecodeError as e:
                    print(f"JSON ๋ธ๋ก ํ์ฑ ์คํจ: {e}")
                    print(f"์คํจํ JSON: {repr(json_str)}")

        # 2. A brace-delimited object that mentions "sample_questions".
        brace_patterns = [
            r'\{[^{}]*"sample_questions"[^{}]*\[[^\]]*\][^{}]*\}',
            r'\{.*?"sample_questions".*?\[.*?\].*?\}'
        ]
        for pattern in brace_patterns:
            brace_match = re.search(pattern, cleaned_content, re.DOTALL)
            if brace_match:
                json_str = brace_match.group(0).strip()
                print(f"์ค๊ดํธ JSON ๋ฐ๊ฒฌ: {repr(json_str[:100])}")
                json_str = _normalize_json_fragment(json_str)
                try:
                    parsed_json = json.loads(json_str)
                    if 'sample_questions' in parsed_json:
                        return parsed_json['sample_questions']
                except json.JSONDecodeError as e:
                    print(f"์ค๊ดํธ JSON ํ์ฑ ์คํจ: {e}")

        # 3. Just the "sample_questions" array, quoted or bare key.
        array_patterns = [
            r'"sample_questions"\s*:\s*\[(.*?)\]',
            r'sample_questions\s*:\s*\[(.*?)\]'
        ]
        for pattern in array_patterns:
            array_match = re.search(pattern, cleaned_content, re.DOTALL)
            if array_match:
                array_content = array_match.group(1).strip()
                print(f"๋ฐฐ์ด ๋ด์ฉ ๋ฐ๊ฒฌ: {repr(array_content[:100])}")
                # Pull double-quoted strings out of the array body.
                questions = []
                question_matches = re.findall(r'"([^"]+)"', array_content)
                for q in question_matches:
                    if len(q.strip()) > 10:  # keep only meaningfully long questions
                        questions.append(q.strip())
                if questions:
                    return questions

        # 4. Try the whole response as JSON, stripping fence markers first.
        try:
            if cleaned_content.startswith('```'):
                lines = cleaned_content.split('\n')
                start_idx = 1 if lines[0].startswith('```') else 0
                end_idx = len(lines)
                # Find the last closing fence, scanning from the bottom.
                for i in range(len(lines) - 1, -1, -1):
                    if lines[i].strip() == '```':
                        end_idx = i
                        break
                cleaned_content = '\n'.join(lines[start_idx:end_idx])
                cleaned_content = cleaned_content.strip()
            parsed_json = json.loads(cleaned_content)
            if 'sample_questions' in parsed_json:
                return parsed_json['sample_questions']
        except json.JSONDecodeError as e:
            print(f"์ ์ฒด JSON ํ์ฑ ์คํจ: {e}")

        # 5. Last resort: heuristic pattern matching for question-like text.
        print("ํจํด ๋งค์นญ์ผ๋ก ์ง๋ฌธ ์ถ์ถ ์๋")
        questions = []
        patterns = [
            r'"([^"]{20,}[?])"',           # long quoted sentence ending in '?'
            r'"([^"]{20,})"',              # long quoted sentence
            r'[1-9]\.\s*([^"\n]{20,}[?])', # "N. question?" style
            r'[1-9]\.\s*([^"\n]{20,})',    # "N. sentence" style
        ]
        for pattern in patterns:
            matches = re.findall(pattern, cleaned_content)
            for match in matches:
                question = match.strip()
                if len(question) > 15 and question not in questions:
                    questions.append(question)
                if len(questions) >= 5:  # cap at 5 questions
                    break
            if questions:
                break
        return questions[:5] if questions else []
    except Exception as e:
        # Defensive catch-all: parsing must never crash the caller.
        print(f"JSON ํ์ฑ ์ ์ฒด ์ค๋ฅ: {e}")
        print(f"ํ์ฑ ์คํจํ ์ปจํ ์ธ : {repr(content)}")
        return []
def generate_interview_questions(company_name, job_title, experience_level, selected_questions, num_questions=3):
    """Generate tailored interview questions via the OpenAI Responses API.

    Args:
        company_name: target company name.
        job_title: target role / job title.
        experience_level: candidate seniority (e.g. entry-level, senior).
        selected_questions: reference questions, as a list or a
            comma-separated string.
        num_questions: number of questions to request from the model.

    Returns:
        (markdown_result, questions) on success;
        (error_message, []) when validation fails, the response cannot be
        parsed, or the API call raises.
    """
    # NOTE: the try/except was temporarily disabled with `if True:` during
    # debugging; restored so API/parse failures return an error tuple
    # instead of crashing the caller.
    try:
        # Basic validation: every field is required.
        if not company_name or not job_title or not experience_level or not selected_questions:
            return "๋ชจ๋  ํ๋๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์.", []

        # Accept the reference questions as a comma-separated string or a list.
        if isinstance(selected_questions, str):
            common_questions = [q.strip() for q in selected_questions.split(',')]
        else:
            common_questions = selected_questions

        # Fill the YAML prompt template loaded at module import time.
        prompt = prompt_template.format(
            company_name=company_name,
            job_title=job_title,
            experience_level=experience_level,
            common_questions=common_questions,
            num_questions=num_questions
        )
        print(prompt)

        # OpenAI Responses API call with the web-search preview tool so the
        # model can consult current company / hiring information.
        response = client.responses.create(
            model="gpt-4o",
            tools=[{
                "type": "web_search_preview",
                "search_context_size": "high",
            }],
            input=f"๋น์ ์ ๋ฉด์  ์ง๋ฌธ ์์ฑ ์ ๋ฌธ๊ฐ์ ๋๋ค. ์น ๊ฒ์์ ํตํด ์ต์  ๊ธฐ์  ์ ๋ณด์ ์ฑ์ฉ ๋ํฅ์ ํ์ธํ๊ณ  ์ฃผ์ด์ง ์กฐ๊ฑด์ ๋ง๋ ๊ตฌ์ฒด์ ์ด๊ณ  ์ค์ฉ์ ์ธ ๋ฉด์  ์ง๋ฌธ์ ์์ฑํด์ฃผ์ธ์.\n\n{prompt}"
        )

        content = response.output_text
        print(f"=== AI ์๋ต ์๋ณธ ===")
        print(content)
        print(f"=== ์ ์ฒด ์๋ต ๊ฐ์ฒด ===")
        print(response)

        # Print any web-search citations attached to the response; the field
        # name varies, so probe both `url` and `link` attributes.
        if hasattr(response, 'web_search_results') and response.web_search_results:
            print(f"=== ์ฐธ๊ณ ํ ์น ๊ฒ์ ๋งํฌ ===")
            for i, result in enumerate(response.web_search_results, 1):
                if hasattr(result, 'url'):
                    print(f"{i}. {result.url}")
                elif hasattr(result, 'link'):
                    print(f"{i}. {result.link}")
        print(f"=== AI ์๋ต ๋ ===")

        questions = parse_prediction(content)
        if not questions:
            return "์ง๋ฌธ ์์ฑ์ ์คํจํ์ต๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์.", []

        # Format the result as markdown for the UI.
        result = f"""## ๐ฏ {company_name} - {job_title} ๋ง์ถคํ ๋ฉด์  ์ง๋ฌธ
### ๐ **์์ฑ๋ ์ง๋ฌธ๋ค**
"""
        for i, question in enumerate(questions, 1):
            result += f"**{i}.** {question}\n\n"
        result += f"""
---
**๐ ์ ๋ ฅ ์ ๋ณด:**
- ํ์ฌ: {company_name}
- ์ง๋ฌด: {job_title}
- ๊ฒฝ๋ ฅ: {experience_level}
- ์์ฑ๋ ์ง๋ฌธ ์: {len(questions)}๊ฐ (์์ฒญ: {num_questions}๊ฐ)
- ์ฐธ๊ณ  ์ง๋ฌธ ์: {len(common_questions)}๊ฐ
*๋ณธ ์ง๋ฌธ๋ค์ AI๊ฐ ์์ฑํ ๊ฒ์ผ๋ก, ์ค์  ๋ฉด์ ๊ณผ ๋ค๋ฅผ ์ ์์ต๋๋ค.*
"""
        return result, questions
    except Exception as e:
        # Fail soft: surface a readable error message instead of crashing.
        error_msg = f"""## โ ์ค๋ฅ ๋ฐ์
์ง๋ฌธ ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.
**์ค๋ฅ ๋ด์ฉ:** {str(e)}
๋ค์ ์๋ํด์ฃผ์ธ์.
"""
        return error_msg, []
| if __name__ == "__main__": | |
| company_name = "ํ ์ค" | |
| job_title = "๋ฐฑ์๋ ๊ฐ๋ฐ" | |
| experience_level = "์ ์ " | |
| selected_questions = "์๊ธฐ์๊ฐ๋ฅผ ํด๋ณด์ธ์, ์ง์ ๋๊ธฐ๊ฐ ๋ฌด์์ธ๊ฐ์, ๊ฐ์ฅ ๋์ ์ ์ธ ๊ฒฝํ์ ๋ฌด์์ธ๊ฐ์, ์ ์ฌ ํ ํฌ๋ถ๋ ๋ฌด์์ธ๊ฐ์" | |
| num_questions = 3 | |
| result, questions = generate_interview_questions(company_name, job_title, experience_level, selected_questions, num_questions) | |
| print(result) | |
| print(questions) |