# Web-page residue captured together with this file (not part of the program):
#   kyle8581's picture
#   . / 594e237
import os
import json
import re
import yaml
from openai import OpenAI
# Initialise the OpenAI client. The API key is read from the OPENAI_API_KEY
# environment variable; os.getenv returns None when it is unset.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Load the prompt template from prompt.yaml, which is expected to sit next to
# this file. `os` is already imported at the top of the module, so it is not
# re-imported here.
current_dir = os.path.dirname(os.path.abspath(__file__))
prompt_path = os.path.join(current_dir, 'prompt.yaml')
with open(prompt_path, 'r', encoding='utf-8') as f:
    prompt_data = yaml.safe_load(f)
# The template text lives under the top-level "prompt" key of the YAML file.
prompt_template = prompt_data['prompt']
def _repair_json(text):
    """Return *text* with common JSON slips patched.

    Newlines (and the whitespace following them) are collapsed to a single
    space, and trailing commas before ``}`` or ``]`` are dropped.  The regexes
    are not string-aware, so they may also touch text inside string values;
    callers should therefore only use the result when the raw text fails to
    parse.
    """
    text = re.sub(r'\n\s*', ' ', text)
    text = re.sub(r',\s*}', '}', text)
    return re.sub(r',\s*]', ']', text)


def _loads_lenient(text):
    """Parse *text* as JSON, retrying once on a repaired copy.

    The untouched text is tried first so that valid JSON is never altered.

    Raises:
        json.JSONDecodeError: if neither the raw nor the repaired text parses.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return json.loads(_repair_json(text))


def _has_sample_questions(parsed):
    """Return True when *parsed* is a dict holding a "sample_questions" key."""
    return isinstance(parsed, dict) and 'sample_questions' in parsed


def parse_prediction(content):
    """Extract the interview questions from a model response.

    Strategies are tried in order, and the first one that yields a result wins:

    1. JSON inside a Markdown code fence.
    2. A brace-delimited JSON object mentioning "sample_questions".
    3. The quoted strings inside a ``sample_questions: [...]`` array.
    4. The whole text (with code-fence marker lines removed) parsed as JSON.
    5. Last resort: quoted or numbered sentences found by pattern matching
       (at most five).

    Args:
        content: Raw response text.

    Returns:
        The value stored under "sample_questions" when a JSON strategy
        succeeds (normally a list of strings), otherwise a list of extracted
        question strings.  An empty list is returned when nothing is found or
        an unexpected error occurs (for example, *content* is not a string).
    """
    try:
        print(f"ํŒŒ์‹ฑํ•  ์ปจํ…์ธ  ๊ธธ์ด: {len(content)}")
        print(f"ํŒŒ์‹ฑํ•  ์ปจํ…์ธ  ์ฒซ 200์ž: {repr(content[:200])}")

        # Pre-processing: trim surrounding whitespace.
        cleaned_content = content.strip()

        # 1. Look for a JSON code block (```json ... ```), most specific first.
        json_patterns = [
            r'```json\s*(\{.*?\})\s*```',
            r'```\s*(\{.*?\})\s*```',
            r'```json\s*(.*?)\s*```',
            r'```\s*(.*?)\s*```'
        ]
        for pattern in json_patterns:
            json_match = re.search(pattern, cleaned_content, re.DOTALL)
            if not json_match:
                continue
            json_str = json_match.group(1).strip()
            print(f"JSON ๋ธ”๋ก ๋ฐœ๊ฒฌ: {repr(json_str[:100])}")
            try:
                parsed_json = _loads_lenient(json_str)
            except json.JSONDecodeError as e:
                print(f"JSON ๋ธ”๋ก ํŒŒ์‹ฑ ์‹คํŒจ: {e}")
                print(f"์‹คํŒจํ•œ JSON: {repr(_repair_json(json_str))}")
                continue
            if _has_sample_questions(parsed_json):
                return parsed_json['sample_questions']

        # 2. Look for a brace-delimited JSON object anywhere in the text.
        brace_patterns = [
            r'\{[^{}]*"sample_questions"[^{}]*\[[^\]]*\][^{}]*\}',
            r'\{.*?"sample_questions".*?\[.*?\].*?\}'
        ]
        for pattern in brace_patterns:
            brace_match = re.search(pattern, cleaned_content, re.DOTALL)
            if not brace_match:
                continue
            json_str = brace_match.group(0).strip()
            print(f"์ค‘๊ด„ํ˜ธ JSON ๋ฐœ๊ฒฌ: {repr(json_str[:100])}")
            try:
                parsed_json = _loads_lenient(json_str)
            except json.JSONDecodeError as e:
                print(f"์ค‘๊ด„ํ˜ธ JSON ํŒŒ์‹ฑ ์‹คํŒจ: {e}")
                continue
            if _has_sample_questions(parsed_json):
                return parsed_json['sample_questions']

        # 3. Pull the sample_questions array out directly, without JSON parsing.
        array_patterns = [
            r'"sample_questions"\s*:\s*\[(.*?)\]',
            r'sample_questions\s*:\s*\[(.*?)\]'
        ]
        for pattern in array_patterns:
            array_match = re.search(pattern, cleaned_content, re.DOTALL)
            if not array_match:
                continue
            array_content = array_match.group(1).strip()
            print(f"๋ฐฐ์—ด ๋‚ด์šฉ ๋ฐœ๊ฒฌ: {repr(array_content[:100])}")
            # Keep only quoted strings long enough to be a meaningful question.
            questions = [
                q.strip()
                for q in re.findall(r'"([^"]+)"', array_content)
                if len(q.strip()) > 10
            ]
            if questions:
                return questions

        # 4. Try to parse the entire text as JSON after removing fence lines.
        if cleaned_content.startswith('```'):
            lines = cleaned_content.split('\n')
            end_idx = len(lines)
            # Search for the closing fence from the bottom, but never consider
            # line 0: that is the opening fence, and matching it would discard
            # the whole body when the closing fence is missing.
            for i in range(len(lines) - 1, 0, -1):
                if lines[i].strip() == '```':
                    end_idx = i
                    break
            cleaned_content = '\n'.join(lines[1:end_idx])
        cleaned_content = cleaned_content.strip()
        try:
            parsed_json = json.loads(cleaned_content)
        except json.JSONDecodeError as e:
            print(f"์ „์ฒด JSON ํŒŒ์‹ฑ ์‹คํŒจ: {e}")
        else:
            if _has_sample_questions(parsed_json):
                return parsed_json['sample_questions']

        # 5. Last resort: pattern-match question-like sentences in the
        #    fence-stripped text.
        print("ํŒจํ„ด ๋งค์นญ์œผ๋กœ ์งˆ๋ฌธ ์ถ”์ถœ ์‹œ๋„")
        questions = []
        patterns = [
            r'"([^"]{20,}[?])"',  # long quoted sentence ending in "?"
            r'"([^"]{20,})"',  # any long quoted sentence
            r'[1-9]\.\s*([^"\n]{20,}[?])',  # "N. question?" list item
            r'[1-9]\.\s*([^"\n]{20,})',  # "N. sentence" list item
        ]
        for pattern in patterns:
            for match in re.findall(pattern, cleaned_content):
                question = match.strip()
                if len(question) > 15 and question not in questions:
                    questions.append(question)
                    if len(questions) >= 5:  # at most five questions
                        break
            # Stop at the first pattern that produced anything.
            if questions:
                break
        return questions[:5] if questions else []
    except Exception as e:
        # Best-effort parser: any unexpected failure yields an empty result.
        print(f"JSON ํŒŒ์‹ฑ ์ „์ฒด ์˜ค๋ฅ˜: {e}")
        print(f"ํŒŒ์‹ฑ ์‹คํŒจํ•œ ์ปจํ…์ธ : {repr(content)}")
        return []
def generate_interview_questions(company_name, job_title, experience_level, selected_questions, num_questions=3):
    """Generate tailored interview questions through the OpenAI Responses API.

    Args:
        company_name: Name of the target company.
        job_title: Position being applied for.
        experience_level: Candidate's experience level.
        selected_questions: Reference questions, either a comma-separated
            string or an already-built sequence.
        num_questions: Number of questions requested from the model (passed
            into the prompt; the parsed result is not truncated to it).

    Returns:
        A ``(markdown, questions)`` tuple.  On success *markdown* is the
        formatted report and *questions* the parsed question list.  When
        input validation or response parsing fails, *markdown* is a
        user-facing message and *questions* is ``[]``.

    Raises:
        Whatever the OpenAI client or ``prompt_template.format`` raises; no
        exception is caught here.
    """
    missing_fields_msg = "๋ชจ๋“  ํ•„๋“œ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
    if not company_name or not job_title or not experience_level or not selected_questions:
        return missing_fields_msg, []

    # Normalise the reference questions into a list.  Blank entries (from
    # stray or trailing commas) are dropped so they never reach the prompt.
    if isinstance(selected_questions, str):
        common_questions = [q.strip() for q in selected_questions.split(',') if q.strip()]
    else:
        common_questions = selected_questions
    if not common_questions:
        # e.g. selected_questions == " , " contains no real question.
        return missing_fields_msg, []

    # Build the prompt from the module-level template.
    prompt = prompt_template.format(
        company_name=company_name,
        job_title=job_title,
        experience_level=experience_level,
        common_questions=common_questions,
        num_questions=num_questions
    )
    print(prompt)

    # Call the OpenAI Responses API with the web-search preview tool enabled.
    response = client.responses.create(
        model="gpt-4o",
        tools=[{
            "type": "web_search_preview",
            "search_context_size": "high",
        }],
        input=f"๋‹น์‹ ์€ ๋ฉด์ ‘ ์งˆ๋ฌธ ์ƒ์„ฑ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์›น ๊ฒ€์ƒ‰์„ ํ†ตํ•ด ์ตœ์‹  ๊ธฐ์—… ์ •๋ณด์™€ ์ฑ„์šฉ ๋™ํ–ฅ์„ ํ™•์ธํ•˜๊ณ  ์ฃผ์–ด์ง„ ์กฐ๊ฑด์— ๋งž๋Š” ๊ตฌ์ฒด์ ์ด๊ณ  ์‹ค์šฉ์ ์ธ ๋ฉด์ ‘ ์งˆ๋ฌธ์„ ์ƒ์„ฑํ•ด์ฃผ์„ธ์š”.\n\n{prompt}"
    )
    content = response.output_text

    # Debug output: raw text followed by the full response object.
    print(f"=== AI ์‘๋‹ต ์›๋ณธ ===")
    print(content)
    print(f"=== ์ „์ฒด ์‘๋‹ต ๊ฐ์ฒด ===")
    print(response)

    # Print web-search reference links when the response exposes them.
    # NOTE(review): this branch is skipped whenever the response object has no
    # `web_search_results` attribute -- confirm the Responses API actually
    # provides it (citations may live elsewhere in the output).
    if hasattr(response, 'web_search_results') and response.web_search_results:
        print(f"=== ์ฐธ๊ณ ํ•œ ์›น ๊ฒ€์ƒ‰ ๋งํฌ ===")
        for i, result in enumerate(response.web_search_results, 1):
            if hasattr(result, 'url'):
                print(f"{i}. {result.url}")
            elif hasattr(result, 'link'):
                print(f"{i}. {result.link}")
    print(f"=== AI ์‘๋‹ต ๋ ===")

    questions = parse_prediction(content)
    if not questions:
        return "์งˆ๋ฌธ ์ƒ์„ฑ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.", []

    # Format the result as Markdown: header, numbered questions, input summary.
    result = f"""## ๐ŸŽฏ {company_name} - {job_title} ๋งž์ถคํ˜• ๋ฉด์ ‘ ์งˆ๋ฌธ
### ๐Ÿ“‹ **์ƒ์„ฑ๋œ ์งˆ๋ฌธ๋“ค**
"""
    result += "".join(
        f"**{i}.** {question}\n\n" for i, question in enumerate(questions, 1)
    )
    result += f"""
---
**๐Ÿ“ ์ž…๋ ฅ ์ •๋ณด:**
- ํšŒ์‚ฌ: {company_name}
- ์ง๋ฌด: {job_title}
- ๊ฒฝ๋ ฅ: {experience_level}
- ์ƒ์„ฑ๋œ ์งˆ๋ฌธ ์ˆ˜: {len(questions)}๊ฐœ (์š”์ฒญ: {num_questions}๊ฐœ)
- ์ฐธ๊ณ  ์งˆ๋ฌธ ์ˆ˜: {len(common_questions)}๊ฐœ
*๋ณธ ์งˆ๋ฌธ๋“ค์€ AI๊ฐ€ ์ƒ์„ฑํ•œ ๊ฒƒ์œผ๋กœ, ์‹ค์ œ ๋ฉด์ ‘๊ณผ ๋‹ค๋ฅผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.*
"""
    return result, questions
    # NOTE: a catch-all handler that turned any exception into an error
    # message used to follow here but had been disabled; exceptions from the
    # API call therefore propagate to the caller.
if __name__ == "__main__":
    # Manual smoke run: generate questions for one hard-coded sample request
    # and print both the formatted report and the raw question list.
    sample_request = {
        "company_name": "ํ† ์Šค",
        "job_title": "๋ฐฑ์—”๋“œ ๊ฐœ๋ฐœ",
        "experience_level": "์‹ ์ž…",
        "selected_questions": "์ž๊ธฐ์†Œ๊ฐœ๋ฅผ ํ•ด๋ณด์„ธ์š”, ์ง€์› ๋™๊ธฐ๊ฐ€ ๋ฌด์—‡์ธ๊ฐ€์š”, ๊ฐ€์žฅ ๋„์ „์ ์ธ ๊ฒฝํ—˜์€ ๋ฌด์—‡์ธ๊ฐ€์š”, ์ž…์‚ฌ ํ›„ ํฌ๋ถ€๋Š” ๋ฌด์—‡์ธ๊ฐ€์š”",
        "num_questions": 3,
    }
    result, questions = generate_interview_questions(**sample_request)
    print(result)
    print(questions)