|
|
import json
import re
import warnings
from typing import Dict, List, Optional

import torch
from opencc import OpenCC
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import AutoModelForImageTextToText, AutoProcessor
|
|
|
|
|
class CaptionGenerationManager:
    """Caption generation using Vision-Language Models (supports Qwen2.5-VL, Qwen3-VL, etc.).

    The manager loads a processor/model pair through the ``Auto*`` classes,
    turns upstream image-analysis results into a detailed text prompt, asks
    the model for three stylistic variations (professional, creative,
    authentic) and post-processes the JSON the model returns.
    """

    # Decorative emoji that the caption cleanup treats as strippable when
    # they pile up at the very end of a caption.
    _TRAILING_EMOJI = '✨💎👗🌟💫🖤'

    # Pre-compiled cleanup patterns (hoisted so they are built once).
    # ``\w`` is Unicode-aware for str patterns, so it already covers CJK tags.
    _HASHTAG_RE = re.compile(r'#\w+')
    _WHITESPACE_RE = re.compile(r'\s+')
    _EMOJI_RUN_RE = re.compile(r'[\U0001F300-\U0001F9FF]{4,}$')
    _TRAILING_EMOJI_PAIR_RE = re.compile(r'[✨💎👗🌟💫🖤]{2,}\s*$')
    _TRAILING_EMOJI_STRIP_RE = re.compile(r'[✨💎👗🌟💫🖤\s]+$')

    # English lighting labels -> Traditional Chinese wording used in prompts.
    _LIGHTING_TRANSLATIONS_ZH = {
        'soft diffused light': '柔和漫射光',
        'overcast atmosphere': '陰天氛圍',
        'natural daylight': '自然日光',
        'warm ambient light': '溫暖環境光',
        'evening light': '傍晚光線',
        'bright sunlight': '明亮陽光',
        'golden hour': '金黃時刻',
        'blue hour': '藍調時刻',
    }

    # Per-language system instructions; unknown codes fall back to 'zh'.
    _LANGUAGE_INSTRUCTIONS = {
        'zh': '請使用繁體中文生成標題和標籤。語言要自然流暢,符合華語社群媒體的表達習慣。避免使用簡體字。當偵測到品牌時,必須在標題中提及品牌名稱。',
        'en': '''🚨 CRITICAL LANGUAGE REQUIREMENT 🚨
Generate captions and hashtags EXCLUSIVELY in English.
- NEVER use Chinese characters (Traditional or Simplified)
- NEVER mix languages
- Use natural, engaging language suitable for international social media
- When brands are detected, mention them naturally in English
- All text output must be 100% English only
This is MANDATORY and NON-NEGOTIABLE.''',
        'zh-en': '''生成雙語內容:標題使用繁體中文,同時提供英文翻譯。標籤混合使用中英文以擴大觸及範圍。當偵測到品牌時,必須在標題中提及品牌名稱。

🚨 重要:雙語一致性要求 🚨
- 中文和英文必須表達相同的核心意義
- 允許表達方式的差異(形容詞、語法不同)
- 但整體訊息、語氣、品牌提及必須一致
- 兩種語言都要朝同一方向詮釋內容''',
    }

    # Per-language hashtag rules; anything that is not 'zh' or 'en' uses the
    # bilingual ('zh-en') variant.
    _HASHTAG_INSTRUCTIONS = {
        'zh': """
【CRITICAL HASHTAG REQUIREMENT - 繁體中文】:
- ALL hashtags MUST be in Traditional Chinese (繁體中文)
- NEVER use English hashtags when language is 繁體中文
- Examples of CORRECT hashtags: ["時尚包包", "奢華風格", "皮革工藝", "精品配件"]
- Examples of WRONG hashtags: ["FashionBlogger", "LuxuryLifestyle"] - DO NOT USE THESE
""",
        'en': """
【CRITICAL HASHTAG REQUIREMENT - English】:
- ALL hashtags MUST be in English
- NEVER use Chinese characters in hashtags
- Examples of CORRECT hashtags: ["FashionBlogger", "LuxuryLifestyle", "LeatherCraft"]
""",
        'zh-en': """
【CRITICAL HASHTAG REQUIREMENT - Bilingual】:
- Hashtags should MIX Traditional Chinese and English
- First half in Chinese, second half in English
- Example: ["時尚包包", "奢華風格", "FashionBlogger", "LuxuryLifestyle"]
""",
    }

    def __init__(self, model_name: str = "Qwen/Qwen2.5-VL-7B-Instruct"):
        """Load the processor, the model and the static generation settings.

        Args:
            model_name: Vision-Language model name, e.g.:
                - "Qwen/Qwen2.5-VL-7B-Instruct" (default)
                - "Qwen/Qwen3-VL-8B-Instruct" (2025 latest)
        """
        print(f"Loading Vision-Language Model: {model_name}...")

        # Silence FutureWarning noise coming from transformers.
        # NOTE: this changes the process-wide warning filters.
        warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")

        self.processor = AutoProcessor.from_pretrained(model_name, use_fast=False)
        self.model = AutoModelForImageTextToText.from_pretrained(
            model_name,
            dtype=torch.bfloat16,
            device_map="auto",
        )

        # OpenCC profile 's2t': Simplified -> Traditional Chinese.
        self.cc = OpenCC('s2t')

        # Baseline sampling settings; 'temperature' is overridden per style
        # in generate_captions().
        self.generation_config = {
            'temperature': 0.7,
            'top_p': 0.9,
            'max_new_tokens': 300,
            'repetition_penalty': 1.1,
        }

        # Per-platform caption constraints interpolated into the prompt.
        # 'features' is not read anywhere in this class.
        self.platform_templates = {
            'instagram': {
                'style': 'storytelling, aesthetic',
                'emoji_count': '2-3',
                'hashtag_count': '8-10',
                'min_length': 120,
                'max_length': 220,
                'features': ['call-to-action', 'question', 'relatable'],
            },
            'tiktok': {
                'style': 'brief, punchy',
                'emoji_count': '1-2',
                'hashtag_count': '5-8',
                'min_length': 60,
                'max_length': 120,
                'features': ['trending', 'POV', 'relatable'],
            },
            'xiaohongshu': {
                'style': 'structured, informative, detailed',
                'emoji_count': '5-8',
                'hashtag_count': '8-12',
                'min_length': 180,
                'max_length': 500,
                'features': ['tips', 'bullets', 'sharing-tone'],
            },
        }

        print(f"✓ {model_name.split('/')[-1]} loaded successfully (using Auto* classes for flexibility)")

    @staticmethod
    def _build_example_block(language: str, brand_name: str, lighting_display: str) -> str:
        """Return the "correct example" section of the prompt for *language*."""
        if language == 'zh':
            return f"""正確範例 - 詳細描述 + 品牌提及 (繁體中文):
"在{lighting_display}的映襯下,這款{brand_name}經典黑色菱格紋皮革包展現奢華質感,V字形縫線在柔軟小牛皮上勾勒出精緻的幾何圖案,復古金色雙G標誌在深色背景中熠熠生輝。金屬鏈條肩帶反射著{lighting_display},增添層次感與立體效果。皮革表面細膩的光澤與霧面質地形成迷人對比,每個細節都彰顯義大利工藝的極致追求。這樣的{brand_name}單品不只是配件,更是品味與格調的完美詮釋。你的衣櫃裡有哪件經典單品?✨🖤"

注意:品牌名稱 "{brand_name}" 出現在第一句!這是正確的做法。

CRITICAL:
- 必須包含材質描述(皮革、金屬等)
- 必須包含顏色細節(黑色、復古金色等)
- 必須包含設計特點(縫線、標誌、鏈條等)
- 必須使用"{lighting_display}"來描述光線
"""
        if language == 'en':
            return f"""CORRECT EXAMPLE - Detailed Description + Brand Mention (ENGLISH ONLY - NO CHINESE):
"Under the {lighting_display}, this {brand_name} classic black quilted leather bag showcases luxurious craftsmanship. V-shaped stitching traces intricate geometric patterns across supple calfskin, while the antique gold double-G logo gleams against the dark backdrop. The metal chain strap catches and reflects the {lighting_display}, adding dimension and depth to the piece. The leather surface presents a captivating contrast between fine sheen and matte texture, with every detail exemplifying Italian artisanship at its finest. This {brand_name} piece isn't just an accessory – it's a perfect expression of taste and sophistication. What's your timeless wardrobe essential? ✨🖤"

NOTE: Brand name "{brand_name}" appears in the FIRST sentence! This is the correct approach.

🚨 ABSOLUTE REQUIREMENT FOR ENGLISH MODE 🚨
- Output must be 100% ENGLISH - zero Chinese characters allowed
- MUST include material descriptions (leather, metal, etc.)
- MUST include color details (black, antique gold, etc.)
- MUST include design features (stitching, logo, chain, etc.)
- MUST use "{lighting_display}" to describe the lighting
- NO Chinese characters anywhere in the output
"""
        # Bilingual (and any other language code): static text only.
        return """BILINGUAL EXAMPLE - 雙語範例:
Caption in Traditional Chinese, with English hashtags support.
(Details omitted for brevity)
"""

    def construct_prompt(self, analysis_results: Dict, platform: str = 'instagram', language: str = 'zh') -> str:
        """Construct prompt with language support ensuring consistency.

        Args:
            analysis_results: Upstream analysis dict. Keys read here:
                'detections' (items indexed by 'class_name'), 'brands'
                (items whose element 0 is used as the brand name),
                'scene_analysis' (sub-dicts 'lighting', 'urban', 'mood')
                and 'composition' ('composition_type'). Missing or empty
                keys fall back to defaults.
            platform: Key into ``self.platform_templates``; unknown values
                use the 'instagram' template.
            language: 'zh' (Traditional Chinese), 'en' (English),
                'zh-en' (Bilingual). Unknown codes get the 'zh' language
                instruction and the bilingual example/hashtag sections.

        Returns:
            The full prompt: system instruction, visual context and output
            format, separated by blank lines.
        """
        platform_config = self.platform_templates.get(platform, self.platform_templates['instagram'])
        language_instruction = self._LANGUAGE_INSTRUCTIONS.get(language, self._LANGUAGE_INSTRUCTIONS['zh'])

        system_instruction = f"""You are a professional social media content strategist.

{language_instruction}

Target platform: {platform}
Content style: Authentic, creative, and optimized for engagement.

CRITICAL RULE: Never include hashtags (symbols starting with #) in the caption text. Hashtags must only appear in the separate 'hashtags' array."""

        # ``or`` defaults also cover keys that are present but None/empty.
        objects = analysis_results.get('detections') or []
        brands = analysis_results.get('brands') or []
        scene_info = analysis_results.get('scene_analysis') or {}
        composition = analysis_results.get('composition') or {}

        lighting_info = scene_info.get('lighting') or {}
        lighting = lighting_info.get('top', 'natural light')
        lighting_confidence = lighting_info.get('confidence', 0.7)

        # Only pure-Chinese mode translates the lighting label; every other
        # mode shows the label as received.
        if language == 'zh':
            lighting_display = self._LIGHTING_TRANSLATIONS_ZH.get(lighting, lighting)
        else:
            lighting_display = lighting

        objects_str = ', '.join(obj['class_name'] for obj in objects[:10])

        # Element 0 of each brand entry is taken as the name; str() keeps the
        # joins below safe if it is not already a string.
        brands_list = [str(b[0]) for b in brands[:5]]
        primary_brand = brands_list[0] if brands_list else None

        if primary_brand is not None:
            brands_str = ', '.join(brands_list)
            brand_emphasis = f"""

🚨 CRITICAL BRAND REQUIREMENT 🚨
The following brands were POSITIVELY IDENTIFIED in this image: {brands_str}

YOU ABSOLUTELY MUST:
1. Mention the brand name "{primary_brand}" explicitly in the FIRST sentence
2. Use the exact brand name - do not use generic terms like "bag" or "accessory" without the brand
3. Write naturally as if you're excited to share this {primary_brand} item
4. Example: "在傍晚光線下,這款{primary_brand}經典黑色菱格紋皮革包..." (CORRECT)
5. NOT acceptable: "在傍晚光線下,這款經典黑色菱格紋皮革包..." (WRONG - missing brand name!)

THIS IS MANDATORY - The caption will be rejected if it doesn't mention {brands_str}.
"""
            hook_brand_lines = [
                f"- 🚨 MANDATORY: Start with the BRAND NAME '{primary_brand}' in the FIRST sentence!",
                f"- Example (CORRECT): '這款{primary_brand}經典黑色菱格紋皮革包...'",
                f"- Example (WRONG): '這款經典黑色菱格紋皮革包...' (missing {primary_brand}!)",
            ]
            if language == 'zh':
                brand_example = "'這款" + primary_brand + "經典黑色...'"
            else:
                brand_example = "'This " + primary_brand + " classic black...'"
            brand_rule = (
                "🚨 CRITICAL BRAND REQUIREMENT 🚨 - The brand name '" + primary_brand
                + "' MUST appear in the FIRST sentence of your caption. This is MANDATORY and NON-NEGOTIABLE. Example: "
                + brand_example
            )
        else:
            brands_str = 'None detected'
            brand_emphasis = ""
            # Three empty lines keep the prompt layout identical to the
            # branded case.
            hook_brand_lines = ["", "", ""]
            brand_rule = "No brands detected to mention"
        hook_brand_block = "\n".join(hook_brand_lines)

        urban_scene = (scene_info.get('urban') or {}).get('top', 'unknown')
        mood = (scene_info.get('mood') or {}).get('top', 'neutral')
        comp_type = composition.get('composition_type', 'standard')

        context = f"""
Analyze this image and generate an engaging, DETAILED social media caption with rich visual descriptions.

**Visual Elements (Describe in Detail):**
- Detected objects: {objects_str}
- Scene composition: {comp_type}
- Urban environment: {urban_scene}
- **IMPORTANT**: Include specific details about:
* Materials (leather, metal, fabric, canvas, etc.)
* Colors (use descriptive terms: jet black, antique gold, midnight blue, etc.)
* Textures (quilted, smooth, matte, glossy, metallic, etc.)
* Design features (stitching patterns, hardware, logos, emblems, etc.)
* Reflections and lighting effects on surfaces

**Atmosphere:**
- Lighting (analyzed with Places365 + CV): {lighting_display} (confidence: {lighting_confidence:.2f})
- Mood: {mood}

**Brand Detection:**
- Identified brands: {brands_str}{brand_emphasis}

**Caption Structure (Required - BE SPECIFIC AND DETAILED):**
1. Opening hook - Most striking visual element with SPECIFIC details (1-2 sentences)
{hook_brand_block}
- Be SPECIFIC: Include material, color, design features WITH the brand name

2. Visual details - Describe materials, textures, colors, and design elements (2-3 sentences)
- Be SPECIFIC: mention quilting patterns, metal finishes, chain details, logo placements
- Describe how light interacts with materials (reflections on leather, gleam of metal)
- MUST use the EXACT lighting description: "{lighting_display}"

3. Atmospheric context - How lighting and mood create the scene's character (1-2 sentences)
- Connect lighting to the overall visual impact
- Describe depth, shadows, contrasts

4. Emotional connection & Engagement - How this resonates with viewers + call-to-action (1 sentence)

**Content Requirements:**
- Minimum information: 3-4 specific visual details per caption
- Include material types, color descriptions, design features
- Describe how lighting affects the appearance
- Make it vivid and immersive

Platform style: {platform_config['style']}
"""

        example_correct = self._build_example_block(
            language,
            primary_brand if primary_brand is not None else "Gucci",
            lighting_display,
        )
        hashtag_instruction = self._HASHTAG_INSTRUCTIONS.get(language, self._HASHTAG_INSTRUCTIONS['zh-en'])

        # The "do not use 金黃時刻" guard exists to stop the model defaulting
        # to golden hour. When the detected lighting IS golden hour the guard
        # would contradict the "use the EXACT description" rule, so skip it.
        is_golden_hour = lighting == 'golden hour' or lighting_display == '金黃時刻'
        lighting_rule_lines = [
            f'7. 【CRITICAL】 MUST use the EXACT lighting description: "{lighting_display}"',
            '- DO NOT substitute with similar terms',
        ]
        if not is_golden_hour:
            lighting_rule_lines.append(f'- DO NOT use "金黃時刻" if the lighting is "{lighting_display}"')
        lighting_rule_lines.append('- DO NOT invent your own lighting description')
        lighting_rule = "\n".join(lighting_rule_lines)

        if language == 'en':
            language_rule = "🚨 LANGUAGE REQUIREMENT 🚨 - Output must be 100% ENGLISH ONLY. NO Chinese characters allowed anywhere."
        else:
            language_rule = ""

        output_format = f"""
Generate output in JSON format:
{{
"caption": "string (minimum {platform_config['min_length']} chars, maximum {platform_config['max_length']} chars, engaging and descriptive)",
"hashtags": ["tag1", "tag2", ...] ({platform_config['hashtag_count']} relevant hashtags),
"tone": "casual|professional|playful",
"platform": "{platform}"
}}

{hashtag_instruction}

STRICT REQUIREMENTS:
1. Caption length: {platform_config['min_length']}-{platform_config['max_length']} characters
2. 🚨 EMOJI REQUIREMENT 🚨 - MUST use EXACTLY {platform_config['emoji_count']} emojis naturally integrated into caption text
- Professional style: 1-2 emojis (e.g., ✨💼🌟)
- Creative style: 2-3 emojis (e.g., 🎨✨💫🌙)
- Authentic style: 2-3 emojis (e.g., 💖👜✨🖤)
- Place emojis naturally within or at end of sentences
3. Caption must be pure descriptive text only - absolutely NO hashtags allowed
4. 🚨 CALL-TO-ACTION REQUIREMENT 🚨 - MUST include an engaging question or CTA at the end
- Professional: Brief professional question (e.g., "What's your go-to piece?")
- Creative: Thought-provoking question (e.g., "How does this speak to you?")
- Authentic: Personal question (e.g., "What's your favorite timeless accessory?")
5. Write 3-4 complete sentences following the structure above
6. Be specific and vivid - describe what you see in detail
{lighting_rule}
8. 🚨 HASHTAG REQUIREMENT 🚨 - Generate {platform_config['hashtag_count']} relevant hashtags
- Hashtags go ONLY in the 'hashtags' array, NEVER in the caption text
- Mix of broad and specific tags
- Include brand name as hashtag if detected
9. {brand_rule}
10. {language_rule}

WRONG EXAMPLE (DO NOT DO THIS):
"Lost in the city's towering skyscrapers 🏙️✨ | #UrbanVibes #CityLife"

{example_correct}
"""

        return f"{system_instruction}\n\n{context}\n\n{output_format}"

    def _generate_text(self, image: "Image.Image", prompt: str, temperature: float) -> str:
        """Run one image+text generation pass and return the decoded reply."""
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        }]

        text = self.processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )

        # Follow the model's own placement (it was loaded with
        # device_map="auto") instead of assuming a CUDA device.
        inputs = inputs.to(self.model.device)

        # Copy so the per-style temperature never leaks into the shared config.
        config = dict(self.generation_config, temperature=temperature)

        with torch.no_grad():
            generated_ids = self.model.generate(**inputs, **config)

        # generate() returns prompt + completion; keep only the completion.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        return self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

    def generate_captions(self, analysis_results: Dict, image: "Image.Image",
                          platform: str = 'instagram', language: str = 'zh') -> List[Dict]:
        """Generate 3 captions with distinct styles: Professional, Creative, Authentic.

        Args:
            analysis_results: Analysis dict forwarded to ``construct_prompt``.
            image: Image passed to the model alongside the prompt.
            platform: Target platform key (see ``construct_prompt``).
            language: 'zh', 'en' or 'zh-en'.

        Returns:
            One dict per style whose model output parsed as JSON, with 'tone'
            forced to the style name. If nothing parsed, a single-element
            list holding the fallback caption.
        """
        brands_in_image = analysis_results.get('brands') or []
        brand_names = [str(b[0]) for b in brands_in_image[:3]]
        if brand_names:
            brand_mention_requirement = (
                f" CRITICAL: Mention {', '.join(brand_names)} brand(s) naturally in the caption."
            )
        else:
            brand_mention_requirement = ""

        # 'length_modifier' is carried along but not read in this class.
        styles = [
            {
                'name': 'professional',
                'temp': 0.6,
                'instruction': f'Professional style: Concise, elegant, sophisticated. Focus on quality and craftsmanship. Use refined language.{brand_mention_requirement}',
                'length_modifier': 0.8,
            },
            {
                'name': 'creative',
                'temp': 0.7,
                'instruction': f'Creative style: Artistic, expressive, imaginative. Use vivid metaphors and sensory descriptions. Balance detail with flair.{brand_mention_requirement}',
                'length_modifier': 1.0,
            },
            {
                'name': 'authentic',
                'temp': 0.8,
                'instruction': f'Authentic style: Personal, detailed, storytelling. Share rich observations and genuine feelings. Most descriptive and engaging.{brand_mention_requirement}',
                'length_modifier': 1.2,
            },
        ]

        # The base prompt does not depend on the style, so build it once.
        base_prompt = self.construct_prompt(analysis_results, platform, language)

        variations = []
        for style in styles:
            style_prompt = f"""{base_prompt}

**STYLE REQUIREMENT FOR THIS CAPTION:**
{style['instruction']}

Adjust tone to be clearly '{style['name']}' - this should be noticeably different from other styles."""

            output_text = self._generate_text(image, style_prompt, style['temp'])

            parsed = self._parse_json_output(output_text)
            if not parsed:
                # Unparseable (or empty) reply: skip this style.
                continue

            # Trust the requested style over whatever tone the model reported.
            parsed['tone'] = style['name']

            caption_text = parsed.get('caption')
            if isinstance(caption_text, str):
                parsed['caption'] = self._remove_hashtags_from_caption(caption_text)

            if language in ('zh', 'zh-en'):
                parsed = self._convert_to_traditional(parsed)

            variations.append(parsed)

        return variations if variations else [self._get_fallback_caption(platform, language)]

    def _remove_hashtags_from_caption(self, caption: str) -> str:
        """Remove any hashtags, pipes, and debug info that leaked into caption text.

        Steps, in order: cut everything from the first '|' onward, delete
        ``#tag`` tokens, drop a trailing all-caps word longer than three
        characters, drop a trailing run of four or more emoji, and finally
        strip a trailing cluster of two or more decorative emoji.
        """
        # Anything after a pipe is treated as leaked hashtags / debug text.
        caption = caption.split('|', 1)[0]

        caption = self._HASHTAG_RE.sub('', caption)

        # Normalise whitespace BEFORE the end-anchored checks below: removing
        # a trailing hashtag leaves a space that would otherwise stop the
        # '$'-anchored emoji pattern from matching.
        caption = self._WHITESPACE_RE.sub(' ', caption).strip()

        words = caption.split()
        if words:
            last_word = words[-1].strip(self._TRAILING_EMOJI)
            # Heuristic: a trailing ALL-CAPS token with no punctuation is
            # assumed to be leaked debug output rather than caption text.
            looks_like_debug = (
                last_word.isupper()
                and len(last_word) > 3
                and not any(char in last_word for char in '.,!?')
            )
            if looks_like_debug:
                caption = ' '.join(words[:-1])

        caption = self._EMOJI_RUN_RE.sub('', caption)
        caption = self._WHITESPACE_RE.sub(' ', caption).strip()

        if self._TRAILING_EMOJI_PAIR_RE.search(caption):
            caption = self._TRAILING_EMOJI_STRIP_RE.sub('', caption).strip()

        return caption

    def _convert_to_traditional(self, caption: Dict) -> Dict:
        """Convert Simplified Chinese to Traditional Chinese.

        Converts the 'caption' string and every string entry of a 'hashtags'
        list through ``self.cc``; other keys and non-string values are left
        untouched. The dict is modified in place and returned.
        """
        caption_text = caption.get('caption')
        if isinstance(caption_text, str):
            caption['caption'] = self.cc.convert(caption_text)

        # The prompt requires Traditional hashtags too, so normalise them
        # the same way as the caption.
        hashtags = caption.get('hashtags')
        if isinstance(hashtags, list):
            caption['hashtags'] = [
                self.cc.convert(tag) if isinstance(tag, str) else tag
                for tag in hashtags
            ]
        return caption

    def _parse_json_output(self, text: str) -> Optional[Dict]:
        """Parse JSON output.

        Extracts the span from the first '{' to the last '}' in *text* and
        decodes it.

        Returns:
            The decoded dict, or None when *text* is not a string, holds no
            brace-delimited span, or the span is not a valid JSON object.
        """
        if not isinstance(text, str):
            return None

        start = text.find('{')
        end = text.rfind('}') + 1
        if start == -1 or end <= start:
            return None

        try:
            parsed = json.loads(text[start:end])
        except json.JSONDecodeError:
            # Malformed model output is expected occasionally; the caller
            # treats None as "skip this variation".
            return None
        return parsed if isinstance(parsed, dict) else None

    def _get_fallback_caption(self, platform: str, language: str) -> Dict:
        """Fallback caption.

        Returns a generic English caption for ``language == 'en'`` and a
        Traditional Chinese one for every other language code.
        """
        if language == 'en':
            return {
                'caption': 'Every moment tells a story worth sharing. The world around us is filled with beauty waiting to be discovered. Take a pause and appreciate the details that make life extraordinary. What caught your eye today? ✨',
                'hashtags': ['photography', 'daily', 'lifestyle', 'moment', 'capture'],
                'tone': 'casual',
                'platform': platform,
            }
        return {
            'caption': '每個瞬間都值得被記錄與分享。生活中充滿了等待被發現的美好細節。停下腳步,用心感受周遭的一切。今天什麼畫面觸動了你的心?✨',
            'hashtags': ['攝影', '日常', '生活', '瞬間', '分享'],
            'tone': 'casual',
            'platform': platform,
        }
|
|
|
|
|
# Module-level confirmation printed once the class definition above has been
# executed (runs on import).
print("✓ CaptionGenerationManager (with Auto* classes for flexible model support) defined")
|
|
|