"""Doc2Page: turn an uploaded document into a standalone HTML page.

Pipeline:

1. The uploaded PDF/image is base64-encoded and posted to the PaddleOCR-VL
   endpoint configured through the ``API_URL`` / ``TOKEN`` environment
   variables; the endpoint's markdown output is collected.
2. The markdown is converted to an HTML page by an ERNIE chat model reached
   through the OpenAI-compatible client (requires ``QIANFAN_TOKEN``), or by
   the ``markdown`` package when no client is configured or the call fails.
3. A Gradio UI exposes upload, preview, source views and a download file.
"""

import base64
import os
import re
import tempfile
from pathlib import Path
from typing import Tuple
from urllib.parse import urlparse

import gradio as gr
import markdown
import requests
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables from .env file
load_dotenv()

# API Configuration for PaddleOCR-VL
API_URL = os.getenv("API_URL", "")
TOKEN = os.getenv("TOKEN", "")

# Values sent as the ``fileType`` field of the PaddleOCR-VL request payload.
_FILE_TYPE_PDF = 0
_FILE_TYPE_IMAGE = 1

# Single source of truth for accepted extensions: used both by the extractor
# and by the upload widget so the UI never offers a type the backend rejects.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".gif")
SUPPORTED_EXTENSIONS = (".pdf",) + _IMAGE_EXTENSIONS

# Placeholder markup shown in the preview tab.
_PREVIEW_PLACEHOLDER_HTML = "<p>Your converted webpage will appear here</p>"
_NO_PREVIEW_HTML = "<p>No preview available</p>"

# Prompt sent to ERNIE; ``{markdown_text}`` is the only substitution field.
_ERNIE_PROMPT_TEMPLATE = """Please convert the following markdown text into a modern, clean HTML page. Use contemporary typography with the Inter font family and clean design principles. Make it visually appealing with proper CSS styling, responsive design, and excellent readability.

Design requirements:
- Use Inter font from Google Fonts
- Clean, modern spacing and typography
- Subtle shadows and rounded corners
- Good color contrast and hierarchy
- Responsive design that works on all devices
- Include proper HTML structure with head, body, and semantic elements

Important: Add a footer at the bottom with "Powered by PaddleOCR-VL and ERNIE" where PaddleOCR-VL links to https://github.com/PaddlePaddle/PaddleOCR and ERNIE links to https://huggingface.co/BAIDU. Style it with modern, subtle styling.

Markdown content:
{markdown_text}

IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and ending with </html>. Do NOT wrap it in markdown code blocks or add any explanations. I need the pure HTML content that can be directly saved as an .html file."""


class Doc2PageConverter:
    """Convert a document to markdown (PaddleOCR-VL) and then to HTML (ERNIE)."""

    def __init__(self):
        """Create the ERNIE client when ``QIANFAN_TOKEN`` is set.

        Without a token ``self.client`` stays ``None`` and HTML generation
        uses the local ``markdown`` fallback.
        """
        self.qianfan_token = os.getenv('QIANFAN_TOKEN')
        self.qianfan_model = "ernie-x1.1-preview"
        self.client = None
        if self.qianfan_token:
            self.client = OpenAI(
                base_url="https://qianfan.baidubce.com/v2",
                api_key=self.qianfan_token,
            )

    @staticmethod
    def _detect_file_type(file_path: str, is_url: bool) -> int:
        """Map the extension of *file_path* to the API's ``fileType`` code.

        Raises:
            ValueError: if the extension is not in ``SUPPORTED_EXTENSIONS``.
        """
        # For URLs only the path component carries the extension; query
        # strings and fragments must not be mistaken for part of it.
        name = urlparse(file_path).path if is_url else file_path
        ext = os.path.splitext(name)[1].lower()
        if ext == '.pdf':
            return _FILE_TYPE_PDF
        if ext in _IMAGE_EXTENSIONS:
            return _FILE_TYPE_IMAGE
        raise ValueError(f"不支持的文件类型: '{ext}'")

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding markdown code fence from *text*, if present."""
        text = text.strip()
        if text.startswith('```html'):
            text = text[7:]
        elif text.startswith('```'):
            text = text[3:]
        if text.endswith('```'):
            text = text[:-3]
        return text.strip()

    def extract_text_with_vl_api(self, file_path: str) -> str:
        """Send a document to the PaddleOCR-VL endpoint and return its markdown.

        Args:
            file_path: Local path or ``http(s)://`` URL of a PDF or image.

        Returns:
            The markdown text of all returned pages joined by blank lines, with
            image placeholders in ``src="..."`` attributes replaced by the
            values from the API's image map. Empty string when the API returns
            no layout results.

        Raises:
            ValueError: if ``API_URL`` is not configured or the file extension
                is unsupported.
            RuntimeError: if the file cannot be read/downloaded, the HTTP
                request fails, the response is not valid JSON, or the API
                reports a non-zero ``errorCode``.
        """
        if not API_URL:
            raise ValueError("API_URL must be configured in .env file")

        headers = {"Content-Type": "application/json"}
        if TOKEN:
            headers["Authorization"] = f"bearer {TOKEN}"

        is_url = isinstance(file_path, str) and file_path.startswith(("http://", "https://"))
        file_type = self._detect_file_type(file_path, is_url)

        try:
            if is_url:
                download = requests.get(file_path, timeout=60)
                download.raise_for_status()
                content = download.content
            else:
                with open(file_path, "rb") as f:
                    content = f.read()
        except (OSError, requests.exceptions.RequestException) as e:
            raise RuntimeError(f"读取和编码文件失败: {e}") from e
        b64_content = base64.b64encode(content).decode("utf-8")

        # Option flags are forwarded to the endpoint unchanged.
        payload = {
            "file": b64_content,
            "fileType": file_type,
            "useLayoutDetection": True,
            "useDocUnwarping": False,
            "useDocOrientationClassify": False,
            "useChartRecognition": False,
        }

        try:
            print(f"Sending PaddleOCR-VL API request to {API_URL}...")
            response = requests.post(API_URL, json=payload, headers=headers, timeout=300)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"PaddleOCR-VL API request failed: {e}") from e

        # Parsed separately so a malformed body is reported as such.  JSON
        # decode errors (stdlib and requests') are ValueError subclasses.
        try:
            result_data = response.json()
        except ValueError as e:
            # Truncate: an HTML error page could otherwise flood the message.
            raise RuntimeError(
                f"Invalid JSON response from VL API: {response.text[:500]}"
            ) from e

        if result_data.get("errorCode", -1) != 0:
            error_msg = result_data.get("errorMessage", "Unknown API error")
            raise RuntimeError(f"PaddleOCR-VL API returned an error: {error_msg}")

        layout_results = (result_data.get("result") or {}).get("layoutParsingResults") or []

        # Collect every returned page rather than only the first, so that
        # multi-page PDFs are converted in full.
        page_texts = []
        for page_result in layout_results:
            markdown_data = page_result.get("markdown") or {}
            page_text = markdown_data.get("text") or ""
            image_map = markdown_data.get("images") or {}
            for placeholder, real_url in image_map.items():
                page_text = page_text.replace(f'src="{placeholder}"', f'src="{real_url}"')
            if page_text:
                page_texts.append(page_text)
        return "\n\n".join(page_texts)

    def markdown_to_html_with_ernie(self, markdown_text: str) -> str:
        """Convert markdown to a styled HTML page using the ERNIE chat model.

        Falls back to :meth:`basic_markdown_to_html` when no client is
        configured, when the API call raises, or when the model returns an
        empty reply (an empty result would otherwise be indistinguishable
        from a failed conversion for callers).
        """
        if not self.client:
            return self.basic_markdown_to_html(markdown_text)

        try:
            prompt = _ERNIE_PROMPT_TEMPLATE.format(markdown_text=markdown_text)
            messages = [{"role": "user", "content": prompt}]
            response = self.client.chat.completions.create(
                model=self.qianfan_model,
                messages=messages,
                max_tokens=64000,
            )
            # The model sometimes wraps its answer in a code fence despite
            # the instruction not to; strip it before returning.
            html_content = self._strip_code_fence(response.choices[0].message.content or "")
            if not html_content:
                raise ValueError("empty response")
            return html_content
        except Exception as e:
            print(f"Error calling ERNIE API: {e}")
            return self.basic_markdown_to_html(markdown_text)

    def basic_markdown_to_html(self, markdown_text: str) -> str:
        """Fallback conversion: render markdown locally into a minimal HTML page."""
        body_html = markdown.markdown(markdown_text)
        return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Converted Document</title>
</head>
<body>
{body_html}
</body>
</html>
"""

    def process_document(self, file_path: str) -> Tuple[str, str]:
        """Run the full pipeline for one document.

        Returns:
            ``(markdown, html)`` on success.  On failure or when no text was
            extracted, ``(message, "")`` — the empty second element is how
            callers detect that the first element is a status message.
        """
        try:
            markdown_content = self.extract_text_with_vl_api(file_path)
            if not markdown_content.strip():
                return "Warning: No text content extracted from the document.", ""
            html_content = self.markdown_to_html_with_ernie(markdown_content)
            return markdown_content, html_content
        except Exception as e:
            return f"Error processing document: {str(e)}", ""


# --- Gradio UI and event handling logic ---
converter = Doc2PageConverter()


def _uploaded_path(file) -> str:
    """Return the filesystem path of an upload value.

    Accepts either a plain path string or an object exposing ``.name``.
    """
    return str(getattr(file, "name", file))


def process_upload(file):
    """Convert an uploaded file; return ``(status, markdown, html)``."""
    if file is None:
        return "Please upload a file.", "", ""
    try:
        markdown_result, html_result = converter.process_document(_uploaded_path(file))
        if html_result:
            return "Document processed successfully!", markdown_result, html_result
        # With empty HTML the first element is a warning/error message.
        return markdown_result, "", ""
    except Exception as e:
        return f"Error: {str(e)}", "", ""


def save_html_file(html_content, filename="converted_page"):
    """Write *html_content* to a new temporary ``.html`` file.

    Returns the file's path, or ``None`` when there is nothing to write.
    The file is not deleted automatically (``delete=False``) because it is
    handed to the download component after this function returns.
    """
    if not html_content:
        return None
    # Explicit UTF-8: the locale default encoding may not be able to encode
    # the page content (emoji, CJK text).
    with tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.html',
        delete=False,
        prefix=f"{filename}_",
        encoding="utf-8",
    ) as temp_file:
        temp_file.write(html_content)
    return temp_file.name


custom_theme = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="gray",
    font=("Inter", "system-ui", "sans-serif"),
).set(
    body_background_fill="#fafafa",
    background_fill_primary="#ffffff",
    border_color_primary="#e5e7eb",
    button_primary_background_fill="#6366f1",
    button_primary_background_fill_hover="#4f46e5",
    button_primary_text_color="#ffffff",
)

with gr.Blocks(
    title="Doc2Page - Document to Webpage Converter",
    theme=custom_theme,
    css=".gradio-container { max-width: 1200px !important; margin: auto; }",
) as app:
    gr.Markdown("# Doc2Page\n🥃 Transform your documents into beautiful webpages!")

    with gr.Row():
        with gr.Column(scale=1, min_width=350):
            file_input = gr.File(
                label="📄 Upload Document",
                file_types=list(SUPPORTED_EXTENSIONS),
            )
            process_btn = gr.Button("✨ Convert to Webpage", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("❤️ Preview"):
                    html_preview = gr.HTML(label="", value=_PREVIEW_PLACEHOLDER_HTML)
                with gr.TabItem("📝 Markdown Source"):
                    markdown_output = gr.Textbox(label="", interactive=False, show_copy_button=True)
                with gr.TabItem("🌐 HTML Source"):
                    html_output = gr.Code(label="", language="html", interactive=False)

    with gr.Row(visible=False) as download_section:
        gr.Markdown("### 📥 Download Your Webpage")
        download_btn = gr.File(label="HTML File", visible=True)

    def process_and_update(file):
        """Click handler: convert the upload and refresh every output widget."""
        status, markdown_content, html_content = process_upload(file)

        download_file = None
        show_download = False
        if html_content:
            filename = Path(_uploaded_path(file)).stem if file else "converted_page"
            download_file = save_html_file(html_content, filename)
            show_download = True

        preview_content = html_content or _NO_PREVIEW_HTML
        return (
            status,
            markdown_content,
            html_content,
            preview_content,
            download_file,
            gr.update(visible=show_download),
        )

    process_btn.click(
        fn=process_and_update,
        inputs=[file_input],
        outputs=[
            status_output,
            markdown_output,
            html_output,
            html_preview,
            download_btn,
            download_section,
        ],
    )

    gr.Markdown(
        """
Powered by PaddleOCR-VL & ERNIE
"""
    )

if __name__ == "__main__":
    app.launch()