Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -208,22 +208,31 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
|
|
| 208 |
status.error(f"Failed to process PDF: {str(e)}")
|
| 209 |
return []
|
| 210 |
|
| 211 |
-
async def
|
| 212 |
start_time = time.time()
|
| 213 |
status = st.empty()
|
| 214 |
-
status.text("Processing
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
async def process_image_gen(prompt, output_file):
|
| 229 |
start_time = time.time()
|
|
@@ -373,7 +382,7 @@ with tab_ocr:
|
|
| 373 |
all_files = get_gallery_files()
|
| 374 |
if all_files:
|
| 375 |
if st.button("OCR All Assets 🚀"):
|
| 376 |
-
full_text = "# OCR Results\n\n"
|
| 377 |
for file in all_files:
|
| 378 |
if file.endswith('.png'):
|
| 379 |
image = Image.open(file)
|
|
@@ -383,7 +392,7 @@ with tab_ocr:
|
|
| 383 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 384 |
doc.close()
|
| 385 |
output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
|
| 386 |
-
result = asyncio.run(
|
| 387 |
full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
|
| 388 |
entry = f"OCR Test: {file} -> {output_file}"
|
| 389 |
st.session_state['history'].append(entry)
|
|
@@ -405,7 +414,7 @@ with tab_ocr:
|
|
| 405 |
if st.button("Run OCR 🚀", key="ocr_run"):
|
| 406 |
output_file = generate_filename("ocr_output", "txt")
|
| 407 |
st.session_state['processing']['ocr'] = True
|
| 408 |
-
result = asyncio.run(
|
| 409 |
entry = f"OCR Test: {selected_file} -> {output_file}"
|
| 410 |
st.session_state['history'].append(entry)
|
| 411 |
st.text_area("OCR Result", result, height=200, key="ocr_result")
|
|
@@ -418,7 +427,7 @@ with tab_ocr:
|
|
| 418 |
pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
| 419 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 420 |
output_file = generate_filename(f"ocr_page_{i}", "txt")
|
| 421 |
-
result = asyncio.run(
|
| 422 |
full_text += f"## Page {i + 1}\n\n{result}\n\n"
|
| 423 |
entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
|
| 424 |
st.session_state['history'].append(entry)
|
|
@@ -454,7 +463,7 @@ with tab_build:
|
|
| 454 |
entry = f"Built {model_type} model: {model_name}"
|
| 455 |
st.session_state['history'].append(entry)
|
| 456 |
st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
|
| 457 |
-
st.
|
| 458 |
|
| 459 |
with tab_imggen:
|
| 460 |
st.header("Test Image Gen 🎨")
|
|
@@ -644,7 +653,7 @@ def update_gallery():
|
|
| 644 |
os.remove(file)
|
| 645 |
st.session_state['asset_checkboxes'].pop(file, None)
|
| 646 |
st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
|
| 647 |
-
st.
|
| 648 |
|
| 649 |
update_gallery()
|
| 650 |
|
|
|
|
| 208 |
status.error(f"Failed to process PDF: {str(e)}")
|
| 209 |
return []
|
| 210 |
|
| 211 |
+
async def process_gpt4o_ocr(image, output_file, *, max_tokens=300):
    """Extract text from an image via a GPT-4o chat completion and save it.

    The image is PNG-encoded, embedded as a base64 data URL in a single user
    message, and sent through the module-level ``client``. Progress and
    errors are reported through a Streamlit placeholder created with
    ``st.empty()``.

    Args:
        image: Object exposing ``save(fp, format=...)``; the visible callers
            pass PIL images.
        output_file: Path of the text file the extracted text is written to.
        max_tokens: Upper bound on the completion length. Defaults to the
            original hard-coded 300, which can truncate dense pages; callers
            may raise it.

    Returns:
        The extracted text, or ``""`` if any step failed (the error is shown
        in the status placeholder rather than raised).
    """
    start_time = time.time()
    status = st.empty()
    status.text("Processing GPT-4o OCR... (0s)")
    try:
        # Encoding lives inside the try so a failure to serialize the image
        # is reported through `status` like every other failure here,
        # instead of escaping to the caller.
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract the electronic text from this image."},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}", "detail": "auto"}},
            ],
        }]
        # NOTE(review): this call looks synchronous, so it blocks until the
        # response arrives even though the function is a coroutine — confirm
        # against how `client` is constructed.
        response = client.chat.completions.create(
            model="gpt-4o", messages=messages, max_tokens=max_tokens
        )
        # The message content can be None when no text is returned; normalize
        # so the file write and the callers' string formatting get a str.
        result = response.choices[0].message.content or ""
        elapsed = int(time.time() - start_time)
        status.text(f"GPT-4o OCR completed in {elapsed}s!")
        # Explicit UTF-8: extracted text routinely contains non-ASCII
        # characters, and the platform default encoding may not handle them.
        async with aiofiles.open(output_file, "w", encoding="utf-8") as f:
            await f.write(result)
        return result
    except Exception as e:
        # UI boundary: surface the problem to the user and degrade to an
        # empty result so batch callers can continue with the next asset.
        status.error(f"Failed to process image with GPT-4o: {str(e)}")
        return ""
|
| 236 |
|
| 237 |
async def process_image_gen(prompt, output_file):
|
| 238 |
start_time = time.time()
|
|
|
|
| 382 |
all_files = get_gallery_files()
|
| 383 |
if all_files:
|
| 384 |
if st.button("OCR All Assets 🚀"):
|
| 385 |
+
full_text = "# OCR Results (GPT-4o)\n\n"
|
| 386 |
for file in all_files:
|
| 387 |
if file.endswith('.png'):
|
| 388 |
image = Image.open(file)
|
|
|
|
| 392 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 393 |
doc.close()
|
| 394 |
output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
|
| 395 |
+
result = asyncio.run(process_gpt4o_ocr(image, output_file))
|
| 396 |
full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
|
| 397 |
entry = f"OCR Test: {file} -> {output_file}"
|
| 398 |
st.session_state['history'].append(entry)
|
|
|
|
| 414 |
if st.button("Run OCR 🚀", key="ocr_run"):
|
| 415 |
output_file = generate_filename("ocr_output", "txt")
|
| 416 |
st.session_state['processing']['ocr'] = True
|
| 417 |
+
result = asyncio.run(process_gpt4o_ocr(image, output_file))
|
| 418 |
entry = f"OCR Test: {selected_file} -> {output_file}"
|
| 419 |
st.session_state['history'].append(entry)
|
| 420 |
st.text_area("OCR Result", result, height=200, key="ocr_result")
|
|
|
|
| 427 |
pix = doc[i].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
| 428 |
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 429 |
output_file = generate_filename(f"ocr_page_{i}", "txt")
|
| 430 |
+
result = asyncio.run(process_gpt4o_ocr(image, output_file))
|
| 431 |
full_text += f"## Page {i + 1}\n\n{result}\n\n"
|
| 432 |
entry = f"OCR Test: {selected_file} Page {i + 1} -> {output_file}"
|
| 433 |
st.session_state['history'].append(entry)
|
|
|
|
| 463 |
entry = f"Built {model_type} model: {model_name}"
|
| 464 |
st.session_state['history'].append(entry)
|
| 465 |
st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
|
| 466 |
+
st.rerun()
|
| 467 |
|
| 468 |
with tab_imggen:
|
| 469 |
st.header("Test Image Gen 🎨")
|
|
|
|
| 653 |
os.remove(file)
|
| 654 |
st.session_state['asset_checkboxes'].pop(file, None)
|
| 655 |
st.success(f"Asset {os.path.basename(file)} vaporized! 💨")
|
| 656 |
+
st.rerun()
|
| 657 |
|
| 658 |
update_gallery()
|
| 659 |
|