Spaces:
Sleeping
Sleeping
fix: Configure Tesseract OCR path for Docker/Linux environment
Browse files
- Dockerfile +2 -1
- backend/services/gps_extractor.py +8 -3
Dockerfile
CHANGED
|
@@ -6,7 +6,8 @@ FROM python:3.9-slim
|
|
| 6 |
# Set environment variables
|
| 7 |
ENV PYTHONUNBUFFERED=1 \
|
| 8 |
PYTHONDONTWRITEBYTECODE=1 \
|
| 9 |
-
DEBIAN_FRONTEND=noninteractive
|
|
|
|
| 10 |
|
| 11 |
# Install system dependencies including Tesseract OCR
|
| 12 |
RUN apt-get update && apt-get install -y \
|
|
|
|
| 6 |
# Set environment variables
|
| 7 |
ENV PYTHONUNBUFFERED=1 \
|
| 8 |
PYTHONDONTWRITEBYTECODE=1 \
|
| 9 |
+
DEBIAN_FRONTEND=noninteractive \
|
| 10 |
+
TESSERACT_CMD=/usr/bin/tesseract
|
| 11 |
|
| 12 |
# Install system dependencies including Tesseract OCR
|
| 13 |
RUN apt-get update && apt-get install -y \
|
backend/services/gps_extractor.py
CHANGED
|
@@ -93,13 +93,18 @@ class GPSExtractor:
|
|
| 93 |
import cv2
|
| 94 |
import os
|
| 95 |
|
| 96 |
-
# Configure Tesseract path
|
| 97 |
tesseract_paths = [
|
|
|
|
|
|
|
| 98 |
r"D:\OCR-System\tesseract.exe", # User's custom installation
|
| 99 |
-
r"C:\Program Files\Tesseract-OCR\tesseract.exe",
|
| 100 |
-
r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
|
| 101 |
]
|
| 102 |
|
|
|
|
|
|
|
|
|
|
| 103 |
for path in tesseract_paths:
|
| 104 |
if os.path.exists(path):
|
| 105 |
pytesseract.pytesseract.tesseract_cmd = path
|
|
|
|
| 93 |
import cv2
|
| 94 |
import os
|
| 95 |
|
| 96 |
+
# Configure Tesseract path
|
| 97 |
tesseract_paths = [
|
| 98 |
+
os.getenv('TESSERACT_CMD'), # Environment variable (Docker/HF)
|
| 99 |
+
'/usr/bin/tesseract', # Linux default (Docker/HF)
|
| 100 |
r"D:\OCR-System\tesseract.exe", # User's custom installation
|
| 101 |
+
r"C:\Program Files\Tesseract-OCR\tesseract.exe", # Windows
|
| 102 |
+
r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe", # Windows x86
|
| 103 |
]
|
| 104 |
|
| 105 |
+
# Filter out None values
|
| 106 |
+
tesseract_paths = [p for p in tesseract_paths if p]
|
| 107 |
+
|
| 108 |
for path in tesseract_paths:
|
| 109 |
if os.path.exists(path):
|
| 110 |
pytesseract.pytesseract.tesseract_cmd = path
|