In [None]:
import numpy as np
import cv2
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import os
import numpy as np
import cv2
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, precision_score, recall_score
import os


In [None]:

def extract_features(image_path, image_size=(200, 200)):
    """Compute a flat HOG feature vector for one image.

    The image is loaded as grayscale, resized to ``image_size`` and passed
    through an OpenCV ``HOGDescriptor`` built with its default parameters.

    Args:
        image_path: Path of the image file to read.
        image_size: Target size handed to ``cv2.resize``. Defaults to
            ``(200, 200)``, the size this notebook was written with.

    Returns:
        The HOG descriptor flattened to a 1-D array.

    Raises:
        OSError: If OpenCV cannot read the image (``cv2.imread`` returned None).
    """
    # Read the image
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise OSError(f"Could not read image: {image_path}")

    # Resize the image to a fixed size so every feature vector has the same length
    img = cv2.resize(img, image_size)

    # Extract HOG features
    hog = cv2.HOGDescriptor()
    features = hog.compute(img)

    return features.flatten()

def load_yolo_annotations(annotation_path, img_width, img_height, target_class_id=3):
    """Return the first bounding box of a given class from a YOLO label file.

    Each non-blank line is expected to hold
    ``class_id x_center y_center width height`` separated by whitespace, with
    the four geometry values scaled by the image size (they are multiplied by
    ``img_width`` / ``img_height`` here).

    Args:
        annotation_path: Path of the YOLO ``.txt`` annotation file.
        img_width: Image width used to scale the x centre and the box width.
        img_height: Image height used to scale the y centre and the box height.
        target_class_id: Class id to look for. Defaults to 3, the value that
            was previously hard-coded.

    Returns:
        ``[x, y, width, height]`` (top-left corner plus size) of the first
        matching line, or ``None`` when no line has the requested class.
    """
    with open(annotation_path, 'r') as file:
        for line in file:
            parts = line.split()
            if not parts:
                # Blank lines (e.g. a trailing newline) carry no annotation
                continue
            if int(parts[0]) != target_class_id:
                continue

            x_center = float(parts[1]) * img_width
            y_center = float(parts[2]) * img_height
            width = float(parts[3]) * img_width
            height = float(parts[4]) * img_height

            # Convert from YOLO format (center x, center y, width, height) to (x, y, width, height)
            x = x_center - (width / 2)
            y = y_center - (height / 2)
            return [x, y, width, height]
    return None

In [None]:

# Load dataset
X = []  # Features: one HOG vector per image
y = []  # Labels: [x, y, width, height] per image, or None when no matching box exists

# Path to your dataset and annotations
dataset_path = "C:/Users/keese/term_project/data/processed/training/images"
annotations_path = "C:/Users/keese/term_project/data/processed/training/labels"

# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and therefore the seeded train/test split further down) reproducible.
for filename in sorted(os.listdir(dataset_path)):
    stem, extension = os.path.splitext(filename)
    if extension.lower() not in (".jpg", ".png"):
        continue

    image_path = os.path.join(dataset_path, filename)
    # Swap only the real extension; str.replace would also rewrite a ".jpg"/".png"
    # that happens to appear in the middle of the file name.
    annotation_file = os.path.join(annotations_path, stem + ".txt")

    if not os.path.exists(annotation_file):
        print(f"Warning: Annotation file not found for {image_path}")
        continue

    # Read the image to get its dimensions
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for unreadable/corrupt files instead of raising
        print(f"Warning: Could not read image {image_path}")
        continue
    img_height, img_width = img.shape[:2]

    # Extract features
    features = extract_features(image_path)
    X.append(features)

    # Load bounding box coordinates from YOLO annotations
    bbox = load_yolo_annotations(annotation_file, img_width, img_height)
    y.append(bbox)

In [None]:
# Stack the per-image feature vectors into one array
X = np.array(X)

# Build the label array straight from the list. Converting the raw list first
# (np.array(y)) fails when it mixes None with 4-element boxes, because that is a
# ragged array. Images without a box get a NaN row so they can be masked out later.
# reshape(-1, 4) keeps the array 2-D even when there are no samples at all.
y = np.array(
    [bbox if bbox is not None else [np.nan, np.nan, np.nan, np.nan] for bbox in y],
    dtype=float,
).reshape(-1, 4)

In [None]:
# Keep only the samples whose bounding box has no NaN coordinate.
# A row is valid when every one of its values is a real number.
valid_mask = (~np.isnan(y)).all(axis=1)
X, y = X[valid_mask], y[valid_mask]

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# One independent linear-kernel SVR per bounding-box coordinate
models = {
    coord_name: svm.SVR(kernel='linear')
    for coord_name in ('x', 'y', 'width', 'height')
}

In [None]:
# Fit every regressor on its own column of the label matrix
label_columns = ['x', 'y', 'width', 'height']
for coord, regressor in models.items():
    # Position of this coordinate inside each [x, y, width, height] label row
    coord_index = label_columns.index(coord)
    regressor.fit(X_train, y_train[:, coord_index])

In [None]:
# Evaluate the model
def _bbox_iou(boxes_a, boxes_b):
    """Row-wise intersection-over-union of two (n, 4) arrays of [x, y, width, height] boxes."""
    boxes_a = np.asarray(boxes_a, dtype=float)
    boxes_b = np.asarray(boxes_b, dtype=float)

    # Regressed sizes can come out negative; treat those as empty boxes
    w_a = np.clip(boxes_a[:, 2], 0, None)
    h_a = np.clip(boxes_a[:, 3], 0, None)
    w_b = np.clip(boxes_b[:, 2], 0, None)
    h_b = np.clip(boxes_b[:, 3], 0, None)

    overlap_w = np.clip(
        np.minimum(boxes_a[:, 0] + w_a, boxes_b[:, 0] + w_b)
        - np.maximum(boxes_a[:, 0], boxes_b[:, 0]),
        0,
        None,
    )
    overlap_h = np.clip(
        np.minimum(boxes_a[:, 1] + h_a, boxes_b[:, 1] + h_b)
        - np.maximum(boxes_a[:, 1], boxes_b[:, 1]),
        0,
        None,
    )
    intersection = overlap_w * overlap_h
    union = w_a * h_a + w_b * h_b - intersection

    # Two empty boxes have union 0; report IoU 0 instead of dividing by zero
    return np.divide(intersection, union, out=np.zeros_like(intersection), where=union > 0)


# Predict the columns in the explicit label order instead of relying on dict iteration order
y_pred = np.column_stack(
    [models[coord].predict(X_test) for coord in ('x', 'y', 'width', 'height')]
)
y_test = np.array(y_test)

mse = mean_squared_error(y_test, y_pred)

# precision_score / recall_score are classification metrics and raise on
# continuous box coordinates. A predicted box is counted as a true positive
# when its IoU with the ground-truth box reaches the threshold. Each test image
# has exactly one predicted and one ground-truth box, so every miss is both a
# false positive and a false negative, and precision equals recall here.
iou_threshold = 0.5
iou = _bbox_iou(y_pred, y_test)
true_positives = int(np.sum(iou >= iou_threshold))
precision = true_positives / len(iou) if len(iou) else 0.0
recall = true_positives / len(iou) if len(iou) else 0.0

print(f"Mean Squared Error: {mse}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

In [None]:
# Predict the bounding box of a table in a single image
def predict_table_bbox(image_path):
    """Run every per-coordinate regressor in ``models`` on one image.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        A list with one predicted value per regressor, in the iteration order
        of ``models`` (x, y, width, height as that dict is built in this notebook).
    """
    features = extract_features(image_path)
    bbox = []
    for regressor in models.values():
        # predict expects a batch, so wrap the single feature vector in a list
        prediction = regressor.predict([features])
        bbox.append(prediction[0])
    return bbox


In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import cv2

def visualize_predictions(image_path, predictions, figsize=(20, 20)):
    """Draw predicted bounding boxes on top of an image.

    Args:
        image_path: Path of the image to display.
        predictions: Iterable of boxes. Each entry is either
            ``(x, y, width, height, class_label)`` or
            ``(x, y, width, height, score, class_label)``, where ``(x, y)`` is
            the corner passed to ``matplotlib.patches.Rectangle``.
        figsize: Figure size in inches. Defaults to ``(20, 20)``, the size that
            was previously hard-coded.

    Raises:
        OSError: If OpenCV cannot read the image (``cv2.imread`` returned None).
        ValueError: If an entry has neither 5 nor 6 fields (raised by unpacking).
    """
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None on failure; stop before cvtColor fails obscurely
        raise OSError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

    # Create a figure and axis
    fig, ax = plt.subplots(1)
    fig.set_size_inches(*figsize)

    # Display the image
    ax.imshow(img)
    ax.axis("off")  # Hide the axes

    # Loop through the predictions and draw rectangles
    for pred in predictions:
        if len(pred) == 6:
            x, y, w, h, score, cls = pred
            label = f'{cls} {score:.2f}'
        else:
            # 5-field form used throughout this notebook: no confidence score
            x, y, w, h, cls = pred
            label = f'{cls}'

        # Create a rectangle patch
        rect = Rectangle((x, y), w, h, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

        # Label the box, centred on its top edge
        ax.text(x + w / 2, y, label, color='r', ha='center', va='bottom')

    # Show the plot
    plt.show()



# Predict and draw the table box for one sample image
image_path = "C:/Users/keese/term_project/Document_layout_Detection_Yolov8/training/images/PMC2987860_00002.jpg"
preds = predict_table_bbox(image_path)

# One entry per box: the four predicted values followed by the class label
predictions = [list(preds[:4]) + ['Table']]

visualize_predictions(image_path, predictions)


In [None]:
# Predict and draw the table box for one sample image
image_path = "C:/Users/keese/term_project/Document_layout_Detection_Yolov8/training/images/PMC3033327_00002.jpg"
preds = predict_table_bbox(image_path)

# One entry per box: the four predicted values followed by the class label
predictions = [list(preds[:4]) + ['Table']]

visualize_predictions(image_path, predictions)

In [None]:
# Predict and draw the table box for one sample image
image_path = "C:/Users/keese/term_project/Document_layout_Detection_Yolov8/validation/images/PMC2639556_00006.jpg"
preds = predict_table_bbox(image_path)

# One entry per box: the four predicted values followed by the class label
predictions = [list(preds[:4]) + ['Table']]

visualize_predictions(image_path, predictions)

In [None]:
# Predict and draw the table box for one sample image
image_path = "C:/Users/keese/term_project/Document_layout_Detection_Yolov8/validation/images/PMC2683799_00002.jpg"
preds = predict_table_bbox(image_path)

# One entry per box: the four predicted values followed by the class label
predictions = [list(preds[:4]) + ['Table']]

visualize_predictions(image_path, predictions)