Spaces:

Ahmed-El-Sharkawy
/

Object-Detection-Faster-CNN-RESNET-OR-MobileNet

Sleeping

App Files Files Community

Ahmed-El-Sharkawy committed on May 10

Commit

b65b15a

1 Parent(s): 794a8d6

Add application file

Browse files

Files changed (3) hide show

.gitingore +0 -0
app.py +82 -0
requirements.txt +6 -0

.gitingore ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import gradio as gr
+import cv2
+import torch
+import os
+import numpy as np
+from torchvision.models.detection import FasterRCNN
+from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+# Load Models
+def load_model(model_path, backbone_name, num_classes):
+    if backbone_name == "resnet50":
+        model = torch.load(model_path)
+    elif backbone_name == "mobilenet":
+        model = torch.load(model_path)
+    model.eval()
+    return model
+resnet_model = load_model('fasterrcnnResnet.pth', 'resnet50', num_classes=6)
+mobilenet_model = load_model('fasterrcnnMobilenet.pth', 'mobilenet', num_classes=6)
+class_names = ['background', 'Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck']
+# Inference Function for Images and Videos
+def predict_image(image_path, model):
+    image = cv2.imread(image_path)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    image_tensor = torch.tensor(image / 255.0).permute(2, 0, 1).float().unsqueeze(0)
+    with torch.no_grad():
+        output = model(image_tensor)[0]
+    for box, label, score in zip(output['boxes'], output['labels'], output['scores']):
+        if score > 0.5:
+            x1, y1, x2, y2 = map(int, box.tolist())
+            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
+            cv2.putText(image, f"{class_names[label]}: {score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
+    return image
+def predict_video(video_path, model):
+    cap = cv2.VideoCapture(video_path)
+    frames = []
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame_tensor = torch.tensor(frame / 255.0).permute(2, 0, 1).float().unsqueeze(0)
+        with torch.no_grad():
+            output = model(frame_tensor)[0]
+        for box, label, score in zip(output['boxes'], output['labels'], output['scores']):
+            if score > 0.5:
+                x1, y1, x2, y2 = map(int, box.tolist())
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                cv2.putText(frame, f"{class_names[label]}: {score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
+        frames.append(frame)
+    cap.release()
+    return frames[0] if frames else None
+# Gradio Interface for Image and Video Inference
+model_selection = gr.Dropdown(choices=["ResNet50", "MobileNet"], label="Select Model")
+inputs_image = [gr.Image(type="filepath", label="Upload Image"), model_selection]
+outputs_image = gr.Image(type="numpy", label="Detection Output")
+inputs_video = [gr.Video(type="filepath", label="Upload Video"), model_selection]
+outputs_video = gr.Image(type="numpy", label="Detection Output")
+image_interface = gr.Interface(
+    fn=lambda img, model_name: predict_image(img, resnet_model if model_name == "ResNet50" else mobilenet_model),
+    inputs=inputs_image,
+    outputs=outputs_image,
+    title="Image Inference"
+)
+video_interface = gr.Interface(
+    fn=lambda vid, model_name: predict_video(vid, resnet_model if model_name == "ResNet50" else mobilenet_model),
+    inputs=inputs_video,
+    outputs=outputs_video,
+    title="Video Inference"
+)
+gr.TabbedInterface([image_interface, video_interface], ["Image", "Video"]).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+opencv-python-headless
+torch
+torchvision
+numpy
+matplotlib