My Repository
Clone the project repository using Git:
cd The-Nearest-Car-Detection
Real-Time Object Distance Estimation using YOLOv5 and OpenCV
This publication demonstrates object distance estimation in videos using the YOLOv5 model for object detection and OpenCV for video processing.
Overview
Requirements
Make sure you have the following libraries installed:
D = (F * W) / P
Where:
D: Estimated distance from the camera to the object in centimeters.
F: Focal length of the camera in pixels.
W: Real-world width of the object in centimeters.
P: Width of the object in pixels (bounding box).
Output: A new video file (output_video_with_distances.mp4) with bounding boxes and distances displayed.
Example Output
Here is an example frame showing the distance overlay on detected objects (cars):
.gif?Expires=1758225446&Key-Pair-Id=K2V2TN6YBJQHTG&Signature=H6aSlPh5EVWUJHMnH~V-d~kQOU54lLl6vUS41xJNvWPyefcXWVJjTPyYaYQrGbcWNfaK8dU7Bpx0A2enqsATuZx0iRqtAv7vL0bSc6xMg31mogdx5Lgh5SNWTGYwv37o4u8iMGtqIau-lBaq0CULXH3OrCfrWvozk7ntcdydMsPzRT6MDVcXTbyKaocMQH00ayecn4UGlMr3jbRm2jNTLR3x30G3WUY6U71KUyzOy7wuSnStuX7v75UVN3~RWGKcRw2lBoIOG0jAg4qTHyR99laa~LZ6roFM18R595mkO50kg-sijI4RNNjCdTrjVsc0o4bW5w~yUjpJ8ysyklVUyQ__)
YOLOv5 Model
We use the YOLOv5 object detection model pretrained on the COCO dataset.
Next Steps:
To enhance this project, you can:
Optimize the accuracy of distance estimation.
Detect multiple objects with different classes (e.g., trucks, buses).
References:
YOLOv5 GitHub Repository
OpenCV Documentation
COCO Dataset
The full code
Good Code
import numpy as np
import cv2
# Constants for the distance estimate D = (F * W) / P.
F = 700  # focal length in pixels; replace with actual focal length value
W = 60  # width of the object in cm; replace with the actual width of the object


def calculate_distance(P):
    """Estimate the distance of an object from the camera.

    Applies ``D = (F * W) / P`` using the module-level focal length ``F``
    (pixels) and object width ``W`` (cm).

    Args:
        P: Width of the object in pixels (e.g. its bounding-box width).

    Returns:
        The estimated distance in centimeters. A non-positive ``P`` carries
        no usable size information, so ``float('inf')`` is returned instead
        of raising ``ZeroDivisionError`` or yielding a negative distance.
    """
    if P <= 0:
        return float("inf")
    return (F * W) / P
# Load the video
video_path = '/content/1075151066-preview.mp4'  # replace with actual path to your video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error: Video file not accessible.")
    # Exit with a non-zero status so callers can tell the run failed.
    raise SystemExit(1)

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Some containers report 0 FPS; fall back to a sane default so the writer
# still produces a playable file.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create a VideoWriter object to save the output video
output_path = '/content/output_video_with_distances.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 file
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# HSV bounds of the color to detect, built once instead of on every frame.
# NOTE: OpenCV stores 8-bit hue as 0-179 (degrees / 2), so the range 30-90
# runs from yellow through green to cyan. Adjust it to your object's color.
lower = np.array([30, 50, 50])
upper = np.array([90, 255, 255])

MIN_CONTOUR_AREA = 1000  # contours at or below this area are treated as noise

try:
    while True:
        ret, img = cap.read()
        if not ret:
            print("Failed to grab frame or end of video reached")
            break

        # Convert the frame to HSV and keep only pixels inside the color range
        hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv_img, lower, upper)

        # Find the outer contours of the masked regions
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            if cv2.contourArea(contour) <= MIN_CONTOUR_AREA:
                continue  # Ignore small contours

            # The bounding-box width is the pixel width (P) of the object
            x, y, w, h = cv2.boundingRect(contour)
            distance = calculate_distance(w)

            # Draw a rectangle around the object
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Display the calculated distance; keep the label inside the
            # frame when the box touches the top edge.
            cv2.putText(img, f'{distance:.2f} cm', (x, max(y - 10, 25)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Write the processed frame to the output video
        out.write(img)
finally:
    # Release resources even if processing fails part-way through
    cap.release()
    out.release()

print(f"Video saved at: {output_path}")
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt
!pip install torch torchvision torchaudio
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
Perfect code
import torch
import cv2
import numpy as np
# Constants for the distance estimate D = (F * W) / P.
F = 700  # focal length in pixels; replace with actual focal length value
W = 60  # width of the object in cm; replace with the actual width of the object


def calculate_distance(P):
    """Estimate the distance of an object from the camera.

    Applies ``D = (F * W) / P`` using the module-level focal length ``F``
    (pixels) and object width ``W`` (cm).

    Args:
        P: Width of the object in pixels (e.g. its bounding-box width).

    Returns:
        The estimated distance in centimeters. A non-positive ``P`` carries
        no usable size information, so ``float('inf')`` is returned instead
        of raising ``ZeroDivisionError`` or yielding a negative distance.
    """
    if P <= 0:
        return float("inf")
    return (F * W) / P
# Load YOLOv5 model (pretrained)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load YOLOv5s pre-trained model

CAR_CLASS_ID = 2  # Class 2 corresponds to "car" in COCO dataset
MIN_CONFIDENCE = 0.5  # detections at or below this score are ignored

# Load the video
video_path = '/content/1075151066-preview.mp4'  # replace with actual path to your video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error: Video file not accessible.")
    # Exit with a non-zero status so callers can tell the run failed.
    raise SystemExit(1)

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Some containers report 0 FPS; fall back to a sane default so the writer
# still produces a playable file.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create a VideoWriter object to save the output video
output_path = '/content/output_video_with_distances.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 file
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

try:
    while True:
        ret, img = cap.read()
        if not ret:
            print("Failed to grab frame or end of video reached")
            break

        # OpenCV delivers BGR frames, while the hub model expects RGB input
        # for numpy images, so convert before running inference.
        results = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # Each prediction row is (x1, y1, x2, y2, confidence, class)
        detections = results.pred[0].cpu().numpy()

        cars = []
        for x1, y1, x2, y2, conf, cls in detections:
            if conf > MIN_CONFIDENCE and int(cls) == CAR_CLASS_ID:
                width = int(x2 - x1)
                # A zero-width box cannot be turned into a distance
                if width > 0:
                    cars.append((x1, y1, x2, y2, width))

        # If cars are detected, annotate only the closest one
        if cars:
            x1, y1, x2, y2, width = min(
                cars, key=lambda car: calculate_distance(car[4])
            )
            closest_distance = calculate_distance(width)

            # Use the detector's own corners; the box height is y2 - y1,
            # not the box width.
            cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            # Keep the label inside the frame when the box touches the top edge
            cv2.putText(img, f'{closest_distance:.2f} cm', (int(x1), max(int(y1 - 10), 25)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Write the processed frame to the output video
        out.write(img)
finally:
    # Release resources even if processing fails part-way through
    cap.release()
    out.release()

print(f"Video saved at: {output_path}")
!pip install gTTS
from IPython.display import Audio
from gtts import gTTS
import torch
import cv2
# Constants for the distance estimate D = (F * W) / P.
F = 700  # focal length in pixels; replace with actual focal length value
W = 60  # width of the object in cm; replace with the actual width of the object


def calculate_distance(P):
    """Estimate the distance of an object from the camera.

    Applies ``D = (F * W) / P`` using the module-level focal length ``F``
    (pixels) and object width ``W`` (cm).

    Args:
        P: Width of the object in pixels (e.g. its bounding-box width).

    Returns:
        The estimated distance in centimeters. A non-positive ``P`` carries
        no usable size information, so ``float('inf')`` is returned instead
        of raising ``ZeroDivisionError`` or yielding a negative distance.
    """
    if P <= 0:
        return float("inf")
    return (F * W) / P
# `display` is injected automatically inside notebooks; import it explicitly
# so this cell does not depend on that.
from IPython.display import display

# Load YOLOv5 model (pretrained)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Load YOLOv5s pre-trained model

CAR_CLASS_ID = 2  # Class 2 corresponds to "car" in COCO dataset
MIN_CONFIDENCE = 0.5  # detections at or below this score are ignored
ALERT_10M_CM = 1000  # 10 meters, expressed in centimeters
ALERT_5M_CM = 500  # 5 meters, expressed in centimeters


def _play_alert(message, mp3_path):
    """Synthesize *message* to *mp3_path* with gTTS and autoplay it."""
    gTTS(message, lang="en").save(mp3_path)
    display(Audio(mp3_path, autoplay=True))


# Load the video
video_path = '/content/1075151066-preview.mp4'  # replace with actual path to your video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error: Video file not accessible.")
    # Exit with a non-zero status so callers can tell the run failed.
    raise SystemExit(1)

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Some containers report 0 FPS; fall back to a sane default so the writer
# still produces a playable file.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create a VideoWriter object to save the output video
output_path = '/content/output_video_with_alerts.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 file
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

played_10m = False  # Track if 10m sound has been played
played_5m = False  # Track if 5m sound has been played

try:
    while True:
        ret, img = cap.read()
        if not ret:
            print("End of video reached or failed to grab frame.")
            break

        # OpenCV delivers BGR frames, while the hub model expects RGB input
        # for numpy images, so convert before running inference.
        results = model(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # Each prediction row is (x1, y1, x2, y2, confidence, class)
        detections = results.pred[0].cpu().numpy()

        cars = []
        for x1, y1, x2, y2, conf, cls in detections:
            if conf > MIN_CONFIDENCE and int(cls) == CAR_CLASS_ID:
                width = int(x2 - x1)
                # A zero-width box cannot be turned into a distance
                if width > 0:
                    cars.append((x1, y1, x2, y2, width))

        # If cars are detected, annotate and alert on the closest one
        if cars:
            x1, y1, x2, y2, width = min(
                cars, key=lambda car: calculate_distance(car[4])
            )
            closest_distance = calculate_distance(width)

            # Use the detector's own corners; the box height is y2 - y1,
            # not the box width.
            cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            # Keep the label inside the frame when the box touches the top edge
            cv2.putText(img, f'{closest_distance:.2f} cm', (int(x1), max(int(y1 - 10), 25)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            # Handle sound alerts. The tighter threshold is tested first so a
            # car that first appears within 5 m gets the 5 m warning rather
            # than the weaker 10 m one.
            if closest_distance <= ALERT_5M_CM:
                if not played_5m:
                    _play_alert("Caution! Car less than 5 meters away!",
                                "/content/alert_5m.mp3")
                    played_5m = True
                # The 5 m warning supersedes the 10 m one until the car leaves
                played_10m = True
            elif closest_distance <= ALERT_10M_CM:
                if not played_10m:
                    _play_alert("There is a car less than 10 meters away!",
                                "/content/alert_10m.mp3")
                    played_10m = True
            else:
                # Reset flags if the car moves away
                played_10m = False
                played_5m = False

        # Write the processed frame to the output video
        out.write(img)
finally:
    # Release resources even if processing fails part-way through
    cap.release()
    out.release()

print(f"Video saved at: {output_path}")