Code for the "How to Perform YOLO Object Detection using OpenCV in Python" Tutorial

yolov8_opencv.py

"""Detect objects in a single image with YOLOv8 and save the annotated copy.

Reads ``images/dog.jpg``, runs the ``yolov8n.pt`` model on it, draws one
labelled box per detection, shows the result in a window and writes it next
to the script as ``<name>_yolo8<ext>``.
"""
import numpy as np
import os
import cv2
import time
from ultralytics import YOLO

# define some parameters
CONFIDENCE = 0.5
font_scale = 1
thickness = 1

# loading the YOLOv8 model with the default weight file
model = YOLO("yolov8n.pt")

# loading all the class labels (objects); the context manager closes the file
with open("data/coco.names") as names_file:
    labels = names_file.read().strip().split("\n")

# generating colors for each object for later plotting
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

path_name = "images/dog.jpg"
image = cv2.imread(path_name)
# cv2.imread() returns None instead of raising when the file cannot be read
if image is None:
    raise FileNotFoundError(f"Could not read image: {path_name}")

file_name = os.path.basename(path_name)  # "dog.jpg"
# splitext() splits on the last dot only, so names like "my.dog.jpg" work too
filename, ext = os.path.splitext(file_name)  # "dog", ".jpg"

# measure how much it took in seconds
start = time.perf_counter()
# run inference on the image
# see: https://docs.ultralytics.com/modes/predict/#arguments for full list of arguments
results = model.predict(image, conf=CONFIDENCE)[0]
time_took = time.perf_counter() - start
print(f"Time took: {time_took:.2f}s")
print(results.boxes.data)

# loop over the detections
for data in results.boxes.data.tolist():
    # get the bounding box coordinates, confidence, and class id
    xmin, ymin, xmax, ymax, confidence, class_id = data
    # converting the coordinates and the class id to integers
    xmin = int(xmin)
    ymin = int(ymin)
    xmax = int(xmax)
    ymax = int(ymax)
    class_id = int(class_id)

    # draw a bounding box rectangle and label on the image
    color = [int(c) for c in colors[class_id]]
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color=color, thickness=thickness)
    text = f"{labels[class_id]}: {confidence:.2f}"

    # calculate text width & height to draw the transparent boxes as background of the text
    (text_width, text_height) = cv2.getTextSize(
        text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness
    )[0]
    text_offset_x = xmin
    text_offset_y = ymin - 5
    box_coords = (
        (text_offset_x, text_offset_y),
        (text_offset_x + text_width + 2, text_offset_y - text_height),
    )
    overlay = image.copy()
    cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
    # add opacity (transparency to the box)
    image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
    # now put the text (label: confidence %)
    cv2.putText(image, text, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=font_scale, color=(0, 0, 0), thickness=thickness)

# display output image
cv2.imshow("Image", image)
cv2.waitKey(0)
# save output image to disk (ext already carries its leading dot)
cv2.imwrite(filename + "_yolo8" + ext, image)

live_yolov8_opencv.py

"""Run YOLOv8 on live webcam frames, show them and record to ``output.avi``.

Press ``q`` in the preview window to stop. The capture device, the video
writer and the window are released even if an error interrupts the loop.
"""
import cv2
import numpy as np
import time
import sys
from ultralytics import YOLO

CONFIDENCE = 0.5
font_scale = 1
thickness = 1

# the context manager closes the labels file once it has been read
with open("data/coco.names") as names_file:
    labels = names_file.read().strip().split("\n")
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

model = YOLO("yolov8n.pt")

cap = cv2.VideoCapture(0)
# the first frame is only used to learn the frame size for the writer
grabbed, image = cap.read()
if not grabbed:
    cap.release()
    sys.exit("Could not read a frame from the webcam")
h, w = image.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*"XVID")
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (w, h))

try:
    while True:
        grabbed, image = cap.read()
        # cap.read() hands back (False, None) when no frame is available
        if not grabbed:
            break

        start = time.perf_counter()
        # run inference on the image
        # see: https://docs.ultralytics.com/modes/predict/#arguments for full list of arguments
        results = model.predict(image, conf=CONFIDENCE)[0]
        time_took = time.perf_counter() - start
        print("Time took:", time_took)

        # loop over the detections
        for data in results.boxes.data.tolist():
            # get the bounding box coordinates, confidence, and class id
            xmin, ymin, xmax, ymax, confidence, class_id = data
            # converting the coordinates and the class id to integers
            xmin = int(xmin)
            ymin = int(ymin)
            xmax = int(xmax)
            ymax = int(ymax)
            class_id = int(class_id)

            # draw a bounding box rectangle and label on the image
            color = [int(c) for c in colors[class_id]]
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color=color, thickness=thickness)
            text = f"{labels[class_id]}: {confidence:.2f}"

            # calculate text width & height to draw the transparent boxes as background of the text
            (text_width, text_height) = cv2.getTextSize(
                text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness
            )[0]
            text_offset_x = xmin
            text_offset_y = ymin - 5
            box_coords = (
                (text_offset_x, text_offset_y),
                (text_offset_x + text_width + 2, text_offset_y - text_height),
            )
            overlay = image.copy()
            cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
            # add opacity (transparency to the box)
            image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
            # now put the text (label: confidence %)
            cv2.putText(image, text, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=font_scale, color=(0, 0, 0), thickness=thickness)

        # end time to compute the fps
        end = time.perf_counter()
        # calculate the frame per second and draw it on the frame
        fps = f"FPS: {1 / (end - start):.2f}"
        cv2.putText(image, fps, (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 6)
        out.write(image)
        cv2.imshow("image", image)

        if ord("q") == cv2.waitKey(1):
            break
finally:
    # releasing the writer is what finalizes output.avi on disk
    cap.release()
    out.release()
    cv2.destroyAllWindows()

read_video_yolov8.py

"""Run YOLOv8 on a video file, show the frames and record to ``output.avi``.

Usage: ``python read_video_yolov8.py <video_file>``. Press ``q`` in the
preview window to stop early; otherwise the loop ends with the video.
"""
import cv2
import numpy as np
import time
import sys
from ultralytics import YOLO

# define some parameters
CONFIDENCE = 0.5
font_scale = 1
thickness = 1

# the context manager closes the labels file once it has been read
with open("data/coco.names") as names_file:
    labels = names_file.read().strip().split("\n")
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

# loading the YOLOv8 model with the default weight file
model = YOLO("yolov8n.pt")

# read the file from the command line
if len(sys.argv) < 2:
    sys.exit(f"usage: python {sys.argv[0]} <video_file>")
video_file = sys.argv[1]
cap = cv2.VideoCapture(video_file)
# the first frame is only used to learn the frame size for the writer
grabbed, image = cap.read()
if not grabbed:
    cap.release()
    sys.exit(f"Could not read a frame from: {video_file}")
h, w = image.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*"XVID")
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (w, h))

try:
    while True:
        grabbed, image = cap.read()
        # cap.read() hands back (False, None) once the video is exhausted;
        # stop here instead of letting the None frame reach the drawing code
        if not grabbed:
            break

        start = time.perf_counter()
        results = model.predict(image, conf=CONFIDENCE)[0]
        time_took = time.perf_counter() - start
        print("Time took:", time_took)

        # loop over the detections
        for data in results.boxes.data.tolist():
            # get the bounding box coordinates, confidence, and class id
            xmin, ymin, xmax, ymax, confidence, class_id = data
            # converting the coordinates and the class id to integers
            xmin = int(xmin)
            ymin = int(ymin)
            xmax = int(xmax)
            ymax = int(ymax)
            class_id = int(class_id)

            # draw a bounding box rectangle and label on the image
            color = [int(c) for c in colors[class_id]]
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color=color, thickness=thickness)
            text = f"{labels[class_id]}: {confidence:.2f}"

            # calculate text width & height to draw the transparent boxes as background of the text
            (text_width, text_height) = cv2.getTextSize(
                text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness
            )[0]
            text_offset_x = xmin
            text_offset_y = ymin - 5
            box_coords = (
                (text_offset_x, text_offset_y),
                (text_offset_x + text_width + 2, text_offset_y - text_height),
            )
            overlay = image.copy()
            cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
            # add opacity (transparency to the box)
            image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
            # now put the text (label: confidence %)
            cv2.putText(image, text, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=font_scale, color=(0, 0, 0), thickness=thickness)

        # end time to compute the fps
        end = time.perf_counter()
        # calculate the frame per second and draw it on the frame
        fps = f"FPS: {1 / (end - start):.2f}"
        cv2.putText(image, fps, (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 6)
        out.write(image)
        cv2.imshow("image", image)

        if ord("q") == cv2.waitKey(1):
            break
finally:
    # releasing the writer is what finalizes output.avi on disk
    cap.release()
    out.release()
    cv2.destroyAllWindows()

yolo_opencv.py

"""Detect objects in one image with YOLOv3 through OpenCV's DNN module.

Usage: ``python yolo_opencv.py <image_path>``. The annotated image is
written to the working directory as ``<name>_yolo3<ext>``.
"""
import cv2
import numpy as np
import time
import sys
import os

CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.5

# the neural network configuration
config_path = "cfg/yolov3.cfg"
# the YOLO net weights file
weights_path = "weights/yolov3.weights"

# loading all the class labels (objects); the context manager closes the file
with open("data/coco.names") as names_file:
    labels = names_file.read().strip().split("\n")

# generating colors for each object for later plotting
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

# load the YOLO network
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

# path_name = "images/city_scene.jpg"
if len(sys.argv) < 2:
    sys.exit(f"usage: python {sys.argv[0]} <image_path>")
path_name = sys.argv[1]
image = cv2.imread(path_name)
# cv2.imread() returns None instead of raising when the file cannot be read
if image is None:
    sys.exit(f"Could not read image: {path_name}")

file_name = os.path.basename(path_name)
# splitext() splits on the last dot only, so names like "my.photo.jpg" work too
filename, ext = os.path.splitext(file_name)

h, w = image.shape[:2]
# create 4D blob
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
# sets the blob as the input of the network
net.setInput(blob)

# get all the layer names
ln = net.getLayerNames()
# getUnconnectedOutLayers() yields either an Nx1 or a flat array of 1-based
# layer ids depending on the OpenCV build; flattening handles both shapes
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

# feed forward (inference) and get the network output
# measure how much it took in seconds
start = time.perf_counter()
layer_outputs = net.forward(ln)
time_took = time.perf_counter() - start
print(f"Time took: {time_took:.2f}s")

boxes, confidences, class_ids = [], [], []

# loop over each of the layer outputs
for output in layer_outputs:
    # loop over each of the object detections
    for detection in output:
        # extract the class id (label) and confidence (as a probability) of
        # the current object detection
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        # discard weak predictions by ensuring the detected
        # probability is greater than the minimum probability
        if confidence > CONFIDENCE:
            # scale the bounding box coordinates back relative to the
            # size of the image, keeping in mind that YOLO actually
            # returns the center (x, y)-coordinates of the bounding
            # box followed by the boxes' width and height
            box = detection[:4] * np.array([w, h, w, h])
            (centerX, centerY, width, height) = box.astype("int")
            # use the center (x, y)-coordinates to derive the top
            # and left corner of the bounding box
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            # update our list of bounding box coordinates, confidences,
            # and class IDs
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            class_ids.append(class_id)

# perform the non maximum suppression given the scores defined before
idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)

font_scale = 1
thickness = 1

# loop over the indexes we are keeping; an empty NMS result flattens to an
# empty array, so nothing is drawn when there are no detections
for i in np.asarray(idxs).flatten():
    # extract the bounding box coordinates (separate names so the image
    # dimensions in w and h are not overwritten)
    x, y = boxes[i][0], boxes[i][1]
    box_w, box_h = boxes[i][2], boxes[i][3]
    # draw a bounding box rectangle and label on the image
    color = [int(c) for c in colors[class_ids[i]]]
    cv2.rectangle(image, (x, y), (x + box_w, y + box_h), color=color, thickness=thickness)
    text = f"{labels[class_ids[i]]}: {confidences[i]:.2f}"
    # calculate text width & height to draw the transparent boxes as background of the text
    (text_width, text_height) = cv2.getTextSize(
        text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness
    )[0]
    text_offset_x = x
    text_offset_y = y - 5
    box_coords = (
        (text_offset_x, text_offset_y),
        (text_offset_x + text_width + 2, text_offset_y - text_height),
    )
    overlay = image.copy()
    cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
    # add opacity (transparency to the box)
    image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
    # now put the text (label: confidence %)
    cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=font_scale, color=(0, 0, 0), thickness=thickness)

# cv2.imshow("image", image)
# if cv2.waitKey(0) == ord("q"):
#     pass

# ext already carries its leading dot
cv2.imwrite(filename + "_yolo3" + ext, image)

live_yolo_opencv.py

"""Run YOLOv3 (OpenCV DNN) on live webcam frames and display the detections.

Press ``q`` in the preview window to stop.
"""
import cv2
import numpy as np
import time

CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.5
config_path = "cfg/yolov3.cfg"
weights_path = "weights/yolov3.weights"
font_scale = 1
thickness = 1

# the context manager closes the labels file once it has been read
with open("data/coco.names") as names_file:
    LABELS = names_file.read().strip().split("\n")
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

ln = net.getLayerNames()
# getUnconnectedOutLayers() yields either an Nx1 or a flat array of 1-based
# layer ids depending on the OpenCV build; flattening handles both shapes
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

cap = cv2.VideoCapture(0)

try:
    while True:
        grabbed, image = cap.read()
        # cap.read() hands back (False, None) when no frame is available
        if not grabbed:
            break

        h, w = image.shape[:2]
        blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        start = time.perf_counter()
        layer_outputs = net.forward(ln)
        time_took = time.perf_counter() - start
        print("Time took:", time_took)

        boxes, confidences, class_ids = [], [], []

        # loop over each of the layer outputs
        for output in layer_outputs:
            # loop over each of the object detections
            for detection in output:
                # extract the class id (label) and confidence (as a probability) of
                # the current object detection
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                # discard weak predictions by ensuring the detected
                # probability is greater than the minimum probability
                if confidence > CONFIDENCE:
                    # scale the bounding box coordinates back relative to the
                    # size of the image, keeping in mind that YOLO actually
                    # returns the center (x, y)-coordinates of the bounding
                    # box followed by the boxes' width and height
                    box = detection[:4] * np.array([w, h, w, h])
                    (centerX, centerY, width, height) = box.astype("int")
                    # use the center (x, y)-coordinates to derive the top
                    # and left corner of the bounding box
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    # update our list of bounding box coordinates, confidences,
                    # and class IDs
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # perform the non maximum suppression given the scores defined before
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)

        # loop over the indexes we are keeping; an empty NMS result flattens
        # to an empty array, so nothing is drawn when there are no detections
        for i in np.asarray(idxs).flatten():
            # extract the bounding box coordinates (separate names so the
            # frame dimensions in w and h are not overwritten)
            x, y = boxes[i][0], boxes[i][1]
            box_w, box_h = boxes[i][2], boxes[i][3]
            # draw a bounding box rectangle and label on the image
            # (the lookup tables in this script are the upper-case LABELS / COLORS)
            color = [int(c) for c in COLORS[class_ids[i]]]
            cv2.rectangle(image, (x, y), (x + box_w, y + box_h), color=color, thickness=thickness)
            text = f"{LABELS[class_ids[i]]}: {confidences[i]:.2f}"
            # calculate text width & height to draw the transparent boxes as background of the text
            (text_width, text_height) = cv2.getTextSize(
                text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness
            )[0]
            text_offset_x = x
            text_offset_y = y - 5
            box_coords = (
                (text_offset_x, text_offset_y),
                (text_offset_x + text_width + 2, text_offset_y - text_height),
            )
            overlay = image.copy()
            cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
            # add opacity (transparency to the box)
            image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
            # now put the text (label: confidence %)
            cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=font_scale, color=(0, 0, 0), thickness=thickness)

        cv2.imshow("image", image)
        if ord("q") == cv2.waitKey(1):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

read_video.py

"""Run YOLOv3 (OpenCV DNN) on a video file, show it and record ``output.avi``.

Usage: ``python read_video.py <video_file>``. Press ``q`` in the preview
window to stop early; otherwise the loop ends with the video.
"""
import cv2
import numpy as np
import time
import sys

CONFIDENCE = 0.5
SCORE_THRESHOLD = 0.5
IOU_THRESHOLD = 0.5
config_path = "cfg/yolov3.cfg"
weights_path = "weights/yolov3.weights"
font_scale = 1
thickness = 1

# the context manager closes the labels file once it has been read
with open("data/coco.names") as names_file:
    labels = names_file.read().strip().split("\n")
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

net = cv2.dnn.readNetFromDarknet(config_path, weights_path)

ln = net.getLayerNames()
# getUnconnectedOutLayers() yields either an Nx1 or a flat array of 1-based
# layer ids depending on the OpenCV build; flattening handles both shapes
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

# read the file from the command line
if len(sys.argv) < 2:
    sys.exit(f"usage: python {sys.argv[0]} <video_file>")
video_file = sys.argv[1]
cap = cv2.VideoCapture(video_file)
# the first frame is only used to learn the frame size for the writer
grabbed, image = cap.read()
if not grabbed:
    cap.release()
    sys.exit(f"Could not read a frame from: {video_file}")
h, w = image.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*"XVID")
out = cv2.VideoWriter("output.avi", fourcc, 20.0, (w, h))

try:
    while True:
        grabbed, image = cap.read()
        # cap.read() hands back (False, None) once the video is exhausted
        if not grabbed:
            break

        h, w = image.shape[:2]
        blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        start = time.perf_counter()
        layer_outputs = net.forward(ln)
        time_took = time.perf_counter() - start
        print("Time took:", time_took)

        boxes, confidences, class_ids = [], [], []

        # loop over each of the layer outputs
        for output in layer_outputs:
            # loop over each of the object detections
            for detection in output:
                # extract the class id (label) and confidence (as a probability) of
                # the current object detection
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                # discard weak predictions by ensuring the detected
                # probability is greater than the minimum probability
                if confidence > CONFIDENCE:
                    # scale the bounding box coordinates back relative to the
                    # size of the image, keeping in mind that YOLO actually
                    # returns the center (x, y)-coordinates of the bounding
                    # box followed by the boxes' width and height
                    box = detection[:4] * np.array([w, h, w, h])
                    (centerX, centerY, width, height) = box.astype("int")
                    # use the center (x, y)-coordinates to derive the top
                    # and left corner of the bounding box
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    # update our list of bounding box coordinates, confidences,
                    # and class IDs
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # perform the non maximum suppression given the scores defined before
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)

        # loop over the indexes we are keeping; an empty NMS result flattens
        # to an empty array, so nothing is drawn when there are no detections
        for i in np.asarray(idxs).flatten():
            # extract the bounding box coordinates (separate names so the
            # frame dimensions in w and h are not overwritten)
            x, y = boxes[i][0], boxes[i][1]
            box_w, box_h = boxes[i][2], boxes[i][3]
            # draw a bounding box rectangle and label on the image
            color = [int(c) for c in colors[class_ids[i]]]
            cv2.rectangle(image, (x, y), (x + box_w, y + box_h), color=color, thickness=thickness)
            text = f"{labels[class_ids[i]]}: {confidences[i]:.2f}"
            # calculate text width & height to draw the transparent boxes as background of the text
            (text_width, text_height) = cv2.getTextSize(
                text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness
            )[0]
            text_offset_x = x
            text_offset_y = y - 5
            box_coords = (
                (text_offset_x, text_offset_y),
                (text_offset_x + text_width + 2, text_offset_y - text_height),
            )
            overlay = image.copy()
            cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
            # add opacity (transparency to the box)
            image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
            # now put the text (label: confidence %)
            cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=font_scale, color=(0, 0, 0), thickness=thickness)

        out.write(image)
        cv2.imshow("image", image)

        if ord("q") == cv2.waitKey(1):
            break
finally:
    # releasing the writer is what finalizes output.avi on disk
    cap.release()
    out.release()
    cv2.destroyAllWindows()

yolo.py (PyTorch) requires darknet.py and utils.py.

import cv2
import matplotlib.pyplot as plt
from utils import *
from darknet import Darknet

# Input files: network definition, trained weights and class-name list
cfg_file = "cfg/yolov3.cfg"
weight_file = "weights/yolov3.weights"
namesfile = "data/coco.names"

# Thresholds handed to detect_objects() below
score_threshold = 0.6
iou_threshold = 0.4

# Build the network from its config, then load the trained weights into it
m = Darknet(cfg_file)
m.load_weights(weight_file)
class_names = load_class_names(namesfile)
# m.print_network()

# OpenCV loads images as BGR; convert to RGB before resizing and plotting
original_image = cv2.cvtColor(
    cv2.imread("images/city_scene.jpg"),
    cv2.COLOR_BGR2RGB,
)
# Resize a copy to the network's input size; the original is kept for plotting
img = cv2.resize(original_image, (m.width, m.height))

# detect the objects
boxes = detect_objects(m, img, iou_threshold, score_threshold)

# plot the image with the bounding boxes and corresponding object class labels
plot_boxes(original_image, boxes, class_names, plot_labels=True)

Ethical Hacking with Python EBook - Topic - Top

New Tutorials

Building a Full-Stack RAG Chatbot with FastAPI, OpenAI, and Streamlit

How to Recover Deleted Files with Python

How to Use Python to Track Google Search Results and Reviews Over Time

YouTube Video Transcription Summarization with Python

Getting Started with Python for SaaS Applications

Movatterモバイル変換

Code for the "How to Perform YOLO Object Detection using OpenCV in Python" Tutorial

Tags

New Tutorials

Popular Tutorials