首页 > 解决方案 > 构建面罩应用程序时出现 OpenCV 索引错误

问题描述

我想做一个 OpenCV 程序:它可以录制视频,并用一张面具图片遮住我的脸。

我的代码:

import numpy as np
import cv2
import os
import time
import face_recognition as fr
import pkg_resources

# Output file name, recording frame rate, and the resolution label that is
# later passed to get_dims().
filename = "THIS_IS_A_TEST.mp4"
frames_per_seconds = 24.0
my_res = "720p"

# Haar cascade used for frontal-face detection.
face_cascade = cv2.CascadeClassifier(
    'C:\\Users\\jack\\Desktop\\haarcascade_frontalface_default.xml'
)
#eyes_cascade = cv2.CascadeClassifier('C:\\Users\\jack\\Desktop\\frontalEyes35x16.xml')

# Overlay image, read with the "unchanged" flag (numeric value -1) so the
# file's own channel layout is kept.
mask = cv2.imread(
    "C:\\Users\\jack\\Desktop\\Blogger_Video_Recorder\\TheMask.png",
    cv2.IMREAD_UNCHANGED,
)

   
def change_res(cap, width, height):
    """Request a new frame size from the capture object ``cap``.

    Property id 3 is set to ``width`` and property id 4 to ``height``
    (OpenCV's frame-width and frame-height capture properties).
    """
    for prop_id, value in ((3, width), (4, height)):
        cap.set(prop_id, value)


# Supported resolution labels mapped to (width, height).
STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    If neither ``width`` nor ``height`` is given, the input is returned as is.
    If ``width`` is given it determines the scale (``height`` is then not
    consulted); otherwise ``height`` determines it. ``inter`` is forwarded to
    ``cv2.resize`` as the interpolation mode.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Scale by the requested width and derive the matching height.
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        # Scale by the requested height and derive the matching width.
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, target, interpolation = inter)
def get_dims(cap, res="1080p"):
    """Apply the frame size for label ``res`` to ``cap`` and return it.

    Labels missing from STD_DIMENSIONS fall back to the "480p" entry.
    Returns the ``(width, height)`` tuple that was requested from the capture.
    """
    width, height = STD_DIMENSIONS.get(res, STD_DIMENSIONS["480p"])
    change_res(cap, width, height)
    return width, height

# FourCC codes available to the writer, keyed by container name.
VIDEO_TYPE = {"mp4": cv2.VideoWriter_fourcc(*"XVID")}

def get_video_type(filename):
    """Return the FourCC code to use for ``filename``.

    The lookup key is the extension exactly as ``os.path.splitext`` returns
    it, i.e. including the leading dot (".mp4"). The table is keyed without
    a dot, so the lookup does not match and the "mp4" entry is returned as
    the default.
    """
    _, ext = os.path.splitext(filename)
    return VIDEO_TYPE.get(ext, VIDEO_TYPE["mp4"])

# Open camera 0, request the configured resolution and create the writer for
# the output file using the same (width, height).
cap = cv2.VideoCapture(0)
dims = get_dims(cap, res = my_res)
video_type_cv2 = get_video_type(filename)

out = cv2.VideoWriter(filename, video_type_cv2, frames_per_seconds, dims)

# Per frame: detect faces, copy the mask's non-transparent pixels over each
# face, write and show the result. Pressing "q" leaves the loop.
while True: 
    # NOTE(review): `ret` is never checked before `frame` is used.
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
    # Add a fourth (alpha) channel so 4-channel mask pixels can be assigned.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
    for (x, y, w, h) in faces:
        # NOTE(review): both crops use h for the column extent (x:x+h), not w.
        roi_gray = gray[y:y+h, x:x+h]
        roi_color = frame[y:y+h, x:x+h]
        # NOTE(review): roi_gray is already a crop, yet it is sliced again with
        # frame coordinates; roi_faces is not used afterwards.
        roi_faces = roi_gray[y:y+h, x:x+w]
        mask2 = image_resize(mask.copy(), width=w)

        # Image arrays are (rows, cols, channels): mw is the row count, mh the
        # column count.
        mw, mh, mc = mask2.shape
        for i in range(0,mw):
            for j in range(0, mh):
                # Copy only pixels whose alpha value is non-zero.
                if mask2[i, j][3] != 0:
                    # NOTE(review): roi_color is a crop whose indices start at
                    # 0; adding the frame offsets y and x again indexes past
                    # it and raises IndexError once y + i reaches its height.
                    roi_color[y + i, x + j] = mask2[i, j]
        
    # Drop the alpha channel again before writing and displaying the frame.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)
    out.write(frame)
    cv2.imshow("frame", frame)
    if cv2.waitKey(20) & 0xFF == ord("q"):
        break

# Release the camera and the writer, then close the preview window.
cap.release()
out.release()
cv2.destroyAllWindows()

Traceback (most recent call last):

File "C:\Users\jack\Desktop\Blogger_Video_Recorder\tutorial#1.py", line 93, in <module>
roi_color[y + i, x + j] = mask2[i, j]
IndexError: index 426 is out of bounds for axis 0 with size 273

顺便说一句,我是在 Python IDLE 上运行这段代码的。非常感谢任何帮助 :)

标签: python opencv

解决方案


现有的大部分步骤(例如逐像素的双重循环)都不是必需的,可以直接用切片赋值:

draw[y:y+h, x:x+w] = mask2

但实际上还有几个问题。第一,按宽度缩放后,面具的高度与检测到的人脸框高度 h 并不一定相等(本例中面具更矮),因此不能直接用人脸框的 h 作为切片范围。不过,可以先取得缩放后面具的实际高度,再按这个高度确定它在原始帧上的粘贴区域,如下所示:

# Scale the mask to the width of the detected face.
mask2 = image_resize(mask.copy(), width=w)
# Image arrays are (rows, cols, channels), so mw holds the mask's row count
# (its height) and mh its column count.
mw, mh, mc = mask2.shape

# Paste the mask using its own row count for the vertical extent.
draw[y:y+mw, x:x+w] = mask2

注意:请不要使用 cv2.COLOR_BGR2BGRA 这一行颜色转换:

frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)

除非确有必要,因为它会把图像的通道数增加到 4 个;当您把只有 3 个通道的面具图像赋值进去时就会报错,例如:

ValueError: could not broadcast input array from shape (273,410,3) into shape (273,410,4)

当然也可以用其他办法解决这个问题,但没有必要,因为这段代码根本没有用到 BGRA(alpha 通道)。

完整代码如下:

import numpy as np
import cv2
import os
import time
import face_recognition as fr
import pkg_resources

# Output file name, recording frame rate, and the resolution label that is
# later passed to get_dims().
filename = "THIS_IS_A_TEST.mp4"
frames_per_seconds = 24.0
my_res = "720p"

# Frontal-face Haar cascade shipped inside the OpenCV Python package.
face_cascade = cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades ,'haarcascade_frontalface_default.xml'))
#eyes_cascade = cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades ,'frontalEyes35x16.xml'))
# A cascade that failed to load is simply empty; stop here with a clear
# message instead of failing later inside detectMultiScale.
if face_cascade.empty():
    raise FileNotFoundError(
        "could not load haarcascade_frontalface_default.xml from cv2.data.haarcascades"
    )

# Overlay image, read unchanged (flag -1). cv2.imread does not raise when the
# file is missing or unreadable, it returns None, so check explicitly.
mask = cv2.imread("test.jpg", -1)
if mask is None:
    raise FileNotFoundError("could not read mask image 'test.jpg'")

   
def change_res(cap, width, height):
    """Set the frame width and height on the capture object ``cap``.

    Uses the numeric capture-property ids 3 (frame width) and 4 (frame
    height).
    """
    frame_width_prop, frame_height_prop = 3, 4
    cap.set(frame_width_prop, width)
    cap.set(frame_height_prop, height)


# Resolution label -> (width, height).
STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: array whose first two ``shape`` entries are (rows, cols).
        width: target width in pixels. When given, it determines the scale
            and ``height`` is not consulted.
        height: target height in pixels, used only when ``width`` is None.
        inter: interpolation mode forwarded to ``cv2.resize``.

    Returns:
        The input itself when neither ``width`` nor ``height`` is given,
        otherwise the resized image.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: return the original image.
    if width is None and height is None:
        return image

    if width is None:
        # Scale by the requested height and derive the matching width.
        r = height / float(h)
        # Clamp to 1: truncation can yield 0, which cv2.resize rejects.
        dim = (max(1, int(w * r)), height)
    else:
        # Scale by the requested width and derive the matching height.
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, dim, interpolation = inter)
def get_dims(cap, res="1080p"):
    """Apply the frame size for label ``res`` to ``cap`` and return the size in use.

    Labels missing from STD_DIMENSIONS fall back to the "480p" entry. After
    the request is made, the size is read back from the capture, because a
    camera may not apply the requested resolution and the writer must be
    created with the size of the frames it will actually receive.

    Returns:
        ``(width, height)`` as reported by the capture, or the requested
        values when the capture reports no usable size.
    """
    width, height = STD_DIMENSIONS.get(res, STD_DIMENSIONS["480p"])
    change_res(cap, width, height)

    # cap.get returns floats; a non-positive value means no usable size.
    actual_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    actual_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    if actual_width > 0 and actual_height > 0:
        return actual_width, actual_height
    return width, height

# FourCC codes available to the writer, keyed by file extension without the
# leading dot.
# NOTE(review): "XVID" is paired with the .mp4 container here; whether the
# writer accepts that combination depends on the video backend — confirm.
VIDEO_TYPE = {
    "mp4": cv2.VideoWriter_fourcc(*"XVID")
}

def get_video_type(filename):
    """Return the FourCC code registered for the extension of ``filename``.

    ``os.path.splitext`` returns the extension with its leading dot
    (".mp4"), so the dot is stripped and the text lower-cased before the
    lookup. Unknown or missing extensions fall back to the "mp4" entry.
    """
    _, ext = os.path.splitext(filename)
    key = ext.lstrip(".").lower()
    return VIDEO_TYPE.get(key, VIDEO_TYPE["mp4"])

# Open camera 0, request the configured resolution and create the writer.
cap = cv2.VideoCapture(0)
dims = get_dims(cap, res = my_res)
video_type_cv2 = get_video_type(filename)

out = cv2.VideoWriter(filename, video_type_cv2, frames_per_seconds, dims)

try:
    # Per frame: detect faces, paste the mask over each one, then record and
    # display the result. Pressing "q" ends the recording.
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unavailable or stream ended).
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
        draw = frame.copy()
        frame_h, frame_w = draw.shape[:2]

        for (x, y, w, _h) in faces:
            # Scale the mask to the face width; its height follows from the
            # aspect ratio and generally differs from the face height.
            mask2 = image_resize(mask, width=w)
            if mask2.ndim == 2:
                # Single-channel mask: expand to BGR to match the frame.
                mask2 = cv2.cvtColor(mask2, cv2.COLOR_GRAY2BGR)

            # Clip the paste area to the frame so a face near the border
            # cannot produce a shape mismatch.
            mask_h, mask_w = mask2.shape[:2]
            y_end = min(y + mask_h, frame_h)
            x_end = min(x + mask_w, frame_w)
            patch = mask2[:y_end - y, :x_end - x]
            target = draw[y:y_end, x:x_end]

            if patch.shape[2] == 4:
                # Mask has an alpha channel: copy only non-transparent pixels.
                opaque = patch[:, :, 3] != 0
                target[opaque] = patch[:, :, :3][opaque]
            else:
                target[:] = patch

        # The writer was created for `dims`; resize frames of any other size
        # so they match what the writer expects.
        if (frame_w, frame_h) == tuple(dims):
            recorded = draw
        else:
            recorded = cv2.resize(draw, tuple(dims))
        out.write(recorded)

        cv2.imshow("frame", draw)
        if cv2.waitKey(5) & 0xFF == ord("q"):
            break
finally:
    # Always release the camera and finalize the output file, even on errors.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

旁注:最好使用 cv2.data.haarcascades 来获取 OpenCV 库自带的 xml 文件所在的目录。

可以用 os.path.join(cv2.data.haarcascades, 'u_file.xml') 得到在大多数操作系统上都有效的路径。

例如:

import os
import cv2

# Directory holding the Haar cascade xml files bundled with the OpenCV package.
root_data = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(os.path.join(root_data ,'haarcascade_frontalface_default.xml'))
# Use the eye cascade that ships with OpenCV; 'frontalEyes35x16.xml' is a
# third-party file that is not found in this directory.
eyes_cascade = cv2.CascadeClassifier(os.path.join(root_data ,'haarcascade_eye.xml'))

完毕..


推荐阅读