YOLOx 调用USB摄像头识别
yolox默认调用摄像头0的命令如下:
python tools/demo.py webcam -n yolox-s -c ./yolox_s.pth --conf 0.3 --nms 0.65 --tsize 640
如果加上“--save_result”,执行之后,不会有图像显示,但会在“YOLOX-main\YOLOX_outputs\yolox_s\vis_res”目录下,生成一个“camera.mp4”视频文件。
demo.py的精简代码:
执行命令:
python tools/my_demo.py
import argparse
import os
import time
from loguru import logger
import cv2
import torch
from yolox.data.data_augment import ValTransform
from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp
from yolox.utils import postprocess, vis
class Predictor(object):
    """Wraps a YOLOX model for single-image inference and visualization."""

    def __init__(
        self,
        model,
        exp,
        cls_names=COCO_CLASSES,
        trt_file=None,
        decoder=None,
        device="cpu",
        fp16=False,
        legacy=False,
    ):
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform(legacy=legacy)
        if trt_file is not None:
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))
            # One dummy forward pass through the original model before it
            # is swapped out for the TensorRT module.
            dummy = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
            self.model(dummy)
            self.model = model_trt

    def inference(self, img):
        """Run detection on one image (file path or BGR ndarray).

        Returns ``(outputs, img_info)``: the post-processed detections and a
        dict holding the raw image plus its metadata and resize ratio.
        """
        img_info = {"id": 0}
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        # Scale factor used to letterbox the frame into the network input.
        scale = min(
            self.test_size[0] / img.shape[0],
            self.test_size[1] / img.shape[1],
        )
        img_info["ratio"] = scale

        tensor, _ = self.preproc(img, None, self.test_size)
        tensor = torch.from_numpy(tensor).unsqueeze(0).float()
        if self.device == "gpu":
            tensor = tensor.cuda()
            if self.fp16:
                tensor = tensor.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            raw_outputs = self.model(tensor)
            if self.decoder is not None:
                raw_outputs = self.decoder(raw_outputs, dtype=raw_outputs.type())
            detections = postprocess(
                raw_outputs,
                self.num_classes,
                self.confthre,
                self.nmsthre,
                class_agnostic=True,
            )
            # logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        return detections, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        """Draw detections scoring above *cls_conf* onto the raw image."""
        ratio = img_info["ratio"]
        raw_img = img_info["raw_img"]
        if output is None:
            return raw_img
        output = output.cpu()
        # Detected object boxes.
        bboxes = output[:, 0:4]
        # Map boxes from the resized network frame back to the original
        # resolution (in-place, as upstream YOLOX does).
        bboxes /= ratio
        # Class index of each detection.
        cls = output[:, 6]
        # Combined score per detection (objectness * class confidence).
        scores = output[:, 4] * output[:, 5]
        logger.info(bboxes)
        logger.info(cls)
        logger.info(scores)
        # cls_conf is the minimum score threshold for drawing.
        return vis(raw_img, bboxes, scores, cls, cls_conf, self.cls_names)
def imageflow_demo(predictor):
    """Stream frames from USB camera 0, run detection, and show the result.

    Press ESC, ``q`` or ``Q`` in the display window to quit.

    Args:
        predictor: a ``Predictor`` whose ``inference``/``visual`` methods
            take a BGR frame and return an annotated frame.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        logger.error("Cannot open camera 0")
        return
    try:
        while True:
            ret_val, frame = cap.read()
            if not ret_val:
                # Camera disconnected or stream ended.
                break
            outputs, img_info = predictor.inference(frame)
            result_frame = predictor.visual(outputs[0], img_info, 0.35)
            cv2.imshow("yolox", result_frame)
            ch = cv2.waitKey(1)
            if ch == 27 or ch == ord("q") or ch == ord("Q"):
                break
    finally:
        # Fix: the original leaked the capture device and left the
        # window open on exit.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == "__main__":
    # Build the yolox-s experiment and mirror the CLI command documented
    # above: --conf 0.3 --nms 0.65 --tsize 640.
    exp = get_exp(None, "yolox-s")
    # Fix: the original never set test_conf, so the exp default confidence
    # threshold was used instead of the documented 0.3.
    exp.test_conf = 0.3
    exp.nmsthre = 0.65
    exp.test_size = (640, 640)

    model = exp.get_model()
    # GPU usage (uncomment to enable):
    # model.cuda()
    # model.half()  # to FP16
    model.eval()

    # Load the pretrained checkpoint on CPU regardless of where it was saved.
    ckpt = torch.load("yolox_s.pth", map_location="cpu")
    model.load_state_dict(ckpt["model"])

    predictor = Predictor(model, exp, COCO_CLASSES, None, None, "cpu", False, False)
    imageflow_demo(predictor)