Commit e14f78e4 authored by Eric.Lee2021 🚴🏻

Added an ImageNet recognition module, along with partial voice recognition functionality

Parent a4cd2d1e
......@@ -43,12 +43,17 @@
```
detect_model_path=./latest_416.pt # hand detection model path
detect_model_arch=yolo_v3 # detection model type: yolo or yolo-tiny
yolo_anchor_scale=1.0 # yolo anchor scale, default 1
detect_conf_thres=0.5 # detection confidence threshold
detect_nms_thres=0.45 # detection NMS threshold
handpose_x_model_path=./ReXNetV1-size-256-wingloss102-0.1063.pth # 21-keypoint hand regression model path
handpose_x_model_arch=rexnetv1 # regression model architecture
classify_model_path=./imagenet_size-256_20210409.pth # classification model path
classify_model_arch=resnet_50 # classification model architecture
classify_model_classify_num=1000 # number of classes
camera_id = 0 # camera ID, usually 0; verify on your machine if not
vis_gesture_lines = True # True: visualize the click trajectory; False: do not
charge_cycle_step = 32 # click-stability counter steps, the click "charge ring"
```
......
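For reference, a minimal sketch of how a `key=value` config file in this format could be parsed into the `config` dict used below; the `parse_config` helper is hypothetical, not the project's actual reader:

```python
# Hypothetical minimal parser for the key=value config format shown above.
def parse_config(path):
    config = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.split("#", 1)[0].strip()  # drop inline comments
            if not line or "=" not in line:
                continue
            key, value = line.split("=", 1)
            config[key.strip()] = value.strip()   # values stay strings
    return config
```

Values are left as strings, which matches the `float(config[...])` casts seen in the code below.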
......@@ -30,6 +30,7 @@ import time
# load model component libraries
from hand_detect.yolo_v3_hand import yolo_v3_hand_model
from hand_keypoints.handpose_x import handpose_x_model
from classify_imagenet.imagenet_c import classify_imagenet_model
# load utility libraries
import sys
......@@ -190,13 +191,20 @@ def audio_process_recognize_up_edge(info_dict):
if (info_dict[g_]^gesture_dict[g_]) and info_dict[g_]==True:# rising edge of the Click gesture signal: the Click action starts
playsound("./materials/audio/sentences/IdentifyingObjectsWait.mp3")
playsound("./materials/audio/sentences/ObjectMayBeIdentified.mp3")
if info_dict["reco_msg"] is not None:
print("process - (audio_process_recognize_up_edge) reco_msg : {} ".format(info_dict["reco_msg"]))
doc_name = info_dict["reco_msg"]["label_msg"]["doc_name"]
reco_audio_file = "./materials/audio/imagenet_2012/{}.mp3".format(doc_name)
if os.access(reco_audio_file,os.F_OK):# check whether the audio file exists
playsound(reco_audio_file)
info_dict["reco_msg"] = None
gesture_dict[g_] = info_dict[g_]
except Exception as inst:
print(type(inst),inst) # exception instance
if info_dict["break"] == True:
break
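The `^` (XOR) test above is a compact rising-edge detector: the gesture flag changed since the last sample and is now True, so the action fires exactly once per click. A standalone sketch of the same idea:

```python
# Rising-edge detection on a boolean signal: fire once per False -> True step.
prev = False
for curr in [False, True, True, False, True]:
    if (prev ^ curr) and curr:   # changed AND now True => rising edge
        print("rising edge")     # fires on the 2nd and 5th samples only
    prev = curr
```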
'''
......@@ -209,13 +217,15 @@ def handpose_x_process(info_dict,config):
print("load model component ...")
# yolo v3 hand detection model initialization
hand_detect_model = yolo_v3_hand_model(conf_thres=float(config["detect_conf_thres"]),nms_thres=float(config["detect_nms_thres"]),
model_arch = config["detect_model_arch"],model_path = config["detect_model_path"])
model_arch = config["detect_model_arch"],model_path = config["detect_model_path"],yolo_anchor_scale = float(config["yolo_anchor_scale"]),
img_size = float(config["detect_input_size"]),
)
# handpose_x 21-keypoint regression model initialization
handpose_model = handpose_x_model(model_arch = config["handpose_x_model_arch"],model_path = config["handpose_x_model_path"])
#
gesture_model = None # placeholder, not yet implemented
#
object_recognize_model = None # recognition/classification model, placeholder
object_recognize_model = classify_imagenet_model(model_arch = config["classify_model_arch"],model_path = config["classify_model_path"]) # recognition/classification model
#
img_reco_crop = None
......@@ -309,8 +319,8 @@ def handpose_x_process(info_dict,config):
# check whether each hand's click state is stable and meets the configured threshold
flag_click_stable = judge_click_stabel(img,handpose_list,int(config["charge_cycle_step"]))
# decide whether to trigger the recognition audio and identify the selected target
img_reco_crop = audio_recognize(img,algo_img,img_reco_crop,object_recognize_model,info_dict,double_en_pts,flag_click_stable)
img_reco_crop,reco_msg = audio_recognize(img,algo_img,img_reco_crop,object_recognize_model,info_dict,double_en_pts,flag_click_stable)
# print(reco_msg)
cv2.putText(img, 'HandNum:[{}]'.format(len(hand_bbox)), (5,25),cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 0, 0),5)
cv2.putText(img, 'HandNum:[{}]'.format(len(hand_bbox)), (5,25),cv2.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255))
......@@ -342,6 +352,8 @@ def main_handpose_x(cfg_file):
g_info_dict["click_up_cnt"] = 0
g_info_dict["click_dw_cnt"] = 0
g_info_dict["reco_msg"] = None
print(" multiprocessing dict key:\n")
for key_ in g_info_dict.keys():
print( " -> ",key_)
......
#-*-coding:utf-8-*-
# date:2020-04-11
# author: Eric.Lee
# function : classify
import os
import json
import math
import time
from datetime import datetime
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from classify_imagenet.models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152
#
class classify_imagenet_model(object):
def __init__(self,
model_path = './components/classify_imagenet/weights/imagenet_size-256_20210409.pth',
model_arch = "resnet_50",
img_size= 256,
num_classes = 1000,
):
f = open("./components/classify_imagenet/imagenet_msg.json", encoding='utf-8')# read the label-message json file
dict_ = json.load(f)
f.close()
self.classify_dict = dict_
# print("-------------->>\n dict_ : \n",dict_)
#
print("classify model loading : ",model_path)
# print('use model : %s'%(model_arch))
if model_arch == 'resnet_18':
model_=resnet18(num_classes=num_classes, img_size=img_size)
elif model_arch == 'resnet_34':
model_=resnet34(num_classes=num_classes, img_size=img_size)
elif model_arch == 'resnet_50':
model_=resnet50(num_classes=num_classes, img_size=img_size)
elif model_arch == 'resnet_101':
model_=resnet101(num_classes=num_classes, img_size=img_size)
elif model_arch == 'resnet_152':
model_=resnet152(num_classes=num_classes, img_size=img_size)
else:
print('error: unknown model architecture : {}'.format(model_arch))
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
model_ = model_.to(device)
model_.eval() # set to inference mode
# print(model_) # print the model structure
# load the trained checkpoint
if os.access(model_path,os.F_OK):# checkpoint
chkpt = torch.load(model_path, map_location=device)
model_.load_state_dict(chkpt)
# print('load classify model : {}'.format(model_path))
self.model_ = model_
self.use_cuda = use_cuda
self.img_size = img_size
def predict(self, img, vis = False):# img is an aligned image crop
with torch.no_grad():
img_ = cv2.resize(img, (self.img_size,self.img_size), interpolation = cv2.INTER_CUBIC)
img_ = img_.astype(np.float32)
img_ = (img_-128.)/256.
img_ = img_.transpose(2, 0, 1)
img_ = torch.from_numpy(img_)
img_ = img_.unsqueeze_(0)
if self.use_cuda:
img_ = img_.cuda() # (bs, 3, h, w)
pre_ = self.model_(img_.float())
outputs = F.softmax(pre_,dim = 1)
outputs = outputs[0]
output = outputs.cpu().detach().numpy()
output = np.array(output)
max_index = np.argmax(output)
score_ = output[max_index]
# print("max_index:",max_index)
# print("name:",self.label_dict[max_index])
return max_index,self.classify_dict[str(max_index)],score_
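A minimal usage sketch for the class above; the weight and image paths are illustrative and must exist on disk, along with the `imagenet_msg.json` label file:

```python
# Hypothetical usage of classify_imagenet_model; paths are illustrative.
import cv2
from classify_imagenet.imagenet_c import classify_imagenet_model

model = classify_imagenet_model(model_arch="resnet_50",
                                model_path="./imagenet_size-256_20210409.pth")
img = cv2.imread("./example.jpg")                  # BGR image, any resolution
max_index, label_msg, score_ = model.predict(img)  # top-1 index, label dict, confidence
print(max_index, label_msg["doc_name"], score_)
```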
This diff is collapsed.
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000, img_size=224,dropout_factor = 1.):
self.inplanes = 64
self.dropout_factor = dropout_factor
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
# see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
# self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
assert img_size % 32 == 0
pool_kernel = int(img_size / 32)
self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)
self.dropout = nn.Dropout(self.dropout_factor)  # note: nn.Dropout takes the drop probability; the 1.0 default zeroes activations in train mode but is a no-op in eval()
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.fc(x)
return x
def load_model(model, pretrained_state_dict):
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
model.load_state_dict(pretrained_dict, strict=False)
if len(pretrained_dict) == 0:
print("[INFO] No params were loaded ...")
else:
for k, v in pretrained_state_dict.items():
if k in pretrained_dict:
print("==>> Load {} {}".format(k, v.size()))
else:
print("[INFO] Skip {} {}".format(k, v.size()))
return model
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
print("Load pretrained model from {}".format(model_urls['resnet18']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
model = load_model(model, pretrained_state_dict)
return model
def resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
print("Load pretrained model from {}".format(model_urls['resnet34']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
model = load_model(model, pretrained_state_dict)
return model
def resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
print("Load pretrained model from {}".format(model_urls['resnet50']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
model = load_model(model, pretrained_state_dict)
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
print("Load pretrained model from {}".format(model_urls['resnet101']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
model = load_model(model, pretrained_state_dict)
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
if pretrained:
# model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
print("Load pretrained model from {}".format(model_urls['resnet152']))
pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
model = load_model(model, pretrained_state_dict)
return model
if __name__ == "__main__":
input = torch.randn([32, 3, 256,256])
model = resnet34(False, num_classes=2, img_size=256)
output = model(input)
print(output.size())
#-*-coding:utf-8-*-
# date:2020-04-11
# author: Eric.Lee
# function: utils
import os
import shutil
import random
import cv2
import numpy as np
import json
def mkdir_(path, flag_rm=False):
if os.path.exists(path):
if flag_rm == True:
shutil.rmtree(path)
os.mkdir(path)
print('remove {} done ~ '.format(path))
else:
os.mkdir(path)
def plot_box(bbox, img, color=None, label=None, line_thickness=None):
tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl)# target bbox
if label:
tf = max(tl - 2, 1)
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # label text bbox
cv2.rectangle(img, c1, c2, color, -1) # filled label background rectangle
# draw the label text
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
class JSON_Encoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
else:
return super(JSON_Encoder, self).default(obj)
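`JSON_Encoder` plugs into the standard-library `json` API through the `cls` argument, which is how numpy scalars and arrays become serializable:

```python
# Route numpy types through JSON_Encoder via json's `cls` hook.
data = {"index": np.int64(3), "score": np.float32(0.97), "box": np.arange(4)}
print(json.dumps(data, cls=JSON_Encoder))  # {"index": 3, "score": 0.97..., "box": [0, 1, 2, 3]}
```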
#-*-coding:utf-8-*-
# date:2020-04-11
# author: Eric.Lee
# function : utils
import os
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import random
def get_acc(output, label):
total = output.shape[0]
_, pred_label = output.max(1)
num_correct = (pred_label == label).sum().item()
return num_correct / float(total)
def set_learning_rate(optimizer, lr):
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def set_seed(seed = 666):
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
def split_trainval_datasets(ops):
print(' --------------->>> split_trainval_datasets ')
train_split_datasets = []
train_split_datasets_label = []
val_split_datasets = []
val_split_datasets_label = []
for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('-')[0]), reverse=False)):
# print(' %s label is %s \n'%(doc,idx))
data_list = os.listdir(ops.train_path+doc)
random.shuffle(data_list)
cal_split_num = int(len(data_list)*ops.val_factor)
for i,file in enumerate(data_list):
if '.jpg' in file:
if i < cal_split_num:
val_split_datasets.append(ops.train_path+doc + '/' + file)
val_split_datasets_label.append(idx)
else:
train_split_datasets.append(ops.train_path+doc + '/' + file)
train_split_datasets_label.append(idx)
print(ops.train_path+doc + '/' + file,idx)
print('\n')
print('train_split_datasets len {}'.format(len(train_split_datasets)))
print('val_split_datasets len {}'.format(len(val_split_datasets)))
return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label
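A sketch of how `split_trainval_datasets` might be invoked; note it expects class folders whose names start with an integer index, since it sorts on `int(x.split('-')[0])`:

```python
# Hypothetical invocation; the dataset layout is assumed, e.g.
# ./train_datasets/0-cat/*.jpg, ./train_datasets/1-dog/*.jpg, ...
from argparse import Namespace
ops = Namespace(train_path="./train_datasets/", val_factor=0.1)  # 10% to val
train_x, train_y, val_x, val_y = split_trainval_datasets(ops)
```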
......@@ -236,6 +236,7 @@ class yolo_v3_hand_model(object):
def __init__(self,
model_path = './components/hand_detect/weights/latest_416-2021-02-19.pt',
model_arch = 'yolov3',
yolo_anchor_scale = 1.,
img_size=416,
conf_thres=0.16,
nms_thres=0.4,):
......@@ -250,7 +251,7 @@ class yolo_v3_hand_model(object):
#-----------------------------------------------------------------------
weights = model_path
if "tiny" in model_arch:
a_scalse = 416./img_size
a_scalse = 416./img_size*yolo_anchor_scale
anchors=[(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
anchors_new = [ (int(anchors[j][0]/a_scalse),int(anchors[j][1]/a_scalse)) for j in range(len(anchors)) ]
......
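The rescaling above keeps the tiny-YOLO anchors, defined for a 416x416 input, proportional to the configured input size, with `yolo_anchor_scale` as an extra multiplier. A worked example:

```python
# With a 208x208 input the 416-based anchors are simply halved
# (yolo_anchor_scale left at its 1.0 default).
img_size, yolo_anchor_scale = 208, 1.0
a_scalse = 416. / img_size * yolo_anchor_scale   # 2.0
anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
anchors_new = [(int(w / a_scalse), int(h / a_scalse)) for w, h in anchors]
print(anchors_new)  # [(5, 7), (11, 13), (18, 29), (40, 41), (67, 84), (172, 159)]
```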
detect_model_path=./components/hand_detect/weights/latest_416-2021-02-19.pt
detect_model_arch=yolo_v3
detect_conf_thres=0.5
detect_model_path=./hand_416-2021-01-29.pt
detect_model_arch=yolo
detect_input_size = 416
yolo_anchor_scale=1.
detect_conf_thres=0.4
detect_nms_thres=0.45
handpose_x_model_path=./components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.1063.pth
handpose_x_model_path=./ReXNetV1-size-256-wingloss102-0.1041.pth
handpose_x_model_arch=rexnetv1
classify_model_path=./imagenet_size-256_20210409.pth
classify_model_arch=resnet_50
camera_id = 0
vis_gesture_lines = True
charge_cycle_step = 32
charge_cycle_step = 18
......@@ -241,6 +241,7 @@ def handpose_track_keypoints21_pipeline(img,hands_dict,hands_click_dict,track_in
'''
def audio_recognize(img,algo_img,img_reco_crop,object_recognize_model,info_dict,double_en_pts,flag_click_stable):
# start recognition
reco_msg = None
if (len(double_en_pts) == 2) and (flag_click_stable == True):
x1,y1 = int(double_en_pts[0][0]),int(double_en_pts[0][1])
......@@ -258,12 +259,16 @@ def audio_recognize(img,algo_img,img_reco_crop,object_recognize_model,info_dict,
if ((x2_-x1_)>0) and ((y2_-y1_)>0):
img_reco_crop = cv2.resize(algo_img[y1_:y2_,x1_:x2_,:], (130,130)) # region crop to recognize
print("------------------------>>> start object_recognize_model ")
max_index,label_msg,score_ = object_recognize_model.predict(img_reco_crop)
reco_msg = {"index":max_index,"label_msg":label_msg,"score":score_}
# print(" audio_recognize function ->> reco_msg : ",reco_msg)
info_dict["reco_msg"] = reco_msg
if img_reco_crop is not None: # draw the recognized region crop in the bottom-right corner
h,w,_ = img.shape
img[(h-131):(h-1),(w-131):(w-1),:] = img_reco_crop
cv2.rectangle(img, (w-131,h-131), (w-1,h-1), (225,66,66), 5)
#-----------------------------------------
info_dict["double_en_pts"] = True
cv2.rectangle(img, (x1_,y1_), (x2_,y2_), (225,255,62), 5)
......@@ -272,8 +277,9 @@ def audio_recognize(img,algo_img,img_reco_crop,object_recognize_model,info_dict,
cv2.putText(img, ' recognize{}'.format(""), (x1_,y1_),cv2.FONT_HERSHEY_COMPLEX, 0.65, (0,33,255),1)
else:
info_dict["double_en_pts"] = False
return img_reco_crop
return img_reco_crop,reco_msg
'''
Check whether each hand's click state is stable (the click-stability "charge ring"), i.e. whether the click persists past the configured threshold.
......
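The "charge ring" described above is essentially a debounce counter: each consecutive frame in the click state charges it, and recognition triggers only once the count reaches `charge_cycle_step`. A minimal sketch of the idea, not the project's exact implementation:

```python
# Debounce-counter sketch of the click-stability "charge ring".
class ChargeRing:
    def __init__(self, charge_cycle_step=18):  # cfg value from above
        self.step = charge_cycle_step
        self.charge = 0
    def update(self, clicked):
        self.charge = self.charge + 1 if clicked else 0  # charge or reset
        return self.charge >= self.step                  # click held long enough
```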