Hello, I would like to ask, when I use demo_body script to reason about local images, the results are crazy, I use body-only Pose2Pose fine-tuned on AGORA.The result plot is shown below

Dong09 commented

this is my code

import sys
import os
import os.path as osp
import argparse
import numpy as np
import cv2
import torch
import torchvision.transforms as transforms
from torch.nn.parallel.data_parallel import DataParallel
import torch.backends.cudnn as cudnn
from pycocotools.coco import COCO

from config import cfg
from model import get_model
from utils.preprocessing import process_bbox, generate_patch_image
from utils.human_models import smpl, smpl_x, mano, flame
# from utils.vis import render_mesh, save_obj
import json

def load_img(path, order='RGB'):
    img = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    if not isinstance(img, np.ndarray):
        raise IOError("Fail to read %s" % path)

    if order == 'RGB':
        img = img[:, :, ::-1].copy()

    img = img.astype(np.float32)
    return img

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',default='0', type=str, dest='gpu_ids')
    parser.add_argument('--img_path', type=str, default=r'image_path')
    parser.add_argument('--start', type=str, default=200)
    parser.add_argument('--end', type=str, default=385)
    parser.add_argument('--output_folder', type=str, default='demo_output')
    args = parser.parse_args()

    # test gpus
    if not args.gpu_ids:
        assert 0, print("Please set proper gpu ids")

    if '-' in args.gpu_ids:
        gpus = args.gpu_ids.split('-')
        gpus[0] = int(gpus[0])
        gpus[1] = int(gpus[1]) + 1
        args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus))))
    return args

args = parse_args()
cfg.set_args(args.gpu_ids, 'body')
cudnn.benchmark = True

# snapshot load
model_path = r'../../output/model_dump/snapshot_12.pth.tar'
assert osp.exists(model_path), 'Cannot find model at ' + model_path
print('Load checkpoint from {}'.format(model_path))
model = get_model('test')
model = model.cuda()
# model = DataParallel(model).cuda()
ckpt = torch.load(model_path)
model.load_state_dict(ckpt['network'], strict=False)

start = int(args.start)
end = start + int(args.end)
for frame in range(start, end):
    img_path = args.img_path + f'\\WIN_20231026_13_35_47_Pro_{int(frame):08d}.png'
    transform = transforms.ToTensor()
    original_img = load_img(img_path)
    vis_img = original_img.copy()
    original_img_height, original_img_width = original_img.shape[:2]
    os.makedirs(args.output_folder, exist_ok=True)

    # prepare bbox
    bbox = [1080, 215, 1636-1080, 1079-215] # xmin, ymin, width, height
    bbox = process_bbox(bbox, original_img_width, original_img_height)
    img, img2bb_trans, bb2img_trans = generate_patch_image(original_img, bbox, 1.0, 0.0, False, cfg.input_img_shape) 
    img = transform(img.astype(np.float32))/255
    img = img.cuda()[None,:,:,:]
    # forward
    inputs = {'img': img}
    targets = {}
    meta_info = {}
    with torch.no_grad():
        out = model(inputs, targets, meta_info, 'test')
    mesh = out['smpl_mesh_cam'].detach().cpu().numpy()[0]

    # vis_img = original_img.copy()
    focal = [cfg.focal[0] / cfg.input_img_shape[1] * bbox[2], cfg.focal[1] / cfg.input_img_shape[0] * bbox[3]]
    princpt = [cfg.princpt[0] / cfg.input_img_shape[1] * bbox[2] + bbox[0], cfg.princpt[1] / cfg.input_img_shape[0] * bbox[3] + bbox[1]]
    rendered_img = render_mesh(vis_img, mesh, smpl.face, {'focal': focal, 'princpt': princpt})
    cv2.imwrite(f'render_body_WIN_20231026_13_35_47_Pro_{int(frame):08d}.jpg', rendered_img)

Your demo code works for the images in this folder?

You should get the same results as the result images in that folder.

I use the (1920,1080,3) image as input, and then I get the bbox on the (1920,1080) shape through the MediaPipe overall model, and the rest of the steps are exactly the same as in the demo.
here is my code:

model = get_model('test')
model = model.cuda()
# model = DataParallel(model).cuda()
ckpt = torch.load(model_path)
model.load_state_dict(ckpt['network'], strict=False)

mp_holistic = # Holistic model
mp_drawing = # Drawing utilities
holistic = mp_holistic.Holistic(static_image_mode=True,

start = int(args.start)
end = start + int(args.end)
for frame in range(start, end):
    img_path = args.img_path + f'\\WIN_20231027_17_43_31_Pro_{int(frame):08d}.png'
    transform = transforms.ToTensor()
    original_img = load_img(img_path)
    vis_img = original_img.copy()
    original_img_height, original_img_width = original_img.shape[:2]
    os.makedirs(args.output_folder, exist_ok=True)
    imgRGB1 = cv2.imread(img_path)
    imgRGB2 = cv2.cvtColor(imgRGB1, cv2.COLOR_BGR2RGB)

    result = holistic.process(imgRGB2)
    pose_coor_list = np.zeros((33,2))
    bbox = []
    if result.pose_landmarks:
        for i, lm in enumerate(result.pose_landmarks.landmark):
            xPos = int(lm.x * original_img_width)
            yPos = int(lm.y * original_img_height)
            pose_coor_list[i][0] = xPos
            pose_coor_list[i][1] = yPos
        mx = int(np.min(pose_coor_list[:,0])) if np.min(pose_coor_list[:,0])>0 else 0
        my = int(np.min(pose_coor_list[:,1])) if np.min(pose_coor_list[:,1])>0 else 0
        xm = int(np.max(pose_coor_list[:,0])) if np.max(pose_coor_list[:,0])<original_img_width else original_img_width
        ym = int(np.max(pose_coor_list[:,1])) if np.max(pose_coor_list[:,1])<original_img_height else original_img_height

        bbox = [mx,my,xm-mx,ym-my]
    bbox = process_bbox(bbox, original_img_width, original_img_height)
    img, img2bb_trans, bb2img_trans = generate_patch_image(original_img, bbox, 1.0, 0.0, False, cfg.input_img_shape) 
    img = transform(img.astype(np.float32))/255
    img = img.cuda()[None,:,:,:]
    # forward
    inputs = {'img': img}
    targets = {}
    meta_info = {}
    with torch.no_grad():
        out = model(inputs, targets, meta_info, 'test')
    mesh = out['smpl_mesh_cam'].detach().cpu().numpy()[0]

Please make sure given my checkpoint, you should get the same results as mine.

Thank you for your patience and answers.