pbcquoc/vietocr

Predict trên 1 dataset lớn

hungtrieu07 opened this issue · 1 comments

Chào anh. Em đang có chạy predict trên tập dữ liệu khoảng 4.5 triệu ảnh, được chạy trên GPU A100, nhưng tốc độ predict khá là chậm. Không biết là em có config sai ở chỗ nào không hoặc code có sai sót ở đâu không, mong được anh chỉ dẫn:

Config:

{'vocab': 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ ', 'device': 'cuda:0', 'seq_modeling': 'transformer', 'transformer': {'d_model': 256, 'nhead': 8, 'num_encoder_layers': 6, 'num_decoder_layers': 6, 'dim_feedforward': 2048, 'max_seq_length': 1024, 'pos_dropout': 0.1, 'trans_dropout': 0.1}, 'optimizer': {'max_lr': 0.0003, 'pct_start': 0.1}, 'trainer': {'batch_size': 4096, 'print_every': 200, 'valid_every': 4000, 'iters': 100000, 'export': './weights/transformerocr.pth', 'checkpoint': './checkpoint/transformerocr_checkpoint.pth', 'log': './train.log', 'metrics': None}, 'dataset': {'name': 'data', 'data_root': './img/', 'train_annotation': 'annotation_train.txt', 'valid_annotation': 'annotation_val_small.txt', 'image_height': 32, 'image_min_width': 32, 'image_max_width': 512}, 'dataloader': {'num_workers': 3, 'pin_memory': True}, 'aug': {'image_aug': True, 'masked_language_model': True}, 'predictor': {'beamsearch': False}, 'quiet': False, 'pretrain': 'https://vocr.vn/data/vietocr/vgg_transformer.pth', 'weights': 'https://vocr.vn/data/vietocr/vgg_transformer.pth', 'backbone': 'vgg19_bn', 'cnn': {'pretrained': True, 'ss': [[2, 2], [2, 2], [2, 1], [2, 1], [1, 1]], 'ks': [[2, 2], [2, 2], [2, 1], [2, 1], [1, 1]], 'hidden': 256}}

Code predict:

import argparse
import glob
import os
import sys

import cv2
import torch
from PIL import Image
from vietocr.tool.config import Cfg
from vietocr.tool.predictor import Predictor


def main():
    parser = argparse.ArgumentParser(description="Example script with a required command line argument",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-F", "--file", type=str, help="input file or folder path", required=True)
    args = parser.parse_args()

    # Check if the provided path is a file or a folder
    if os.path.isfile(args.file):
        img_paths = [args.file]
    elif os.path.isdir(args.file):
        img_paths = glob.glob(os.path.join(args.file, '*.jpg'))
    else:
        print("Error: The provided path is neither a file nor a folder.")
        sys.exit(1)

    # Open the annotation file for writing
    with open("annotation.txt", "w", encoding="utf-8") as f:
        # Configure VietOCR
        config = Cfg.load_config_from_name('vgg_transformer')

        if torch.cuda.is_available():
            config['device'] = "cuda:0"
        else:
            config['device'] = "cpu"

        config['cnn']['pretrained'] = True
        config['predictor']['beamsearch'] = True
        config['trainer']['batch_size'] = 2048
        print(config)

        recognitor = Predictor(config)

        # Process each image
        for img_path in img_paths:
            img = cv2.imread(img_path)
            img = Image.fromarray(img)
            rec_result = recognitor.predict(img)
            f.write(os.path.basename(img_path) + "\t" + rec_result + "\n")
            # print(img_path + "\t" + rec_result)

    print("Processing completed. Results written to annotation.txt.")

if __name__ == "__main__":
    main()

Em đã fix được bằng cách sử dụng hàm predict_batch. Em sẽ để code ở dưới để mọi người tham khảo thêm. Xin phép được đóng issue.

# PREDICT SINGLE ONE IMAGE

# import argparse
# import glob
# import os
# import sys

# import cv2
# import torch
# from PIL import Image
# from vietocr.tool.config import Cfg
# from vietocr.tool.predictor import Predictor


# def main():
#     parser = argparse.ArgumentParser(description="Example script with a required command line argument",
#                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
#     parser.add_argument("-F", "--file", type=str, help="input file or folder path", required=True)
#     args = parser.parse_args()

#     # Check if the provided path is a file or a folder
#     if os.path.isfile(args.file):
#         img_paths = [args.file]
#     elif os.path.isdir(args.file):
#         img_paths = glob.glob(os.path.join(args.file, '*.jpg'))
#     else:
#         print("Error: The provided path is neither a file nor a folder.")
#         sys.exit(1)

#     # Open the annotation file for writing
#     with open("annotation.txt", "w", encoding="utf-8") as f:
#         # Configure VietOCR
#         config = Cfg.load_config_from_name('vgg_transformer')

#         if torch.cuda.is_available():
#             config['device'] = "cuda:0"
#         else:
#             config['device'] = "cpu"

#         config['cnn']['pretrained'] = True
#         config['trainer']['batch_size'] = 2048
#         print(config)

#         recognitor = Predictor(config)

#         # Process each image
#         for img_path in img_paths:
#             img = cv2.imread(img_path)
#             img = Image.fromarray(img)
#             rec_result = recognitor.predict(img)
#             f.write(os.path.basename(img_path) + "\t" + rec_result + "\n")
#             # print(img_path + "\t" + rec_result)

#     print("Processing completed. Results written to annotation.txt.")

# if __name__ == "__main__":
#     main()

# PREDICT BATCH OF IMAGES
import argparse
import glob
import os
import sys

import cv2
import torch
from PIL import Image
from vietocr.tool.config import Cfg
from vietocr.tool.predictor import Predictor


def main():
    parser = argparse.ArgumentParser(description="Example script with a required command line argument",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-F", "--file", type=str, help="input file or folder path", required=True)
    args = parser.parse_args()

    # Check if the provided path is a file or a folder
    if os.path.isfile(args.file):
        img_paths = [args.file]
    elif os.path.isdir(args.file):
        img_paths = glob.glob(os.path.join(args.file, '*.jpg'))
    else:
        print("Error: The provided path is neither a file nor a folder.")
        sys.exit(1)
        
    # Configure VietOCR
    config = Cfg.load_config_from_name('vgg_transformer')

    if torch.cuda.is_available():
        config['device'] = "cuda:0"
    else:
        config['device'] = "cpu"

    config['cnn']['pretrained'] = True
    config['trainer']['batch_size'] = 2048
    print(config)

    recognitor = Predictor(config)

    # Open the annotation file for writing
    with open("annotation.txt", "w", encoding="utf-8") as f:

        # Process images in batches
        batch_size = 16  # Adjust the batch size based on your available memory
        for i in range(0, len(img_paths), batch_size):
            batch_paths = img_paths[i:i + batch_size]
            batch_images = [cv2.imread(path) for path in batch_paths]
            batch_images = [Image.fromarray(img) for img in batch_images]

            # Use the predict_batch method to get predictions for the batch
            batch_results = recognitor.predict_batch(batch_images)

            # Write results to the annotation file
            for path, result in zip(batch_paths, batch_results):
                f.write(os.path.basename(path) + "\t" + result + "\n")

    print("Processing completed. Results written to annotation.txt.")


if __name__ == "__main__":
    main()