ivanstepanovftw/glam

How can I draw the bounding boxes?

Closed this issue · 1 comment

Thank you very much for making this code repository public. I have been studying the document parsing task recently. I did not train a model; instead, I directly used /models/glam_dln.pt with my own PDF file to run dln_glam_inference, and got the following results. How can I draw the bounding boxes on the document or images? Looking forward to your reply.

Error opening data file /usr/share/tesseract-ocr/4.00/tessdata/ara.traineddata
Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory.
Failed loading language 'ara'
node_class_scores torch.Size([146, 12]) edge_class_scores torch.Size([42340, 1])
clusters [{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145}]
cluster_min_spanning_boxes [<POLYGON ((15.32 15.561, 584.6 15.561, 584.6 832.281, 15.32 832.281, 15.32 1...>]
cluster_classes [9]
node_class_scores torch.Size([48, 12]) edge_class_scores torch.Size([4512, 1])
clusters [{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47}]
cluster_min_spanning_boxes [<POLYGON ((15.32 15.561, 584.6 15.561, 584.6 832.281, 15.32 832.281, 15.32 1...>]
cluster_classes [9]
node_class_scores torch.Size([156, 12]) edge_class_scores torch.Size([48360, 1])
clusters [{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155}]
cluster_min_spanning_boxes [<POLYGON ((33.64 41.556, 565.6 41.556, 565.6 804.756, 33.64 804.756, 33.64 4...>]
cluster_classes [9]
node_class_scores torch.Size([111, 12]) edge_class_scores torch.Size([24420, 1])
clusters [{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110}]
cluster_min_spanning_boxes [<POLYGON ((33.64 41.556, 565.6 41.556, 565.6 804.756, 33.64 804.756, 33.64 4...>]
cluster_classes [9]
node_class_scores torch.Size([112, 12]) edge_class_scores torch.Size([24864, 1])
clusters [{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111}]
cluster_min_spanning_boxes [<POLYGON ((33.64 41.556, 565.6 41.556, 565.6 804.756, 33.64 804.756, 33.64 4...>]
cluster_classes [9]
node_class_scores torch.Size([75, 12]) edge_class_scores torch.Size([11100, 1])
clusters [{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74}]
cluster_min_spanning_boxes [<POLYGON ((33.64 41.556, 565.6 41.556, 565.6 804.756, 33.64 804.756, 33.64 4...>]
cluster_classes [9]
node_class_scores torch.Size([133, 12]) edge_class_scores torch.Size([35112, 1])
clusters [{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132}]
cluster_min_spanning_boxes [<POLYGON ((33.64 41.556, 565.6 41.556, 565.6 804.756, 33.64 804.756, 33.64 4...>]
cluster_classes [9]

Sorry for the late response. For reference, see the following commented-out debug-drawing code:

glam/dln_glam_prepare.py

Lines 221 to 254 in 7cdae59

# # Debug draw
# if unlabelled_nodes or uncovered_segmentations:
# logger.warning(f"pdf_filepath: {pdf_filepath}, unlabelled_nodes: {unlabelled_nodes}, uncovered_segmentations: {uncovered_segmentations}")
# # Render page
# #image = Image.open(io.BytesIO(data["image"]["bytes"])).convert("L").convert("RGBA")
# overlay = Image.new("RGBA", image.size, (0, 0, 0, 0))
# draw = ImageDraw.Draw(overlay)
#
# # Render segmentation
# for annotation in annotations:
# segmentation = annotation["segmentation"][0]
# segmentation_polygon = Polygon((
# (segmentation[i], segmentation[i + 1])
# for i in range(0, len(segmentation), 2)
# ))
# draw.polygon(segmentation_polygon.exterior.coords, fill=(200, 200, 200, 200), outline="magenta", width=4)
# # area = segmentation_polygon.intersection(node_bbox).area
# # print("area:", area, "node_bbox.area:", node_bbox.area, "area / node_bbox.area:", area / node_bbox.area)
#
# image = Image.alpha_composite(image, overlay)
# draw = ImageDraw.Draw(image)
#
# # Render bad nodes
# for node_i in nodes:
# outline = "green" if isinstance(node_i, TextNode) else "blue"
# draw.rectangle((node_i.bbox_min_x, node_i.bbox_min_y, node_i.bbox_max_x, node_i.bbox_max_y), outline=outline, width=1)
# for node_i in unlabelled_nodes:
# outline = "red" if isinstance(node_i, TextNode) else "orange"
# draw.rectangle((node_i.bbox_min_x, node_i.bbox_min_y, node_i.bbox_max_x, node_i.bbox_max_y), outline=outline, width=1)
#
# image.show()
# print("showing", len(unlabelled_nodes) / len(nodes))
# # breakpoint()
# # time.sleep(3)

glam/dln_glam_prepare.py

Lines 390 to 417 in 7cdae59

# if node_class_accuracy < 0.98:
# draw = ImageDraw.Draw(image)
#
# # for annotation in annotations:
# # for segmentation in annotation["segmentation"]:
# # draw.polygon(segmentation, outline=(0, 0, 255), width=6)
#
# for cluster, cluster_class in zip(clusters, cluster_classes):
# cluster_bbox = (
# min(example.node_features[node_i][2] for node_i in cluster),
# min(example.node_features[node_i][3] for node_i in cluster),
# max(example.node_features[node_i][4] for node_i in cluster),
# max(example.node_features[node_i][5] for node_i in cluster),
# )
# draw.rectangle(cluster_bbox, outline=(0, 255, 0), width=3)
# draw.text(cluster_bbox[:2], CLASSES_MAP[cluster_class], fill=(0, 0, 0))
#
# # for k, node_features in zip(range(example.node_features.size(0)), example.node_features):
# # node_bbox = (node_features[2], node_features[3], node_features[4], node_features[5])
# # draw.rectangle(node_bbox, outline=(255, 0, 0), width=1)
# #
# # for annotation in annotations:
# # for segmentation in annotation["segmentation"]:
# # draw.text(segmentation, CLASSES_MAP[annotation["category_id"]], fill=(0, 0, 0))
#
# logger.debug(f"{split_name}/{image_id}: node_class_accuracy: {node_class_accuracy}")
# image.show(title=f"{split_name}/{image_id}")
# breakpoint()