AlexMa011/pytorch-polygon-rnn

How does the `newdataset` handle the polygon processing?

Closed this issue · 3 comments

Hi!

I am currently trying to figure out what the following lines of code do. In the `newdataset` class, how do you encode each polygon, and how should the output of `newdataset.__getitem__` be interpreted?

point_num = len(json_file['polygon'])
polygon = np.array(json_file['polygon'])
point_count = 2
# img_array = np.zeros([data_num, 3, 224, 224])
label_array = np.zeros([self.length, 28 * 28 + 3])
label_index_array = np.zeros([self.length])
if point_num < self.length - 3:
    for points in polygon:
        index_a = int(points[0] / 8)
        index_b = int(points[1] / 8)
        index = index_b * 28 + index_a
        label_array[point_count, index] = 1
        label_index_array[point_count] = index
        point_count += 1
    label_array[point_count, 28 * 28] = 1
    label_index_array[point_count] = 28 * 28
    for kkk in range(point_count + 1, self.length):
        if kkk % (point_num + 3) == point_num + 2:
            index = 28 * 28
        elif kkk % (point_num + 3) == 0:
            index = 28 * 28 + 1
        elif kkk % (point_num + 3) == 1:
            index = 28 * 28 + 2
        else:
            index_a = int(polygon[kkk % (point_num + 3) - 2][0] / 8)
            index_b = int(polygon[kkk % (point_num + 3) - 2][1] / 8)
            index = index_b * 28 + index_a
        label_array[kkk, index] = 1
        label_index_array[kkk] = index
else:
    scale = point_num * 1.0 / (self.length - 3)
    index_list = (np.arange(0, self.length - 3) * scale).astype(int)
    for points in polygon[index_list]:
        index_a = int(points[0] / 8)
        index_b = int(points[1] / 8)
        index = index_b * 28 + index_a
        label_array[point_count, index] = 1
        label_index_array[point_count] = index
        point_count += 1
    for kkk in range(point_count, self.length):
        index = 28 * 28
        label_array[kkk, index] = 1
        label_index_array[kkk] = index
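
My current reading (just a guess on my part) is that each row one-hot-encodes one cell of a 28 x 28 grid laid over the 224 x 224 crop, so an index should decode back to pixel coordinates roughly like this:

index_b, index_a = divmod(index, 28)   # grid row (from y) and grid column (from x)
y, x = index_b * 8, index_a * 8        # top-left pixel of the vertex's 8x8 cell

Is that right, and what are the three extra columns in label_array for?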

If you could provide some insight into your thought process, that would be lovely!
Cheers in advance!

Hi @qTipTip, I'm having the same question as you; hopefully @AlexMa011 can answer here.

Specifically, I have the following questions:

  1. For `label_array = np.zeros([self.length, 28 * 28 + 3])`, why do we need `28 * 28 + 3`? What are the 3 extra slots for?

  2. What is this `kkk` loop doing in this block? https://github.com/AlexMa011/pytorch-polygon-rnn/blob/master/data.py#L45-L57

Thanks in advance!

  1. Sorry for the late reply, I have been kind of busy for the last 2 years. The 3 extra slots are for the two starting vertices and one stop vertex.
  2. The vector-filling process has two cases (see the sketch below):
    a. If the actual number of vertices is less than the fixed vector length, the vertices are repeated to avoid lots of zeros.
    b. If the actual number of vertices is larger than the fixed vector length, the vertices are proportionally subsampled to satisfy the fixed length.
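
In index terms, the construction boils down to something like this (a simplified sketch rather than the exact data.py code; `encode_indices` is a hypothetical helper, and the real code additionally one-hot-encodes each index into a (self.length, 28 * 28 + 3) array):

import numpy as np

GRID = 28   # 28 x 28 grid over the 224 x 224 crop, 8-pixel cells
STOP, START1, START2 = GRID * GRID, GRID * GRID + 1, GRID * GRID + 2

def encode_indices(polygon, length):
    # One class index per vertex: the row-major grid cell it lands in.
    idx = [(y // 8) * GRID + (x // 8) for x, y in polygon]
    n = len(idx)
    out = [0, 0]   # rows 0-1 stay empty: slots for the two starting vertices
    if n < length - 3:
        # Case a: tile [START1, START2, v1..vn, STOP] over the remaining rows,
        # so the polygon repeats instead of leaving the tail all zeros.
        pattern = [START1, START2] + idx + [STOP]
        out += [pattern[k % (n + 3)] for k in range(2, length)]
    else:
        # Case b: proportionally subsample down to length - 3 vertices, then one STOP.
        keep = (np.arange(length - 3) * (n / (length - 3))).astype(int)
        out += [idx[k] for k in keep] + [STOP]
    return out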

I ended up writing it like this @ethanjyx, in case you are still interested. It has been a year or so since I last looked at this, but I think this is what you want. Revisiting research code is always difficult 😄

import numpy as np
import torch


def process_polygon(polygon, polygon_resolution=28, cropped_image_size=224, max_polygon_length=60):
    """
    We superimpose a grid of size polygon_resolution*polygon_resolution over the cropped image.
    We then one-hot-encode the polygon by creating a polygon_resolution*polygon_resolution array of ints,
    where a 0 tells us that there is no vertex present, and a 1 tells us there is a vertex present.
    We need the output to be of a fixed size, so we make sure that every output is of size
    (max_polygon_length, polygon_resolution**2 + additional_info_if_needed).
    """
    assert cropped_image_size % polygon_resolution == 0

    grid_size = cropped_image_size // polygon_resolution

    # We add two to the length so we can slice for the previous and previous-previous
    # vertices; the first vertex is then stored in one_hot_polygon[2].
    one_hot_polygon = torch.zeros((max_polygon_length + 2, polygon_resolution ** 2 + 1))
    one_hot_ordering = np.zeros((max_polygon_length + 2), dtype=np.int64)

    # if `polygon` contains more vertices than max_polygon_length, then we subsample the polygon.
    if len(polygon) > max_polygon_length:
        scale = len(polygon) / max_polygon_length
        subsample_selection = (np.arange(0, max_polygon_length) * scale).astype(int)
        polygon = polygon[subsample_selection]

    # Fill in the original polygon vertices.
    for i in range(len(polygon)):
        vertex = polygon[i]
        # Grid cell of the vertex: (row, col) = (y, x) so the flattened row-major
        # index matches index_b * 28 + index_a in the original data.py.
        x, y = vertex[0] // grid_size, vertex[1] // grid_size
        vertex_index = np.ravel_multi_index((y, x), dims=(polygon_resolution, polygon_resolution))
        one_hot_polygon[i + 2, vertex_index] = 1
        one_hot_ordering[i + 2] = vertex_index

    # Set the end-of-polygon token in the slot after the last vertex. If the
    # polygon was subsampled to exactly max_polygon_length vertices there is
    # no room left, so the stop token is omitted.
    stop_position = len(polygon) + 2
    if stop_position < max_polygon_length + 2:
        one_hot_polygon[stop_position, polygon_resolution ** 2] = 1
        one_hot_ordering[stop_position] = polygon_resolution ** 2

    # If the polygon is shorter than max_polygon_length, pad the remaining
    # ground-truth indices with -1 (usable as an ignore_index in the loss).
    for j in range(stop_position + 1, max_polygon_length + 2):
        one_hot_ordering[j] = -1

    return one_hot_polygon, torch.as_tensor(one_hot_ordering[2:])
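
For completeness, a hypothetical usage sketch (the triangle coordinates and the ignore_index choice are mine, not from the repo):

polygon = np.array([[10, 12], [100, 40], [60, 180]])   # (x, y) pixels in the 224 x 224 crop
one_hot, ordering = process_polygon(polygon)
print(one_hot.shape)   # torch.Size([62, 785])
print(ordering[:5])    # three vertex indices, the stop token (784), then -1 padding

# The -1 padding pairs naturally with a loss that skips padded steps,
# e.g. torch.nn.CrossEntropyLoss(ignore_index=-1).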