[Feature request] NSFW info as API param or image metadata

Hello @w-e-w,

I know that there is a separate API endpoint, but is there an easy way to receive the NSFW info in an existing (img2img or txt2img) API call?

Right now you will get the censored images, but it would be great to get the info as an API param or maybe inserted as metadata.

Any idea for that?

Thanks in advance

So I'm guessing you wish to receive the detection coordinates, the confidence, and the label type?

I would need to separate the parsing of the output from the drawing of the mask;
currently, the way I have implemented it, the two are integrated together.

the processing of the output is in this block of code

sd-webui-nudenet-nsfw-censor/scripts/nudenet_nsfw_censor_scripts/pil_nude_detector.py

Lines 133 to 213 in 8162bc8

    
               def calculate_censor_mask(self, detection_results, img_size, thresholds, expand_horizontal, expand_vertical, nms_threshold, nudenet_nsfw_censor_mask_shape, rectangle_round_radius):
                   """
                   Build a binary censor mask from the raw nudenet detection output.

                   Args:
                       detection_results: nudenet output; element 0 is squeezed and transposed so that
                           each row reads [x_center, y_center, box_width, box_height, score_0, ...]
                       img_size: (width, height) of the original image
                       thresholds: per-class minimum score, indexed by class index
                       expand_horizontal: per-class multiplier applied to the box width, indexed by class index
                       expand_vertical: per-class multiplier applied to the box height, indexed by class index
                       nms_threshold: float [0, 1] Non-Maximum Suppression threshold for cv2.dnn.NMSBoxes;
                           suppression is skipped when the value is not below 1
                       nudenet_nsfw_censor_mask_shape: key into mask_shapes_func_dict selecting the draw function
                       rectangle_round_radius: forwarded unchanged to the draw function

                   Returns: PIL binary mask, or None when no box passes the threshold of its class
                   """
                   # rows: [x_center, y_center, box_width, box_height, score_0, score_1, ...]
                   rows = np.transpose(np.squeeze(detection_results[0]))
                   class_scores = rows[:, 4:]
                   best_class = np.argmax(class_scores, axis=1)

                   # keep the boxes whose best score beats the threshold of that score's class
                   keep = np.max(class_scores, axis=1) > thresholds[best_class]
                   if not np.any(keep):
                       return None

                   draw_func = mask_shapes_func_dict[nudenet_nsfw_censor_mask_shape]
                   if draw_func is None:
                       # no draw function for this shape: the mask covers the entire image
                       return Image.new('1', img_size, 1)

                   image_mask = Image.new('1', img_size, 0)
                   draw = ImageDraw.Draw(image_mask)

                   # boolean indexing copies, so the in-place edits below never touch `rows`
                   kept_rows = rows[keep]
                   class_index = best_class[keep]
                   boxes = kept_rows[:, :4]
                   scores = kept_rows[:, 4:][np.arange(kept_rows.shape[0]), class_index]

                   # (x_center, y_center, box_width, box_height) -> (x_1, y_1, box_width, box_height)
                   boxes[:, 0:2] -= boxes[:, 2:4] / 2

                   # Non-Maximum Suppression
                   if nms_threshold < 1:
                       survivors = NMSBoxes(boxes, scores, 0, nms_threshold)
                       boxes, scores, class_index = boxes[survivors], scores[survivors], class_index[survivors]

                   # scale by the longer image side, then shift the shorter axis by half the side difference
                   # (presumably undoing square padding of the model input -- confirm against the detector)
                   width, height = img_size[0], img_size[1]
                   offset = abs(width - height) / 2
                   if width > height:
                       factor, shifted_axis = width / self.input_width, 1
                   else:
                       factor, shifted_axis = height / self.input_height, 0
                   boxes *= factor
                   boxes[:, shifted_axis] -= offset

                   # grow every box around its centre by the expansion factors of its class
                   expand = np.vstack((expand_horizontal[class_index], expand_vertical[class_index])).T
                   wh_e = boxes[:, 2:4] * expand
                   boxes[:, 0:2] -= (wh_e - boxes[:, 2:4]) / 2
                   # (x_1, y_1, box_width, box_height) -> x1y1x2y2
                   boxes[:, 2:4] = boxes[:, 0:2] + wh_e
                   boxes = boxes.round()

                   report = []
                   for i, (x1y1x2y2, wh) in enumerate(zip(boxes, wh_e)):
                       draw_func(draw, *x1y1x2y2, *wh, rectangle_round_radius)
                       if shared.opts.nudenet_nsfw_censor_verbose_detection:
                           report.append(
                               f'\n{nudenet_labels_friendly_name[class_index[i]]}: score {scores[i]}, x1 {x1y1x2y2[0]} y1 {x1y1x2y2[1]}, w {wh[0].round()} h {wh[1].round()}, x2 {x1y1x2y2[2]} y2 {x1y1x2y2[3]}'
                           )

                   if report:
                       print(''.join(report))

                   return image_mask

Bump. Great library btw

	def calculate_censor_mask(self, detection_results, img_size, thresholds, expand_horizontal, expand_vertical, nms_threshold, nudenet_nsfw_censor_mask_shape, rectangle_round_radius):
	    """
	    Generate binary mask from detection results of nudenet filtered and adjusted based on label_configs

	    Args:
	        detection_results: nudenet output; element 0 is squeezed and transposed into rows of
	            [x_center, y_center, box_width, box_height, score_0, score_1, ...]
	        img_size: (width, height) of the original image
	        thresholds: per-class minimum score, indexed by class index
	        expand_horizontal: per-class multiplier applied to the box width, indexed by class index
	        expand_vertical: per-class multiplier applied to the box height, indexed by class index
	        nms_threshold: float [0, 1] Non-Maximum Suppression threshold for cv2.dnn.NMSBoxes,
	            suppression is skipped when the value is not below 1
	        nudenet_nsfw_censor_mask_shape: key into mask_shapes_func_dict selecting the draw function
	        rectangle_round_radius: forwarded unchanged to the draw function

	    Returns: PIL binary mask, or None when no box passes the threshold of its class
	    """
	    # [x_center, y_center, box_width, box_height, score_0, score_1, ...]
	    outputs = np.transpose(np.squeeze(detection_results[0]))
	    max_score_indices = np.argmax(outputs[:, 4:], axis=1)

	    # get a bool array all boxes with its max score greater than the defined threshold of its category
	    filter_results = np.max(outputs[:, 4:], axis=1) > thresholds[max_score_indices]

	    if not np.any(filter_results):
	        # nothing to censor; callers receive None rather than an empty mask
	        return None

	    draw_func = mask_shapes_func_dict[nudenet_nsfw_censor_mask_shape]
	    if draw_func is None:
	        # just return a mask for the entire image
	        return Image.new('1', img_size, 1)

	    image_mask = Image.new('1', img_size, 0)
	    draw = ImageDraw.Draw(image_mask)

	    # boolean indexing copies, so the in-place edits below never touch `outputs`
	    detection_results = outputs[filter_results]
	    class_index = max_score_indices[filter_results]
	    boxes = detection_results[:, :4]
	    scores = detection_results[:, 4:][np.arange(detection_results.shape[0]), class_index]

	    # convert detected box coordinates (x_center, y_center, box_width, box_height) to (x_1, y_1, box_width, box_height)
	    boxes[:, 0:2] -= boxes[:, 2:4] / 2

	    # Non-Maximum Suppression
	    if nms_threshold < 1:
	        nms = NMSBoxes(boxes, scores, 0, nms_threshold)
	        boxes = boxes[nms]
	        scores = scores[nms]
	        class_index = class_index[nms]

	    # scale by the longer image side, then shift the shorter axis by half the side difference
	    # (presumably undoing square padding of the model input -- confirm against the detector)
	    offset = abs(img_size[0] - img_size[1]) / 2
	    if img_size[0] > img_size[1]:
	        factor = img_size[0] / self.input_width
	        boxes *= factor
	        boxes[:, 1] -= offset
	    else:
	        factor = img_size[1] / self.input_height
	        boxes *= factor
	        boxes[:, 0] -= offset

	    # grow every box around its centre by the expansion factors of its class
	    wh_e = boxes[:, 2:4] * np.vstack((expand_horizontal[class_index], expand_vertical[class_index])).T
	    boxes[:, 0:2] -= (wh_e - boxes[:, 2:4]) / 2
	    # x1y1x2y2
	    boxes[:, 2:4] = boxes[:, 0:2] + wh_e
	    boxes = boxes.round()

	    verbose = []
	    for i in range(scores.shape[0]):
	        x1y1x2y2 = boxes[i]
	        wh = wh_e[i]
	        # the shape functions take the four corner coordinates and the two sizes as separate
	        # arguments; the star-unpacking must not be dropped when copying this code around
	        draw_func(draw, *x1y1x2y2, *wh, rectangle_round_radius)

	        if shared.opts.nudenet_nsfw_censor_verbose_detection:
	            verbose.append(
	                f'\n{nudenet_labels_friendly_name[class_index[i]]}: score {scores[i]}, x1 {x1y1x2y2[0]} y1 {x1y1x2y2[1]}, w {wh[0].round()} h {wh[1].round()}, x2 {x1y1x2y2[2]} y2 {x1y1x2y2[3]}'
	            )

	    if verbose:
	        print(''.join(verbose))

	    return image_mask