[Feature request] NSFW info as API param or image metadata
Opened this issue · 2 comments
Hello @w-e-w,
I know that there is a separate API endpoint, but is there an easy way to receive the NSFW info in an existing (img2img or txt2img) API call?
Right now you will get the censored images, but it would be great to get the info as an API param or maybe inserted as metadata.
Any idea for that?
Thanks in advance
So I'm guessing you wish to receive the detection coordinates, the confidence, and the label type?
I would need to separate the parsing of the output from the drawing of the mask to do that.
Currently, the way I have implemented it, the parsing is basically integrated inside the mask drawing.
the processing of the output is in this block of code
sd-webui-nudenet-nsfw-censor/scripts/nudenet_nsfw_censor_scripts/pil_nude_detector.py
Lines 133 to 213 in 8162bc8
def calculate_censor_mask(self, detection_results, img_size, thresholds, expand_horizontal, expand_vertical, nms_threshold, nudenet_nsfw_censor_mask_shape, rectangle_round_radius):
    """
    Build a binary censor mask from the raw nudenet detection output.

    Boxes are filtered by per-class threshold, optionally de-duplicated with
    Non-Maximum Suppression, scaled to the original image, expanded per class and
    finally drawn onto the mask with the selected shape function.

    Args:
        detection_results: nudenet output; only element 0 is read
        img_size: (width, height) of the original image
        thresholds: array indexed by class index; a box is kept only when its best
            score is strictly greater than the threshold of its best-scoring class
        expand_horizontal: array indexed by class index, multiplier applied to the box width
        expand_vertical: array indexed by class index, multiplier applied to the box height
        nms_threshold: float [0, 1] Non-Maximum Suppression threshold for cv2.dnn.NMSBoxes,
            NMS is skipped unless the value is below 1
        nudenet_nsfw_censor_mask_shape: key into mask_shapes_func_dict selecting the draw function
        rectangle_round_radius: forwarded unchanged as the last argument of the draw function

    Returns: PIL binary mask, or None when no box passes its threshold
    """
    # each row: [x_center, y_center, box_width, box_height, score_0, score_2, ..., score_16, score_17]
    outputs = np.transpose(np.squeeze(detection_results[0]))
    class_scores = outputs[:, 4:]

    # best-scoring class of every box, and whether that score beats the threshold of its class
    best_class = np.argmax(class_scores, axis=1)
    keep = np.max(class_scores, axis=1) > thresholds[best_class]
    if not np.any(keep):
        # nothing passed its threshold, so there is nothing to mask
        return None

    draw_func = mask_shapes_func_dict[nudenet_nsfw_censor_mask_shape]
    if draw_func is None:
        # no shape function for this setting: mask the entire image
        return Image.new('1', img_size, 1)

    image_mask = Image.new('1', img_size, 0)
    draw = ImageDraw.Draw(image_mask)

    kept_rows = outputs[keep]
    class_index = best_class[keep]
    boxes = kept_rows[:, :4]
    scores = kept_rows[:, 4:][np.arange(kept_rows.shape[0]), class_index]

    # (x_center, y_center, box_width, box_height) -> (x_1, y_1, box_width, box_height)
    boxes[:, 0:2] -= boxes[:, 2:4] / 2

    # Non-Maximum Suppression
    if nms_threshold < 1:
        selected = NMSBoxes(boxes, scores, 0, nms_threshold)
        boxes, scores, class_index = boxes[selected], scores[selected], class_index[selected]

    # scale by the longer image side, then shift the shorter axis by half the side difference
    width, height = img_size[0], img_size[1]
    offset = abs(width - height) / 2
    if width > height:
        boxes *= width / self.input_width
        boxes[:, 1] -= offset
    else:
        boxes *= height / self.input_height
        boxes[:, 0] -= offset

    # expand every box around its centre by the per-class factors
    expand_factors = np.vstack((expand_horizontal[class_index], expand_vertical[class_index])).T
    wh_e = boxes[:, 2:4] * expand_factors
    boxes[:, 0:2] -= (wh_e - boxes[:, 2:4]) / 2
    # (x_1, y_1, w, h) -> (x_1, y_1, x_2, y_2)
    boxes[:, 2:4] = boxes[:, 0:2] + wh_e
    boxes = boxes.round()

    verbose_lines = []
    for i, (x1y1x2y2, wh) in enumerate(zip(boxes, wh_e)):
        draw_func(draw, *x1y1x2y2, *wh, rectangle_round_radius)
        if shared.opts.nudenet_nsfw_censor_verbose_detection:
            verbose_lines.append(
                f'\n{nudenet_labels_friendly_name[class_index[i]]}: score {scores[i]}, x1 {x1y1x2y2[0]} y1 {x1y1x2y2[1]}, w {wh[0].round()} h {wh[1].round()}, x2 {x1y1x2y2[2]} y2 {x1y1x2y2[3]}'
            )

    if verbose_lines:
        print(''.join(verbose_lines))
    return image_mask
Bump. Great library btw