Learn Computer Vision Step by Step
What is Computer Vision ?
Computer vision is an interdisciplinary scientific field that deals with how computers can gain high-level understanding from digital images or videos. From the perspective of engineering, it seeks to understand and automate tasks that the human visual system can do.[1][2][3]
Computer vision tasks include methods for acquiring, processing, analyzing and understanding digital images, and extraction of high-dimensional data from the real world in order to produce numerical or symbolic information, e.g. in the forms of decisions.[4][5][6][7] Understanding in this context means the transformation of visual images (the input of the retina) into descriptions of the world that make sense to thought processes and can elicit appropriate action. This image understanding can be seen as the disentangling of symbolic information from image data using models constructed with the aid of geometry, physics, statistics, and learning theory.[8]
a Guide to Semantic Segmentation
Semantic segmentation refers to the process of linking each pixel in an image to a class label. These labels could include a person, car, flower, piece of furniture, etc., just to mention a few. We can think of semantic segmentation as image classification at a pixel level. For example, in an image that has many cars, segmentation will label all the objects as car objects. However, a separate class of models known as instance segmentation is able to label the separate instances where an object appears in an image. This kind of segmentation can be very useful in applications that are used to count the number of objects, such as counting the amount of foot traffic in a mall.
OpenCV, Installation steps on Ubuntu
git clone https://github.com/opencv/opencv.git
cd ~/opencv && mkdir release && cd release
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D GLIBCXX_USE_CXX11_ABI=0 ..
make
sudo make install
NB: When compiling c++ do not forget to include required opencv libraries with:
g++ $1 -I/opencv2/include/ -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio
Load, Change to Greyscale and Save an Image
#include <cv.h>
#include <highgui.h>
#include <opencv2/imgproc/imgproc.hpp>
#include <stdio.h>
using namespace cv;
int main( int argc, char** argv )
{
char* imageName = argv[1];
Mat image;
image = imread( imageName, 1 );
if( argc != 2 || !image.data )
{
printf( " No image data \n " );
return -1;
}
Mat gray_image;
cvtColor( image, gray_image, CV_BGR2GRAY );
imwrite( "../../images/Gray_Image.jpg", gray_image );
namedWindow( imageName, CV_WINDOW_AUTOSIZE );
namedWindow( "Gray image", CV_WINDOW_AUTOSIZE );
imshow( imageName, image );
imshow( "Gray image", gray_image );
waitKey(0);
return 0;
}
Increase Contrast of an Image using LAB
import cv2
#-----Reading the image-----------------------------------------------------
img = cv2.imread('Dog.jpg', 1)
cv2.imshow("img",img)
#-----Converting image to LAB Color model-----------------------------------
lab= cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
cv2.imshow("lab",lab)
#-----Splitting the LAB image to different channels-------------------------
l, a, b = cv2.split(lab)
cv2.imshow('l_channel', l)
cv2.imshow('a_channel', a)
cv2.imshow('b_channel', b)
#-----Applying CLAHE to L-channel-------------------------------------------
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
cl = clahe.apply(l)
cv2.imshow('CLAHE output', cl)
#-----Merge the CLAHE enhanced L-channel with the a and b channel-----------
limg = cv2.merge((cl,a,b))
cv2.imshow('limg', limg)
#-----Converting image from LAB Color model to RGB model--------------------
final = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)
cv2.imshow('final', final)
#_____END_____#
### Face [Haar-cascade](https://docs.opencv.org/3.4/db/d28/tutorial_cascade_classifier.html) Detection
```python
import cv2
image_path = "./public/original.png"
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(
gray,
scaleFactor= 1.1,
minNeighbors= 25,
minSize=(10, 10)
)
faces_detected = format(len(faces)) + " faces detected!"
print(faces_detected)
# Draw a rectangle around the faces
for (x, y, w, h) in faces:
cv2.rectangle(image, (x, y), (x+w, y+h), (255, 255, 0), 2)
center = (x + w//2, y + h//2)
image = cv2.ellipse(image, center, (w//2, h//2), 0, 0, 360, (255, 0, 255), 4)
#cv2.viewImage(image,faces_detected)
cv2.imshow('output',image)
cv2.waitKey(0)
if cv2.getWindowProperty('output', cv2.WND_PROP_VISIBLE) < 1:
cv2.destroyAllWindows()
Person Detection
# USAGE
# python mask_rcnn.py --mask-rcnn mask-rcnn-coco --image images/example_01.jpg
# python mask_rcnn.py --mask-rcnn mask-rcnn-coco --image images/example_03.jpg --visualize 1
# import the necessary packages
import numpy as np
import argparse
import random
import time
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image")
ap.add_argument("-m", "--mask-rcnn", required=True,
help="base path to mask-rcnn directory")
ap.add_argument("-v", "--visualize", type=int, default=0,
help="whether or not we are going to visualize each instance")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
help="minimum threshold for pixel-wise mask segmentation")
args = vars(ap.parse_args())
# load the COCO class labels our Mask R-CNN was trained on
labelsPath = os.path.sep.join([args["mask_rcnn"],
"object_detection_classes_coco.txt"])
LABELS = open(labelsPath).read().strip().split("\n")
# load the set of colors that will be used when visualizing a given
# instance segmentation
colorsPath = os.path.sep.join([args["mask_rcnn"], "colors.txt"])
COLORS = open(colorsPath).read().strip().split("\n")
COLORS = [np.array(c.split(",")).astype("int") for c in COLORS]
COLORS = np.array(COLORS, dtype="uint8")
# derive the paths to the Mask R-CNN weights and model configuration
weightsPath = os.path.sep.join([args["mask_rcnn"],
"frozen_inference_graph.pb"])
configPath = os.path.sep.join([args["mask_rcnn"],
"mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"])
# load our Mask R-CNN trained on the COCO dataset (90 classes)
# from disk
print("[INFO] loading Mask R-CNN from disk...")
net = cv2.dnn.readNetFromTensorflow(weightsPath, configPath)
# load our input image and grab its spatial dimensions
image = cv2.imread(args["image"])
(H, W) = image.shape[:2]
# construct a blob from the input image and then perform a forward
# pass of the Mask R-CNN, giving us (1) the bounding box coordinates
# of the objects in the image along with (2) the pixel-wise segmentation
# for each specific object
blob = cv2.dnn.blobFromImage(image, swapRB=True, crop=False)
net.setInput(blob)
start = time.time()
(boxes, masks) = net.forward(["detection_out_final", "detection_masks"])
end = time.time()
# show timing information and volume information on Mask R-CNN
print("[INFO] Mask R-CNN took {:.6f} seconds".format(end - start))
print("[INFO] boxes shape: {}".format(boxes.shape))
print("[INFO] masks shape: {}".format(masks.shape))
# loop over the number of detected objects
for i in range(0, boxes.shape[2]):
# extract the class ID of the detection along with the confidence
# (i.e., probability) associated with the prediction
classID = int(boxes[0, 0, i, 1])
confidence = boxes[0, 0, i, 2]
# filter out weak predictions by ensuring the detected probability
# is greater than the minimum probability
if confidence > args["confidence"]:
# clone our original image so we can draw on it
clone = image.copy()
# scale the bounding box coordinates back relative to the
# size of the image and then compute the width and the height
# of the bounding box
box = boxes[0, 0, i, 3:7] * np.array([W, H, W, H])
(startX, startY, endX, endY) = box.astype("int")
boxW = endX - startX
boxH = endY - startY
# extract the pixel-wise segmentation for the object, resize
# the mask such that it's the same dimensions of the bounding
# box, and then finally threshold to create a *binary* mask
mask = masks[i, classID]
mask = cv2.resize(mask, (boxW, boxH),
interpolation=cv2.INTER_NEAREST)
mask = (mask > args["threshold"])
# extract the ROI of the image
roi = clone[startY:endY, startX:endX]
# check to see if are going to visualize how to extract the
# masked region itself
if args["visualize"] > 0:
# convert the mask from a boolean to an integer mask with
# to values: 0 or 255, then apply the mask
visMask = (mask * 255).astype("uint8")
instance = cv2.bitwise_and(roi, roi, mask=visMask)
# show the extracted ROI, the mask, along with the
# segmented instance
cv2.imshow("ROI", roi)
cv2.imshow("Mask", visMask)
cv2.imshow("Segmented", instance)
# now, extract *only* the masked region of the ROI by passing
# in the boolean mask array as our slice condition
roi = roi[mask]
# randomly select a color that will be used to visualize this
# particular instance segmentation then create a transparent
# overlay by blending the randomly selected color with the ROI
color = random.choice(COLORS)
blended = ((0.4 * color) + (0.6 * roi)).astype("uint8")
# store the blended ROI in the original image
clone[startY:endY, startX:endX][mask] = blended
# draw the bounding box of the instance on the image
color = [int(c) for c in color]
cv2.rectangle(clone, (startX, startY), (endX, endY), color, 2)
# draw the predicted label and associated probability of the
# instance segmentation on the image
text = "{}: {:.4f}".format(LABELS[classID], confidence)
cv2.putText(clone, text, (startX, startY - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
# show the output image
cv2.imshow("Output", clone)
cv2.waitKey(0)
Contours Detection
/**
* @function findContours_Demo.cpp
* @brief Demo code to find contours in an image
* @author OpenCV team
*/
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
using namespace cv;
using namespace std;
Mat src_gray;
int thresh = 100;
RNG rng(12345);
/// Function header
void thresh_callback(int, void* );
/**
* @function main
*/
int main( int argc, char** argv )
{
/// Load source image
CommandLineParser parser( argc, argv, "{@input | HappyFish.jpg | input image}" );
Mat src = imread( samples::findFile( parser.get<String>( "@input" ) ) );
if( src.empty() )
{
cout << "Could not open or find the image!\n" << endl;
cout << "Usage: " << argv[0] << " <Input image>" << endl;
return -1;
}
/// Convert image to gray and blur it
cvtColor( src, src_gray, COLOR_BGR2GRAY );
blur( src_gray, src_gray, Size(3,3) );
/// Create Window
const char* source_window = "Source";
namedWindow( source_window );
imshow( source_window, src );
const int max_thresh = 255;
createTrackbar( "Canny thresh:", source_window, &thresh, max_thresh, thresh_callback );
thresh_callback( 0, 0 );
waitKey();
return 0;
}
/**
* @function thresh_callback
*/
void thresh_callback(int, void* )
{
/// Detect edges using Canny
Mat canny_output;
Canny( src_gray, canny_output, thresh, thresh*2 );
/// Find contours
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours( canny_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE );
/// Draw contours
Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 );
for( size_t i = 0; i< contours.size(); i++ )
{
Scalar color = Scalar( rng.uniform(0, 256), rng.uniform(0,256), rng.uniform(0,256) );
drawContours( drawing, contours, (int)i, color, 2, LINE_8, hierarchy, 0 );
}
/// Show in a window
imshow( "Contours", drawing );
}
ps: Compile with
g++ $1 -I/opencv2/include/ -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio
and give argument as image (./a.out myImage.png)
Automatic Canny edge detection
OpenCV CheatSheet by Salma Ghoneim
Image Classification
https://cv-tricks.com/object-detection/faster-r-cnn-yolo-ssd/
Template matching
Contours Match Shapes
https://stackoverflow.com/questions/33105375/contour-matchshapes-in-opencv
Accuracy of fiding contours
Edge Detection
https://www.sicara.ai/blog/2019-03-12-edge-detection-in-opencv
Hogwart Invisibility Cloak
https://www.learnopencv.com/invisibility-cloak-using-color-detection-and-segmentation-with-opencv/