hassanhub/LipReading

prepare_crop_files.py unclear

Opened this issue · 2 comments

Hello,
As is, it is clear that prepare_crop_files.py will not work as intended with the download commands commented out. However, this makes it unclear which other lines, if any, should be uncommented to achieve the intended preparation. It would be helpful if you updated this so it works as intended without further modification.
Thanks!

I came across the same problem. Here is my version, which works for me. Note that I use OpenCV 3, so I modified the code slightly.
In addition, there is no variable `format_num2` defined anywhere in the source code; judging from the context, I believe it should be `count`.

#pylint: skip-file
import os
import fnmatch
import cv2
import numpy as np
import sys

## Usage: python prepare_crop_files.py START END
## Processes the speakers in range(START, END), i.e. END itself is excluded;
## e.g. `python prepare_crop_files.py 3 6` handles s3, s4 and s5.
if len(sys.argv) < 3:
    print('Insufficient arguments')
    print('Usage: python prepare_crop_files.py START END')
    # Exit with a non-zero status so a calling shell script can detect the failure.
    sys.exit(1)

start = int(sys.argv[1])
end = int(sys.argv[2])


# Root directory of the local dataset copy; everything below is created under it.
path = '/home/lht/data/GRID'
# Create the directory tree from Python rather than shelling out to `mkdir`:
# missing parent directories are created too, and already-existing directories
# are left alone without an error message.
for _subdir in ('', '/Audio', '/Video'):
    if not os.path.isdir(path + _subdir):
        os.makedirs(path + _subdir)

# Frontal-face Haar cascade; adjust the location to match the local OpenCV install.
cascade_file = '/usr/local/share/OpenCV/haarcascades/haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_file)
if face_cascade.empty():
    # An unloaded classifier only fails later, inside detectMultiScale, with an
    # opaque assertion error; stop here with an actionable message instead.
    print('Could not load Haar cascade from : ' + cascade_file)
    sys.exit(1)

# Not referenced anywhere else in the visible part of this script.
ds_factor = 0.5

# Audio stage: fetch each speaker's audio archive, unpack it, delete the
# archive and renumber the .wav files to zero-padded six-digit names.
# The stage is switched off on purpose: it iterates over the empty
# range(0, 0), so nothing below runs. Replace it with range(start, end)
# to enable downloading.
for speaker in range(0, 0):

    # Speaker 21 is skipped -- presumably its data is unavailable; TODO confirm.
    if speaker == 21:
        continue

    tar_name = 's{0}.tar'.format(speaker)
    tar_url = 'http://spandh.dcs.shef.ac.uk/gridcorpus/s{0}/audio/{1}'.format(speaker, tar_name)
    speaker_dir = '{0}/Audio/s{1}'.format(path, speaker)

    # Download, unpack and remove the archive from inside the Audio directory.
    os.chdir(path + '/Audio')
    for command in ('wget ' + tar_url, 'tar -xf ' + tar_name, 'rm -f -r ' + tar_name):
        os.system(command)

    # Renumber the extracted files in `ls` order: 000001.wav, 000002.wav, ...
    print(speaker_dir)
    os.chdir(speaker_dir)
    os.system('ls *.wav | cat -n | while read n f; do mv "$f" "$(printf %06d $n).wav"; done')



def _fixed_crop_rect(margin=30):
    """Return the hard-coded (x, y, w, h) crop rectangle, grown by *margin*.

    The base rectangle 136,210,49,29 is fixed in frame coordinates; it is
    widened by *margin* on the left and right and by half of *margin* above
    and below. The face detector is only used to decide whether a video is
    processed at all, not to position this rectangle.
    """
    x = 136 - margin
    y = 210 - int(margin / 2)
    w = 49 + (2 * margin)
    h = 29 + margin
    return x, y, w, h


def _crop_video(source_path, video_id):
    """Crop every frame of one video and save the result as JPEGs and an AVI.

    Reads ``<source_path><video_id>.mpg``, converts each frame to grayscale,
    crops the fixed rectangle, resizes it to 128x128 and writes it both to
    ``<source_path>frames/<video_id>/<n>.jpg`` (n counted from 1) and to
    ``<source_path>face/<video_id>.avi``. Face detection runs on the first
    frame only; if it finds nothing, the video is abandoned.

    Returns a tuple ``(frames_written, writer_opened)``.
    """
    frames_dir = source_path + 'frames/' + video_id
    if not os.path.isdir(frames_dir):
        os.makedirs(frames_dir)

    print('Reading video from : ' + source_path + video_id + '.mpg')
    cap = cv2.VideoCapture(source_path + video_id + '.mpg')

    print('Writing video : ' + source_path + 'face/' + video_id + '.avi')
    out = cv2.VideoWriter()
    if cv2.__version__[0] == '2':
        # OpenCV 2 exposes the fourcc helper under cv2.cv
        fourcc = cv2.cv.CV_FOURCC('m', 'p', '4', 'v')
    else:
        # OpenCV 3 and later
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    # Last argument False = the writer receives single-channel (grayscale) frames.
    success = out.open(source_path + 'face/' + video_id + '.avi', fourcc, 25.0, (128, 128), False)
    print('Success: ' + str(success))

    print('Writing frames to : ' + frames_dir + '/')
    x, y, w, h = _fixed_crop_rect()
    frames_written = 0
    face_rects = None  # filled in from the first frame, then reused

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if face_rects is None:
                face_rects = face_cascade.detectMultiScale(gray, 1.05, 3, minSize=(128, 128))
            # detectMultiScale yields an empty tuple when nothing is found and
            # a NumPy array otherwise; len() is well defined for both, whereas
            # `array == ()` is not a reliable emptiness test.
            if len(face_rects) == 0:
                break

            roi = gray[y:y + h, x:x + w]
            print(str(x) + ',' + str(y) + ',' + str(w) + ',' + str(h))
            roi = cv2.resize(roi, (128, 128))

            # Count only frames that are actually written, so the number that
            # ends up in the log equals the number of JPEGs on disk.
            frames_written += 1
            cv2.imwrite(frames_dir + '/' + str(frames_written) + '.jpg', roi)
            # writing video (unstabilized)
            out.write(roi)
    finally:
        # Release the capture and the writer even if a frame fails to process.
        cap.release()
        out.release()

    return frames_written, success


# Video stage: for every speaker in range(start, end) renumber the .mpg files,
# crop each video, append one line per video to <path>/log.txt and run the
# ffmpeg vidstab passes on the source video.
for i in range(start, end):

    # Speaker 21 is skipped -- presumably its data is unavailable; TODO confirm.
    if i == 21:
        continue

    source_path = path + '/Video/s' + str(i) + '/'

    # Downloading is disabled here. To fetch the data automatically, run the
    # following from inside path+'/Video' before the directory check below:
    #   file='s'+str(i)+'.mpg_vcd.zip'
    #   link='http://spandh.dcs.shef.ac.uk/gridcorpus/s'+str(i)+'/video/'+file
    #   os.system('wget '+link)
    #   os.system('unzip '+file)
    #   os.system('rm -f -r '+file)
    if not os.path.isdir(source_path):
        # Without the unpacked videos there is nothing to process; report it
        # and carry on with the next speaker instead of aborting the whole run.
        print('Skipping s' + str(i) + ' : ' + source_path + ' does not exist, download and unzip the videos first')
        continue

    # The ffmpeg commands below use relative file names, so the speaker
    # directory must remain the working directory for the rest of this iteration.
    os.chdir(source_path)

    # Renumber the videos in `ls` order: 000001.mpg, 000002.mpg, ...
    os.system('ls *.mpg | cat -n | while read n f; do mv "$f" "$(printf %06d $n).mpg"; done')

    if not os.path.exists(source_path + 'face'):
        os.mkdir(source_path + 'face')

    # Counted once, before stabilisation adds its own s_*.mpg outputs here.
    numfiles = len(fnmatch.filter(os.listdir(source_path), '*.mpg'))
    for j in range(1, numfiles + 1):

        video_id = "{number:06}".format(number=j)

        frames_written, writer_opened = _crop_video(source_path, video_id)

        # One log entry per line: <label> <frames written> <writer opened>.
        # NOTE(review): the label is built as frames/<id>.mpg, which is not a
        # file this script creates; it is kept unchanged as an identifier.
        with open(path + '/log.txt', 'a') as log_file:
            log_file.write(source_path + 'frames/' + video_id + '.mpg ' + str(frames_written) + ' ' + str(writer_opened) + '\n')

        # stabilizing video: the first pass analyses the motion, the second
        # applies the transform and writes s_<id>.mpg
        os.system('ffmpeg -i ' + video_id + '.mpg' + ' -vf vidstabdetect -f null -')
        os.system('ffmpeg -i ' + video_id + '.mpg' + ' -vf vidstabtransform=smoothing=5:input="transforms.trf" ' + 's_' + video_id + '.mpg')

@bckenstler Did you manage to reproduce the results in the end? What recognition accuracy did you get?