Checkpoint has good accuracy; however, the frozen one performs very poorly.
kurikabocya opened this issue · 5 comments
Hello
[1]
I trained from scratch some images using multigpu_train.py.
Using the checkpoint files, eval.py achieves very good accuracy.
[2]
So, I converted the checkpoint files to a frozen .pb file by adding the following code to eval.py:
def main(argv=None):
import os
os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
try:
os.makedirs(FLAGS.output_dir)
except OSError as e:
if e.errno != 17:
raise
with tf.get_default_graph().as_default():
input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
f_score, f_geometry = model.model(input_images, is_training=False)
variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
saver = tf.train.Saver(variable_averages.variables_to_restore())
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
print('Restore from {}'.format(model_path))
saver.restore(sess, model_path)
# frozen graph
output_graph = "frozen_model.pb"
output_graph_def = tf.graph_util.convert_variables_to_constants(
sess,
tf.get_default_graph().as_graph_def(),
["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])
with tf.gfile.GFile(output_graph, "wb") as f:
f.write(output_graph_def.SerializeToString())
# frozen graph
im_fn_list = get_images()
[3]
And, I loaded the pb file by C# using the following code.
And the accuracy was very poor — how can I solve this?
The C# code is thanks to
https://gist.github.com/ludwo/c091ed6261d26654c8b71949d89f8142
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using OpenCvSharp.Dnn;
using System.IO;
using OpenCvSharp;
using System.Drawing;
namespace ConsoleApplication1
{
class Program
{
static string EastModelFile = @"D:\EAST\frozenModelFile\frozen_model.pb";
static int InputWidth = 320;
static int InputHeight = 320;
static float ConfThreshold = (float)0.6;
//static float NmsThreshold = (float)0.4;
static float NmsThreshold = (float)0.4;
static void Main(string[] args)
{
string folderName = @"D:\EAST_TEST\images";
ReadAllText(folderName);
}
/// <summary>
/// Read text from image.
/// </summary>
/// <see cref="https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp"/>
/// <param name="fileName">Name of the image file.</param>
/// <param name="loaderFactory">The loader factory.</param>
/// <returns>Scanned text.</returns>
//public string ReadAllText(string fileName, ITextDocumentLoaderFactory loaderFactory)
public static void ReadAllText(string folderName)
{
// Load network.
using (Net net = CvDnn.ReadNet(Path.GetFullPath(EastModelFile)))
{
string[] files = Directory.GetFiles(folderName, "*");
Scalar mean = new Scalar(123.68, 116, 78, 103.94);
foreach (string fileName in files)
{
//System.Diagnostics.Debug.WriteLine(fileName);
//Console.WriteLine(fileName);
if (fileName.Contains("east"))
{
// eastを含むファイルは処理しない
continue;
}
string ext = System.IO.Path.GetExtension(fileName); //extには".jpg"が代入されます。
string ext2 = ext.ToUpper();
if (!(ext2.Equals(".BMP") || ext2.Equals(".JPG") || ext2.Equals(".PNG")))
{
continue;
}
System.Diagnostics.Debug.WriteLine(fileName);
Console.WriteLine(fileName);
// Prepare input image
using (Mat img = new Mat(fileName))
{
// ここで320×320に分割
int wCount = img.Width / 320;
int hCount = img.Height / 320;
int wAmari = img.Width % 320;
int hAmari = img.Height % 320;
if (wAmari > 0)
{
wCount++;
}
if (hAmari > 0)
{
hCount++;
}
Mat[,] myMat;
myMat = new Mat[wCount, hCount];
Rect[,] myRect;
myRect = new Rect[wCount, hCount];
for (int ww = 0; ww < wCount; ww++)
{
for (int hh = 0; hh < hCount; hh++)
{
myRect[ww, hh].X = 320 * ww;
myRect[ww, hh].Y = 320 * hh;
myRect[ww, hh].Width = 320;
myRect[ww, hh].Height = 320;
if (myRect[ww, hh].X + 320 > img.Width)
{
myRect[ww, hh].X = img.Width - 320;
if(myRect[ww,hh].X < 0)
{
myRect[ww, hh].X = 0;
}
}
if (myRect[ww, hh].Y + 320 > img.Height)
{
myRect[ww, hh].Y = img.Height - 320;
if (myRect[ww, hh].Y < 0)
{
myRect[ww, hh].Y = 0;
}
}
myMat[ww, hh] = img.Clone(myRect[ww, hh]);
var myFileName = String.Format(@"d:\yyyy\" + "{0}_{1}_{2}.png", Path.GetFileName(fileName), ww, hh);
var myBlobFileName = String.Format(@"d:\yyyy\" + "{0}_{1}_{2}_blob.png", Path.GetFileName(fileName), ww, hh);
var myLineFileName = String.Format(@"d:\yyyy\" + "{0}_{1}_{2}_line.png", Path.GetFileName(fileName), ww, hh);
var myImg = myMat[ww, hh];
//myImg.SaveImage(myFileName);
//using (var blob = CvDnn.BlobFromImage(img, 1.0, new OpenCvSharp.Size(InputWidth, InputHeight), mean, true, false))
//using (var blob = CvDnn.BlobFromImage(img, 1.0, new Size(img.Width, img.Height), mean, true, false))
using (var blob = CvDnn.BlobFromImage(myImg, 1.0, new OpenCvSharp.Size(InputWidth, InputHeight), mean, true, false))
{
//blob.SaveImage(myBlobFileName);
// Forward Pass
// Now that we have prepared the input, we will pass it through the network. There are two outputs of the network.
// One specifies the geometry of the Text-box and the other specifies the confidence score of the detected box.
// These are given by the layers :
// feature_fusion/concat_3
// feature_fusion/Conv_7/Sigmoid
var outputBlobNames = new string[] { "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3" };
var outputBlobs = outputBlobNames.Select(_ => new Mat()).ToArray();
net.SetInput(blob);
net.Forward(outputBlobs, outputBlobNames);
Mat scores = outputBlobs[0];
Mat geometry = outputBlobs[1];
// Decode predicted bounding boxes (decode the positions of the text boxes along with their orientation)
//private void Decode(Mat scores, Mat geometry, float confThreshold, out IList<RotatedRect> boxes, out IList<float> confidences)
IList<RotatedRect> boxes;
IList<float> confidences;
Decode(scores, geometry, ConfThreshold, out boxes, out confidences);
// Apply non-maximum suppression procedure for filtering out the false positives and get the final predictions
int[] indices;
CvDnn.NMSBoxes(boxes, confidences, ConfThreshold, NmsThreshold, out indices);
// Render detections.
// for (var i = 0; i < boxes.Length; ++i)
// foreach (var box in boxes)
Point2f ratio = new Point2f((float)myImg.Cols / InputWidth, (float)myImg.Rows / InputHeight);
for (var i = 0; i < indices.Length; ++i)
{
RotatedRect box = boxes[indices[i]];
Point2f[] vertices = box.Points();
for (int j = 0; j < 4; ++j)
{
vertices[j].X *= ratio.X;
vertices[j].Y *= ratio.Y;
}
for (int j = 0; j < 4; ++j)
{
Cv2.Line(myImg, (int)vertices[j].X, (int)vertices[j].Y, (int)vertices[(j + 1) % 4].X, (int)vertices[(j + 1) % 4].Y, new Scalar(0, 255, 0), 3);
Console.WriteLine("{0},{1},{2},{3}",
(int)vertices[j].X,
(int)vertices[j].Y,
(int)vertices[(j + 1) % 4].X,
(int)vertices[(j + 1) % 4].Y);
System.Diagnostics.Debug.WriteLine("{0},{1},{2},{3}",
(int)vertices[j].X,
(int)vertices[j].Y,
(int)vertices[(j + 1) % 4].X,
(int)vertices[(j + 1) % 4].Y);
}
}
}
// Optional - Save detections
myImg.SaveImage(myLineFileName);
}
}
}
}
}
}
unsafe public static void Decode(Mat scores, Mat geometry, float confThreshold, out IList<RotatedRect> boxes, out IList<float> confidences)
{
boxes = new List<RotatedRect>();
confidences = new List<float>();
Console.WriteLine("Width:{0}, Height:{1}", geometry.Width, geometry.Height, geometry);
if ((scores == null || scores.Dims() != 4 || scores.Size(0) != 1 || scores.Size(1) != 1) ||
(geometry == null || geometry.Dims() != 4 || geometry.Size(0) != 1 || geometry.Size(1) != 5) ||
(scores.Size(2) != geometry.Size(2) || scores.Size(3) != geometry.Size(3)))
{
return;
}
int height = scores.Size(2);
int width = scores.Size(3);
for (int y = 0; y < height; ++y)
{
//Console.WriteLine("y:{0}", y);
//var scoresData = new System.ReadOnlySpan<float>((void*)scores.Ptr(0, 0, y), height);
//var x0Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 0, y), height);
//var x1Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 1, y), height);
//var x2Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 2, y), height);
//var x3Data = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 3, y), height);
//var anglesData = new ReadOnlySpan<float>((void*)geometry.Ptr(0, 4, y), height);
var scoresData = Enumerable.Range(0, height).Select(row => scores.At<float>(0, 0, y, row)).ToArray();
var x0Data = Enumerable.Range(0, height).Select(row => geometry.At<float>(0, 0, y, row)).ToArray();
var x1Data = Enumerable.Range(0, height).Select(row => geometry.At<float>(0, 1, y, row)).ToArray();
var x2Data = Enumerable.Range(0, height).Select(row => geometry.At<float>(0, 2, y, row)).ToArray();
var x3Data = Enumerable.Range(0, height).Select(row => geometry.At<float>(0, 3, y, row)).ToArray();
var anglesData = Enumerable.Range(0, height).Select(row => geometry.At<float>(0, 4, y, row)).ToArray();
for (int x = 0; x < width; ++x)
{
//Console.WriteLine("x:{0}", x);
var score = scoresData[x];
if (score >= confThreshold)
{
float offsetX = x * 4.0f;
float offsetY = y * 4.0f;
float angle = anglesData[x];
float cosA = (float)Math.Cos(angle);
float sinA = (float)Math.Sin(angle);
float x0 = x0Data[x];
float x1 = x1Data[x];
float x2 = x2Data[x];
float x3 = x3Data[x];
float h = x0 + x2;
float w = x1 + x3;
Point2f offset = new Point2f(offsetX + (cosA * x1) + (sinA * x2), offsetY - (sinA * x1) + (cosA * x2));
Point2f p1 = new Point2f((-sinA * h) + offset.X, (-cosA * h) + offset.Y);
Point2f p3 = new Point2f((-cosA * w) + offset.X, (sinA * w) + offset.Y);
RotatedRect r = new RotatedRect(new Point2f(0.5f * (p1.X + p3.X), 0.5f * (p1.Y + p3.Y)), new Size2f(w, h), (float)(-angle * 180.0f / Math.PI));
//Rect r = new Rect(Convert.ToInt32(p1.X), Convert.ToInt32(p1.Y), Convert.ToInt32(p3.X), Convert.ToInt32(p3.Y));
boxes.Add(r);
confidences.Add(score);
}
}
}
}
}
}
@kurikabocya I ran into the same issue. Apart from the C# code I'm just using python to load the frozen graph and test.
Any update if you solved this?
Thanks !
I was wrong. If you resize the image — for example, 150 x 200 to 160 (32 x 5) x 224 (32 x 7) — then the frozen model file also shows good results.
@kurikabocya That's interesting. Any reason you had to do resize to that specific dimensions (160x224) ?
240 is not a multiple of 32
Is this true for the pre-trained model from the authors as well or just the model you trained ?