Does not recognize text separated by a wide gap of spaces as separate results
lonok opened this issue · 0 comments
lonok commented
Summary
I have an image like this:
Name One <-- MANY SPACES --> Name Two
URL of image: https://ibb.co/VMHKyY3
Wrong Result: Name One Name Two (two names in one result)
Expected Results: Name One, Name Two (two results)
Reproducibility
Reproducibility Frequency
- 100%
Reproducible
client := gosseract.NewClient()
defer client.Close()
client.SetLanguage("eng")
client.SetPageSegMode(gosseract.PSM_AUTO) // <--- I TRY WITH ALL, SAME RESULT
rows, cols := img.Rows(), img.Cols()
scale := 2
to_rows := rows * scale
to_cols := cols * scale
double := gocv.NewMatWithSize(to_rows, to_cols, gocv.MatTypeCV8UC3)
gocv.Resize(*img, &double, image.Pt(to_cols, to_rows), 0, 0, gocv.InterpolationCubic)
// get bytes
buf, _ := gocv.IMEncode(gocv.PNGFileExt, double)
bytes = buf.GetBytes()
client.SetWhitelist("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ \t")
client.SetImageFromBytes(bytes)
boxes, _ := client.GetBoundingBoxes(gosseract.RIL_TEXTLINE)
for _, box := range boxes {
fmt.Printf("%#v\n", box)
}
output
gosseract.BoundingBox{Box:image.Rectangle{Min:image.Point{X:188, Y:808}, Max:image.Point{X:1464, Y:839}}, Word:"Name One Name Two\n", Confidence:95.23932647705078, BlockNum:0, ParNum:0, LineNum:0, WordNum:0}
Environment
Linux dell-g15 6.2.0-33-generic #33~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Sep 7 10:33:52 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
GO111MODULE=''
GOARCH='amd64'
GOBIN=''
GOCACHE='/home/lonok/.cache/go-build'
GOENV='/home/lonok/.config/go/env'
GOEXE=''
GOEXPERIMENT=''
GOFLAGS=''
GOHOSTARCH='amd64'
GOHOSTOS='linux'
GOINSECURE=''
GOMODCACHE='/home/lonok/go/pkg/mod'
GONOPROXY=''
GONOSUMDB=''
GOOS='linux'
GOPATH='/home/lonok/go'
GOPRIVATE=''
GOPROXY='https://proxy.golang.org,direct'
GOROOT='/usr/lib/go'
GOSUMDB='sum.golang.org'
GOTMPDIR=''
GOTOOLCHAIN='auto'
GOTOOLDIR='/usr/lib/go/pkg/tool/linux_amd64'
GOVCS=''
GOVERSION='go1.21.1'
GCCGO='gccgo'
GOAMD64='v1'
AR='ar'
CC='gcc'
CXX='g++'
CGO_ENABLED='1'
GOMOD='/dev/null'
GOWORK=''
CGO_CFLAGS='-O2 -g'
CGO_CPPFLAGS=''
CGO_CXXFLAGS='-O2 -g'
CGO_FFLAGS='-O2 -g'
CGO_LDFLAGS='-O2 -g'
PKG_CONFIG='pkg-config'
GOGCCFLAGS='-fPIC -m64 -pthread -Wl,--no-gc-sections -fmessage-length=0 -ffile-prefix-map=/tmp/go-build3605953866=/tmp/go-build -gno-record-gcc-switches'
go version
go version go1.21.1 linux/amd64
tesseract 5.3.2-17-gce93
leptonica-1.82.0
libgif 5.1.9 : libjpeg 8d (libjpeg-turbo 2.1.1) : libpng 1.6.37 : libtiff 4.3.0 : zlib 1.2.11 : libwebp 1.2.2 : libopenjp2 2.4.0
Found AVX512BW
Found AVX512F
Found AVX512VNNI
Found AVX2
Found AVX
Found FMA
Found SSE4.1
Found OpenMP 201511
Found libarchive 3.6.0 zlib/1.2.11 liblzma/5.2.5 bz2lib/1.0.8 liblz4/1.9.3 libzstd/1.4.8
Found libcurl/7.81.0 OpenSSL/3.0.2 zlib/1.2.11 brotli/1.0.9 zstd/1.4.8 libidn2/2.3.2 libpsl/0.21.0 (+libidn2/2.3.2) libssh/0.9.6/openssl/zlib nghttp2/1.43.0 librtmp/2.3 OpenLDAP/2.5.16