Cannot parse docx and image files
Opened this issue · 0 comments
- docx parse error
root@lixl-openwrt:/tmp/tst# curl -X POST -F "file=@/tmp/tst/tst.docx" http://192.168.1.98:8000/parse_document/docs
Internal Server Error
INFO: 192.168.1.130:36334 - "POST /parse_document/docs HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 411, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 69, in call
return await self.app(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/applications.py", line 123, in call
await self.middleware_stack(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/middleware/errors.py", line 186, in call
raise exc
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/middleware/errors.py", line 164, in call
await self.app(scope, receive, _send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/middleware/cors.py", line 85, in call
await self.app(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 65, in call
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/routing.py", line 756, in call
await self.middleware_stack(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/routing.py", line 776, in app
await route.handle(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/routing.py", line 297, in handle
await self.app(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/routing.py", line 77, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/starlette/routing.py", line 72, in app
response = await func(request)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/fastapi/routing.py", line 278, in app
raw_response = await run_endpoint_function(
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/fastapi/routing.py", line 191, in run_endpoint_function
return await dependant.call(**values)
File "/root/omniparse/omniparse/documents/router.py", line 114, in parse_doc_endpoint
subprocess.run(command, check=True)
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/subprocess.py", line 503, in run
with Popen(*popenargs, **kwargs) as process:
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/subprocess.py", line 971, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/subprocess.py", line 1863, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: 'libreoffice'
1.1 Then I installed LibreOffice:
(base) root@chenjunTest:~/tst# apt install libreoffice-common
Reading package lists... Done
Building dependency tree... Done
...
There are still many errors:
INFO: 192.168.1.130:56824 - "POST /parse_document/docs HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 411, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/root/miniconda3/envs/omniparse-venv/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 69, in call
return await self.app(scope, receive, send)
...
File "/root/omniparse/omniparse/documents/router.py", line 120, in parse_doc_endpoint
with open(output_pdf_path, "rb") as pdf_file:
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpfwulh278/tmpuuyr0iuj.pdf'
- image parse error
root@chenjunTest:~/tst# curl -X POST -F "file=@/root/tst/tstenglist.JPG" http://localhost:8000/parse_image/process_image
{"detail":[{"type":"missing","loc":["body","image"],"msg":"Field required","input":null},{"type":"missing","loc":["body","task"],"msg":"Field required","input":null}]}
INFO: 127.0.0.1:60562 - "POST /parse_image/process_image HTTP/1.1" 422 Unprocessable Entity
(base) root@chenjunTest:~/tst# curl -X POST -F "file=@/root/tst/tstenglist.JPG" -F "task=OCR" http://localhost:8000/parse_image/process_image
{"detail":[{"type":"missing","loc":["body","image"],"msg":"Field required","input":null}]}
(base) root@chenjunTest:~/tst# curl -X POST -F "image=@/root/tst/tstenglist.JPG" -F "task=Caption" http://localhost:8000/parse_image/process_image
{"detail":"CUDA out of memory. Tried to allocate 20.00 MiB. GPU "}
INFO: 127.0.0.1:48706 - "POST /parse_image/process_image HTTP/1.1" 500 Internal Server Error
2.1
(base) root@chenjunTest:~# curl -X POST -F "image=@/root/tst/tstpng.PNG" -F "task=OCR" -F "prompt=Optional prompt" http://localhost:8000/parse_media/process_image
{"detail":"Not Found"}
(base) root@chenjunTest:~# curl -X POST -F "image=@/root/tst/tstpng.PNG" -F "task=OCR" http://localhost:8000/parse_media/process_image
{"detail":"Not Found"}
(base) root@chenjunTest:~# curl -X POST -F "file=@/root/tst/tstpng.PNG" http://localhost:8000/parse_media/image
{"detail":"Not Found"}
INFO: 127.0.0.1:54184 - "POST /parse_media/process_image HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:45568 - "POST /parse_media/process_image HTTP/1.1" 404 Not Found
INFO: 127.0.0.1:32800 - "POST /parse_media/image HTTP/1.1" 404 Not Found