Alignment of words is scattered when the PDF has images in it
ShyamBasa opened this issue · 0 comments
ShyamBasa commented
Description of the bug
PDF files that contain only words convert as expected, but a PDF file that contains images with some words in it shows the issue: the word alignment in the converted output is scattered. A sample file is attached for your reference.
test_conversion.pdf
How to reproduce the bug
from flask import Flask, request, jsonify
from pdf2docx import Converter
import os
from flask import send_file
import logging
from flask_cors import CORS
# Log everything at DEBUG level while reproducing the issue.
logging.basicConfig(level=logging.DEBUG)

# Flask needs the import name of this module; the pasted `Flask(name)` lost
# its double underscores and would raise NameError at import time.
app = Flask(__name__)

# Apply flask_cors to the app with its default settings.
CORS(app)
@app.route('/convert_pdf_to_docx', methods=['POST'])
def convert_pdf_to_docx():
    """Convert an uploaded PDF to DOCX and send the result back.

    Expects a multipart POST with the PDF under the ``file`` form field.

    Returns:
        The converted DOCX as an attachment on success. Otherwise a JSON
        ``{"error": ...}`` body with status 400 when no file was supplied
        or selected, or status 500 when saving or converting raises.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    uploaded = request.files['file']
    if uploaded.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Fixed scratch locations used by this reproduction script. They are
    # bound before the ``try`` so the cleanup in ``finally`` can always
    # refer to ``pdf_path``.
    pdf_path = 'C:\\Users\\user\\Downloads\\python\\test\\temp.pdf'
    docx_path = 'C:\\Users\\user\\Downloads\\python\\test\\output.docx'

    try:
        # Save the uploaded PDF to a temporary file.
        uploaded.save(pdf_path)

        # Convert PDF to DOCX (all pages: start=0, end=None).
        cv = Converter(pdf_path)
        try:
            cv.convert(docx_path, start=0, end=None)
        finally:
            # Close the converter even when convert() raises. Presumably it
            # keeps the PDF open until close(); on Windows an open file
            # cannot be removed, so skipping this could make the cleanup
            # below fail and hide the real error.
            cv.close()

        # Return the DOCX file.
        return send_file(docx_path, as_attachment=True)
    except Exception as e:
        # Top-level boundary of the endpoint: report any failure as JSON.
        return jsonify({"error": str(e)}), 500
    finally:
        # Clean up the temporary PDF file.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
# Script entry point. The pasted `if name == 'main'` lost its double
# underscores; as written it raises NameError instead of guarding the run.
if __name__ == '__main__':
    # The original called app.run() twice in a row; the first call blocks, so
    # the second (debug) server would only start after the first one stopped.
    # Both sets of options are merged into a single call.
    # NOTE(review): debug=True enables Flask's interactive debugger; only
    # bind it to a LAN address on a trusted network.
    app.run(host='192.168.200.5', port=5000, debug=True)
pdf2docx version
0.5.3
Operating system
Windows
Python version
3.10