Ubuntu:
sudo apt install tesseract-ocr
wget https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata
Arch Linux:
yay -S tesseract tesseract-data-eng
poetry install
curl https://ceodelhi.gov.in/engdata/AC40/U05A40P1.pdf --output a.pdf
export TESSDATA_PREFIX=/usr/share/tessdata/
poetry run python p2i.py