A Bangla app for learning the Al-Quran word by word.
We scrape the word-by-word Bangla translation of the Al-Quran from Hadith BD.
Fetch Web Pages From Hadith BD
import requests
import os
# Download the word-by-word pages of surahs 1..114 from Hadith BD and save the
# raw HTML of every page to disk so it can be parsed offline afterwards.
REQUEST_TIMEOUT_SECONDS = 30  # without a timeout requests.get() can block forever
MAX_PAGE_INDEX = 15           # highest pageNum_suraGroup value that is requested
SMALL_PAGE_BYTES = 35000      # responses below this size trigger the end check

for sura_number in range(1, 115):
    page = 0
    while page <= MAX_PAGE_INDEX:
        url = f'https://www.hadithbd.com/quran/byword/detail/?pageNum_suraGroup={page}&sura={sura_number}'
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        html_content = response.content

        if len(html_content) < SMALL_PAGE_BYTES:
            # A small response is compared with the following page: when both
            # have exactly the same length the surah is treated as finished
            # and the current page is not saved.
            # NOTE(review): presumably both are "no more words" placeholder
            # pages -- confirm against the site.
            this_page_length = len(html_content)
            next_page_url = f'https://www.hadithbd.com/quran/byword/detail/?pageNum_suraGroup={page+1}&sura={sura_number}'
            next_page_response = requests.get(
                next_page_url, timeout=REQUEST_TIMEOUT_SECONDS
            )
            next_page_html_content = next_page_response.content
            next_page_length = len(next_page_html_content)
            if this_page_length == next_page_length:
                break

        # The loop index starts at 0, while the saved files are numbered from 1.
        file_name = f'/content/drive/MyDrive/__FilePath__/Surah_{sura_number}_{page+1}.html'
        with open(file_name, 'wb') as f:
            f.write(html_content)
        print(f'Surah{sura_number}.{page+1} Length = {len(html_content)}')
        page = page + 1
import os
import re
import pandas as pd
from bs4 import BeautifulSoup
def extract_verse_info(surah_file, filename):
    """Extract the word-by-word rows from one saved HTML page.

    Args:
        surah_file: Path of the HTML file to read (opened as UTF-8 text).
        filename: Name of that file. The first run of digit / '-' characters
            found in it is used as the surah number.

    Returns:
        A list of dicts with the keys 'Surah Number', 'Verse Number',
        'Arabic Word' and 'Bangla Word', one dict per word box found.
    """
    with open(surah_file, 'r', encoding='utf-8') as file:
        content = file.read()
    soup = BeautifulSoup(content, 'html.parser')

    # Collect the first run of digits (or '-') in the file name and stop at
    # the first other character that follows it.
    surah_number = ""
    for char in filename:
        if char.isdigit() or char == '-':
            surah_number += char
        elif surah_number:
            break
    print('Surah Number is', surah_number)

    word_box_class = 'col-lg-2 col-md-3 col-sm-6 mb-4 text-right box user-select-all'
    verses = []
    for verse_tag in soup.find_all('p', class_='text-info text-right'):
        verse_number = verse_tag.text.strip()
        # NOTE(review): find_next_siblings() returns every matching sibling
        # that follows the verse marker. If several verse markers share one
        # parent, the boxes of later verses are also reported under this verse
        # number -- confirm against the page structure.
        for word_tag in verse_tag.find_next_siblings('div', class_=word_box_class):
            arabic_tag = word_tag.find_next('p', dir='rtl')
            bangla_tag = word_tag.find_next(
                'p', dir='ltr', class_='font-kalpurush-reading'
            )
            if arabic_tag is None or bangla_tag is None:
                # find_next() yields None when nothing matches; skip the box
                # instead of aborting the whole run with an AttributeError.
                print(f"Skipping incomplete word box in {filename} (verse {verse_number})")
                continue

            arabic_word = arabic_tag.text.strip()
            bangla_word = bangla_tag.text.strip()
            print(f"Surah Number: {surah_number}", end="\t")
            print(f"Verse Number: {verse_number}", end="\t")
            print(f"Arabic Word: {arabic_word}", end="\t")
            print(f"Bangla Word: {bangla_word}")
            verses.append({
                'Surah Number': surah_number,
                'Verse Number': verse_number,
                'Arabic Word': arabic_word,
                'Bangla Word': bangla_word
            })
    return verses
# Parse every saved HTML page in surah_dir and write all rows to one CSV file.
# NOTE(review): the download step earlier in this file writes to
# '/content/drive/MyDrive/__FilePath__/' (double underscores) while this step
# reads '/content/drive/MyDrive/_FilePath_' -- confirm both are meant to be
# the same folder.
surah_dir = '/content/drive/MyDrive/_FilePath_'
all_verses = []
count=0  # not used by the code visible here; kept in case later code reads it

# os.listdir() returns names in arbitrary order. Sort by the numbers embedded
# in each name so that rows come out as surah 1 page 1, surah 1 page 2, ...,
# and not in directory or lexicographic order ("Surah_10" before "Surah_2").
html_files = sorted(
    (name for name in os.listdir(surah_dir) if name.endswith('.html')),
    key=lambda name: [int(number) for number in re.findall(r'\d+', name)],
)
for filename in html_files:
    print(filename)
    surah_file = os.path.join(surah_dir, filename)
    verses = extract_verse_info(surah_file, filename)
    all_verses.extend(verses)

quran_df = pd.DataFrame(all_verses)
output_csv_path = os.path.join(surah_dir, 'database/hugging_face_quranic_bangla.csv')
# to_csv() does not create missing folders, so make sure 'database/' exists.
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
quran_df.to_csv(output_csv_path, index=False)
print("Complete Database Successfully")