All OCR words are collected in a loop and joined into a single sentence

This commit is contained in:
Hasnain Ahmed 2025-10-07 00:23:42 +05:00
parent c46e4df1bf
commit f54e6b1ada

18
ocr_script.py Normal file
View File

@ -0,0 +1,18 @@
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
def _iter_words(document):
    """Yield the text of every word found in an OCR result.

    Walks the ``pages -> blocks -> lines -> words`` hierarchy of *document*
    in traversal order and yields each word's ``value`` attribute.
    """
    for page in document.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    yield word.value


# Build the OCR predictor; det_arch / reco_arch select the detection and
# recognition architectures, and pretrained=True requests pretrained weights.
model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)

# Load the input image and run it through the predictor.
single_img_doc = DocumentFile.from_images("assets/images/bank_data.jpg")
result = model(single_img_doc)

# Extract all words, in the order the result hierarchy lists them.
all_words = list(_iter_words(result))

# Join as single line: words are separated by exactly one space, so the
# original line and block boundaries are not preserved in the output.
single_line = ' '.join(all_words)
print(single_line)