# Python · 18 lines · 563 B
# Run docTR OCR on a single image and print every recognised word on one line.
from doctr.models import ocr_predictor
from doctr.io import DocumentFile

# Build the OCR predictor with the db_resnet50 detection architecture and the
# crnn_vgg16_bn recognition architecture, requesting pretrained weights.
model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)

# Load the input image as a document and run the predictor on it.
single_img_doc = DocumentFile.from_images("assets/images/bank_data.jpg")
result = model(single_img_doc)

# Extract all words: walk the result hierarchy (pages -> blocks -> lines ->
# words) and collect each word's text in traversal order.
all_words = []
for page in result.pages:
    for block in page.blocks:
        for line in block.lines:
            for word in line.words:
                all_words.append(word.value)

# Join as a single space-separated line and print it.
single_line = ' '.join(all_words)
print(single_line)