"""ocr_script.py: run docTR OCR on one image and print the words on one line.

The script builds an OCR predictor, feeds it a single image, walks the
returned result (pages -> blocks -> lines -> words) and prints every word's
``value`` joined by single spaces.
"""

# Image processed when the script is run without an explicit path.
DEFAULT_IMAGE_PATH = "assets/images/bank_data.jpg"


def extract_words(result):
    """Return the ``value`` of every word in *result*, in traversal order.

    Args:
        result: Object exposing ``pages``; each page exposes ``blocks``, each
            block ``lines``, each line ``words``, and each word ``value``
            (the structure the OCR predictor call returns in this script).

    Returns:
        A flat list of word values; empty when the result holds no words.
    """
    return [
        word.value
        for page in result.pages
        for block in page.blocks
        for line in block.lines
        for word in line.words
    ]


def main(image_path=DEFAULT_IMAGE_PATH):
    """Run OCR on *image_path* and print all recognized words on one line.

    Args:
        image_path: Path of the image handed to ``DocumentFile.from_images``.
    """
    # Imported here rather than at module top so that importing this module
    # (e.g. to reuse ``extract_words``) does not pull in the OCR stack.
    from doctr.io import DocumentFile
    from doctr.models import ocr_predictor

    model = ocr_predictor(
        det_arch='db_resnet50',
        reco_arch='crnn_vgg16_bn',
        pretrained=True,
    )
    single_img_doc = DocumentFile.from_images(image_path)
    result = model(single_img_doc)

    # Join as single line
    print(' '.join(extract_words(result)))


if __name__ == "__main__":
    main()