Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 59fce3a

Browse files
committed
add argument --tessdata-dir
1 parent 80a5669 commit 59fce3a

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

‎hocr-editor.py‎

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -437,7 +437,7 @@ def _qimage_to_pil(self, qimage: QImage) -> PIL.Image.Image:
437437
returnimg.convert("RGB")
438438

439439
def_ocr_image(self,qimage:QImage,langs:Optional[str]=None,timeout:int=30)->bytes:
440-
ifshutil.which("tesseract")isNone:
440+
ifshutil.which(self.editor.args.tesseract_command)isNone:
441441
# tesseract is not installed
442442
returnNone
443443
langs=langsorself.editor.ocr_langs
@@ -448,14 +448,22 @@ def _ocr_image(self, qimage: QImage, langs: Optional[str] = None, timeout: int =
448448
# TODO? use https://github.com/sirfz/tesserocr
449449
tiff_bytes=pil_to_tiff_bytes(pil_img)
450450
args= [
451-
"tesseract",
451+
self.editor.args.tesseract_command,
452452
"-",# input: stdin
453453
"-",# output: stdout
454454
"-l",langs,
455-
# "-c", "tessedit_create_hocr=1", # config
456-
"quiet",# config: hide "Estimating resolution as N" messages
457-
"hocr",# extension
455+
"-c","tessedit_create_hocr=1",
456+
# TODO get dpi value from hocr file
457+
# <div class='ocr_page' id='page_1' title='...; scan_res 300 300'>
458+
# "--dpi", "300",
459+
"--loglevel","WARN",# ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL, OFF
458460
]
461+
ifself.editor.args.tessdata_dir:
462+
args+= [
463+
"--oem","1",
464+
"--psm","6",
465+
"--tessdata-dir",self.editor.args.tessdata_dir,
466+
]
459467
hocr_bytes=subprocess.check_output(args,input=tiff_bytes,timeout=timeout)
460468
returnhocr_bytes
461469

@@ -1100,6 +1108,15 @@ def main():
11001108
default=None,
11011109
help="Overlay color (color name or #RRGGBB)",
11021110
)
1111+
parser.add_argument(
1112+
"--tesseract-command",
1113+
default="tesseract",
1114+
)
1115+
parser.add_argument(
1116+
"--tessdata-dir",
1117+
default=None,
1118+
help="usually a git clone of https://github.com/tesseract-ocr/tessdata_best",
1119+
)
11031120
args=parser.parse_args()
11041121

11051122
# handle Ctrl+C from terminal

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp