On 24/10/2020 08.35, Thomas Huth wrote: > On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote: >> We are going to reuse the tesseract OCR code. >> Create a new tesseract_ocr() helper and use it. >> >> Signed-off-by: Philippe Mathieu-Daudé <[email protected]> >> --- >> tests/acceptance/machine_m68k_nextcube.py | 21 +++++---------------- >> tests/acceptance/tesseract_utils.py | 18 ++++++++++++++++++ >> 2 files changed, 23 insertions(+), 16 deletions(-) >> >> diff --git a/tests/acceptance/machine_m68k_nextcube.py >> b/tests/acceptance/machine_m68k_nextcube.py >> index 3c7400c43e4..09e2745cc52 100644 >> --- a/tests/acceptance/machine_m68k_nextcube.py >> +++ b/tests/acceptance/machine_m68k_nextcube.py >> @@ -7,13 +7,11 @@ >> >> import os >> import time >> -import logging >> >> from avocado_qemu import Test >> from avocado import skipUnless >> -from avocado.utils import process >> >> -from tesseract_utils import tesseract_available >> +from tesseract_utils import tesseract_available, tesseract_ocr >> >> PIL_AVAILABLE = True >> try: >> @@ -61,12 +59,8 @@ def test_bootrom_framebuffer_size(self): >> def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): >> screenshot_path = os.path.join(self.workdir, "dump.ppm") >> self.check_bootrom_framebuffer(screenshot_path) >> - >> - console_logger = logging.getLogger('console') >> - text = process.run("tesseract %s stdout" % >> screenshot_path).stdout_text >> - for line in text.split('\n'): >> - if len(line): >> - console_logger.debug(line) >> + lines = tesseract_ocr(screenshot_path, tesseract_version=3) >> + text = '\n'.join(lines) >> self.assertIn('Backplane', text) >> self.assertIn('Ethernet address', text) >> >> @@ -77,13 +71,8 @@ def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): >> def test_bootrom_framebuffer_ocr_with_tesseract_v4(self): >> screenshot_path = os.path.join(self.workdir, "dump.ppm") >> self.check_bootrom_framebuffer(screenshot_path) >> - >> - console_logger = logging.getLogger('console') >> - proc = 
process.run("tesseract --oem 1 %s stdout" % screenshot_path) >> - text = proc.stdout_text >> - for line in text.split('\n'): >> - if len(line): >> - console_logger.debug(line) >> + lines = tesseract_ocr(screenshot_path, tesseract_version=4) >> + text = '\n'.join(lines) >> self.assertIn('Testing the FPU, SCC', text) >> self.assertIn('System test failed. Error code', text) >> self.assertIn('Boot command', text) >> diff --git a/tests/acceptance/tesseract_utils.py >> b/tests/acceptance/tesseract_utils.py >> index acd6e8c2faa..72cd9ab7989 100644 >> --- a/tests/acceptance/tesseract_utils.py >> +++ b/tests/acceptance/tesseract_utils.py >> @@ -6,7 +6,9 @@ >> # later. See the COPYING file in the top-level directory. >> >> import re >> +import logging >> >> +from avocado.utils import process >> from avocado.utils.path import find_command, CmdNotFoundError >> >> def tesseract_available(expected_version): >> @@ -26,3 +28,19 @@ def tesseract_available(expected_version): >> return False >> # now this is guaranteed to be a digit >> return int(match.groups()[0]) == expected_version >> + >> + >> +def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3): >> + console_logger = logging.getLogger('tesseract') >> + console_logger.debug(image_path) >> + if tesseract_version == 4: >> + tesseract_args += ' --oem 1' >> + proc = process.run("tesseract {} {} stdout".format(tesseract_args, >> + image_path)) >> + lines = [] >> + for line in proc.stdout_text.split('\n'): >> + sline = line.strip() >> + if len(sline): >> + console_logger.debug(sline) >> + lines += [sline] >> + return lines > > Would it make sense to completely hide the tesseract version handling in > this new tesseract_utils.py file now, so that the tests themselves do not > have to worry about this anymore (i.e. would it be possible to merge > test_bootrom_framebuffer_ocr_with_tesseract_v3 and > test_bootrom_framebuffer_ocr_with_tesseract_v4 into one single test that way?)
If I understand correctly, there is now also a proper 4.x release of Tesseract, so maybe we can simply drop the testing with version 3 now?

Thomas
