Source code for usseg.Refined_anon_2_html

#!/usr/bin/env python3

"""Segments the ultrasound images"""

# Python imports
import os
import logging
import sys
from PIL import Image
import pickle

# Module imports
import matplotlib.pyplot as plt
import pytesseract
import cv2
import traceback
import toml
import numpy as np

# Import segmentation module
# BASE_DIR is the parent of the directory containing this file. It is appended to
# sys.path before the project import below, presumably so that the ``usseg``
# package resolves when this file is executed directly as a script.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
sys.path.append(BASE_DIR)
from usseg import General_functions

# Module-level logger. NOTE: it is keyed on this file's path (__file__), not __name__.
logger = logging.getLogger(__file__)


def setup_tesseract():
    """Configure pytesseract for the current platform and report the tesseract version.

    On Windows and Cygwin, ``pytesseract.pytesseract.tesseract_cmd`` is set to
    "C:/Program Files/Tesseract-OCR/tesseract.exe".

    On Linux nothing is changed. On any other platform (including macOS) a
    warning is logged and nothing is changed. In both cases the ``tesseract``
    executable is expected to be available on the PATH.

    If this is not the desired behaviour, set
    ``pytesseract.pytesseract.tesseract_cmd`` yourself after calling this
    function.

    Returns:
        The value returned by ``pytesseract.get_tesseract_version()``, i.e. the
        installed tesseract version.
    """
    platform = sys.platform

    if platform.startswith(("win32", "cygwin")):
        # Default install location of the Windows tesseract installer.
        pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"
    elif not platform.startswith("linux"):
        # It is the tesseract binary (not the pytesseract Python package) that
        # has to be discoverable on the PATH.
        logger.warning(
            "Platform %s is not recognised.\n"
            "Please ensure that you added tesseract to your system's path.",
            platform,
        )

    return pytesseract.get_tesseract_version()


def _resolve_filenames(filenames):
    """Normalise the ``filenames`` argument of :func:`segment` to a list of paths.

    Args:
        filenames: ``None`` (use the bundled test image name), a list or tuple
            of image paths, a single image path (``str`` or ``os.PathLike``),
            a path ending in ".pkl"/".pickle" whose pickled content is a list
            of paths or a dict mapping keys to lists of paths, or such a dict
            directly.

    Returns:
        list: The image paths to process.

    Raises:
        TypeError: If ``filenames`` (or the unpickled object) is of an
            unsupported type.
    """
    if filenames is None:
        return ["Lt_test_image.png"]
    if isinstance(filenames, list):
        return filenames
    if isinstance(filenames, tuple):
        return list(filenames)

    if isinstance(filenames, os.PathLike):
        filenames = os.fspath(filenames)

    if isinstance(filenames, str):
        if not filenames.endswith((".pkl", ".pickle")):
            return [filenames]
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file; only pass pickle files that come from a trusted source.
        with open(filenames, "rb") as f:
            filenames = pickle.load(f)
        if isinstance(filenames, (list, tuple)):
            return list(filenames)

    if isinstance(filenames, dict):
        # Flatten the per-key lists of paths, preserving the dict's key order.
        return [name for group in filenames.values() for name in group]

    raise TypeError(
        f"Unrecognised filenames type {type(filenames)}. "
        "Expected a string, a list or a dict."
    )


def _locate_axis(cv2_img, side, Left_dimensions, Right_dimensions):
    """Run the tick search followed by the label search for one axis side.

    Args:
        cv2_img: Image array passed through to ``General_functions``.
        side (str): "Left" or "Right", forwarded to ``Search_for_ticks``.
        Left_dimensions: Current left ROI, as produced by ``Define_end_ROIs``
            or a previous call of this helper.
        Right_dimensions: Current right ROI, same provenance.

    Returns:
        tuple: ``(numbers, positions, axis_roi, Left_dimensions, Right_dimensions)``
        where the first three items are the last three values returned by
        ``Search_for_labels`` and the dimensions are the (possibly updated)
        values returned by ``Search_for_ticks``.
    """
    (
        Cs,
        ROIAX,
        CenPoints,
        onY,
        BCs,
        TYLshift,
        _thresholded_image,
        Side,
        Left_dimensions,
        Right_dimensions,
        ROI2,
        ROI3,
    ) = General_functions.Search_for_ticks(
        cv2_img, side, Left_dimensions, Right_dimensions
    )
    _roiax, numbers, positions, axis_roi = General_functions.Search_for_labels(
        Cs,
        ROIAX,
        CenPoints,
        onY,
        BCs,
        TYLshift,
        Side,
        Left_dimensions,
        Right_dimensions,
        cv2_img,
        ROI2,
        ROI3,
    )
    return numbers, positions, axis_roi, Left_dimensions, Right_dimensions


def _segment_single_image(input_image_filename, output_dir):
    """Run the full segmentation pipeline on one image.

    Every stage depends on the results of the previous ones, so the first
    failing stage is logged (with traceback) and the pipeline stops for this
    image. Running in its own function scope guarantees that no intermediate
    result from a previously processed image can be reused by accident.

    Args:
        input_image_filename (str): Path to the image to process.
        output_dir (str): Existing directory in which the annotated and
            digitized PNG files are written.

    Returns:
        tuple: ``(annotated_path, digitized_path, text)``. ``annotated_path``
        and ``digitized_path`` are ``None`` when the corresponding file was not
        written; ``text`` is the object returned by ``Text_from_greyscale``
        (replaced by the result of ``Plot_correction`` when that succeeds), or
        ``None`` if text extraction failed.
    """
    # Everything before the first "." of the base name is used as output stem.
    image_stem = os.path.basename(input_image_filename).partition(".")[0]

    try:  # Text extraction
        colRGBA = Image.open(input_image_filename)  # These images are in RGBA form
        PIL_col = colRGBA.convert("RGB")  # RGB copy for the PIL based helpers
        cv2_img = np.array(colRGBA)  # array form for the cv2 based helpers
        COL = General_functions.Colour_extract_vectorized(PIL_col, [255, 255, 100], 95, 95)
        logger.info("Done Colour extract")
        _text_fail, df = General_functions.Text_from_greyscale(cv2_img, COL)
    except Exception:
        logger.exception("Failed Text extraction")
        return None, None, None

    try:  # Initial segmentation
        segmentation_mask, Xmin, Xmax, Ymin, Ymax = General_functions.Initial_segmentation(
            input_image_obj=PIL_col
        )
    except Exception:
        logger.exception("Failed Initial segmentation")
        return None, None, df

    try:  # Define end ROIs
        Left_dimensions, Right_dimensions = General_functions.Define_end_ROIs(
            segmentation_mask, Xmin, Xmax, Ymin, Ymax
        )
    except Exception:
        logger.exception("Failed Defining ROI")
        return None, None, df

    Waveform_dimensions = [Xmin, Xmax, Ymin, Ymax]

    try:  # Search for ticks and labels, left axis first, then right axis
        Lnumber, Lpositions, ROIL, Left_dimensions, Right_dimensions = _locate_axis(
            cv2_img, "Left", Left_dimensions, Right_dimensions
        )
        Rnumber, Rpositions, ROIR, Left_dimensions, Right_dimensions = _locate_axis(
            cv2_img, "Right", Left_dimensions, Right_dimensions
        )
    except Exception:
        logger.exception("Failed Axes search")
        return None, None, df

    try:  # Refine segmentation
        (
            refined_segmentation_mask, _top_curve_mask, top_curve_coords
        ) = General_functions.Segment_refinement(
            cv2_img, Xmin, Xmax, Ymin, Ymax, df, Lnumber, Rnumber
        )
    except Exception:
        logger.exception("Failed Segment refinement")
        return None, None, df

    try:  # Digitize the curve and write the annotated image
        Xplot, Yplot, _ynought = General_functions.Plot_Digitized_data(
            Rnumber, Rpositions, Lnumber, Lpositions, top_curve_coords,
        )
        col = General_functions.Annotate(
            input_image_obj=colRGBA,
            refined_segmentation_mask=refined_segmentation_mask,
            Left_dimensions=Left_dimensions,
            Right_dimensions=Right_dimensions,
            Waveform_dimensions=Waveform_dimensions,
            Left_axis=ROIL,
            Right_axis=ROIR,
        )
        # os.path.join keeps the file inside output_dir whether or not the
        # directory was given with a trailing separator.
        annotated_path = os.path.join(output_dir, image_stem + "_Annotated.png")
        fig1, ax1 = plt.subplots(1)
        ax1.imshow(col)
        ax1.set_xticks([])
        ax1.set_yticks([])
        ax1.tick_params(axis="both", which="both", length=0)
        fig1.savefig(annotated_path, dpi=900, bbox_inches="tight", pad_inches=0)
    except Exception:
        logger.exception("Failed Digitization")
        return None, None, df

    try:
        df = General_functions.Plot_correction(Xplot, Yplot, df)
    except Exception:
        # Keep the annotated image and the uncorrected text data.
        logger.exception("Failed correction")
        return annotated_path, None, df

    try:
        digitized_path = os.path.join(output_dir, image_stem + "_Digitized.png")
        # NOTE(review): assumes the digitized plot lives on matplotlib figure 2
        # (presumably drawn by the General_functions plotting helpers) - confirm.
        plt.figure(2)
        plt.savefig(digitized_path, dpi=900, bbox_inches="tight", pad_inches=0)
    except Exception:
        logger.exception("Failed Digitization")
        return annotated_path, None, df

    return annotated_path, digitized_path, df


def segment(filenames=None, output_dir=None, pickle_path=None):
    """Segments the pre-selected ultrasound images

    Each image is processed independently. The three result lists always have
    exactly one entry per input image, in input order, with ``None`` standing
    in for anything that could not be produced.

    Args:
        filenames (str, list, tuple or dict, optional) : If a string (or
            ``os.PathLike``), must be either a single image path or a path to
            a pickle file, recognised by the extension ".pkl" or ".pickle".
            The pickle may hold a list of image paths or a dict whose values
            are lists of image paths.
            If a list or tuple, must contain the paths of the ultrasound
            images to segment.
            If a dict, its values must be lists of image paths.
            If None, will load a test image ("Lt_test_image.png").

        output_dir (str, optional) : Path to the output directory to store
            annotated images. It is created if missing. If None, will load
            "output_dir" from "config.toml".
            Defaults to None.
        pickle_path (str or bool) : If pickle_path is False, will not store
            the results as a pickle file. If None, will load the pickle path
            from "config.toml" (``["pickle"]["segmented_data"]``).
            Else if a string, will dump the pickled results
            ``[filenames, Digitized_scans, Annotated_scans, Text_data]`` to
            the specified path.
            Defaults to None.
    Returns:
        (tuple): tuple containing:
            - **filenames** (list): A list of the paths to the images that were processed.
            - **Digitized_scans** (list): A list of the paths to the digitized scans.
            - **Annotated_scans** (list): A list of the paths to the annotated scans.
            - **Text_data** (list): A list of the text data extracted from the scans.

    Raises:
        TypeError: If ``filenames`` is of an unsupported type.
    """
    filenames = _resolve_filenames(filenames)

    if output_dir is None:
        output_dir = toml.load("config.toml")["output_dir"]
    os.makedirs(output_dir, exist_ok=True)

    Text_data = []  # text data extracted from image
    Annotated_scans = []
    Digitized_scans = []

    for input_image_filename in filenames:
        print(input_image_filename)
        try:
            annotated_path, digitized_path, text = _segment_single_image(
                input_image_filename, output_dir
            )
        finally:
            # Always release the matplotlib figures created for this image.
            plt.close("all")

        Annotated_scans.append(annotated_path)
        Digitized_scans.append(digitized_path)
        Text_data.append(text)

    print(Digitized_scans)
    print(Annotated_scans)
    print(Text_data)

    if pickle_path is not False:
        if pickle_path is None:
            pickle_path = toml.load("config.toml")["pickle"]["segmented_data"]
        with open(pickle_path, "wb") as f:
            pickle.dump([filenames, Digitized_scans, Annotated_scans, Text_data], f)

    return (filenames, Digitized_scans, Annotated_scans, Text_data)

if __name__ == "__main__":
    # Script entry point: configure tesseract, then segment the images listed
    # in the pickle file named under ["pickle"]["likely_us_images"] in config.toml.
    setup_tesseract()
    config = toml.load("config.toml")
    segment(filenames=config["pickle"]["likely_us_images"])