Source code for usseg.Refined_anon_2_html

# /usr/bin/env python3

"""Segments the ultrasound images"""

# Python imports
import os
import logging
import sys
from PIL import Image
import pickle

# Module imports
import matplotlib.pyplot as plt
import pytesseract
import cv2
import traceback
import toml
import numpy as np

# Import segmentation module
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
sys.path.append(BASE_DIR)
from usseg import General_functions

logger = logging.getLogger(__file__)


def setup_tesseract():
    """Check that tesseract is set up appropriately for this platform.

    On Linux nothing is changed. On Windows ("win32") and Cygwin,
    ``pytesseract.pytesseract.tesseract_cmd`` is set to
    "C:/Program Files/Tesseract-OCR/tesseract.exe". On any other platform
    (including macOS) a warning is logged and nothing is changed.

    For non-Windows/Cygwin systems it is expected that tesseract is
    available in the PATH. If this is not the desired behaviour, set
    ``pytesseract.pytesseract.tesseract_cmd`` after calling this function.

    Returns:
        The installed tesseract version, as reported by
        ``pytesseract.get_tesseract_version()``.
    """
    windows_tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"

    if sys.platform.startswith("linux"):
        # Rely on tesseract being discoverable through the PATH.
        pass
    elif sys.platform.startswith(("win32", "cygwin")):
        # Both Windows flavours use the same default install location.
        pytesseract.pytesseract.tesseract_cmd = windows_tesseract_cmd
    else:
        # It is the tesseract binary (not the pytesseract wrapper) that has
        # to be discoverable, so say so in the message.
        logger.warning(
            "Platform %s is not recognised.\n"
            "Please ensure that tesseract is available on your system's PATH.",
            sys.platform,
        )

    return pytesseract.get_tesseract_version()


def _resolve_filenames(filenames):
    """Normalise the ``filenames`` argument of :func:`segment` to a list of paths."""
    if filenames is None:
        return ["Lt_test_image.png"]
    if isinstance(filenames, list):
        return filenames
    if isinstance(filenames, str):
        # Test for ``str`` before touching ``.endswith`` so that other types
        # reach the warning below instead of raising AttributeError.
        if not filenames.endswith((".pkl", ".pickle")):
            return [filenames]
        # NOTE(security): unpickling can execute arbitrary code; only pass
        # pickle files that come from a trusted source.
        with open(filenames, "rb") as f:
            filenames = pickle.load(f)
        if isinstance(filenames, list):
            # The pickle directly holds the list of files.
            return filenames
    if isinstance(filenames, dict):
        # Concatenate the per-key groups, keeping the dictionary order.
        flattened = []
        for group in filenames.values():
            if isinstance(group, str):
                flattened.append(group)
            else:
                flattened.extend(group)
        return flattened
    logger.warning(
        "Unrecognised filenames type %s. Expected either a string or a list",
        type(filenames),
    )
    return filenames


def _read_axis(cv2_img, side, Left_dimensions, Right_dimensions):
    """Search for the ticks and then the labels of one axis ("Left" or "Right").

    Returns the label numbers, the label positions and the axis ROI from
    ``Search_for_labels``, followed by the left/right dimensions handed back
    by ``Search_for_ticks`` (which must be fed into the next axis search).
    """
    (
        Cs,
        ROIAX,
        CenPoints,
        onY,
        BCs,
        TYLshift,
        _thresholded_image,
        Side,
        Left_dimensions,
        Right_dimensions,
        ROI2,
        ROI3,
    ) = General_functions.Search_for_ticks(
        cv2_img, side, Left_dimensions, Right_dimensions
    )
    _ROIAX, numbers, positions, axis_roi = General_functions.Search_for_labels(
        Cs,
        ROIAX,
        CenPoints,
        onY,
        BCs,
        TYLshift,
        Side,
        Left_dimensions,
        Right_dimensions,
        cv2_img,
        ROI2,
        ROI3,
    )
    return numbers, positions, axis_roi, Left_dimensions, Right_dimensions


def _segment_single_image(input_image_filename, output_dir):
    """Run the whole pipeline on one image.

    Every intermediate value is local to this call, so a failure on one image
    can never fall back on values left over from a previously processed image.

    Returns:
        tuple: ``(digitized_path, annotated_path, text_data)``. An entry is
        ``None`` when the corresponding step was not reached; ``text_data`` is
        the corrected data when the correction step succeeded, otherwise the
        uncorrected extracted data (or ``None`` if extraction failed).
    """
    image_stem = os.path.basename(input_image_filename).partition(".")[0]
    digitized_path = None
    annotated_path = None
    df = None
    # Name of the step in progress, used to label the error message.
    stage = "Text extraction"
    try:
        colRGBA = Image.open(input_image_filename)  # These images are in RGBA form
        PIL_col = colRGBA.convert("RGB")  # We need RGB, so convert here. with PIL
        cv2_img = np.array(colRGBA)  # with cv2.
        COL = General_functions.Colour_extract_vectorized(PIL_col, [255, 255, 100], 95, 95)
        logger.info("Done Colour extract")
        # The first returned value (a failure flag) is not used here.
        _fail, df = General_functions.Text_from_greyscale(cv2_img, COL)

        stage = "Initial segmentation"
        segmentation_mask, Xmin, Xmax, Ymin, Ymax = General_functions.Initial_segmentation(
            input_image_obj=PIL_col
        )

        stage = "Defining ROI"
        Left_dimensions, Right_dimensions = General_functions.Define_end_ROIs(
            segmentation_mask, Xmin, Xmax, Ymin, Ymax
        )
        Waveform_dimensions = [Xmin, Xmax, Ymin, Ymax]

        stage = "Axes search"
        Lnumber, Lpositions, ROIL, Left_dimensions, Right_dimensions = _read_axis(
            cv2_img, "Left", Left_dimensions, Right_dimensions
        )
        Rnumber, Rpositions, ROIR, Left_dimensions, Right_dimensions = _read_axis(
            cv2_img, "Right", Left_dimensions, Right_dimensions
        )

        stage = "Segment refinement"
        (
            refined_segmentation_mask,
            _top_curve_mask,
            top_curve_coords,
        ) = General_functions.Segment_refinement(
            cv2_img, Xmin, Xmax, Ymin, Ymax, df, Lnumber, Rnumber
        )

        stage = "Digitization"
        Xplot, Yplot, _Ynought = General_functions.Plot_Digitized_data(
            Rnumber,
            Rpositions,
            Lnumber,
            Lpositions,
            top_curve_coords,
        )
        col = General_functions.Annotate(
            input_image_obj=colRGBA,
            refined_segmentation_mask=refined_segmentation_mask,
            Left_dimensions=Left_dimensions,
            Right_dimensions=Right_dimensions,
            Waveform_dimensions=Waveform_dimensions,
            Left_axis=ROIL,
            Right_axis=ROIR,
        )
        # os.path.join keeps the file inside output_dir whether or not the
        # configured directory ends with a path separator.
        candidate_path = os.path.join(output_dir, image_stem + "_Annotated.png")
        fig1, ax1 = plt.subplots(1)
        ax1.imshow(col)
        ax1.set_xticks([])
        ax1.set_yticks([])
        ax1.tick_params(axis="both", which="both", length=0)
        fig1.savefig(candidate_path, dpi=900, bbox_inches="tight", pad_inches=0)
        annotated_path = candidate_path  # only reported once actually written

        stage = "correction"
        df = General_functions.Plot_correction(Xplot, Yplot, df)

        stage = "Digitization"
        candidate_path = os.path.join(output_dir, image_stem + "_Digitized.png")
        # Presumably figure 2 is the one drawn by Plot_Digitized_data -- TODO confirm.
        plt.figure(2)
        plt.savefig(candidate_path, dpi=900, bbox_inches="tight", pad_inches=0)
        digitized_path = candidate_path
    except Exception:
        # Best effort: record the failure (with traceback) and report whatever
        # was produced for this image so the remaining images still run.
        logger.exception("Failed %s", stage)
    finally:
        # Always release the figures, including on failure.
        plt.close("all")
    return digitized_path, annotated_path, df


def segment(filenames=None, output_dir=None, pickle_path=None):
    """Segments the pre-selected ultrasound images

    Args:
        filenames (str, list or dict, optional) : If a string, must be either
            a single file name path or a path to a pickle object containing
            the files to segment (a list of files, or a dictionary whose
            values are lists of files). Pickle objects are expected to have
            the extension ".pkl" or ".pickle". If a list, must be a list of
            filenames to ultrasound images to segment. If a dictionary, its
            values are concatenated into the list of files. If None, will
            load a test image.
        output_dir (str, optional) : Path to the output directory to store
            annotated images. If None, will load from config file
            ("config.toml", key "output_dir"). Defaults to None.
        pickle_path (str or bool) : If pickle_path is False, will not store
            the results as a pickle file. If None, will load the pickle path
            from "config.toml". Else if a string, will dump the pickled
            results to the specified path. Defaults to None.

    Returns:
        (tuple): tuple containing:
            - **filenames** (list): A list of the paths to the images that
              were segmented.
            - **Digitized_scans** (list): A list of the paths to the
              digitized scans.
            - **Annotated_scans** (list): A list of the paths to the
              annotated scans.
            - **Text_data** (list): A list of the text data extracted from
              the scans.

        The three result lists always hold exactly one entry per input
        image, in input order; an entry is None when that output could not
        be produced for the image.
    """
    filenames = _resolve_filenames(filenames)

    if output_dir is None:
        output_dir = toml.load("config.toml")["output_dir"]
    os.makedirs(output_dir, exist_ok=True)

    Text_data = []  # text data extracted from image
    Annotated_scans = []
    Digitized_scans = []

    for input_image_filename in filenames:
        print(input_image_filename)
        digitized_path, annotated_path, text_data = _segment_single_image(
            input_image_filename, output_dir
        )
        Digitized_scans.append(digitized_path)
        Annotated_scans.append(annotated_path)
        Text_data.append(text_data)

    print(Digitized_scans)
    print(Annotated_scans)
    print(Text_data)

    if pickle_path is not False:
        if pickle_path is None:
            pickle_path = toml.load("config.toml")["pickle"]["segmented_data"]
        with open(pickle_path, "wb") as f:
            pickle.dump([filenames, Digitized_scans, Annotated_scans, Text_data], f)

    return (filenames, Digitized_scans, Annotated_scans, Text_data)


if __name__ == "__main__":
    # Script entry point: configure tesseract, then segment the images listed
    # in the pickle file that "config.toml" names under
    # [pickle] likely_us_images.
    setup_tesseract()
    config = toml.load("config.toml")
    segment(filenames=config["pickle"]["likely_us_images"])