Source code for usseg.Organise_files

"""Searches a directory and identifies images likely to be doppler ultrasounds"""
# /usr/bin/env python3

# Python imports
import os
import sys
import re
import pickle
import traceback

# Module imports
import toml

# Local imports
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
sys.path.append(BASE_DIR)
from usseg import General_functions
from concurrent.futures import ThreadPoolExecutor
import os
import re
import traceback
# ... other imports ...

[docs]def check_file_for_us(file_path): """Checks a single file to see if it's a likely ultrasound and returns its path. Args: file_path: The path to the file to be checked. Returns: A tuple of (patient_id, file_path) if the file is a likely ultrasound, or `None` otherwise. """ if file_path.endswith('.JPG'): try: Fail, df = General_functions.Scan_type_test(file_path) if Fail == 0: # Extract patient ID from the file path match = re.search(r"\d{4}", file_path) if match: patient_id = match.group(0) return (patient_id, file_path) except Exception: traceback.print_exc() return None
[docs]def get_likely_us(root_dir, pickle_path=None, use_parallel=True): """Searches a directory and identifies the images that are likely to be doppler ultrasounds. Args: root_dir: The path to the directory to be searched. pickle_path: The path to the pickle file to save the results to. If `None`, the results will be saved to the current directory. use_parallel: Whether to use a parallel thread pool to process the files. Returns: A list of paths to the images that are likely to be doppler ultrasounds. """ # Initialize a dictionary to store the paths for each patient patient_paths = {} # Check if the root_dir is a directory or a single file if os.path.isdir(root_dir): # Collect all JPG files from the root directory all_files = [os.path.join(subdir, file) for subdir, _, files in os.walk(root_dir) for file in files if file.endswith('.JPG')] if use_parallel: # Using ThreadPoolExecutor to parallelize the file processing with ThreadPoolExecutor() as executor: results = list(executor.map(check_file_for_us, all_files)) else: results = list(map(check_file_for_us, all_files)) # Process results and populate patient_paths for res in results: if res: patient_id, file_path = res if patient_id in patient_paths: patient_paths[patient_id].append(file_path) else: patient_paths[patient_id] = [file_path] elif os.path.isfile(root_dir): # If it's a single file, directly use the check_file_for_us function result = check_file_for_us(root_dir) if result: patient_id, file_path = result patient_paths[patient_id] = [file_path] else: print(f"{root_dir} is neither a valid directory nor a file.") return None # save the patient_paths dictionary to a file in current directory if pickle_path is not False: if pickle_path is None: pickle_path = toml.load("config.toml")["pickle"]["likely_us_images"] with open(pickle_path, 'wb') as f: pickle.dump(patient_paths, f) # Convert dictionary values to a list all_paths = [path for sublist in patient_paths.values() for path in sublist] return all_paths
if __name__ == "__main__": root_dir = toml.load("config.toml")["root_dir"] get_likely_us(root_dir)