Source code for wsipipe.preprocess.patching.patchset_utils

"""
Utilities for creating sets of patches
"""

import itertools
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
from PIL import Image, ImageDraw

from wsipipe.load.datasets import Loader
from wsipipe.preprocess.tissue_detection import TissueDetector
from wsipipe.preprocess.patching.patch_finder import PatchFinder
from wsipipe.preprocess.patching.patchset import PatchSet, PatchSetting


def make_patchset_for_slide(
    slide_path: Path,
    annot_path: Path,
    loader: Loader,
    tissue_detector: TissueDetector,
    patch_finder: PatchFinder,
    project_root: Path = Path('/')
) -> PatchSet:
    """Build the PatchSet for one whole slide image.

    The annotations are rendered into a label image at the patch finder's
    labels level, every pixel the tissue detector rejects is set to 0, and
    the patch finder is then run on the resulting label image.

    Args:
        slide_path (Path): path to whole slide image
        annot_path (Path): annotation information for slide
        loader (Loader): loader used to open the slide and its annotations
        tissue_detector (TissueDetector): called on the slide thumbnail to
            produce the mask used to blank out background
        patch_finder (PatchFinder): patch finder used to create the patches
        project_root (Path, optional): prefix joined onto slide_path and
            annot_path when loading. The PatchSetting stored in the result
            keeps slide_path exactly as passed in. Defaults to root
            (absolute paths)

    Returns:
        patchset (PatchSet): A PatchSet for the slide, with every patch
            pointing at setting index 0
    """
    full_slide_path = project_root / slide_path
    with loader.load_slide(full_slide_path) as slide:
        annotations = loader.load_annotations(project_root / annot_path)

        # Render the annotation labels at the labels level of the slide
        labels_image = annotations.render(
            slide.dimensions[patch_finder.labels_level].as_shape(),
            2 ** patch_finder.labels_level,
        )

        # Anything outside the detected tissue is relabelled as 0
        thumbnail = slide.get_thumbnail(patch_finder.labels_level)
        tissue_mask = tissue_detector(thumbnail)
        labels_image[~tissue_mask] = 0

        patch_level_dims = slide.dimensions[patch_finder.patch_level]
        patches_df, patch_level, patch_size = patch_finder(
            labels_image, patch_level_dims
        )

    # There is exactly one PatchSetting, so all patches reference index 0
    patches_df["setting"] = 0
    only_setting = PatchSetting(patch_level, patch_size, slide_path, loader)
    return PatchSet(patches_df, [only_setting])
def make_and_save_patchsets_for_dataset(
    dataset: pd.DataFrame,
    loader: Loader,
    tissue_detector: TissueDetector,
    patch_finder: PatchFinder,
    output_dir: Path,
    project_root: Path = Path('/')
) -> List[PatchSet]:
    """Create, or reload, and save a PatchSet for every slide in a dataset.

    Each slide gets its own sub directory of output_dir, named after the
    stem of the slide path. If that sub directory already exists the
    PatchSet is loaded from it; otherwise the PatchSet is created and
    saved there.

    Args:
        dataset (pd.DataFrame): a dataframe containing columns slide and
            annotation
        loader (Loader): loader to use to load slide and annotations
        tissue_detector (TissueDetector): tissue detector to use to remove
            background
        patch_finder (PatchFinder): patch finder to use to create patches
        output_dir (Path): a directory to save the patchsets in
        project_root (Path, optional): paths will be stored relative to the
            project root. Defaults to root (absolute paths)

    Returns:
        patchset (List[PatchSet]): A list of PatchSets one for each slide
    """
    save_root = Path(output_dir)
    results = []
    for record in dataset.itertuples():
        slide_dir = save_root / Path(record.slide).stem
        if not slide_dir.is_dir():
            # Nothing saved for this slide yet: build it and write it out
            slide_patchset = make_patchset_for_slide(
                record.slide,
                record.annotation,
                loader,
                tissue_detector,
                patch_finder,
                project_root,
            )
            slide_patchset.save(slide_dir)
        else:
            # Reuse the previously saved PatchSet
            slide_patchset = PatchSet.load(slide_dir)
        results.append(slide_patchset)
    return results
def make_patchsets_for_dataset(
    dataset: pd.DataFrame,
    loader: Loader,
    tissue_detector: TissueDetector,
    patch_finder: PatchFinder,
    project_root: Path = Path('/')
) -> List[PatchSet]:
    """Create a PatchSet for every slide in a dataset, without saving.

    Args:
        dataset (pd.DataFrame): a dataframe containing columns slide and
            annotation
        loader (Loader): loader to use to load slide and annotations
        tissue_detector (TissueDetector): tissue detector to use to remove
            background
        patch_finder (PatchFinder): patch finder to use to create patches
        project_root (Path, optional): paths will be stored relative to the
            project root. Defaults to root (absolute paths)

    Returns:
        patchset (List[PatchSet]): A list of PatchSets one for each slide,
            in the row order of the dataset
    """
    return [
        make_patchset_for_slide(
            record.slide,
            record.annotation,
            loader,
            tissue_detector,
            patch_finder,
            project_root,
        )
        for record in dataset.itertuples()
    ]
def load_patchsets_from_directory(patchsets_dir: Path) -> "List[PatchSet]":
    """Loads PatchSets from a directory

    Loads patchsets for a whole dataset stored in subdirectories of
    patchsets_dir. Entries of patchsets_dir that are not directories are
    ignored.

    Args:
        patchsets_dir (Path): a path to a directory containing
            subdirectories with PatchSets. A plain string path is also
            accepted.

    Returns:
        patchset (List[PatchSet]): A list of PatchSets one for each
            subdirectory, ordered by subdirectory path
    """
    # Coerce so that string paths work, as they do for the output_dir of
    # make_and_save_patchsets_for_dataset
    patchsets_dir = Path(patchsets_dir)
    # iterdir() yields entries in arbitrary order; sort so that the returned
    # list is the same from one run to the next
    patchset_dirs = sorted(p for p in patchsets_dir.iterdir() if p.is_dir())
    return [PatchSet.load(p) for p in patchset_dirs]
def combine(patchsets: List[PatchSet]) -> PatchSet:
    """Combines multiple patchsets into one

    This gives a combined dataframe with all patches in a dataset, for
    example to use to sample patches. It also renumbers settings so that
    indexes in dataframe match correct setting in combined_settings list.

    The input PatchSets are left untouched: the renumbering is applied to
    copies of their dataframes, so combining the same PatchSets more than
    once gives the same result each time.

    Args:
        patchsets (List[PatchSets]): A list of PatchSets

    Returns:
        A combined patchset

    Raises:
        ValueError: raised by pd.concat if patchsets is empty
    """
    frames = []
    combined_settings = []
    for ps in patchsets:
        # The offset for this patchset is the number of settings collected
        # so far. Apply it to a copy so the caller's dataframe keeps its
        # own setting indexes.
        frame = ps.df.copy()
        frame["setting"] += len(combined_settings)
        frames.append(frame)
        combined_settings.extend(ps.settings)

    # merge the data frames
    combined_df = pd.concat(frames, axis=0, ignore_index=True)
    return PatchSet(combined_df, combined_settings)
def visualise_patches_on_slide(
    ps: PatchSet, vis_level: int, project_root: Path = Path('/')
) -> Image.Image:
    """Draws patches on a thumbnail of the slide

    Visualise where on the slide the patches occur.
    Assumes a patch set for one slide with only one set of setting

    Args:
        ps (PatchSet): A PatchSet for one slide. It is not modified.
        vis_level (int): the level at which to create a slide image to draw
            patches on
        project_root (Path, optional): prefix joined onto the slide path held
            in the patch setting when the slide is loaded. Defaults to root
            (absolute paths)

    Returns:
        thumb (Image.Image): A thumbnail of the slide with patch locations
            drawn on as black outlined squares

    Raises:
        AssertionError: if the patch set does not hold exactly one setting
    """
    assert len(ps.settings) == 1, "The input patch set contains patches from more than one slide."
    slide_settings = ps.settings[0]

    # Scale the patch positions and the patch size down to the thumbnail
    downsample = 2 ** vis_level
    left = ps.df.x.divide(downsample).astype(int)
    top = ps.df.y.divide(downsample).astype(int)
    # NOTE(review): the patch size is scaled by vis_level only, as before.
    # If patch_size is measured in pixels at slide_settings.level rather than
    # at level 0, the boxes will be too small for level > 0 - confirm.
    thumb_patch_size = slide_settings.patch_size // downsample

    with slide_settings.loader.load_slide(project_root / slide_settings.slide_path) as slide:
        thumb = slide.get_thumbnail(vis_level)
        # Convert while the slide is still open
        thumb = Image.fromarray(np.array(thumb, dtype=np.uint8))

    thumbdraw = ImageDraw.Draw(thumb)
    for x, y in zip(left, top):
        thumbdraw.rectangle(
            [x, y, x + thumb_patch_size, y + thumb_patch_size],
            fill=None,
            outline='black',
            width=1,
        )
    return thumb