Source code for wsipipe.preprocess.patching.patchset

"""
PatchSets are sets of patches and all the information
required to create them from the slides.

Many patches in the set share the same details (which we call PatchSettings):
    - the path of the slide to read from
    - the level of the slide at which to create the patch
    - the size of the patch to be created
    - how to load the slide

To create an individual patch, you need to know:
    - the top left position of the patch
    - the label to be applied to the patch

Therefore the PatchSets are a dataframe and a settings list.

The settings list is a list of PatchSettings, each of which contains:
    slide_path, level, patch_size, loader
In the dataframe each row represents a patch and contains the columns:
    x (left), y (top), label, setting (index into the settings list)

"""


from dataclasses import asdict, dataclass
import itertools
import json
from pathlib import Path
from typing import List

import pandas as pd
import cv2
import numpy as np

from wsipipe.load.datasets import Loader, get_loader
from wsipipe.load.slides import Region, SlideBase
from wsipipe.utils import invert

@dataclass
class PatchSetting:
    """Patch Setting Definition

    Groups the details that many patches have in common, so that each
    patch row only needs to store an index to one of these objects.

    Args:
        level (int): The level at which patches are extracted
        patch_size (int): The size of patches to be created, assumes square
        slide_path (Path): the path to the whole slide image
        loader (Loader): A method for loading the slide
    """

    level: int
    patch_size: int
    slide_path: Path  # not stored in the dataframe
    loader: Loader  # not stored in the dataframe

    def to_sdict(self) -> dict:
        """Writes a PatchSetting to a dictionary so it can be saved to disk

        The slide path is replaced by its string form and the loader by
        its ``name`` attribute; the other fields are copied unchanged.

        Returns:
            dict: with keys level, patch_size, slide_path and loader
        """
        d = asdict(self)
        d["slide_path"] = str(self.slide_path)
        # Only the loader's name is stored; from_sdict hands that name to
        # get_loader to obtain a loader again.
        d["loader"] = self.loader.name
        return d

    @classmethod
    def from_sdict(cls, sdict: dict) -> "PatchSetting":
        """Converts a dictionary to a PatchSetting

        Args:
            sdict (dict): a dictionary as produced by to_sdict, with
                slide_path as a string and loader as a loader name

        Returns:
            PatchSetting: the setting described by the dictionary
        """
        # Work on a shallow copy so the caller's dictionary is left
        # untouched and can be reused or inspected afterwards.
        fields = dict(sdict)
        fields["slide_path"] = Path(fields["slide_path"])
        fields["loader"] = get_loader(fields["loader"])
        return cls(**fields)
class PatchSet:
    """A set of patches: a dataframe of patch rows plus a list of settings."""

    def __init__(self, df: pd.DataFrame, settings: List[PatchSetting]) -> None:
        """The dataframe should have the following columns:
            - x: left position of the patch at level
            - y: top position of the patch at level
            - label: which class it belongs to
            - setting: an index into the settings array.

        Args:
            df (pd.DataFrame): The patch locations, labels, and index into settings.
            settings (List[PatchSetting]): A list of settings.
        """
        self.df = df
        self.settings = settings

    def save(self, path: Path) -> None:
        """Saves a PatchSet to disk

        The dataframe is saved to a csv called frame.csv
        The settings are saved in a text file called settings.json

        Args:
            path (Path): the directory in which to save the patchset,
                created (with parents) if it does not exist
        """
        # Accept plain strings as well as Path objects.
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)
        self.df.to_csv(path / "frame.csv", index=False)
        dicts = [s.to_sdict() for s in self.settings]
        with open(path / "settings.json", "w") as outfile:
            json.dump(dicts, outfile)

    @classmethod
    def load(cls, path: Path) -> "PatchSet":
        """Loads a PatchSet from disk

        Assumes:
            The dataframe is saved to a csv called frame.csv
            The settings are saved in a text file called settings.json

        Args:
            path (Path): the directory in which the patchset is saved

        Returns:
            PatchSet: the patchset rebuilt from the two files
        """
        print(f"loading {path}")
        # Accept plain strings as well as Path objects.
        path = Path(path)
        df = pd.read_csv(path / "frame.csv")
        with open(path / "settings.json") as json_file:
            settings = json.load(json_file)
        settings = [PatchSetting.from_sdict(s) for s in settings]
        return cls(df, settings)

    def export_patches(self, output_dir: Path) -> None:
        """Creates all patches in a patch set

        Writes patches in subdirectories of their label
        Patches are named <slide file stem>-<x>-<y>-<level>-<patch_size>.png

        Args:
            output_dir (Path): the directory in which the patches are saved
        """
        # Rows sharing a setting read from the same slide, so each group
        # can be exported with a single slide open.
        groups = self.df.groupby("setting")
        for setting_idx, group in groups:
            s = self.settings[setting_idx]
            self._export_patches_for_setting(
                group, output_dir, s.slide_path, s.level, s.patch_size, s.loader
            )

    def description(self):
        """Returns basic summary of patchset

        Returns:
            the unique labels in the dataframe and, in the same order,
            a list with the total number of patches of each label
        """
        labels = np.unique(self.df.label)
        sum_totals = [np.sum(self.df.label == label) for label in labels]
        return labels, sum_totals

    def _export_patches_for_setting(
        self,
        frame: pd.DataFrame,
        output_dir: Path,
        slide_path: Path,
        level: int,
        patch_size: int,
        loader: Loader,
    ):
        """Creates all the patches for an individual PatchSetting

        Args:
            frame (pd.DataFrame): the rows (x, y, label) to export
            output_dir (Path): root directory; one subdirectory per label
            slide_path (Path): the slide the patches are read from
            level (int): the slide level the patches are read at
            patch_size (int): the size passed to Region.make for each patch
            loader (Loader): used to open the slide and to name the labels

        Raises:
            OSError: if OpenCV reports that a patch could not be written
        """
        output_dir = Path(output_dir)
        slide_stem = Path(slide_path).stem
        # The inverted label mapping is the same for every row, so build
        # it once instead of once per patch. It is indexed by the label
        # value stored in the frame and yields the directory name.
        label_names = invert(loader.labels)
        # Label directories already created during this call, so mkdir
        # runs once per label rather than once per patch.
        label_dirs = {}

        def get_output_dir_for_label(label) -> Path:
            label_dir = label_dirs.get(label)
            if label_dir is None:
                label_dir = output_dir / label_names[label]
                label_dir.mkdir(parents=True, exist_ok=True)
                label_dirs[label] = label_dir
            return label_dir

        def make_patch_path(x: int, y: int, label: int) -> Path:
            filename = f"{slide_stem}-{x}-{y}-{level}-{patch_size}.png"
            return get_output_dir_for_label(label) / filename

        def save_patch(region: Region, slide: SlideBase, filepath: Path) -> None:
            image = slide.read_region(region)
            # OpenCV writes channels in BGR order, so swap from RGB first.
            opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            # imwrite signals some failures only through its return value;
            # surface them instead of silently losing patches.
            if not cv2.imwrite(str(filepath), opencv_image):
                raise OSError(f"could not write patch to {filepath}")

        with loader.load_slide(slide_path) as slide:
            for row in frame.itertuples(index=False):
                filepath = make_patch_path(row.x, row.y, row.label)
                region = Region.make(row.x, row.y, patch_size, level)
                save_patch(region, slide, filepath)