Source code for cyto_ml.data.image
import logging
from io import BytesIO
from typing import Optional
import numpy as np
import requests
import torch
from PIL import Image
from torchvision import transforms
[docs]
class ImageProcessingError(Exception):
    """Signals that an image could not be prepared as a tensor for the model."""
[docs]
def load_image(path: str, normalise_func: Optional[str] = "base_normalise") -> torch.Tensor:
    """Given an image path, return a tensor suitable to hand to a model.

    Args:
        path: Location of the image file, handed to ``PIL.Image.open``.
        normalise_func: Name of the normalisation function forwarded to
            ``prepare_image``. Defaults to ``"base_normalise"``, which
            converts to a range between 0..1.

    Returns:
        The tensor produced by ``prepare_image`` for this image.
    """
    # PIL opens files lazily and keeps the handle open; the context manager
    # releases it once prepare_image has read the pixel data.
    with Image.open(path) as img:
        return prepare_image(img, normalise_func=normalise_func)
[docs]
def load_image_from_url(
    url: str,
    normalise_func: Optional[str] = "base_normalise",
    timeout: Optional[float] = 30.0,
) -> Optional[torch.Tensor]:
    """Given an image url, return a tensor suitable to hand to a model.

    Args:
        url: Address the image is fetched from with an HTTP GET.
        normalise_func: Name of the normalisation function forwarded to
            ``prepare_image``. Defaults to ``"base_normalise"``, which
            converts to a range between 0..1.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The tensor produced by ``prepare_image``, or ``None`` when the server
        answers with any status code other than 200 (the failure is logged).

    Raises:
        requests.exceptions.RequestException: If the request itself fails,
            including when ``timeout`` elapses.
    """
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        logging.error(f"{url} returned status code {response.status_code}")
        # Best-effort contract: callers get None rather than an exception.
        return None

    img = Image.open(BytesIO(response.content))
    return prepare_image(img, normalise_func=normalise_func)
[docs]
def prepare_image(image: Image, normalise_func: Optional[str] = "base_normalise") -> torch.Tensor:
    """
    Take a single image and prepare it to pass through the model.

    a) 16-bit greyscale (mode ``"I;16"``) images are first scaled by their
       maximum value and expanded to 3 bands
    b) The image is converted to a PyTorch tensor by the named transform
    c) A leading batch dimension is added (no need for torch.stack)

    Args:
        image: The image to convert. Objects without a ``mode`` attribute
            skip the 16-bit branch and go straight to the transform.
        normalise_func: Name of a module-level function that takes no
            arguments and returns the transform to apply.

    Returns:
        The transformed image with an extra dimension inserted at index 0.

    Raises:
        ImageProcessingError: If ``normalise_func`` does not name a
            module-level object, or building/applying the transform fails.
            The original exception is attached as ``__cause__``.
    """
    if hasattr(image, "mode") and image.mode == "I;16":
        # Flow Cytometer images are 16-bit greyscale, in a low range
        # Note - tried this and variants, does not have expected result
        # https://stackoverflow.com/questions/18522295/python-pil-change-greyscale-tif-to-rgb
        #
        # Convert to 3 bands because our model has 3 channel input
        image = convert_3_band(normalise_flowlr(image))

    try:
        # Resolve the transform factory by name among this module's globals.
        transform = globals()[normalise_func]()
        tensor_image = transform(image)
    except Exception as err:  # TODO trigger and catch
        # KeyError (unknown name) is already covered by Exception.
        logging.error(err)
        raise ImageProcessingError(err) from err

    # Single image, add a batch dimension
    return tensor_image.unsqueeze(0)
[docs]
def base_normalise() -> transforms.Compose:
    """
    Baseline transform: no standardisation of values, only tensor conversion.

    Returns a ``transforms.ToTensor`` instance; the tensor conversion itself
    is what maps 8-bit image data onto a 0-1 range.
    """
    to_tensor = transforms.ToTensor()
    return to_tensor
[docs]
def resize_normalise() -> transforms.Compose:
    """
    Build a transform that resizes to 256x256 and then converts to a tensor.

    Follows
    https://github.com/ukceh-rse/ViT-LASNet/blob/36235f9b992a6c345f1010dab133549d20f181d9/test/test.py#L115
    """
    steps = [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)
[docs]
def normalise_flowlr(image: "Image.Image") -> np.ndarray:
    """Utility function to normalise flow cytometer images.

    As output from the flow cytometer, they are 16 bit greyscale,
    but all the values are in a low range (max value 1018 across the set)
    As recommended by @Kzra, normalise all values by the maximum
    Both for display, and before handing to a model.
    Image.point(lambda...) should do this, but the values stay integers
    So roundtrip this through numpy

    Args:
        image: The image (or any array-like) whose pixel values are scaled.

    Returns:
        A floating point array of the same shape, divided by the largest
        pixel value. An image whose largest value is 0 is returned as zeros
        rather than as NaNs from a 0/0 division.
    """
    pix = np.array(image)
    # ndarray.max() reduces in native code; builtin max() over a flattened
    # array iterates pixel by pixel in Python.
    max_val = pix.max()
    if max_val == 0:
        # Nothing to scale by; avoid dividing by zero.
        return pix.astype(float)
    return pix / max_val
[docs]
def convert_3_band(image: np.array) -> np.array:
    """
    Turn a 1-band image (normalised between 0 and 1) into a 3-band one by
    copying the same values into every band.
    https://stackoverflow.com/a/57723482

    This seems very brute-force, but PIL is not converting our odd-format
    greyscale images from the Flow Cytometer well. Improvements appreciated
    """
    rows, cols = image.shape[0], image.shape[1]
    stacked = np.zeros((rows, cols, 3))
    for band in range(3):
        # same value in each channel
        stacked[:, :, band] = image
    # Cast to float32 as this is what the model layers expect
    return stacked.astype(np.float32)