Source code for kosmos.ml.datasets.organamnist_dataset
from importlib.resources import as_file, files
from typing import ClassVar
import numpy as np
from kosmos.ml.datasets.dataset import SLDataset
[docs]
class OrganAMNISTDataset(SLDataset):
"""OrganAMNIST dataset for organ classification from medical images.
Notes:
- Number of instances: 58,830 (34,561 train + 6,491 val + 17,778 test)
- Number of features: 784 numeric (28x28 pixel images, flattened)
- Classes: 11 (different organ types)
References:
- MedMNIST: https://medmnist.com/
"""
ORGAN_CLASSES: ClassVar[list[str]] = [
"bladder",
"femur-left",
"femur-right",
"heart",
"kidney-left",
"kidney-right",
"liver",
"lung-left",
"lung-right",
"pancreas",
"spleen",
]
def __init__(self, *, min_max_scaler: bool = True) -> None:
"""Initialize the dataset.
Args:
min_max_scaler (bool): Whether to apply min-max scaling to the features.
Defaults to True.
"""
path = files("kosmos.ml.datasets.data") / "organamnist.npz"
with as_file(path) as p:
data = np.load(p)
# Combine train, validation, and test sets
x_train = data["train_images"]
y_train = data["train_labels"]
x_val = data["val_images"]
y_val = data["val_labels"]
x_test = data["test_images"]
y_test = data["test_labels"]
x = np.concatenate([x_train, x_val, x_test], axis=0)
y = np.concatenate([y_train, y_val, y_test], axis=0)
# Flatten images
x = x.reshape(x.shape[0], -1).astype(np.float32, copy=False)
# Flatten labels
y = y.flatten().astype(np.int64, copy=False)
super().__init__(x, y, min_max_scaler=min_max_scaler)
@property
def class_names(self) -> list[str]:
"""Return human-readable class labels for organ types."""
return self.ORGAN_CLASSES