# Diversity

This notebook is used for evaluating the diversity of a set of images.

In [71]:
import h5py
import lpips
import numpy as np
import random
import torch

## Step 1 - Arguments and Configurations

In [72]:
# Arguments
DATASET_PATH = "./datasets/Ra_128_indexed.h5"
NUM_BINS = 20
SAMPLE_PER_BIN = 30
SEED = 42

# Random Seeds
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Step 2 - Load Datasets

In [73]:
with h5py.File(DATASET_PATH, "r") as f:
    images = f["images"][:]
    labels = f["labels"][:]

images_tensor = torch.from_numpy(images).permute(0, 3, 1, 2).float() / 255.0
images_tensor = images_tensor * 2 - 1
labels_tensor = torch.from_numpy(labels).float()

print(images_tensor.shape)
print(labels_tensor.shape)

torch.Size([9192, 3, 128, 128])
torch.Size([9192])


## Step 3 - Calculate Diversity

In [74]:
lpips_fn = lpips.LPIPS(net="alex")

def to_rgb(x):
    if x.shape[1] == 1:
        return x.repeat(1, 3, 1, 1)
    return x

def compute_diversity_by_label(images, labels, num_bins=10, samples_per_bin=10):
    images = to_rgb(images)
    labels = labels.cpu().numpy()
    bin_edges = np.linspace(labels.min(), labels.max(), num_bins + 1)

    all_diversities = []

    for i in range(num_bins):
        bin_min = bin_edges[i]
        bin_max = bin_edges[i + 1]

        idx = np.where((labels >= bin_min) & (labels < bin_max))[0]
        if len(idx) < 2:
            continue

        selected_idx = np.random.choice(
            idx, min(samples_per_bin, len(idx)), replace=False
        )
        selected_imgs = images[selected_idx]

        n = selected_imgs.size(0)
        dists = []
        for a in range(n):
            for b in range(a + 1, n):
                dist = lpips_fn(
                    selected_imgs[a].unsqueeze(0), selected_imgs[b].unsqueeze(0)
                )
                dists.append(dist.item())

        diversity = np.mean(dists)
        all_diversities.append(diversity)

    if len(all_diversities) == 0:
        return 0.0
    return np.mean(all_diversities)

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/homebrew/Caskroom/miniconda/base/envs/2025-04-23_macOS/lib/python3.12/site-packages/lpips/weights/v0.1/alex.pth


Now we can use this function to calculate the diversity of our datasets.

In [75]:
diversity_score = compute_diversity_by_label(
    images_tensor, labels_tensor, num_bins=NUM_BINS, samples_per_bin=SAMPLE_PER_BIN
)
print(f"Diversity Score: {diversity_score:.4f}")

Diversity Score: 0.1561
