Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ dependencies = [
"fire>=0.7.1",
"jinja2>=3.1.6",
"pandas>=3.0.2",
"scikit-image>=0.26",
"scipy>=1.17.1",
]
scripts.ZedProfiler = "ZedProfiler.cli:trigger"

Expand Down
196 changes: 190 additions & 6 deletions src/zedprofiler/featurization/intensity.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,194 @@
"""Intensity featurization module scaffold."""
"""Intensity feature extraction utilities for 3D image objects.

from __future__ import annotations
Provides functions to compute intensity statistics (mean, median, min, max,
standard deviation, quartiles), edge-based measurements, center-of-mass
coordinates, and mass displacement for segmented 3D objects.
"""

from zedprofiler.exceptions import ZedProfilerError
import numpy
import scipy.ndimage
import skimage.segmentation

from zedprofiler.IO.loading_classes import ObjectLoader

def compute() -> dict[str, list[float]]:
"""Placeholder for intensity computation implementation."""
raise ZedProfilerError("intensity.compute is not implemented yet")

def get_outline(mask: numpy.ndarray) -> numpy.ndarray:
"""
Get the outline of a 3D mask.

Parameters
----------
mask : numpy.ndarray
The input mask.

Returns
-------
numpy.ndarray
The outline of the mask.
"""
outline = numpy.zeros_like(mask)
for z in range(mask.shape[0]):
outline[z] = skimage.segmentation.find_boundaries(mask[z])
return outline


def compute_intensity( # noqa: PLR0915
object_loader: ObjectLoader,
) -> dict:
"""
Measure the intensity of objects in a 3D image.

Parameters
----------
object_loader : ObjectLoader
The object loader containing the image and label image.

Returns
-------
dict
A dictionary containing the measurements for each object.
The keys are the measurement names and the values are the corresponding values.
"""
image_object = object_loader.image
label_object = object_loader.label_image
labels = object_loader.object_ids

output_dict = {
"object_id": [],
"feature_name": [],
"channel": [],
"compartment": [],
"value": [],
}
for index, label in enumerate(labels):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a single comment on top of this for loops explaining what is happening here.

selected_label_object = label_object.copy()
selected_image_object = image_object.copy()
Comment on lines +64 to +65
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the two full-volume copies at the top necessary? Could it be possible to crop first using a bounding-box lookup (e.g. scipy.ndimage.find_objects), then mask within the crop? This might improve performance, though I'm not certain. c:


selected_label_object[selected_label_object != label] = 0
selected_label_object[selected_label_object > 0] = (
1 # binarize the label for volume calcs
)
selected_image_object[selected_label_object != 1] = 0
non_zero_pixels_object = selected_image_object[selected_image_object > 0]
if non_zero_pixels_object.size == 0:
non_zero_pixels_object = numpy.array([0], dtype=numpy.float32)
mask_outlines = get_outline(selected_label_object)

# Extract only coordinates where object exists
z_indices, y_indices, x_indices = numpy.where(selected_label_object > 0)
min_z, max_z = numpy.min(z_indices), numpy.max(z_indices)
min_y, max_y = numpy.min(y_indices), numpy.max(y_indices)
min_x, max_x = numpy.min(x_indices), numpy.max(x_indices)

# Crop to bounding box for efficiency
cropped_label = selected_label_object[
min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1
]
cropped_image = selected_image_object[
min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1
]

# Create coordinate grids for the bounding box
mesh_z, mesh_y, mesh_x = numpy.mgrid[
min_z : max_z + 1, # + 1 to include the max index
min_y : max_y + 1,
min_x : max_x + 1,
]

# calculate the integrated intensity
integrated_intensity = scipy.ndimage.sum(
selected_image_object,
selected_label_object,
)
# calculate the volume
volume = numpy.sum(selected_label_object)

# Skip if volume is zero to avoid division by zero
if volume == 0 or integrated_intensity == 0:
continue

# calculate the mean intensity
mean_intensity = integrated_intensity / volume
# calculate the standard deviation
std_intensity = numpy.std(non_zero_pixels_object)
# min intensity
min_intensity = numpy.min(non_zero_pixels_object)
# max intensity
max_intensity = numpy.max(non_zero_pixels_object)
# lower quartile
lower_quartile_intensity = numpy.percentile(non_zero_pixels_object, 25)
# upper quartile
upper_quartile_intensity = numpy.percentile(non_zero_pixels_object, 75)
# median intensity
median_intensity = numpy.median(non_zero_pixels_object)
# max intensity location
max_z, max_y, max_x = scipy.ndimage.maximum_position(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm... Are you trying to find the max intensity within the whole images or masks?

If within the mask, using the labels parameter might help. https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.maximum_position.htm

selected_image_object,
) # z, y, x
Comment on lines +125 to +127
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this overwriting intentional? Because max_z max_y and max_x was also declared. Juzt wanna make sure this is on your radar.


# Calculate center of mass (geometric center) using cropped arrays
object_mask = cropped_label > 0
cm_x = numpy.mean(mesh_x[object_mask])
cm_y = numpy.mean(mesh_y[object_mask])
cm_z = numpy.mean(mesh_z[object_mask])

# Calculate intensity-weighted center of mass using cropped arrays
intensity_x_coord = cropped_image * mesh_x
intensity_y_coord = cropped_image * mesh_y
intensity_z_coord = cropped_image * mesh_z
i_x = numpy.sum(intensity_x_coord[object_mask])
i_y = numpy.sum(intensity_y_coord[object_mask])
i_z = numpy.sum(intensity_z_coord[object_mask])
# calculate the center of mass
cmi_x = i_x / integrated_intensity
cmi_y = i_y / integrated_intensity
cmi_z = i_z / integrated_intensity
# calculate the center of mass distance
diff_x = cm_x - cmi_x
diff_y = cm_y - cmi_y
diff_z = cm_z - cmi_z
# mass displacement
mass_displacement = numpy.sqrt(diff_x**2 + diff_y**2 + diff_z**2)
# mean absolute deviation
mad_intensity = numpy.mean(numpy.abs(non_zero_pixels_object - mean_intensity))
edge_count = scipy.ndimage.sum(mask_outlines)
integrated_intensity_edge = numpy.sum(selected_image_object[mask_outlines > 0])
mean_intensity_edge = integrated_intensity_edge / edge_count
std_intensity_edge = numpy.std(selected_image_object[mask_outlines > 0])
min_intensity_edge = numpy.min(selected_image_object[mask_outlines > 0])
max_intensity_edge = numpy.max(selected_image_object[mask_outlines > 0])
measurements_dict = {
"IntegratedIntensity": integrated_intensity,
"MeanIntensity": mean_intensity,
"StdIntensity": std_intensity,
"MinIntensity": min_intensity,
"MaxIntensity": max_intensity,
"LowerQuartileIntensity": lower_quartile_intensity,
"UpperQuartileIntensity": upper_quartile_intensity,
"MedianIntensity": median_intensity,
"MassDisplacement": mass_displacement,
"MeanAbsoluteDeviationIntensity": mad_intensity,
"IntegratedIntensityEdge": integrated_intensity_edge,
"MeanIntensityEdge": mean_intensity_edge,
"StdIntensityEdge": std_intensity_edge,
"MinIntensityEdge": min_intensity_edge,
"MaxIntensityEdge": max_intensity_edge,
"MaxZ": max_z,
"MaxY": max_y,
"MaxX": max_x,
"CMI.X": cmi_x,
"CMI.Y": cmi_y,
"CMI.Z": cmi_z,
}

for feature_name, measurement_value in measurements_dict.items():
if measurement_value.dtype != numpy.float32:
coerced_value = numpy.float32(measurement_value)
else:
coerced_value = measurement_value
output_dict["object_id"].append(numpy.int32(label))
output_dict["feature_name"].append(feature_name)
output_dict["channel"].append(object_loader.channel)
output_dict["compartment"].append(object_loader.compartment)
output_dict["value"].append(coerced_value)
return output_dict
105 changes: 105 additions & 0 deletions tests/featurization/test_intensity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from types import SimpleNamespace
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be useful to have a test that verifies that the peak intensity location is correctly scoped to its object. My assumption is that scipy.ndimage.maximum_position is currently searching the whole image.

The test would be like:
test_compute_intensity_peak_location_is_within_object().


import numpy as np

from zedprofiler.featurization.intensity import compute_intensity, get_outline

EXPECTED_MEASUREMENT_COUNT = 42


def test_get_outline_marks_boundaries_per_slice() -> None:
mask = np.zeros((2, 4, 4), dtype=bool)
mask[0, 1:3, 1:3] = True
mask[1, 0:2, 0:2] = True

outline = get_outline(mask)

assert outline.shape == mask.shape
assert outline.dtype == bool
assert outline[0].any()
assert outline[1].any()


def test_compute_intensity_returns_measurements_for_objects() -> None:
image = np.zeros((3, 3, 3), dtype=float)
image[0, 0, 0] = 1.0
image[0, 0, 1] = 2.0
image[0, 1, 0] = 3.0
image[0, 1, 1] = 4.0
image[1, 1, 1] = 5.0
image[1, 1, 2] = 6.0

labels = np.zeros((3, 3, 3), dtype=int)
labels[0, 0:2, 0:2] = 1
labels[1, 1, 1:3] = 2

loader = SimpleNamespace(
image=image,
label_image=labels,
object_ids=[1, 2],
channel="channel_a",
compartment="nuclei",
)

result = compute_intensity(loader)

expected_features = {
"IntegratedIntensity",
"MeanIntensity",
"StdIntensity",
"MinIntensity",
"MaxIntensity",
"LowerQuartileIntensity",
"UpperQuartileIntensity",
"MedianIntensity",
"MassDisplacement",
"MeanAbsoluteDeviationIntensity",
"IntegratedIntensityEdge",
"MeanIntensityEdge",
"StdIntensityEdge",
"MinIntensityEdge",
"MaxIntensityEdge",
"MaxZ",
"MaxY",
"MaxX",
"CMI.X",
"CMI.Y",
"CMI.Z",
}

assert set(result) == {
"object_id",
"feature_name",
"channel",
"compartment",
"value",
}
assert len(result["object_id"]) == EXPECTED_MEASUREMENT_COUNT
assert set(result["feature_name"]) == expected_features
assert result["channel"] == ["channel_a"] * EXPECTED_MEASUREMENT_COUNT
assert result["compartment"] == ["nuclei"] * EXPECTED_MEASUREMENT_COUNT
assert len(result["value"]) == EXPECTED_MEASUREMENT_COUNT


def test_compute_intensity_skips_empty_object_without_signal() -> None:
image = np.zeros((2, 2, 2), dtype=float)
labels = np.zeros((2, 2, 2), dtype=int)
labels[0, 0, 0] = 1

loader = SimpleNamespace(
image=image,
label_image=labels,
object_ids=[1],
channel="channel_b",
compartment="cell",
)

result = compute_intensity(loader)

assert result == {
"object_id": [],
"feature_name": [],
"channel": [],
"compartment": [],
"value": [],
}
Loading
Loading