From 5be72cd48b877a6e05f2a3c8a5380c66e4250e85 Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Thu, 15 Aug 2024 09:16:45 -0700 Subject: [PATCH 1/9] slice by FOV and track --- .../contrastive_cli/plot_embeddings.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index 1721c659..af919803 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -16,6 +16,13 @@ ) dataset +# %% +# Extract a track from the dataset +all_tracks_FOV = dataset.sel(fov_name=slice("A/4/0")) +a_track_in_FOV = all_tracks_FOV.sel(track_id=slice(23)) +print(a_track_in_FOV.sizes()) +# Why is sample dimension ~22000 long after the dataset is sliced by FOV and by track_id? + # %% # load all unprojected features: features = dataset["features"] From aaa91914e341404af754ecec1d1b60b296595021 Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Thu, 15 Aug 2024 09:47:07 -0700 Subject: [PATCH 2/9] proper indexing Co-authored-by: Ziwen Liu <67518483+ziw-liu@users.noreply.github.com> --- .../contrastive_cli/plot_embeddings.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index af919803..76f9bc1d 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -18,10 +18,8 @@ # %% # Extract a track from the dataset -all_tracks_FOV = dataset.sel(fov_name=slice("A/4/0")) -a_track_in_FOV = all_tracks_FOV.sel(track_id=slice(23)) -print(a_track_in_FOV.sizes()) -# Why is sample dimension ~22000 long after the dataset is sliced by FOV and by track_id? +all_tracks_FOV = dataset.sel(fov_name="/A/4/0") +a_track_in_FOV = all_tracks_FOV.sel(track_id=23) # %% # load all unprojected features: From 1982f92cbde777c7b24ab1bff2aadcc4e2371489 Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Thu, 15 Aug 2024 10:02:22 -0700 Subject: [PATCH 3/9] plot features of a single track --- .../contrastive_cli/plot_embeddings.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index 76f9bc1d..4ee62d9c 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -20,6 +20,38 @@ # Extract a track from the dataset all_tracks_FOV = dataset.sel(fov_name="/A/4/0") a_track_in_FOV = all_tracks_FOV.sel(track_id=23) +# Why is sample dimension ~22000 long after the dataset is sliced by FOV and by track_id? +indices = np.arange(a_track_in_FOV.sizes["sample"]) +features_track = a_track_in_FOV["features"] +time_stamp = features_track["t"][indices].astype(str) + +px.imshow( + features_track.values[indices], + labels={ + "x": "feature", + "y": "t", + "color": "value", + }, # change labels to match our metadata + y=time_stamp, + # show fov_name as y-axis +) +# %% +# normalize individual features. + +scaled_features = StandardScaler().fit_transform(features_track.values) +px.imshow( + scaled_features, + labels={ + "x": "feature", + "y": "t", + "color": "value", + }, # change labels to match our metadata + y=time_stamp, + # show fov_name as y-axis +) +# Scaled features are centered around 0 with a standard deviation of 1. +# Each feature is individually normalized along the time dimension. + # %% # load all unprojected features: From 392a674edf3a0994d159a0bbb9f8301d579d3daa Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Thu, 15 Aug 2024 15:00:47 -0700 Subject: [PATCH 4/9] rename (dataset -> embedding_dataset) --- .../contrastive_cli/plot_embeddings.py | 49 ++++++++++++++----- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index 4ee62d9c..2aea9ffd 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -7,19 +7,33 @@ import seaborn as sns from sklearn.preprocessing import StandardScaler from umap import UMAP - +import matplotlib.pyplot as plt from viscy.light.embedding_writer import read_embedding_dataset +from viscy.data.triplet import TripletDataset -# %% -dataset = read_embedding_dataset( - "/hpc/projects/intracellular_dashboard/viral-sensor/infection_classification/models/contrastive_tune_augmentations/predict/2024_02_04-tokenized-drop_path_0_0.zarr" +# %% Paths + +features_path = Path( + "/hpc/projects/intracellular_dashboard/viral-sensor/infection_classification/models/contrastive_tune_augmentations/predict/2024_02_04/tokenized-drop_path_0_0.zarr" +) +data_path = Path( + "/hpc/projects/virtual_staining/2024_02_04_A549_DENV_ZIKV_timelapse/registered_chunked.zarr" +) +tracks_path = Path( + "/hpc/projects/intracellular_dashboard/viral-sensor/2024_02_04_A549_DENV_ZIKV_timelapse/7.1-seg_track/tracking_v1.zarr" ) -dataset # %% -# Extract a track from the dataset -all_tracks_FOV = dataset.sel(fov_name="/A/4/0") -a_track_in_FOV = all_tracks_FOV.sel(track_id=23) +embedding_dataset = read_embedding_dataset(features_path) +embedding_dataset + +# %% +# Extract a track from the dataset and visualize its features. + +fov_name = "/B/4/4" +track_id = 71 +all_tracks_FOV = embedding_dataset.sel(fov_name=fov_name) +a_track_in_FOV = all_tracks_FOV.sel(track_id=track_id) # Why is sample dimension ~22000 long after the dataset is sliced by FOV and by track_id? indices = np.arange(a_track_in_FOV.sizes["sample"]) features_track = a_track_in_FOV["features"] @@ -35,12 +49,11 @@ y=time_stamp, # show fov_name as y-axis ) -# %% # normalize individual features. -scaled_features = StandardScaler().fit_transform(features_track.values) +scaled_features_track = StandardScaler().fit_transform(features_track.values) px.imshow( - scaled_features, + scaled_features_track, labels={ "x": "feature", "y": "t", @@ -52,17 +65,27 @@ # Scaled features are centered around 0 with a standard deviation of 1. # Each feature is individually normalized along the time dimension. +plt.plot(np.mean(scaled_features_track, axis=1), label="scaled_mean") +plt.plot(np.std(scaled_features_track, axis=1), label="scaled_std") +plt.plot(np.mean(features_track.values, axis=1), label="mean") +plt.plot(np.std(features_track.values, axis=1), label="std") +plt.legend() +plt.xlabel("t") +plt.show() + +# %% +# Create the montage of the images of the cells in the track. # %% # load all unprojected features: -features = dataset["features"] +features = embedding_dataset["features"] # or select a well: # features = features[features["fov_name"].str.contains("B/4")] features # %% # examine raw features -random_samples = np.random.randint(0, dataset.sizes["sample"], 700) +random_samples = np.random.randint(0, embedding_dataset.sizes["sample"], 700) # concatenate fov_name, track_id, and t to create a unique sample identifier sample_id = ( features["fov_name"][random_samples] From 4a015908a7e3fac1a50760ccb3f09e9eb4b80e57 Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Thu, 15 Aug 2024 15:53:18 -0700 Subject: [PATCH 5/9] trying to read patches of a track --- .../contrastive_cli/plot_embeddings.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index 2aea9ffd..23585f0b 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -30,9 +30,9 @@ # %% # Extract a track from the dataset and visualize its features. -fov_name = "/B/4/4" +fov_name = "B/4/4" track_id = 71 -all_tracks_FOV = embedding_dataset.sel(fov_name=fov_name) +all_tracks_FOV = embedding_dataset.sel(fov_name="/" + fov_name) a_track_in_FOV = all_tracks_FOV.sel(track_id=track_id) # Why is sample dimension ~22000 long after the dataset is sliced by FOV and by track_id? indices = np.arange(a_track_in_FOV.sizes["sample"]) @@ -75,6 +75,16 @@ # %% # Create the montage of the images of the cells in the track. +tracks_csv = next((tracks_path / fov_name).glob("*.csv")) +image_dataset = TripletDataset( + positions=(data_path / fov_name), + tracks_tables=pd.read_csv(tracks_csv), + channel_names=["Phase3D", "RFP", "GFP"], + z_range=(25, 40), + fit=False, + initial_yx_patch_size=(256, 256), + include_track_ids=[track_id], +) # %% # load all unprojected features: From 1debe64cd8d5431425dd21c8c1bb033e6f576801 Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Thu, 15 Aug 2024 16:54:04 -0700 Subject: [PATCH 6/9] display cells from the chosen track in napari --- .../contrastive_cli/plot_embeddings.py | 85 ++++++++++++++++--- 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index 23585f0b..e299d5cf 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -9,9 +9,11 @@ from umap import UMAP import matplotlib.pyplot as plt from viscy.light.embedding_writer import read_embedding_dataset -from viscy.data.triplet import TripletDataset +from viscy.data.triplet import TripletDataset, TripletDataModule +from iohub import open_ome_zarr +import monai.transforms as transforms -# %% Paths +# %% Paths and parameters. features_path = Path( "/hpc/projects/intracellular_dashboard/viral-sensor/infection_classification/models/contrastive_tune_augmentations/predict/2024_02_04/tokenized-drop_path_0_0.zarr" @@ -30,9 +32,9 @@ # %% # Extract a track from the dataset and visualize its features. -fov_name = "B/4/4" +fov_name = "/B/4/4" track_id = 71 -all_tracks_FOV = embedding_dataset.sel(fov_name="/" + fov_name) +all_tracks_FOV = embedding_dataset.sel(fov_name=fov_name) a_track_in_FOV = all_tracks_FOV.sel(track_id=track_id) # Why is sample dimension ~22000 long after the dataset is sliced by FOV and by track_id? indices = np.arange(a_track_in_FOV.sizes["sample"]) @@ -75,16 +77,77 @@ # %% # Create the montage of the images of the cells in the track. -tracks_csv = next((tracks_path / fov_name).glob("*.csv")) -image_dataset = TripletDataset( - positions=(data_path / fov_name), - tracks_tables=pd.read_csv(tracks_csv), - channel_names=["Phase3D", "RFP", "GFP"], - z_range=(25, 40), - fit=False, + +# normalizations = [ +# transforms.NormalizeIntensityd( +# keys=["Phase3D"], +# subtrahend=None, +# divisor=None, +# nonzero=False, +# channel_wise=False, +# dtype=None, +# allow_missing_keys=False, +# ), +# transforms.ScaleIntensityRangePercentilesd( +# keys=["RFP"], +# lower=50, +# upper=99, +# b_min=0.0, +# b_max=1.0, +# clip=False, +# relative=False, +# channel_wise=False, +# dtype=None, +# allow_missing_keys=False, +# ), +# ] + +normalizations = None +source_channel = ["Phase3D", "RFP"] +z_range = (28, 43) + +data_module = TripletDataModule( + data_path=data_path, + tracks_path=tracks_path, + source_channel=source_channel, + z_range=z_range, initial_yx_patch_size=(256, 256), + final_yx_patch_size=(256, 256), + batch_size=1, + num_workers=16, + normalizations=normalizations, + predict_cells=True, + include_fov_names=[fov_name], include_track_ids=[track_id], ) +# for train and val +data_module.setup("predict") +predict_dataset = data_module.predict_dataset + +phase = np.stack([p["anchor"][0, 7].numpy() for p in predict_dataset]) +fluor = np.stack([np.max(p["anchor"][1].numpy(), axis=0) for p in predict_dataset]) + +# %% Naive loop to iterate over the images and display + +for t in range(len(predict_dataset)): + fig, axes = plt.subplots(1, 2, figsize=(10, 5)) + axes[0].imshow(phase[t].squeeze(), cmap="gray") + axes[0].set_title("Phase") + axes[0].axis("off") + axes[1].imshow(fluor[t].squeeze(), cmap="gray") + axes[1].set_title("Fluor") + axes[1].axis("off") + plt.title(f"t={t}") + plt.show() + +# %% display the track in napari +import napari +import os + +os.environ["DISPLAY"] = ":1" +viewer = napari.Viewer() +viewer.add_image(phase, name="Phase", colormap="gray") +viewer.add_image(fluor, name="Fluor", colormap="magenta") # %% # load all unprojected features: From d8b2b2f7ace54ae18a8c21208a68b2eb2853dbcd Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Thu, 15 Aug 2024 18:03:35 -0700 Subject: [PATCH 7/9] project a track on umap computed from all features --- .../contrastive_cli/plot_embeddings.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index e299d5cf..6a351ddf 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -149,6 +149,29 @@ viewer.add_image(phase, name="Phase", colormap="gray") viewer.add_image(fluor, name="Fluor", colormap="magenta") +# %% +# Compute UMAP over all features +features = embedding_dataset["features"] +scaled_features = StandardScaler().fit_transform(features.values) +umap = UMAP() +# Fit UMAP on all features +embedding = umap.fit_transform(scaled_features) +# %% +# Transform the track features +scaled_features_track_umap = umap.transform(scaled_features_track) +plt.plot(scaled_features_track_umap[:, 0], scaled_features_track_umap[:, 1]) +plt.plot(scaled_features_track_umap[0, 0], scaled_features_track_umap[0, 1], marker="o") +plt.plot( + scaled_features_track_umap[-1, 0], scaled_features_track_umap[-1, 1], marker="x" +) +for i in range(1, len(scaled_features_track_umap) - 1): + plt.plot( + scaled_features_track_umap[i, 0], + scaled_features_track_umap[i, 1], + marker=".", + color="blue", + ) +plt.show() # %% # load all unprojected features: features = embedding_dataset["features"] From faf26a2d21e3508af5fd66b6d431cee329536cc1 Mon Sep 17 00:00:00 2001 From: Ziwen Liu <67518483+ziw-liu@users.noreply.github.com> Date: Thu, 15 Aug 2024 21:45:23 -0700 Subject: [PATCH 8/9] Animated latent space (#137) * animated latent space * delete duplicate umap calculation --------- Co-authored-by: Shalin Mehta --- .../contrastive_cli/plot_embeddings.py | 77 ++++++++++--------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index 6a351ddf..63ce73d4 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -1,13 +1,14 @@ # %% from pathlib import Path +import matplotlib.pyplot as plt import numpy as np import pandas as pd import plotly.express as px import seaborn as sns from sklearn.preprocessing import StandardScaler from umap import UMAP -import matplotlib.pyplot as plt + from viscy.light.embedding_writer import read_embedding_dataset from viscy.data.triplet import TripletDataset, TripletDataModule from iohub import open_ome_zarr @@ -16,7 +17,7 @@ # %% Paths and parameters. features_path = Path( - "/hpc/projects/intracellular_dashboard/viral-sensor/infection_classification/models/contrastive_tune_augmentations/predict/2024_02_04/tokenized-drop_path_0_0.zarr" + "/hpc/projects/intracellular_dashboard/viral-sensor/infection_classification/models/contrastive_tune_augmentations/predict/2024_02_04/tokenized-drop_path_0_0-2024-06-13.zarr" ) data_path = Path( "/hpc/projects/virtual_staining/2024_02_04_A549_DENV_ZIKV_timelapse/registered_chunked.zarr" @@ -152,10 +153,29 @@ # %% # Compute UMAP over all features features = embedding_dataset["features"] +# or select a well: +# features = features[features["fov_name"].str.contains("B/4")] + scaled_features = StandardScaler().fit_transform(features.values) umap = UMAP() # Fit UMAP on all features embedding = umap.fit_transform(scaled_features) + +# %% +# Add UMAP coordinates to the dataset + +features = ( + features.assign_coords(UMAP1=("sample", embedding[:, 0])) + .assign_coords(UMAP2=("sample", embedding[:, 1])) + .set_index(sample=["UMAP1", "UMAP2"], append=True) +) +features + + +sns.scatterplot( + x=features["UMAP1"], y=features["UMAP2"], hue=features["t"], s=7, alpha=0.8 +) + # %% # Transform the track features scaled_features_track_umap = umap.transform(scaled_features_track) @@ -172,15 +192,9 @@ color="blue", ) plt.show() -# %% -# load all unprojected features: -features = embedding_dataset["features"] -# or select a well: -# features = features[features["fov_name"].str.contains("B/4")] -features # %% -# examine raw features +# examine random features random_samples = np.random.randint(0, embedding_dataset.sizes["sample"], 700) # concatenate fov_name, track_id, and t to create a unique sample identifier sample_id = ( @@ -191,7 +205,7 @@ + features["t"][random_samples].astype(str) ) px.imshow( - features.values[random_samples], + scaled_features[random_samples], labels={ "x": "feature", "y": "sample", @@ -201,24 +215,6 @@ # show fov_name as y-axis ) -# %% -scaled_features = StandardScaler().fit_transform(features.values) - -umap = UMAP() - -embedding = umap.fit_transform(scaled_features) -features = ( - features.assign_coords(UMAP1=("sample", embedding[:, 0])) - .assign_coords(UMAP2=("sample", embedding[:, 1])) - .set_index(sample=["UMAP1", "UMAP2"], append=True) -) -features - -# %% -sns.scatterplot( - x=features["UMAP1"], y=features["UMAP2"], hue=features["t"], s=7, alpha=0.8 -) - # %% def load_annotation(da, path, name, categories: dict | None = None): @@ -275,17 +271,28 @@ def load_annotation(da, path, name, categories: dict | None = None): # %% # interactive scatter plot to associate clusters with specific cells - -px.scatter( - data_frame=pd.DataFrame( - {k: v for k, v in features.coords.items() if k != "features"} - ), +df = pd.DataFrame({k: v for k, v in features.coords.items() if k != "features"}) +df["infection"] = infection.values +df["division"] = division.values +df["well"] = df["fov_name"].str.rsplit("/", n=1).str[0] +df["fov_track_id"] = df["fov_name"] + "-" + df["track_id"].astype(str) +# select row B (DENV) +df = df[df["fov_name"].str.contains("B")] +df.sort_values("t", inplace=True) + +g = px.scatter( + data_frame=df, x="UMAP1", y="UMAP2", - color=(infection.astype(str) + " " + division.astype(str)).rename("annotation"), + symbol="infection", + color="well", hover_name="fov_name", - hover_data=["id", "t"], + hover_data=["id", "t", "track_id"], + animation_frame="t", + animation_group="fov_track_id", ) +g.update_layout(width=800, height=600) + # %% # cluster features in heatmap directly From cbf280f445a3675a3b82a298cdf873935700b894 Mon Sep 17 00:00:00 2001 From: Shalin Mehta Date: Fri, 16 Aug 2024 18:40:58 -0700 Subject: [PATCH 9/9] PCA of features and projections --- .../contrastive_cli/plot_embeddings.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py index 63ce73d4..40cf36db 100644 --- a/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py +++ b/applications/contrastive_phenotyping/contrastive_cli/plot_embeddings.py @@ -7,8 +7,10 @@ import plotly.express as px import seaborn as sns from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA from umap import UMAP + from viscy.light.embedding_writer import read_embedding_dataset from viscy.data.triplet import TripletDataset, TripletDataModule from iohub import open_ome_zarr @@ -30,6 +32,17 @@ embedding_dataset = read_embedding_dataset(features_path) embedding_dataset +# %% +# Compute PCA of the features and projections to estimate the number of components to keep. +PCA_features = PCA().fit(embedding_dataset["features"].values) +PCA_projection = PCA().fit(embedding_dataset["projections"].values) + +plt.plot(PCA_features.explained_variance_ratio_, label="features") +plt.plot(PCA_projection.explained_variance_ratio_, label="projections") +plt.legend() +plt.xlabel("n_components") +plt.show() + # %% # Extract a track from the dataset and visualize its features. @@ -161,6 +174,7 @@ # Fit UMAP on all features embedding = umap.fit_transform(scaled_features) + # %% # Add UMAP coordinates to the dataset