Visualize Module

`GeneralVisualizer`

Bases: BaseVisualizer

Source code in src/autoencodix/visualize/_general_visualizer.py

class GeneralVisualizer(BaseVisualizer):
    # Registry of every figure this visualizer has produced, keyed by plot name
    # (e.g. "ModelWeights", "loss_absolute", "2D-scatter"). Latent-space plots are
    # stored several levels deep as plots[plot_type][epoch][split][param].
    plots: Dict[str, Any] = field(
        default_factory=nested_dict
    )  ## Nested dictionary of plots as figure handles

    def __init__(self) -> None:
        """Create the visualizer with an empty nested plot registry."""
        # NOTE(review): the base-class initializer is not invoked here — confirm
        # that BaseVisualizer needs no set-up of its own.
        self.plots = nested_dict()

    def __setitem__(self, key, elem):
        """Store ``elem`` in the plot registry under ``key`` (``visualizer[key] = elem``)."""
        self.plots[key] = elem

    def visualize(self, result: Result, config: DefaultConfig) -> Result:
        """Create the standard plots for a run and cache them in ``self.plots``.

        The model-weights heatmap is stored under ``"ModelWeights"`` unless the
        model's input dimension exceeds 3000, in which case it is skipped with a
        warning. The loss curves are stored under ``"loss_absolute"`` and
        ``"loss_relative"``; any error while building them is downgraded to a
        warning so that the call never fails because of missing loss data.

        Args:
            result: Result object providing the model and the recorded losses.
            config: Configuration forwarded to the loss-formatting helper.

        Returns:
            The ``result`` object that was passed in, unchanged.
        """
        input_dim = result.model.input_dim
        if input_dim <= 3000:
            weights_fig = self._plot_model_weights(model=result.model)
            self.plots["ModelWeights"] = weights_fig
        else:
            # A heatmap with that many rows/columns is unreadable and very slow.
            warnings.warn(
                f"Model weights plot is skipped since input dimension {input_dim} is larger than 3000 and heatmap would be too large."
            )

        # Loss plots: one long-format table feeds both the absolute and the
        # relative view.
        loss_plot_specs = (
            ("loss_absolute", "absolute"),
            ("loss_relative", "relative"),
        )
        try:
            long_losses = self._make_loss_format(result=result, config=config)
            for plot_key, loss_kind in loss_plot_specs:
                self.plots[plot_key] = self._make_loss_plot(
                    df_plot=long_losses, plot_type=loss_kind
                )
        except Exception as e:
            # Best effort by design: missing loss history must not break the call.
            message = (
                "We could not create visualizations for the loss plots.\n"
                "This usually happens if you try to visualize after saving and loading "
                "the pipeline object with `save_all=False`. This memory-efficient saving mode "
                "does not retain past training loss data.\n\n"
                f"Original error message: {e}"
            )
            warnings.warn(message)

        return result

    ## Plotting methods ##
    @no_type_check
    def show_latent_space(
        self,
        result: Result,
        plot_type: Literal[
            "2D-scatter", "Ridgeline", "Clustermap", "Coverage-Correlation"
        ] = "2D-scatter",
        labels: Optional[Union[list, pd.Series, None]] = None,
        param: Optional[Union[list, str]] = None,
        epoch: Optional[Union[int, None]] = None,
        split: str = "all",
        n_downsample: Optional[int] = 10000,
        **kwargs,
    ) -> None:
        """Visualizes the latent space of the given result using different types of plots.

        Every figure that is created is cached in ``self.plots`` (for the
        per-parameter plots under ``self.plots[plot_type][epoch][split][param]``)
        and then displayed.

        Args:
            result: The result object containing latent spaces and the model config.
            plot_type: The type of plot to generate. Options are "2D-scatter",
                "Ridgeline", "Clustermap" and "Coverage-Correlation". Any other
                value triggers a warning and nothing is plotted. Default is
                "2D-scatter".
            labels: Labels for the data points in the latent space. A ``pd.Series``
                is aligned to the latent-space index (samples without a label
                become "unknown") and its name is used as the legend title.
                Default is None.
            param: List of metadata column names to colour by, one plot per entry.
                The string "all" selects every metadata column. Mutually exclusive
                with ``labels``. Default is None, where all points share one label.
            epoch: The zero-based epoch to visualize. If None, the last epoch
                (``result.model.config.epochs - 1``) is used. The test split is
                always read with ``epoch=-1``. Ignored for "Coverage-Correlation".
            split: The data split to visualize. Options are "train", "valid",
                "test", and "all". Ignored for "Coverage-Correlation", which always
                uses "train" and "valid". Default is "all".
            n_downsample: If provided, randomly downsample the data to this number
                of samples for faster visualization. Default is 10000. Set to None
                to disable downsampling.
            **kwargs: additional arguments (currently unused).

        Raises:
            ValueError: If both ``labels`` and ``param`` are given, or if ``param``
                is a string other than "all".
            TypeError: If ``param`` is neither a list, "all" nor None.
        """
        plt.ioff()

        if plot_type == "Coverage-Correlation":
            if "Coverage-Correlation" not in self.plots:
                model_config = result.model.config
                records = []
                # Dedicated loop names: the ``epoch`` / ``split`` arguments must not
                # be overwritten by the checkpoint iteration.
                for ckpt_epoch in range(
                    model_config.checkpoint_interval,
                    model_config.epochs + 1,
                    model_config.checkpoint_interval,
                ):
                    for ckpt_split in ("train", "valid"):
                        # Latent spaces are stored zero-based, hence the -1.
                        latent_df = result.get_latent_df(
                            epoch=ckpt_epoch - 1, split=ckpt_split
                        )
                        tc = self._total_correlation(latent_df)
                        cov = self._coverage_calc(latent_df)
                        records.append(
                            {
                                "epoch": ckpt_epoch,
                                "split": ckpt_split,
                                "total_correlation": tc,
                                "coverage": cov,
                            }
                        )

                df_metrics = pd.DataFrame(records)

                fig, axes = plt.subplots(1, 2, figsize=(12, 5))

                # Total Correlation plot
                sns.lineplot(
                    data=df_metrics,
                    x="epoch",
                    y="total_correlation",
                    hue="split",
                    ax=axes[0],
                )
                axes[0].set_title("Total Correlation")
                axes[0].set_xlabel("Epoch")
                axes[0].set_ylabel("Total Correlation")

                # Coverage plot
                sns.lineplot(
                    data=df_metrics, x="epoch", y="coverage", hue="split", ax=axes[1]
                )
                axes[1].set_title("Coverage")
                axes[1].set_xlabel("Epoch")
                axes[1].set_ylabel("Coverage")

                plt.tight_layout()
                self.plots["Coverage-Correlation"] = fig

            show_figure(self.plots["Coverage-Correlation"])
            plt.show()
            return

        if plot_type not in ("2D-scatter", "Ridgeline", "Clustermap"):
            # Previously an unknown plot type silently produced nothing.
            warnings.warn(
                f"Unknown plot_type '{plot_type}'. Supported options are "
                "'2D-scatter', 'Ridgeline', 'Clustermap' and 'Coverage-Correlation'. "
                "No plot was created."
            )
            return

        # Set Defaults
        if epoch is None:
            epoch = result.model.config.epochs - 1

        ## Getting clin_data
        clin_data = self._collect_all_metadata(result=result)

        # The test split only has a single latent space, addressed with epoch=-1.
        if split == "all":
            df_latent = pd.concat(
                [
                    result.get_latent_df(epoch=epoch, split="train"),
                    result.get_latent_df(epoch=epoch, split="valid"),
                    result.get_latent_df(epoch=-1, split="test"),
                ]
            )
        elif split == "test":
            df_latent = result.get_latent_df(epoch=-1, split=split)
        else:
            df_latent = result.get_latent_df(epoch=epoch, split=split)

        ## Label options
        if labels is not None and param is not None:
            raise ValueError(
                "Please provide either labels or param, not both. If you want to plot all parameters, set param to 'all' and labels to None."
            )

        if labels is None and param is None:
            labels = ["all"] * df_latent.shape[0]
        elif labels is None and isinstance(param, str):
            if param != "all":
                raise ValueError(
                    "Please provide parameter to plot as a list not as string. If you want to plot all parameters, set param to 'all' and labels to None."
                )
            param = list(clin_data.columns)

        if labels is not None:
            # ``param`` is guaranteed to be None here (checked above).
            if isinstance(labels, pd.Series):
                param = [labels.name]
                # Order by index of df_latent first, fill missing with "unknown"
                labels = labels.reindex(
                    df_latent.index, fill_value="unknown"
                ).tolist()
            else:
                param = ["user_label"]  # Default label if none provided

        if not isinstance(param, list):
            raise TypeError("Param needs to be converted to a list")

        # Labels supplied by the caller (or the "all" placeholder); None when the
        # labels are looked up per parameter in the metadata.
        user_labels = labels

        # Downsample once, keeping latent rows and caller labels in sync.
        if n_downsample is not None and df_latent.shape[0] > n_downsample:
            sample_idx = np.random.choice(
                df_latent.shape[0], n_downsample, replace=False
            )
            df_latent = df_latent.iloc[sample_idx]
            if user_labels is not None:
                user_labels = [user_labels[i] for i in sample_idx]

        # The 2D embedding does not depend on the parameter, so it is computed
        # lazily once and shared by all per-parameter scatter plots.
        embedding = None

        for p in param:
            if p in clin_data.columns:
                plot_labels = clin_data.loc[df_latent.index, p].tolist()  # ty: ignore
            elif user_labels is not None:
                plot_labels = user_labels
            else:
                # Without this guard the plot would reuse the labels of the
                # previous parameter (or crash on None).
                warnings.warn(
                    f"Parameter '{p}' was not found in the metadata columns and is skipped."
                )
                continue

            if plot_type == "2D-scatter":
                if embedding is None:
                    ## Make 2D Embedding with UMAP
                    if df_latent.shape[1] > 2:
                        reducer = UMAP(n_components=2)
                        embedding = pd.DataFrame(reducer.fit_transform(df_latent))
                    else:
                        embedding = df_latent

                self.plots["2D-scatter"][epoch][split][p] = self._plot_2D(
                    embedding=embedding,
                    labels=plot_labels,
                    param=p,
                    layer=f"2D latent space (epoch {epoch+1})",  # we start counting epochs at 0, so add 1 for display
                    figsize=(12, 8),
                    center=True,
                )
                fig = self.plots["2D-scatter"][epoch][split][p]

            elif plot_type == "Ridgeline":
                self.plots["Ridgeline"][epoch][split][p] = self._plot_latent_ridge(
                    lat_space=df_latent, labels=plot_labels, param=p
                )
                # The ridge plot is a FacetGrid; its figure is what gets displayed.
                fig = self.plots["Ridgeline"][epoch][split][p].figure

            else:  # "Clustermap"
                self.plots["Clustermap"][epoch][split][p] = (
                    self._plot_latent_clustermap(
                        lat_space=df_latent, labels=plot_labels, param=p
                    )
                )
                fig = self.plots["Clustermap"][epoch][split][p]

            show_figure(fig)
            plt.show()

    def show_weights(self) -> None:
        """Display the model weights plot if it exists in the plots dictionary."""

        if "ModelWeights" not in self.plots.keys():
            print("Model weights not found in the plots dictionary")
            print("You need to run visualize() method first")
        else:
            fig = self.plots["ModelWeights"]
            show_figure(fig)
            plt.show()

    ### Moved to Base
    # def show_evaluation(
    #     self,
    #     param: str,
    #     metric: str,
    #     ml_alg: Optional[str] = None,
    # ) -> None:

    ### Utilities ###
    @staticmethod
    def _plot_2D(
        embedding: pd.DataFrame,
        labels: list,
        param: Optional[Union[str, None]] = None,
        layer: str = "latent space",
        figsize: tuple = (24, 15),
        center: bool = True,
        plot_numeric: bool = False,
        xlim: Optional[Union[tuple, None]] = None,
        ylim: Optional[Union[tuple, None]] = None,
        scale: Optional[Union[str, None]] = None,
        no_leg: bool = False,
    ) -> matplotlib.figure.Figure:
        """Plots a 2D scatter plot of the given embedding with labels.

        Non-string labels with more than three distinct values are treated as
        numeric: they are either binned into quartiles (default) or, with
        ``plot_numeric=True``, plotted on a continuous colour scale. Non-string
        labels with at most three distinct values are converted to strings.

        Args:
            embedding: DataFrame containing the 2D embedding coordinates; the
                first two columns are plotted.
            labels: List of labels corresponding to each point in the embedding.
            param: Title for the legend. Defaults to None.
            layer: Title for the plot. Defaults to "latent space".
            figsize: Size of the figure. Defaults to (24, 15).
            center: If True, marks the mean position of every label group with a
                star. Disabled automatically for numeric colouring. Defaults to True.
            plot_numeric: If True, treats labels as numeric. Defaults to False.
            xlim: Limits for the x-axis. Defaults to None.
            ylim: Limits for the y-axis. Defaults to None.
            scale: Scale for the axes (e.g., 'log'). Defaults to None.
            no_leg: If True, no legend is displayed. Defaults to False.

        Returns:
            The resulting matplotlib figure (already closed for pyplot, so it is
            only shown when displayed explicitly).
        """
        import numbers  # local import: only needed for the numeric-label check

        labels = list(labels)

        numeric = False
        if not isinstance(labels[0], str):
            if len(np.unique(labels)) > 3:
                if not plot_numeric:
                    print(
                        "The provided label column is numeric and converted to categories."
                    )
                    # Keep every real number (ints included, bools excluded) and
                    # map everything else to NaN. Checking for ``float`` only
                    # would turn integer-valued columns entirely into NaN.
                    labels = [
                        float(x)
                        if isinstance(x, numbers.Real) and not isinstance(x, bool)
                        else float("nan")
                        for x in labels
                    ]
                    labels = (
                        pd.qcut(
                            x=pd.Series(labels),
                            q=4,
                            labels=["1stQ", "2ndQ", "3rdQ", "4thQ"],
                        )
                        .astype(str)
                        .to_list()
                    )
                else:
                    center = False  ## Disable centering for numeric params
                    numeric = True
            else:
                labels = [str(x) for x in labels]

        fig, ax = plt.subplots(figsize=figsize)

        # Check if labels or embedding is longer and adapt the labels.
        n_points = embedding.shape[0]
        if len(labels) < n_points:
            print(
                "Given labels do not have the same length as given sample size. Labels will be duplicated."
            )
            # NOTE(review): each label is repeated n_points // len(labels) times,
            # which only matches the sample size when it divides evenly.
            repeats = n_points // len(labels)
            labels = [label for label in labels for _ in range(repeats)]
        elif len(labels) > n_points:
            # NOTE(review): reduces the labels to their distinct values — confirm
            # this is the intended handling of surplus labels.
            labels = list(set(labels))

        unique_labels = np.unique(labels)

        if numeric:
            sns.scatterplot(
                x=embedding.iloc[:, 0],
                y=embedding.iloc[:, 1],
                hue=labels,
                palette="bwr",
                s=40,
                alpha=0.5,
                ec="black",
                ax=ax,
            )
        else:
            # tab10 only offers ten distinct colours; switch to tab20 early.
            palette_name = "tab20" if len(unique_labels) > 8 else "tab10"
            cat_pal = sns.color_palette(palette_name, n_colors=len(unique_labels))
            sns.scatterplot(
                x=embedding.iloc[:, 0],
                y=embedding.iloc[:, 1],
                hue=labels,
                hue_order=unique_labels,
                palette=cat_pal,
                s=40,
                alpha=0.5,
                ec="black",
                ax=ax,
            )

        if center:
            # ``center`` is forced to False for numeric colouring, so ``cat_pal``
            # is always defined here.
            means = embedding.groupby(by=labels).mean()

            sns.scatterplot(
                x=means.iloc[:, 0],
                y=means.iloc[:, 1],
                hue=unique_labels,
                hue_order=unique_labels,
                palette=cat_pal,
                s=200,
                ec="black",
                alpha=0.9,
                marker="*",
                legend=False,
                ax=ax,
            )

        if xlim is not None:
            ax.set_xlim(xlim[0], xlim[1])

        if ylim is not None:
            ax.set_ylim(ylim[0], ylim[1])

        if scale is not None:
            ax.set_yscale(scale)
            ax.set_xscale(scale)
        ax.set_xlabel("Dim 1")
        ax.set_ylabel("Dim 2")

        legend_cols = 2 if len(unique_labels) > 10 else 1

        if no_leg:
            ax.legend([], [], frameon=False)
        else:
            sns.move_legend(
                ax,
                "upper left",
                bbox_to_anchor=(1, 1),
                ncol=legend_cols,
                title=param,
                frameon=False,
            )

        # Add title to the plot
        ax.set_title(layer)

        plt.close(fig)
        return fig

    @staticmethod
    def _plot_latent_clustermap(
        lat_space: pd.DataFrame,
        labels: Optional[Union[list, pd.Series, None]] = None,
        param: Optional[Union[str, None]] = None,
    ) -> matplotlib.figure.Figure:
        """Creates a clustermap of the mean latent intensity per label group.

        Each row of the heatmap is one label group, each column one latent
        dimension. Rows are clustered (when there is more than one group),
        columns keep their order.

        Args:
            lat_space: DataFrame containing the latent space intensities for samples (rows) and latent dimensions (columns). It is not modified.
            labels: List of labels for each sample. If None, all samples are considered as one group.
            param: Name of the grouping shown on the row axis, e.g. the clinical parameter the labels come from. If None, "all" is used.
        Returns:
            fig: Figure object containing the clustermap
        """
        if labels is None:
            labels = ["all"] * len(lat_space)
        if param is None:
            param = "all"

        # Work on a copy: adding the grouping column to the caller's frame would
        # leak it on any error and overwrite a latent column of the same name.
        grouped_input = lat_space.copy()
        grouped_input[param] = labels
        group_means = grouped_input.groupby(param).mean()

        cluster_figure = sns.clustermap(
            group_means,
            col_cluster=False,
            # Hierarchical clustering needs at least two rows.
            row_cluster=group_means.shape[0] > 1,
            # Width/height follow the original sizing (column count includes the
            # grouping column).
            figsize=(1 * grouped_input.shape[1], 4 + 0.5 * len(set(labels))),
            dendrogram_ratio=0.1,
            cmap="icefire",
            cbar_kws={"orientation": "horizontal"},
            cbar_pos=(0.2, 0.95, 0.3, 0.02),
        ).figure

        plt.close(cluster_figure)
        return cluster_figure

    @staticmethod
    def _plot_latent_ridge(
        lat_space: pd.DataFrame,
        labels: Optional[Union[list, pd.Series, None]] = None,
        param: Optional[Union[str, None]] = None,
    ) -> sns.FacetGrid:
        """Creates a ridge line plot of latent space dimension where each row shows the density of a latent dimension and groups (ridges).

        Non-string labels with more than three distinct values are binned into
        quartiles; samples labelled "unknown" or "nan" are left out of the plot.
        Note that this sets the global seaborn theme.

        Args:
            lat_space: DataFrame containing the latent space intensities for samples (rows) and latent dimensions (columns)
            labels: List of labels for each sample. If None, all samples are considered as one group.
            param: Name of the grouping used for the hue and the legend title, e.g. the clinical parameter the labels come from.
        Returns:
            g: FacetGrid object containing the ridge line plot
        """
        import numbers  # local import: only needed for the numeric-label check

        sns.set_theme(
            style="white", rc={"axes.facecolor": (0, 0, 0, 0)}
        )  ## Necessary to enforce overplotting

        n_dims = len(lat_space.columns)

        # Long format: the samples of one latent dimension after another.
        df = pd.melt(lat_space, var_name="latent dim", value_name="latent intensity")
        df["sample"] = n_dims * list(lat_space.index)

        if labels is None:
            param = "all"
            # One label per sample; they are tiled per latent dimension below.
            labels = ["all"] * len(lat_space)
        else:
            # A Series would be multiplied element-wise further down instead of
            # being repeated, so always work with a plain list.
            labels = list(labels)

        if not isinstance(labels[0], str):
            if len(np.unique(labels)) > 3:
                # Keep every real number (ints included, bools excluded) and map
                # everything else to NaN before binning into quartiles.
                labels = [
                    float(x)
                    if isinstance(x, numbers.Real) and not isinstance(x, bool)
                    else float("nan")
                    for x in labels
                ]
                labels = list(
                    pd.qcut(
                        x=pd.Series(labels),
                        q=4,
                        labels=["1stQ", "2ndQ", "3rdQ", "4thQ"],
                    ).astype(str)
                )
            else:
                labels = [str(x) for x in labels]

        # Repeat the per-sample labels once per latent dimension (melt order).
        df[param] = n_dims * labels  # type: ignore

        exclude_missing_info = (df[param] == "unknown") | (df[param] == "nan")
        df_known = df[~exclude_missing_info]

        # Common x-range: from the smallest 5% quantile to the largest 90%
        # quantile over all (group, latent dim) combinations.
        per_group = df_known[["latent intensity", "latent dim", param]].groupby(
            [param, "latent dim"], observed=False
        )
        xmin = per_group.quantile(0.05).min()
        xmax = per_group.quantile(0.9).max()

        # One colour per label group (not per sample).
        n_groups = len(np.unique(labels))
        palette_name = "tab20" if n_groups > 8 else "tab10"
        cat_pal = sns.color_palette(palette_name, n_colors=n_groups)

        g = sns.FacetGrid(
            df_known,
            row="latent dim",
            hue=param,
            aspect=12,
            height=0.8,
            xlim=(xmin.iloc[0], xmax.iloc[0]),
            palette=cat_pal,
        )

        g.map_dataframe(
            sns.kdeplot,
            "latent intensity",
            bw_adjust=0.5,
            clip_on=True,
            fill=True,
            alpha=0.5,
            warn_singular=False,
            ec="k",
            lw=1,
        )

        def label(data, color, label, text="latent dim"):
            # Write the name of the latent dimension to the left of its ridge.
            ax = plt.gca()
            label_text = data[text].unique()[0]
            ax.text(
                0.0,
                0.2,
                label_text,
                fontweight="bold",
                ha="right",
                va="center",
                transform=ax.transAxes,
            )

        g.map_dataframe(label, text="latent dim")

        g.set(xlim=(xmin.iloc[0], xmax.iloc[0]))
        # Set the subplots to overlap
        g.figure.subplots_adjust(hspace=-0.5)

        # Remove axes details that don't play well with overlap
        g.set_titles("")
        g.set(yticks=[], ylabel="")
        g.despine(bottom=True, left=True)

        g.add_legend()

        plt.close(g.figure)
        return g

    def _plot_evaluation(
        self,
        result: Result,
    ) -> dict:
        """Plots the evaluation results from the Result object.

        One bar plot is created for every combination of clinical parameter,
        metric and ML algorithm found in ``result.embedding_evaluation``. The
        plots are also cached in ``self.plots["ML_Evaluation"]``.

        Args:
            result: The Result object containing evaluation data.

        Returns:
            Nested dictionary ``{clinic_param: {metric: {ml_alg: plot}}}`` with
            the evaluation plots, or an empty dictionary when no evaluation data
            is available.
        """
        ml_plots = dict()
        plt.ioff()
        if not hasattr(result.embedding_evaluation, "CLINIC_PARAM"):
            warnings.warn(
                "We could not create visualizations for the evaluation plots.\n"
                "This usually happens if you try to visualize after saving and loading "
                "the pipeline object with `save_all=False`. This memory-efficient saving mode "
                "does not retain the embedding evaluation results.\n"
                "Set save_all=True to avoid this, also this might be fixed soon."
            )
            return {}

        evaluation = result.embedding_evaluation

        for c in pd.unique(evaluation.CLINIC_PARAM):
            ml_plots[c] = dict()
            of_param = evaluation[evaluation.CLINIC_PARAM == c]
            for m in pd.unique(of_param["metric"]):  # ty: ignore
                ml_plots[c][m] = dict()
                of_metric = of_param[of_param.metric == m]
                for alg in pd.unique(of_metric["ML_ALG"]):  # ty: ignore
                    data = of_metric[of_metric.ML_ALG == alg]

                    # Check for missing values
                    if data["value"].isnull().any():
                        warnings.warn(
                            f"Missing values found in evaluation data for parameter '{c}', metric '{m}', and algorithm '{alg}'. These will be ignored in the plot."
                        )
                        # Only a missing score disqualifies a row; NaNs in other
                        # columns must not silently remove valid scores.
                        data = data.dropna(subset=["value"])

                    if data.empty:
                        # Nothing left to draw; an empty frame would lead to a
                        # NaN axis limit below.
                        continue

                    sns_plot = sns.catplot(
                        data=data,
                        x="score_split",
                        y="value",
                        col="ML_TASK",
                        hue="score_split",
                        kind="bar",
                    )

                    # Bars start at zero unless there are negative scores.
                    min_y = data.value.min()
                    if min_y > 0:
                        min_y = 0

                    ml_plots[c][m][alg] = sns_plot.set(ylim=(min_y, None))

        self.plots["ML_Evaluation"] = ml_plots

        return ml_plots

    @staticmethod
    def _total_correlation(latent_space: pd.DataFrame) -> float:
        """Function to compute the total correlation as described here (Equation2): https://doi.org/10.3390/e21100921

        Args:
            latent_space: latent space with dimension sample vs. latent dimensions
        Returns:
            tc: total correlation across latent dimensions
        """
        lat_cov = np.cov(latent_space.T)
        tc = 0.5 * (np.sum(np.log(np.diag(lat_cov))) - np.linalg.slogdet(lat_cov)[1])
        return tc

    @staticmethod
    def _coverage_calc(latent_space: pd.DataFrame) -> float:
        """Function to compute the coverage as described here (Equation3): https://doi.org/10.3390/e21100921

        Args:
            latent_space: latent space with dimension sample vs. latent dimensions
        Returns:
            cov: coverage across latent dimensions
        """
        bins_per_dim = int(
            np.power(len(latent_space.index), 1 / len(latent_space.columns))
        )
        if bins_per_dim < 2:
            warnings.warn(
                "Coverage calculation fails since combination of sample size and latent dimension results in less than 2 bins."
            )
            cov = np.nan
        else:
            latent_bins = latent_space.apply(lambda x: pd.cut(x, bins=bins_per_dim))
            latent_bins = pd.Series(zip(*[latent_bins[col] for col in latent_bins]))
            cov = len(latent_bins.unique()) / np.power(
                bins_per_dim, len(latent_space.columns)
            )

        return cov

`show_latent_space(result, plot_type='2D-scatter', labels=None, param=None, epoch=None, split='all', n_downsample=10000, **kwargs)`

Visualizes the latent space of the given result using different types of plots.

Parameters:

Name	Type	Description	Default
`result`	`Result`	The result object containing latent spaces and losses.	required
`plot_type`	`Literal['2D-scatter', 'Ridgeline', 'Coverage-Correlation']`	The type of plot to generate. Options are "2D-scatter", "Ridgeline", and "Coverage-Correlation"; the implementation additionally handles "Clustermap", which is not listed in the type annotation. Default is "2D-scatter".	`'2D-scatter'`
`labels`	`Optional[Union[list, Series, None]]`	List of labels for the data points in the latent space. Default is None.	`None`
`param`	`Optional[Union[list, str]]`	List of parameters provided and stored as metadata. Strings must match column names. If not a list, string "all" is expected for convenient way to make plots for all parameters available. Default is None where no colored labels are plotted.	`None`
`epoch`	`Optional[Union[int, None]]`	The zero-based epoch number to visualize. If None, the last epoch (`result.model.config.epochs - 1`) is used. Default is None.	`None`
`split`	`str`	The data split to visualize. Options are "train", "valid", "test", and "all". Default is "all".	`'all'`
`n_downsample`	`Optional[int]`	If provided, downsample the data to this number of samples for faster visualization. Default is 10000. Set to None to disable downsampling.	`10000`
`**kwargs`		additional arguments.	`{}`

Source code in src/autoencodix/visualize/_general_visualizer.py

@no_type_check
def show_latent_space(
    self,
    result: Result,
    plot_type: Literal[
        "2D-scatter", "Ridgeline", "Coverage-Correlation"
    ] = "2D-scatter",
    labels: Optional[Union[list, pd.Series, None]] = None,
    param: Optional[Union[list, str]] = None,
    epoch: Optional[Union[int, None]] = None,
    split: str = "all",
    n_downsample: Optional[int] = 10000,
    **kwargs,
) -> None:
    """Visualize the latent space of ``result`` and cache the figures in ``self.plots``.

    For "Coverage-Correlation" the total correlation and coverage are computed
    for the train and valid splits at every checkpoint epoch; the figure is
    stored under ``self.plots["Coverage-Correlation"]`` and reused on later
    calls. For this plot type ``labels``, ``param``, ``epoch``, ``split`` and
    ``n_downsample`` are not used.

    For all other plot types one figure per entry of ``param`` is created and
    stored under ``self.plots[plot_type][epoch][split][param_entry]``.

    Args:
        result: The result object providing latent spaces and the model config.
        plot_type: The type of plot to generate. Annotated options are
            "2D-scatter", "Ridgeline" and "Coverage-Correlation"; the
            implementation additionally handles "Clustermap". Any other value
            triggers a warning and nothing is plotted. Default is "2D-scatter".
        labels: Labels for the data points in the latent space. A ``pd.Series``
            is reindexed to the latent space index (missing entries become
            "unknown") and its name is used as the parameter name. Default is None.
        param: List of metadata column names to plot, one figure per entry.
            The string "all" selects every column of the collected metadata.
            Default is None, in which case all points share the label "all".
        epoch: The epoch number to visualize. If None,
            ``result.model.config.epochs - 1`` is used. The test split is
            always read with ``epoch=-1``. Default is None.
        split: The data split to visualize. Options are "train", "valid",
            "test", and "all". Default is "all".
        n_downsample: If not None and the latent space has more rows than this
            value, a random subset of this size is drawn (without replacement)
            before plotting. Default is 10000.
        **kwargs: Accepted but not used by this method.

    Raises:
        ValueError: If ``param`` is a string other than "all", or if both
            ``labels`` and ``param`` are provided.
        TypeError: If ``param`` is not a list after the label options have
            been resolved.
    """
    plt.ioff()

    if plot_type == "Coverage-Correlation":
        if "Coverage-Correlation" not in self.plots:
            checkpoint_interval = result.model.config.checkpoint_interval
            records = []
            # Dedicated loop names so the `epoch`/`split` arguments are not shadowed.
            for ckpt_epoch in range(
                checkpoint_interval,
                result.model.config.epochs + 1,
                checkpoint_interval,
            ):
                for ckpt_split in ("train", "valid"):
                    # Epochs are stored zero-based, the x-axis shows them one-based.
                    latent_df = result.get_latent_df(
                        epoch=ckpt_epoch - 1, split=ckpt_split
                    )
                    tc = self._total_correlation(latent_df)
                    cov = self._coverage_calc(latent_df)
                    records.append(
                        {
                            "epoch": ckpt_epoch,
                            "split": ckpt_split,
                            "total_correlation": tc,
                            "coverage": cov,
                        }
                    )

            df_metrics = pd.DataFrame(records)

            fig, axes = plt.subplots(1, 2, figsize=(12, 5))

            # Total Correlation plot
            _ = sns.lineplot(
                data=df_metrics,
                x="epoch",
                y="total_correlation",
                hue="split",
                ax=axes[0],
            )
            axes[0].set_title("Total Correlation")
            axes[0].set_xlabel("Epoch")
            axes[0].set_ylabel("Total Correlation")

            # Coverage plot
            _ = sns.lineplot(
                data=df_metrics, x="epoch", y="coverage", hue="split", ax=axes[1]
            )
            axes[1].set_title("Coverage")
            axes[1].set_xlabel("Epoch")
            axes[1].set_ylabel("Coverage")

            plt.tight_layout()
            self.plots["Coverage-Correlation"] = fig

        show_figure(self.plots["Coverage-Correlation"])
        plt.show()
        return

    if plot_type not in ("2D-scatter", "Ridgeline", "Clustermap"):
        # Previously an unknown plot type did all the data preparation and
        # then silently produced nothing.
        warnings.warn(
            f"Plot type {plot_type!r} is not recognized, no latent space plot was created. "
            "Please use '2D-scatter', 'Ridgeline', 'Clustermap' or 'Coverage-Correlation'."
        )
        return

    # Set Defaults
    if epoch is None:
        epoch = result.model.config.epochs - 1

    ## Getting clin_data
    clin_data = self._collect_all_metadata(result=result)

    # The test split is always requested with epoch=-1.
    if split == "all":
        df_latent = pd.concat(
            [
                result.get_latent_df(epoch=epoch, split="train"),
                result.get_latent_df(epoch=epoch, split="valid"),
                result.get_latent_df(epoch=-1, split="test"),
            ]
        )
    elif split == "test":
        df_latent = result.get_latent_df(epoch=-1, split=split)
    else:
        df_latent = result.get_latent_df(epoch=epoch, split=split)

    ## Label options
    if labels is None and param is None:
        labels = ["all"] * df_latent.shape[0]

    if labels is None and isinstance(param, str):
        if param != "all":
            raise ValueError(
                "Please provide parameter to plot as a list not as string. If you want to plot all parameters, set param to 'all' and labels to None."
            )
        param = list(clin_data.columns)

    if labels is not None and param is not None:
        raise ValueError(
            "Please provide either labels or param, not both. If you want to plot all parameters, set param to 'all' and labels to None."
        )

    if labels is not None and param is None:
        if isinstance(labels, pd.Series):
            param = [labels.name]
            # Order by index of df_latent first, fill missing with "unknown"
            labels = labels.reindex(df_latent.index, fill_value="unknown").tolist()
        else:
            param = ["user_label"]  # Default label if none provided
    if not isinstance(param, list):
        raise TypeError("Param needs to be converted to a list")

    # Downsample once, before the per-parameter loop. Metadata labels are
    # looked up by index afterwards, so only caller-supplied labels need to be
    # subset alongside the latent space.
    if n_downsample is not None and df_latent.shape[0] > n_downsample:
        sample_idx = np.random.choice(
            df_latent.shape[0], n_downsample, replace=False
        )
        df_latent = df_latent.iloc[sample_idx]
        if labels is not None:
            labels = [labels[i] for i in sample_idx]

    # The 2D embedding does not depend on the parameter, so it is computed
    # lazily on the first iteration and shared by all parameters.
    embedding = None

    for p in param:
        if p in clin_data.columns:
            p_labels = clin_data.loc[df_latent.index, p].tolist()  # ty: ignore
        else:
            # Fall back to the caller-supplied labels only; never reuse the
            # labels that were looked up for a previous parameter.
            p_labels = labels
            if p_labels is None:
                warnings.warn(
                    f"Parameter {p!r} was not found in the metadata columns; "
                    "no labels are available for it."
                )

        if plot_type == "2D-scatter":
            if embedding is None:
                ## Make 2D Embedding with UMAP
                if df_latent.shape[1] > 2:
                    reducer = UMAP(n_components=2)
                    embedding = pd.DataFrame(reducer.fit_transform(df_latent))
                else:
                    embedding = df_latent

            self.plots["2D-scatter"][epoch][split][p] = self._plot_2D(
                embedding=embedding,
                labels=p_labels,
                param=p,
                layer=f"2D latent space (epoch {epoch+1})",  # we start counting epochs at 0, so add 1 for display
                figsize=(12, 8),
                center=True,
            )

            fig = self.plots["2D-scatter"][epoch][split][p]
            show_figure(fig)
            plt.show()

        if plot_type == "Ridgeline":
            ## Make ridgeline plot
            self.plots["Ridgeline"][epoch][split][p] = self._plot_latent_ridge(
                lat_space=df_latent, labels=p_labels, param=p
            )

            # The stored object exposes the matplotlib figure via `.figure`.
            fig = self.plots["Ridgeline"][epoch][split][p].figure
            show_figure(fig)
            plt.show()

        if plot_type == "Clustermap":
            ## Make clustermap plot
            self.plots["Clustermap"][epoch][split][p] = (
                self._plot_latent_clustermap(
                    lat_space=df_latent, labels=p_labels, param=p
                )
            )

            fig = self.plots["Clustermap"][epoch][split][p]
            show_figure(fig)
            plt.show()

`show_weights()`

Display the model weights plot if it exists in the plots dictionary.

Source code in src/autoencodix/visualize/_general_visualizer.py

def show_weights(self) -> None:
    """Show the stored "ModelWeights" figure, or print a hint when it is missing."""
    weights_available = "ModelWeights" in self.plots.keys()
    if not weights_available:
        # Nothing stored under "ModelWeights": tell the user how to create it.
        print("Model weights not found in the plots dictionary")
        print("You need to run visualize() method first")
        return

    show_figure(self.plots["ModelWeights"])
    plt.show()

`Visualizer`

Bases: BaseVisualizer

Source code in src/autoencodix/visualize/visualize.py

class Visualizer(BaseVisualizer):
    plots: Dict[str, Any] = field(
        default_factory=nested_dict
    )  ## Nested dictionary of plots as figure handles

    def __init__(self) -> None:
        """Initialize ``plots`` with a fresh ``nested_dict()``."""
        self.plots = nested_dict()

    def __setitem__(self, key, elem) -> None:
        """Store ``elem`` under ``key`` in ``self.plots`` (enables ``visualizer[key] = elem``)."""
        self.plots[key] = elem

    def visualize(self, result: Result, config: DefaultConfig) -> Result:
        """Create the model weights plot and both loss plots and store them in ``self.plots``.

        Args:
            result: Result object whose ``model`` is plotted and which is passed
                on to ``make_loss_format``.
            config: Configuration passed on to ``make_loss_format``.

        Returns:
            The ``result`` object that was passed in.
        """
        ## Model weights heatmap
        weights_fig = self.plot_model_weights(model=result.model)
        self.plots["ModelWeights"] = weights_fig

        ## Long format of the losses, shared by both loss plots
        losses_long = self.make_loss_format(result=result, config=config)

        ## Absolute first, then relative
        loss_plots = (
            ("loss_absolute", "absolute"),
            ("loss_relative", "relative"),
        )
        for plot_key, loss_kind in loss_plots:
            self.plots[plot_key] = self.make_loss_plot(
                df_plot=losses_long, plot_type=loss_kind
            )

        return result

    ## Plotting methods ##

    def save_plots(
        self, path: str, which: Union[str, list] = "all", format: str = "png"
    ) -> None:
        """Write stored plots to ``path`` using the given file format.

        File names are built by joining the nested keys of each plot with "_".

        Args:
            path: The directory path where the plots will be saved.
            which: Either "all" (save every plot in the plots dictionary), the
                name of a single plot type, or a list of plot type names.
                Unknown names are reported via ``print`` and skipped.
            format: The file format in which to save the plots (e.g., 'png', 'jpg').
        """

        def _dump(entries, prefix=None):
            # Each entry is a tuple whose last element is the figure; the
            # leading elements are the nested keys that form the file name.
            for entry in entries:
                figure = entry[-1]
                stem = "_".join(str(part) for part in entry[0:-1])
                if prefix is not None:
                    stem = prefix + "_" + stem
                target = os.path.join(path, stem)
                figure.savefig(f"{target}.{format}")

        if isinstance(which, list):
            ## List of plot names
            for key in which:
                if key not in self.plots.keys():
                    print(f"Plot {key} not found in the plots dictionary")
                    print(f"All available plots are: {list(self.plots.keys())}")
                    continue
                # All epochs and splits of type key
                _dump(nested_to_tuple(self.plots[key]), key)
            return

        if which == "all":
            ## Every stored plot
            if len(self.plots) == 0:
                print("No plots found in the plots dictionary")
                print("You need to run  visualize() method first")
                return
            _dump(nested_to_tuple(self.plots))
            return

        ## Single plot name given as string
        if which not in self.plots.keys():
            print(f"Plot {which} not found in the plots dictionary")
            print(f"All available plots are: {list(self.plots.keys())}")
            return
        # All epochs and splits of type which
        _dump(nested_to_tuple(self.plots[which]), which)

    def show_loss(
        self, plot_type: Literal["absolute", "relative"] = "absolute"
    ) -> None:
        """Show the stored absolute or relative loss plot.

        Prints a hint instead when the requested plot has not been created yet
        or when ``plot_type`` is neither "absolute" nor "relative".

        Args:
            plot_type: The type of loss plot to display. Defaults to "absolute".
        """
        # (plot type, key in self.plots, message when the plot is missing)
        known_variants = (
            (
                "absolute",
                "loss_absolute",
                "Absolute loss plot not found in the plots dictionary",
            ),
            (
                "relative",
                "loss_relative",
                "Relative loss plot not found in the plots dictionary",
            ),
        )
        for variant, plot_key, missing_msg in known_variants:
            if plot_type == variant:
                if plot_key in self.plots.keys():
                    show_figure(self.plots[plot_key])
                    plt.show()
                else:
                    print(missing_msg)
                    print("You need to run visualize() method first")
                return

        print("Type of loss plot not recognized. Please use 'absolute' or 'relative'")

    @no_type_check
    def show_latent_space(
        self,
        result: Result,
        plot_type: str = "2D-scatter",
        labels: Optional[Union[list, pd.Series, None]] = None,
        param: Optional[Union[list, str]] = None,
        epoch: Optional[Union[int, None]] = None,
        split: str = "all",
        **kwargs,
    ) -> None:
        """Visualize the latent space of ``result`` and cache the figures in ``self.plots``.

        For "Coverage-Correlation" the total correlation and coverage are
        computed for the train and valid splits at every checkpoint epoch; the
        figure is stored under ``self.plots["Coverage-Correlation"]`` and reused
        on later calls. ``labels``, ``param``, ``epoch`` and ``split`` are not
        used for this plot type.

        For "2D-scatter" and "Ridgeline" one figure per entry of ``param`` is
        created and stored under ``self.plots[plot_type][epoch][split][entry]``.

        Args:
            result: The result object providing latent spaces, datasets and the model config.
            plot_type: The type of plot to generate. Options are "2D-scatter", "Ridgeline", and "Coverage-Correlation". Default is "2D-scatter".
            labels: Labels for the data points in the latent space. A ``pd.Series`` is reindexed to the latent space index (missing entries become "unknown") and its name is used as the parameter name. Default is None.
            param: List of metadata column names to plot. The string "all" selects every column of the collected metadata. Default is None, in which case all points share the label "all".
            epoch: The epoch number to visualize. If None, ``result.model.config.epochs - 1`` is used. The test split is always read with ``epoch=-1``. Default is None.
            split: The data split to visualize. Options are "train", "valid", "test", and "all". Default is "all".
            **kwargs: Accepted but not used by this method.

        Raises:
            ValueError: If a required split or its metadata is missing, if the
                train metadata is neither a DataFrame nor a dict with the key
                'paired', if ``param`` is a string other than "all", or if both
                ``labels`` and ``param`` are provided.
        """
        plt.ioff()

        if plot_type == "Coverage-Correlation":
            if "Coverage-Correlation" not in self.plots:
                checkpoint_interval = result.model.config.checkpoint_interval
                records = []
                # Dedicated loop names so the `epoch`/`split` arguments are not shadowed.
                for ckpt_epoch in range(
                    checkpoint_interval,
                    result.model.config.epochs + 1,
                    checkpoint_interval,
                ):
                    for ckpt_split in ("train", "valid"):
                        # Epochs are stored zero-based, the x-axis shows them one-based.
                        latent_df = result.get_latent_df(
                            epoch=ckpt_epoch - 1, split=ckpt_split
                        )
                        tc = self._total_correlation(latent_df)
                        cov = self._coverage_calc(latent_df)
                        records.append(
                            {
                                "epoch": ckpt_epoch,
                                "split": ckpt_split,
                                "total_correlation": tc,
                                "coverage": cov,
                            }
                        )

                df_metrics = pd.DataFrame(records)

                fig, axes = plt.subplots(1, 2, figsize=(12, 5))

                # Total Correlation plot
                _ = sns.lineplot(
                    data=df_metrics,
                    x="epoch",
                    y="total_correlation",
                    hue="split",
                    ax=axes[0],
                )
                axes[0].set_title("Total Correlation")
                axes[0].set_xlabel("Epoch")
                axes[0].set_ylabel("Total Correlation")

                # Coverage plot
                _ = sns.lineplot(
                    data=df_metrics, x="epoch", y="coverage", hue="split", ax=axes[1]
                )
                axes[1].set_title("Coverage")
                axes[1].set_xlabel("Epoch")
                axes[1].set_ylabel("Coverage")

                plt.tight_layout()
                self.plots["Coverage-Correlation"] = fig

            show_figure(self.plots["Coverage-Correlation"])
            plt.show()
            return

        # Set Defaults
        if epoch is None:
            epoch = result.model.config.epochs - 1

        ## Getting clin_data
        if not hasattr(result.datasets, "train"):
            raise ValueError("no train split in datasets")
        if not hasattr(result.datasets, "valid"):
            raise ValueError("no valid split in datasets")
        # Each message now names the split that is actually missing.
        if result.datasets.train is None:
            raise ValueError("train is None")
        if result.datasets.valid is None:
            raise ValueError("valid is None")
        if result.datasets.test is None:
            raise ValueError("test is None")

        if not hasattr(result.datasets.train, "metadata"):
            raise ValueError("train dataset has no metadata")
        if not hasattr(result.datasets.valid, "metadata"):
            raise ValueError("valid dataset has no metadata")

        train_meta = result.datasets.train.metadata
        # Splits appended after train, in the order test -> valid.
        extra_splits = [
            name for name in ("test", "valid") if hasattr(result.datasets, name)
        ]
        if isinstance(train_meta, dict):
            # Dictionary metadata must carry the annotation under 'paired'.
            if "paired" not in train_meta:
                raise ValueError(
                    "Please provide paired annotation data with key 'paired' in metadata dictionary."
                )
            frames = [train_meta["paired"]] + [
                getattr(result.datasets, name).metadata["paired"]
                for name in extra_splits
            ]
        elif isinstance(train_meta, pd.DataFrame):
            # Checked at the same level as the dict case so that DataFrame
            # metadata is actually reachable.
            frames = [train_meta] + [
                getattr(result.datasets, name).metadata for name in extra_splits
            ]
        else:
            raise ValueError(
                "Metadata is not a dictionary or DataFrame. Please provide a valid annotation data type."
            )
        clin_data = pd.concat(frames, axis=0)

        # The test split is always requested with epoch=-1.
        if split == "all":
            df_latent = pd.concat(
                [
                    result.get_latent_df(epoch=epoch, split="train"),
                    result.get_latent_df(epoch=epoch, split="valid"),
                    result.get_latent_df(epoch=-1, split="test"),
                ]
            )
        elif split == "test":
            df_latent = result.get_latent_df(epoch=-1, split=split)
        else:
            df_latent = result.get_latent_df(epoch=epoch, split=split)

        ## Label options
        if labels is None and param is None:
            labels = ["all"] * df_latent.shape[0]

        if labels is None and isinstance(param, str):
            if param != "all":
                raise ValueError(
                    "Please provide parameter to plot as a list not as string. If you want to plot all parameters, set param to 'all' and labels to None."
                )
            param = list(clin_data.columns)

        if labels is not None and param is not None:
            raise ValueError(
                "Please provide either labels or param, not both. If you want to plot all parameters, set param to 'all' and labels to None."
            )

        if labels is not None and param is None:
            if isinstance(labels, pd.Series):
                param = [labels.name]
                # Order by index of df_latent first, fill missing with "unknown"
                labels = labels.reindex(
                    df_latent.index, fill_value="unknown"
                ).tolist()
            else:
                param = ["user_label"]  # Default label if none provided

        # The 2D embedding does not depend on the parameter, so it is computed
        # lazily on the first iteration and shared by all parameters.
        embedding = None

        for p in param:
            if p in clin_data.columns:
                p_labels = clin_data.loc[df_latent.index, p].tolist()
            else:
                # Fall back to the caller-supplied labels only; never reuse the
                # labels that were looked up for a previous parameter.
                p_labels = labels
                if p_labels is None:
                    print(f"Parameter {p} not found in the metadata columns")

            if plot_type == "2D-scatter":
                if embedding is None:
                    ## Make 2D Embedding with UMAP
                    if df_latent.shape[1] > 2:
                        reducer = UMAP(n_components=2)
                        embedding = pd.DataFrame(reducer.fit_transform(df_latent))
                    else:
                        embedding = df_latent

                self.plots["2D-scatter"][epoch][split][p] = self.plot_2D(
                    embedding=embedding,
                    labels=p_labels,
                    param=p,
                    layer=f"2D latent space (epoch {epoch + 1})",  # we start counting epochs at 0, so add 1 for display
                    figsize=(12, 8),
                    center=True,
                )

                fig = self.plots["2D-scatter"][epoch][split][p]
                show_figure(fig)
                plt.show()

            if plot_type == "Ridgeline":
                ## Make ridgeline plot
                self.plots["Ridgeline"][epoch][split][p] = self.plot_latent_ridge(
                    lat_space=df_latent, labels=p_labels, param=p
                )

                # The stored object exposes the matplotlib figure via `.figure`.
                fig = self.plots["Ridgeline"][epoch][split][p].figure
                show_figure(fig)
                plt.show()

    def show_weights(self) -> None:
        """Show the stored "ModelWeights" figure, or print a hint when it is missing."""
        weights_available = "ModelWeights" in self.plots.keys()
        if not weights_available:
            # Nothing stored under "ModelWeights": tell the user how to create it.
            print("Model weights not found in the plots dictionary")
            print("You need to run visualize() method first")
            return

        show_figure(self.plots["ModelWeights"])
        plt.show()

    # def plot_model_weights(model: torch.nn.Module) -> matplotlib.figure.Figure:
    #     """
    #     Visualization of model weights in encoder and decoder layers as heatmap for each layer as subplot.
    #     ARGS:
    #         model (torch.nn.Module): PyTorch model instance.
    #         filepath (str): Path specifying save name and location.
    #     RETURNS:
    #         fig (matplotlib.figure): Figure handle (of last plot)
    #     """
    #     all_weights = []
    #     names = []
    #     if hasattr(model, "ontologies"):
    #         if model.ontologies is not None:
    #             # If model is Ontix
    #             # Get node names from ontologies
    #             node_names = list()
    #             for ontology in model.ontologies:
    #                 node_names.append(ontology.keys())

    #             node_names.append(model.feature_order)  # Add feature order as last layer

    #     for name, param in model.named_parameters():
    #         if "weight" in name and len(param.shape) == 2:
    #             if "var" not in name:  ## For VAE plot only mu weights
    #                 all_weights.append(param.detach().cpu().numpy())
    #                 names.append(name[:-7])

    #     layers = int(len(all_weights) / 2)
    #     fig, axes = plt.subplots(2, layers, sharex=False, figsize=(20, 10))

    #     for layer in range(layers):
    #         ## Encoder Layer
    #         if layers > 1:
    #             sns.heatmap(
    #                 all_weights[layer],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[0, layer],
    #             ).set(title=names[layer])
    #             ## Decoder Layer
    #             sns.heatmap(
    #                 all_weights[layers + layer],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[1, layer],
    #             ).set(title=names[layers + layer])
    #             axes[1, layer].set_xlabel("In Node", size=12)
    #             if model.ontologies is not None:
    #                 axes[1, layer].set_xticks(
    #                     ticks=range(len(node_names[layer])),
    #                     labels=node_names[layer],
    #                     rotation=90,
    #                     fontsize=8,
    #                 )
    #                 axes[1, layer].set_yticks(
    #                     ticks=range(len(node_names[layer + 1])),
    #                     labels=node_names[layer + 1],
    #                     rotation=0,
    #                     fontsize=8,
    #                 )
    #         else:
    #             sns.heatmap(
    #                 all_weights[layer],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[layer],
    #             ).set(title=names[layer])
    #             ## Decoder Layer
    #             sns.heatmap(
    #                 all_weights[layer + 2],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[layer + 1],
    #             ).set(title=names[layer + 2])
    #             axes[1].set_xlabel("In Node", size=12)

    #     if layers > 1:
    #         axes[1, 0].set_ylabel("Out Node", size=12)
    #         axes[0, 0].set_ylabel("Out Node", size=12)
    #     else:
    #         axes[1].set_ylabel("Out Node", size=12)
    #         axes[0].set_ylabel("Out Node", size=12)

    #     ## Add title
    #     fig.suptitle("Model Weights", size=20)
    #     plt.close()
    #     return fig

    ## NEW VERSION
    # @staticmethod
    # def plot_model_weights(model: torch.nn.Module) -> matplotlib.figure.Figure:
    #     """
    #     Visualization of model weights in encoder and decoder layers as heatmap for each layer as subplot.
    #     ARGS:
    #         model (torch.nn.Module): PyTorch model instance.
    #         filepath (str): Path specifying save name and location.
    #     RETURNS:
    #         fig (matplotlib.figure): Figure handle (of last plot)
    #     """
    #     all_weights = []
    #     names = []
    #     if hasattr(model, "ontologies"):
    #         if model.ontologies is not None:
    #             # If model is Ontix
    #             # Get node names from ontologies
    #             node_names = list()
    #             for ontology in model.ontologies:
    #                 node_names.append(ontology.keys())

    #             node_names.append(model.feature_order)  # Add feature order as last layer

    #     for name, param in model.named_parameters():
    #         if "weight" in name and len(param.shape) == 2:
    #             if "var" not in name:  ## For VAE plot only mu weights
    #                 all_weights.append(param.detach().cpu().numpy())
    #                 names.append(name[:-7])

    #     layers = int(len(all_weights) / 2)
    #     fig, axes = plt.subplots(2, layers, sharex=False, figsize=(20, 10))

    #     for layer in range(layers):
    #         ## Encoder Layer
    #         if layers > 1:
    #             sns.heatmap(
    #                 all_weights[layer],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[0, layer],
    #             ).set(title=names[layer])
    #             ## Decoder Layer
    #             sns.heatmap(
    #                 all_weights[layers + layer],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[1, layer],
    #             ).set(title=names[layers + layer])
    #             axes[1, layer].set_xlabel("In Node", size=12)
    #             if model.ontologies is not None:
    #                 axes[1, layer].set_xticks(
    #                     ticks=range(len(node_names[layer])),
    #                     labels=node_names[layer],
    #                     rotation=90,
    #                     fontsize=8,
    #                 )
    #                 axes[1, layer].set_yticks(
    #                     ticks=range(len(node_names[layer + 1])),
    #                     labels=node_names[layer + 1],
    #                     rotation=0,
    #                     fontsize=8,
    #                 )
    #         else:
    #             sns.heatmap(
    #                 all_weights[layer],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[layer],
    #             ).set(title=names[layer])
    #             ## Decoder Layer
    #             sns.heatmap(
    #                 all_weights[layer + 2],
    #                 cmap=sns.color_palette("Spectral", as_cmap=True),
    #                 ax=axes[layer + 1],
    #             ).set(title=names[layer + 2])
    #             axes[1].set_xlabel("In Node", size=12)

    #     if layers > 1:
    #         axes[1, 0].set_ylabel("Out Node", size=12)
    #         axes[0, 0].set_ylabel("Out Node", size=12)
    #     else:
    #         axes[1].set_ylabel("Out Node", size=12)
    #         axes[0].set_ylabel("Out Node", size=12)

    #     ## Add title
    #     fig.suptitle("Model Weights", size=20)
    #     plt.close()
    #     return fig

    ## NEW VERSION
    @staticmethod
    def plot_model_weights(model: torch.nn.Module) -> matplotlib.figure.Figure:
        """Plot encoder and decoder weight matrices as heatmaps, one subplot per layer.

        Encoder layers are drawn in the top row and decoder layers in the bottom
        row, so architectures with a different number of encoder and decoder
        layers are supported (unused subplots are hidden). Layers whose name
        contains ``_mu`` are plotted in the encoder row; layers whose name
        contains ``var`` are skipped. If no parameter name identifies an encoder,
        decoder or ``_mu`` layer, the remaining 2D weights are split in half:
        first half in the top row, second half in the bottom row.

        If the model has a non-None ``ontologies`` attribute, the ontology keys
        and ``model.feature_order`` are used as tick labels of the decoder
        heatmaps.

        Args:
            model: PyTorch model instance.

        Returns:
            The figure holding all heatmaps. The figure is closed before it is
            returned so that it is not displayed implicitly.

        Raises:
            ValueError: If the fallback layout is needed but fewer than two 2D
                weight matrices are available.
        """
        cmap = sns.color_palette("Spectral", as_cmap=True)

        # Not every model defines `ontologies`; getattr avoids an AttributeError.
        ontologies = getattr(model, "ontologies", None)
        node_names = []
        if ontologies is not None:
            node_names = [list(ontology.keys()) for ontology in ontologies]
            node_names.append(model.feature_order)

        encoder_weights, encoder_names = [], []
        decoder_weights, decoder_names = [], []
        other_weights, other_names = [], []
        for name, param in model.named_parameters():
            if "weight" not in name or len(param.shape) != 2:
                continue
            if "encoder" in name and "var" not in name and "_mu" not in name:
                weights, titles = encoder_weights, encoder_names
            elif "_mu" in name:
                weights, titles = encoder_weights, encoder_names
            elif "decoder" in name and "var" not in name:
                weights, titles = decoder_weights, decoder_names
            elif (
                "encoder" not in name
                and "decoder" not in name
                and "var" not in name
            ):
                # Models without explicit encoder/decoder parameter names.
                weights, titles = other_weights, other_names
            else:
                continue
            weights.append(param.detach().cpu().numpy())
            titles.append(name[:-7])  # drop the trailing ".weight"

        if encoder_weights or decoder_weights:
            n_enc = len(encoder_weights)
            n_dec = len(decoder_weights)
            n_cols = max(n_enc, n_dec)
            # squeeze=False keeps `axes` two-dimensional even for one column.
            fig, axes = plt.subplots(
                2, n_cols, sharex=False, figsize=(15 * n_cols, 15), squeeze=False
            )

            for i, (weights, title) in enumerate(zip(encoder_weights, encoder_names)):
                ax = axes[0, i]
                sns.heatmap(weights, cmap=cmap, center=0, ax=ax).set(title=title)
                ax.set_ylabel("Out Node", size=12)
            for i in range(n_enc, n_cols):
                axes[0, i].axis("off")

            for i, (weights, title) in enumerate(zip(decoder_weights, decoder_names)):
                ax = axes[1, i]
                sns.heatmap(weights, cmap=cmap, center=0, ax=ax).set(title=title)
                # Label ticks only when names exist for both sides of this layer.
                if i + 1 < len(node_names):
                    ax.set_xticks(
                        ticks=range(len(node_names[i])),
                        labels=node_names[i],
                        rotation=90,
                        fontsize=8,
                    )
                    ax.set_yticks(
                        ticks=range(len(node_names[i + 1])),
                        labels=node_names[i + 1],
                        rotation=0,
                        fontsize=8,
                    )
                ax.set_xlabel("In Node", size=12)
                ax.set_ylabel("Out Node", size=12)
            for i in range(n_dec, n_cols):
                axes[1, i].axis("off")
        else:
            # Fallback: first half of the weights on top, second half below.
            n_layers = len(other_weights) // 2
            if n_layers == 0:
                raise ValueError(
                    "Model has fewer than two 2D weight matrices; nothing to plot."
                )
            fig, axes = plt.subplots(
                2, n_layers, sharex=False, figsize=(5 * n_layers, 10), squeeze=False
            )
            for layer in range(n_layers):
                sns.heatmap(
                    other_weights[layer], cmap=cmap, center=0, ax=axes[0, layer]
                ).set(title=other_names[layer])
                sns.heatmap(
                    other_weights[n_layers + layer],
                    cmap=cmap,
                    center=0,
                    ax=axes[1, layer],
                ).set(title=other_names[n_layers + layer])
                axes[1, layer].set_xlabel("In Node", size=12)
                axes[0, layer].set_ylabel("Out Node", size=12)
                axes[1, layer].set_ylabel("Out Node", size=12)

        fig.suptitle("Model Weights", size=20)
        plt.close(fig)
        return fig

    @staticmethod
    def plot_2D(
        embedding: pd.DataFrame,
        labels: list,
        param: Optional[Union[str, None]] = None,
        layer: str = "latent space",
        figsize: tuple = (24, 15),
        center: bool = True,
        plot_numeric: bool = False,
        xlim: Optional[Union[tuple, None]] = None,
        ylim: Optional[Union[tuple, None]] = None,
        scale: Optional[Union[str, None]] = None,
        no_leg: bool = False,
    ) -> matplotlib.figure.Figure:
        """Plots a 2D scatter plot of the given embedding, colored by labels.

        Args:
            embedding: DataFrame whose first two columns are the x and y coordinates.
            labels: Labels corresponding to each point in the embedding. Non-string
                labels with more than three unique values are treated as numeric.
            param: Title for the legend. Defaults to None.
            layer: Title for the plot. Defaults to "latent space".
            figsize: Size of the figure. Defaults to (24, 15).
            center: If True, marks the mean position of each label group with a
                star. Disabled when numeric labels are plotted. Defaults to True.
            plot_numeric: If True, numeric labels are used as a continuous hue;
                if False they are binned into quartiles. Defaults to False.
            xlim: Optional (min, max) limits of the x axis. Defaults to None.
            ylim: Optional (min, max) limits of the y axis. Defaults to None.
            scale: Optional axis scale applied to both axes. Defaults to None.
            no_leg: If True, the legend is hidden. Defaults to False.

        Returns:
            The resulting matplotlib figure (closed, so it is not shown implicitly).

        Raises:
            ValueError: If ``labels`` is empty.
        """
        # Work on a plain list so positional access is safe for any sequence.
        labels = list(labels)
        if not labels:
            raise ValueError("labels must contain at least one entry.")

        numeric = False
        if not isinstance(labels[0], str):
            if len(np.unique(labels)) > 3:
                if plot_numeric:
                    center = False  ## Disable centering for numeric params
                    numeric = True
                else:
                    print(
                        "The provided label column is numeric and converted to categories."
                    )
                    # Keep every real number (ints included); anything else
                    # becomes NaN and ends up in the "nan" category.
                    number_types = (int, float, np.integer, np.floating)
                    labels = [
                        float(x)
                        if isinstance(x, number_types) and not isinstance(x, bool)
                        else float("nan")
                        for x in labels
                    ]
                    labels = (
                        pd.qcut(
                            x=pd.Series(labels),
                            q=4,
                            labels=["1stQ", "2ndQ", "3rdQ", "4thQ"],
                        )
                        .astype(str)
                        .to_list()
                    )
            else:
                labels = [str(x) for x in labels]

        fig, ax1 = plt.subplots(figsize=figsize)

        # Check whether labels or embedding is longer and adjust the labels.
        n_samples = embedding.shape[0]
        if len(labels) < n_samples:
            print(
                "Given labels do not have the same length as given sample size. Labels will be duplicated."
            )
            repeats = n_samples // len(labels)
            labels = [label for label in labels for _ in range(repeats)]
        elif len(labels) > n_samples:
            # NOTE(review): deduplication does not guarantee a matching length
            # and loses the original order — confirm this is intended.
            labels = list(set(labels))

        x_values = embedding.iloc[:, 0]
        y_values = embedding.iloc[:, 1]
        hue_kwargs = (
            {"palette": "bwr"} if numeric else {"hue_order": np.unique(labels)}
        )
        ax2 = sns.scatterplot(
            x=x_values,
            y=y_values,
            hue=labels,
            s=40,
            alpha=0.5,
            ec="black",
            ax=ax1,
            **hue_kwargs,
        )

        if center:
            # Overlay one star per label group at the group mean.
            means = embedding.groupby(by=labels).mean()
            group_names = np.unique(labels)
            ax2 = sns.scatterplot(
                x=means.iloc[:, 0],
                y=means.iloc[:, 1],
                hue=group_names,
                hue_order=group_names,
                s=200,
                ec="black",
                alpha=0.9,
                marker="*",
                legend=False,
                ax=ax2,
            )

        if xlim is not None:
            ax2.set_xlim(xlim[0], xlim[1])

        if ylim is not None:
            ax2.set_ylim(ylim[0], ylim[1])

        if scale is not None:
            ax2.set_yscale(scale)
            ax2.set_xscale(scale)
        ax2.set_xlabel("Dim 1")
        ax2.set_ylabel("Dim 2")

        if no_leg:
            ax2.legend([], [], frameon=False)
        else:
            # Use two legend columns when there are many groups.
            legend_cols = 2 if len(np.unique(labels)) > 10 else 1
            sns.move_legend(
                ax2,
                "upper left",
                bbox_to_anchor=(1, 1),
                ncol=legend_cols,
                title=param,
                frameon=False,
            )

        # Add title to the plot
        ax2.set_title(layer)

        plt.close(fig)
        return fig

    @staticmethod
    def plot_latent_ridge(
        lat_space: pd.DataFrame,
        labels: Optional[Union[list, pd.Series, None]] = None,
        param: Optional[Union[str, None]] = None,
    ) -> sns.FacetGrid:
        """Creates a ridge line plot of the latent space.

        Each row shows the density of one latent dimension, with one ridge per
        label group.

        Args:
            lat_space: Latent space with samples as rows and latent dimensions
                as columns.
            labels: One label per sample. If None, all samples are considered
                as one group. Non-string labels with more than three unique
                values are binned into quartiles.
            param: Name of the label column, used as hue and legend title.
                Set to "all" when ``labels`` is None.

        Returns:
            g: FacetGrid object containing the ridge line plot
        """
        sns.set_theme(
            style="white", rc={"axes.facecolor": (0, 0, 0, 0)}
        )  ## Necessary to enforce overplotting

        n_dims = len(lat_space.columns)
        df = pd.melt(lat_space, var_name="latent dim", value_name="latent intensity")
        df["sample"] = n_dims * list(lat_space.index)

        if labels is None:
            param = "all"
            # One label per sample; tiled once per latent dimension below.
            labels = ["all"] * len(lat_space)
        else:
            # A plain list makes positional access and tiling work for Series.
            labels = list(labels)

        if labels and not isinstance(labels[0], str):
            if len(np.unique(labels)) > 3:
                # Keep every real number (ints included); others become NaN.
                number_types = (int, float, np.integer, np.floating)
                labels = [
                    float(x)
                    if isinstance(x, number_types) and not isinstance(x, bool)
                    else float("nan")
                    for x in labels
                ]
                # to_list() is required: multiplying a string Series by an int
                # repeats each string instead of tiling the sequence.
                labels = (
                    pd.qcut(
                        x=pd.Series(labels),
                        q=4,
                        labels=["1stQ", "2ndQ", "3rdQ", "4thQ"],
                    )
                    .astype(str)
                    .to_list()
                )
            else:
                labels = [str(x) for x in labels]

        # The melted frame stacks the columns, so the labels are tiled per column.
        df[param] = n_dims * labels  # type: ignore

        exclude_missing_info = (df[param] == "unknown") | (df[param] == "nan")

        # x range: 5% / 90% quantiles over all (group, dimension) pairs.
        grouped = df.loc[
            ~exclude_missing_info, ["latent intensity", "latent dim", param]
        ].groupby([param, "latent dim"], observed=False)
        xmin = grouped.quantile(0.05).min()
        xmax = grouped.quantile(0.9).max()

        n_groups = len(np.unique(df[param]))
        if n_groups > 8:
            cat_pal = sns.husl_palette(n_groups)
        else:
            cat_pal = sns.color_palette(n_colors=n_groups)

        g = sns.FacetGrid(
            df[~exclude_missing_info],
            row="latent dim",
            hue=param,
            aspect=12,
            height=0.8,
            xlim=(xmin.iloc[0], xmax.iloc[0]),
            palette=cat_pal,
        )

        g.map_dataframe(
            sns.kdeplot,
            "latent intensity",
            bw_adjust=0.5,
            clip_on=True,
            fill=True,
            alpha=0.5,
            warn_singular=False,
            ec="k",
            lw=1,
        )

        def label(data, color, label, text="latent dim"):
            # Write the name of the latent dimension to the left of its ridge.
            ax = plt.gca()
            label_text = data[text].unique()[0]
            ax.text(
                0.0,
                0.2,
                label_text,
                fontweight="bold",
                ha="right",
                va="center",
                transform=ax.transAxes,
            )

        g.map_dataframe(label, text="latent dim")

        g.set(xlim=(xmin.iloc[0], xmax.iloc[0]))
        # Set the subplots to overlap
        g.figure.subplots_adjust(hspace=-0.5)

        # Remove axes details that don't play well with overlap
        g.set_titles("")
        g.set(yticks=[], ylabel="")
        g.despine(bottom=True, left=True)

        g.add_legend()

        plt.close()
        return g

    @staticmethod
    def make_loss_plot(
        df_plot: pd.DataFrame, plot_type: str
    ) -> matplotlib.figure.Figure:
        """Generates a plot for visualizing loss values from a DataFrame.

        Args:
            df_plot: DataFrame containing the loss values to be plotted. It should have the columns:
                - "Loss Term": The type of loss term (e.g., "total_loss", "reconstruction_loss").
                - "Epoch": The epoch number.
                - "Loss Value": The value of the loss.
                - "Split": The data split (e.g., "train", "validation").
                The DataFrame is not modified.

            plot_type: The type of plot to generate. It can be either "absolute" or "relative".
                - "absolute": Generates a line plot for each unique loss term.
                - "relative": Generates a density plot for each data split, excluding the
                  "total_loss" term, "_factor" terms and terms that are all zero or NaN.

        Returns:
            The generated matplotlib figure containing the loss plots.

        Raises:
            ValueError: If ``plot_type`` is neither "absolute" nor "relative".
        """
        if plot_type == "absolute":
            terms = df_plot["Loss Term"].unique()
            # squeeze=False keeps `axes` an array even for a single loss term.
            fig, axes = plt.subplots(
                1,
                len(terms),
                figsize=(5 * len(terms), 5),
                sharey=False,
                squeeze=False,
            )
            for axis, term in zip(axes.flat, terms):
                sns.lineplot(
                    data=df_plot[df_plot["Loss Term"] == term],
                    x="Epoch",
                    y="Loss Value",
                    hue="Split",
                    ax=axis,
                ).set_title(term)

        elif plot_type == "relative":
            # Check if loss values are positive
            if (df_plot["Loss Value"] < 0).any():
                warnings.warn(
                    "Loss values contain negative values. Check your loss function if correct. Loss will be clipped to zero for plotting."
                )
                # assign() returns a new frame, leaving the caller's data untouched.
                df_plot = df_plot.assign(
                    **{"Loss Value": df_plot["Loss Value"].clip(lower=0)}
                )

            # Keep only loss terms with at least one non-NaN and one non-zero value.
            valid_terms = []
            for term in df_plot["Loss Term"].unique():
                term_values = df_plot.loc[df_plot["Loss Term"] == term, "Loss Value"]
                if term_values.notna().any() and (term_values != 0).any():
                    valid_terms.append(term)

            # Rows that enter the relative plot.
            keep = (
                (df_plot["Loss Term"] != "total_loss")
                & ~(df_plot["Loss Term"].str.contains("_factor"))
                & (df_plot["Loss Term"].isin(valid_terms))
            )

            splits = df_plot["Split"].unique()
            # One panel per split instead of a fixed number of two panels.
            fig, axes = plt.subplots(
                1,
                len(splits),
                figsize=(5 * len(splits), 5),
                sharey=True,
                squeeze=False,
            )
            last_epoch = df_plot["Epoch"].max()
            for axis, split in zip(axes.flat, splits):
                sns.kdeplot(
                    data=df_plot[keep & (df_plot["Split"] == split)],
                    x="Epoch",
                    hue="Loss Term",
                    multiple="fill",
                    weights="Loss Value",
                    clip=[0, last_epoch],
                    ax=axis,
                ).set_title(split)

        else:
            raise ValueError(
                f"Unknown plot_type {plot_type!r}; expected 'absolute' or 'relative'."
            )

        plt.close(fig)
        return fig

    @staticmethod
    def make_loss_format(result: Result, config: DefaultConfig) -> pd.DataFrame:
        """Collects all recorded losses of a result in one long-format DataFrame.

        Every entry of ``result.sub_losses`` and the total loss from
        ``result.losses`` are converted to rows with the columns "Epoch",
        "Loss Term", "Split" and "Loss Value". The "var_loss" term is multiplied
        by ``config.beta``; the total loss is stored under the term "total_loss".

        Args:
            result: Result object providing ``sub_losses`` and ``losses``.
            config: Configuration providing ``beta``.

        Returns:
            Long-format DataFrame with one row per epoch, loss term and split.
        """

        def _as_mapping(loss_values):
            # Bring the stored loss values into a form DataFrame.from_dict accepts.
            if isinstance(loss_values, dict):
                return loss_values
            if hasattr(loss_values, "to_dict"):
                return loss_values.to_dict()  # type: ignore
            if hasattr(loss_values, "shape"):
                # Array-like: use the position as key.
                return {i: val for i, val in enumerate(loss_values)}
            return loss_values

        def _to_long(loss_values, term, factor=None):
            # One row per (epoch, split) for a single loss term.
            loss_df = pd.DataFrame.from_dict(_as_mapping(loss_values), orient="index")  # type: ignore
            if factor is not None:
                loss_df = loss_df * factor
            loss_df["Epoch"] = loss_df.index + 1
            loss_df["Loss Term"] = term
            return loss_df.melt(
                id_vars=["Epoch", "Loss Term"],
                var_name="Split",
                value_name="Loss Value",
            )

        frames = []
        for term in result.sub_losses.keys():
            factor = config.beta if term == "var_loss" else None
            frames.append(
                _to_long(result.sub_losses.get(key=term).get(), term, factor)
            )
        frames.append(_to_long(result.losses.get(), "total_loss"))

        # A single concat avoids re-copying the frame for every loss term.
        loss_df_melt = pd.concat(frames, axis=0).reset_index(drop=True)
        loss_df_melt["Loss Value"] = loss_df_melt["Loss Value"].astype(float)
        return loss_df_melt

    @no_type_check
    def plot_evaluation(
        self,
        result: Result,
    ) -> dict:
        """Builds one bar plot per clinical parameter, metric and ML algorithm.

        The plots are created from ``result.embedding_evaluation`` and stored
        under ``self.plots["ML_Evaluation"]``.

        Args:
            result: The Result object containing evaluation data.

        Returns:
            Nested dictionary ``{clinical parameter: {metric: {algorithm: plot}}}``.
        """
        plt.ioff()
        evaluation = result.embedding_evaluation

        ml_plots = {}
        for clin_param in pd.unique(evaluation.CLINIC_PARAM):
            is_param = evaluation.CLINIC_PARAM == clin_param
            per_metric = {}
            ml_plots[clin_param] = per_metric

            for metric in pd.unique(evaluation.loc[is_param, "metric"]):
                is_metric = evaluation.metric == metric
                per_alg = {}
                per_metric[metric] = per_alg

                for alg in pd.unique(evaluation.loc[is_param & is_metric, "ML_ALG"]):
                    subset = evaluation[
                        is_metric & is_param & (evaluation.ML_ALG == alg)
                    ]

                    grid = sns.catplot(
                        data=subset,
                        x="score_split",
                        y="value",
                        col="ML_TASK",
                        hue="score_split",
                        kind="bar",
                    )

                    # Start the y axis at zero unless there are negative scores.
                    lowest = subset.value.min()
                    y_floor = 0 if lowest > 0 else lowest

                    per_alg[alg] = grid.set(ylim=(y_floor, None))

        self.plots["ML_Evaluation"] = ml_plots

        return ml_plots

    def show_evaluation(
        self,
        param: str,
        metric: str,
        ml_alg: Optional[str] = None,
    ) -> None:
        """Shows stored evaluation plots for one clinical parameter and metric.

        If a requested entry does not exist, a hint with the available
        entries is printed and nothing is shown.

        Args:
            param: The clinical parameter to visualize.
            metric: The metric to visualize.
            ml_alg: The ML algorithm to visualize. If None, plots all available algorithms.
        """
        plt.ioff()
        if "ML_Evaluation" not in self.plots.keys():
            print("ML Evaluation plots not found in the plots dictionary")
            print("You need to run evaluate() method first")
            return None

        by_param = self.plots["ML_Evaluation"]
        if param not in by_param.keys():
            print(f"Parameter {param} not found in the ML Evaluation plots")
            print(f"Available parameters: {list(by_param.keys())}")
            return None

        by_metric = by_param[param]
        if metric not in by_metric.keys():
            print(f"Metric {metric} not found in the ML Evaluation plots for {param}")
            print(f"Available metrics: {list(by_metric.keys())}")
            return None

        by_alg = by_metric[metric]
        algs = list(by_alg.keys())

        if ml_alg is None:
            # No algorithm requested: show every stored plot in turn.
            for alg in algs:
                print(f"Showing plot for ML algorithm: {alg}")
                show_figure(by_alg[alg].figure)
                plt.show()
            return None

        if ml_alg not in algs:
            print(f"ML algorithm {ml_alg} not found for {param} and {metric}")
            print(f"Available ML algorithms: {algs}")
            return None

        show_figure(by_alg[ml_alg].figure)
        plt.show()
    @staticmethod
    def _total_correlation(latent_space: pd.DataFrame) -> float:
        """Computes the total correlation as described here (Equation 2): https://doi.org/10.3390/e21100921

        The value is computed from the covariance matrix of the latent
        dimensions as ``0.5 * (sum(log(diag(cov))) - log|det(cov)|)``.

        Args:
            latent_space: latent space with dimension sample vs. latent dimensions
        Returns:
            tc: total correlation across latent dimensions
        """
        # np.cov returns a 0-d array for a single latent dimension; force a
        # matrix so the diagonal and determinant are defined in that case too.
        lat_cov = np.atleast_2d(np.cov(latent_space.T))
        log_marginal_variances = np.log(np.diag(lat_cov))
        log_abs_det = np.linalg.slogdet(lat_cov)[1]
        tc = 0.5 * (np.sum(log_marginal_variances) - log_abs_det)
        return tc

    @staticmethod
    def _coverage_calc(latent_space: pd.DataFrame) -> float:
        """Computes the coverage as described here (Equation 3): https://doi.org/10.3390/e21100921

        Every latent dimension is cut into ``bins_per_dim`` equal-width bins,
        where ``bins_per_dim`` is the largest integer whose power to the number
        of dimensions does not exceed the number of samples. The coverage is the
        share of the resulting grid cells that contain at least one sample.

        Args:
            latent_space: latent space with samples as rows and latent
                dimensions as columns
        Returns:
            cov: coverage across latent dimensions, or NaN (with a warning) if
                fewer than 2 bins per dimension are possible
        """
        n_samples = len(latent_space.index)
        n_dims = len(latent_space.columns)

        bins_per_dim = 0
        if n_samples > 0 and n_dims > 0:
            bins_per_dim = int(np.power(n_samples, 1 / n_dims))
            # The floating-point root can land just next to an exact integer
            # (e.g. 64 ** (1 / 3) is slightly below 4); correct with exact
            # integer arithmetic.
            while (bins_per_dim + 1) ** n_dims <= n_samples:
                bins_per_dim += 1
            while bins_per_dim > 1 and bins_per_dim ** n_dims > n_samples:
                bins_per_dim -= 1

        if bins_per_dim < 2:
            warnings.warn(
                "Coverage calculation fails since combination of sample size and latent dimension results in less than 2 bins."
            )
            return np.nan

        latent_bins = latent_space.apply(lambda x: pd.cut(x, bins=bins_per_dim))
        # One tuple of bin intervals per sample identifies its grid cell.
        cells = pd.Series(list(zip(*[latent_bins[col] for col in latent_bins])))
        cov = len(cells.unique()) / bins_per_dim ** n_dims
        return cov

`make_loss_plot(df_plot, plot_type)` `staticmethod`

Generates a plot for visualizing loss values from a DataFrame.

Parameters:

Name	Type	Description	Default
`df_plot`	`DataFrame`	DataFrame containing the loss values to be plotted. It should have the columns: - "Loss Term": The type of loss term (e.g., "total_loss", "reconstruction_loss"). - "Epoch": The epoch number. - "Loss Value": The value of the loss. - "Split": The data split (e.g., "train", "validation").	required
`plot_type`	`str`	The type of plot to generate. It can be either "absolute" or "relative". - "absolute": Generates a line plot for each unique loss term. - "relative": Generates a density plot for each data split, excluding the "total_loss" term.	required

Returns:

Type	Description
`Figure`	The generated matplotlib figure containing the loss plots.

Source code in src/autoencodix/visualize/visualize.py

@staticmethod
def make_loss_plot(
    df_plot: pd.DataFrame, plot_type: str
) -> matplotlib.figure.Figure:
    """Generates a plot for visualizing loss values from a DataFrame.

    Args:
        df_plot: DataFrame containing the loss values to be plotted. It should have the columns:
            - "Loss Term": The type of loss term (e.g., "total_loss", "reconstruction_loss").
            - "Epoch": The epoch number.
            - "Loss Value": The value of the loss.
            - "Split": The data split (e.g., "train", "validation").
            The DataFrame itself is never modified.

        plot_type: The type of plot to generate. It can be either "absolute" or "relative".
            - "absolute": Generates a line plot for each unique loss term.
            - "relative": Generates a density plot for each data split, excluding the "total_loss" term,
              terms containing "_factor" and terms that are zero/NaN over all epochs.

    Returns:
        The generated matplotlib figure containing the loss plots.

    Raises:
        ValueError: If plot_type is neither "absolute" nor "relative".
    """
    if plot_type not in ("absolute", "relative"):
        raise ValueError(
            f"Unknown plot_type {plot_type!r}. Please use 'absolute' or 'relative'."
        )

    if plot_type == "absolute":
        loss_terms = df_plot["Loss Term"].unique()
        # squeeze=False keeps `axes` two-dimensional even for a single loss term
        fig, axes = plt.subplots(
            1,
            len(loss_terms),
            figsize=(5 * len(loss_terms), 5),
            sharey=False,
            squeeze=False,
        )
        for axis, term in zip(axes[0], loss_terms):
            sns.lineplot(
                data=df_plot[df_plot["Loss Term"] == term],
                x="Epoch",
                y="Loss Value",
                hue="Split",
                ax=axis,
            ).set_title(term)

        plt.close(fig)
        return fig

    # plot_type == "relative"
    if (df_plot["Loss Value"] < 0).any():
        warnings.warn(
            "Loss values contain negative values. Check your loss function if correct. Loss will be clipped to zero for plotting."
        )
        # assign() returns a new frame, so the caller's DataFrame stays untouched
        df_plot = df_plot.assign(
            **{"Loss Value": df_plot["Loss Value"].clip(lower=0)}
        )

    def _has_signal(term) -> bool:
        """True if the loss term has any non-NaN and any non-zero value."""
        values = df_plot.loc[df_plot["Loss Term"] == term, "Loss Value"]
        return bool(values.notna().any() and (values != 0).any())

    # Exclude loss terms where all Loss Value are zero or NaN over all epochs
    valid_terms = [term for term in df_plot["Loss Term"].unique() if _has_signal(term)]
    keep = (
        (df_plot["Loss Term"] != "total_loss")
        & ~(df_plot["Loss Term"].str.contains("_factor"))
        & (df_plot["Loss Term"].isin(valid_terms))
    )

    splits = df_plot["Split"].unique()
    # One panel per split; the grid size now matches the figure width
    fig, axes = plt.subplots(
        1,
        len(splits),
        figsize=(5 * len(splits), 5),
        sharey=True,
        squeeze=False,
    )
    for axis, split in zip(axes[0], splits):
        sns.kdeplot(
            data=df_plot[keep & (df_plot["Split"] == split)],
            x="Epoch",
            hue="Loss Term",
            multiple="fill",
            weights="Loss Value",
            clip=[0, df_plot["Epoch"].max()],
            ax=axis,
        ).set_title(split)

    plt.close(fig)
    return fig

`plot_2D(embedding, labels, param=None, layer='latent space', figsize=(24, 15), center=True, plot_numeric=False, xlim=None, ylim=None, scale=None, no_leg=False)` `staticmethod`

Plots a 2D scatter plot of the given embedding with labels.

Parameters:

Name	Type	Description	Default
`embedding`	`DataFrame`	DataFrame containing the 2D embedding coordinates.	required
`labels`	`list`	List of labels corresponding to each point in the embedding.	required
`param`	`Optional[Union[str, None]]`	Title for the legend. Defaults to None.	`None`
`layer`	`str`	Title for the plot. Defaults to "latent space".	`'latent space'`
`figsize`	`tuple`	Size of the figure. Defaults to (24, 15).	`(24, 15)`
`center`	`bool`	If True, centers the plot based on label means. Defaults to True.	`True`
`xlim`	`Optional[Union[tuple, None]]`	Defaults to None.	`None`
`ylim`	`Optional[Union[tuple, None]]`	Defaults to None.	`None`
`scale`	`Optional[Union[str, None]]`	Defaults to None.	`None`
`no_leg`	`bool`	Defaults to False.	`False`

Returns:

Type	Description
`Figure`	The resulting matplotlib figure.

Source code in src/autoencodix/visualize/visualize.py

@staticmethod
def plot_2D(
    embedding: pd.DataFrame,
    labels: list,
    param: Optional[Union[str, None]] = None,
    layer: str = "latent space",
    figsize: tuple = (24, 15),
    center: bool = True,
    plot_numeric: bool = False,
    xlim: Optional[Union[tuple, None]] = None,
    ylim: Optional[Union[tuple, None]] = None,
    scale: Optional[Union[str, None]] = None,
    no_leg: bool = False,
) -> matplotlib.figure.Figure:
    """Plots a 2D scatter plot of the given embedding with labels.

    Args:
        embedding: DataFrame containing the 2D embedding coordinates (first two columns are plotted).
        labels: List of labels corresponding to each point in the embedding.
            Non-string labels with more than 3 unique values are treated as numeric;
            with 3 or fewer unique values they are converted to strings.
        param: Title for the legend. Defaults to None.
        layer: Title for the plot. Defaults to "latent space".
        figsize: Size of the figure. Defaults to (24, 15).
        center: If True, additionally marks the mean position of each label group. Defaults to True.
        plot_numeric: If True, numeric labels are plotted on a continuous colour scale
            (and centering is disabled); if False they are binned into quartiles. Defaults to False.
        xlim: Limits of the x axis as (min, max). Defaults to None.
        ylim: Limits of the y axis as (min, max). Defaults to None.
        scale: Scale applied to both axes (passed to matplotlib). Defaults to None.
        no_leg: If True, the legend is hidden. Defaults to False.

    Returns:
        The resulting matplotlib figure.

    Raises:
        ValueError: If labels is empty.
    """
    labels = list(labels)
    if len(labels) == 0:
        raise ValueError("labels must contain at least one entry.")

    def _is_number(value) -> bool:
        # Integers count as numeric too; treating them as missing turned whole
        # integer-valued columns into NaN and made the quartile binning fail.
        return isinstance(
            value, (int, float, np.integer, np.floating)
        ) and not isinstance(value, (bool, np.bool_))

    numeric = False
    if not isinstance(labels[0], str):
        if len(np.unique(labels)) > 3:
            if not plot_numeric:
                print(
                    "The provided label column is numeric and converted to categories."
                )
                # Change non-numeric labels to NaN
                numeric_values = [
                    x if _is_number(x) else float("nan") for x in labels
                ]
                labels = (
                    pd.qcut(
                        x=pd.Series(numeric_values),
                        q=4,
                        labels=["1stQ", "2ndQ", "3rdQ", "4thQ"],
                    )
                    .astype(str)
                    .to_list()
                )
            else:
                center = False  ## Disable centering for numeric params
                numeric = True
        else:
            labels = [str(x) for x in labels]

    fig, ax1 = plt.subplots(figsize=figsize)

    # check if label or embedding is longer and adapt the labels
    n_points = embedding.shape[0]
    if len(labels) < n_points:
        print(
            "Given labels do not have the same length as given sample size. Labels will be duplicated."
        )
        # Each label is repeated consecutively (a, a, b, b, ...)
        repeats = n_points // len(labels)
        labels = [label for label in labels for _ in range(repeats)]
    elif len(labels) > n_points:
        # NOTE(review): this reduces the labels to their unique values (unordered);
        # kept as in the original implementation - confirm intent.
        labels = list(set(labels))

    unique_labels = np.unique(labels)

    scatter_kwargs = dict(
        x=embedding.iloc[:, 0],
        y=embedding.iloc[:, 1],
        hue=labels,
        s=40,
        alpha=0.5,
        ec="black",
        ax=ax1,  # draw explicitly on the axes created above
    )
    if numeric:
        sns.scatterplot(palette="bwr", **scatter_kwargs)
    else:
        sns.scatterplot(hue_order=unique_labels, **scatter_kwargs)

    if center:
        # Star marker at the mean position of every label group
        means = embedding.groupby(by=labels).mean()

        sns.scatterplot(
            x=means.iloc[:, 0],
            y=means.iloc[:, 1],
            hue=unique_labels,
            hue_order=unique_labels,
            s=200,
            ec="black",
            alpha=0.9,
            marker="*",
            legend=False,
            ax=ax1,
        )

    if xlim is not None:
        ax1.set_xlim(xlim[0], xlim[1])

    if ylim is not None:
        ax1.set_ylim(ylim[0], ylim[1])

    if scale is not None:
        ax1.set_yscale(scale)
        ax1.set_xscale(scale)
    ax1.set_xlabel("Dim 1")
    ax1.set_ylabel("Dim 2")

    # Use two legend columns when there are many groups
    legend_cols = 2 if len(unique_labels) > 10 else 1

    if no_leg:
        ax1.legend([], [], frameon=False)
    else:
        sns.move_legend(
            ax1,
            "upper left",
            bbox_to_anchor=(1, 1),
            ncol=legend_cols,
            title=param,
            frameon=False,
        )

    # Add title to the plot
    ax1.set_title(layer)

    # Close only this figure so it is not displayed implicitly
    plt.close(fig)
    return fig

`plot_evaluation(result)`

Plots the evaluation results from the Result object.

Parameters:

Name	Type	Description	Default
`result`	`Result`	The Result object containing evaluation data.	required

Returns:

Type	Description
`dict`	The generated dictionary containing the evaluation plots.

Source code in src/autoencodix/visualize/visualize.py

@no_type_check
def plot_evaluation(
    self,
    result: Result,
) -> dict:
    """Builds one bar plot per clinical parameter, metric and ML algorithm.

    The plots are created from ``result.embedding_evaluation`` and stored under
    ``self.plots["ML_Evaluation"]`` as a nested dictionary
    ``{clinical parameter: {metric: {ML algorithm: plot}}}``.

    Args:
        result: The Result object containing evaluation data.

    Returns:
        The generated dictionary containing the evaluation plots.
    """
    plt.ioff()
    evaluation = result.embedding_evaluation
    ml_plots = {}

    for clin_param in pd.unique(evaluation.CLINIC_PARAM):
        is_param = evaluation.CLINIC_PARAM == clin_param
        ml_plots[clin_param] = {}

        for metric_name in pd.unique(evaluation.loc[is_param, "metric"]):
            is_metric = evaluation.metric == metric_name
            ml_plots[clin_param][metric_name] = {}

            algorithms = pd.unique(evaluation.loc[is_param & is_metric, "ML_ALG"])
            for alg in algorithms:
                subset = evaluation[
                    is_metric & is_param & (evaluation.ML_ALG == alg)
                ]

                grid = sns.catplot(
                    data=subset,
                    x="score_split",
                    y="value",
                    col="ML_TASK",
                    hue="score_split",
                    kind="bar",
                )

                # The y axis starts at 0 unless the data contains negative values
                lower = subset.value.min()
                if lower > 0:
                    lower = 0

                ml_plots[clin_param][metric_name][alg] = grid.set(
                    ylim=(lower, None)
                )

    self.plots["ML_Evaluation"] = ml_plots

    return ml_plots

`plot_latent_ridge(lat_space, labels=None, param=None)` `staticmethod`

Creates a ridgeline plot of the latent space in which each row shows the density of one latent dimension, with one ridge per label group. Args: lat_space: DataFrame of latent dimensions (columns) per sample (rows). labels: Group label per sample; if None, all samples are considered as one group. param: Name of the label column and legend title; must be a column name (str) of clin_data. Returns: g: FacetGrid object containing the ridgeline plot

Source code in src/autoencodix/visualize/visualize.py

@staticmethod
def plot_latent_ridge(
    lat_space: pd.DataFrame,
    labels: Optional[Union[list, pd.Series, None]] = None,
    param: Optional[Union[str, None]] = None,
) -> sns.FacetGrid:
    """Creates a ridge line plot of latent space dimension where each row shows the density of a latent dimension and groups (ridges).

    Args:
        lat_space: DataFrame with one row per sample and one column per latent dimension.
        labels: One label per sample (row of lat_space). If None, all samples are
            considered as one group. Non-string labels with more than 3 unique values
            are binned into quartiles, otherwise they are converted to strings.
        param: Name used for the label column and the legend title, e.g. a column
            name (str) of clin_data. Falls back to "user_label" if labels are given
            without a name.
    Returns:
        g: FacetGrid object containing the ridge line plot
    Raises:
        ValueError: If the number of labels differs from the number of samples.
    """
    n_samples = len(lat_space)
    n_dims = len(lat_space.columns)

    if labels is None:
        param = "all"
        # One label per sample; the labels are tiled per latent dimension below
        labels = ["all"] * n_samples
    else:
        # A plain list is required: `int * Series` would repeat every string
        # element-wise instead of tiling the labels.
        labels = list(labels)
        if param is None:
            param = "user_label"

    if len(labels) != n_samples:
        raise ValueError(
            f"Number of labels ({len(labels)}) does not match number of samples ({n_samples})."
        )

    # NOTE: this changes the global seaborn theme for all subsequent plots.
    sns.set_theme(
        style="white", rc={"axes.facecolor": (0, 0, 0, 0)}
    )  ## Necessary to enforce overplotting

    df = pd.melt(lat_space, var_name="latent dim", value_name="latent intensity")
    df["sample"] = n_dims * list(lat_space.index)

    def _is_number(value) -> bool:
        # Integers count as numeric too; otherwise integer-valued columns
        # become all-NaN and the quartile binning fails.
        return isinstance(
            value, (int, float, np.integer, np.floating)
        ) and not isinstance(value, (bool, np.bool_))

    if labels and not isinstance(labels[0], str):
        if len(np.unique(labels)) > 3:
            # Change non-numeric labels to NaN
            numeric_values = [x if _is_number(x) else float("nan") for x in labels]
            labels = (
                pd.qcut(
                    x=pd.Series(numeric_values),
                    q=4,
                    labels=["1stQ", "2ndQ", "3rdQ", "4thQ"],
                )
                .astype(str)
                .tolist()
            )
        else:
            labels = [str(x) for x in labels]

    # melt() stacks the latent dimensions column by column, so the per-sample
    # labels are tiled once per latent dimension.
    df[param] = n_dims * labels

    exclude_missing_info = (df[param] == "unknown") | (df[param] == "nan")

    # x-range from the 5% / 90% quantiles over all groups and latent dimensions
    # NOTE(review): the upper quantile is 0.9, not 0.95 - kept as in the original.
    grouped = df.loc[
        ~exclude_missing_info, ["latent intensity", "latent dim", param]
    ].groupby([param, "latent dim"], observed=False)
    xmin = grouped.quantile(0.05).min()
    xmax = grouped.quantile(0.9).max()

    n_groups = len(np.unique(df[param]))
    if n_groups > 8:
        cat_pal = sns.husl_palette(n_groups)
    else:
        cat_pal = sns.color_palette(n_colors=n_groups)

    g = sns.FacetGrid(
        df[~exclude_missing_info],
        row="latent dim",
        hue=param,
        aspect=12,
        height=0.8,
        xlim=(xmin.iloc[0], xmax.iloc[0]),
        palette=cat_pal,
    )

    g.map_dataframe(
        sns.kdeplot,
        "latent intensity",
        bw_adjust=0.5,
        clip_on=True,
        fill=True,
        alpha=0.5,
        warn_singular=False,
        ec="k",
        lw=1,
    )

    def label(data, color, label, text="latent dim"):
        # Writes the name of the latent dimension to the left of its row
        ax = plt.gca()
        label_text = data[text].unique()[0]
        ax.text(
            0.0,
            0.2,
            label_text,
            fontweight="bold",
            ha="right",
            va="center",
            transform=ax.transAxes,
        )

    g.map_dataframe(label, text="latent dim")

    g.set(xlim=(xmin.iloc[0], xmax.iloc[0]))
    # Set the subplots to overlap
    g.figure.subplots_adjust(hspace=-0.5)

    # Remove axes details that don't play well with overlap
    g.set_titles("")
    g.set(yticks=[], ylabel="")
    g.despine(bottom=True, left=True)

    g.add_legend()

    # Close only the figure of this grid so it is not displayed implicitly
    plt.close(g.figure)
    return g

`plot_model_weights(model)`

Visualization of model weights in encoder and decoder layers as heatmap for each layer as subplot.

Handles non-symmetrical autoencoder architectures. Plots _mu layer for encoder as well. Uses node_names for decoder layers if model has ontologies.

Parameters:

Name	Type	Description	Default
`model`	`Module`	PyTorch model instance.	required

Returns: fig: Figure handle (of last plot)

Source code in src/autoencodix/visualize/visualize.py

def plot_model_weights(model: torch.nn.Module) -> matplotlib.figure.Figure:
    """Visualization of model weights in encoder and decoder layers as heatmap for each layer as subplot.

    Handles non-symmetrical autoencoder architectures.
    Plots _mu layer for encoder as well.
    Uses node_names for decoder layers if model has ontologies.

    Only two-dimensional parameters whose name contains "weight" are plotted;
    parameters with "var" in their name are skipped (except "_mu" layers).
    If no parameter name contains "encoder"/"decoder"/"_mu", the remaining weights
    are split in half: first half in the top row, second half in the bottom row.

    Args:
        model: PyTorch model instance.
    Returns:
        fig: Figure handle (of last plot)
    Raises:
        ValueError: If the model has no weights that can be plotted.
    """
    # Node names per decoder layer, only available for ontology-based models
    node_names = []
    ontologies = getattr(model, "ontologies", None)
    if ontologies is not None:
        node_names = [list(ontology.keys()) for ontology in ontologies]
        node_names.append(model.feature_order)

    # Collect encoder and decoder weights separately
    encoder_weights, encoder_names = [], []
    decoder_weights, decoder_names = [], []
    all_weights, names = [], []  # fallback for models without explicit naming
    for name, param in model.named_parameters():
        if "weight" not in name or len(param.shape) != 2:
            continue
        has_var = "var" in name
        if "_mu" in name or ("encoder" in name and not has_var):
            weights_out, names_out = encoder_weights, encoder_names
        elif "decoder" in name and not has_var:
            weights_out, names_out = decoder_weights, decoder_names
        elif "encoder" not in name and "decoder" not in name and not has_var:
            weights_out, names_out = all_weights, names
        else:
            continue
        weights_out.append(param.detach().cpu().numpy())
        names_out.append(name[:-7])  # strip the trailing ".weight"

    has_named_layers = bool(encoder_weights or decoder_weights)
    if not has_named_layers and len(all_weights) < 2:
        raise ValueError("Model has no weight matrices that can be plotted.")

    cmap = sns.color_palette("Spectral", as_cmap=True)

    if has_named_layers:
        n_enc = len(encoder_weights)
        n_dec = len(decoder_weights)
        n_cols = max(n_enc, n_dec)
        # squeeze=False keeps `axes` two-dimensional even for a single column
        fig, axes = plt.subplots(
            2, n_cols, sharex=False, figsize=(15 * n_cols, 15), squeeze=False
        )
        # Plot encoder weights
        for i in range(n_enc):
            ax = axes[0, i]
            sns.heatmap(
                encoder_weights[i],
                cmap=cmap,
                center=0,
                ax=ax,
            ).set(title=encoder_names[i])
            ax.set_ylabel("Out Node", size=12)
        # Hide unused encoder subplots
        for i in range(n_enc, n_cols):
            axes[0, i].axis("off")
        # Plot decoder weights
        for i in range(n_dec):
            ax = axes[1, i]
            sns.heatmap(
                decoder_weights[i],
                cmap=cmap,
                center=0,
                ax=ax,
            ).set(title=decoder_names[i])
            # Label nodes only when names exist for both sides of this layer
            if i + 1 < len(node_names):
                ax.set_xticks(
                    ticks=range(len(node_names[i])),
                    labels=node_names[i],
                    rotation=90,
                    fontsize=8,
                )
                ax.set_yticks(
                    ticks=range(len(node_names[i + 1])),
                    labels=node_names[i + 1],
                    rotation=0,
                    fontsize=8,
                )
            ax.set_xlabel("In Node", size=12)
            ax.set_ylabel("Out Node", size=12)
        # Hide unused decoder subplots
        for i in range(n_dec, n_cols):
            axes[1, i].axis("off")
    else:
        # fallback: plot all weights in order, split in half for encoder/decoder
        n_layers = len(all_weights) // 2
        fig, axes = plt.subplots(
            2, n_layers, sharex=False, figsize=(5 * n_layers, 10), squeeze=False
        )
        for layer in range(n_layers):
            sns.heatmap(
                all_weights[layer],
                cmap=cmap,
                center=0,
                ax=axes[0, layer],
            ).set(title=names[layer])
            sns.heatmap(
                all_weights[n_layers + layer],
                cmap=cmap,
                center=0,
                ax=axes[1, layer],
            ).set(title=names[n_layers + layer])
            axes[1, layer].set_xlabel("In Node", size=12)
            axes[0, layer].set_ylabel("Out Node", size=12)
            axes[1, layer].set_ylabel("Out Node", size=12)

    fig.suptitle("Model Weights", size=20)
    # Close only this figure so it is not displayed implicitly
    plt.close(fig)
    return fig

`save_plots(path, which='all', format='png')`

Save specified plots to the given path in the specified format.

Parameters:

Name	Type	Description	Default
`path`	`str`	The directory path where the plots will be saved.	required
`which`	`Union[str, list]`	A list of plot names to save or a string specifying which plots to save. If 'all', all plots in the plots dictionary will be saved. If a single plot name is provided as a string, only that plot will be saved.	`'all'`
`format`	`str`	The file format in which to save the plots (e.g., 'png', 'jpg').	`'png'`

Raises:

Type	Description
`ValueError`	If the 'which' parameter is not a list or a string.

Source code in src/autoencodix/visualize/visualize.py

def save_plots(
    self, path: str, which: Union[str, list] = "all", format: str = "png"
) -> None:
    """Save specified plots to the given path in the specified format.

    File names are built from the keys leading to a plot in the nested plots
    dictionary, joined by underscores.

    Args:
        path: The directory path where the plots will be saved.
        which: A list of plot names to save or a string specifying which plots to save.
                            If 'all', all plots in the plots dictionary will be saved.
                            If a single plot name is provided as a string, only that plot will be saved.
                            Unknown plot names are reported and skipped.
        format: The file format in which to save the plots (e.g., 'png', 'jpg').

    Raises:
        ValueError: If the 'which' parameter is not a list or a string.
    """
    if not isinstance(which, (str, list)):
        raise ValueError(
            f"'which' must be a string or a list of strings, got {type(which).__name__}."
        )

    def _save_entries(entries, prefix=""):
        # Each entry is a tuple of keys with the figure as its last element
        for item in entries:
            fig = item[-1]
            filename = prefix + "_".join(str(x) for x in item[0:-1])
            fullpath = os.path.join(path, filename)
            fig.savefig(f"{fullpath}.{format}")

    if isinstance(which, str) and which == "all":
        ## Case when all plots are to be saved
        if len(self.plots) == 0:
            print("No plots found in the plots dictionary")
            print("You need to run  visualize() method first")
        else:
            _save_entries(nested_to_tuple(self.plots))
        return None

    ## A single plot name is handled like a one-element list
    keys = [which] if isinstance(which, str) else which
    for key in keys:
        if key not in self.plots.keys():
            print(f"Plot {key} not found in the plots dictionary")
            print(f"All available plots are: {list(self.plots.keys())}")
            continue
        # Save all epochs and splits of plot type `key`
        _save_entries(nested_to_tuple(self.plots[key]), prefix=key + "_")
    return None

`show_evaluation(param, metric, ml_alg=None)`

Displays the evaluation plot for a specific clinical parameter, metric, and optionally ML algorithm.

Parameters:

Name	Type	Description	Default
`param`	`str`	The clinical parameter to visualize.	required
`metric`	`str`	The metric to visualize.	required
`ml_alg`	`Optional[str]`	If None, plots all available algorithms.	`None`

Source code in src/autoencodix/visualize/visualize.py

def show_evaluation(
    self,
    param: str,
    metric: str,
    ml_alg: Optional[str] = None,
) -> None:
    """Shows stored ML evaluation plots for one clinical parameter and metric.

    The plots are looked up in ``self.plots["ML_Evaluation"]``. If a requested
    entry is missing, a message listing the available options is printed and
    nothing is shown.

    Args:
        param: The clinical parameter to visualize.
        metric: The metric to visualize.
        ml_alg: If None, plots all available algorithms.
    """
    plt.ioff()

    if "ML_Evaluation" not in self.plots.keys():
        print("ML Evaluation plots not found in the plots dictionary")
        print("You need to run evaluate() method first")
        return None

    evaluation_plots = self.plots["ML_Evaluation"]
    if param not in evaluation_plots.keys():
        print(f"Parameter {param} not found in the ML Evaluation plots")
        print(f"Available parameters: {list(evaluation_plots.keys())}")
        return None

    metric_plots = evaluation_plots[param]
    if metric not in metric_plots.keys():
        print(f"Metric {metric} not found in the ML Evaluation plots for {param}")
        print(f"Available metrics: {list(metric_plots.keys())}")
        return None

    alg_plots = metric_plots[metric]
    algs = list(alg_plots.keys())

    if ml_alg is None:
        # No algorithm selected: show the plot of every available algorithm
        for alg in algs:
            print(f"Showing plot for ML algorithm: {alg}")
            show_figure(alg_plots[alg].figure)
            plt.show()
        return None

    if ml_alg not in algs:
        print(f"ML algorithm {ml_alg} not found for {param} and {metric}")
        print(f"Available ML algorithms: {algs}")
        return None

    show_figure(alg_plots[ml_alg].figure)
    plt.show()

`show_latent_space(result, plot_type='2D-scatter', labels=None, param=None, epoch=None, split='all', **kwargs)`

Visualizes the latent space of the given result using different types of plots.

Parameters:

Name	Type	Description	Default
`result`	`Result`	The result object containing latent spaces and losses.	required
`labels`	`Optional[Union[list, Series, None]]`	List of labels for the data points in the latent space. Default is None.	`None`
`param`	`Optional[Union[list, str]]`	List of parameters provided and stored as metadata. Strings must match column names. If not a list, the string "all" is expected as a convenient way to make plots for all available parameters. Default is None, where no colored labels are plotted.	`None`
`epoch`	`Optional[Union[int, None]]`	The epoch number to visualize. If None, the last epoch is inferred from the losses. Default is None.	`None`
`split`	`str`	The data split to visualize. Options are "train", "valid", "test", and "all". Default is "all".	`'all'`

Source code in src/autoencodix/visualize/visualize.py

@no_type_check
def show_latent_space(
    self,
    result: Result,
    plot_type: str = "2D-scatter",
    labels: Optional[Union[list, pd.Series, None]] = None,
    param: Optional[Union[list, str]] = None,
    epoch: Optional[Union[int, None]] = None,
    split: str = "all",
    **kwargs,
) -> None:
    """Visualizes the latent space of the given result using different types of plots.

    Created plots are stored in ``self.plots`` (keyed by plot type, epoch, split and
    parameter for "2D-scatter" and "Ridgeline") and shown afterwards.

    Args:
        result: The result object containing latent spaces and losses.
        plot_type: The type of plot to generate. Options are "2D-scatter", "Ridgeline", and "Coverage-Correlation". Default is "2D-scatter".
        labels: List of labels for the data points in the latent space. Default is None.
        param: List of parameters provided and stored as metadata. Strings must match column names. If not a list, string "all" is expected for convenient way to make plots for all parameters available. Default is None where no colored labels are plotted.
        epoch: The epoch number to visualize. If None, the last epoch is inferred from the model config. Default is None.
        split: The data split to visualize. Options are "train", "valid", "test", and "all". Default is "all".
        **kwargs: Currently unused.

    Raises:
        ValueError: If a required data split or its metadata is missing, if the metadata
            is neither a DataFrame nor a dictionary with key 'paired', if both labels and
            param are given, or if param is a string other than "all".
    """
    plt.ioff()
    if plot_type == "Coverage-Correlation":
        if "Coverage-Correlation" in self.plots:
            # Reuse the figure created by an earlier call
            fig = self.plots["Coverage-Correlation"]
        else:
            interval = result.model.config.checkpoint_interval
            records = []
            for ckpt_epoch in range(
                interval, result.model.config.epochs + 1, interval
            ):
                for ckpt_split in ["train", "valid"]:
                    latent_df = result.get_latent_df(
                        epoch=ckpt_epoch - 1, split=ckpt_split
                    )
                    records.append(
                        {
                            "epoch": ckpt_epoch,
                            "split": ckpt_split,
                            "total_correlation": self._total_correlation(latent_df),
                            "coverage": self._coverage_calc(latent_df),
                        }
                    )

            df_metrics = pd.DataFrame(records)

            fig, axes = plt.subplots(1, 2, figsize=(12, 5))

            # Total Correlation plot
            sns.lineplot(
                data=df_metrics,
                x="epoch",
                y="total_correlation",
                hue="split",
                ax=axes[0],
            )
            axes[0].set_title("Total Correlation")
            axes[0].set_xlabel("Epoch")
            axes[0].set_ylabel("Total Correlation")

            # Coverage plot
            sns.lineplot(
                data=df_metrics, x="epoch", y="coverage", hue="split", ax=axes[1]
            )
            axes[1].set_title("Coverage")
            axes[1].set_xlabel("Epoch")
            axes[1].set_ylabel("Coverage")

            plt.tight_layout()
            self.plots["Coverage-Correlation"] = fig

        show_figure(fig)
        plt.show()
        return None

    if plot_type not in ("2D-scatter", "Ridgeline"):
        print(
            f"Plot type {plot_type} not recognized. Please use '2D-scatter', 'Ridgeline' or 'Coverage-Correlation'"
        )
        return None

    # Set Defaults
    if epoch is None:
        epoch = result.model.config.epochs - 1

    ## Getting clin_data
    if not hasattr(result.datasets, "train"):
        raise ValueError("no train split in datasets")
    if not hasattr(result.datasets, "valid"):
        raise ValueError("no valid split in datasets")
    if result.datasets.train is None:
        raise ValueError("train is None")
    if result.datasets.valid is None:
        raise ValueError("valid is None")
    if result.datasets.test is None:
        raise ValueError("test is None")

    if not hasattr(result.datasets.train, "metadata"):
        raise ValueError("train dataset has no metadata")
    if not hasattr(result.datasets.valid, "metadata"):
        raise ValueError("valid dataset has no metadata")

    def _annotation_frame(dataset):
        # Returns the annotation DataFrame stored in a dataset's metadata
        metadata = dataset.metadata
        if isinstance(metadata, dict):
            if "paired" not in metadata:
                raise ValueError(
                    "Please provide paired annotation data with key 'paired' in metadata dictionary."
                )
            return metadata["paired"]
        if isinstance(metadata, pd.DataFrame):
            return metadata
        if metadata is None:
            raise ValueError(
                "No annotation data found. Please provide a valid annotation data type."
            )
        raise ValueError(
            "Metadata is not a dictionary or DataFrame. Please provide a valid annotation data type."
        )

    # Annotation of all splits; rows are selected by sample index further below
    annotation_parts = [_annotation_frame(result.datasets.train)]
    for extra_split in ("test", "valid"):
        if hasattr(result.datasets, extra_split):
            annotation_parts.append(
                _annotation_frame(getattr(result.datasets, extra_split))
            )
    clin_data = pd.concat(annotation_parts, axis=0)

    # The test split is always read with epoch=-1
    if split == "all":
        df_latent = pd.concat(
            [
                result.get_latent_df(epoch=epoch, split="train"),
                result.get_latent_df(epoch=epoch, split="valid"),
                result.get_latent_df(epoch=-1, split="test"),
            ]
        )
    elif split == "test":
        df_latent = result.get_latent_df(epoch=-1, split=split)
    else:
        df_latent = result.get_latent_df(epoch=epoch, split=split)

    if labels is None and param is None:
        labels = ["all"] * df_latent.shape[0]

    if labels is None and isinstance(param, str):
        if param == "all":
            param = list(clin_data.columns)
        else:
            raise ValueError(
                "Please provide parameter to plot as a list not as string. If you want to plot all parameters, set param to 'all' and labels to None."
            )

    if labels is not None and param is not None:
        raise ValueError(
            "Please provide either labels or param, not both. If you want to plot all parameters, set param to 'all' and labels to None."
        )

    if labels is not None and param is None:
        if isinstance(labels, pd.Series):
            param = [labels.name]
            # Order by index of df_latent first, fill missing with "unknown"
            labels = labels.reindex(df_latent.index, fill_value="unknown").tolist()
        else:
            param = ["user_label"]  # Default label if none provided

    # Labels given by the caller take precedence over metadata columns
    user_labels = labels

    embedding = None
    if plot_type == "2D-scatter":
        ## Make 2D Embedding with UMAP (once, so all parameters share one embedding)
        if df_latent.shape[1] > 2:
            reducer = UMAP(n_components=2)
            embedding = pd.DataFrame(reducer.fit_transform(df_latent))
        else:
            embedding = df_latent

    for p in param:
        if user_labels is not None:
            current_labels = user_labels
        elif p in clin_data.columns:
            current_labels = clin_data.loc[df_latent.index, p].tolist()
        else:
            # Never reuse the labels of a previous parameter for an unknown one
            print(f"Parameter {p} not found in the annotation data")
            print(f"Available parameters: {list(clin_data.columns)}")
            continue

        if plot_type == "2D-scatter":
            self.plots["2D-scatter"][epoch][split][p] = self.plot_2D(
                embedding=embedding,
                labels=current_labels,
                param=p,
                layer=f"2D latent space (epoch {epoch + 1})",  # we start counting epochs at 0, so add 1 for display
                figsize=(12, 8),
                center=True,
            )

            fig = self.plots["2D-scatter"][epoch][split][p]
            show_figure(fig)
            plt.show()

        if plot_type == "Ridgeline":
            ## Make ridgeline plot
            self.plots["Ridgeline"][epoch][split][p] = self.plot_latent_ridge(
                lat_space=df_latent, labels=current_labels, param=p
            )

            fig = self.plots["Ridgeline"][epoch][split][p].figure
            show_figure(fig)
            plt.show()

`show_loss(plot_type='absolute')`

Display the loss plot.

Parameters:

Name	Type	Description	Default
`plot_type`	`Literal['absolute', 'relative']`	The type of loss plot to display. Defaults to "absolute".	`'absolute'`

Source code in src/autoencodix/visualize/visualize.py

def show_loss(
    self, plot_type: Literal["absolute", "relative"] = "absolute"
) -> None:
    """Show one of the loss figures created by ``visualize()``.

    Nothing is raised for a missing figure or an unknown plot type; a hint is
    printed instead.

    Args:
        plot_type: Which loss figure to show, "absolute" or "relative".
            Defaults to "absolute".
    """
    if plot_type not in ("absolute", "relative"):
        print(
            "Type of loss plot not recognized. Please use 'absolute' or 'relative'"
        )
        return

    # plot_type -> (key in self.plots, message printed when the figure is missing)
    lookup = {
        "absolute": (
            "loss_absolute",
            "Absolute loss plot not found in the plots dictionary",
        ),
        "relative": (
            "loss_relative",
            "Relative loss plot not found in the plots dictionary",
        ),
    }
    plot_key, missing_message = lookup[plot_type]

    if plot_key in self.plots.keys():
        show_figure(self.plots[plot_key])
        plt.show()
        return

    print(missing_message)
    print("You need to run visualize() method first")

`show_weights()`

Display the model weights plot if it exists in the plots dictionary.

Source code in src/autoencodix/visualize/visualize.py

def show_weights(self) -> None:
    """Show the stored model weights figure, or print a hint if there is none."""
    if "ModelWeights" in self.plots.keys():
        weights_figure = self.plots["ModelWeights"]
        show_figure(weights_figure)
        plt.show()
        return

    print("Model weights not found in the plots dictionary")
    print("You need to run visualize() method first")

`XModalVisualizer`

Bases: BaseVisualizer

Source code in src/autoencodix/visualize/_xmodal_visualizer.py

class XModalVisualizer(BaseVisualizer):
    plots: Dict[str, Any] = field(
        default_factory=nested_dict
    )  ## Nested dictionary of plots as figure handles

    def __init__(self):
        """Start with an empty plot registry created by ``nested_dict()``."""
        self.plots = nested_dict()

    def __setitem__(self, key, elem):
        """Store ``elem`` in ``self.plots`` under ``key`` (enables ``visualizer[key] = elem``)."""
        self.plots[key] = elem

    def visualize(self, result: Result, config: DefaultConfig) -> Result:
        """Create the absolute and relative loss plots for an X-Modalix run.

        Only combined loss terms are plotted: every loss term whose name starts
        with one of the modality keys of the training dataset is removed first.
        The figures are stored in ``self.plots["loss_absolute"]`` and
        ``self.plots["loss_relative"]``.

        Args:
            result: Result object providing ``losses`` and ``datasets``; it is
                also forwarded to ``_make_loss_format``.
            config: Configuration forwarded to ``_make_loss_format``.

        Returns:
            The ``result`` object that was passed in.

        Raises:
            ValueError: If ``result.datasets`` has no ``train`` attribute or
                that attribute is None.
        """
        import warnings

        ## Make Model Weights plot
        ## TODO needs to be adjusted for X-Modalix ##
        ## Plot Model weights for each sub-VAE ##
        # self.plots["ModelWeights"] = self._plot_model_weights(model=result.model)

        if not hasattr(result.datasets, "train"):
            raise ValueError("result.datasets has no attribute train")
        if result.datasets.train is None:
            raise ValueError("Train attribute of datasets is None")

        # Check for missing loss data BEFORE it is reshaped and filtered, so an
        # empty loss history ends in this warning instead of failing further down.
        if not result.losses._data:
            warnings.warn(
                "No loss data: This usually happens if you try to visualize after saving and loading the pipeline object with `save_all=False`. This memory-efficient saving mode does not retain past training loss data."
            )
            return result

        ## Make long format of losses
        loss_df_melt = self._make_loss_format(result=result, config=config)

        ## X-Modalix specific ##
        # Filter loss terms which are specific for each modality VAE
        # Plot only combined loss terms as in old autoencodix framework
        modality_prefixes = tuple(result.datasets.train.datasets.keys())
        loss_df_melt = loss_df_melt[
            ~loss_df_melt["Loss Term"].str.startswith(modality_prefixes)
        ]

        ## Make plot loss absolute
        self.plots["loss_absolute"] = self._make_loss_plot(
            df_plot=loss_df_melt, plot_type="absolute"
        )
        ## Make plot loss relative
        self.plots["loss_relative"] = self._make_loss_plot(
            df_plot=loss_df_melt, plot_type="relative"
        )

        return result

    def show_latent_space(
        self,
        result: Result,
        plot_type: str = "2D-scatter",
        labels: Optional[Union[list, pd.Series, None]] = None,
        param: Optional[Union[list, str]] = None,
        epoch: Optional[Union[int, None]] = None,
        split: str = "all",
    ) -> None:
        """Plot the latent spaces of all modalities as 2D scatter or ridgeline plot.

        Latent spaces and metadata are collected per split and per modality,
        concatenated, and plotted once per entry of ``param``. Each plot is
        stored in ``self.plots[plot_type][epoch][split][p]``.

        Args:
            result: Result object providing ``datasets``, ``model`` and
                ``get_latent_df``.
            plot_type: "2D-scatter" or "Ridgeline". "Coverage-Correlation" only
                prints a TODO message. Any other value produces no plot.
            labels: Optional user labels (list or ``pd.Series``). A Series is
                reindexed to the collected sample ids, missing ids become
                "unknown".
            param: List of metadata columns to plot, or the string "all" for
                every metadata column. If None, the ``annotation_columns`` of
                the first sub-model's data config are used.
            epoch: Epoch of the latent space to plot. None is replaced by -1.
                The test split is always read with ``epoch=-1``.
            split: "all" collects "train", "test" and "valid"; any other value
                is used as the single split name.

        Raises:
            ValueError: If ``param`` is a string other than "all", if both
                ``labels`` and ``param`` are set, if ``param`` is not a list
                after defaulting, if no model is found for the first modality,
                or if the Ridgeline labels do not match the number of latent
                rows.
        """
        plt.ioff()
        if plot_type == "Coverage-Correlation":
            print("TODO: Implement Coverage-Correlation plot for X-Modalix")
            # if "Coverage-Correlation" in self.plots:
            #     fig = self.plots["Coverage-Correlation"]
            #     show_figure(fig)
            #     plt.show()
            # else:
            #     results = []
            #     for epoch in range(result.model.config.checkpoint_interval, result.model.config.epochs + 1, result.model.config.checkpoint_interval):
            #         for split in ["train", "valid"]:
            #             latent_df = result.get_latent_df(epoch=epoch-1, split=split)
            #             tc = self._total_correlation(latent_df)
            #             cov = self._coverage_calc(latent_df)
            #             results.append({"epoch": epoch, "split": split, "total_correlation": tc, "coverage": cov})

            #     df_metrics = pd.DataFrame(results)

            #     fig, axes = plt.subplots(1, 2, figsize=(12, 5))

            #     # Total Correlation plot
            #     ax1 = sns.lineplot(data=df_metrics, x="epoch", y="total_correlation", hue="split", ax=axes[0])
            #     axes[0].set_title("Total Correlation")
            #     axes[0].set_xlabel("Epoch")
            #     axes[0].set_ylabel("Total Correlation")

            #     # Coverage plot
            #     ax2 = sns.lineplot(data=df_metrics, x="epoch", y="coverage", hue="split", ax=axes[1])
            #     axes[1].set_title("Coverage")
            #     axes[1].set_xlabel("Epoch")
            #     axes[1].set_ylabel("Coverage")

            #     plt.tight_layout()
            #     self.plots["Coverage-Correlation"] = fig
            #     show_figure(fig)
            #     plt.show()
        else:
            # Set Defaults
            if epoch is None:
                epoch = -1

            ## Collect all metadata and latent spaces from datasets
            clin_data = []
            latent_data = []

            if split == "all":
                split_list = ["train", "test", "valid"]
            else:
                split_list = [split]
            for s in split_list:
                split_ds = getattr(result.datasets, s, None)
                if split_ds is not None:
                    for key, ds in split_ds.datasets.items():
                        # The test split is always read at epoch=-1, regardless of `epoch`
                        if s == "test":
                            df_latent = result.get_latent_df(
                                epoch=-1, split=s, modality=key
                            )
                        else:
                            df_latent = result.get_latent_df(
                                epoch=epoch, split=s, modality=key
                            )
                        df_latent["modality"] = key
                        df_latent["sample_ids"] = (
                            df_latent.index
                        )  # Each sample can occur multiple times in latent space
                        latent_data.append(df_latent)
                        if hasattr(ds, "metadata") and ds.metadata is not None:
                            df = ds.metadata.copy()
                            df["sample_ids"] = df.index.astype(str)
                            df["split"] = s
                            df["modality"] = key
                            clin_data.append(df)

            if latent_data and clin_data:
                latent_data = pd.concat(latent_data, axis=0, ignore_index=True)
                clin_data = pd.concat(clin_data, axis=0, ignore_index=True)
                # Keep one metadata row per sample id and index the metadata by it
                if "sample_ids" in clin_data.columns:
                    clin_data = clin_data.drop_duplicates(
                        subset="sample_ids"
                    ).set_index("sample_ids")
            else:
                # Either list being empty discards both (latent data is dropped too)
                latent_data = pd.DataFrame()
                clin_data = pd.DataFrame()

            ## Label options
            # NOTE(review): `param` is filled from the config before the
            # "labels and param" check below, so passing only `labels` raises
            # that ValueError unless annotation_columns is None — confirm intent.
            if param is None:
                modality = list(result.model.keys())[
                    0
                ]  # Take the first since configs are same for all sub-VAEs
                model = result.model.get(modality, None)
                if model is None:
                    raise ValueError(
                        f"Model for modality {modality} not found in result.model"
                    )
                param = model.config.data_config.annotation_columns

            if labels is None and param is None:
                labels = ["all"] * latent_data["sample_ids"].unique().shape[0]

            if labels is None and isinstance(param, str):
                if param == "all":
                    param = list(clin_data.columns)
                else:
                    raise ValueError(
                        "Please provide parameter to plot as a list not as string. If you want to plot all parameters, set param to 'all' and labels to None."
                    )

            if labels is not None and param is not None:
                raise ValueError(
                    "Please provide either labels or param, not both. If you want to plot all parameters, set param to 'all' and labels to None."
                )

            if labels is not None and param is None:
                if isinstance(labels, pd.Series):
                    param = [labels.name]
                    # Order by index of latent_data first, fill missing with "unknown"
                    labels = labels.reindex(  # ty: ignore
                        latent_data["sample_ids"],  # ty: ignore
                        fill_value="unknown",  # ty: ignore
                    ).tolist()
                else:
                    param = ["user_label"]  # Default label if none provided
            if not isinstance(param, list):
                raise ValueError(f"param: should be converted to list, got: {param}")
            for p in param:
                if p in clin_data.columns:
                    # One label per latent row, looked up by sample id
                    labels: List = clin_data.loc[
                        latent_data["sample_ids"], p
                    ].tolist()  # ty: ignore
                else:
                    # NOTE(review): `labels` can still be None here (param taken
                    # from the config, column missing in metadata), in which case
                    # len(labels) raises TypeError — confirm and guard.
                    if clin_data.shape[0] == len(labels):  # ty: ignore
                        clin_data[p] = labels
                    else:
                        clin_data[p] = ["all"] * clin_data.shape[0]

                if plot_type == "2D-scatter":
                    ## Make 2D Embedding with UMAP
                    if (
                        latent_data.drop(
                            columns=["sample_ids", "modality"]
                        ).shape[  # ty: ignore
                            1
                        ]  # ty: ignore
                        > 2
                    ):
                        reducer = UMAP(n_components=2)
                        embedding = pd.DataFrame(
                            reducer.fit_transform(
                                latent_data.drop(
                                    columns=["sample_ids", "modality"]
                                )  # ty: ignore
                            )
                        )
                        embedding.columns = ["DIM1", "DIM2"]
                        embedding["sample_ids"] = latent_data["sample_ids"]
                        embedding["modality"] = latent_data["modality"]
                    else:
                        # NOTE(review): columns are not renamed to DIM1/DIM2 in
                        # this branch, but _plot_translate_latent plots
                        # x="DIM1", y="DIM2" — confirm the latent column names.
                        embedding = latent_data

                    # Merge with clinical data via sample_ids
                    clin_data["sample_ids"] = clin_data.index.astype(str)
                    clin_data.index = clin_data.index.astype(str)  # Add this line
                    embedding["sample_ids"] = embedding["sample_ids"].astype(str)

                    embedding = embedding.merge(
                        clin_data.drop(columns=["modality"]),  # ty: ignore
                        left_on="sample_ids",
                        right_index=True,
                        how="left",
                    )

                    self.plots["2D-scatter"][epoch][split][p] = (
                        self._plot_translate_latent(
                            embedding=embedding,
                            color_param=p,
                            style_param="modality",
                        )
                    )

                    # `fig` is currently unused because show_figure is commented out
                    fig = self.plots["2D-scatter"][epoch][split][p].figure
                    # show_figure(fig)
                    plt.show()

                if plot_type == "Ridgeline":
                    ## Make ridgeline plot
                    # Labels must be one per latent row; only the "all" default is expanded
                    if len(labels) != latent_data.shape[0]:  # ty: ignore
                        if labels[0] == "all":  # ty: ignore
                            labels = ["all"] * latent_data.shape[0]  # ty: ignore
                        else:
                            raise ValueError(
                                "Labels must match the number of samples in the latent space."
                            )

                    self.plots["Ridgeline"][epoch][split][p] = (
                        self._plot_latent_ridge_multi(
                            lat_space=latent_data.drop(
                                columns=["sample_ids"]
                            ),  # ty: ignore
                            labels=labels,
                            modality="modality",
                            param=p,
                        )
                    )

                    fig = self.plots["Ridgeline"][epoch][split][p].figure
                    show_figure(fig)
                    plt.show()

    def show_weights(self) -> None:
        """Placeholder for the model weights plot; not available for X-Modalix.

        Raises:
            NotImplementedError: Always.
        """
        ## TODO
        raise NotImplementedError(
            "Weight visualization for X-Modalix is not implemented."
        )

    @no_type_check
    def show_image_translation(  # ty: ignore
        self,
        result: Result,
        from_key: str,
        to_key: str,
        n_sample_per_class: int = 3,
        param: Optional[str] = None,
    ) -> None:  # ty: ignore
        """Show a grid of original, translated and reference images per class.

        The grid has three rows (original image, translated image, reference
        reconstruction) and one column per selected test sample. Samples are
        drawn from the paired test samples, grouped by the values of ``param``.
        The figure is stored in
        ``self.plots["Image-translation"][to_key]["test"][param]``.

        Args:
            result: The result object containing datasets, sample ids and
                reconstructions.
            from_key: The source modality key. Not used by the visualization
                itself, kept for context.
            to_key: The target modality key. Must contain the substring "img".
            n_sample_per_class: Maximum number of samples shown per class value.
                Default is 3.
            param: The metadata column used to group samples by class. If None,
                all samples form one class "all" (column "user-label").

        Raises:
            ValueError: If ``to_key`` does not contain "img", or if no sample
                could be selected for the grid.

        Note:
            Only the "test" split is currently supported. At most the first 10
            class values are shown.
        """
        if "img" not in to_key:
            raise ValueError(
                f"You provided as 'to_key' {to_key} a non-image dataset. "
                "Image translation grid visualization is only possible for translation to IMG data type."
            )

        split = "test"  # Currently only test split is supported
        test_data = result.datasets.test
        target_dataset = test_data.datasets[to_key]

        # Restrict meta to only paired sample ids; copy so the default label
        # column below never touches the dataset's own metadata.
        meta = target_dataset.metadata.loc[test_data.paired_sample_ids].copy()

        if param is None:
            param = "user-label"
            meta[param] = "all"  # Default to all samples if no parameter is provided

        # Get possible class values
        class_values = meta[param].unique()
        if len(class_values) > 10:
            warnings.warn(
                f"Found {len(class_values)} class values for parameter '{param}'. Only first 10 will be used to limit figure size"
            )
            class_values = class_values[:10]

        # Build dictionary of sample_ids per class value (max n_sample_per_class per class)
        sample_per_class = {
            val: meta[meta[param] == val]
            .sample(
                n=min(n_sample_per_class, (meta[param] == val).sum()),
                random_state=42,
            )
            .index.tolist()
            for val in class_values
        }

        print(f"Sample per class: {sample_per_class}")

        # One grid column per selected sample. The sample id is kept next to
        # its class value so it never has to be parsed back out of the label
        # text, and non-string class values / ids can be formatted safely.
        columns = [
            (class_value, sample_id)
            for class_value, sample_ids in sample_per_class.items()
            for sample_id in sample_ids
        ]
        n_test_samples = len(columns)
        if n_test_samples == 0:
            raise ValueError(
                f"No paired {split} samples available to build the image translation grid."
            )
        col_labels = [
            f"{class_value} {split}-sample:{sample_id}"
            for class_value, sample_id in columns
        ]

        # Lookup of sample ids and images per grid row
        test_sample_ids = result.sample_ids.get(epoch=-1, split="test")
        reconstructions = result.reconstructions.get(epoch=-1, split=split)
        reference_key = f"reference_{to_key}_to_{to_key}"

        row_labels = ["Original", "Translated", "Reference"]
        row_sample_ids = [
            list(target_dataset.sample_ids),
            list(test_sample_ids["translation"]),
            list(test_sample_ids[reference_key]),
        ]
        row_reconstruction_keys = [None, "translation", reference_key]

        ## Generate Image Grid
        fig, axes = plt.subplots(
            ncols=n_test_samples,  # One column per selected sample
            nrows=3,  # Original, translated, reference
            figsize=(n_test_samples * 2, 3 * 2),
        )

        # axes.flat walks the grid row by row (also when there is one column)
        for i, ax in enumerate(axes.flat):
            row, col = divmod(i, n_test_samples)
            sample_id = columns[col][1]
            idx = row_sample_ids[row].index(sample_id)

            if row == 0:
                # Dataset items are stored as Tuple (index, tensor, sample_id)
                image = np.asarray(target_dataset[idx][1].squeeze())
            else:
                image = reconstructions[row_reconstruction_keys[row]][idx].squeeze()

            ax.imshow(image)
            ax.axis("off")

            # Sample label above the top row only
            if row == 0:
                ax.text(
                    0.5,
                    1.1,
                    col_labels[col],
                    va="bottom",
                    ha="center",
                    rotation=45,
                    transform=ax.transAxes,
                )
            # Row label left of the first column only
            if col == 0:
                ax.text(
                    -0.1,
                    0.5,
                    row_labels[row],
                    va="center",
                    ha="right",
                    transform=ax.transAxes,
                )

        self.plots["Image-translation"][to_key][split][param] = fig
        # show_figure(fig)
        plt.show()

    @no_type_check
    def show_2D_translation(
        self,
        result: Result,
        translated_modality: str,
        split: str = "test",
        param: Optional[str] = None,
        reducer: str = "UMAP",
    ) -> None:
        """Compare input data and translated reconstructions in a shared 2D embedding.

        The processed input data of ``translated_modality`` and the flattened
        "translation" reconstructions are restricted to their common sample ids,
        stacked, and reduced to two dimensions together. The result is drawn as
        two facets ("input" and "translated") and stored in
        ``self.plots["2D-translation"][translated_modality][split][param]``.

        Args:
            result: Result object providing datasets, reconstructions and
                sample ids.
            translated_modality: Key of the modality that was translated to.
            split: Split to visualize. Only "test" is implemented.
            param: Metadata column used for coloring. If None, all points get
                the label "all" (column "user-label").
            reducer: Dimension reduction method: "UMAP", "PCA" or "TSNE".

        Raises:
            ValueError: If ``split`` or ``reducer`` is unknown, if the test
                dataset is None, or if there are no translated reconstructions.
            NotImplementedError: If ``split`` is a known split other than "test".

        Note:
            If ``result.new_datasets.test`` is set, it replaces the ``test``
            attribute of the dataset container used here. Labels are matched
            to the plotted points through the metadata index, which is
            assumed to hold the sample ids.
        """
        ## TODO add similar labels/param logic from other visualizations
        reducer_classes = {"UMAP": UMAP, "PCA": PCA, "TSNE": TSNE}

        dataset = result.datasets

        ## Overwrite original datasets with new_datasets if available after predict with other data
        if dataset is None:
            dataset = DatasetContainer()

        if bool(result.new_datasets.test):
            dataset.test = result.new_datasets.test

        if split not in ["train", "valid", "test", "all"]:
            raise ValueError(f"Unknown split: {split}")

        if dataset.test is None:
            raise ValueError("test of dataset is None")

        if split != "test":
            raise NotImplementedError(
                "2D translation visualization is currently only implemented for the 'test' split since reconstruction is only performed on test-split."
            )

        # Fail early with a clear message instead of an unbound reducer later on
        if reducer not in reducer_classes:
            raise ValueError(
                f"Unknown reducer: {reducer}. Please use one of {list(reducer_classes)}"
            )
        reducer_model = reducer_classes[reducer](n_components=2)

        df_processed = dataset.test._to_df(modality=translated_modality)

        # Get translated reconstruction
        tensor_list = result.reconstructions.get(epoch=-1, split=split)[  # ty: ignore
            "translation"
        ]  # ty: ignore
        print(f"len of tensor-list: {len(tensor_list)}")
        tensor_ids = result.sample_ids.get(epoch=-1, split=split)["translation"]
        print(f"len of tensor_ids: {len(tensor_ids)}")

        # Flatten each tensor and collect as rows (for image case)
        rows = [
            t.flatten().cpu().numpy() if isinstance(t, torch.Tensor) else t.flatten()
            for t in tensor_list
        ]
        if not rows:
            raise ValueError("No translated reconstructions available to visualize.")

        # Create DataFrame
        df_translate_flat = pd.DataFrame(
            rows,
            columns=[f"Feature_{i}" for i in range(len(rows[0]))],
            index=tensor_ids,
        )

        # Make sure both frames hold the same samples in the same order
        common_ids = df_processed.index.intersection(df_translate_flat.index)
        df_processed = df_processed.loc[common_ids]
        df_translate_flat = df_translate_flat.loc[common_ids]
        df_translate_flat = df_translate_flat.reindex(df_processed.index)

        X = np.vstack([df_processed.values, df_translate_flat.values])
        df_red_comb = pd.DataFrame(reducer_model.fit_transform(X))

        df_red_comb["origin"] = ["input"] * df_processed.shape[0] + [
            "translated"
        ] * df_translate_flat.shape[0]

        # Labels are looked up by sample id so they stay aligned with the rows
        # that are actually plotted, even if metadata holds more samples or a
        # different order. Ids missing in metadata get NaN.
        if param is None:
            param = "user-label"
            labels = ["all"] * df_processed.shape[0]
        else:
            metadata = dataset.test.datasets[translated_modality].metadata
            labels = metadata[param].reindex(df_processed.index).tolist()

        # Same labels for the "input" block and the "translated" block
        df_red_comb[param] = labels * 2

        g = sns.FacetGrid(
            df_red_comb,
            col="origin",
            hue=param,
            sharex=True,
            sharey=True,
            height=8,
            aspect=1,
        )
        g.map_dataframe(sns.scatterplot, x=0, y=1, alpha=0.7)
        g.add_legend()
        g.set_axis_labels(reducer + " DIM 1", reducer + " DIM 2")
        g.set_titles(col_template="{col_name}")

        self.plots["2D-translation"][translated_modality][split][param] = g
        plt.show()

    ## Utilities specific for X-Modalix
    @staticmethod
    def _plot_translate_latent(
        embedding,
        color_param,
        style_param=None,
    ):
        """Create a scatter plot of a 2D embedding of the latent space.

        Args:
            embedding: DataFrame with the columns "DIM1" and "DIM2", the column
                ``color_param`` and, if given, the column ``style_param``.
                Assumes prior 2D dimension reduction. The DataFrame is not
                modified.
            color_param: Column used to color the points. Non-string columns
                with more than 3 unique values are treated as numeric and drawn
                with the "bwr" palette; other non-string values are converted
                to strings.
            style_param: Optional column (e.g. "modality") used for both the
                facet columns and the marker style. If None, a single
                un-faceted plot is drawn.

        Returns:
            The seaborn grid object returned by ``sns.relplot``.
        """
        labels = list(embedding[color_param])
        numeric = False
        if labels and not isinstance(labels[0], str):
            if len(np.unique(labels)) > 3:
                # TODO Decide if numeric to category should be optional in new Package
                numeric = True
            else:
                labels = [str(x) for x in labels]

        # Labels come straight from the embedding, so they always have one
        # entry per row; work on a copy to leave the caller's frame untouched.
        plot_data = embedding.copy()
        plot_data[color_param] = labels

        facet_options = {}
        if style_param is not None:
            facet_options = {
                "col": style_param,
                "style": style_param,
                "markers": True,
            }

        plot = sns.relplot(
            data=plot_data,
            x="DIM1",
            y="DIM2",
            hue=color_param,
            palette="bwr" if numeric else None,
            alpha=0.4,
            ec="black",
            height=10,
            aspect=1,
            s=150,
            **facet_options,
        )

        return plot

    @staticmethod
    def _plot_latent_ridge_multi(
        lat_space: pd.DataFrame,
        modality: Optional[str] = None,
        labels: Optional[Union[list, pd.Series, None]] = None,
        param: Optional[Union[str, None]] = None,
    ) -> sns.FacetGrid:
        """Create a ridgeline plot of the latent dimensions, faceted by modality.

        Each row shows the density of one latent dimension, each column one
        modality, and each ridge (hue) one group of ``param``.

        Args:
            lat_space: DataFrame with samples as rows, latent dimensions as
                columns, plus the column named by ``modality``.
            modality: Name of the column in ``lat_space`` holding the modality
                of each row. Used as id column and for the facet columns.
            labels: One label per row of ``lat_space``. If None, all samples
                form one group "all". Non-string labels with more than 3 unique
                values are binned into quartiles ("1stQ" .. "4thQ"); other
                non-string labels are converted to strings.
            param: Name of the grouping shown in the legend. Replaced by "all"
                when ``labels`` is None.

        Returns:
            g (sns.FacetGrid): FacetGrid object containing the ridge line plot.
            Rows labelled "unknown" or "nan" are excluded from the plot.
        """
        sns.set_theme(
            style="white", rc={"axes.facecolor": (0, 0, 0, 0)}
        )  ## Necessary to enforce overplotting

        n_latent_dims = len(lat_space.drop(columns=modality).columns)

        df = pd.melt(
            lat_space,
            id_vars=modality,  # ty: ignore
            var_name="latent dim",
            value_name="latent intensity",
        )
        # melt stacks the latent dimensions below each other, so per-sample
        # values have to be repeated once per latent dimension.
        df["sample"] = n_latent_dims * list(lat_space.index)

        if labels is None:
            param = "all"
            # One label per sample; repeated per latent dimension further down
            labels = ["all"] * len(lat_space)
        else:
            labels = list(labels)

        if labels and not isinstance(labels[0], str):
            if len(np.unique(labels)) > 3:
                # Keep every numeric value (ints included); anything else becomes NaN
                numeric_labels = pd.to_numeric(pd.Series(labels), errors="coerce")
                # Convert back to a plain list: multiplying a Series by an int
                # would repeat each string elementwise instead of the sequence.
                labels = (
                    pd.qcut(
                        x=numeric_labels,
                        q=4,
                        labels=["1stQ", "2ndQ", "3rdQ", "4thQ"],
                    )
                    .astype(str)
                    .tolist()
                )
            else:
                labels = [str(x) for x in labels]

        df[param] = n_latent_dims * labels  # type: ignore

        exclude_missing_info = (df[param] == "unknown") | (df[param] == "nan")

        # x-limits from the 5% / 90% quantiles over all groups and latent dims
        known = df.loc[~exclude_missing_info, ["latent intensity", "latent dim", param]]
        grouped = known.groupby([param, "latent dim"], observed=False)
        xmin = grouped.quantile(0.05).min()
        xmax = grouped.quantile(0.9).max()

        n_groups = len(np.unique(df[param]))
        if n_groups > 8:
            cat_pal = sns.husl_palette(n_groups)
        else:
            cat_pal = sns.color_palette(n_colors=n_groups)

        g = sns.FacetGrid(
            df[~exclude_missing_info],
            row="latent dim",
            col=modality,
            hue=param,
            aspect=12,
            height=0.8,
            xlim=(xmin.iloc[0], xmax.iloc[0]),
            palette=cat_pal,
        )

        g.map_dataframe(
            sns.kdeplot,
            "latent intensity",
            bw_adjust=0.5,
            clip_on=True,
            fill=True,
            alpha=0.5,
            warn_singular=False,
            ec="k",
            lw=1,
        )

        def label(data, color, label, text="latent dim"):
            # Write the name of the latent dimension to the left of each ridge
            ax = plt.gca()
            label_text = data[text].unique()[0]
            ax.text(
                0.0,
                0.2,
                label_text,
                fontweight="bold",
                ha="right",
                va="center",
                transform=ax.transAxes,
            )

        g.map_dataframe(label, text="latent dim")

        g.set(xlim=(xmin.iloc[0], xmax.iloc[0]))
        # Set the subplots to overlap
        g.figure.subplots_adjust(hspace=-0.5)

        # Remove axes details that don't play well with overlap
        g.set_titles("")
        g.set(yticks=[], ylabel="")
        g.despine(bottom=True, left=True)

        # Title the first axes with the modality names
        for i, m in enumerate(df[modality].unique()):
            g.figure.get_axes()[i].set_title(m)

        g.add_legend()

        plt.close()
        return g

    def _plot_evaluation(
        self,
        result: Result,
    ) -> dict:
        """Build bar plots of the embedding evaluation scores.

        One plot is created per combination of clinical parameter, metric and
        ML algorithm found in ``result.embedding_evaluation``. The plots are
        also stored in ``self.plots["ML_Evaluation"]``.

        Args:
            result: The Result object containing evaluation data.

        Returns:
            Nested dictionary ``{clinical parameter: {metric: {algorithm: plot}}}``.
        """
        plt.ioff()

        evaluation = result.embedding_evaluation
        ml_plots = dict()

        for clinic_param in pd.unique(evaluation.CLINIC_PARAM):
            is_param = evaluation.CLINIC_PARAM == clinic_param
            ml_plots[clinic_param] = dict()

            for metric in pd.unique(evaluation.loc[is_param, "metric"]):  # ty: ignore
                is_metric = evaluation.metric == metric
                ml_plots[clinic_param][metric] = dict()

                for algorithm in pd.unique(
                    evaluation.loc[is_param & is_metric, "ML_ALG"]
                ):  # ty: ignore
                    is_algorithm = evaluation.ML_ALG == algorithm
                    subset = evaluation[is_metric & is_param & is_algorithm]

                    grid = sns.catplot(
                        data=subset,
                        x="score_split",
                        y="value",
                        col="ML_TASK",
                        row="MODALITY",
                        hue="score_split",
                        kind="bar",
                    )

                    # Start the y-axis at 0 unless there are negative scores
                    lowest = subset.value.min()
                    y_floor = 0 if lowest > 0 else lowest

                    ml_plots[clinic_param][metric][algorithm] = grid.set(
                        ylim=(y_floor, None)
                    )

        self.plots["ML_Evaluation"] = ml_plots

        return ml_plots

`show_image_translation(result, from_key, to_key, n_sample_per_class=3, param=None)`

Visualizes image translation results for a given dataset.

Displays a grid of original, translated, and reference images, grouped by class values. Only the "test" split is currently supported; the method has no `split` argument.

Args:

- result: The result object containing datasets and reconstructions.
- from_key: The source modality key (not directly used in the visualization, but relevant for context).
- to_key: The target modality key. Must correspond to an image dataset (must contain "img").
- n_sample_per_class: Number of samples to display per class value. Default is 3.
- param: The metadata column name used to group samples by class.

Raises:

- ValueError: If `to_key` does not correspond to an image dataset.

Source code in src/autoencodix/visualize/_xmodal_visualizer.py

@no_type_check
def show_image_translation(  # ty: ignore
    self,
    result: Result,
    from_key: str,
    to_key: str,
    n_sample_per_class: int = 3,
    param: Optional[str] = None,
) -> None:  # ty: ignore
    """Visualizes image translation results as a grid of test-split images.

    Draws a three-row grid (original, translated, reference reconstruction) with
    one column per selected sample. Samples are drawn per class value of `param`.
    The figure is stored in `self.plots["Image-translation"][to_key]["test"][param]`
    and then shown with `plt.show()`.

    Args:
        result: The result object containing datasets, sample ids and reconstructions.
        from_key: The source modality key (not used by the visualization itself,
            kept for context).
        to_key: The target modality key. Must refer to an image dataset; this is
            checked by requiring the (case-sensitive) substring "img" in the key.
        n_sample_per_class: Maximum number of samples to display per class value.
            Default is 3.
        param: The metadata column name used to group samples by class. If None,
            a column "user-label" with the single value "all" is used instead.

    Raises:
        ValueError: If `to_key` does not correspond to an image dataset, if no
            sample could be selected, or if a selected sample id is missing from
            the stored sample ids.

    Note:
        Only the "test" split is currently supported. If `param` has more than
        10 class values, a warning is emitted and only the first 10 are plotted.
    """
    if "img" not in to_key:
        raise ValueError(
            f"You provided as 'to_key' {to_key} a non-image dataset. "
            "Image translation grid visualization is only possible for translation to IMG data type."
        )

    split = "test"  # Currently only test split is supported
    test_data = result.datasets.test
    target_dataset = test_data.datasets[to_key]

    # Restrict metadata to the paired sample ids. The explicit copy keeps the
    # default-label column below from touching the dataset's own metadata.
    meta = target_dataset.metadata.loc[test_data.paired_sample_ids].copy()

    if param is None:
        param = "user-label"
        meta[param] = "all"  # Default to all samples if no parameter is provided

    # Get possible class values
    class_values = meta[param].unique()
    if len(class_values) > 10:
        warnings.warn(
            f"Found {len(class_values)} class values for parameter '{param}'. Only first 10 will be used to limit figure size"
        )
        class_values = class_values[:10]

    # Sample ids per class value (at most n_sample_per_class per class).
    # The fixed random_state keeps the selection reproducible between calls.
    sample_per_class = {}
    for class_value in class_values:
        members = meta[meta[param] == class_value]
        sample_per_class[class_value] = members.sample(
            n=min(n_sample_per_class, len(members)),
            random_state=42,
        ).index.tolist()

    print(f"Sample per class: {sample_per_class}")

    # One grid column per selected sample. Keeping (class value, sample id)
    # pairs avoids parsing ids back out of the label text, which breaks for
    # non-string values and for values that contain "sample:".
    columns = []
    for class_value, class_sample_ids in sample_per_class.items():
        columns.extend((class_value, sample_id) for sample_id in class_sample_ids)

    n_test_samples = len(columns)
    if n_test_samples == 0:
        raise ValueError(
            f"No samples available to plot for parameter '{param}' in split '{split}'."
        )

    # Sample-id order of each image source, materialized once instead of once
    # per grid cell.
    reference_key = f"reference_{to_key}_to_{to_key}"
    latest_sample_ids = result.sample_ids.get(epoch=-1, split="test")
    original_ids = list(target_dataset.sample_ids)
    translated_ids = list(latest_sample_ids["translation"])
    reference_ids = list(latest_sample_ids[reference_key])

    reconstructions = result.reconstructions.get(epoch=-1, split=split)
    translated_images = reconstructions["translation"]
    reference_images = reconstructions[reference_key]

    row_labels = ["Original", "Translated", "Reference"]

    # squeeze=False guarantees a 2D axes array even for a single column.
    fig, axes = plt.subplots(
        ncols=n_test_samples,  # One column per selected sample
        nrows=len(row_labels),  # Original, translated, reference
        figsize=(n_test_samples * 2, 3 * 2),
        squeeze=False,
    )

    for col, (class_value, sample_id) in enumerate(columns):
        # Dataset items are stored as Tuple (index, tensor, sample_id)
        original_image = target_dataset[original_ids.index(sample_id)][1].squeeze()
        column_images = (
            np.asarray(original_image),
            translated_images[translated_ids.index(sample_id)].squeeze(),
            reference_images[reference_ids.index(sample_id)].squeeze(),
        )

        for row, image in enumerate(column_images):
            ax = axes[row, col]
            ax.imshow(image)
            ax.axis("off")

            # Sample label above the top row only
            if row == 0:
                ax.text(
                    0.5,
                    1.1,
                    f"{class_value} {split}-sample:{sample_id}",
                    va="bottom",
                    ha="center",
                    rotation=45,
                    transform=ax.transAxes,
                )

            # Row label left of the first column only
            if col == 0:
                ax.text(
                    -0.1,
                    0.5,
                    row_labels[row],
                    va="center",
                    ha="right",
                    transform=ax.transAxes,
                )

    self.plots["Image-translation"][to_key][split][param] = fig
    plt.show()

Visualize Module

GeneralVisualizer

show_latent_space(result, plot_type='2D-scatter', labels=None, param=None, epoch=None, split='all', n_downsample=10000, **kwargs)

show_weights()

Visualizer

make_loss_plot(df_plot, plot_type) staticmethod

plot_2D(embedding, labels, param=None, layer='latent space', figsize=(24, 15), center=True, plot_numeric=False, xlim=None, ylim=None, scale=None, no_leg=False) staticmethod

plot_evaluation(result)

plot_latent_ridge(lat_space, labels=None, param=None) staticmethod

plot_model_weights(model)

save_plots(path, which='all', format='png')

show_evaluation(param, metric, ml_alg=None)

show_latent_space(result, plot_type='2D-scatter', labels=None, param=None, epoch=None, split='all', **kwargs)

show_loss(plot_type='absolute')

show_weights()

XModalVisualizer

show_image_translation(result, from_key, to_key, n_sample_per_class=3, param=None)

`GeneralVisualizer`

`show_latent_space(result, plot_type='2D-scatter', labels=None, param=None, epoch=None, split='all', n_downsample=10000, **kwargs)`

`show_weights()`

`Visualizer`

`make_loss_plot(df_plot, plot_type)` `staticmethod`

`plot_2D(embedding, labels, param=None, layer='latent space', figsize=(24, 15), center=True, plot_numeric=False, xlim=None, ylim=None, scale=None, no_leg=False)` `staticmethod`

`plot_evaluation(result)`

`plot_latent_ridge(lat_space, labels=None, param=None)` `staticmethod`

`plot_model_weights(model)`

`save_plots(path, which='all', format='png')`

`show_evaluation(param, metric, ml_alg=None)`

`show_latent_space(result, plot_type='2D-scatter', labels=None, param=None, epoch=None, split='all', **kwargs)`

`show_loss(plot_type='absolute')`

`show_weights()`

`XModalVisualizer`

`show_image_translation(result, from_key, to_key, n_sample_per_class=3, param=None)`