Skip to content

Ontix Module

Ontix

Bases: BasePipeline

Ontix specific version of the BasePipeline class.

Inherits preprocess, fit, predict, evaluate, and visualize methods from BasePipeline.

This class extends BasePipeline. See the parent class for a full list of attributes and methods.

Additional Attributes

_default_config: Is set to OntixConfig here.

Source code in src/autoencodix/ontix.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
class Ontix(BasePipeline):
    """Ontix specific version of the BasePipeline class.

    Inherits preprocess, fit, predict, evaluate, and visualize methods from BasePipeline.

    This class extends BasePipeline. See the parent class for a full list
    of attributes and methods.

    Additional Attributes:
        _default_config: Is set to OntixConfig here.

    """

    def __init__(
        self,
        ontologies: Union[Tuple, List],  # Addition to Varix; mandatory for Ontix
        sep: Optional[str] = "\t",  # Addition to Varix; separator used when reading ontology files
        data: Optional[Union[DataPackage, DatasetContainer]] = None,
        trainer_type: Type[BaseTrainer] = OntixTrainer,
        dataset_type: Type[BaseDataset] = NumericDataset,
        model_type: Type[BaseAutoencoder] = OntixArchitecture,
        loss_type: Type[BaseLoss] = VarixLoss,
        preprocessor_type: Type[BasePreprocessor] = GeneralPreprocessor,
        visualizer: Type[BaseVisualizer] = GeneralVisualizer,
        evaluator: Optional[Type[BaseEvaluator]] = GeneralEvaluator,
        result: Optional[Result] = None,
        datasplitter_type: Type[DataSplitter] = DataSplitter,
        custom_splits: Optional[Dict[str, np.ndarray]] = None,
        config: Optional[DefaultConfig] = None,
    ) -> None:
        """Initialize Ontix pipeline with customizable components.

        Some components are passed as types rather than instances because they require
        data that is only available after preprocessing.

        See parent class for full list of Arguments.

        Raises:
            TypeError: if ontologies are not a Tuple or List, or if the resolved
                config is not an OntixConfig.
            ValueError: if ontologies are given as a list of file paths but sep is None.

        """
        self._default_config = OntixConfig()
        # A tuple is treated as already-parsed ontology dicts; a list is treated
        # as file paths that still need to be read via _read_ont_file.
        if isinstance(ontologies, tuple):
            self.ontologies = ontologies
        elif isinstance(ontologies, list):
            if sep is None:
                raise ValueError(
                    "If ontologies are provided as a list, the seperator 'sep' cannot be None. "
                )
            self.ontologies = tuple(
                self._read_ont_file(ont_file, sep=sep) for ont_file in ontologies
            )
        else:
            raise TypeError(
                f"Expected ontologies to be of type tuple or list, got {type(ontologies)}."
            )

        super().__init__(
            data=data,
            dataset_type=dataset_type,
            trainer_type=trainer_type,
            model_type=model_type,
            loss_type=loss_type,
            preprocessor_type=preprocessor_type,
            visualizer=visualizer,
            evaluator=evaluator,
            result=result,
            datasplitter_type=datasplitter_type,
            config=config,
            custom_split=custom_splits,
            ontologies=self.ontologies,
        )
        # Guard against a user-supplied config of the wrong (non-Ontix) type.
        if not isinstance(self.config, OntixConfig):
            raise TypeError(
                f"For Ontix Pipeline, we only allow OntixConfig as type for config, got {type(self.config)}"
            )

    def _read_ont_file(self, file_path: str, sep: str = "\t") -> dict:
        """Read a text file of ontology pairs (child - separator - parent) into a dictionary.

        Each non-empty line must contain at least two fields: the child id in the
        first column and its parent id in the second column.

        Args:
            file_path: Path to file with ontology.
            sep: Separator used in file.

        Returns:
            ont_dic: Dictionary mapping each parent id to the list of its child ids,
                as described in the text file.

        Raises:
            ValueError: If a non-empty line has fewer than two separated fields
                (previously this surfaced as an uninformative IndexError).

        """
        ont_dic: Dict[str, List[str]] = {}
        with open(file_path, "r") as ont_file:
            for line_no, line in enumerate(ont_file, start=1):
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                fields = stripped.split(sep)
                if len(fields) < 2:
                    raise ValueError(
                        f"Malformed ontology line {line_no} in '{file_path}': "
                        f"expected at least two fields separated by {sep!r}."
                    )
                id_child, id_parent = fields[0], fields[1]
                # setdefault creates the child list on first sight of a parent.
                ont_dic.setdefault(id_parent, []).append(id_child)

        return ont_dic

__init__(ontologies, sep='\t', data=None, trainer_type=OntixTrainer, dataset_type=NumericDataset, model_type=OntixArchitecture, loss_type=VarixLoss, preprocessor_type=GeneralPreprocessor, visualizer=GeneralVisualizer, evaluator=GeneralEvaluator, result=None, datasplitter_type=DataSplitter, custom_splits=None, config=None)

Initialize Ontix pipeline with customizable components.

Some components are passed as types rather than instances because they require data that is only available after preprocessing.

See parent class for full list of Arguments.

Raises:

TypeError: if ontologies are not a Tuple or List.

Source code in src/autoencodix/ontix.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def __init__(
    self,
    ontologies: Union[Tuple, List],  # Addition to Varix; mandatory for Ontix
    sep: Optional[str] = "\t",  # Addition to Varix; separator used when reading ontology files
    data: Optional[Union[DataPackage, DatasetContainer]] = None,
    trainer_type: Type[BaseTrainer] = OntixTrainer,
    dataset_type: Type[BaseDataset] = NumericDataset,
    model_type: Type[BaseAutoencoder] = OntixArchitecture,
    loss_type: Type[BaseLoss] = VarixLoss,
    preprocessor_type: Type[BasePreprocessor] = GeneralPreprocessor,
    visualizer: Type[BaseVisualizer] = GeneralVisualizer,
    evaluator: Optional[Type[BaseEvaluator]] = GeneralEvaluator,
    result: Optional[Result] = None,
    datasplitter_type: Type[DataSplitter] = DataSplitter,
    custom_splits: Optional[Dict[str, np.ndarray]] = None,
    config: Optional[DefaultConfig] = None,
) -> None:
    """Initialize Ontix pipeline with customizable components.

    Some components are passed as types rather than instances because they require
    data that is only available after preprocessing.

    See parent class for full list of Arguments.

    Raises:
        TypeError: if ontologies are not a Tuple or List, or if the resolved
            config is not an OntixConfig.
        ValueError: if ontologies are given as a list of file paths but sep is None.

    """
    # Ontix always uses its own config type as the default.
    self._default_config = OntixConfig()
    # A tuple is treated as already-parsed ontology dicts; a list is treated
    # as file paths that still need to be read via _read_ont_file.
    if isinstance(ontologies, tuple):
        self.ontologies = ontologies
    elif isinstance(ontologies, list):
        if sep is None:
            raise ValueError(
                "If ontologies are provided as a list, the seperator 'sep' cannot be None. "
            )
        ontologies_dict_list = [
            self._read_ont_file(ont_file, sep=sep) for ont_file in ontologies
        ]
        self.ontologies = tuple(ontologies_dict_list)
    else:
        raise TypeError(
            f"Expected ontologies to be of type tuple or list, got {type(ontologies)}."
        )

    super().__init__(
        data=data,
        dataset_type=dataset_type,
        trainer_type=trainer_type,
        model_type=model_type,
        loss_type=loss_type,
        preprocessor_type=preprocessor_type,
        visualizer=visualizer,
        evaluator=evaluator,
        result=result,
        datasplitter_type=datasplitter_type,
        config=config,
        custom_split=custom_splits,
        ontologies=self.ontologies,  # parsed ontologies are forwarded to the base pipeline
    )
    # Guard against a user-supplied config of the wrong (non-Ontix) type.
    if not isinstance(self.config, OntixConfig):
        raise TypeError(
            f"For Ontix Pipeline, we only allow OntixConfig as type for config, got {type(self.config)}"
        )