Skip to content

Pydantic config models


Parse and validate training and evaluation parameters from shell & config files

KerasModelConfig

Bases: BaseModel

Config for a keras Model consisting of several Keras Layers

Source code in conftrainer/configs/base.py
class KerasModelConfig(BaseModel):
    """Describe a Keras model as a named, ordered collection of layer configs."""
    name: str = Field(default="classifier", description="name of the network")
    layers: List[ObjInitConfig] = Field(default_factory=list,
                                        description="list of layers to create")

ObjInitConfig

Bases: BaseModel

Config for initializing an instance of a given class with given args

Source code in conftrainer/configs/base.py
class ObjInitConfig(BaseModel):
    """Describe how to build one object: a class name plus its constructor kwargs."""
    name: str = Field(description="Cls of the instance to initialize")
    args: Dict[str, Any] = Field(default_factory=dict,
                                 description="Arguments to pass to __init__")

YAMLParser

Bases: BaseModel

Abstract class on top of BaseModel

Source code in conftrainer/configs/base.py
class YAMLParser(BaseModel):
    """
    Abstract class on top of BaseModel that can fill its fields from a YAML
    file, from shell arguments, or from a merge of both.
    """

    @classmethod
    def parse_yaml(cls, file_path: str, encoding: str = "utf-8") -> "YAMLParser":
        """
        Parse and validate arguments from a .yaml file

        Parameters
        ----------
        file_path : str
            path to the yaml file
        encoding : str, optional, default:utf-8
            encoding to use when opening the file

        Returns
        -------
        out : YAMLParser
            validated model (an instance of the calling subclass)
        """
        # "r" instead of "r+": parsing only reads the file, and read/write
        # mode would fail on read-only config files.
        with open(file_path, "r", encoding=encoding) as file:
            obj = yaml.safe_load(file)
        return cls.parse_obj(obj)

    @classmethod
    def shell_parser(cls, **kwargs):
        """Create an argument parser from the model's fields"""
        parser = argparse.ArgumentParser()
        parser.add_argument("--config_path", type=str, default=None,
                            help="Path to yaml config file to parse the arguments from")
        # One argument group per model field, added by the project helper
        add_parser_groups_from_model(parser, model=cls, **kwargs)
        return parser

    @classmethod
    def parse_from_shell(cls, **parser_kwargs) -> "YAMLParser":
        """Parse the model arguments from shell"""
        parser = cls.shell_parser(**parser_kwargs)
        argcomplete.autocomplete(parser)
        args = parser.parse_args()
        # Create a nested dict containing a dict for each arg group.
        # NOTE(review): relies on argparse's private attributes
        # (_action_groups / _group_actions); may break across argparse versions.
        arg_groups = {group.title: {a.dest.rsplit(".", 1)[-1]: getattr(args, a.dest, None)
                                    for a in group._group_actions}
                      for group in parser._action_groups}

        config = cls.merge_args(shell_args=arg_groups,
                                config_path=args.config_path)
        return config

    @classmethod
    def merge_args(cls, shell_args: dict, config_path: str) -> "YAMLParser":
        """
        Merge parsed shell and config file arguments. If a parameter is present in
        shell arguments, it will override the one from the config file

        Parameters
        ----------
        shell_args : dict
            arguments parsed from shell via argparse, grouped per model field
        config_path : str
            path to load a config file

        Returns
        -------
        out : YAMLParser
            pydantic model with merged args
        """

        if config_path is not None:
            try:
                yaml_config = cls.parse_yaml(file_path=config_path)
            except ParserError as exc:
                raise ParserError("Please provide valid yaml path to config_path arg") from exc
            # Config file is the base; non-None shell arguments override it
            config = yaml_config.fill_args(fill_dict=shell_args)
        else:
            # No config file: validate the shell arguments alone
            config = cls.parse_obj(shell_args)

        return config

    @staticmethod
    def update_dict(old_dict: dict, new_dict: dict) -> dict:
        """
        Fill non-null values of one dictionary to another. Keys not already
        present in ``old_dict`` are ignored.

        Returns
        -------
        out : dict
            updated dictionary (``old_dict`` itself when ``new_dict`` is None,
            a shallow copy otherwise)
        """
        if new_dict is None:
            return old_dict
        updated = old_dict.copy()
        for k in old_dict:
            shell_arg = new_dict.get(k, None)
            if shell_arg is not None:
                updated[k] = shell_arg

        return updated

    def fill_args(self, fill_dict: Dict[Any, Dict]) -> BaseModel:
        """
        Fill the model with given potentially nested dictionary's values.

        Parameters
        ----------
        fill_dict : Dict[Any, Dict]
            nested dictionary with arguments

        Returns
        -------
        out : BaseModel
            Pydantic Model
        """
        current = self.dict()
        updated = current.copy()
        for k, v in current.items():
            new_value = fill_dict.get(k, None)
            if isinstance(v, dict):
                updated[k] = self.update_dict(old_dict=current[k], new_dict=new_value)
            else:
                # "is not None" rather than plain truthiness, so falsy shell
                # values (0, False, "") can still override the config file —
                # consistent with update_dict above.
                updated[k] = new_value if new_value is not None else v
        return self.parse_obj(obj=updated)

    @classmethod
    def generate_dummy_model(cls):
        """
        Generate a dict with field names as keys and null as values. If the type of any field is a
        model or a Union of models, it will be treated as a model. This works only 1 level
        deep, i.e. if a subfield of any subfield is a model too, we'll still assign it None
        """
        dummy_model = {}
        # __fields__ is pydantic's mapping of field name -> field metadata
        fields_data = cls.__fields__
        for field_name, field_data in fields_data.items():
            fill_single_field(dummy_model, field_name, field_data)

        return dummy_model

fill_args(fill_dict)

Fill the model with given potentially nested dictionary's values.

Parameters:

Name Type Description Default
fill_dict Dict[Any, Dict]

nested dictionary with arguments

required

Returns:

Name Type Description
out BaseModel

Pydantic Model

Source code in conftrainer/configs/base.py
def fill_args(self, fill_dict: Dict[Any, Dict]) -> BaseModel:
    """
    Fill the model with given potentially nested dictionary's values.

    Parameters
    ----------
    fill_dict : Dict[Any, Dict]
        nested dictionary with arguments

    Returns
    -------
    out : BaseModel
        Pydantic Model
    """
    updated = self.dict().copy()
    for k, v in self.dict().items():
        new_value = fill_dict.get(k, None)
        if isinstance(v, dict):
            updated[k] = self.update_dict(old_dict=self.dict()[k], new_dict=new_value)
        else:
            updated[k] = new_value if new_value else v
    return self.parse_obj(obj=updated)

generate_dummy_model() classmethod

Generate a dict with field names as keys and null as values. If the type of any field is a model or a Union of models, it will be treated as a model. This works only 1 level deep, i.e. if a subfield of any subfield is a model too, we'll still assign it None

Source code in conftrainer/configs/base.py
@classmethod
def generate_dummy_model(cls):
    """
    Generate a dict with field names as keys and null as values. If the type of any field is a
    model or a Union of models, it will be treated as a model. This works only 1 level
    deep, i.e. if a subfield of any subfield is a model too, we'll still assign it None
    """
    dummy_model = {}
    # __fields__ is pydantic's mapping of field name -> field metadata
    fields_data = cls.__fields__
    for field_name, field_data in fields_data.items():
        # fill_single_field (project helper) decides whether the field is a
        # (sub)model to expand or a leaf to set to None
        fill_single_field(dummy_model, field_name, field_data)

    return dummy_model

merge_args(shell_args, config_path) classmethod

Merge parsed shell and config file arguments. If parameter is present in shell arguments, it will override the one from config file

Parameters:

Name Type Description Default
shell_args Namespace

arguments parsed from shell via argparse

required
config_path str

path to load a config file

required

Returns:

Name Type Description
out YAMLParser

pydantic model with merged args

Source code in conftrainer/configs/base.py
@classmethod
def merge_args(cls, shell_args: dict, config_path: str) -> type(BaseModel):
    """
    Merge parsed shell and config file arguments. If parameter is present in shell arguments,
    it will override the one from config file

    Parameters
    ----------
    shell_args : dict
        arguments parsed from shell via argparse, grouped per model field
    config_path : str
        path to load a config file

    Returns
    -------
    out : YAMLParser
        pydantic model with merged args
    """

    if config_path is not None:
        try:
            yaml_config = cls.parse_yaml(file_path=config_path)
        except ParserError as exc:
            raise ParserError("Please provide valid yaml path to config_path arg") from exc
        # Config file is the base; non-None shell arguments override it
        config = yaml_config.fill_args(fill_dict=shell_args)
    else:
        # No config file: validate the shell arguments alone
        config = cls.parse_obj(shell_args)

    return config

parse_from_shell(**parser_kwargs) classmethod

Parse the model arguments from shell

Source code in conftrainer/configs/base.py
@classmethod
def parse_from_shell(cls, **parser_kwargs) -> type(BaseModel):
    """Parse the model arguments from shell"""
    parser = cls.shell_parser(**parser_kwargs)
    argcomplete.autocomplete(parser)
    args = parser.parse_args()
    # Create a nested dict containing a dict for each arg group.
    # NOTE(review): relies on argparse's private attributes
    # (_action_groups / _group_actions); may break across argparse versions.
    arg_groups = {group.title: {a.dest.rsplit(".", 1)[-1]: getattr(args, a.dest, None)
                                for a in group._group_actions}
                  for group in parser._action_groups}

    config = cls.merge_args(shell_args=arg_groups,
                            config_path=args.config_path)
    return config

parse_yaml(file_path, encoding='utf-8') classmethod

Parse and validate arguments from a .yaml file

Parameters:

Name Type Description Default
file_path str

path to the yaml file

required
encoding str, optional, default:utf-8

encoding to use when opening the file

'utf-8'

Returns:

Name Type Description
out TrainConfig

validated model

Source code in conftrainer/configs/base.py
@classmethod
def parse_yaml(cls, file_path: str, encoding: str = "utf-8") -> type(BaseModel):
    """
    Parse and validate arguments from a .yaml file

    Parameters
    ----------
    file_path : str
        path to the yaml file
    encoding : str, optional, default:utf-8
        encoding to use when opening the file

    Returns
    -------
    out : YAMLParser
        validated model (an instance of the calling subclass)
    """
    # "r" instead of "r+": parsing only reads the file, and read/write mode
    # would fail on read-only config files.
    with open(file_path, "r", encoding=encoding) as file:
        obj = yaml.safe_load(file)
    return cls.parse_obj(obj)

shell_parser(**kwargs) classmethod

Create an argument parser from the model's fields

Source code in conftrainer/configs/base.py
@classmethod
def shell_parser(cls, **kwargs):
    """Create an argument parser from the model's fields"""
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path", type=str, default=None,
                        help="Path to yaml config file to parse the arguments from")
    # One argument group per model field, added by the project helper
    add_parser_groups_from_model(parser, model=cls, **kwargs)
    return parser

update_dict(old_dict, new_dict) staticmethod

Fill non-null values of one dictionary to another.

Returns:

Name Type Description
out dict

updated dictionary

Source code in conftrainer/configs/base.py
@staticmethod
def update_dict(old_dict: dict, new_dict: dict) -> dict:
    """
    Overlay the non-None values of ``new_dict`` onto a copy of ``old_dict``.

    Only keys already present in ``old_dict`` are considered; extra keys in
    ``new_dict`` are ignored. When ``new_dict`` is None the original mapping
    is returned unchanged.

    Returns
    -------
    out : dict
        updated dictionary
    """
    if new_dict is None:
        return old_dict
    merged = dict(old_dict)
    overrides = {key: new_dict[key] for key in old_dict
                 if new_dict.get(key) is not None}
    merged.update(overrides)
    return merged

options: docstring_style: numpy members_order: source


Network specific pydantic models

BYOLConfig

Bases: NetConfig

Configuration for a BYOL object

Source code in conftrainer/configs/networks.py
class BYOLConfig(NetConfig):
    """Configuration for a BYOL object"""
    # Literal field acts as the discriminator when parsing Union[...] net configs
    framework: Literal["BYOL"] = Field(default="BYOL", description="Framework to use")
    # NOTE(review): presumably the EMA coefficient mixing target/online weights —
    # confirm the exact update formula in the BYOL trainer
    initial_beta: confloat(ge=0, le=1) = Field(default=0.996, description="Initial beta to mix "
                                                                          "target and online nets")
    update_frequency: int = Field(default=1, description="Frequency to update the target net")
    online_aug_args: KerasModelConfig = Field(default=BYOL_ONLINE_AUGMENTOR,
                                              description="Online augmentor config")
    target_aug_args: KerasModelConfig = Field(default=BYOL_TARGET_AUGMENTOR,
                                              description="Target augmentor config")
    mlp_args: KerasModelConfig = Field(default=BYOL_MLP, description="MLP arguments for BYOLs "
                                                                     "online and target branches")

    @property
    def task_type(self):
        """Define the task type for a BYOL network. It is always 'byol'."""
        return "byol"

task_type property

Define task type for BYOL network. It's always byol

Backbone

Bases: BaseModel

Configurations for a backbone with hidden layers on top of it

Source code in conftrainer/configs/networks.py
class Backbone(BaseModel):
    """Configurations for a backbone with hidden layers on top of it"""
    name: Optional[str] = Field(default='ResNet50',
                                description="Name of the backbone to load from "
                                            "tf.keras.applications, or kecam. Is overriden by "
                                            "load_path")
    # Keyword arguments forwarded to the keras-application constructor
    params: Optional[dict] = Field(default=KERAS_APPLICATION_PARAMS)
    cut_from: Optional[str] = Field(default=None, description="Name of the layer to cut the head of the backbone "
                                                              "from. Useful when replacing some last conv blocks.")
    load_path: Optional[str] = Field(default=None, description="Path to load a backbone from disk")

    def __init__(self, **kwargs):
        """Validate fields; a given load_path takes precedence over name/params."""
        super().__init__(**kwargs)
        # Loading from disk wins: drop the build-by-name settings
        if self.load_path is not None:
            self.name = None
            self.params = None

BranchConfig

Bases: BaseModel

Configuration for a single branch of a multibranch network

Source code in conftrainer/configs/networks.py
class BranchConfig(BaseModel):
    """Configuration for a single branch of a multibranch network"""
    name: str = Field(default="branch1", description="Name of the branch. Will be used as an output layer name")
    loss_and_metrics: LossAndMetrics = Field(default_factory=LossAndMetrics,
                                             description="Configuration for losses and metrics")
    # Class labels predicted by this branch; their count drives the output size
    classes: List[str] = Field(default_factory=list)
    task_type: Literal['multiclass', 'multilabel'] = 'multiclass'
    # Optional hidden layers between the shared backbone and this branch's output
    mlp: Optional[KerasModelConfig] = Field(default_factory=KerasModelConfig)

    @property
    def num_classes(self):
        """Total number of trainable classes. Will be used to generate the output layer"""
        return len(self.classes)

num_classes property

Total number of trainable classes. Will be used to generate the output layer

ClassifierConfig

Bases: BaseClassifierConfig

Configuration for a classifier object

Source code in conftrainer/configs/networks.py
class ClassifierConfig(BaseClassifierConfig):
    """Configuration for a classifier object"""
    # Literal field acts as the discriminator when parsing Union[...] net configs
    framework: Literal["Classifier"] = Field(default='Classifier', description="Framework to use")
    task_type: Literal["multilabel", "multiclass"] = Field(default='multilabel',
                                                           description="Type of the task")
    # excluded from .dict()/serialization — derived from the data config at train time
    num_classes: Optional[conint(ge=0)] = Field(default=0,
                                                description="Number of classes in final classification layer",
                                                exclude=True)

MultiBranchNetworkConfig

Bases: BaseClassifierConfig

Configuration for creating a multibranch network

Source code in conftrainer/configs/networks.py
class MultiBranchNetworkConfig(BaseClassifierConfig):
    """Configuration for creating a multibranch network"""
    branches: List[BranchConfig] = Field(default_factory=list, description="Configurations for output "
                                                                           "branches of the network")

    @property
    def task_type(self):
        """Task type to use in enums. Always 'multibranch' for this network."""
        return 'multibranch'

task_type property

Task type to use in enums

NetConfig

Bases: BaseModel

Config for building/loading a network

Source code in conftrainer/configs/networks.py
class NetConfig(BaseModel):
    """
    Config for building/loading a network
    """
    framework: str = Field(default='', description="Framework to use")
    backbone: Optional[Backbone] = Field(default_factory=Backbone, description="Backbone configuration")
    name: str = Field(default='test_model', description="Name of the network")
    # NOTE(review): mutable list default — pydantic v1 deep-copies field
    # defaults per instance, so this should be safe; confirm
    input_shape: conlist(item_type=conint(ge=0)) = Field(default=[224, 224, 3],
                                                         description="Input shape of the network")
    mlp_args: Optional[KerasModelConfig] = Field(default=KerasModelConfig(),
                                                 description="Classification head configuration")

    def __init__(self, **kwargs):
        """Validate fields and propagate input_shape into the backbone params."""
        super().__init__(**kwargs)
        # Keep the backbone's keras-application kwargs in sync with the
        # network-level input shape
        if self.backbone is not None:
            if isinstance(self.backbone.params, dict):
                self.backbone.params['input_shape'] = self.input_shape

PreprocessingConfig

Bases: BaseModel

Configuration for creating a preprocessing layer

Source code in conftrainer/configs/networks.py
class PreprocessingConfig(BaseModel):
    """
    Configuration for creating a preprocessing layer
    """
    # Optional fields with no explicit default: pydantic v1 makes them None
    scale: Optional[float] = Field(description="Scale factor for input. All pixels "
                                               "will be divided by this number")
    offset: Optional[float] = Field(description="Offset to apply to inputs after "
                                                "rescaling")
    mean: Optional[Union[List, float]] = Field(description="Mean value to use during "
                                                           "normalization")
    variance: Optional[Union[List, float]] = Field(description="Variance to use during"
                                                               " normalization")
    name: Optional[str] = Field(description="the name of the block", default='preprocessor')

    def __init__(self, **kwargs):
        """Validate fields and normalize the scale factor."""
        super().__init__(**kwargs)
        # Accept divisors for convenience: a scale such as 255 is converted
        # to the multiplicative factor 1/255
        if self.scale:
            if self.scale > 1:
                self.scale = 1 / self.scale

options: docstring_style: numpy members_order: source


LossAndMetrics

Bases: BaseModel

Configuration for the loss & metrics for training

Source code in conftrainer/configs/optimization.py
class LossAndMetrics(BaseModel):
    """Configuration for the loss & metrics for training"""
    loss: str = Field(default='', description="Name of the loss function to import from " \
                                              "tensorflow, "
                                              "tensorflow addons or losses.py of this project")
    loss_kwargs: dict = Field(default_factory=dict,
                              description="Keyword arguments for the imported "
                                          "loss")

    metrics: List[ObjInitConfig] = Field(default_factory=list,
                                         description="Metrics to monitor during training. Each "
                                                     "item should be a name of the metric to "
                                                     "import as key and it's parameters dict as "
                                                     "value")
    # float (was int) so fractional weights such as 0.5 are accepted;
    # integer values still validate, so existing configs keep working
    loss_weight: float = Field(default=1, description="Weight of the loss in the total loss. Usable only when "
                                                      "there are multiple losses")

OptimizationConfig

Bases: BaseModel

Configuration for training loop

Source code in conftrainer/configs/optimization.py
class OptimizationConfig(BaseModel):
    """
    Configuration for training loop
    """
    optimizer: str = Field(default="Adam", description="Name of the optimizer to import from "
                                                       "tensorflow or tensorflow addons")
    optimizer_kwargs: dict = Field(default_factory=dict,
                                   description="Keyword arguments for imported optimizer")
    learning_rate: confloat(ge=0) = Field(default=0.001, description="Initial learning rate to use")
    # Implicit Optional default is None — presumably a constant learning
    # rate is used then; confirm against the trainer
    schedule: Optional[str] = Field(description="Name of the learning rate scheduler to use if any")
    schedule_kwargs: dict = Field(default_factory=dict,
                                  description="Keyword arguments for the scheduler")
    batch_size: conint(ge=0) = Field(default=32, description="Size of the batch to feed the "
                                                             "network")
    epochs: conint(ge=0) = Field(default=50, description="Maximal number of epochs to train the "
                                                         "network")
    fit_kwargs: dict = Field(default_factory=dict,
                             description="Additional arguments for model.fit, "
                                         "like validation step number")

options: docstring_style: numpy members_order: source


Parse and validate training and evaluate parameters from shell & config files

BYOLTrainConfig

Bases: TrainConfig

Configuration for byol training

Source code in conftrainer/configs/training.py
class BYOLTrainConfig(TrainConfig):
    """Configuration for byol training"""
    net_config: BYOLConfig = Field(default_factory=BYOLConfig,
                                   description="BYOL network configuration")
    # Default loss taken from the per-framework defaults enum
    loss_and_metrics: LossAndMetrics = Field(default=DefaultLossConfig.BYOL.value)

BaseTrainConfig

Bases: YAMLParser

Basic train configuration

Source code in conftrainer/configs/training.py
class BaseTrainConfig(YAMLParser):
    """Basic train configuration"""

    net_config: Any = Field(description="Configuration for Network architecture")
    loss_and_metrics: LossAndMetrics = Field(default_factory=LossAndMetrics,
                                             description="Configuration for loss and metrics")
    pretrain_config: CNNOptimizationConfig = Field(default_factory=CNNOptimizationConfig,
                                                   description="Configurations of pretraining phase")

    finetune_config: FinetuneConfig = Field(default_factory=FinetuneConfig,
                                            description="Configurations for finetuning phase")
    callbacks_config: CallbacksConfig = Field(default_factory=CallbacksConfig,
                                              description="Configuration for creating callbacks")
    data_config: OneOutputDataConfig = Field(default_factory=OneOutputDataConfig,
                                             description="Configuration of creating datasets")
    save_config: SaveLogConfig = Field(default_factory=SaveLogConfig,
                                       description="Configuration of save paths for network and "
                                                   "its logs")

    def __init__(self, **kwargs):
        """Copy the missing fields from pretraining config to fine tuning config"""
        super().__init__(**kwargs)
        pt_conf = self.pretrain_config.dict().copy()
        ft_conf = self.finetune_config.dict().copy()
        # NOTE(review): .dict() includes defaults, so finetune defaults (not
        # only explicitly-set values) override pretraining values here; if only
        # user-set fields should win, exclude_unset=True would be needed —
        # confirm intent.
        pt_conf.update(ft_conf)
        self.finetune_config = FinetuneConfig(**pt_conf)

    @classmethod
    def parse_from_shell(cls, delim=".") -> type(BaseModel):
        """Parse and validate the arguments, then set up the timestamped save
        paths via postprocess()"""
        config = super().parse_from_shell(delim=delim)
        config.postprocess()

        return config

    def postprocess(self):
        """Postprocess the configuration by adding a timestamp to model save dir name"""
        # Unix timestamp makes each training run's folder unique
        timestamp = int(time())
        model_save_folder = os.path.join(self.save_config.base_save_path,
                                         f"{self.net_config.name}_{timestamp}")
        self.save_config.set_save_paths(model_folder=model_save_folder)

    def save(self, filepath: str = None):
        """Save the configuration to the given filepath.

        Parameters
        ----------
        filepath : str, optional
            where to write the YAML copy; defaults to
            ``<model_folder>/train_config.yaml``
        """
        if filepath is None:
            filepath = os.path.join(self.save_config.model_folder, "train_config.yaml")
        # Create the directory of the file actually being written — the
        # previous code always created model_folder, which is wrong (and may
        # fail) for a custom filepath outside of it.
        os.makedirs(os.path.dirname(filepath) or ".", exist_ok=True)
        save_yaml(self.dict(), filepath=filepath)

__init__(**kwargs)

Copy the missing fields from pretraining config to fine tuning config

Source code in conftrainer/configs/training.py
def __init__(self, **kwargs):
    """Copy the missing fields from pretraining config to fine tuning config"""
    super().__init__(**kwargs)
    pt_conf = self.pretrain_config.dict().copy()
    ft_conf = self.finetune_config.dict().copy()
    # NOTE(review): .dict() includes defaults, so finetune defaults (not only
    # explicitly-set values) override pretraining values here; if only
    # user-set fields should win, exclude_unset=True would be needed —
    # confirm intent.
    pt_conf.update(ft_conf)
    self.finetune_config = FinetuneConfig(**pt_conf)

parse_from_shell(delim='.') classmethod

Parse and validate the arguments, and save the configuration copy in the directory to save the network

Source code in conftrainer/configs/training.py
@classmethod
def parse_from_shell(cls, delim=".") -> type(BaseModel):
    """Parse and validate the arguments, and save the configuration copy in the directory to save the network"""
    config = super().parse_from_shell(delim=delim)
    # NOTE(review): despite the docstring, this only sets the timestamped
    # save paths; the actual file save happens elsewhere — confirm.
    config.postprocess()

    return config

postprocess()

Postprocess the configuration by adding a timestamp to model save dir name

Source code in conftrainer/configs/training.py
def postprocess(self):
    """Postprocess the configuration by adding a timestamp to model save dir name"""
    # Unix timestamp makes each training run's folder unique
    timestamp = int(time())
    model_save_folder = os.path.join(self.save_config.base_save_path,
                                     f"{self.net_config.name}_{timestamp}")
    self.save_config.set_save_paths(model_folder=model_save_folder)

save(filepath=None)

Save the configuration to given filepath

Source code in conftrainer/configs/training.py
def save(self, filepath: str = None):
    """Save the configuration to given filepath (defaults to
    ``<model_folder>/train_config.yaml``)"""
    if filepath is None:
        filepath = os.path.join(self.save_config.model_folder, "train_config.yaml")
    # Create the directory of the file actually being written, so a custom
    # filepath outside model_folder also works.
    os.makedirs(os.path.dirname(filepath) or ".", exist_ok=True)
    save_yaml(self.dict(), filepath=filepath)

ClassifierTrainConfig

Bases: TrainConfig

Configuration for classifier training

Source code in conftrainer/configs/training.py
class ClassifierTrainConfig(TrainConfig):
    """Configuration for classifier training"""
    net_config: ClassifierConfig = Field(default_factory=ClassifierConfig,
                                         description="Classifier configuration")
    loss_and_metrics: LossAndMetrics = Field(default=DefaultLossConfig.CLASSIFIER.value,
                                             description="Losses and metrics")

    def __init__(self, **kwargs):
        """Validate fields and derive num_classes from the data configuration."""
        super().__init__(**kwargs)
        # The output layer size always follows the trainable class list
        self.net_config.num_classes = len(self.data_config.trainable_classes)

DefaultLossConfig

Bases: Enum

Define default losses based on task type

Source code in conftrainer/configs/training.py
class DefaultLossConfig(Enum):
    """Define default losses based on task type"""
    # Enum values are fully-built LossAndMetrics models, accessed via .value
    CLASSIFIER = LossAndMetrics(loss='BinaryCrossentropy')
    BYOL = LossAndMetrics(loss='BYOLLoss')

MultiBranchTrainConfig

Bases: BaseTrainConfig

Load arguments for model training

Source code in conftrainer/configs/training.py
class MultiBranchTrainConfig(BaseTrainConfig):
    """
    Load arguments for model training
    """
    net_config: MultiBranchNetworkConfig = Field(default_factory=MultiBranchNetworkConfig,
                                                 description="Configuration to train a multibranch network")
    data_config: MultiOutputDataConfig = Field(default_factory=MultiOutputDataConfig,
                                               description="Configuration of creating datasets")
    # Losses are configured per branch (BranchConfig.loss_and_metrics), so the
    # inherited top-level field is disabled and excluded from serialization
    loss_and_metrics: None = Field(default=None, exclude=True)

SaveLogConfig

Bases: BaseModel

Config for saving the net and its logs

Source code in conftrainer/configs/training.py
class SaveLogConfig(BaseModel):
    """
    Config for saving the net and its logs
    """

    base_save_path: str = Field(default="trained_models/",
                                description="Path to the root folder to save the model and "
                                            "related files")
    aim_log_path: str = Field(default="aim_logs/",
                              description="Path to the root folder to save aim logs")
    # Private attributes, filled in later by set_save_paths().
    # _best_checkpoint_path: folder to save the network and related files; if
    # not given, it is generated from the base path and the network's name.
    # NOTE: plain defaults, not Field(...) — with underscore_attrs_are_private
    # pydantic v1 stores a private attribute's default verbatim, so a
    # Field(...) default would leak a FieldInfo object through the properties
    # below until set_save_paths is called.
    _best_checkpoint_path: Optional[str] = None
    _model_folder: Optional[str] = None
    _aim_experiment_name: Optional[str] = None

    class Config:
        """Make use of _underscore notation to mark private attributes"""
        underscore_attrs_are_private = True

    @property
    def model_folder(self):
        """The folder where the model, logs and reports will be stored"""
        return self._model_folder

    @property
    def aim_experiment_name(self):
        """Experiment name for aim log tracking"""
        return self._aim_experiment_name

    @property
    def best_checkpoint_path(self):
        """The best checkpoint path (``<model_folder>/checkpoint``)"""
        return self._best_checkpoint_path

    # @model_folder.setter will not work. See github.com/samuelcolvin/pydantic/issues/1577
    def set_save_paths(self, model_folder):
        """Set the private attributes, like the folder where the model will be saved"""
        self._model_folder = model_folder
        self._best_checkpoint_path = os.path.join(model_folder, "checkpoint")
        self._aim_experiment_name = model_folder.rsplit(os.sep, 1)[-1]

aim_experiment_name property

Experiment name for aim log tracking

best_checkpoint_path property

the best checkpoint path

model_folder property

The folder where the model, logs and reports will be stored

Config

Make use of _underscore notation to mark private attributes

Source code in conftrainer/configs/training.py
class Config:
    """Make use of _underscore notation to mark private attributes (pydantic v1)"""
    underscore_attrs_are_private = True

set_save_paths(model_folder)

Set the private attributes, like the folder where the model will be saved

Source code in conftrainer/configs/training.py
def set_save_paths(self, model_folder):
    """Set the private attributes, like the folder where the model will be saved"""
    self._model_folder = model_folder
    self._best_checkpoint_path = os.path.join(model_folder, "checkpoint")
    # Experiment name = last path component of the model folder
    self._aim_experiment_name = model_folder.rsplit(os.sep, 1)[-1]

TrainConfig

Bases: BaseTrainConfig

Load arguments for model training

Source code in conftrainer/configs/training.py
class TrainConfig(BaseTrainConfig):
    """
    Load arguments for model training
    """

    # discriminated union: pydantic selects ClassifierConfig or BYOLConfig
    # based on the value of the "framework" field
    net_config: Union[ClassifierConfig, BYOLConfig] = \
        Field(description="Configuration for Network architecture", discriminator="framework")

    def validate_with_submodel(self) -> type(BaseModel):
        """Validate the pydantic model using the framework-specific child model, i.e. if framework
        is byol, validate with byol-specific model"""
        # TrainConfigEnum member names match the upper-cased framework value
        discriminator = self.net_config.framework.upper()
        model_to_validate = getattr(TrainConfigEnum, discriminator).value
        # only explicitly supplied, non-default values are re-validated by the child model
        config = model_to_validate(**self.dict(exclude_unset=True, exclude_defaults=True))
        return config

    @classmethod
    def parse_from_shell(cls, delim=".") -> type(BaseModel):
        """Parse and validate the arguments, and save the configuration copy in the directory to save the network"""
        config = super().parse_from_shell(delim=delim)
        config = config.validate_with_submodel()
        # NOTE(review): postprocess() is defined on a parent/child config not visible here
        config.postprocess()

        return config

parse_from_shell(delim='.') classmethod

Parse and validate the arguments, and save the configuration copy in the directory to save the network

Source code in conftrainer/configs/training.py
@classmethod
def parse_from_shell(cls, delim=".") -> type(BaseModel):
    """Parse and validate the arguments, and save the configuration copy in the directory to save the network"""
    # parse via the parent class, then re-validate with the framework-specific model
    config = super().parse_from_shell(delim=delim)
    config = config.validate_with_submodel()
    config.postprocess()

    return config

validate_with_submodel()

Validate the pydantic model using the framework-specific child model, i.e. if framework is byol, validate with byol-specific model

Source code in conftrainer/configs/training.py
def validate_with_submodel(self) -> type(BaseModel):
    """Validate the pydantic model using the framework-specific child model, i.e. if framework
    is byol, validate with byol-specific model"""
    # TrainConfigEnum member names match the upper-cased framework value
    discriminator = self.net_config.framework.upper()
    model_to_validate = getattr(TrainConfigEnum, discriminator).value
    # only explicitly supplied, non-default values are re-validated by the child model
    config = model_to_validate(**self.dict(exclude_unset=True, exclude_defaults=True))
    return config

TrainConfigEnum

Bases: Enum

Enum to pick a train config based on framework

Source code in conftrainer/configs/training.py
class TrainConfigEnum(Enum):
    """Enum to pick a train config based on framework"""
    # member names are looked up via getattr(TrainConfigEnum, framework.upper());
    # values are the framework-specific train config classes
    BYOL = BYOLTrainConfig
    CLASSIFIER = ClassifierTrainConfig
    MULTIBRANCH = MultiBranchTrainConfig

options: docstring_style: numpy members_order: source


InferConfig

Bases: YAMLParser

Configurations for inference

Source code in conftrainer/configs/inference.py
class InferConfig(YAMLParser):
    """Configurations for inference"""
    network_path: str = Field(description="Path to the model to load")
    data_dir: str = Field(default='images/', description="Path to the directory containing images")
    extensions: Optional[List[str]] = Field(default=None,
                                            description="Extensions to load from the dir")
    csv_save_path: str = Field(description="Destination to save the prediction dataframe")
    # Optional made explicit: the default is None, matching `extensions` above
    input_shape: Optional[List[int]] = Field(default=None,
                                             description="Input shape of the network. If not "
                                                         "provided, the script will try to "
                                                         "infer it from the network")
    class_names: List[str] = Field(default_factory=list, description="Names of the classes to use "
                                                                     "as dataframe columns. Must "
                                                                     "be in the same order as "
                                                                     "the outputs of the original "
                                                                     "network")
    batch_size: int = Field(default=32, description="Batch size to use during inference")

options: docstring_style: numpy members_order: source


Parse and validate training and evaluate parameters from shell & config files

EvalConfigBase

Bases: YAMLParser

Load arguments for evaluation

Source code in conftrainer/configs/eval.py
class EvalConfigBase(YAMLParser):
    """
    Load arguments for evaluation

    Either ``model_paths`` or ``base_dir`` must be supplied: when
    ``model_paths`` is empty, networks are discovered by listing ``base_dir``.
    """
    base_dir: Optional[str] = Field(description="Base directory to load and evaluate the networks. "
                                                "If given, model paths parameter can be omitted: "
                                                "script will read all nets from this directory")
    model_paths: Optional[List[str]] = Field(description="List of network paths to load and "
                                                         "evaluate. Can be omitted if base_dir is "
                                                         "given")
    test_csv_path: str = Field(description="Path to the csv file to create a dataset")
    name_col: str = Field(default="Name",
                          description="Name of the column containing filenames in csv")

    batch_size: conint(ge=0) = Field(default=32, description="Batch size to use")
    data_dir: str = Field(default='images/', description="Directory to load the data from")
    clean_dataset: bool = Field(default=False, description="Whether to clean the dataset before "
                                                           "running the main script")
    report_filename: str = Field(description="Name of the file to save the report on")
    input_shape: Optional[List[int]] = Field(description="Input shape of the networks. Used only if it can't be "
                                                         "inferred from the network")

    @classmethod
    def parse_from_shell(cls) -> type(YAMLParser):
        """Parse and validate the arguments, and save the configuration copy in the directory to save the network

        Raises
        ------
        ValueError
            if neither ``model_paths`` nor ``base_dir`` is provided
        """
        # depth=0 disables nested argument groups, so every field is a flat --name arg
        config = super().parse_from_shell(delim='', group_name='', depth=0)
        if not config.model_paths:
            # fail fast with a clear message instead of the opaque TypeError
            # that os.listdir(None) would raise
            if not config.base_dir:
                raise ValueError("Either model_paths or base_dir must be provided")
            model_names = os.listdir(config.base_dir)
            model_paths = [os.path.join(config.base_dir, model_name) for model_name in model_names]
            config.model_paths = [os.path.join(path, "best_weights/") for path in model_paths]

        # generate unique path for saving a report so consecutive runs don't overwrite it
        timestamp = int(time())
        config.report_filename = f"{config.report_filename}_{timestamp}.json"

        return config

parse_from_shell() classmethod

Parse and validate the arguments, and save the configuration copy in the directory to save the network

Source code in conftrainer/configs/eval.py
@classmethod
def parse_from_shell(cls) -> type(YAMLParser):
    """Parse and validate the arguments, and save the configuration copy in the directory to save the network"""

    # depth=0 disables nested argument groups, so every field is a flat --name arg
    config = super().parse_from_shell(delim='', group_name='', depth=0)
    if not config.model_paths:
        # no explicit paths: discover every model under base_dir
        model_names = os.listdir(config.base_dir)
        model_paths = [os.path.join(config.base_dir, model_name) for model_name in model_names]
        config.model_paths = [os.path.join(path, "best_weights/") for path in model_paths]

    # generate unique path for saving a report
    timestamp = int(time())
    config.report_filename = f"{config.report_filename}_{timestamp}.json"

    return config

MultiBranchEvalConfig

Bases: EvalConfigBase

Configurations for multibranch evaluation config

Source code in conftrainer/configs/eval.py
class MultiBranchEvalConfig(EvalConfigBase):
    """Configurations for multibranch evaluation config"""
    # keys are task names; each task has its own list of metric init configs
    metrics: Dict[str, List[ObjInitConfig]] = Field(default_factory=dict,
                                                    description="Names and parameters of metrics per each task")
    per_task_data: List[SingleTaskDataConfig] = Field(default_factory=list,
                                                      description="Name and class names for each branch")

options: docstring_style: numpy members_order: source


MultipleNetModifyConfig

Bases: YAMLParser

Arguments for transforming several models at once

Source code in conftrainer/configs/modifications.py
class MultipleNetModifyConfig(YAMLParser):
    """Arguments for transforming several models at once"""
    # default: a single NetModifyConfig built entirely from its own defaults
    transform_configs: List[NetModifyConfig] = Field(default_factory=lambda: [NetModifyConfig()],
                                                     description="List of modification "
                                                                 "configs "
                                                                 "for each network")
    serve_config_save_path: str = Field(description="Path to save the serve configuration for modified networks")

NetModifyConfig

Bases: BaseModel

Configuration for transforming a single network with multiple versions

Source code in conftrainer/configs/modifications.py
class NetModifyConfig(BaseModel):
    """Configuration for transforming a single network with multiple versions"""
    # Optional made explicit: the default is None
    name: Optional[str] = Field(default=None, description="Name of the network")
    save_base_path: str = Field(default='', description="Base save path")
    versions: List[VersionModifyConfig] = Field(default_factory=lambda: [VersionModifyConfig()],
                                                description="Modification Configurations for each version of the net")

    def __init__(self, **kwargs):
        # after standard pydantic validation, derive each version's save_path
        # from this config's base path plus the version number
        super().__init__(**kwargs)
        for subconfig in self.versions:
            subconfig.save_path = os.path.join(self.save_base_path, str(subconfig.version))

RescaleArgs

Bases: BaseModel

Arguments for rescaling the inputs of NN

Source code in conftrainer/configs/modifications.py
class RescaleArgs(BaseModel):
    """Arguments for rescaling the inputs of NN"""
    # per the field descriptions, inputs are transformed as: x / scale + offset
    scale: confloat(ge=1, le=255) = Field(default=1, description="Rescaling factor to divide all inputs by")
    offset: float = Field(default=0, description="Offset factor to add to all inputs after rescaling")

VersionModifyConfig

Bases: BaseModel

Load arguments for transforming a single model

Source code in conftrainer/configs/modifications.py
class VersionModifyConfig(BaseModel):
    """
    Load arguments for transforming a single model
    """
    version: int = Field(default=1, description="Number of version to transform")
    # Optional made explicit: the default is None, matching the fields below
    load_path: Optional[str] = Field(default=None, description="Path of the weights to load relative to base_path")
    input_shape: Optional[conlist(item_type=int)] = Field(default=None,
                                                          description="Input shape of the network. If not provided, "
                                                                      "will be inferred from the net")
    input_func: Optional[str] = Field(default=None, description="Name of the input function to "
                                                                "apply on each input before passing to the net")
    output_func: Optional[str] = Field(default=None,
                                       description="Name of the output function to apply on networks' outputs")
    rescaling_args: Optional[RescaleArgs] = Field(default_factory=RescaleArgs, description="Preprocessing arguments")
    # usually overwritten by NetModifyConfig.__init__ (base path + version number)
    save_path: Optional[str] = Field(default="", description="Path to save the modified network")
    override: bool = Field(default=False, description="Whether to override existing models if present")

options: docstring_style: numpy members_order: source


CallbacksConfig

Bases: BaseModel

Configuration to define callbacks

Source code in conftrainer/configs/callbacks.py
class CallbacksConfig(BaseModel):
    """
    Configuration to define callbacks
    """
    monitor_base: str = Field(default="val_loss",
                              description="The metric to monitor in base callbacks.")
    mode_base: str = Field(default="auto",
                           description="Decision whether we want to maximize or minimize the monitored metric")
    # fix: original concatenated help text read "...use during bothfor pretraining..."
    persistent: List[ObjInitConfig] = Field(default_factory=list,
                                            description="Callbacks to initialize "
                                                        "once and use during both "
                                                        "pretraining and "
                                                        "fine tuning")
    other: List[ObjInitConfig] = Field(default_factory=list,
                                       description="Callbacks to redefine during fine tuning "
                                                   "with new parameters")

options: docstring_style: numpy members_order: source


CSVsConfig

Bases: BaseModel

Configuration for csv files to read the data from. Includes train, test and val

Source code in conftrainer/configs/data.py
class CSVsConfig(BaseModel):
    """Configuration for csv files to read the data from. Includes train, test and val"""
    # only the train csv has a concrete default filename; val/test default to None
    train: str = Field(default='train.csv',
                       description="filename of csv with training data")
    val: Optional[str] = Field(default=None, description="filename of csv with validation data")
    test: Optional[str] = Field(default=None, description="filename of csv with test data")

DataConfigBase

Bases: BaseModel

Base configuration to read data

Source code in conftrainer/configs/data.py
class DataConfigBase(BaseModel):
    """Base configuration to read data"""
    # shared base for single-output and multi-output data configs below
    data_dir: str = Field(default='images/',
                          description="Directory to read the data from")
    csvs: CSVsConfig = Field(default_factory=CSVsConfig,
                             description="CSV files to read train, validation and test data from")
    name_col: str = Field(default='Name',
                          description="Name of the column with image filenames in train/test/val "
                                      "csv files")
    clean_dataset: bool = Field(default=False,
                                description="Whether to clean the datasets before using. If "
                                            "true, all broken/missing image names will be removed "
                                            "from csv files")

MultiOutputDataConfig

Bases: DataConfigBase

Config for the data to feed a multioutput net

Source code in conftrainer/configs/data.py
class MultiOutputDataConfig(DataConfigBase):
    """
    Config for the data to feed a multioutput net
    """
    # one SingleTaskDataConfig per task; empty by default
    per_task_data: List[SingleTaskDataConfig] = Field(default_factory=list)

MultiOutputDatagenConfig

Bases: BaseModel

Configuration for a multi output datagen. Consists of the same filepaths with multiple label options

Source code in conftrainer/configs/data.py
class MultiOutputDatagenConfig(BaseModel):
    """Configuration for a multi output datagen. Consists of the same filepaths with multiple label options"""
    paths: List[str] = Field(default_factory=list, description="Filepaths for the data")
    # one entry per task (see SingleTaskDataConfig)
    per_task_data: List[SingleTaskDataConfig] = Field(default_factory=list,
                                                      description="Labels for each individual task")
    name: str = Field(default='datagen', description="Name of the datagen object to create")

OneOutputDataConfig

Bases: DataConfigBase

Config for the data to feed a single-output net

Source code in conftrainer/configs/data.py
class OneOutputDataConfig(DataConfigBase):
    """
    Config for the data to feed a single-output net
    """

    # defaults to an empty list (not None), despite the Optional annotation
    trainable_classes: Optional[conlist(item_type=str)] = \
        Field(default_factory=list, description="Names of classes to train the network on. "
                                                "Must match the names in csv files")

SingleTaskDataConfig

Bases: BaseModel

Configuration for a single task dataset

Source code in conftrainer/configs/data.py
class SingleTaskDataConfig(BaseModel):
    """Configuration for a single task dataset"""
    name: str = Field(default="task", description="Name of the task. Will be used in the data generator")
    classes: List[str] = Field(default_factory=list, description="Names of the trainable classes")
    # excluded from serialization (exclude=True); Optional made explicit (default None)
    labels: Optional[List] = Field(default=None, exclude=True, description="label array for the task")
    # Optional made explicit (default None)
    task_type: Optional[Literal['multilabel', 'multiclass']] = Field(default=None,
                                                                     description="Type of the task. Used for "
                                                                                 "postprocessing net outputs")

options: docstring_style: numpy members_order: source


add_parser_groups_from_model(parser, model=None, fields=None, group_name=None, delim='.', depth=1)

Given the parser and a pydantic model, add arguments of a model to the parser. For each pydantic model in the nested structure of the model, a new group will be created (limited by argument depth).

Parameters:

Name Type Description Default
parser argparse.ArgumentParser

parser to add the groups to

required
model ModelMetaclass = None

pydantic model

None
fields Dict[str, ModelField]

fields to create arguments with. If not provided, they will be inferred from the model

None
group_name str = None

name of the group

None
delim str = "."

delimiter to use in arguments names. Arguments will be named group_name{delim}arg_name.

'.'
depth int

how deep to go when creating parsers from nested pydantic model

1
Source code in conftrainer/configs/utils.py
def add_parser_groups_from_model(
    parser: argparse.ArgumentParser, model: ModelMetaclass = None,
    fields: Dict = None, group_name: str = None, delim: str = '.',
    depth: int = 1
) -> None:
    """
    Given the parser and a pydantic model, add arguments of a model to the parser. For each
    pydantic model in the nested structure of the model, a new group will be created (limited by
    argument depth).

    Parameters
    ----------
    parser : argparse.ArgumentParser
        parser to add the groups to
    model : ModelMetaclass = None
        pydantic model
    fields : Dict[str, ModelField]
        fields to create arguments with. If not provided, they will be inferred from the model
    group_name : str = None
        name of the group
    delim : str = "."
        delimiter to use in arguments names. Arguments will be named group_name{delim}arg_name.
    depth: int = 1
        how deep to go when creating parsers from nested pydantic model
    """
    if not fields:
        fields = model.__fields__  # pydantic v1: mapping of field name -> ModelField
    if group_name is None:
        group_name = model.__name__
    group = parser.add_argument_group(group_name)

    for field_name, field in fields.items():
        if depth and is_pydantic_model(field):
            # nested pydantic model: recurse one level down, grouping by the field name
            add_parser_groups_from_model(parser=parser, model=field.type_, group_name=field_name,
                                         delim=delim, depth=depth - 1)
        elif depth and is_union_of_models(field):
            # Union of pydantic models: flatten all candidates' fields into one group
            subfields = get_all_subfields(field)
            add_parser_groups_from_model(parser=parser, group_name=field_name,
                                         delim=delim, depth=depth - 1, fields=subfields)
        else:
            # plain field: exposed as --{group_name}{delim}{field_name};
            # default=None — presumably so the merge step can tell "not passed on
            # the shell" apart from an explicit value; verify against merge_args
            arg_name = f"{group_name}{delim}{field_name}"
            group.add_argument(
                f"--{arg_name}",
                dest=arg_name,
                type=get_parser_type(field.type_),
                default=None,
                help=field.field_info.description,
            )

fill_single_field(dict_to_fill, field_name, field_data)

Given a dictionary and a pydantic field information, fill the dictionary with default values of the field. If the field's type is a pydantic model too, recursively fill that sub-model data too

Source code in conftrainer/configs/utils.py
def fill_single_field(dict_to_fill: dict, field_name: str, field_data: ModelField) -> dict:
    """Write the default value of a single pydantic field into ``dict_to_fill``.

    An explicit default wins over a default factory; when the field has
    neither, it is recorded as None. Pydantic-model values are flattened to
    plain dictionaries by ``fill_with_check``.
    """
    if field_data.default is not None:
        return fill_with_check(dict_to_fill=dict_to_fill, key_=field_name,
                               value=field_data.default)
    if field_data.default_factory is not None:
        return fill_with_check(dict_to_fill=dict_to_fill, key_=field_name,
                               value=field_data.default_factory())
    # no default of any kind: mark the field explicitly as unset
    dict_to_fill[field_name] = None
    return dict_to_fill

fill_with_check(dict_to_fill, key_, value)

Fill the key_ in given dictionary with value. If value is a pydantic model, fill with it's dict attribute instead so the result is proper dictionary

Source code in conftrainer/configs/utils.py
def fill_with_check(dict_to_fill, key_, value):
    """Store ``value`` under ``key_``, first converting pydantic models (and
    lists that may contain them) into plain, serializable structures."""
    if isinstance(value, BaseModel):
        serializable = value.dict()
    elif isinstance(value, list):
        serializable = process_pydantic_list(value)
    else:
        serializable = value
    dict_to_fill[key_] = serializable
    return dict_to_fill

get_all_subfields(field)

Parse all possible subfields of a field. The field should be of a Union[cls1, cls2] type, where cls1 & cls2 are children of pydantic.BaseModel

Parameters:

Name Type Description Default
field ModelField

field to check

required

Returns:

Name Type Description
out dict

dictionary consisting of ModelFields and their names

Source code in conftrainer/configs/utils.py
def get_all_subfields(field: ModelField) -> Dict[str, ModelField]:
    """
    Parse all possible subfields of a field. The field should be of a Union[cls1, cls2] type,
    where cls1 & cls2 are children of pydantic.BaseModel

    Parameters
    ----------
    field : ModelField
        field to check

    Returns
    -------
    out : dict
        dictionary consisting of ModelFields and their names; if two union
        members declare the same field name, the later member wins
    """
    merged: Dict[str, ModelField] = {}
    for member in field.type_.__args__:
        merged.update(member.__fields__)
    return merged

get_parser_type(type_)

Map pydantic field types to parse via argparse

Source code in conftrainer/configs/utils.py
def get_parser_type(type_):
    """Map pydantic field types to parse via argparse

    Literal fields are read as plain strings (pydantic validates the allowed
    values afterwards); any other type falls back to ``TYPE_MAPPING`` and
    ultimately to ``json.loads`` for structured values.
    """
    # identity (`is`) is the canonical comparison for typing special forms
    if typing.get_origin(type_) is typing.Literal:
        return str
    return TYPE_MAPPING.get(type_, json.loads)

is_pydantic_model(field=None, field_type=None)

Check if BaseModel is in parent classes of a given field

Source code in conftrainer/configs/utils.py
def is_pydantic_model(field: ModelField = None, field_type: Any = None):
    """Check if BaseModel is in parent classes of a given field"""
    if field_type is None:
        if field is None:
            raise ValueError("Please provide either field or field type to proceed")
        # a List[...] annotation never counts as a model itself
        if typing.get_origin(field.annotation) is list:
            return False
        field_type = field.type_
    try:
        return BaseModel in field_type.mro()
    except AttributeError:
        # non-class objects (e.g. typing constructs) expose no mro()
        return False

is_union(field)

Check if the pydantic field type is a Union

Source code in conftrainer/configs/utils.py
def is_union(field: ModelField) -> bool:
    """Check if the pydantic field type is a Union"""
    origin = typing.get_origin(field.type_)
    return origin is typing.Union

is_union_of_models(field)

Check if the pydantic field type is a Union of pydantic models

Source code in conftrainer/configs/utils.py
def is_union_of_models(field: ModelField) -> bool:
    """Check if the pydantic field type is a Union of pydantic models"""
    if not is_union(field):
        return False
    members = typing.get_args(field.type_)
    return all(is_pydantic_model(field_type=member) for member in members)

merge_args(shell_args, config_path, model)

Merge parsed shell and config file arguments. If parameter is present in shell arguments, it will override the one from config file

Parameters:

Name Type Description Default
shell_args Namespace

arguments parsed from shell via argparse

required
config_path str

path to load a config file

required
model ModelMetaclass

pydantic model

required

Returns:

Name Type Description
out ModelMetaclass

pydantic model with merged args

Source code in conftrainer/configs/utils.py
def merge_args(shell_args: dict, config_path: str, model: type(BaseModel)) -> type(BaseModel):
    """
    Merge parsed shell and config file arguments. If parameter is present in shell arguments,
    it will override the one from config file

    Parameters
    ----------
    shell_args : dict
        arguments parsed from shell via argparse, as a name -> value mapping
    config_path : str
        path to load a config file
    model : ModelMetaclass
        pydantic model

    Returns
    -------
    out : ModelMetaclass
        pydantic model with merged args
    """

    if config_path is not None:
        try:
            yaml_config = model.parse_yaml(file_path=config_path)
        except ParserError as exc:
            # re-raise with a friendlier message; `from exc` keeps the original cause
            raise ParserError("Please provide valid yaml path to config_path arg") from exc
        # shell values override the yaml contents
        config = yaml_config.fill_args(fill_dict=shell_args)
    else:
        config = model.parse_obj(shell_args)

    return config

process_pydantic_list(arr)

Check each element of a list and if there are pydantic models, transform them into dictionaries for further serialization

Source code in conftrainer/configs/utils.py
def process_pydantic_list(arr: list) -> list:
    """Return a copy of ``arr`` with every pydantic model converted to a plain
    dictionary for further serialization; all other elements are kept as-is."""
    if not arr:
        return arr
    converted = []
    for element in arr:
        try:
            # pydantic models expose .dict(); anything else raises AttributeError
            converted.append(element.dict())
        except AttributeError:
            converted.append(element)
    return converted

options: docstring_style: numpy members_order: source