Source code for gordo.serializer.into_definition

# -*- coding: utf-8 -*-

import inspect
import logging

from sklearn.pipeline import Pipeline


logger = logging.getLogger(__name__)


def into_definition(pipeline: Pipeline, prune_default_params: bool = False) -> dict:
    """
    Decompose a ``sklearn.pipeline.Pipeline`` instance into a plain dict
    definition, from which ``gordo.serializer.from_definition`` can rebuild
    the pipeline.

    Parameters
    ----------
    pipeline: sklearn.pipeline.Pipeline
        The pipeline instance to decompose.
    prune_default_params: bool
        When True, parameters of the pipeline whose current value equals the
        default declared by its class are dropped from the definition.

    Returns
    -------
    dict
        Definition of the pipeline, compatible with
        :func:`gordo.serializer.from_definition`

    Example
    -------
    >>> import yaml
    >>> from sklearn.pipeline import Pipeline
    >>> from sklearn.decomposition import PCA
    >>> from gordo.machine.model.models import KerasAutoEncoder
    >>>
    >>> pipe = Pipeline([('pca', PCA(4)), ('ae', KerasAutoEncoder(kind='feedforward_model'))])
    >>> pipe_definition = into_definition(pipe)  # It is now a standard python dict of primitives.
    >>> print(yaml.dump(pipe_definition))
    sklearn.pipeline.Pipeline:
      memory: null
      steps:
      - sklearn.decomposition._pca.PCA:
          copy: true
          iterated_power: auto
          n_components: 4
          random_state: null
          svd_solver: auto
          tol: 0.0
          whiten: false
      - gordo.machine.model.models.KerasAutoEncoder:
          kind: feedforward_model
      verbose: false
    <BLANKLINE>
    """
    # The pipeline is itself just another node; the node decomposer handles
    # the recursion into its steps.
    return _decompose_node(pipeline, prune_default_params)
def _decompose_node(step: object, prune_default_params: bool = False):
    """
    Decompose a specific instance of a scikit-learn transformer,
    including Pipelines or FeatureUnions

    Parameters
    ----------
    step
        An instance of a Scikit-Learn transformer class. It must expose either
        an ``into_definition()`` method or a ``get_params(deep=...)`` method.
    prune_default_params
        Whether to output the default parameter values into the definition. If True,
        only those parameters differing from the default params will be output.

    Returns
    -------
    dict
        decomposed node - Where key is the import string for the class and associated value
        is a dict of parameters for that class.
    """
    import_str = f"{step.__module__}.{step.__class__.__name__}"

    # An object-provided ``into_definition`` takes precedence over the generic
    # ``get_params`` based decomposition.
    if hasattr(step, "into_definition"):
        definition = getattr(step, "into_definition")()
    else:
        params = getattr(step, "get_params")(deep=False)
        definition = load_definition_from_params(params)

    if prune_default_params:
        definition = _prune_default_parameters(step, definition)
    return {import_str: definition}


def _prune_default_parameters(obj: object, current_params) -> dict:
    """
    Take an instance of an object and determine what the default parameters are
    against what its current parameters are.

    Parameters
    ----------
    obj: object
        An instance of an object
    current_params: dict
        A mapping of current parameters for the obj

    Returns
    -------
    dict
        Containing only the parameters which are different from their default.
        Parameters for which ``obj.__class__.__init__`` declares no default
        (required arguments, or keys unknown to the signature) are always kept.
    """
    signature = inspect.signature(obj.__class__.__init__)
    default_params = {
        k: v.default
        for k, v in signature.parameters.items()
        if v.default is not inspect.Parameter.empty
    }
    logger.debug(f"Current params: {current_params}, default_params: {default_params}")
    # Only compare against a default when one exists; indexing default_params
    # unconditionally raises KeyError for required parameters.
    return {
        k: v
        for k, v in current_params.items()
        if k not in default_params or v != default_params[k]
    }
def load_definition_from_params(params: dict) -> dict:
    """
    Recursively decomposing each of values from params into the definition

    Each value is handled according to its kind:

    - objects exposing ``get_params`` or ``into_definition`` are decomposed
      into ``{import_string: parameters}`` nodes;
    - lists are walked element by element; a tuple whose second element is
      such an object (e.g. a ``(name, estimator)`` step) is replaced by the
      decomposed second element, any other element is kept unchanged;
    - other callables are replaced by their ``"<module>.<name>"`` string;
    - everything else is kept unchanged.

    Parameters
    ----------
    params: dict
        Mapping of parameter name to parameter value.

    Returns
    -------
    dict
        Mapping with the same keys as ``params`` and the decomposed values.
    """
    definition = {}
    for param, param_val in params.items():
        if _is_decomposable(param_val):
            definition[param] = _decompose_node(param_val)

        # Handle parameter value that is a list
        elif isinstance(param_val, list):
            # Only tuples carrying an estimator-like second element are
            # decomposed; lists of ints, plain tuples, etc. pass through so
            # they do not blow up inside the node decomposition.
            # TODO: It _could_, in theory, be a dict of {str: BaseEstimator} or similar;
            # TODO: that case is still not handled here.
            definition[param] = [
                _decompose_node(leaf[1]) if _is_named_step(leaf) else leaf
                for leaf in param_val
            ]

        # Handle FunctionTransformer function object type parameters
        elif callable(param_val):
            # param_val is a function for FunctionTransformer.func init param
            definition[param] = f"{param_val.__module__}.{param_val.__name__}"

        else:
            definition[param] = param_val
    return definition


def _is_decomposable(obj: object) -> bool:
    """Return True if ``obj`` exposes ``get_params`` or ``into_definition``."""
    return hasattr(obj, "get_params") or hasattr(obj, "into_definition")


def _is_named_step(leaf: object) -> bool:
    """Return True if ``leaf`` is a tuple whose second element is decomposable."""
    return isinstance(leaf, tuple) and len(leaf) > 1 and _is_decomposable(leaf[1])