Source code for gordo.serializer.serializer

# -*- coding: utf-8 -*-

import simplejson
import logging
import os
import re
import pickle

from typing import Union, Any  # pragma: no flakes

from sklearn.pipeline import Pipeline
from sklearn.base import TransformerMixin, BaseEstimator  # noqa

from gordo.machine.model.base import GordoBase

logger = logging.getLogger(__name__)

# Regexes for parsing the legacy directory naming scheme
# "n_step=<int>-class=<path.to.Class>" referenced in ``load`` below.
N_STEP_REGEX = re.compile(r".*n_step=([0-9]+)")
CLASS_REGEX = re.compile(r".*class=(.*$)")


def dumps(model: Union[Pipeline, GordoBase]) -> bytes:
    """
    Dump a model into a bytes representation suitable for loading from
    ``gordo.serializer.loads``

    Parameters
    ----------
    model: Union[Pipeline, GordoBase]
        A gordo model/pipeline

    Returns
    -------
    bytes
        Serialized model which supports loading via ``serializer.loads()``

    Example
    -------
    >>> from gordo.machine.model.models import KerasAutoEncoder
    >>> from gordo import serializer
    >>>
    >>> model = KerasAutoEncoder('feedforward_symmetric')
    >>> serialized = serializer.dumps(model)
    >>> assert isinstance(serialized, bytes)
    >>>
    >>> model_clone = serializer.loads(serialized)
    >>> assert isinstance(model_clone, KerasAutoEncoder)
    """
    return pickle.dumps(model)
def loads(bytes_object: bytes) -> GordoBase:
    """
    Load a GordoBase model from bytes dumped from ``gordo.serializer.dumps``

    Parameters
    ----------
    bytes_object: bytes
        Bytes to be loaded, should be the result of ``serializer.dumps(model)``

    Returns
    -------
    Union[GordoBase, Pipeline, BaseEstimator]
        Custom gordo model, scikit learn pipeline or other scikit learn like
        object.
    """
    return pickle.loads(bytes_object)
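
# Illustrative sketch (not part of the original module): since ``dumps`` and
# ``loads`` are thin wrappers around ``pickle``, any picklable scikit-learn
# estimator round-trips the same way as a gordo model:
#
# >>> from sklearn.decomposition import PCA
# >>> from gordo import serializer
# >>> estimator = serializer.loads(serializer.dumps(PCA(3)))
# >>> type(estimator).__name__
# 'PCA'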
def load_metadata(source_dir: Union[os.PathLike, str]) -> dict:
    """
    Load the metadata.json file which was saved during ``serializer.dump``,
    returning the loaded metadata as a dict.

    Parameters
    ----------
    source_dir: Union[os.PathLike, str]
        Directory of the saved model. As with ``serializer.load(source_dir)``,
        this can be the top level directory, or the first directory into the
        serialized model.

    Returns
    -------
    dict

    Raises
    ------
    FileNotFoundError
        If a 'metadata.json' file isn't found in or one directory above the
        supplied ``source_dir``
    """
    # Since this function can take the top level dir, or a dir directly
    # into the first step of the pipeline, we need to check both for metadata
    possible_paths = [
        os.path.join(source_dir, "metadata.json"),
        os.path.join(source_dir, "..", "metadata.json"),
    ]

    path = next((path for path in possible_paths if os.path.exists(path)), None)
    if path:
        with open(path, "r") as f:
            return simplejson.load(f)
    else:
        raise FileNotFoundError(
            f"Metadata file in source dir: '{source_dir}' not found in or up "
            f"one directory."
        )
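
# Illustrative sketch (assumption: the directory exists but contains no
# metadata.json, neither directly nor one level up, so ``load_metadata``
# raises rather than returning an empty dict):
#
# >>> from gordo import serializer
# >>> from tempfile import TemporaryDirectory
# >>> with TemporaryDirectory() as tmp:
# ...     serializer.load_metadata(tmp)
# Traceback (most recent call last):
#     ...
# FileNotFoundError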
def load(source_dir: Union[os.PathLike, str]) -> Any:
    """
    Load an object from a directory, saved by ``gordo.serializer.dump``

    This takes a directory, which is either top-level, meaning it contains
    a sub directory in the naming scheme:
    "n_step=<int>-class=<path.to.Class>", or the aforementioned naming scheme
    directory directly. Will return the deserialized object.

    Parameters
    ----------
    source_dir: Union[os.PathLike, str]
        Location of the top level dir the pipeline was saved

    Returns
    -------
    Union[GordoBase, Pipeline, BaseEstimator]
    """
    # This source dir should have a single pipeline entry directory.
    # May have been passed a top level dir, containing such an entry:
    with open(os.path.join(source_dir, "model.pkl"), "rb") as f:
        return pickle.load(f)
def dump(obj: object, dest_dir: Union[os.PathLike, str], metadata: dict = None):
    """
    Serialize an object into a directory; the object must be pickle-able.

    Parameters
    ----------
    obj
        The object to dump. Must be pickle-able.
    dest_dir: Union[os.PathLike, str]
        The directory to which to save the model
    metadata: dict
        Optional dict of metadata which will be serialized to a file together
        with the model, and loaded again by :func:`load_metadata`.

    Returns
    -------
    None

    Example
    -------
    >>> from sklearn.pipeline import Pipeline
    >>> from sklearn.decomposition import PCA
    >>> from gordo.machine.model.models import KerasAutoEncoder
    >>> from gordo import serializer
    >>> from tempfile import TemporaryDirectory
    >>> pipe = Pipeline([
    ...     ('pca', PCA(3)),
    ...     ('model', KerasAutoEncoder(kind='feedforward_hourglass'))])
    >>> with TemporaryDirectory() as tmp:
    ...     serializer.dump(obj=pipe, dest_dir=tmp)
    ...     pipe_clone = serializer.load(source_dir=tmp)
    """
    with open(os.path.join(dest_dir, "model.pkl"), "wb") as m:
        pickle.dump(obj, m)
    if metadata is not None:
        with open(os.path.join(dest_dir, "metadata.json"), "w") as f:
            simplejson.dump(metadata, f, default=str)
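
# Illustrative sketch (not part of the original module): dumping a model
# together with metadata, then recovering both with ``load`` and
# ``load_metadata``. ``{"version": 1}`` is an arbitrary example payload:
#
# >>> from sklearn.decomposition import PCA
# >>> from gordo import serializer
# >>> from tempfile import TemporaryDirectory
# >>> with TemporaryDirectory() as tmp:
# ...     serializer.dump(obj=PCA(3), dest_dir=tmp, metadata={"version": 1})
# ...     model = serializer.load(source_dir=tmp)
# ...     meta = serializer.load_metadata(source_dir=tmp)
# >>> meta["version"]
# 1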