"""Source code for cartesian.sklearn_api: a scikit-learn style wrapper around the 1 + lambda algorithm."""

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.metrics import mean_squared_error
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_random_state

from .algorithm import oneplus
from .algorithm import optimize
from .cgp import Cartesian
from .cgp import compile
from .cgp import Constant
from .cgp import Primitive
from .cgp import PrimitiveSet
from .cgp import Symbol
from .util import replace_nan

# Operator set that ``Symbolic`` falls back to when its ``operators`` argument
# is None: numpy's element-wise ``add`` and ``multiply``.
# NOTE(review): the trailing ``2`` is presumably the arity -- confirm in ``.cgp``.
DEFAULT_PRIMITIVES = [Primitive("add", np.add, 2), Primitive("mul", np.multiply, 2)]


def _ensure_1d(yhat, shape):
    """Broadcast a model output to ``shape`` unless it already has a second axis.

    Args:
        yhat: value returned by a compiled individual (scalar, array, ...).
        shape: shape handed to ``np.ones`` when ``yhat`` has to be broadcast.

    Returns:
        ``yhat`` itself (same object) when ``yhat.shape[1]`` can be read;
        otherwise ``np.ones(shape) * yhat``.
    """
    try:
        # Probe for a second axis; only the lookup itself is guarded.
        yhat.shape[1]
    except (AttributeError, TypeError, IndexError):
        # No ``shape`` attribute, a non-indexable one, or fewer than two axes:
        # expand the value (e.g. a constant) over the requested shape.
        template = np.ones(shape)
        return template * yhat
    else:
        return yhat


class _Evaluate:  # a class instead of a closure so that it can be pickled and used with joblib
    """Callable that scores an individual against fixed data with a metric.

    Attributes set in ``__init__``:
        n_samples: first entry of ``y.shape``.
        multi_output: True when ``y`` has a second axis longer than one.
        x, y, metric: the constructor arguments, stored unchanged.
    """

    def __init__(self, x, y, metric):
        """Store data and metric, and detect whether the target is multi-output."""
        self.n_samples, *extra_dims = y.shape
        # A second axis of length one is still treated as single output.
        self.multi_output = bool(extra_dims and extra_dims[0] > 1)
        self.x = x
        self.y = y
        self.metric = metric

    def error(self, f, consts=()):
        """Evaluate ``f`` on the columns of ``x`` (plus ``consts``) and score it.

        Args:
            f: callable taking one positional argument per column of ``x``
                followed by the values in ``consts``.
            consts: extra positional arguments appended to the call.

        Returns:
            ``metric(y, yhat)`` where ``yhat`` went through ``replace_nan``.
        """
        raw = f(*self.x.T, *consts)
        if self.multi_output:
            # One entry per output; stack them as columns.
            per_output = [_ensure_1d(out, self.n_samples) for out in raw]
            yhat = np.array(per_output).T
        else:
            yhat = _ensure_1d(raw, self.n_samples)
        return self.metric(self.y, replace_nan(yhat))

    def __call__(self, individual):
        """Run ``optimize`` on ``individual`` using ``self.error`` as objective."""
        return optimize(self.error, individual)


class Symbolic(BaseEstimator, RegressorMixin):
    """Regressor that searches for a symbolic expression with the 1 + lambda algorithm."""

    def __init__(
        self,
        operators=None,
        n_const=0,
        n_rows=1,
        n_columns=3,
        n_back=1,
        n_mutations=3,
        mutation_method="active",
        maxiter=1000,
        maxfev=10000,
        lambda_=4,
        f_tol=0,
        seeded_individual=None,
        random_state=None,
        n_jobs=1,
        metric=None,
        callback=None,
    ):
        """Wraps the 1 + lambda algorithm in sklearn api.

        Note:
            n_const provides a convenience method to create symbolic constants.
            All constants can be directly passed via the operators.

        Args:
            operators: list of primitives; DEFAULT_PRIMITIVES is used if None
            n_const: number of symbolic constants
            n_rows: number of rows in the code block
            n_columns: number of columns in the code block
            n_back: number of rows to look back for connections
            n_mutations: number of mutations per offspring
            mutation_method: specific mutation method
            maxiter: maximum number of generations
            maxfev: maximum number of function evaluations. Important, if fun is another optimizer
            lambda_: number of offspring per generation
            f_tol: Absolute error in metric(ind) between iterations that is acceptable for convergence
            seeded_individual: an individual used to hot-start the optimization
            random_state: an instance of np.random.RandomState, an integer used as seed, or None
            n_jobs: number of jobs for joblib embarrassingly easy parallel
            metric: callable(y_true, y_pred) whose return value is optimized;
                mean_squared_error is used if None
            callback: callable(OptimizeResult), can be optionally used to monitor progress
        """
        self.operators = operators if operators is not None else DEFAULT_PRIMITIVES
        # Every constructor argument must be readable back as an attribute of
        # the same name, otherwise BaseEstimator.get_params() (and therefore
        # clone() and repr()) raises AttributeError.
        self.n_const = n_const
        self.constants = self._build_constants(n_const)
        self.n_rows = n_rows
        self.n_back = n_back
        self.n_columns = n_columns
        # Populated by fit().
        self.n_out = None
        self.pset = None
        self.res = None
        self.model = None
        # parameters for algorithm
        self.maxfev = maxfev
        self.maxiter = maxiter
        self.lambda_ = lambda_
        self.f_tol = f_tol
        self.metric = metric if metric is not None else mean_squared_error
        self.random_state = check_random_state(random_state)
        self.n_jobs = n_jobs
        self.n_mutations = n_mutations
        self.mutation_method = mutation_method
        self.seeded_individual = seeded_individual
        self.callback = callback

    @staticmethod
    def _build_constants(n_const):
        """Create ``n_const`` symbolic constants named ``c_0``, ``c_1``, ..."""
        return [Constant("c_{}".format(i)) for i in range(n_const)]

    def fit(self, x, y):
        """Trains the model given the regression task.

        Args:
            x (np.ndarray): input data matrix for fitting of size (number_of_input_points, number_of_features)
            y (np.ndarray): target data for fitting of size (number_of_input_points) or
                (number_of_input_points, number_of_outputs)

        Returns:
            self

        """
        x = check_array(x)
        # Accept any array-like target; the code below relies on ndarray
        # attributes (``shape``, ``reshape``).
        y = np.asarray(y)
        _, self.n_out = y.reshape(y.shape[0], -1).shape
        _, n_features = x.shape
        terminals = [Symbol("x_{}".format(i)) for i in range(n_features)]
        # Rebuild from n_const so that set_params(n_const=...) takes effect.
        self.constants = self._build_constants(self.n_const)
        # list(...) so that a tuple of operators can be concatenated as well.
        self.pset = PrimitiveSet.create(list(self.operators) + terminals + self.constants)
        cls = Cartesian(
            str(hash(self)),
            self.pset,
            n_rows=self.n_rows,
            n_columns=self.n_columns,
            n_out=self.n_out,
            n_back=self.n_back,
        )
        self.res = oneplus(
            _Evaluate(x, y, self.metric),
            random_state=self.random_state,
            cls=cls,
            lambda_=self.lambda_,
            maxiter=self.maxiter,
            maxfev=self.maxfev,
            n_mutations=self.n_mutations,
            mutation_method=self.mutation_method,
            f_tol=self.f_tol,
            n_jobs=self.n_jobs,
            seed=self.seeded_individual,
            callback=self.callback,
        )
        self.model = compile(self.res.ind)
        return self

    def predict(self, x):
        """Use the fitted model f to make a prediction.

        Args:
            x: input data matrix for scoring, one row per sample

        Returns:
            predicted target data; one column per output when the model was
            fitted on a multi-output target

        """
        # np.asarray rather than check_array: array-likes such as lists or
        # DataFrames become usable without rejecting inputs (e.g. containing
        # NaN) that were accepted before.
        x = np.asarray(x)
        n_samples = x.shape[0]
        outputs = self.model(*x.T, *self.res.x)
        if self.n_out > 1:
            # One entry per output; stack them as columns.
            return np.array([_ensure_1d(out, n_samples) for out in outputs]).T
        return _ensure_1d(outputs, n_samples)