"""Feature-effect explainers for paralytics.xai (module: paralytics.xai.feature_effect)."""

import warnings

from functools import reduce
from itertools import product

import matplotlib.pyplot as plt
import numpy as np

from joblib import delayed, Parallel
from sklearn.base import BaseEstimator
from sklearn.utils import check_random_state

from .base import ExplainerMixin
from ..utils import check_column_existence, is_numeric


# Explicit public API: only the explainer class is exported.
__all__ = [
    "FeatureEffectExplainer",
]


class FeatureEffectExplainer(BaseEstimator, ExplainerMixin):
    """Visualizes the effect of one or two features on the prediction.

    Parameters
    ----------
    estimator: TODO

    features: str or list of length at most 2
        TODO: Grid features.

    dtypes: dict, optional (default=None)
        Types of the passed features. Possible values: 'numeric' or
        'category'. Has to be passed as a dictionary where the key is the
        name of the feature for which data type is specified. Left by
        default it is determined automatically during `fit` method
        execution. Based on this parameter the appropriate explainers are
        selected.

    sample_size: int or float, optional (default=None)
        TODO

    estimation_values: int or dict, optional (default=100)
        Declares number of values to generate for the grid feature or
        explicitly specifies those values. When passed as:

        - `int`: Automatically detects whether the grid feature is numeric
          and if:

          - `True`: Generates the set of values from the lowest to the
            highest value recorded in the data set passed to the fit
            method with the interspace depending on the number of values
            to generate specified in the `n_estimated_values` parameter.
          - `False`: Takes all of the explained feature's unique values
            and imputes into grid feature. When you need to consider only
            a subset of the unique categories, pass them to the dictionary
            with a key being name of the feature.

          When two features are specified then takes the given value for
          both of them.

        - `dict`: Manually specify the values or pass separately for every
          feature how many values to generate. Dictionary specification:

          - `key`: Feature name passed to the `features` parameter.
          - `value`: Integer indicating how many values generate between
            the lowest and the highest value recorded in the data set or
            array of values with which grid feature will be imputed to
            make predictions for the synthetic data set.

    n_jobs: TODO

    random_state: int, optional (default=None)
        Seed for the sample generator. Used when `sample_size` is not
        None.

    Attributes
    ----------
    dtypes_: list
        Actual data types of grid features after evaluation if the
        automatic determination was specified. Otherwise is equal to
        dtypes but converted to a list where order is the same as the
        order of passed features.

    estimation_values_: list
        Actual estimation values used to calculate dependency plots. The
        order of values is the same as the order of passed features.

    base_values_: np.array, shape = (n_samples, n_grid_features)
        TODO

    grid_values_: np.array, shape = (n_grid_values, n_grid_features)
        TODO

    y_grid_predictions_: np.array, shape = (n_samples, n_grid_values)
        Array of predictions for every grid values set where rows are
        predictions for consecutive observations.

    References
    ----------
    [1] C. Molnar, `Interpretable Machine Learning
    <https://christophm.github.io/interpretable-ml-book/pdp.html>`_, 2019

    """
    # Allowed values for manual dtype specification.
    CORRECT_DTYPES = {'numeric', 'category'}

    def __init__(self, estimator, features, dtypes=None, sample_size=None,
                 estimation_values=100, n_jobs=None, random_state=None):
        self.estimator = estimator
        # NOTE: assignment order matters — the `dtypes` and
        # `estimation_values` setters validate against `self.features`.
        self.features = features
        self.dtypes = dtypes
        self.sample_size = sample_size
        self.estimation_values = estimation_values
        self.n_jobs = n_jobs
        self.random_state = random_state

    @property
    def features(self):
        return self._features

    @features.setter
    def features(self, features):
        # Normalize a single feature name to a one-element list so the
        # rest of the class can always iterate over grid features.
        if isinstance(features, str):
            features = [features]
        else:
            assert all([isinstance(feature, str) for feature in features]), (
                "Passing multiple grid features requires an array-like object "
                "of string values being the names of features."
            )
            features = list(features)
        assert len(features) <= 2, "Maximum two grid features are allowed."
        self._features = features

    @property
    def dtypes(self):
        return self._dtypes

    @dtypes.setter
    def dtypes(self, dtypes):
        # None means "determine automatically in fit"; otherwise the dict
        # must cover exactly the declared grid features with known dtypes.
        if dtypes is not None:
            assert isinstance(dtypes, dict), (
                "When manually specifying the data types of grid features the "
                "dictionary is required where the key is the feature's name."
            )
            assert set(dtypes.keys()) == set(self.features), (
                "Manual data types specification of grid features requires "
                "passing appropriate values for every feature given in the "
                "`features` parameter. It should be a 'numeric' string when "
                "the feature is of numerical type and 'category' string "
                "otherwise."
            )
            assert set(dtypes.values()) <= self.CORRECT_DTYPES, (
                "Unavailable data type specified. Data types should be passed "
                "as strings from the given set: {}.".format(self.CORRECT_DTYPES)
            )
        self._dtypes = dtypes

    @property
    def estimation_values(self):
        return self._estimation_values

    @estimation_values.setter
    def estimation_values(self, values):
        # A bare int applies to every grid feature; a dict must cover
        # exactly the declared grid features (int counts or explicit
        # value arrays).
        if isinstance(values, int):
            values = {feature: values for feature in self.features}
        else:
            assert isinstance(values, dict), (
                "When manually specifying the estimation values for grid "
                "features separately the dictionary is required where the key "
                "is the feature's name."
            )
            assert set(values.keys()) == set(self.features), (
                "Manual values specification for grid features requires "
                "passing appropriate values for every feature given in the "
                "`features` parameter. It can be array-like object with "
                "explicitly declared values or integer indicating how many "
                "values shall be generated."
            )
        self._estimation_values = values
[docs] def fit(self, X, y=None): """Fits creation of synthetic data to X. Parameters ---------- X: pandas.DataFrame TODO y: ignore Returns ------- self: object Returns the instance itself. """ check_column_existence(X, self.features) if self.dtypes is None: self.dtypes_ = [ 'numeric' if is_numeric(X[feature]) else 'category' for feature in self.features ] else: self.dtypes_ = [self.dtypes[feature] for feature in self.features] self.estimation_values_ = self._determine_estimation_values(X) X_sample = self.select_sample(X) self.base_values_ = X_sample[self.features].values self.grid_values_, self.y_grid_predictions_ = \ self.predict_grid_features(X_sample) return self
[docs] def explain(self, pdplot=True, iceplot=False, mplot=False, aleplot=False, automatic_layout=True, centers=None, iceplot_thresh=None, neighborhoods=.1, pdline_params=None, iceline_params=None, mline_params=None, aleline_params=None, contour_params=None, contourf_params=None, bar_params=None, imshow_params=None, text_params=None, verbose=True, ax=None): """Explains the features effect with use of the selected methods. Parameters ---------- pdplot: bool, optional (default=True) Defines if Partial Dependence Plot should be displayed. It visualizes marginal effect that grid features have on predictions with use of the Monte Carlo method. iceplot: bool, optional (default=False) Defines whether Individual Conditional Expectation plots should be displayed. Only possible if a single numeric feature is explained. mplot: bool, optional (default=False) Defines if Marginal Plot should be displayed. It visualizes conditional effect that grid features have on predictions. Only possible for numeric features. aleplot: bool, optional (default=False) Defines if Accumulated Local Effects Plot should be displayed. It visualizes accumulated differences between predictions based on the conditional distribution of the feature. automatic_layout: bool, optional (default=True) Specified whether format the plots in the automatic manner including ticks adjustment, axis signing, text formatting etc. or leave the plot in the raw state. centers: int or float or string or list, optional (default=None) Defines the center value that all of the predictions will be compared to and displayed as a difference in the prediction to this point. By default no centering is done. If: - `min`: Specifies that minimum of the grid features will be used for centering. Should be passed as the list of values mentioned above if two features are passed to explanation, in the same order in which the features are given. 
iceplot_thresh: int or float, optional (default=None) Declares how many observations to take to visualize the ICE plots. If `int`, gives the exact number of observations, if `float`, gives a fraction of all observations to be taken. neighborhoods: int or float or list, optional (default=.1) Neighborhood of the value to determine the interval `[current_value - neighborhood, current_value + neighborhood]` for which predictions will be averaged. Taken under consideration only when `mplot == True` or `aleplot == True`. If: - `int`: Absolute value that will be deducted and added from the current value to determine the interval for synthetic data generation. - `float`: Fraction of the difference between biggest and smallest value in the variable to calculate the interval boundaries. Should be passed as the list of values mentioned above if two features are passed to explanation, in the same order in which the features are given. {pd, ice, m, ale}plot_params: dicts, optional (default=None) Keyword arguments for underlying plotting functions. verbose: TODO ax: TODO Returns ------- TODO """ assert hasattr(self, 'y_grid_predictions_'), ( 'Could not find the attribute.\n' 'Fitting is necessary before you do the transformation.' ) if len(self.features) == 2: assert sum([pdplot, mplot, aleplot]) <= 1, ( 'When explaining two features it is possible to plot only one ' 'of: `pdplot`, `mplot`, `aleplot`.' ) features_are_numeric = [dtype == 'numeric' for dtype in self.dtypes_] if isinstance(neighborhoods, int): neighborhoods = [ neighborhoods for feature_is_numeric in features_are_numeric if feature_is_numeric ] elif isinstance(neighborhoods, float): # TODO: Think of method of turning fraction to absolute value. pass else: neighborhoods = list(neighborhoods) assert len(neighborhoods) == sum(features_are_numeric), ( "Neighborhoods can be declared for numeric features only " "and its length must be the same size as the number of " "specified grid features." 
) if centers is not None: assert all(features_are_numeric), ( "Centering is only available for numerical features, because " "it needs to know the relations between feature's values to " "extract values higher or equal than the centering value.\n" "Consider not setting the `center` parameter or pass numerical " "features and re-fit the explainer." ) if isinstance(centers, (int, float, str)): centers = [centers for _ in range(len(self.features))] else: centers = list(centers) assert len(centers) == len(self.features), ( 'The number of declared center values must be equal to the ' 'number of features specified to explanation.' ) grid_values, y_grid = self._center_grid(centers) else: grid_values = self.grid_values_ y_grid = self.y_grid_predictions_ # Get current axis if none has been specified. if ax is None: ax = plt.gca() # Set default plots parameters. if contour_params is None: contour_params = {'linewidths': .5, 'colors': 'white'} if contourf_params is None: # TODO contourf_params = {} if bar_params is None: # TODO bar_params = {} if imshow_params is None: imshow_params = {'origin': 'lower', 'aspect': 'auto'} if text_params is None: text_params = {'ha': 'center', 'va': 'center', 'color': 'white'} if iceplot: one_feature = len(self.features) == 1 assert one_feature and is_numeric(self.base_values_.flatten()), ( 'When two features are specified or the feature is categorical ' 'the `iceplot` parameter must be False!\nExplaining two ' 'features effect on predictions with use of Individual ' 'Conditional Expectation plots requires displaying single ' 'two-dimensional plane for every sample and even though it is ' 'possible it would give zero value due to lack of readability.' 
) if iceline_params is None: iceline_params = {'color': '#ACDBD9'} ax = self._plot_iceplot( grid_values=grid_values, predictions=y_grid, thresh=iceplot_thresh, line_params=iceline_params, ax=ax ) if pdplot: if pdline_params is None: pdline_params = {'linewidth': 2, 'color': '#A6E22E'} ax = self._plot_pdplot( grid_values=grid_values, predictions=y_grid, features_are_numeric=features_are_numeric, automatic_layout=automatic_layout, line_params=pdline_params, contour_params=contour_params, contourf_params=contourf_params, bar_params=bar_params, imshow_params=imshow_params, text_params=text_params, ax=ax ) if mplot: if mline_params is None: mline_params = {'linewidth': 2, 'color': '#FF45F9'} assert any(features_are_numeric), ( "When plotting M-Plot at least one feature needs to be of " "numeric type. Otherwise, there is no point in calculating " "the unrealistic observations in the sense of the Euclidean " "distance for categorical features. If you want to visualize " "the effect of two features, ceteris paribus, just plot PDPlot " "instead." ) ax = self._plot_mplot( grid_values=grid_values, predictions=y_grid, features_are_numeric=features_are_numeric, neighborhoods=neighborhoods, automatic_layout=automatic_layout, line_params=mline_params, contour_params=contour_params, contourf_params=contourf_params, imshow_params=imshow_params, verbose=verbose, ax=ax ) return ax
[docs] def select_sample(self, X): """Selects sample data with `sample_size` number of samples.""" if self.sample_size is not None: try: X_sample = X.sample( n=self.sample_size, random_state=self.random_state ) except ValueError: X_sample = X.sample( frac=self.sample_size, random_state=self.random_state ) else: X_sample = X.copy() return X_sample
[docs] def predict_grid_features(self, X): """Predicts previously substituting grid features with generated values. For every combination in the cartesian product of unique grid features generates a temporary DataFrame containing this set of values across the whole grid features leaving the rest of the features unchanged. Then makes prediction for this synthetic DataFrame. Returns list of predictions for every synthetic DataFrame and list of grid values which replaced the original values across the grid features to create these DataFrames for prediction. """ assert hasattr(self, 'estimation_values_'), ( 'Could not find the attribute.\n' 'Synthetic data preparation is only possible after estimation ' 'values are generated.' ) grid_values, y_grid_predictions = zip(*Parallel(n_jobs=self.n_jobs)( delayed(self._predict_single_grid_features)(X, grid_values) for grid_values in product(*self.estimation_values_) )) y_grid_predictions = np.stack(y_grid_predictions).T grid_values = np.stack(grid_values) return grid_values, y_grid_predictions
def _determine_estimation_values(self, X): """Determines estimation values for grid features.""" assert hasattr(self, "dtypes_"), ( "Could not find the attribute.\n" "Dtypes evaluation is necessary before the values estimation." ) estimation_values = [] for feature, dtype in zip(self.features, self.dtypes_): values = self.estimation_values[feature] if isinstance(values, int) and dtype == "numeric": est_values = np.linspace( start=X[feature].astype(np.number).min(), stop=X[feature].astype(np.number).max(), num=values ) elif isinstance(values, int): est_values = np.unique(X[feature]) else: est_values = np.sort(values) estimation_values.append(est_values) return estimation_values def _predict_single_grid_features(self, X, grid_values): """Makes prediction for single combination of grid features' values.""" X_grid = X.assign(**dict(zip(self.features, grid_values))) try: y_grid_preds = self.estimator.predict_proba(X_grid)[:, 1] except AttributeError: y_grid_preds = self.estimator.predict(X_grid) return grid_values, y_grid_preds def _center_grid(self, centers): """Centers the grid values and predictions to the given list of values. Every set of values that is lower than the specified center values is removed from the grid and for the other values the central values are subtracted from grid values and prediction value for centers is subtracted from the grid predictions. 
""" centers = [ self.base_values_[:, idx].min() if center == 'min' else center for idx, center in enumerate(centers) ] above_central_values = np.all(self.grid_values_ >= centers, axis=1) grid_values = self.grid_values_[above_central_values, :] y_grid = self.y_grid_predictions_[:, above_central_values] y_grid = y_grid - y_grid[:, 0].reshape(-1, 1) return grid_values, y_grid def _plot_iceplot(self, grid_values, predictions, thresh, line_params, ax): """Plots Individual Conditional Expectation.""" grid_values = np.array(grid_values) predictions = np.array(predictions) random_state = check_random_state(self.random_state) if isinstance(thresh, float): thresh = int(len(predictions) * thresh) try: more_indexes_than_thresh = len(predictions) > thresh except TypeError: more_indexes_than_thresh = False # Select observations to plot Ceteris Paribus profiles for. if more_indexes_than_thresh: predictions = predictions[random_state.choice( len(predictions), size=thresh, replace=False )] grid_values = grid_values.flatten() for prediction in predictions: ax.plot( grid_values, prediction, **line_params ) return ax def _plot_pdplot(self, grid_values, predictions, features_are_numeric, automatic_layout, line_params, contour_params, contourf_params, bar_params, imshow_params, text_params, ax): """Plots Partial Dependence Plot.""" grid_values = np.array(grid_values) predictions = np.array(predictions) predictions_mean = np.mean(predictions, axis=0) if all(features_are_numeric): ax = self._plot_numerics( grid_values, predictions_mean, line_params, contour_params, contourf_params, ax ) elif any(features_are_numeric): ax = self._plot_category_numeric( grid_values, predictions_mean, features_are_numeric, automatic_layout, imshow_params, ax ) else: ax = self._plot_categories( grid_values, predictions_mean, automatic_layout, bar_params, imshow_params, text_params, ax ) return ax def _plot_mplot(self, grid_values, predictions, features_are_numeric, neighborhoods, automatic_layout, 
line_params, contour_params, contourf_params, imshow_params, verbose, ax): """Plots Marginal Plot.""" grid_values = np.array(grid_values) predictions = np.array(predictions) predictions = self._replace_unreal_obs_with_nan( grid_values, predictions, features_are_numeric, neighborhoods ) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") predictions_mean = np.nanmean(predictions, axis=0) try: warn_encountered = issubclass(w[-1].category, RuntimeWarning) except IndexError: warn_encountered = False if verbose and warn_encountered: print( "With a given `neighborhoods` and `estimation_values`, " "some values were not considered realistic for any " "observation in the data set and therefore these places " "will be presented as blank in the final plot.\n" "In order to eliminate them, it is worth considering " "changing the parameters mentioned above or getting rid of " "outliers from the data set.\n" "It is worth remembering that often empty spaces in the " "plot will indicate truly unrealistic values, which will " "make them desirable to keep illustrating.\n" "To silence this message set `verbose` to False." 
) if all(features_are_numeric): ax = self._plot_numerics( grid_values, predictions_mean, line_params, contour_params, contourf_params, ax ) else: ax = self._plot_category_numeric( grid_values, predictions_mean, features_are_numeric, automatic_layout, imshow_params, ax ) return ax def _plot_numerics(self, grid_values, predictions_mean, line_params, contour_params, contourf_params, ax): """Plots Partial Dependence Plot for numeric grid features only.""" if len(self.features) == 1: ax.plot( grid_values.flatten(), predictions_mean, **line_params ) else: x_shape = len(np.unique(grid_values[:, 0])) feature_x = grid_values[:, 0].reshape(x_shape, -1) feature_y = grid_values[:, 1].reshape(x_shape, -1) values = predictions_mean.reshape(x_shape, -1) contourf = ax.contourf( feature_x, feature_y, values, **contourf_params ) ax.contour( feature_x, feature_y, values, **contour_params ) ax.figure.colorbar(contourf, ax=ax) ax.set_xlabel('{}'.format(self.features[0])) ax.set_ylabel('{}'.format(self.features[1])) return ax def _plot_category_numeric(self, grid_values, predictions_mean, features_are_numeric, automatic_layout, imshow_params, ax): """Plots Partial Dependence Plot for category vs. numeric features.""" idx_num, idx_cat = (0, 1) if features_are_numeric[0] else (1, 0) feature_num = grid_values[:, idx_num].astype(np.number) feature_cat = grid_values[:, idx_cat] feature_num_unique = np.unique(feature_num) feature_cat_unique = np.unique(feature_cat) if idx_cat: y_shape = len(feature_num_unique) else: y_shape = len(feature_cat_unique) values = predictions_mean.reshape((-1, y_shape), order='F') imshow = ax.imshow( values, **imshow_params ) ax.figure.colorbar(imshow, ax=ax) if automatic_layout: span_range = abs(feature_num_unique[-1] - feature_num_unique[0]) num_format = "{0:.0f}" if span_range > 10 else "{0:.2f}" num_labels = [ num_format.format(value) for value in feature_num_unique ] else: num_labels = feature_num_unique # FIXME: More wet than DRY. 
Rewrite for less spaghetti code. if idx_cat: ax.set_xticks(np.arange(len(feature_num_unique))) ax.set_yticks(np.arange(len(feature_cat_unique))) ax.set_xticklabels(num_labels) ax.set_yticklabels(feature_cat_unique) if automatic_layout: tick_spacing = int(len(feature_num_unique) / 5) for idx, tick in enumerate(ax.get_xticklabels()): if idx % tick_spacing: tick.set_visible(False) ax.set_xlabel('{}'.format(self.features[idx_num])) ax.set_ylabel('{}'.format(self.features[idx_cat])) else: ax.set_xticks(np.arange(len(feature_cat_unique))) ax.set_yticks(np.arange(len(feature_num_unique))) ax.set_xticklabels(feature_cat_unique) ax.set_yticklabels(num_labels) if automatic_layout: tick_spacing = int(len(feature_num_unique) / 5) for idx, tick in enumerate(ax.get_yticklabels()): if idx % tick_spacing: tick.set_visible(False) ax.set_xlabel('{}'.format(self.features[idx_cat])) ax.set_ylabel('{}'.format(self.features[idx_num])) return ax def _plot_categories(self, grid_values, predictions_mean, automatic_layout, bar_params, imshow_params, text_params, ax): """Plots Partial Dependence Plot for categorical features only.""" if len(self.features) == 1: ax.bar( x=grid_values.flatten(), height=predictions_mean, **bar_params ) if automatic_layout: ax.set_xlabel("{}".format(self.features[0])) ax.set_ylabel("Average Prediction") else: # Preserves the order of occurrence. 
feature_x_unique = reduce( lambda l, x: l.append(x) or l if x not in l else l, grid_values[:, 0], [] ) n_feature_x_unique = len(feature_x_unique) n_feature_y_unique = len(np.unique(grid_values[:, 1])) feature_y_unique = grid_values[:n_feature_y_unique, 1].tolist() values = predictions_mean.reshape( (len(feature_y_unique), -1), order='F' ) imshow = ax.imshow( values, **imshow_params ) ax.figure.colorbar(imshow, ax=ax) ax.set_xticks(np.arange(n_feature_x_unique)) ax.set_yticks(np.arange(n_feature_y_unique)) ax.set_xticklabels(feature_x_unique) ax.set_yticklabels(feature_y_unique) ax.set_xlabel('{}'.format(self.features[0])) ax.set_ylabel('{}'.format(self.features[1])) if automatic_layout: plt.setp( ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor" ) span_range = abs(np.max(values) - np.min(values)) text_format = "{0:.0f}" if span_range > 10 else "{0:.2f}" else: text_format = "{0}" indexes_product = product( range(n_feature_x_unique), range(n_feature_y_unique) ) for i, j in indexes_product: ax.text( i, j, text_format.format(values[j, i]), **text_params ) return ax def _replace_unreal_obs_with_nan(self, grid_values, predictions, features_are_numeric, neighborhoods): """Replaces unrealistic observations with NaN value. Prepares the predictions for plotting mplot by replacing predictions with nans for grid values which distance from the real values ​​on which they were generated is higher than defined by `neighborhoods`. """ grid_values_num = grid_values[:, features_are_numeric].astype(np.number) for idx in range(len(self.base_values_)): base_values_num = self.base_values_[idx, features_are_numeric] borde_inferior = grid_values_num - neighborhoods <= base_values_num borde_superior = grid_values_num + neighborhoods >= base_values_num unrealistic_obs = np.any(~(borde_inferior & borde_superior), axis=1) predictions[idx, unrealistic_obs] = np.nan return predictions