# Source code for autoimpute.imputations.series.random

"""This module implements random imputation via the RandomImputer.

The RandomImputer imputes missing data using a random draw with replacement
from the unique observed values. Dataframe imputers utilize this class when its
strategy is requested. Use SingleImputer or MultipleImputer with
strategy = `random` to broadcast the strategy across all the columns in a
dataframe, or specify this strategy for a given column.
"""

import numpy as np
from sklearn.utils.validation import check_is_fitted
from autoimpute.imputations import method_names
from .base import ISeriesImputer
# local alias for the strategy-name constants (used as methods.RANDOM below)
methods = method_names
# pylint:disable=attribute-defined-outside-init
# pylint:disable=unnecessary-pass

class RandomImputer(ISeriesImputer):
    """Impute missing data using random draws from observed data.

    The RandomImputer samples with replacement from the unique values
    observed during fit. The imputer can be used directly, but such behavior
    is discouraged. RandomImputer does not have the flexibility / robustness
    of dataframe imputers, nor is its behavior identical.
    Preferred use is MultipleImputer(strategy="random").
    """
    # class variables
    strategy = methods.RANDOM

    def __init__(self):
        """Create an instance of the RandomImputer class."""

    def fit(self, X, y=None):
        """Fit the Imputer to the dataset and get unique observed to sample.

        Args:
            X (pd.Series): Dataset to fit the imputer.
            y (None): ignored, None to meet requirements of base class

        Returns:
            self. Instance of the class.
        """
        # keep only non-missing entries, then de-duplicate them; every
        # distinct observed value is therefore equally likely to be drawn
        observed = X[X.notnull()]
        self.statistics_ = {
            "param": list(set(observed)),
            "strategy": self.strategy,
        }
        return self

    def impute(self, X):
        """Perform imputations using the statistics generated from fit.

        Each missing value in the given dataset gets a random draw (with
        replacement) from the unique set of observed values determined
        during the fit stage. Only the drawn values are returned; the
        caller is responsible for placing them into the dataset.

        Args:
            X (pd.Series): Dataset to impute missing data from fit.

        Returns:
            np.array -- one drawn value per missing entry in X.

        Raises:
            ValueError: If X has missing entries but fit found no observed
                values to sample from.
        """
        # check if fitted and count how many draws are needed
        check_is_fitted(self, "statistics_")
        n_missing = int(X.isnull().sum())

        # get the observed values and sample from them
        param = self.statistics_["param"]
        if n_missing > 0 and len(param) == 0:
            # np.random.choice would raise a ValueError here as well, but
            # with a message that does not point at the real cause
            raise ValueError(
                "Cannot impute: no observed values were available during fit."
            )
        return np.random.choice(param, n_missing)

    def fit_impute(self, X, y=None):
        """Convenience method to perform fit and imputation in one go."""
        return self.fit(X, y).impute(X)