"""Deletion strategies to handle the missing data in pandas DataFrame."""
from autoimpute.utils import check_nan_columns
[docs]@check_nan_columns
def listwise_delete(data, inplace=False, verbose=False):
"""Delete all rows from a DataFrame where any missing values exist.
Deletion is one way to handle missing values. This method removes any
records that have a missing value in any of the features. This package
focuses on imputation, not deletion. That being said, listwise deletion
is a necessary component of any imputation package, as its the default
method most people (and software) use to handle missing data.
Args:
data (pd.DataFrame): DataFrame used to delete missing rows.
inplace (boolean, optional): perform operation inplace.
Defaults to False.
verbose (boolean, optional): print information to console.
Defaults to False.
Returns:
pd.DataFrame: rows with missing values removed.
Raises:
ValueError: columns with all data missing. Raised through decorator.
"""
num_records_before = len(data.index)
if inplace:
data.dropna(inplace=True)
else:
data = data.dropna(inplace=False)
num_records_after = len(data.index)
if verbose:
print(f"Number of records before delete: {num_records_before}")
print(f"Number of records after delete: {num_records_after}")
return data