Module libratools.lbt_impute

The libratools.lbt_impute module includes methods for detecting and imputing missing values.

Expand source code
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
The libratools.lbt_impute module includes methods for detecting and imputing
missing values.
"""


from . import lbt_utils    # local imports


__author__ = "Vincent (Vince) J. Straub"
__email__ = "vincejstraub@gmail.com"
__status__ = "Testing"


def fill_missing(df, remove_first_last=True, interpolate=['time'],
                 ffill=['timeString']):
    """
    Returns a DataFrame with missing values imputed and, optionally, the
    rows at the very beginning and end dropped.

    The steps run in this order: NaN values are filled with the values
    returned by lbt_utils.get_modal_col_vals, the columns in interpolate
    are interpolated, the columns in ffill are forward filled and, last,
    leading and trailing rows are trimmed by remove_first_last_nans using
    its default columns.

    Args:
        df (pandas DataFrame): DataFrame to update.
        remove_first_last (bool, default=True): if truthy, values at the
            very beginning and end of the DataFrame are dropped.
        interpolate (list, default=['time']): columns to fill using
            linear interpolation.
        ffill (list, default=['timeString']): columns to fill using
            forward fill.

    Returns:
        df (pandas DataFrame).
    """
    # replace NaN values in columns with constant values
    df = df.fillna(value=lbt_utils.get_modal_col_vals(df))

    # interpolate NaN values in time column
    df = interpolate_nan_vals(df, cols=interpolate)

    # forward fill NaN values in timeString col
    df = ffill_col_vals(df, cols=ffill)

    # drop missing values right at the beginning and the end; the flag is
    # tested for truthiness because `is True` rejects equivalent values
    # such as numpy.bool_(True) or 1 and would silently skip the trimming
    if remove_first_last:
        df = remove_first_last_nans(df)

    return df


def interpolate_nan_vals(df, cols=['time']):
    """
    Linearly interpolates missing values in the given DataFrame columns.

    Each column named in cols is overwritten on df itself with the result
    of its interpolate() call, and that same DataFrame is returned.
    """
    for name in cols:
        filled = df[name].interpolate()
        df[name] = filled

    return df


def ffill_col_vals(df, cols=['timeString']):
    """
    Forward fills missing values in the given DataFrame columns.

    Each column named in cols is overwritten on df itself with the result
    of its ffill() call, and that same DataFrame is returned.
    """
    for name in cols:
        filled = df[name].ffill()
        df[name] = filled

    return df


def remove_first_last_nans(df, cols=['x', 'y']):
    """
    Removes rows at the beginning and end of a DataFrame by slicing it
    from the first to the last valid index of select columns.

    A row is valid when at least one of the columns in cols holds a
    non-missing value. If no row is valid (or the DataFrame is empty),
    an empty DataFrame with the same columns is returned.
    """
    selected = df[cols]
    first_idx = selected.first_valid_index()

    # first_valid_index() yields None when nothing is valid; slicing with
    # .loc[None:None] would then keep every row instead of dropping them
    if first_idx is None:
        return df.iloc[0:0]

    last_idx = selected.last_valid_index()

    return df.loc[first_idx:last_idx]


Functions

def ffill_col_vals(df, cols=['timeString'])

Fills NaN values in select DataFrame columns via forward fill.

Expand source code
def ffill_col_vals(df, cols=['timeString']):
    """
    Forward fills missing values in the given DataFrame columns.

    Each column named in cols is overwritten on df itself with the result
    of its ffill() call, and that same DataFrame is returned.
    """
    for name in cols:
        filled = df[name].ffill()
        df[name] = filled

    return df
def fill_missing(df, remove_first_last=True, interpolate=['time'], ffill=['timeString'])

Returns a DataFrame with NaN values at the very beginning and end dropped, NaN values in columns with constant values replaced, and the remaining missing values in selected columns filled using linear interpolation or forward fill.

Args

df : pandas DataFrame
DataFrame to update.
remove_first_last : bool, default=True
if remove_first_last=True, values at the very beginning and end of the DataFrame are dropped.

interpolate : list, default=['time']
columns to fill using linear interpolation.
ffill : list, default=['timeString']
columns to fill using forward fill.

Returns

df (pandas DataFrame).

Expand source code
def fill_missing(df, remove_first_last=True, interpolate=['time'],
                 ffill=['timeString']):
    """
    Returns a DataFrame with missing values imputed and, optionally, the
    rows at the very beginning and end dropped.

    The steps run in this order: NaN values are filled with the values
    returned by lbt_utils.get_modal_col_vals, the columns in interpolate
    are interpolated, the columns in ffill are forward filled and, last,
    leading and trailing rows are trimmed by remove_first_last_nans using
    its default columns.

    Args:
        df (pandas DataFrame): DataFrame to update.
        remove_first_last (bool, default=True): if truthy, values at the
            very beginning and end of the DataFrame are dropped.
        interpolate (list, default=['time']): columns to fill using
            linear interpolation.
        ffill (list, default=['timeString']): columns to fill using
            forward fill.

    Returns:
        df (pandas DataFrame).
    """
    # replace NaN values in columns with constant values
    df = df.fillna(value=lbt_utils.get_modal_col_vals(df))

    # interpolate NaN values in time column
    df = interpolate_nan_vals(df, cols=interpolate)

    # forward fill NaN values in timeString col
    df = ffill_col_vals(df, cols=ffill)

    # drop missing values right at the beginning and the end; the flag is
    # tested for truthiness because `is True` rejects equivalent values
    # such as numpy.bool_(True) or 1 and would silently skip the trimming
    if remove_first_last:
        df = remove_first_last_nans(df)

    return df
def interpolate_nan_vals(df, cols=['time'])

Fills NaN values in select DataFrame columns via linear interpolation.

Expand source code
def interpolate_nan_vals(df, cols=['time']):
    """
    Linearly interpolates missing values in the given DataFrame columns.

    Each column named in cols is overwritten on df itself with the result
    of its interpolate() call, and that same DataFrame is returned.
    """
    for name in cols:
        filled = df[name].interpolate()
        df[name] = filled

    return df
def remove_first_last_nans(df, cols=['x', 'y'])

Removes values at the beginning and end of a DataFrame by using first and last valid index for select columns as new index.

Expand source code
def remove_first_last_nans(df, cols=['x', 'y']):
    """
    Removes rows at the beginning and end of a DataFrame by slicing it
    from the first to the last valid index of select columns.

    A row is valid when at least one of the columns in cols holds a
    non-missing value. If no row is valid (or the DataFrame is empty),
    an empty DataFrame with the same columns is returned.
    """
    selected = df[cols]
    first_idx = selected.first_valid_index()

    # first_valid_index() yields None when nothing is valid; slicing with
    # .loc[None:None] would then keep every row instead of dropping them
    if first_idx is None:
        return df.iloc[0:0]

    last_idx = selected.last_valid_index()

    return df.loc[first_idx:last_idx]