Source code for table_enforcer.main_classes

# -*- coding: utf-8 -*-
"""Main module."""
import typing as t

from collections import OrderedDict

import pandas as pd

from munch import Munch
# import table_enforcer.errors as e
from table_enforcer import validate as v

# Names exported by a star-import of this module.
__all__ = ["Enforcer", "Column"]

# Signature of a validator function: it receives a column's data as a Series.
# NOTE(review): `Column.validate` stores each validator's result as a single
# column of its results frame, so a Series-like return looks expected --
# confirm whether the `pd.DataFrame` return type declared here is intended.
VALIDATOR_FUNCTION = t.Callable[[pd.Series], pd.DataFrame]
# Signature of a recoder function: it receives a Series and returns a Series.
RECODER_FUNCTION = t.Callable[[pd.Series], pd.Series]


class Enforcer:
    """Describe a table as an ordered collection of column definitions.

    Validation and recoding are delegated to the individual column objects;
    this class only iterates over them and combines what they return.
    """

    def __init__(self, columns):
        """Register ``columns`` by name, preserving the order they were given.

        Args:
            columns: Iterable of column definitions.  Each must expose a
                ``name`` attribute plus ``validate(table)`` and
                ``recode(table)`` methods.  When two definitions share a
                name, the later one replaces the earlier one.
        """
        self._columns = OrderedDict((col.name, col) for col in columns)
        # Plain list of the registered names, in definition order.
        self.columns = list(self._columns)

    def make_validations(self, table: pd.DataFrame) -> Munch:
        """Run every column's validation against ``table``.

        Args:
            table: The dataframe to check.

        Returns:
            A ``Munch`` keyed by column name whose values are whatever each
            column's ``validate(table)`` returned.
        """
        outcome = Munch()
        for col_name, col_def in self._columns.items():
            outcome[col_name] = col_def.validate(table)
        return outcome

    def validate(self, table: pd.DataFrame, recode: bool = False) -> bool:
        """Report whether ``table`` passes every test of every column.

        Args:
            table: The dataframe to check.
            recode: When true, the table is first passed through ``recode``
                and the recoded result is validated instead.

        Returns:
            True only if every cell of every per-column validation frame is
            truthy; False otherwise.
        """
        checked = self.recode(table) if recode else table
        per_column = self.make_validations(table=checked)
        verdicts = [frame.all().all() for frame in per_column.values()]
        return all(verdicts)

    def recode(self, table: pd.DataFrame) -> pd.DataFrame:
        """Build a recoded copy of ``table``.

        Only the columns registered on this enforcer are kept.  Each one is
        replaced by the output of its column definition's ``recode(table)``.
        The input dataframe itself is not modified by this method.

        Args:
            table: The dataframe to recode.

        Returns:
            A new dataframe holding the recoded columns.
        """
        recoded = table[self.columns].copy()
        for col_name, col_def in self._columns.items():
            recoded[col_name] = col_def.recode(table)
        return recoded
class Column:
    """Definition of a single table column and the checks applied to it.

    A column knows its name, the Python type its values should have, whether
    its values must pass the uniqueness check, and two ordered collections of
    functions: validators (each yields one column of test results) and
    recoders (applied one after another to transform the data).
    """

    def __init__(self, name: str, dtype: type, unique: bool,
                 validators: t.Optional[t.List[VALIDATOR_FUNCTION]],
                 recoders: t.Optional[t.List[RECODER_FUNCTION]]) -> None:
        """Construct a new `Column` object.

        Args:
            name: Label of the column inside the table.
            dtype: Type (or tuple of types) each value is tested against with
                ``isinstance``.
            unique: Whether ``validate`` should add the ``'unique'`` check.
            validators: Functions run on the column data during validation;
                ``None`` means no validators.
            recoders: Functions applied in order by ``recode``; ``None`` means
                no recoders.
        """
        if validators is None:
            validators = []
        if recoders is None:
            recoders = []

        self.name = name
        self.dtype = dtype
        self.unique = unique
        self.validators = self._dict_of_funcs(validators)
        self.recoders = self._dict_of_funcs(recoders)

    def _dict_of_funcs(self, funcs: list) -> t.Dict[str, t.Callable]:
        """Return an ordered dict mapping a label to each function in ``funcs``.

        The label is the function's ``__name__``; callables that have none
        (for example ``functools.partial`` objects) fall back to the name of
        their type.  When a label is already taken -- several lambdas, or the
        same function listed twice -- a numeric suffix (``_2``, ``_3``, ...)
        is appended so that no function is silently dropped.

        Args:
            funcs: The functions to label, in the order they should run.

        Returns:
            An ``OrderedDict`` of ``label -> function`` in the given order.
        """
        labelled = OrderedDict()
        for func in funcs:
            base = getattr(func, "__name__", None) or type(func).__name__
            label = base
            attempt = 1
            # Keep bumping the suffix until the label is free.
            while label in labelled:
                attempt += 1
                label = "{}_{}".format(base, attempt)
            labelled[label] = func
        return labelled

    def _validate_series_dtype(self, series: pd.Series) -> pd.Series:
        """Test each value of ``series`` with ``isinstance(value, self.dtype)``.

        Returns:
            A series with one boolean test result per value.
        """
        return series.apply(lambda value: isinstance(value, self.dtype))

    def validate(self, table: pd.DataFrame, recode: bool = False) -> pd.DataFrame:
        """Return a dataframe of validation results for this column.

        Args:
            table: Dataframe containing a column labelled ``self.name``.
            recode: When true, the column is passed through ``recode`` first
                and the recoded data is validated.

        Returns:
            A dataframe with one column per validator (named by its label),
            a ``'dtype'`` column and, when ``self.unique`` is set, a
            ``'unique'`` column.  The two built-in columns are written last,
            so a validator labelled ``'dtype'`` or ``'unique'`` is overwritten.
        """
        series = self.recode(table) if recode else table[self.name]

        # Seed one column per validator with the raw data; each column is then
        # replaced by what its validator returns for it.
        results = pd.DataFrame({label: series for label in self.validators})
        for label, func in self.validators.items():
            results[label] = func(results[label])

        results['dtype'] = self._validate_series_dtype(series)

        if self.unique:
            results['unique'] = v.funcs.unique(series)

        return results

    def recode(self, table: pd.DataFrame) -> pd.Series:
        """Pass this column's data through each recoder in turn.

        Args:
            table: Dataframe containing a column labelled ``self.name``.

        Returns:
            The output of the last recoder, or a copy of the original column
            when there are no recoders.  Recoders receive a copy of the
            column, never the column object held by ``table``.
        """
        data = table[self.name].copy()
        for recoder in self.recoders.values():
            data = recoder(data)
        return data