import numpy as np
import operator
from gtable.lib import fillna_column
from gtable.fast import apply_fast_add, apply_fast_mul, apply_fast_truediv, \
apply_fast_sub, apply_fast_floordiv, apply_fast_and, apply_fast_or, \
apply_fast_xor, apply_fast_pow, apply_fast_mod, apply_fast_ge, \
apply_fast_gt, apply_fast_le, apply_fast_lt, apply_fast_eq, apply_fast_ne, \
apply_mask_column, reindex_column
class Column:
    """
    Indexed column view of the table.

    ``values`` stores only the entries that are present, packed one after
    another; ``index`` holds one flag per table row that is truthy where
    the row has a value.  The k-th truthy flag of ``index`` corresponds to
    ``values[k]`` (this is the lookup ``__getitem__`` performs).
    """

    def __init__(self, values, index):
        self.values = values
        self.index = index

    def __repr__(self):
        return "<Column[ {} ] object at {}>".format(self.values.dtype,
                                                    hex(id(self)))

    # -- Arithmetic -------------------------------------------------------
    # Binary operators delegate to the module-level ``apply_*`` helpers.

    def __add__(self, y):
        return apply_add(self, y)

    def __radd__(self, y):
        # Operand order does not matter for numeric addition, so the
        # reflected form reuses the forward helper.
        return apply_add(self, y)

    def __sub__(self, y):
        return apply_sub(self, y)

    def __rsub__(self, y):
        # Reflected form: must compute ``y - self``, not ``self - y``.
        # Python only calls this when ``y`` could not handle the operation
        # itself, so ``y`` is not a Column here.
        return Column(y - self.values, self.index)

    def __mul__(self, y):
        return apply_mul(self, y)

    def __rmul__(self, y):
        # Same reasoning as __radd__.
        return apply_mul(self, y)

    def __truediv__(self, y):
        return apply_truediv(self, y)

    def __rtruediv__(self, y):
        # Reflected form: ``y / self`` (operand order matters).
        return Column(y / self.values, self.index)

    def __floordiv__(self, y):
        return apply_floordiv(self, y)

    def __rfloordiv__(self, y):
        # Reflected form: ``y // self`` (operand order matters).
        return Column(y // self.values, self.index)

    def __pow__(self, y):
        return apply_pow(self, y)

    def __mod__(self, y):
        return apply_mod(self, y)

    # -- Comparisons (element-wise, each returns a Column) ----------------
    # NOTE: defining __eq__ without __hash__ makes instances unhashable.

    def __lt__(self, y):
        return apply_lt(self, y)

    def __le__(self, y):
        return apply_le(self, y)

    def __gt__(self, y):
        return apply_gt(self, y)

    def __ge__(self, y):
        return apply_ge(self, y)

    def __eq__(self, y):
        return apply_eq(self, y)

    def __ne__(self, y):
        return apply_ne(self, y)

    # -- Logical ----------------------------------------------------------

    def __and__(self, y):
        return apply_and(self, y)

    def __or__(self, y):
        return apply_or(self, y)

    def __xor__(self, y):
        return apply_xor(self, y)

    def __neg__(self):
        return Column(-self.values, self.index)

    def __getitem__(self, i):
        # TODO: This algorithm makes getitem O(N)
        if self.index[i]:
            # The number of truthy flags up to and including row ``i``,
            # minus one, is the position of that row inside ``values``.
            return self.values[int(self.index[:i+1].sum()) - 1]
        else:
            return None

    def __len__(self):
        # Length is the number of rows, not the number of stored values.
        return len(self.index)

    def copy(self):
        """Return a copy of the column that shares no data with it"""
        # ``arr[:]`` on a numpy array is a view; ``.copy()`` is needed so
        # that edits to the copy do not leak into this column.
        return Column(self.values.copy(), self.index.copy())

    def astype(self, dtype):
        """Changes the (numpy) datatype of the values. Operates inplace."""
        # Rebind instead of assigning into ``self.values[:]``: slice
        # assignment casts the data back to the old dtype.
        self.values = self.values.astype(dtype)

    @property
    def dtype(self):
        """Returns the datatype of the column"""
        return self.values.dtype

    def fillna(self, reverse=False, fillvalue=None):
        """
        Fills the non available value sequentially with the previous
        available position. Operates inplace.

        :param reverse: Forwarded unchanged to ``fillna_column``.
        :param fillvalue: Forwarded unchanged to ``fillna_column``.
        :return: None
        """
        self.values, self.index = fillna_column(self.values, self.index,
                                                reverse, fillvalue)

    def fill(self, fillvalue):
        """
        Fills the N/A values of the column with the fillvalue.
        Operates inplace; afterwards every row has a value.

        :param fillvalue: Value stored in every row that had no value.
        :return: None
        """
        new_values = np.empty(len(self.index), dtype=self.values.dtype)
        bool_index = self.index.astype(np.bool_)
        if np.any(bool_index):
            new_values[bool_index] = self.values
        new_values[~bool_index] = fillvalue
        self.values = new_values
        # Keep the dtype of the existing index; a bare np.ones() would
        # silently turn it into float64.
        self.index = np.ones(len(self.index), dtype=self.index.dtype)

    def reorder(self, order):
        """
        Reorder the column inplace so that new row ``k`` is old row
        ``order[k]``.

        :param order: Integer numpy array with the new order of the rows;
            expected to be a permutation of ``range(len(self))``.
        :return: None
        """
        # Position inside ``values`` of each row (meaningful only for
        # rows that have a value).  A signed dtype avoids unsigned
        # wrap-around for leading rows without a value.
        positions = np.cumsum(self.index, dtype=np.intp) - 1
        # A row contributes a value after reordering only when its
        # *source* row has one, so the mask is taken from the reordered
        # index rather than from the current one.
        has_value = self.index[order].astype(np.bool_)
        self.values[:] = self.values[positions[order[has_value]]]
        self.index[:] = self.index[order]

    def mask(self, mask):
        """
        Apply mask on data to the data and the index out of place

        :param mask: Forwarded unchanged to ``apply_mask_column``.
        :return: A new Column built from the result.
        """
        return Column(*apply_mask_column(self.values, self.index, mask))

    def reindex(self, index):
        """
        Reindex according to a global index

        :param index: Forwarded unchanged to ``reindex_column``.
        :return: A new Column built from the result.
        """
        return Column(*reindex_column(self.values, self.index, index))

    def date_range(self, fr='1970-01-01', to='2262-01-01', include_fr=True,
                   include_to=True):
        """
        Filter a column by date range.

        :param fr: Lower bound, a ``numpy.datetime64`` or a string that
            ``numpy.datetime64`` can parse.
        :param to: Upper bound, same accepted types as ``fr``.
        :param include_fr: Whether values equal to ``fr`` are selected.
        :param include_to: Whether values equal to ``to`` are selected.
        :return: Boolean Column, True where the value is in the range.
        :raises ValueError: If the column does not hold dates.
        """
        # ``dtype`` is never an *instance* of np.datetime64, so the type
        # test has to go through issubdtype.
        if not np.issubdtype(self.values.dtype, np.datetime64):
            raise ValueError("Method valid only with dates")

        if isinstance(fr, str):
            fr = np.datetime64(fr)

        if isinstance(to, str):
            to = np.datetime64(to)

        if include_fr:
            before = self.values >= fr
        else:
            before = self.values > fr

        if include_to:
            after = self.values <= to
        else:
            after = self.values < to

        return Column(before & after, self.index)

    def is_empty(self):
        """
        True if the column is empty

        :return: bool
        """
        return self.values.shape == (0,)

    def is_sorted(self):
        """
        True if the stored values are in non-decreasing order

        :return: bool
        """
        # Reduce to a single bool; an element-wise comparison array
        # cannot be used in an ``if``.
        return bool(np.all(self.values[:-1] <= self.values[1:]))

    def contains(self, item):
        """
        Returns a boolean column that is True where the value of this
        column is present in item.

        :param item: A numpy array or a Column with the accepted values.
        :return: Boolean Column sharing the index of this column.
        :raises ValueError: If item is neither an array nor a Column.
        """
        if isinstance(item, np.ndarray):
            return Column(np.isin(self.values, item), self.index)
        elif isinstance(item, Column):
            return Column(np.isin(self.values, item.values), self.index)
        else:
            raise ValueError('Argument must be an array or a column.')
def apply_add(left: Column, right):
    """
    Add ``right`` to the column ``left``.

    :param left: Column on the left-hand side of the addition.
    :param right: Another Column, or any operand accepted by
        ``left.values + right``.
    :return: A new Column holding the result.
    """
    if type(right) != Column:
        # Plain operand: combine it with the stored values directly and
        # reuse the index object of ``left``.
        return Column(left.values + right, left.index)

    # Column operand: apply_fast_add receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    summed, merged_index = apply_fast_add(left.values, right.values,
                                          left.index, right.index)
    return Column(summed, merged_index)
def apply_sub(left: Column, right):
    """
    Subtract ``right`` from the column ``left``.

    :param left: Column on the left-hand side of the subtraction.
    :param right: Another Column, or any operand accepted by
        ``left.values - right``.
    :return: A new Column holding the result.
    """
    if type(right) != Column:
        # Plain operand: subtract it from the stored values and reuse
        # the index object of ``left``.
        return Column(left.values - right, left.index)

    # Column operand: apply_fast_sub receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    difference, merged_index = apply_fast_sub(left.values, right.values,
                                              left.index, right.index)
    return Column(difference, merged_index)
def apply_mul(left: Column, right):
    """
    Multiply the column ``left`` by ``right``.

    :param left: Column on the left-hand side of the product.
    :param right: Another Column, or any operand accepted by
        ``left.values * right``.
    :return: A new Column holding the result.
    """
    if type(right) != Column:
        # Plain operand: multiply the stored values and reuse the index
        # object of ``left``.
        return Column(left.values * right, left.index)

    # Column operand: apply_fast_mul receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    product, merged_index = apply_fast_mul(left.values, right.values,
                                           left.index, right.index)
    return Column(product, merged_index)
def apply_truediv(left: Column, right):
    """
    Divide the column ``left`` by ``right`` (true division).

    :param left: Column used as the dividend.
    :param right: Another Column, or any operand accepted by
        ``left.values / right``.
    :return: A new Column holding the result.
    """
    if type(right) != Column:
        # Plain operand: divide the stored values and reuse the index
        # object of ``left``.
        return Column(left.values / right, left.index)

    # Column operand: apply_fast_truediv receives both value arrays and
    # both indexes and hands back the resulting values with an index.
    quotient, merged_index = apply_fast_truediv(left.values, right.values,
                                                left.index, right.index)
    return Column(quotient, merged_index)
def apply_floordiv(left: Column, right):
    """
    Divide the column ``left`` by ``right`` (floor division).

    :param left: Column used as the dividend.
    :param right: Another Column, or any operand accepted by
        ``left.values // right``.
    :return: A new Column holding the result.
    """
    if type(right) != Column:
        # Plain operand: floor-divide the stored values and reuse the
        # index object of ``left``.
        return Column(left.values // right, left.index)

    # Column operand: apply_fast_floordiv receives both value arrays and
    # both indexes and hands back the resulting values with an index.
    quotient, merged_index = apply_fast_floordiv(left.values, right.values,
                                                 left.index, right.index)
    return Column(quotient, merged_index)
def apply_pow(left: Column, right):
    """
    Raise the column ``left`` to the power ``right``.

    :param left: Column used as the base.
    :param right: Another Column, or any operand accepted by
        ``left.values ** right``.
    :return: A new Column holding the result.
    """
    if type(right) != Column:
        # Plain operand: exponentiate the stored values and reuse the
        # index object of ``left``.
        return Column(left.values ** right, left.index)

    # Column operand: apply_fast_pow receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    powered, merged_index = apply_fast_pow(left.values, right.values,
                                           left.index, right.index)
    return Column(powered, merged_index)
def apply_mod(left: Column, right):
    """
    Compute the column ``left`` modulo ``right``.

    :param left: Column on the left-hand side of the ``%`` operator.
    :param right: Another Column, or any operand accepted by
        ``left.values % right``.
    :return: A new Column holding the result.
    """
    if type(right) != Column:
        # Plain operand: take the remainder of the stored values and
        # reuse the index object of ``left``.
        return Column(left.values % right, left.index)

    # Column operand: apply_fast_mod receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    remainder, merged_index = apply_fast_mod(left.values, right.values,
                                             left.index, right.index)
    return Column(remainder, merged_index)
def apply_gt(left: Column, right):
    """
    Evaluate ``left > right``.

    :param left: Column on the left-hand side of the comparison.
    :param right: Another Column, or any operand accepted by
        ``left.values > right``.
    :return: A new Column holding the comparison result.
    """
    if type(right) != Column:
        # Plain operand: compare the stored values directly and reuse
        # the index object of ``left``.
        return Column(left.values > right, left.index)

    # Column operand: apply_fast_gt receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    outcome, merged_index = apply_fast_gt(left.values, right.values,
                                          left.index, right.index)
    return Column(outcome, merged_index)
def apply_ge(left: Column, right):
    """
    Evaluate ``left >= right``.

    :param left: Column on the left-hand side of the comparison.
    :param right: Another Column, or any operand accepted by
        ``left.values >= right``.
    :return: A new Column holding the comparison result.
    """
    if type(right) != Column:
        # Plain operand: compare the stored values directly and reuse
        # the index object of ``left``.
        return Column(left.values >= right, left.index)

    # Column operand: apply_fast_ge receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    outcome, merged_index = apply_fast_ge(left.values, right.values,
                                          left.index, right.index)
    return Column(outcome, merged_index)
def apply_lt(left: Column, right):
    """
    Evaluate ``left < right``.

    :param left: Column on the left-hand side of the comparison.
    :param right: Another Column, or any operand accepted by
        ``left.values < right``.
    :return: A new Column holding the comparison result.
    """
    if type(right) != Column:
        # Plain operand: compare the stored values directly and reuse
        # the index object of ``left``.
        return Column(left.values < right, left.index)

    # Column operand: apply_fast_lt receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    outcome, merged_index = apply_fast_lt(left.values, right.values,
                                          left.index, right.index)
    return Column(outcome, merged_index)
def apply_le(left: Column, right):
    """
    Evaluate ``left <= right``.

    :param left: Column on the left-hand side of the comparison.
    :param right: Another Column, or any operand accepted by
        ``left.values <= right``.
    :return: A new Column holding the comparison result.
    """
    if type(right) != Column:
        # Plain operand: compare the stored values directly and reuse
        # the index object of ``left``.
        return Column(left.values <= right, left.index)

    # Column operand: apply_fast_le receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    outcome, merged_index = apply_fast_le(left.values, right.values,
                                          left.index, right.index)
    return Column(outcome, merged_index)
def apply_and(left: Column, right):
    """
    Evaluate ``left & right``.

    :param left: Column on the left-hand side of the operator.
    :param right: Another Column (or subclass), or any operand that can
        be and-ed with a boolean array.
    :return: A new Column holding the result.
    """
    if isinstance(right, Column):
        # apply_fast_and receives both value arrays and both indexes and
        # hands back the resulting values together with an index.
        result, index = apply_fast_and(left.values, right.values,
                                       left.index, right.index)
        return Column(result, index)
    else:
        # The values are cast to boolean before the operand is applied.
        # np.bool_ is used because the np.bool alias no longer exists in
        # current NumPy releases.
        return Column(operator.and_(left.values.astype(np.bool_), right),
                      left.index)
def apply_or(left: Column, right):
    """
    Evaluate ``left | right``.

    :param left: Column on the left-hand side of the operator.
    :param right: Another Column (or subclass), or any operand that can
        be or-ed with a boolean array.
    :return: A new Column holding the result.
    """
    if isinstance(right, Column):
        # apply_fast_or receives both value arrays and both indexes and
        # hands back the resulting values together with an index.
        result, index = apply_fast_or(left.values, right.values,
                                      left.index, right.index)
        return Column(result, index)
    else:
        # The values are cast to boolean before the operand is applied.
        # np.bool_ is used because the np.bool alias no longer exists in
        # current NumPy releases.
        return Column(operator.or_(left.values.astype(np.bool_), right),
                      left.index)
def apply_xor(left: Column, right):
    """
    Evaluate ``left ^ right``.

    :param left: Column on the left-hand side of the operator.
    :param right: Another Column (or subclass), or any operand that can
        be xor-ed with a boolean array.
    :return: A new Column holding the result.
    """
    if isinstance(right, Column):
        # apply_fast_xor receives both value arrays and both indexes and
        # hands back the resulting values together with an index.
        result, index = apply_fast_xor(left.values, right.values,
                                       left.index, right.index)
        return Column(result, index)
    else:
        # The values are cast to boolean before the operand is applied.
        # np.bool_ is used because the np.bool alias no longer exists in
        # current NumPy releases.
        return Column(operator.xor(left.values.astype(np.bool_), right),
                      left.index)
def apply_eq(left: Column, right):
    """
    Evaluate ``left == right``.

    :param left: Column on the left-hand side of the comparison.
    :param right: Another Column, or any operand accepted by
        ``left.values == right``.
    :return: A new Column holding the comparison result.
    """
    if type(right) != Column:
        # Plain operand: compare the stored values directly and reuse
        # the index object of ``left``.
        return Column(left.values == right, left.index)

    # Column operand: apply_fast_eq receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    outcome, merged_index = apply_fast_eq(left.values, right.values,
                                          left.index, right.index)
    return Column(outcome, merged_index)
def apply_ne(left: Column, right):
    """
    Evaluate ``left != right``.

    :param left: Column on the left-hand side of the comparison.
    :param right: Another Column, or any operand accepted by
        ``left.values != right``.
    :return: A new Column holding the comparison result.
    """
    if type(right) != Column:
        # Plain operand: compare the stored values directly and reuse
        # the index object of ``left``.
        return Column(left.values != right, left.index)

    # Column operand: apply_fast_ne receives both value arrays and both
    # indexes and hands back the resulting values together with an index.
    outcome, merged_index = apply_fast_ne(left.values, right.values,
                                          left.index, right.index)
    return Column(outcome, merged_index)