{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Aggregation where

import qualified Data.Set as S

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Map.Strict as MS
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Unboxed as VU
import qualified Statistics.Quantile as SS
import qualified Statistics.Sample as SS

import Control.Exception (throw)
import Control.Monad (foldM_)
import Control.Monad.ST (runST)
import DataFrame.Internal.Column (Column(..), toColumn', getIndicesUnboxed, getIndices)
import DataFrame.Internal.DataFrame (DataFrame(..), empty, getColumn)
import DataFrame.Internal.Parsing
import DataFrame.Internal.Types
import DataFrame.Errors
import DataFrame.Operations.Core
import DataFrame.Operations.Subset
import Data.Function ((&))
import Data.Hashable
import Data.Maybe
import Data.Type.Equality (type (:~:)(Refl), TestEquality(..))
import Type.Reflection (typeRep, typeOf)

-- | O(k * n) Groups the rows of a dataframe by the values in the given key
-- columns.
--
-- The result has one row per group. Each key column keeps the value of one
-- representative row per group, and every remaining column is turned into a
-- grouped column holding, per group, the values of all rows in that group.
-- The grouped columns are meant to be reduced afterwards (see 'reduceBy').
--
-- Rows are assigned to groups by the hash computed by 'mkRowRep', and the
-- groups are emitted in ascending order of that hash (the traversal order of
-- the map they are collected in), not in order of first appearance.
--
-- NOTE(review): two different keys whose hashes collide end up in the same
-- group, because only the hash is used as the map key.
--
-- Throws 'ColumnNotFoundException' when any of @names@ is not a column of
-- the dataframe.
groupBy ::
  [T.Text] ->
  DataFrame ->
  DataFrame
groupBy names df
  | not (all (`elem` columnNames df) names) =
      throw
        ( ColumnNotFoundException
            (T.pack (show (names L.\\ columnNames df)))
            "groupBy"
            (columnNames df)
        )
  | otherwise = L.foldl' (groupColumns groupRowIndices df) keyFrame valueColumns
  where
    -- Files a row index under its row hash. A new hash starts a singleton
    -- list; otherwise the index is merged with 'appendWithFrontMin'
    -- (defined elsewhere; presumably keeps the smallest index at the head,
    -- which is what 'keyIndices' below relies on -- TODO confirm).
    addRow :: Int -> Int -> M.Map Int [Int] -> M.Map Int [Int]
    addRow rowHash rowIx = MS.alter (Just . maybe [rowIx] (appendWithFrontMin rowIx)) rowHash

    -- One hash per row, computed over the key columns only.
    rowHashes :: V.Vector Int
    rowHashes = V.generate (fst (dimensions df)) (mkRowRep df (S.fromList names))

    -- Hash -> indices of all rows carrying that hash.
    rowsByHash :: M.Map Int [Int]
    rowsByHash =
      V.ifoldl' (\seen rowIx rowHash -> addRow rowHash rowIx seen) M.empty rowHashes

    -- The row indices of every group, one unboxed vector per group, in
    -- ascending hash order.
    groupRowIndices :: V.Vector (VU.Vector Int)
    groupRowIndices = V.fromList (map VU.fromList (MS.elems rowsByHash))

    -- The representative (head) row index of every group; these rows supply
    -- the values of the key columns.
    keyIndices :: VU.Vector Int
    keyIndices = VG.convert (V.map VU.head groupRowIndices)

    -- The starting frame: just the key columns, one row per group.
    keyFrame :: DataFrame
    keyFrame = L.foldl' (mkGroupedColumns keyIndices df) empty names

    -- Every column that is not a key; these become grouped columns.
    valueColumns :: [T.Text]
    valueColumns = columnNames df L.\\ names

-- | Computes the grouping hash of row @i@ of a dataframe.
--
-- Only the columns whose names are in @names@ contribute. For each such
-- column the cell at row @i@ is converted with 'hash'' and the resulting
-- list of doubles is hashed.
--
-- The function is written so that @mkRowRep df names@ can be partially
-- applied and then called once per row: the slot-index-to-name map depends
-- only on the dataframe and is bound outside the row lambda so it can be
-- shared across rows instead of being rebuilt for each one.
--
-- Calls 'error' when a key column has no element at index @i@. Only boxed,
-- optional and unboxed columns are matched; any other column constructor is
-- not handled here.
mkRowRep :: DataFrame -> S.Set T.Text -> Int -> Int
mkRowRep df names = \i -> hash (V.ifoldl' (go i) [] (columns df))
  where
    -- Column slot index -> column name (the inverse of 'columnIndices').
    indexMap :: M.Map Int T.Text
    indexMap = M.fromList (map (\(a, b) -> (b, a)) $ M.toList (columnIndices df))

    -- Whether the column stored in slot @k@ is one of the grouping keys.
    isKeyColumn :: Int -> Bool
    isKeyColumn k = S.member (indexMap M.! k) names

    -- Failure shared by every column kind: slot @k@ has no row @i@.
    tooShort :: Int -> Int -> [Double]
    tooShort k i =
      error $
        "Column "
          ++ T.unpack (indexMap M.! k)
          ++ " has less items than "
          ++ "the other columns at index "
          ++ show i

    -- Fold step over the column slots: prepends the hashed cell of every key
    -- column to the accumulator and passes everything else through.
    go :: Int -> [Double] -> Int -> Maybe Column -> [Double]
    go _ acc _ Nothing = acc
    go i acc k (Just (BoxedColumn (c :: V.Vector a)))
      | isKeyColumn k = maybe (tooShort k i) (\e -> hash' @a e : acc) (c V.!? i)
      | otherwise = acc
    go i acc k (Just (OptionalColumn (c :: V.Vector (Maybe a))))
      | isKeyColumn k = maybe (tooShort k i) (\e -> hash' @(Maybe a) e : acc) (c V.!? i)
      | otherwise = acc
    go i acc k (Just (UnboxedColumn (c :: VU.Vector a)))
      | isKeyColumn k = maybe (tooShort k i) (\e -> hash' @a e : acc) (c VU.!? i)
      | otherwise = acc

-- | Turns a cell value into a 'Double' for use in a row hash.
--
-- The runtime type of the value decides the result:
--
-- * a 'Double' is returned unchanged;
-- * an 'Int' is converted with 'fromIntegral';
-- * a 'T.Text' is hashed and the hash converted to a 'Double';
-- * anything else is rendered with 'show' and that string is hashed.
hash' :: Columnable a => a -> Double
hash' value
  | Just Refl <- testEquality valueRep (typeRep @Double) = value
  | Just Refl <- testEquality valueRep (typeRep @Int) = fromIntegral value
  | Just Refl <- testEquality valueRep (typeRep @T.Text) = fromIntegral (hash value)
  | otherwise = fromIntegral (hash (show value))
  where
    -- Runtime type of the argument, compared against each special case.
    valueRep = typeOf value

-- | Fold step that adds one key column to the grouped dataframe.
--
-- Looks up column @name@ in @df@, selects the rows listed in @indices@ (via
-- 'getIndices' or 'getIndicesUnboxed') and inserts the result under the
-- same name into @acc@.
--
-- Calls 'error' if the slot registered for @name@ is empty. Only boxed,
-- optional and unboxed columns are matched.
mkGroupedColumns :: VU.Vector Int -> DataFrame -> DataFrame -> T.Text -> DataFrame
mkGroupedColumns indices df acc name =
  case columns df V.! (columnIndices df M.! name) of
    Nothing -> error "Unexpected"
    Just (BoxedColumn column) ->
      insertColumn name (getIndices indices column) acc
    Just (OptionalColumn column) ->
      insertColumn name (getIndices indices column) acc
    Just (UnboxedColumn column) ->
      insertUnboxedColumn name (getIndicesUnboxed indices column) acc

-- | Fold step that adds one grouped (vector-valued) column to the grouped
-- dataframe.
--
-- @indices@ holds, for every group, the row indices belonging to it. Column
-- @name@ of @df@ is split along those indices (via 'getIndices' or
-- 'getIndicesUnboxed') and inserted into @acc@ as a 'GroupedBoxedColumn' or
-- 'GroupedUnboxedColumn'.
--
-- If the slot registered for @name@ is empty there is nothing to group and
-- the accumulator is returned untouched. Only boxed, optional and unboxed
-- columns are matched.
groupColumns :: V.Vector (VU.Vector Int) -> DataFrame -> DataFrame -> T.Text -> DataFrame
groupColumns indices df acc name =
  case (V.!) (columns df) (columnIndices df M.! name) of
    -- Return the accumulator, not the source frame: this function is used as
    -- a fold step, and handing back @df@ would discard every column grouped
    -- so far and continue the fold on the ungrouped input.
    Nothing -> acc
    (Just (BoxedColumn column)) ->
      let vs = V.map (`getIndices` column) indices
       in insertColumn' name (Just $ GroupedBoxedColumn vs) acc
    (Just (OptionalColumn column)) ->
      let vs = V.map (`getIndices` column) indices
       in insertColumn' name (Just $ GroupedBoxedColumn vs) acc
    (Just (UnboxedColumn column)) ->
      let vs = V.map (`getIndicesUnboxed` column) indices
       in insertColumn' name (Just $ GroupedUnboxedColumn vs) acc

-- | The reductions that can be requested from 'groupByAgg' and
-- 'reduceByAgg'.
data Aggregation
  = -- | Number of elements in each group.
    Count
  | -- | Arithmetic mean of each group.
    Mean
  | -- | Presumably the smallest element of each group (handling not shown
    -- in this part of the module).
    Minimum
  | -- | Presumably the median of each group (handling not shown in this
    -- part of the module).
    Median
  | -- | Presumably the largest element of each group (handling not shown
    -- in this part of the module).
    Maximum
  | -- | Presumably the total of each group (handling not shown in this part
    -- of the module).
    Sum
  deriving (Show, Eq)

-- | Groups a dataframe by the given columns and reduces the groups with the
-- given aggregation in one step.
--
-- Only 'Count' is implemented: a helper column named @"Count"@ is inserted
-- with 'insertColumnWithDefault' (default value 1), the frame is grouped
-- with 'groupBy', and the helper column is reduced to the length of each
-- group. Every other aggregation calls 'error'.
groupByAgg :: Aggregation -> [T.Text] -> DataFrame -> DataFrame
groupByAgg Count keys df =
  reduceBy @Int VG.length "Count" (groupBy keys withCounter)
  where
    -- The input frame plus the helper column that is counted per group.
    withCounter = insertColumnWithDefault @Int 1 (T.pack (show Count)) V.empty df
groupByAgg _ _ _ = error "UNIMPLEMENTED"

-- | O(k * n) Reduces a grouped (vector-valued) column with a given function.
--
-- Column @name@ must be a 'GroupedBoxedColumn' or 'GroupedUnboxedColumn'
-- whose element type is @a@. The function @f@ is applied to the vector of
-- every group and the results replace the column under the same name.
--
-- Failure modes:
--
-- * throws 'ColumnNotFoundException' when 'getColumn' finds no column for
--   @name@;
-- * calls 'error' with @"Type error"@ when the stored element type is not
--   @a@;
-- * calls 'error' with @"Column is ungrouped"@ when the column exists but is
--   not a grouped column.
reduceBy ::
  forall a b . (Columnable a, Columnable b) =>
  (forall v . (VG.Vector v a) => v a -> b) ->
  T.Text ->
  DataFrame ->
  DataFrame
reduceBy f name df = case getColumn name df of
    -- A missing column is reported as such (matching 'groupBy') instead of
    -- falling through to the misleading "ungrouped" message below.
    Nothing -> throw $ ColumnNotFoundException name "reduceBy" (columnNames df)
    Just ((GroupedBoxedColumn (column :: V.Vector (V.Vector a')))) ->
      -- The column's element type is existential; prove it equals @a@
      -- before handing the groups to @f@.
      case testEquality (typeRep @a) (typeRep @a') of
        Just Refl -> insertColumn' name (Just $ toColumn' (VG.map f column)) df
        Nothing -> error "Type error"
    Just ((GroupedUnboxedColumn (column :: V.Vector (VU.Vector a')))) ->
      case testEquality (typeRep @a) (typeRep @a') of
        Just Refl -> insertColumn' name (Just $ toColumn' (VG.map f column)) df
        Nothing -> error "Type error"
    _ -> error "Column is ungrouped"

reduceByAgg :: Aggregation
            -> T.Text
            -> DataFrame
            -> DataFrame
reduceByAgg :: Aggregation -> Text -> DataFrame -> DataFrame
reduceByAgg Aggregation
agg Text
name DataFrame
df = case Aggregation
agg of
  Aggregation
Count   -> case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
    Just ((GroupedBoxedColumn (Vector (Vector a)
column :: V.Vector (V.Vector a')))) ->  Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Int -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> Int) -> Vector (Vector a) -> Vector Int
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length Vector (Vector a)
column)) DataFrame
df
    Just ((GroupedUnboxedColumn (Vector (Vector a)
column :: V.Vector (VU.Vector a')))) ->  Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Int -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> Int) -> Vector (Vector a) -> Vector Int
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length Vector (Vector a)
column)) DataFrame
df
    Maybe Column
_ -> [Char] -> DataFrame
forall a. HasCallStack => [Char] -> a
error ([Char] -> DataFrame) -> [Char] -> DataFrame
forall a b. (a -> b) -> a -> b
$ [Char]
"Cannot count ungrouped Column: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name 
  Aggregation
Mean    -> case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
    Just ((GroupedBoxedColumn (Vector (Vector a)
column :: V.Vector (V.Vector a')))) -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
      Just a :~: Int
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> Double) -> Vector (Vector a) -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
SS.mean (Vector Double -> Double)
-> (Vector a -> Vector Double) -> Vector a -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Double) -> Vector a -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral) Vector (Vector a)
column)) DataFrame
df
      Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
        Just a :~: Double
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector Double -> Double)
-> Vector (Vector Double) -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
SS.mean Vector (Vector a)
Vector (Vector Double)
column)) DataFrame
df
        Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
          Just a :~: Float
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> Double) -> Vector (Vector a) -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
SS.mean (Vector Double -> Double)
-> (Vector a -> Vector Double) -> Vector a -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Double) -> Vector a -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map a -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac) Vector (Vector a)
column)) DataFrame
df
          Maybe (a :~: Float)
Nothing -> [Char] -> DataFrame
forall a. HasCallStack => [Char] -> a
error ([Char] -> DataFrame) -> [Char] -> DataFrame
forall a b. (a -> b) -> a -> b
$ [Char]
"Cannot get mean of non-numeric column: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name -- Not sure what to do with no numeric - return nothing???
    Just ((GroupedUnboxedColumn (Vector (Vector a)
column :: V.Vector (VU.Vector a')))) -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
      Just a :~: Int
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> Double) -> Vector (Vector a) -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
SS.mean (Vector Double -> Double)
-> (Vector a -> Vector Double) -> Vector a -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Double) -> Vector a -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral) Vector (Vector a)
column)) DataFrame
df
      Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
        Just a :~: Double
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector Double -> Double)
-> Vector (Vector Double) -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
SS.mean Vector (Vector a)
Vector (Vector Double)
column)) DataFrame
df
        Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
          Just a :~: Float
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> Double) -> Vector (Vector a) -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
SS.mean (Vector Double -> Double)
-> (Vector a -> Vector Double) -> Vector a -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Double) -> Vector a -> Vector Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map a -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac) Vector (Vector a)
column)) DataFrame
df
          Maybe (a :~: Float)
Nothing -> [Char] -> DataFrame
forall a. HasCallStack => [Char] -> a
error ([Char] -> DataFrame) -> [Char] -> DataFrame
forall a b. (a -> b) -> a -> b
$ [Char]
"Cannot get mean of non-numeric column: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name -- Not sure what to do with no numeric - return nothing???
  Aggregation
Minimum -> case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
    Just ((GroupedBoxedColumn (Vector (Vector a)
column :: V.Vector (V.Vector a')))) ->  Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Ord a) => v a -> a
VG.minimum Vector (Vector a)
column)) DataFrame
df
    Just ((GroupedUnboxedColumn (Vector (Vector a)
column :: V.Vector (VU.Vector a')))) ->  Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Ord a) => v a -> a
VG.minimum Vector (Vector a)
column)) DataFrame
df
  Aggregation
Maximum -> case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
    Just ((GroupedBoxedColumn (Vector (Vector a)
column :: V.Vector (V.Vector a')))) ->  Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Ord a) => v a -> a
VG.maximum Vector (Vector a)
column)) DataFrame
df
    Just ((GroupedUnboxedColumn (Vector (Vector a)
column :: V.Vector (VU.Vector a')))) ->  Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Ord a) => v a -> a
VG.maximum Vector (Vector a)
column)) DataFrame
df
  Aggregation
Sum -> case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
    Just ((GroupedBoxedColumn (Vector (Vector a)
column :: V.Vector (V.Vector a')))) -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
      Just a :~: Int
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum Vector (Vector a)
column)) DataFrame
df
      Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
        Just a :~: Double
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum Vector (Vector a)
column)) DataFrame
df
        Maybe (a :~: Double)
Nothing -> [Char] -> DataFrame
forall a. HasCallStack => [Char] -> a
error ([Char] -> DataFrame) -> [Char] -> DataFrame
forall a b. (a -> b) -> a -> b
$ [Char]
"Cannot get sum of non-numeric column: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name -- Not sure what to do with no numeric - return nothing???
    Just ((GroupedUnboxedColumn (Vector (Vector a)
column :: V.Vector (VU.Vector a')))) -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
      Just a :~: Int
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum Vector (Vector a)
column)) DataFrame
df
      Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a') (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
        Just a :~: Double
Refl -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
name (Column -> Maybe Column
forall a. a -> Maybe a
Just (Column -> Maybe Column) -> Column -> Maybe Column
forall a b. (a -> b) -> a -> b
$ Vector a -> Column
forall a. Columnify a => Vector a -> Column
toColumn' ((Vector a -> a) -> Vector (Vector a) -> Vector a
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum Vector (Vector a)
column)) DataFrame
df
        Maybe (a :~: Double)
Nothing -> [Char] -> DataFrame
forall a. HasCallStack => [Char] -> a
error ([Char] -> DataFrame) -> [Char] -> DataFrame
forall a b. (a -> b) -> a -> b
$ [Char]
"Cannot get sum of non-numeric column: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name -- Not sure what to do with no numeric - return nothing???
  Aggregation
_ -> [Char] -> DataFrame
forall a. HasCallStack => [Char] -> a
error [Char]
"UNIMPLEMENTED"

-- | Apply a list of @(column name, aggregation)@ pairs to a dataframe.
--
-- For every pair the named column is cloned (via 'cloneColumn') under an
-- alias built as @show agg <> "_" <> name@, and that clone is then reduced
-- with 'reduceByAgg'. After all pairs have been processed, the source
-- columns named in the pairs are removed with 'exclude', so only the
-- aliased, reduced columns (plus any columns not mentioned) remain.
--
-- NOTE(review): presumably meant to run on the output of 'groupBy', since
-- the reducer appears to match grouped columns only - confirm with callers.
aggregate :: [(T.Text, Aggregation)] -> DataFrame -> DataFrame
aggregate aggs df =
  fold applyAggregation aggs df
    & exclude (map fst aggs)
  where
    -- Clone the source column under its alias, then reduce the clone so
    -- the source column stays untouched for other aggregations on it.
    applyAggregation :: (T.Text, Aggregation) -> DataFrame -> DataFrame
    applyAggregation (name, agg) d =
      cloneColumn name alias d
        & reduceByAgg agg alias
      where
        -- The alias text depends on the 'Show' instance of 'Aggregation'.
        alias = T.pack (show agg) <> "_" <> name


-- | Add an element to a list while keeping the smaller of the new element
-- and the current head at the front.
--
-- * On an empty list the result is the singleton list.
-- * If the new element is strictly smaller than the head, it is consed
--   onto the front of the whole list.
-- * Otherwise the head stays first and the new element is placed directly
--   after it (this includes the case where both compare equal).
--
-- Only the head is inspected; the rest of the list is left in its
-- existing order and is not sorted.
appendWithFrontMin :: (Ord a) => a -> [a] -> [a]
appendWithFrontMin x [] = [x]
appendWithFrontMin x xs@(f : rest)
  | x < f = x : xs
  | otherwise = f : x : rest
{-# INLINE appendWithFrontMin #-}

-- | Deduplicate a dataframe by grouping on every one of its columns.
--
-- This simply delegates to 'groupBy', passing the frame's own
-- 'columnNames' as the grouping key, so every requested key column is
-- one the frame itself reports.
--
-- NOTE(review): presumably yields one row per distinct combination of
-- values; row order and column representation are whatever 'groupBy'
-- produces - confirm against its implementation.
distinct :: DataFrame -> DataFrame
distinct df = groupBy (columnNames df) df