{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Data.DataFrame.Operations.Transformations where

import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Map as M
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU

import Control.Exception (throw)
import Data.DataFrame.Errors (DataFrameException(..))
import Data.DataFrame.Internal.Column (Column(..), columnTypeString, itransform, ifoldrColumn)
import Data.DataFrame.Internal.DataFrame (DataFrame(..), getColumn)
import Data.DataFrame.Internal.Function (Function(..), funcApply)
import Data.DataFrame.Internal.Row (mkRowFromArgs)
import Data.DataFrame.Internal.Types (Columnable, RowValue, toRowValue, transform)
import Data.DataFrame.Operations.Core
import Type.Reflection (typeRep, typeOf)

-- | O(k) Apply a function to a given column in a dataframe.
apply ::
  forall b c.
  (Columnable b, Columnable c) =>
  -- | function to apply
  (b -> c) ->
  -- | Column name
  T.Text ->
  -- | DataFrame to apply operation to
  DataFrame ->
  DataFrame
apply :: forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply b -> c
f Text
columnName DataFrame
d = case Text -> DataFrame -> Maybe Column
getColumn Text
columnName DataFrame
d of
  Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
columnName Text
"apply" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
d)
  Just Column
column -> case (b -> c) -> Column -> Maybe Column
forall a b c.
(Transformable a, Columnable b, Columnable c) =>
(b -> c) -> a -> Maybe a
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Column -> Maybe Column
transform b -> c
f Column
column of
    Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep b -> String -> Text -> Text -> DataFrameException
forall a.
Typeable a =>
TypeRep a -> String -> Text -> Text -> DataFrameException
TypeMismatchException' (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) (Column -> String
columnTypeString Column
column) Text
columnName Text
"apply"
    Maybe Column
column' -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
columnName Maybe Column
column' DataFrame
d

-- | O(k) Apply a function to a combination of columns in a dataframe and
-- add the result into `alias` column.
deriveFrom :: ([T.Text], Function) -> T.Text -> DataFrame -> DataFrame
deriveFrom :: ([Text], Function) -> Text -> DataFrame -> DataFrame
deriveFrom ([Text]
args, Function
f) Text
name DataFrame
df = case Function
f of
  (F4 (a -> b -> c -> d -> e
f' :: a -> b -> c -> d -> e)) -> let
      xs :: Vector e
xs = (Vector RowValue -> e) -> Vector (Vector RowValue) -> Vector e
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @e Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector e)
-> Vector (Vector RowValue) -> Vector e
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
    in Text -> Vector e -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector e
xs DataFrame
df
  (F3 (a -> b -> c -> d
f' :: a -> b -> c -> d)) -> let
      xs :: Vector d
xs = (Vector RowValue -> d) -> Vector (Vector RowValue) -> Vector d
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @d Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector d)
-> Vector (Vector RowValue) -> Vector d
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
    in Text -> Vector d -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector d
xs DataFrame
df
  (F2 (a -> b -> c
f' :: a -> b -> c)) -> let
      xs :: Vector c
xs = (Vector RowValue -> c) -> Vector (Vector RowValue) -> Vector c
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @c Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector c)
-> Vector (Vector RowValue) -> Vector c
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
    in Text -> Vector c -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector c
xs DataFrame
df
  (F1 (a -> b
f' :: a -> b)) -> let
      xs :: Vector b
xs = (Vector RowValue -> b) -> Vector (Vector RowValue) -> Vector b
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @b Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector b)
-> Vector (Vector RowValue) -> Vector b
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
    in Text -> Vector b -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector b
xs DataFrame
df

-- | O(k) Apply a function to a given column in a dataframe and
-- add the result into alias column.

derive ::
  forall b c.
  (Columnable b, Columnable c) =>
  -- | New name
  T.Text ->
  -- | function to apply
  (b -> c) ->
  -- | Derivative column name
  T.Text ->
  -- | DataFrame to apply operation to
  DataFrame ->
  DataFrame
derive :: forall b c.
(Columnable b, Columnable c) =>
Text -> (b -> c) -> Text -> DataFrame -> DataFrame
derive Text
alias b -> c
f Text
columnName DataFrame
d = case Text -> DataFrame -> Maybe Column
getColumn Text
columnName DataFrame
d of
  Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
columnName Text
"derive" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
d)
  Just Column
column -> case (b -> c) -> Column -> Maybe Column
forall a b c.
(Transformable a, Columnable b, Columnable c) =>
(b -> c) -> a -> Maybe a
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Column -> Maybe Column
transform b -> c
f Column
column of
    Maybe Column
Nothing  -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep Column -> TypeRep b -> Text -> Text -> DataFrameException
forall a b.
(Typeable a, Typeable b) =>
TypeRep a -> TypeRep b -> Text -> Text -> DataFrameException
TypeMismatchException (Column -> TypeRep Column
forall a. Typeable a => a -> TypeRep a
typeOf Column
column) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) Text
columnName Text
"derive"
    Just Column
res -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
alias (Column -> Maybe Column
forall a. a -> Maybe a
Just Column
res) DataFrame
d

-- | O(k * n) Apply a function to given column names in a dataframe.
applyMany ::
  (Columnable b, Columnable c) =>
  (b -> c) ->
  [T.Text] ->
  DataFrame ->
  DataFrame
applyMany :: forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> [Text] -> DataFrame -> DataFrame
applyMany b -> c
f [Text]
names DataFrame
df = (DataFrame -> Text -> DataFrame)
-> DataFrame -> [Text] -> DataFrame
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' ((Text -> DataFrame -> DataFrame) -> DataFrame -> Text -> DataFrame
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((b -> c) -> Text -> DataFrame -> DataFrame
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply b -> c
f)) DataFrame
df [Text]
names

-- | O(k) Convenience function that applies to an int column.
applyInt ::
  (Columnable b) =>
  -- | Column name
  -- | function to apply
  (Int -> b) ->
  T.Text ->
  -- | DataFrame to apply operation to
  DataFrame ->
  DataFrame
applyInt :: forall b.
Columnable b =>
(Int -> b) -> Text -> DataFrame -> DataFrame
applyInt = (Int -> b) -> Text -> DataFrame -> DataFrame
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply

-- | O(k) Convenience function that applies to an double column.
applyDouble ::
  (Columnable b) =>
  -- | Column name
  -- | function to apply
  (Double -> b) ->
  T.Text ->
  -- | DataFrame to apply operation to
  DataFrame ->
  DataFrame
applyDouble :: forall b.
Columnable b =>
(Double -> b) -> Text -> DataFrame -> DataFrame
applyDouble = (Double -> b) -> Text -> DataFrame -> DataFrame
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply

-- | O(k * n) Apply a function to a column only if there is another column
-- value that matches the given criterion.
--
-- > applyWhere "Age" (<20) "Generation" (const "Gen-Z")
applyWhere ::
  forall a b .
  (Columnable a, Columnable b) =>
  (a -> Bool) -> -- Filter condition
  T.Text -> -- Criterion Column
  (b -> b) -> -- function to apply
  T.Text -> -- Column name
  DataFrame -> -- DataFrame to apply operation to
  DataFrame
applyWhere :: forall a b.
(Columnable a, Columnable b) =>
(a -> Bool) -> Text -> (b -> b) -> Text -> DataFrame -> DataFrame
applyWhere a -> Bool
condition Text
filterColumnName b -> b
f Text
columnName DataFrame
df = case Text -> DataFrame -> Maybe Column
getColumn Text
filterColumnName DataFrame
df of
  Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
filterColumnName Text
"applyWhere" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
  Just Column
column -> case (Int -> a -> Vector Int -> Vector Int)
-> Vector Int -> Column -> Maybe (Vector Int)
forall a b.
(Columnable a, Columnable b) =>
(Int -> a -> b -> b) -> b -> Column -> Maybe b
ifoldrColumn (\Int
i a
val Vector Int
acc -> if a -> Bool
condition a
val then Int -> Vector Int -> Vector Int
forall a. a -> Vector a -> Vector a
V.cons Int
i Vector Int
acc else Vector Int
acc) Vector Int
forall a. Vector a
V.empty Column
column of
      Maybe (Vector Int)
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String -> Text -> Text -> DataFrameException
forall a.
Typeable a =>
TypeRep a -> String -> Text -> Text -> DataFrameException
TypeMismatchException' (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (Column -> String
columnTypeString Column
column) Text
filterColumnName Text
"applyWhere"
      Just Vector Int
indexes -> if Vector Int -> Bool
forall a. Vector a -> Bool
V.null Vector Int
indexes
                      then DataFrame
df
                      else (DataFrame -> Int -> DataFrame)
-> DataFrame -> Vector Int -> DataFrame
forall b a. (b -> a -> b) -> b -> Vector a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' (\DataFrame
d Int
i -> Int -> (b -> b) -> Text -> DataFrame -> DataFrame
forall a.
Columnable a =>
Int -> (a -> a) -> Text -> DataFrame -> DataFrame
applyAtIndex Int
i b -> b
f Text
columnName DataFrame
d) DataFrame
df Vector Int
indexes

-- | O(k) Apply a function to the column at a given index.
applyAtIndex ::
  forall a.
  (Columnable a) =>
  -- | Index
  Int ->
  -- | function to apply
  (a -> a) ->
  -- | Column name
  T.Text ->
  -- | DataFrame to apply operation to
  DataFrame ->
  DataFrame
applyAtIndex :: forall a.
Columnable a =>
Int -> (a -> a) -> Text -> DataFrame -> DataFrame
applyAtIndex Int
i a -> a
f Text
columnName DataFrame
df = case Text -> DataFrame -> Maybe Column
getColumn Text
columnName DataFrame
df of
  Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
columnName Text
"applyAtIndex" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
  Just Column
column -> case (Int -> a -> a) -> Column -> Maybe Column
forall b c.
(Columnable b, Columnable c) =>
(Int -> b -> c) -> Column -> Maybe Column
itransform (\Int
index a
value -> if Int
index Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
i then a -> a
f a
value else a
value) Column
column of
    Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String -> Text -> Text -> DataFrameException
forall a.
Typeable a =>
TypeRep a -> String -> Text -> Text -> DataFrameException
TypeMismatchException' (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (Column -> String
columnTypeString Column
column) Text
columnName Text
"applyAtIndex"
    Maybe Column
column' -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
columnName Maybe Column
column' DataFrame
df