{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Data.DataFrame.Operations.Transformations where
import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Map as M
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Control.Exception (throw)
import Data.DataFrame.Errors (DataFrameException(..))
import Data.DataFrame.Internal.Column (Column(..), columnTypeString, itransform, ifoldrColumn)
import Data.DataFrame.Internal.DataFrame (DataFrame(..), getColumn)
import Data.DataFrame.Internal.Function (Function(..), funcApply)
import Data.DataFrame.Internal.Row (mkRowFromArgs)
import Data.DataFrame.Internal.Types (Columnable, RowValue, toRowValue, transform)
import Data.DataFrame.Operations.Core
import Type.Reflection (typeRep, typeOf)
apply ::
forall b c.
(Columnable b, Columnable c) =>
(b -> c) ->
T.Text ->
DataFrame ->
DataFrame
apply :: forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply b -> c
f Text
columnName DataFrame
d = case Text -> DataFrame -> Maybe Column
getColumn Text
columnName DataFrame
d of
Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
columnName Text
"apply" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
d)
Just Column
column -> case (b -> c) -> Column -> Maybe Column
forall a b c.
(Transformable a, Columnable b, Columnable c) =>
(b -> c) -> a -> Maybe a
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Column -> Maybe Column
transform b -> c
f Column
column of
Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep b -> String -> Text -> Text -> DataFrameException
forall a.
Typeable a =>
TypeRep a -> String -> Text -> Text -> DataFrameException
TypeMismatchException' (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) (Column -> String
columnTypeString Column
column) Text
columnName Text
"apply"
Maybe Column
column' -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
columnName Maybe Column
column' DataFrame
d
deriveFrom :: ([T.Text], Function) -> T.Text -> DataFrame -> DataFrame
deriveFrom :: ([Text], Function) -> Text -> DataFrame -> DataFrame
deriveFrom ([Text]
args, Function
f) Text
name DataFrame
df = case Function
f of
(F4 (a -> b -> c -> d -> e
f' :: a -> b -> c -> d -> e)) -> let
xs :: Vector e
xs = (Vector RowValue -> e) -> Vector (Vector RowValue) -> Vector e
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @e Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector e)
-> Vector (Vector RowValue) -> Vector e
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
in Text -> Vector e -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector e
xs DataFrame
df
(F3 (a -> b -> c -> d
f' :: a -> b -> c -> d)) -> let
xs :: Vector d
xs = (Vector RowValue -> d) -> Vector (Vector RowValue) -> Vector d
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @d Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector d)
-> Vector (Vector RowValue) -> Vector d
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
in Text -> Vector d -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector d
xs DataFrame
df
(F2 (a -> b -> c
f' :: a -> b -> c)) -> let
xs :: Vector c
xs = (Vector RowValue -> c) -> Vector (Vector RowValue) -> Vector c
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @c Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector c)
-> Vector (Vector RowValue) -> Vector c
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
in Text -> Vector c -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector c
xs DataFrame
df
(F1 (a -> b
f' :: a -> b)) -> let
xs :: Vector b
xs = (Vector RowValue -> b) -> Vector (Vector RowValue) -> Vector b
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (\Vector RowValue
row -> forall c. Columnable c => Vector RowValue -> Function -> c
funcApply @b Vector RowValue
row Function
f) (Vector (Vector RowValue) -> Vector b)
-> Vector (Vector RowValue) -> Vector b
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Vector RowValue) -> Vector (Vector RowValue)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)) ([Text] -> DataFrame -> Int -> Vector RowValue
mkRowFromArgs [Text]
args DataFrame
df)
in Text -> Vector b -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> Vector a -> DataFrame -> DataFrame
insertColumn Text
name Vector b
xs DataFrame
df
derive ::
forall b c.
(Columnable b, Columnable c) =>
T.Text ->
(b -> c) ->
T.Text ->
DataFrame ->
DataFrame
derive :: forall b c.
(Columnable b, Columnable c) =>
Text -> (b -> c) -> Text -> DataFrame -> DataFrame
derive Text
alias b -> c
f Text
columnName DataFrame
d = case Text -> DataFrame -> Maybe Column
getColumn Text
columnName DataFrame
d of
Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
columnName Text
"derive" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
d)
Just Column
column -> case (b -> c) -> Column -> Maybe Column
forall a b c.
(Transformable a, Columnable b, Columnable c) =>
(b -> c) -> a -> Maybe a
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Column -> Maybe Column
transform b -> c
f Column
column of
Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep Column -> TypeRep b -> Text -> Text -> DataFrameException
forall a b.
(Typeable a, Typeable b) =>
TypeRep a -> TypeRep b -> Text -> Text -> DataFrameException
TypeMismatchException (Column -> TypeRep Column
forall a. Typeable a => a -> TypeRep a
typeOf Column
column) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) Text
columnName Text
"derive"
Just Column
res -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
alias (Column -> Maybe Column
forall a. a -> Maybe a
Just Column
res) DataFrame
d
applyMany ::
(Columnable b, Columnable c) =>
(b -> c) ->
[T.Text] ->
DataFrame ->
DataFrame
applyMany :: forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> [Text] -> DataFrame -> DataFrame
applyMany b -> c
f [Text]
names DataFrame
df = (DataFrame -> Text -> DataFrame)
-> DataFrame -> [Text] -> DataFrame
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' ((Text -> DataFrame -> DataFrame) -> DataFrame -> Text -> DataFrame
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((b -> c) -> Text -> DataFrame -> DataFrame
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply b -> c
f)) DataFrame
df [Text]
names
applyInt ::
(Columnable b) =>
(Int -> b) ->
T.Text ->
DataFrame ->
DataFrame
applyInt :: forall b.
Columnable b =>
(Int -> b) -> Text -> DataFrame -> DataFrame
applyInt = (Int -> b) -> Text -> DataFrame -> DataFrame
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply
applyDouble ::
(Columnable b) =>
(Double -> b) ->
T.Text ->
DataFrame ->
DataFrame
applyDouble :: forall b.
Columnable b =>
(Double -> b) -> Text -> DataFrame -> DataFrame
applyDouble = (Double -> b) -> Text -> DataFrame -> DataFrame
forall b c.
(Columnable b, Columnable c) =>
(b -> c) -> Text -> DataFrame -> DataFrame
apply
applyWhere ::
forall a b .
(Columnable a, Columnable b) =>
(a -> Bool) ->
T.Text ->
(b -> b) ->
T.Text ->
DataFrame ->
DataFrame
applyWhere :: forall a b.
(Columnable a, Columnable b) =>
(a -> Bool) -> Text -> (b -> b) -> Text -> DataFrame -> DataFrame
applyWhere a -> Bool
condition Text
filterColumnName b -> b
f Text
columnName DataFrame
df = case Text -> DataFrame -> Maybe Column
getColumn Text
filterColumnName DataFrame
df of
Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
filterColumnName Text
"applyWhere" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
Just Column
column -> case (Int -> a -> Vector Int -> Vector Int)
-> Vector Int -> Column -> Maybe (Vector Int)
forall a b.
(Columnable a, Columnable b) =>
(Int -> a -> b -> b) -> b -> Column -> Maybe b
ifoldrColumn (\Int
i a
val Vector Int
acc -> if a -> Bool
condition a
val then Int -> Vector Int -> Vector Int
forall a. a -> Vector a -> Vector a
V.cons Int
i Vector Int
acc else Vector Int
acc) Vector Int
forall a. Vector a
V.empty Column
column of
Maybe (Vector Int)
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String -> Text -> Text -> DataFrameException
forall a.
Typeable a =>
TypeRep a -> String -> Text -> Text -> DataFrameException
TypeMismatchException' (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (Column -> String
columnTypeString Column
column) Text
filterColumnName Text
"applyWhere"
Just Vector Int
indexes -> if Vector Int -> Bool
forall a. Vector a -> Bool
V.null Vector Int
indexes
then DataFrame
df
else (DataFrame -> Int -> DataFrame)
-> DataFrame -> Vector Int -> DataFrame
forall b a. (b -> a -> b) -> b -> Vector a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' (\DataFrame
d Int
i -> Int -> (b -> b) -> Text -> DataFrame -> DataFrame
forall a.
Columnable a =>
Int -> (a -> a) -> Text -> DataFrame -> DataFrame
applyAtIndex Int
i b -> b
f Text
columnName DataFrame
d) DataFrame
df Vector Int
indexes
applyAtIndex ::
forall a.
(Columnable a) =>
Int ->
(a -> a) ->
T.Text ->
DataFrame ->
DataFrame
applyAtIndex :: forall a.
Columnable a =>
Int -> (a -> a) -> Text -> DataFrame -> DataFrame
applyAtIndex Int
i a -> a
f Text
columnName DataFrame
df = case Text -> DataFrame -> Maybe Column
getColumn Text
columnName DataFrame
df of
Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
columnName Text
"applyAtIndex" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
Just Column
column -> case (Int -> a -> a) -> Column -> Maybe Column
forall b c.
(Columnable b, Columnable c) =>
(Int -> b -> c) -> Column -> Maybe Column
itransform (\Int
index a
value -> if Int
index Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
i then a -> a
f a
value else a
value) Column
column of
Maybe Column
Nothing -> DataFrameException -> DataFrame
forall a e. Exception e => e -> a
throw (DataFrameException -> DataFrame)
-> DataFrameException -> DataFrame
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String -> Text -> Text -> DataFrameException
forall a.
Typeable a =>
TypeRep a -> String -> Text -> Text -> DataFrameException
TypeMismatchException' (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (Column -> String
columnTypeString Column
column) Text
columnName Text
"applyAtIndex"
Maybe Column
column' -> Text -> Maybe Column -> DataFrame -> DataFrame
insertColumn' Text
columnName Maybe Column
column' DataFrame
df