{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Typing where
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import DataFrame.Internal.Column (Column(..))
import DataFrame.Internal.DataFrame (DataFrame(..))
import DataFrame.Internal.Parsing
import Data.Either
import Data.Maybe
import Data.Time
import Data.Type.Equality (type (:~:)(Refl), TestEquality(..))
import Type.Reflection (typeRep)
-- | Run 'parseDefault' over every column slot of a data frame.
--
-- The 'Bool' flag is forwarded unchanged to 'parseDefault' for each slot;
-- all other fields of the frame are left as they were.
parseDefaults :: Bool -> DataFrame -> DataFrame
parseDefaults safeRead frame = frame{columns = inferred}
  where
    -- Each slot is re-typed independently of the others.
    inferred = V.map (parseDefault safeRead) (columns frame)
-- | Try to give a boxed column a more specific type.
--
-- Only 'BoxedColumn's holding 'T.Text' or 'String' values are inspected;
-- every other column (and an absent column) is returned unchanged.
--
-- For a 'T.Text' column the target type is chosen from the /first/ element
-- (after stripping whitespace), trying in order: 'Int', 'Double', 'Day'
-- (format @%Y-%m-%d@), and finally plain text. An empty 'T.Text' column is
-- returned unchanged, since there is no element to sample.
--
-- The 'Bool' argument (@safeRead@) controls how problematic cells are handled:
--
-- * 'True': if any cell is nullish (per 'isNullish') or fails to parse, the
--   result is a boxed column of 'Maybe' values (or, for dates with
--   non-null unparseable cells, of @'Either' 'T.Text' 'Day'@).
-- * 'False': numeric cells that fail to parse become @0@, date cells are
--   parsed with 'parseTimeOrError' (which raises on bad input when the cell
--   is evaluated), and text/string columns are returned as they are.
parseDefault :: Bool -> Maybe Column -> Maybe Column
parseDefault _ Nothing = Nothing
parseDefault safeRead (Just (BoxedColumn (c :: V.Vector a))) =
    case testEquality (typeRep @a) (typeRep @T.Text) of
        Just Refl -> Just (parseTextColumn c)
        Nothing -> case testEquality (typeRep @a) (typeRep @String) of
            Just Refl -> Just (parseStringColumn c)
            Nothing -> Just (BoxedColumn c)
  where
    -- The only date layout recognised by the inference.
    dateFormat :: String
    dateFormat = "%Y-%m-%d"

    -- Total date parser: 'Nothing' when the text is not a valid date.
    parseDay :: T.Text -> Maybe Day
    parseDay = parseTimeM True defaultTimeLocale dateFormat . T.unpack

    -- Partial date parser: raises on invalid input. Only used where the
    -- caller opted out of safe reading or every cell is known to parse.
    unsafeParseDay :: T.Text -> Day
    unsafeParseDay = parseTimeOrError True defaultTimeLocale dateFormat . T.unpack

    -- Map nullish text to 'Nothing', anything else to 'Just'.
    nonNullish :: T.Text -> Maybe T.Text
    nonNullish v
        | isNullish v = Nothing
        | otherwise = Just v

    -- String columns are never re-typed; under safeRead, nullish cells are
    -- turned into 'Nothing' when at least one such cell exists.
    parseStringColumn :: V.Vector String -> Column
    parseStringColumn col
        | safeRead && V.any isNothing cleaned = BoxedColumn cleaned
        | otherwise = BoxedColumn col
      where
        cleaned :: V.Vector (Maybe String)
        cleaned = V.map (\v -> if isNullish (T.pack v) then Nothing else Just v) col

    -- Pick a target type from the first cell and convert the whole column.
    parseTextColumn :: V.Vector T.Text -> Column
    parseTextColumn col
        -- Guard first: 'V.head' below is partial and would crash on an
        -- empty column.
        | V.null col = BoxedColumn col
        | isJust (readInt example) = intColumn col
        | isJust (readDouble example) = doubleColumn col
        | isJust (parseDay example) = dayColumn col
        | otherwise = textColumn col
      where
        example :: T.Text
        example = T.strip (V.head col)

    -- Int column: boxed 'Maybe Int' if safeRead found a null or unparseable
    -- cell, otherwise unboxed with such cells defaulting to 0.
    intColumn :: V.Vector T.Text -> Column
    intColumn col
        | safeRead && V.elem Nothing parsed = BoxedColumn parsed
        | otherwise =
            UnboxedColumn (VU.generate (V.length col) (fromMaybe 0 . (parsed V.!)))
      where
        parsed :: V.Vector (Maybe Int)
        parsed = V.map ((readInt =<<) . nonNullish) col

    -- Double column: same policy as 'intColumn'.
    doubleColumn :: V.Vector T.Text -> Column
    doubleColumn col
        | safeRead && V.elem Nothing parsed = BoxedColumn parsed
        | otherwise =
            UnboxedColumn (VU.generate (V.length col) (fromMaybe 0 . (parsed V.!)))
      where
        parsed :: V.Vector (Maybe Double)
        parsed = V.map ((readDouble =<<) . nonNullish) col

    -- Day column. Under safeRead:
    --   * failures that are all nullish  -> 'Maybe Day'
    --   * no failures at all             -> plain 'Day'
    --   * any non-null failure           -> 'Either Text Day' (keeps raw text)
    -- Without safeRead every cell goes through the partial parser.
    dayColumn :: V.Vector T.Text -> Column
    dayColumn col
        | not safeRead = BoxedColumn (V.map unsafeParseDay col)
        | onlyNullFailures = BoxedColumn (V.map (either (const Nothing) Just) parsed)
        | V.null failures = BoxedColumn (V.map unsafeParseDay col)
        | otherwise = BoxedColumn parsed
      where
        parsed :: V.Vector (Either T.Text Day)
        parsed = V.map tryParse col

        -- 'Left' carries the original cell text for null or unparseable cells.
        tryParse :: T.Text -> Either T.Text Day
        tryParse v
            | isNullish v = Left v
            | otherwise = maybe (Left v) Right (parseDay v)

        failures :: V.Vector (Either T.Text Day)
        failures = V.filter isLeft parsed

        onlyNullFailures :: Bool
        onlyNullFailures =
            not (V.null failures)
                && V.all (isNullish . fromLeft "non-null") failures

    -- Plain text column: under safeRead, nullish cells become 'Nothing' when
    -- at least one exists; otherwise the column is returned untouched.
    textColumn :: V.Vector T.Text -> Column
    textColumn col
        | safeRead && V.any isNullish col = BoxedColumn (V.map nonNullish col)
        | otherwise = BoxedColumn col
parseDefault _ column = column