{-# LANGUAGE OverloadedStrings #-}
module DataFrame.Internal.Row where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Algorithms.Merge as VA

import Control.Exception (throw)
import Control.Monad.ST (runST)
import DataFrame.Errors (DataFrameException(..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame
import DataFrame.Internal.Types
import Data.Function (on)

type Row = V.Vector RowValue

toRowList :: [T.Text] -> DataFrame -> [Row]
toRowList :: [Text] -> DataFrame -> [Row]
toRowList [Text]
names DataFrame
df = let
    nameSet :: Set Text
nameSet = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList [Text]
names
  in (Int -> Row) -> [Int] -> [Row]
forall a b. (a -> b) -> [a] -> [b]
map (DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
nameSet) [Int
0..((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)]

toRowVector :: [T.Text] -> DataFrame -> V.Vector Row
toRowVector :: [Text] -> DataFrame -> Vector Row
toRowVector [Text]
names DataFrame
df = let
    nameSet :: Set Text
nameSet = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList [Text]
names
  in Int -> (Int -> Row) -> Vector Row
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df)) (DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
nameSet)

mkRowFromArgs :: [T.Text] -> DataFrame -> Int -> Row
mkRowFromArgs :: [Text] -> DataFrame -> Int -> Row
mkRowFromArgs [Text]
names DataFrame
df Int
i = (Text -> RowValue) -> Vector Text -> Row
forall a b. (a -> b) -> Vector a -> Vector b
V.map Text -> RowValue
get ([Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text]
names)
  where
    get :: Text -> RowValue
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
      Maybe Column
Nothing -> DataFrameException -> RowValue
forall a e. Exception e => e -> a
throw (DataFrameException -> RowValue) -> DataFrameException -> RowValue
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
name Text
"[INTERNAL] mkRowFromArgs" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
      Just (BoxedColumn Vector a
column) -> a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue (Vector a
column Vector a -> Int -> a
forall a. Vector a -> Int -> a
V.! Int
i)
      Just (UnboxedColumn Vector a
column) -> a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue (Vector a
column Vector a -> Int -> a
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i)
      Just (OptionalColumn Vector (Maybe a)
column) -> Maybe a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue (Vector (Maybe a)
column Vector (Maybe a) -> Int -> Maybe a
forall a. Vector a -> Int -> a
V.! Int
i)

mkRowRep :: DataFrame -> S.Set T.Text -> Int -> Row
mkRowRep :: DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
names Int
i = Int -> (Int -> RowValue) -> Row
forall a. Int -> (Int -> a) -> Vector a
V.generate (Set Text -> Int
forall a. Set a -> Int
S.size Set Text
names) (\Int
index -> Text -> RowValue
get (Vector Text
names' Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
index))
  where
    inOrderIndexes :: [Text]
inOrderIndexes = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)
    names' :: Vector Text
names' = [Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text
n | Text
n <- [Text]
inOrderIndexes, Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member Text
n Set Text
names]
    throwError :: Text -> a
throwError Text
name = [Char] -> a
forall a. HasCallStack => [Char] -> a
error ([Char] -> a) -> [Char] -> a
forall a b. (a -> b) -> a -> b
$ [Char]
"Column "
                [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name
                [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" has less items than "
                [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"the other columns at index "
                [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i
    get :: Text -> RowValue
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
      Just (BoxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
        Just a
e -> a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue a
e
        Maybe a
Nothing -> Text -> RowValue
forall {a}. Text -> a
throwError Text
name
      Just (OptionalColumn Vector (Maybe a)
c) -> case Vector (Maybe a)
c Vector (Maybe a) -> Int -> Maybe (Maybe a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
        Just Maybe a
e -> Maybe a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue Maybe a
e
        Maybe (Maybe a)
Nothing -> Text -> RowValue
forall {a}. Text -> a
throwError Text
name
      Just (UnboxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Unbox a => Vector a -> Int -> Maybe a
VU.!? Int
i of
        Just a
e -> a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue a
e
        Maybe a
Nothing -> Text -> RowValue
forall {a}. Text -> a
throwError Text
name
      Just (GroupedBoxedColumn Vector (Vector a)
c) -> case Vector (Vector a)
c Vector (Vector a) -> Int -> Maybe (Vector a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
        Just Vector a
e -> Vector a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue Vector a
e
        Maybe (Vector a)
Nothing -> Text -> RowValue
forall {a}. Text -> a
throwError Text
name
      Just (GroupedUnboxedColumn Vector (Vector a)
c) -> case Vector (Vector a)
c Vector (Vector a) -> Int -> Maybe (Vector a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
        Just Vector a
e -> Vector a -> RowValue
forall a. Columnable a => a -> RowValue
toRowValue Vector a
e
        Maybe (Vector a)
Nothing -> Text -> RowValue
forall {a}. Text -> a
throwError Text
name

sortedIndexes' :: Bool -> V.Vector Row -> VU.Vector Int
sortedIndexes' :: Bool -> Vector Row -> Vector Int
sortedIndexes' Bool
asc Vector Row
rows = (forall s. ST s (Vector Int)) -> Vector Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector Int)) -> Vector Int)
-> (forall s. ST s (Vector Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
  MVector s (Int, Row)
withIndexes <- Vector (Int, Row)
-> ST s (Mutable Vector (PrimState (ST s)) (Int, Row))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
v a -> m (Mutable v (PrimState m) a)
VG.thaw (Vector Row -> Vector (Int, Row)
forall a. Vector a -> Vector (Int, a)
V.indexed Vector Row
rows)
  Comparison (Int, Row)
-> MVector (PrimState (ST s)) (Int, Row) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VA.sortBy ((if Bool
asc then Row -> Row -> Ordering
forall a. Ord a => a -> a -> Ordering
compare else (Row -> Row -> Ordering) -> Row -> Row -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Row -> Row -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) (Row -> Row -> Ordering)
-> ((Int, Row) -> Row) -> Comparison (Int, Row)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Int, Row) -> Row
forall a b. (a, b) -> b
snd) MVector s (Int, Row)
MVector (PrimState (ST s)) (Int, Row)
withIndexes
  Vector (Int, Row)
sorted <- Mutable Vector (PrimState (ST s)) (Int, Row)
-> ST s (Vector (Int, Row))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
VG.unsafeFreeze MVector s (Int, Row)
Mutable Vector (PrimState (ST s)) (Int, Row)
withIndexes
  Vector Int -> ST s (Vector Int)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Int -> ST s (Vector Int))
-> Vector Int -> ST s (Vector Int)
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Int) -> Vector Int
forall a. Unbox a => Int -> (Int -> a) -> Vector a
VU.generate (Vector Row -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length Vector Row
rows) (\Int
i -> (Int, Row) -> Int
forall a b. (a, b) -> a
fst (Vector (Int, Row)
sorted Vector (Int, Row) -> Int -> (Int, Row)
forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
VG.! Int
i))