{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StrictData #-}
-- | Descriptive statistics over the columns of a 'DataFrame'.
module DataFrame.Operations.Statistics where

import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import qualified Statistics.Quantile as SS
import qualified Statistics.Sample as SS

import Prelude as P

import Control.Exception (throw)
import DataFrame.Errors (DataFrameException(..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame(..), getColumn, empty)
import DataFrame.Internal.Types (Columnable, transform)
import DataFrame.Operations.Core
import Data.Foldable (asum)
import Data.Maybe (isJust, fromMaybe)
import Data.Function ((&))
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Type.Reflection (typeRep)

-- | Build a frequency table for the named column.
--
-- The result has a @"Statistic"@ column holding the labels @"Count"@ and
-- @"Percentage (%)"@, followed by one 'Int' column per entry returned by
-- 'valueCounts'. Percentages use integer division, so they are truncated.
--
-- Calls 'error' with a descriptive message when the column is missing or
-- is stored in a representation other than boxed, optional or unboxed
-- (previously this was an anonymous pattern-match failure).
frequencies :: T.Text -> DataFrame -> DataFrame
frequencies name df = case getColumn name df of
  Just (BoxedColumn (_ :: V.Vector a)) -> frequencyTable (valueCounts @a name df)
  -- As in the boxed case, the pattern signature binds @a@ to the element
  -- type of the stored vector; the element type is whatever the
  -- constructor actually stores.
  Just (OptionalColumn (_ :: V.Vector a)) -> frequencyTable (valueCounts @a name df)
  Just (UnboxedColumn (_ :: VU.Vector a)) -> frequencyTable (valueCounts @a name df)
  _ ->
    error $
      "frequencies: column "
        ++ show (T.unpack name)
        ++ " was not found or has an unsupported representation"
  where
    -- Turn (value, count) pairs into the frequency table.
    frequencyTable :: forall a. (Columnable a) => [(a, Int)] -> DataFrame
    frequencyTable counts = L.foldl' addValue initial counts
      where
        -- Only demanded inside 'addValue', so an empty list of counts never
        -- evaluates the division by 'total'.
        total = P.sum (map snd counts)
        initial =
          empty
            & insertColumn "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])
        addValue acc (value, k) =
          insertColumn (valueLabel value) (V.fromList [k, (k * 100) `div` total]) acc

    -- Column header for a value: 'T.Text' is used as-is, 'String' is packed,
    -- and every other type goes through 'show'.
    valueLabel :: forall a. (Columnable a) => a -> T.Text
    valueLabel value = case testEquality (typeRep @a) (typeRep @T.Text) of
      Just Refl -> value
      Nothing -> case testEquality (typeRep @a) (typeRep @String) of
        Just Refl -> T.pack value
        Nothing -> T.pack (show value)

-- | 'SS.mean' of the named column, via 'applyStatistic'.
mean :: T.Text -> DataFrame -> Maybe Double
mean = applyStatistic SS.mean

-- | 'SS.median' of the named column, using the 'SS.medianUnbiased'
-- continuous-quantile parameters.
median :: T.Text -> DataFrame -> Maybe Double
median = applyStatistic (SS.median SS.medianUnbiased)

-- | Standard deviation of the named column, computed with 'SS.fastStdDev'.
standardDeviation :: T.Text -> DataFrame -> Maybe Double
standardDeviation = applyStatistic SS.fastStdDev

-- | 'SS.skewness' of the named column.
skewness :: T.Text -> DataFrame -> Maybe Double
skewness = applyStatistic SS.skewness

-- | 'SS.variance' of the named column.
variance :: T.Text -> DataFrame -> Maybe Double
variance = applyStatistic SS.variance

-- | Inter-quartile range of the named column: 'SS.midspread' with
-- 'SS.medianUnbiased' and 4 quantiles.
interQuartileRange :: T.Text -> DataFrame -> Maybe Double
interQuartileRange = applyStatistic (SS.midspread SS.medianUnbiased 4)

-- | 'SS.correlation' between two named columns.
--
-- Returns 'Nothing' if either column cannot be read by
-- '_getColumnAsDouble'.
correlation :: T.Text -> T.Text -> DataFrame -> Maybe Double
correlation first second df = do
  f <- _getColumnAsDouble first df
  s <- _getColumnAsDouble second df
  return $ SS.correlation (VG.zip f s)

-- | Read an unboxed column as a vector of 'Double'.
--
-- 'Double' columns are returned unchanged; 'Int' and 'Float' columns are
-- converted element-wise. 'Float' is accepted for consistency with
-- 'applyStatistic' and 'applyStatistics'. Anything else (missing column,
-- non-unboxed column, other element type) yields 'Nothing'.
_getColumnAsDouble :: T.Text -> DataFrame -> Maybe (VU.Vector Double)
_getColumnAsDouble name df = case getColumn name df of
  Just (UnboxedColumn (f :: VU.Vector a)) ->
    case testEquality (typeRep @a) (typeRep @Double) of
      Just Refl -> Just f
      Nothing -> case testEquality (typeRep @a) (typeRep @Int) of
        Just Refl -> Just $ VU.map fromIntegral f
        Nothing -> case testEquality (typeRep @a) (typeRep @Float) of
          Just Refl -> Just $ VU.map realToFrac f
          Nothing -> Nothing
  _ -> Nothing

-- | Sum of an unboxed 'Int' or 'Double' column, as a 'Double'.
--
-- Returns 'Nothing' for a missing column, for any other element type, and
-- for columns that are not unboxed (previously the latter was a
-- pattern-match failure).
sum :: T.Text -> DataFrame -> Maybe Double
sum name df = case getColumn name df of
  Just (UnboxedColumn (column :: VU.Vector a')) ->
    case testEquality (typeRep @a') (typeRep @Int) of
      Just Refl -> Just $ VG.sum (VU.map fromIntegral column)
      Nothing -> case testEquality (typeRep @a') (typeRep @Double) of
        Just Refl -> Just $ VG.sum column
        Nothing -> Nothing
  _ -> Nothing

-- | Apply a scalar statistic to the named column.
--
-- A column whose 'columnTypeString' is @"Double"@ is reduced directly.
-- Otherwise the first successful 'transform' from 'Int', 'Integer' or
-- 'Float' to 'Double' is used, falling back to the untouched column.
-- The result is 'Nothing' when the column is missing or when
-- 'safeReduceColumn' returns 'Nothing'.
applyStatistic :: (VU.Vector Double -> Double) -> T.Text -> DataFrame -> Maybe Double
applyStatistic f name df = do
  column <- getColumn name df
  if columnTypeString column == "Double"
    then safeReduceColumn f column
    else do
      matching <-
        asum
          [ transform (fromIntegral :: Int -> Double) column
          , transform (fromIntegral :: Integer -> Double) column
          , transform (realToFrac :: Float -> Double) column
          , Just column
          ]
      safeReduceColumn f matching

-- | Apply a vector-valued statistic to the named column.
--
-- Only unboxed 'Int', 'Double' and 'Float' columns are supported; every
-- other case yields 'Nothing'. The result vector is forced with '$!'
-- before it is wrapped in 'Just'.
applyStatistics :: (VU.Vector Double -> VU.Vector Double) -> T.Text -> DataFrame -> Maybe (VU.Vector Double)
applyStatistics f name df = case getColumn name df of
  Just (UnboxedColumn (column :: VU.Vector a')) ->
    case testEquality (typeRep @a') (typeRep @Int) of
      Just Refl -> Just $! f (VU.map fromIntegral column)
      Nothing -> case testEquality (typeRep @a') (typeRep @Double) of
        Just Refl -> Just $! f column
        Nothing -> case testEquality (typeRep @a') (typeRep @Float) of
          Just Refl -> Just $! f (VG.map realToFrac column)
          Nothing -> Nothing
  _ -> Nothing

-- | Summary table for a data frame.
--
-- The result starts with a @"Statistic"@ column of row labels. Each
-- column of the input for which every statistic is available adds one
-- column of values rounded to two decimal places; other columns are
-- skipped.
summarize :: DataFrame -> DataFrame
summarize df =
  fold
    columnStats
    (columnNames df)
    ( fromList
        [
          ( "Statistic"
          , toColumn
              [ "Mean" :: T.Text
              , "Minimum"
              , "25%"
              , "Median"
              , "75%"
              , "Max"
              , "StdDev"
              , "IQR"
              , "Skewness"
              ]
          )
        ]
    )
  where
    -- Add the statistics of one column, or leave the accumulator untouched
    -- if any of them is unavailable.
    columnStats name d = case sequence (stats name) of
      Just values -> insertUnboxedColumn name (VU.fromList (map (roundTo 2) values)) d
      Nothing -> d

    -- The statistics in the same order as the row labels above.
    stats name =
      let -- Quantiles 0..4 out of 4: minimum, quartiles and maximum.
          quantiles =
            applyStatistics
              (SS.quantilesVec SS.medianUnbiased (VU.fromList [0, 1, 2, 3, 4]) 4)
              name
              df
          quantileAt i = (VG.! i) <$> quantiles
          min' = quantileAt 0
          quartile1 = quantileAt 1
          median' = quantileAt 2
          quartile3 = quantileAt 3
          max' = quantileAt 4
          iqr = (-) <$> quartile3 <*> quartile1
       in [ mean name df
          , min'
          , quartile1
          , median'
          , quartile3
          , max'
          , standardDeviation name df
          , iqr
          , skewness name df
          ]

-- | Round a 'Double' to @n@ decimal places.
--
-- NaN and infinities are returned unchanged: 'round' to 'Integer' is not
-- meaningful for them and used to turn NaN into an infinity. A negative
-- @n@ rounds to the left of the decimal point instead of raising a
-- "Negative exponent" error.
roundTo :: Int -> Double -> Double
roundTo n x
  | isNaN x || isInfinite x = x
  | otherwise = fromInteger (round (x * scale)) / scale
  where
    scale = 10 ^^ n