{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StrictData #-}
module DataFrame.Operations.Statistics where

import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import qualified Statistics.Quantile as SS
import qualified Statistics.Sample as SS

import Prelude as P

import Control.Exception (throw)
import DataFrame.Errors (DataFrameException(..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame(..), getColumn, empty)
import DataFrame.Internal.Types (Columnable, transform)
import DataFrame.Operations.Core
import Data.Foldable (asum)
import Data.Maybe (isJust, fromMaybe)
import Data.Function ((&))
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Type.Reflection (typeRep)


-- | Build a frequency table for the values of a single column.
--
-- The result starts with a @"Statistic"@ column holding the row labels
-- @"Count"@ and @"Percentage (%)"@. For every @(value, count)@ pair reported
-- by 'valueCounts' one more column is inserted, named after the value and
-- holding the count followed by its share of the total as an integer
-- percentage (@count * 100 \`div\` total@, so the fraction is truncated).
--
-- Column names are derived from the values: 'T.Text' is used as-is, 'String'
-- is packed, and every other type is rendered with 'show'.
--
-- Calls 'error' when 'getColumn' does not yield a boxed, optional or unboxed
-- column (in particular when the column does not exist).
-- NOTE(review): this module imports 'throw' and 'DataFrameException'; a
-- dedicated exception constructor may be the better fit here - confirm
-- against "DataFrame.Errors".
frequencies :: T.Text -> DataFrame -> DataFrame
frequencies name df = case getColumn name df of
  -- The pattern signatures only exist to bring the element type into scope
  -- so that 'valueCounts' can be applied at that type.
  Just (BoxedColumn (_ :: V.Vector a)) -> frequencyTable (valueCounts @a name df)
  Just (OptionalColumn (_ :: V.Vector a)) -> frequencyTable (valueCounts @a name df)
  Just (UnboxedColumn (_ :: VU.Vector a)) -> frequencyTable (valueCounts @a name df)
  _ ->
    error $
      "frequencies: column "
        ++ show name
        ++ " was not found or has an unsupported representation"
  where
    -- Turn the (value, count) pairs into the two-row result frame.
    frequencyTable :: forall a. Columnable a => [(a, Int)] -> DataFrame
    frequencyTable counts = L.foldl' addValue initDf counts
      where
        total = P.sum (map snd counts)

        initDf =
          empty
            & insertColumn "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])

        -- 'total' is only demanded when there is at least one pair, in which
        -- case it is the sum of the reported counts.
        addValue acc (value, k) =
          insertColumn (valueLabel value) (V.fromList [k, k * 100 `div` total]) acc

        -- Render a value as a column name without quoting textual values.
        valueLabel :: a -> T.Text
        valueLabel v = case testEquality (typeRep @a) (typeRep @T.Text) of
          Just Refl -> v
          Nothing -> case testEquality (typeRep @a) (typeRep @String) of
            Just Refl -> T.pack v
            Nothing -> T.pack (show v)

-- | Arithmetic mean of the named column, computed with 'SS.mean'.
--
-- Returns 'Nothing' whenever 'applyStatistic' cannot produce a value, e.g.
-- when 'getColumn' finds no column with that name.
mean :: T.Text -> DataFrame -> Maybe Double
mean = applyStatistic SS.mean

-- | Median of the named column, estimated with 'SS.median' using the
-- 'SS.medianUnbiased' continuous-quantile parameters.
--
-- Returns 'Nothing' whenever 'applyStatistic' cannot produce a value, e.g.
-- when 'getColumn' finds no column with that name.
median :: T.Text -> DataFrame -> Maybe Double
median = applyStatistic (SS.median SS.medianUnbiased)

-- | Standard deviation of the named column, computed with 'SS.fastStdDev'.
--
-- Returns 'Nothing' whenever 'applyStatistic' cannot produce a value, e.g.
-- when 'getColumn' finds no column with that name.
standardDeviation :: T.Text -> DataFrame -> Maybe Double
standardDeviation = applyStatistic SS.fastStdDev

-- | Skewness of the named column, computed with 'SS.skewness'.
--
-- Returns 'Nothing' whenever 'applyStatistic' cannot produce a value, e.g.
-- when 'getColumn' finds no column with that name.
skewness :: T.Text -> DataFrame -> Maybe Double
skewness = applyStatistic SS.skewness

-- | Variance of the named column, computed with 'SS.variance'.
--
-- Returns 'Nothing' whenever 'applyStatistic' cannot produce a value, e.g.
-- when 'getColumn' finds no column with that name.
variance :: T.Text -> DataFrame -> Maybe Double
variance = applyStatistic SS.variance

-- | Inter-quartile range of the named column.
--
-- Implemented as 'SS.midspread' over 4-quantiles with the
-- 'SS.medianUnbiased' continuous-quantile parameters.
--
-- Returns 'Nothing' whenever 'applyStatistic' cannot produce a value, e.g.
-- when 'getColumn' finds no column with that name.
interQuartileRange :: T.Text -> DataFrame -> Maybe Double
interQuartileRange = applyStatistic (SS.midspread SS.medianUnbiased 4)

-- | Correlation between two columns, computed with 'SS.correlation' over the
-- element-wise pairing of both columns.
--
-- Both columns are read through '_getColumnAsDouble'; the result is
-- 'Nothing' as soon as either lookup fails. The first column is looked up
-- before the second one.
correlation :: T.Text -> T.Text -> DataFrame -> Maybe Double
correlation first second df =
  pairwise
    <$> _getColumnAsDouble first df
    <*> _getColumnAsDouble second df
  where
    -- Zip the two samples into pairs before handing them to the estimator.
    pairwise xs ys = SS.correlation (VG.zip xs ys)

-- | Fetch a column as an unboxed vector of 'Double'.
--
-- Only unboxed columns are considered: a 'Double' column is returned
-- unchanged and an 'Int' column is converted with 'fromIntegral'. Every
-- other case (missing column, other constructor, other element type)
-- yields 'Nothing'.
_getColumnAsDouble :: T.Text -> DataFrame -> Maybe (VU.Vector Double)
_getColumnAsDouble name df = case getColumn name df of
  Just (UnboxedColumn (f :: VU.Vector a))
    -- Matching on 'Refl' is what lets the vector be used at the concrete type.
    | Just Refl <- testEquality (typeRep @a) (typeRep @Double) -> Just f
    | Just Refl <- testEquality (typeRep @a) (typeRep @Int) -> Just (VU.map fromIntegral f)
  _ -> Nothing

-- | Sum of the named column as a 'Double'.
--
-- Only unboxed columns of 'Int' (converted with 'fromIntegral' before
-- summing) or 'Double' are supported. A missing column, a column stored with
-- any other constructor, or any other element type yields 'Nothing'.
sum :: T.Text -> DataFrame -> Maybe Double
sum name df = case getColumn name df of
  Just (UnboxedColumn (column :: VU.Vector a'))
    | Just Refl <- testEquality (typeRep @a') (typeRep @Int) ->
        Just (VG.sum (VU.map fromIntegral column))
    | Just Refl <- testEquality (typeRep @a') (typeRep @Double) ->
        Just (VG.sum column)
  -- A wildcard (rather than matching only 'Nothing') keeps the function total
  -- for columns that exist but are not stored unboxed.
  _ -> Nothing

-- | Reduce the named column to a single 'Double' with the given statistic.
--
-- The column is looked up with 'getColumn'. When 'columnTypeString' reports
-- @"Double"@ the statistic is applied directly through 'safeReduceColumn'.
-- Otherwise the column is first passed through 'transform' with conversions
-- from 'Int', 'Integer' and 'Float' (tried in that order, first success
-- wins), falling back to the untouched column, and then reduced.
--
-- Returns 'Nothing' when the column lookup or the final 'safeReduceColumn'
-- returns 'Nothing'.
applyStatistic :: (VU.Vector Double -> Double) -> T.Text -> DataFrame -> Maybe Double
applyStatistic f name df = do
  column <- getColumn name df
  if columnTypeString column == "Double"
    then safeReduceColumn f column
    else do
      -- The trailing 'Just column' means this bind itself never fails; an
      -- unsupported column is rejected by 'safeReduceColumn' below instead.
      matching <-
        asum
          [ transform (fromIntegral :: Int -> Double) column
          , transform (fromIntegral :: Integer -> Double) column
          , transform (realToFrac :: Float -> Double) column
          , Just column
          ]
      safeReduceColumn f matching

-- | Apply a vector-valued statistic to the named column.
--
-- Only unboxed columns of 'Int', 'Double' or 'Float' are supported; 'Int'
-- and 'Float' elements are converted to 'Double' first. A missing column,
-- any other constructor or any other element type yields 'Nothing'.
--
-- The result of the statistic is forced (to weak head normal form) before
-- it is wrapped in 'Just'.
applyStatistics :: (VU.Vector Double -> VU.Vector Double) -> T.Text -> DataFrame -> Maybe (VU.Vector Double)
applyStatistics f name df = case getColumn name df of
  Just (UnboxedColumn (column :: VU.Vector a'))
    | Just Refl <- testEquality (typeRep @a') (typeRep @Int) ->
        Just $! f (VU.map fromIntegral column)
    | Just Refl <- testEquality (typeRep @a') (typeRep @Double) ->
        Just $! f column
    | Just Refl <- testEquality (typeRep @a') (typeRep @Float) ->
        Just $! f (VU.map realToFrac column)
  _ -> Nothing

-- | Table of descriptive statistics for a data frame.
--
-- The result has a @"Statistic"@ column with the row labels (mean, minimum,
-- quartiles, maximum, standard deviation, inter-quartile range, skewness)
-- and one column per input column for which /every/ statistic could be
-- computed. Columns where any statistic is 'Nothing' are left out. Values
-- are rounded to two decimal places.
summarize :: DataFrame -> DataFrame
summarize df = fold columnStats (columnNames df) labels
  where
    -- Row labels; their order must match the order of the list in 'stats'.
    labels :: DataFrame
    labels =
      fromList
        [ ( "Statistic"
          , toColumn
              [ "Mean" :: T.Text
              , "Minimum"
              , "25%"
              , "Median"
              , "75%"
              , "Max"
              , "StdDev"
              , "IQR"
              , "Skewness"
              ]
          )
        ]

    -- Add the statistics of one column to the accumulated frame, or leave the
    -- frame untouched when any of them is unavailable.
    columnStats :: T.Text -> DataFrame -> DataFrame
    columnStats name acc = case sequenceA (stats name) of
      Just values -> insertUnboxedColumn name (VU.fromList (map (roundTo 2) values)) acc
      Nothing -> acc

    -- All statistics of one column, in the order of 'labels'.
    stats :: T.Text -> [Maybe Double]
    stats name =
      [ mean name df
      , quantileAt 0
      , quartile1
      , quantileAt 2
      , quartile3
      , quantileAt 4
      , standardDeviation name df
      , (-) <$> quartile3 <*> quartile1
      , skewness name df
      ]
      where
        -- The 0th..4th 4-quantiles: minimum, quartiles and maximum. One
        -- value is requested per index, so indexing 0..4 below is in range.
        quantiles =
          applyStatistics
            (SS.quantilesVec SS.medianUnbiased (VU.fromList [0, 1, 2, 3, 4]) 4)
            name
            df
        quantileAt i = (VG.! i) <$> quantiles
        quartile1 = quantileAt 1
        quartile3 = quantileAt 3

    -- Round to @n@ decimal places. NaN and infinities are passed through
    -- unchanged: 'round' to 'Integer' is not meaningful for them and would
    -- turn a NaN into a non-NaN value.
    roundTo :: Int -> Double -> Double
    roundTo n x
      | isNaN x || isInfinite x = x
      | otherwise = fromInteger (round (x * scale)) / scale
      where
        scale = 10 ^^ n