dataframe-0.2.0.1: An intuitive, dynamically-typed DataFrame library.
Safe HaskellNone
LanguageHaskell2010

DataFrame

Synopsis

Documentation

range :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k * n) Take a range of rows of a DataFrame.

eq :: (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool Source #

type family If (cond :: Bool) (yes :: k) (no :: k) :: k where ... Source #

Equations

If 'True (yes :: k) (_1 :: k) = yes 
If 'False (_1 :: k) (no :: k) = no 

apply Source #

Arguments

:: (Columnable b, Columnable c) 
=> (b -> c)

function to apply

-> Text

Column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to a given column in a dataframe.

data RowValue where Source #

Constructors

Value :: forall a. Columnable' a => a -> RowValue 

Instances

Instances details
Show RowValue Source # 
Instance details

Defined in DataFrame.Internal.Row

Eq RowValue Source # 
Instance details

Defined in DataFrame.Internal.Row

Ord RowValue Source # 
Instance details

Defined in DataFrame.Internal.Row

empty :: DataFrame Source #

O(1) Creates an empty dataframe

data Expr a where Source #

Constructors

Col :: forall a. Columnable a => Text -> Expr a 
Lit :: forall a. Columnable a => a -> Expr a 
Apply :: forall a b. (Columnable a, Columnable b) => Text -> (b -> a) -> Expr b -> Expr a 
BinOp :: forall c b a. (Columnable c, Columnable b, Columnable a) => Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a 

Instances

Instances details
(Floating a, Columnable a) => Floating (Expr a) Source # 
Instance details

Defined in DataFrame.Internal.Expression

Methods

pi :: Expr a #

exp :: Expr a -> Expr a #

log :: Expr a -> Expr a #

sqrt :: Expr a -> Expr a #

(**) :: Expr a -> Expr a -> Expr a #

logBase :: Expr a -> Expr a -> Expr a #

sin :: Expr a -> Expr a #

cos :: Expr a -> Expr a #

tan :: Expr a -> Expr a #

asin :: Expr a -> Expr a #

acos :: Expr a -> Expr a #

atan :: Expr a -> Expr a #

sinh :: Expr a -> Expr a #

cosh :: Expr a -> Expr a #

tanh :: Expr a -> Expr a #

asinh :: Expr a -> Expr a #

acosh :: Expr a -> Expr a #

atanh :: Expr a -> Expr a #

log1p :: Expr a -> Expr a #

expm1 :: Expr a -> Expr a #

log1pexp :: Expr a -> Expr a #

log1mexp :: Expr a -> Expr a #

(Num a, Columnable a) => Num (Expr a) Source # 
Instance details

Defined in DataFrame.Internal.Expression

Methods

(+) :: Expr a -> Expr a -> Expr a #

(-) :: Expr a -> Expr a -> Expr a #

(*) :: Expr a -> Expr a -> Expr a #

negate :: Expr a -> Expr a #

abs :: Expr a -> Expr a #

signum :: Expr a -> Expr a #

fromInteger :: Integer -> Expr a #

(Fractional a, Columnable a) => Fractional (Expr a) Source # 
Instance details

Defined in DataFrame.Internal.Expression

Methods

(/) :: Expr a -> Expr a -> Expr a #

recip :: Expr a -> Expr a #

fromRational :: Rational -> Expr a #

Show a => Show (Expr a) Source # 
Instance details

Defined in DataFrame.Internal.Expression

Methods

showsPrec :: Int -> Expr a -> ShowS #

show :: Expr a -> String #

showList :: [Expr a] -> ShowS #

filter Source #

Arguments

:: Columnable a 
=> Text

Column to filter by

-> (a -> Bool)

Filter condition

-> DataFrame

Dataframe to filter

-> DataFrame 

O(n * k) Filter rows by a given condition.

filter "x" even df

defaultOptions :: ReadOptions Source #

By default we assume the file has a header, we infer the types on read and we convert any rows with nullish objects into Maybe (safeRead).

lift :: (Columnable a, Columnable b) => (a -> b) -> Expr a -> Expr b Source #

take :: Int -> DataFrame -> DataFrame Source #

O(k * n) Take the first n rows of a DataFrame.

sortBy :: SortOrder -> [Text] -> DataFrame -> DataFrame Source #

O(k log n) Sorts the dataframe by a given row.

sortBy "Age" df

data Aggregation Source #

Constructors

Count 
Mean 
Minimum 
Median 
Maximum 
Sum 

Instances

Instances details
Show Aggregation Source # 
Instance details

Defined in DataFrame.Operations.Aggregation

Eq Aggregation Source # 
Instance details

Defined in DataFrame.Operations.Aggregation

fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame Source #

groupBy :: [Text] -> DataFrame -> DataFrame Source #

O(k * n) groups the dataframe by the given rows aggregating the remaining rows into vector that should be reduced later.

toVector :: Columnable a => Column -> Vector a Source #

select :: [Text] -> DataFrame -> DataFrame Source #

O(n) Selects a number of columns in a given dataframe.

select ["name", "age"] df

data Column where Source #

Our representation of a column is a GADT that can store data in either a vector with boxed elements or

Constructors

BoxedColumn :: forall a. Columnable a => Vector a -> Column 
UnboxedColumn :: forall a. (Columnable a, Unbox a) => Vector a -> Column 
OptionalColumn :: forall a. Columnable a => Vector (Maybe a) -> Column 
GroupedBoxedColumn :: forall a. Columnable a => Vector (Vector a) -> Column 
GroupedUnboxedColumn :: forall a. (Columnable a, Unbox a) => Vector (Vector a) -> Column 
GroupedOptionalColumn :: forall a. Columnable a => Vector (Vector (Maybe a)) -> Column 
MutableBoxedColumn :: forall a. Columnable a => IOVector a -> Column 
MutableUnboxedColumn :: forall a. (Columnable a, Unbox a) => IOVector a -> Column 

Instances

Instances details
Show Column Source # 
Instance details

Defined in DataFrame.Internal.Column

Eq Column Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

(==) :: Column -> Column -> Bool #

(/=) :: Column -> Column -> Bool #

clip :: Int -> Int -> Int -> Int Source #

geq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool Source #

selectBy :: (Text -> Bool) -> DataFrame -> DataFrame Source #

O(n) select columns by column predicate name.

data DataFrame Source #

Constructors

DataFrame (Vector (Maybe Column)) (Map Text Int) [Int] (Int, Int) 

Instances

Instances details
Show DataFrame Source # 
Instance details

Defined in DataFrame.Internal.DataFrame

Eq DataFrame Source # 
Instance details

Defined in DataFrame.Internal.DataFrame

type Row = Vector RowValue Source #

data DataFrameException where Source #

Constructors

TypeMismatchException 

Fields

TypeMismatchException' 

Fields

ColumnNotFoundException :: Text -> Text -> [Text] -> DataFrameException 

toColumn' :: (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column Source #

columnLength :: Column -> Int Source #

O(1) Gets the number of elements in the column.

dimensions :: DataFrame -> (Int, Int) Source #

O(1) Get DataFrame dimensions i.e. (rows, columns)

itransform :: (Typeable b, Typeable c, Columnable b, Columnable c) => (Int -> b -> c) -> Column -> Maybe Column Source #

Applies a function that returns an unboxed result to an unboxed vector, storing the result in a column.

ifoldrColumn :: (Columnable a, Columnable b) => (Int -> a -> b -> b) -> b -> Column -> Maybe b Source #

data TypedColumn a where Source #

Constructors

TColumn :: forall a. Columnable a => Column -> TypedColumn a 

Instances

Instances details
Show a => Show (TypedColumn a) Source # 
Instance details

Defined in DataFrame.Internal.Column

transform :: (Columnable b, Columnable c, UnboxIf c, Typeable b, Typeable c) => (b -> c) -> Column -> Maybe Column Source #

Generic column transformation (no index).

getIndicesUnboxed :: Unbox a => Vector Int -> Vector a -> Vector a Source #

getIndices :: Vector Int -> Vector a -> Vector a Source #

lt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool Source #

gt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool Source #

type Columnable' a = (Typeable a, Show a, Ord a, Eq a) Source #

type family KindOf a :: Rep where ... Source #

Compute the column representation tag for any ‘a’.

Equations

KindOf (Maybe a) = 'ROptional 
KindOf (Vector a) = 'RGBoxed 
KindOf (Vector a) = 'RGUnboxed 
KindOf a = If (Unboxable a) 'RUnboxed 'RBoxed 

class ColumnifyRep (r :: Rep) a where Source #

Methods

toColumnRep :: Vector a -> Column Source #

Instances

Instances details
Columnable a => ColumnifyRep 'RBoxed a Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumnRep :: Vector a -> Column Source #

(Columnable a, Unbox a) => ColumnifyRep 'RUnboxed a Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumnRep :: Vector a -> Column Source #

Columnable a => ColumnifyRep 'RGBoxed (Vector a) Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumnRep :: Vector (Vector a) -> Column Source #

(Columnable a, Unbox a) => ColumnifyRep 'RGUnboxed (Vector a) Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumnRep :: Vector (Vector a) -> Column Source #

Columnable a => ColumnifyRep 'ROptional (Maybe a) Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumnRep :: Vector (Maybe a) -> Column Source #

type UnboxIf a = When (Unboxable a) (Unbox a) Source #

class SBoolI (b :: Bool) where Source #

Methods

sbool :: SBool b Source #

Instances

Instances details
SBoolI 'False Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

sbool :: SBool 'False Source #

SBoolI 'True Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

sbool :: SBool 'True Source #

data SBool (b :: Bool) where Source #

Constructors

STrue :: SBool 'True 
SFalse :: SBool 'False 

type family When (flag :: Bool) c where ... Source #

Equations

When 'True c = c 
When 'False c = () 

toColumnUnboxed :: (Columnable a, Unbox a) => Vector a -> Column Source #

Converts a an unboxed vector to a column making sure to put the vector into an appropriate column type by reflection on the vector's type parameter.

ifilterColumn :: Columnable a => (Int -> a -> Bool) -> Column -> Maybe Column Source #

Filter column with index.

ifoldlColumn :: (Columnable a, Columnable b) => (b -> Int -> a -> b) -> b -> Column -> Maybe b Source #

reduceColumn :: Columnable a => (a -> b) -> Column -> b Source #

safeReduceColumn :: Typeable a => (a -> b) -> Column -> Maybe b Source #

zipWithColumns :: (Columnable a, Columnable b, Columnable c) => (a -> b -> c) -> Column -> Column -> Column Source #

leq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool Source #

toRowVector :: [Text] -> DataFrame -> Vector Row Source #

interpret :: forall a b. Columnable a => DataFrame -> Expr a -> TypedColumn a Source #

lit :: Columnable a => a -> Expr a Source #

lift2 :: (Columnable c, Columnable b, Columnable a) => (c -> b -> a) -> Expr c -> Expr b -> Expr a Source #

columnNames :: DataFrame -> [Text] Source #

O(k) Get column names of the DataFrame in order of insertion.

insertColumn Source #

Arguments

:: Columnable a 
=> Text

Column Name

-> Vector a

Vector to add to column

-> DataFrame

DataFrame to add column to

-> DataFrame 

O(n) Adds a vector to the dataframe.

insertColumn' Source #

Arguments

:: Text

Column Name

-> Maybe Column

Column to add

-> DataFrame

DataFrame to add to column

-> DataFrame 

insertUnboxedColumn Source #

Arguments

:: (Columnable a, Unbox a) 
=> Text

Column Name

-> Vector a

Unboxed vector to add to column

-> DataFrame

DataFrame to add to column

-> DataFrame 

O(n) Adds an unboxed vector to the dataframe.

insertColumnWithDefault Source #

Arguments

:: Columnable a 
=> a

Default Value

-> Text

Column name

-> Vector a

Data to add to column

-> DataFrame

DataFrame to add to column

-> DataFrame 

O(k) Add a column to the dataframe providing a default. This constructs a new vector and also may convert it to an unboxed vector if necessary. Since columns are usually large the runtime is dominated by the length of the list, k.

columnSize :: Text -> DataFrame -> Maybe Int Source #

O(1) Get the number of elements in a given column.

columnInfo :: DataFrame -> DataFrame Source #

O(n) Returns the number of non-null columns in the dataframe and the type associated with each column.

valueCounts :: Columnable a => Text -> DataFrame -> [(a, Int)] Source #

O (k * n) Counts the occurences of each value in a given column.

data PlotColumns Source #

Constructors

PlotAll 
PlotSubset [Text] 

countOccurrences :: Ord a => Vector a -> [(a, Int)] Source #

smallestPartition :: Ord a => a -> [a] -> a Source #

largestPartition :: Ord a => a -> [a] -> a Source #

data SortOrder Source #

Sort order taken as a parameter by the sortby function.

Constructors

Ascending 
Descending 

Instances

Instances details
Eq SortOrder Source # 
Instance details

Defined in DataFrame.Operations.Sorting

derive :: Columnable a => Text -> Expr a -> DataFrame -> DataFrame Source #

O(k) Apply a function to a combination of columns in a dataframe and add the result into alias column.

applyMany :: (Columnable b, Columnable c) => (b -> c) -> [Text] -> DataFrame -> DataFrame Source #

O(k * n) Apply a function to given column names in a dataframe.

applyInt Source #

Arguments

:: Columnable b 
=> (Int -> b)

Column name | function to apply

-> Text 
-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Convenience function that applies to an int column.

applyDouble Source #

Arguments

:: Columnable b 
=> (Double -> b)

Column name | function to apply

-> Text 
-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Convenience function that applies to an double column.

applyWhere :: (Columnable a, Columnable b) => (a -> Bool) -> Text -> (b -> b) -> Text -> DataFrame -> DataFrame Source #

O(k * n) Apply a function to a column only if there is another column value that matches the given criterion.

applyWhere "Age" (<20) "Generation" (const "Gen-Z")

applyAtIndex Source #

Arguments

:: Columnable a 
=> Int

Index

-> (a -> a)

function to apply

-> Text

Column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to the column at a given index.

filterBy :: Columnable a => (a -> Bool) -> Text -> DataFrame -> DataFrame Source #

O(k) a version of filter where the predicate comes first.

filterBy even "x" df

filterWhere :: Expr Bool -> DataFrame -> DataFrame Source #

O(k) filters the dataframe with a row predicate. The arguments in the function must appear in the same order as they do in the list.

filterWhere (["x", "y"], func (\x y -> x + y > 5)) df

filterJust :: Text -> DataFrame -> DataFrame Source #

O(k) removes all rows with Nothing in a given column from the dataframe.

filterJust df

filterAllJust :: DataFrame -> DataFrame Source #

O(n * k) removes all rows with Nothing from the dataframe.

filterJust df

cube :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k) cuts the dataframe in a cube of size (a, b) where a is the length and b is the width.

cube (10, 5) df

selectIntRange :: (Int, Int) -> DataFrame -> DataFrame Source #

O(n) select columns by index range of column names.

selectRange :: (Text, Text) -> DataFrame -> DataFrame Source #

O(n) select columns by index range of column names.

exclude :: [Text] -> DataFrame -> DataFrame Source #

O(n) inverse of select

exclude ["Name"] df

appendWithFrontMin :: Ord a => a -> [a] -> [a] Source #

hash' :: Columnable a => a -> Double Source #

This hash function returns the hash when given a non numeric type but the value when given a numeric.

reduceBy :: (Columnable a, Columnable b) => (forall (v :: Type -> Type). Vector v a => v a -> b) -> Text -> DataFrame -> DataFrame Source #

data ReadOptions Source #

Record for CSV read options.

Constructors

ReadOptions 

readCsv :: String -> IO DataFrame Source #

Reads a CSV file from the given path. Note this file stores intermediate temporary files while converting the CSV from a row to a columnar format.

readSeparated :: Char -> ReadOptions -> String -> IO DataFrame Source #

Reads a character separated file into a dataframe using mutable vectors.

readTsv :: String -> IO DataFrame Source #

Reads a tab separated file from the given path. Note this file stores intermediate temporary files while converting the CSV from a row to a columnar format.

countRows :: Char -> FilePath -> IO Int Source #

First pass to count rows for exact allocation

getInitialDataVectors :: Int -> IOVector Column -> [Text] -> IO () Source #

fillColumns :: Int -> Char -> IOVector Column -> IOVector [(Int, Text)] -> Handle -> IO () Source #

Reads rows from the handle and stores values in mutable vectors.

freezeColumn :: IOVector Column -> Vector [(Int, Text)] -> ReadOptions -> Int -> IO (Maybe Column) Source #

Freezes a mutable vector into an immutable one, trimming it to the actual row count.

writeValue :: IOVector Column -> IOVector [(Int, Text)] -> Int -> Int -> Text -> IO () Source #

Writes a value into the appropriate column, resizing the vector if necessary.

writeSeparated Source #

Arguments

:: Char

Separator

-> String

Path to write to

-> DataFrame 
-> IO () 

(|>) :: a -> (a -> b) -> b Source #