dataframe-0.1.0.2: An intuitive, dynamically-typed DataFrame library.
Safe HaskellNone
LanguageHaskell2010

DataFrame

Synopsis

Documentation

data Function where Source #

Constructors

F1 :: forall a b. (Columnable a, Columnable b) => (a -> b) -> Function 
F2 :: forall a b c. (Columnable a, Columnable b, Columnable c) => (a -> b -> c) -> Function 
F3 :: forall a b c d. (Columnable a, Columnable b, Columnable c, Columnable d) => (a -> b -> c -> d) -> Function 
F4 :: forall a b c d e. (Columnable a, Columnable b, Columnable c, Columnable d, Columnable e) => (a -> b -> c -> d -> e) -> Function 
Cond :: forall a. Columnable a => (a -> Bool) -> Function 
ICond :: forall a. Columnable a => (Int -> a -> Bool) -> Function 

apply Source #

Arguments

:: (Columnable b, Columnable c) 
=> (b -> c)

function to apply

-> Text

Column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to a given column in a dataframe.

data RowValue where Source #

Constructors

Value :: forall a. Columnable a => a -> RowValue 

Instances

Instances details
Show RowValue Source # 
Instance details

Defined in DataFrame.Internal.Types

Eq RowValue Source # 
Instance details

Defined in DataFrame.Internal.Types

Ord RowValue Source # 
Instance details

Defined in DataFrame.Internal.Types

empty :: DataFrame Source #

O(1) Creates an empty dataframe

filter Source #

Arguments

:: Columnable a 
=> Text

Column to filter by

-> (a -> Bool)

Filter condition

-> DataFrame

Dataframe to filter

-> DataFrame 

O(n * k) Filter rows by a given condition.

filter "x" even df

defaultOptions :: ReadOptions Source #

By default we assume the file has a header, we infer the types on read and we convert any rows with nullish objects into Maybe (safeRead).

pattern Empty :: Vector a Source #

range :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k * n) Take a range of rows of a DataFrame.

take :: Int -> DataFrame -> DataFrame Source #

O(k * n) Take the first n rows of a DataFrame.

data Aggregation Source #

Constructors

Count 
Mean 
Minimum 
Median 
Maximum 
Sum 

Instances

Instances details
Show Aggregation Source # 
Instance details

Defined in DataFrame.Operations.Aggregation

Eq Aggregation Source # 
Instance details

Defined in DataFrame.Operations.Aggregation

groupBy :: [Text] -> DataFrame -> DataFrame Source #

O(k * n) groups the dataframe by the given rows aggregating the remaining rows into vector that should be reduced later.

sortBy :: SortOrder -> [Text] -> DataFrame -> DataFrame Source #

O(k log n) Sorts the dataframe by a given row.

sortBy "Age" df

uncons :: Vector a -> Maybe (a, Vector a) Source #

fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame Source #

data TypeRepList (xs :: [Type]) where Source #

Constructors

Nil :: TypeRepList ('[] :: [Type]) 
Cons :: forall x (xs1 :: [Type]). Typeable x => TypeRep x -> TypeRepList xs1 -> TypeRepList (x ': xs1) 

pattern (:<|) :: a -> Vector a -> Vector a Source #

toVector :: Columnable a => Column -> Vector a Source #

select :: [Text] -> DataFrame -> DataFrame Source #

O(n) Selects a number of columns in a given dataframe.

select ["name", "age"] df

data Column where Source #

Our representation of a column is a GADT that can store data in either a vector with boxed elements or

Constructors

BoxedColumn :: forall a. Columnable a => Vector a -> Column 
UnboxedColumn :: forall a. (Columnable a, Unbox a) => Vector a -> Column 
OptionalColumn :: forall a. Columnable a => Vector (Maybe a) -> Column 
GroupedBoxedColumn :: forall a. Columnable a => Vector (Vector a) -> Column 
GroupedUnboxedColumn :: forall a. (Columnable a, Unbox a) => Vector (Vector a) -> Column 
GroupedOptionalColumn :: forall a. Columnable a => Vector (Vector (Maybe a)) -> Column 
MutableBoxedColumn :: forall a. Columnable a => IOVector a -> Column 
MutableUnboxedColumn :: forall a. (Columnable a, Unbox a) => IOVector a -> Column 

Instances

Instances details
Show Column Source # 
Instance details

Defined in DataFrame.Internal.Column

Transformable Column Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

transform :: (Columnable b, Columnable c) => (b -> c) -> Column -> Maybe Column Source #

Eq Column Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

(==) :: Column -> Column -> Bool #

(/=) :: Column -> Column -> Bool #

clip :: Int -> Int -> Int -> Int Source #

selectBy :: (Text -> Bool) -> DataFrame -> DataFrame Source #

O(n) select columns by column predicate name.

data DataFrame Source #

Constructors

DataFrame (Vector (Maybe Column)) (Map Text Int) [Int] (Int, Int) 

Instances

Instances details
Show DataFrame Source # 
Instance details

Defined in DataFrame.Internal.DataFrame

Eq DataFrame Source # 
Instance details

Defined in DataFrame.Internal.DataFrame

type Row = Vector RowValue Source #

data DataFrameException where Source #

Constructors

TypeMismatchException 

Fields

TypeMismatchException' 

Fields

ColumnNotFoundException :: Text -> Text -> [Text] -> DataFrameException 

class Columnable a => Columnify a where Source #

Methods

toColumn' :: Vector a -> Column Source #

Converts a boxed vector to a column making sure to put the vector into an appropriate column type by reflection on the vector's type parameter.

Instances

Instances details
Columnable a => Columnify a Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumn' :: Vector a -> Column Source #

Columnable a => Columnify (Vector a) Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumn' :: Vector (Vector a) -> Column Source #

(Columnable a, Unbox a) => Columnify (Vector a) Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumn' :: Vector (Vector a) -> Column Source #

Columnable a => Columnify (Maybe a) Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumn' :: Vector (Maybe a) -> Column Source #

class Columnable a => ColumnifyList a where Source #

Methods

toColumn :: [a] -> Column Source #

Converts a boxed vector to a column making sure to put the vector into an appropriate column type by reflection on the vector's type parameter.

Instances

Instances details
Columnable a => ColumnifyList a Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumn :: [a] -> Column Source #

Columnable a => ColumnifyList (Maybe a) Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

toColumn :: [Maybe a] -> Column Source #

columnLength :: Column -> Int Source #

O(1) Gets the number of elements in the column.

type Columnable a = (Typeable a, Show a, Ord a, Eq a) Source #

dimensions :: DataFrame -> (Int, Int) Source #

O(1) Get DataFrame dimensions i.e. (rows, columns)

class Transformable a where Source #

Essentially a "functor" instance of our type-erased Column.

Methods

transform :: (Columnable b, Columnable c) => (b -> c) -> a -> Maybe a Source #

Instances

Instances details
Transformable Column Source # 
Instance details

Defined in DataFrame.Internal.Column

Methods

transform :: (Columnable b, Columnable c) => (b -> c) -> Column -> Maybe Column Source #

itransform :: (Columnable b, Columnable c) => (Int -> b -> c) -> Column -> Maybe Column Source #

tranform with index.

ifoldrColumn :: (Columnable a, Columnable b) => (Int -> a -> b -> b) -> b -> Column -> Maybe b Source #

funcApply :: Columnable c => Vector RowValue -> Function -> c Source #

getIndicesUnboxed :: Unbox a => Vector Int -> Vector a -> Vector a Source #

getIndices :: Vector Int -> Vector a -> Vector a Source #

matchesAnyType :: forall a (xs :: [Type]). Typeable a => TypeRepList xs -> TypeRep a -> Bool Source #

class WrapFunction a where Source #

Methods

wrapFunction :: a -> Function Source #

Instances

Instances details
(Columnable a, Columnable b, Columnable c, Columnable d, Columnable e) => WrapFunction (a -> b -> c -> d -> e) Source # 
Instance details

Defined in DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b -> c -> d -> e) -> Function Source #

(Columnable a, Columnable b, Columnable c, Columnable d) => WrapFunction (a -> b -> c -> d) Source # 
Instance details

Defined in DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b -> c -> d) -> Function Source #

(Columnable a, Columnable b, Columnable c) => WrapFunction (a -> b -> c) Source # 
Instance details

Defined in DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b -> c) -> Function Source #

(Columnable a, Columnable b) => WrapFunction (a -> b) Source # 
Instance details

Defined in DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b) -> Function Source #

toColumnUnboxed :: (Columnable a, Unbox a) => Vector a -> Column Source #

Converts a an unboxed vector to a column making sure to put the vector into an appropriate column type by reflection on the vector's type parameter.

transformUnboxed :: (Columnable a, Unbox a, Columnable b) => (a -> b) -> Vector a -> Column Source #

Applies a function that returns an unboxed result to an unboxed vector, storing the result in a column.

itransformUnboxed :: (Columnable a, Unbox a, Columnable b) => (Int -> a -> b) -> Vector a -> Column Source #

ifilterColumn :: Columnable a => (Int -> a -> Bool) -> Column -> Maybe Column Source #

Filter column with index.

ifoldlColumn :: (Columnable a, Columnable b) => (b -> Int -> a -> b) -> b -> Column -> Maybe b Source #

reduceColumn :: Columnable a => (a -> b) -> Column -> b Source #

safeReduceColumn :: Typeable a => (a -> b) -> Column -> Maybe b Source #

toRowVector :: [Text] -> DataFrame -> Vector Row Source #

columnNames :: DataFrame -> [Text] Source #

O(k) Get column names of the DataFrame in order of insertion.

insertColumn Source #

Arguments

:: Columnable a 
=> Text

Column Name

-> Vector a

Vector to add to column

-> DataFrame

DataFrame to add column to

-> DataFrame 

O(n) Adds a vector to the dataframe.

insertColumn' Source #

Arguments

:: Text

Column Name

-> Maybe Column

Column to add

-> DataFrame

DataFrame to add to column

-> DataFrame 

insertUnboxedColumn Source #

Arguments

:: (Columnable a, Unbox a) 
=> Text

Column Name

-> Vector a

Unboxed vector to add to column

-> DataFrame

DataFrame to add to column

-> DataFrame 

O(n) Adds an unboxed vector to the dataframe.

insertColumnWithDefault Source #

Arguments

:: Columnable a 
=> a

Default Value

-> Text

Column name

-> Vector a

Data to add to column

-> DataFrame

DataFrame to add to column

-> DataFrame 

O(k) Add a column to the dataframe providing a default. This constructs a new vector and also may convert it to an unboxed vector if necessary. Since columns are usually large the runtime is dominated by the length of the list, k.

columnSize :: Text -> DataFrame -> Maybe Int Source #

O(1) Get the number of elements in a given column.

columnInfo :: DataFrame -> DataFrame Source #

O(n) Returns the number of non-null columns in the dataframe and the type associated with each column.

valueCounts :: Columnable a => Text -> DataFrame -> [(a, Int)] Source #

O (k * n) Counts the occurences of each value in a given column.

data PlotColumns Source #

Constructors

PlotAll 
PlotSubset [Text] 

countOccurrences :: Ord a => Vector a -> [(a, Int)] Source #

smallestPartition :: Ord a => a -> [a] -> a Source #

largestPartition :: Ord a => a -> [a] -> a Source #

data SortOrder Source #

Sort order taken as a parameter by the sortby function.

Constructors

Ascending 
Descending 

Instances

Instances details
Eq SortOrder Source # 
Instance details

Defined in DataFrame.Operations.Sorting

deriveFrom :: ([Text], Function) -> Text -> DataFrame -> DataFrame Source #

O(k) Apply a function to a combination of columns in a dataframe and add the result into alias column.

derive Source #

Arguments

:: (Columnable b, Columnable c) 
=> Text

New name

-> (b -> c)

function to apply

-> Text

Derivative column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to a given column in a dataframe and add the result into alias column.

applyMany :: (Columnable b, Columnable c) => (b -> c) -> [Text] -> DataFrame -> DataFrame Source #

O(k * n) Apply a function to given column names in a dataframe.

applyInt Source #

Arguments

:: Columnable b 
=> (Int -> b)

Column name | function to apply

-> Text 
-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Convenience function that applies to an int column.

applyDouble Source #

Arguments

:: Columnable b 
=> (Double -> b)

Column name | function to apply

-> Text 
-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Convenience function that applies to an double column.

applyWhere :: (Columnable a, Columnable b) => (a -> Bool) -> Text -> (b -> b) -> Text -> DataFrame -> DataFrame Source #

O(k * n) Apply a function to a column only if there is another column value that matches the given criterion.

applyWhere "Age" (<20) "Generation" (const "Gen-Z")

applyAtIndex Source #

Arguments

:: Columnable a 
=> Int

Index

-> (a -> a)

function to apply

-> Text

Column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to the column at a given index.

filterBy :: Columnable a => (a -> Bool) -> Text -> DataFrame -> DataFrame Source #

O(k) a version of filter where the predicate comes first.

filterBy even "x" df

filterWhere :: ([Text], Function) -> DataFrame -> DataFrame Source #

O(k) filters the dataframe with a row predicate. The arguments in the function must appear in the same order as they do in the list.

filterWhere (["x", "y"], func (\x y -> x + y > 5)) df

filterJust :: Text -> DataFrame -> DataFrame Source #

O(k) removes all rows with Nothing in a given column from the dataframe.

filterJust df

filterAllJust :: DataFrame -> DataFrame Source #

O(n * k) removes all rows with Nothing from the dataframe.

filterJust df

cube :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k) cuts the dataframe in a cube of size (a, b) where a is the length and b is the width.

cube (10, 5) df

selectIntRange :: (Int, Int) -> DataFrame -> DataFrame Source #

O(n) select columns by index range of column names.

selectRange :: (Text, Text) -> DataFrame -> DataFrame Source #

O(n) select columns by index range of column names.

exclude :: [Text] -> DataFrame -> DataFrame Source #

O(n) inverse of select

exclude ["Name"] df

appendWithFrontMin :: Ord a => a -> [a] -> [a] Source #

hash' :: Columnable a => a -> Double Source #

This hash function returns the hash when given a non numeric type but the value when given a numeric.

reduceBy :: (Columnable a, Columnable b) => (forall (v :: Type -> Type). Vector v a => v a -> b) -> Text -> DataFrame -> DataFrame Source #

data ReadOptions Source #

Record for CSV read options.

Constructors

ReadOptions 

readCsv :: String -> IO DataFrame Source #

Reads a CSV file from the given path. Note this file stores intermediate temporary files while converting the CSV from a row to a columnar format.

readSeparated :: Char -> ReadOptions -> String -> IO DataFrame Source #

Reads a character separated file into a dataframe using mutable vectors.

readTsv :: String -> IO DataFrame Source #

Reads a tab separated file from the given path. Note this file stores intermediate temporary files while converting the CSV from a row to a columnar format.

countRows :: Char -> FilePath -> IO Int Source #

First pass to count rows for exact allocation

getInitialDataVectors :: Int -> IOVector Column -> [Text] -> IO () Source #

fillColumns :: Int -> Char -> IOVector Column -> IOVector [(Int, Text)] -> Handle -> IO () Source #

Reads rows from the handle and stores values in mutable vectors.

freezeColumn :: IOVector Column -> Vector [(Int, Text)] -> ReadOptions -> Int -> IO (Maybe Column) Source #

Freezes a mutable vector into an immutable one, trimming it to the actual row count.

writeValue :: IOVector Column -> IOVector [(Int, Text)] -> Int -> Int -> Text -> IO () Source #

Writes a value into the appropriate column, resizing the vector if necessary.

writeSeparated Source #

Arguments

:: Char

Separator

-> String

Path to write to

-> DataFrame 
-> IO () 

(|>) :: a -> (a -> b) -> b Source #