dataframe-0.1.0.1: An intuitive, dynamically-typed DataFrame library.
Safe HaskellSafe-Inferred
LanguageHaskell2010

Data.DataFrame

Synopsis

Documentation

data Function where Source #

Constructors

F1 :: forall a b. (Columnable a, Columnable b) => (a -> b) -> Function 
F2 :: forall a b c. (Columnable a, Columnable b, Columnable c) => (a -> b -> c) -> Function 
F3 :: forall a b c d. (Columnable a, Columnable b, Columnable c, Columnable d) => (a -> b -> c -> d) -> Function 
F4 :: forall a b c d e. (Columnable a, Columnable b, Columnable c, Columnable d, Columnable e) => (a -> b -> c -> d -> e) -> Function 
Cond :: forall a. Columnable a => (a -> Bool) -> Function 
ICond :: forall a. Columnable a => (Int -> a -> Bool) -> Function 

data Column where Source #

Our representation of a column is a GADT that can store data in either a vector with boxed elements or

Constructors

BoxedColumn :: Columnable a => Vector a -> Column 
UnboxedColumn :: (Columnable a, Unbox a) => Vector a -> Column 
OptionalColumn :: Columnable a => Vector (Maybe a) -> Column 
GroupedBoxedColumn :: Columnable a => Vector (Vector a) -> Column 
GroupedUnboxedColumn :: (Columnable a, Unbox a) => Vector (Vector a) -> Column 
GroupedOptionalColumn :: Columnable a => Vector (Vector (Maybe a)) -> Column 
MutableBoxedColumn :: Columnable a => IOVector a -> Column 
MutableUnboxedColumn :: (Columnable a, Unbox a) => IOVector a -> Column 

Instances

Instances details
Show Column Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Transformable Column Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

transform :: (Columnable b, Columnable c) => (b -> c) -> Column -> Maybe Column Source #

Eq Column Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

(==) :: Column -> Column -> Bool #

(/=) :: Column -> Column -> Bool #

data DataFrame Source #

Constructors

DataFrame (Vector (Maybe Column)) (Map Text Int) [Int] (Int, Int) 

Instances

Instances details
Show DataFrame Source # 
Instance details

Defined in Data.DataFrame.Internal.DataFrame

Eq DataFrame Source # 
Instance details

Defined in Data.DataFrame.Internal.DataFrame

type Row = Vector RowValue Source #

data DataFrameException where Source #

Constructors

TypeMismatchException 

Fields

TypeMismatchException' 

Fields

ColumnNotFoundException :: Text -> Text -> [Text] -> DataFrameException 

type Columnable a = (Typeable a, Show a, Ord a, Eq a) Source #

data RowValue where Source #

Constructors

Value :: Columnable a => a -> RowValue 

Instances

Instances details
Show RowValue Source # 
Instance details

Defined in Data.DataFrame.Internal.Types

Eq RowValue Source # 
Instance details

Defined in Data.DataFrame.Internal.Types

Ord RowValue Source # 
Instance details

Defined in Data.DataFrame.Internal.Types

class Transformable a where Source #

Essentially a "functor" instance of our type-erased Column.

Methods

transform :: forall b c. (Columnable b, Columnable c) => (b -> c) -> a -> Maybe a Source #

Instances

Instances details
Transformable Column Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

transform :: (Columnable b, Columnable c) => (b -> c) -> Column -> Maybe Column Source #

data TypeRepList (xs :: [Type]) where Source #

Constructors

Nil :: TypeRepList '[] 
Cons :: Typeable x => TypeRep x -> TypeRepList xs -> TypeRepList (x ': xs) 

class WrapFunction a where Source #

Methods

wrapFunction :: a -> Function Source #

Instances

Instances details
(Columnable a, Columnable b, Columnable c, Columnable d, Columnable e) => WrapFunction (a -> b -> c -> d -> e) Source # 
Instance details

Defined in Data.DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b -> c -> d -> e) -> Function Source #

(Columnable a, Columnable b, Columnable c, Columnable d) => WrapFunction (a -> b -> c -> d) Source # 
Instance details

Defined in Data.DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b -> c -> d) -> Function Source #

(Columnable a, Columnable b, Columnable c) => WrapFunction (a -> b -> c) Source # 
Instance details

Defined in Data.DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b -> c) -> Function Source #

(Columnable a, Columnable b) => WrapFunction (a -> b) Source # 
Instance details

Defined in Data.DataFrame.Internal.Function

Methods

wrapFunction :: (a -> b) -> Function Source #

class Columnable a => Columnify a where Source #

Methods

toColumn' :: Vector a -> Column Source #

Converts a boxed vector to a column making sure to put the vector into an appropriate column type by reflection on the vector's type parameter.

Instances

Instances details
Columnable a => Columnify a Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

toColumn' :: Vector a -> Column Source #

Columnable a => Columnify (Vector a) Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

toColumn' :: Vector (Vector a) -> Column Source #

(Columnable a, Unbox a) => Columnify (Vector a) Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

toColumn' :: Vector0 (Vector a) -> Column Source #

Columnable a => Columnify (Maybe a) Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

toColumn' :: Vector (Maybe a) -> Column Source #

class Columnable a => ColumnifyList a where Source #

Methods

toColumn :: [a] -> Column Source #

Converts a boxed vector to a column making sure to put the vector into an appropriate column type by reflection on the vector's type parameter.

Instances

Instances details
Columnable a => ColumnifyList a Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

toColumn :: [a] -> Column Source #

Columnable a => ColumnifyList (Maybe a) Source # 
Instance details

Defined in Data.DataFrame.Internal.Column

Methods

toColumn :: [Maybe a] -> Column Source #

data PlotColumns Source #

Constructors

PlotAll 
PlotSubset [Text] 

data SortOrder Source #

Sort order taken as a parameter by the sortby function.

Constructors

Ascending 
Descending 

Instances

Instances details
Eq SortOrder Source # 
Instance details

Defined in Data.DataFrame.Operations.Sorting

data ReadOptions Source #

Record for CSV read options.

Constructors

ReadOptions 

pattern Empty :: Vector a Source #

pattern (:<|) :: a -> Vector a -> Vector a Source #

apply Source #

Arguments

:: forall b c. (Columnable b, Columnable c) 
=> (b -> c)

function to apply

-> Text

Column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to a given column in a dataframe.

empty :: DataFrame Source #

O(1) Creates an empty dataframe

filter Source #

Arguments

:: forall a. Columnable a 
=> Text

Column to filter by

-> (a -> Bool)

Filter condition

-> DataFrame

Dataframe to filter

-> DataFrame 

O(n * k) Filter rows by a given condition.

defaultOptions :: ReadOptions Source #

By default we assume the file has a header, we infer the types on read and we convert any rows with nullish objects into Maybe (safeRead).

range :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k * n) Take a range of rows of a DataFrame.

take :: Int -> DataFrame -> DataFrame Source #

O(k * n) Take the first n rows of a DataFrame.

sortBy :: SortOrder -> [Text] -> DataFrame -> DataFrame Source #

O(k log n) Sorts the dataframe by a given row.

sortBy "Age" df

groupBy :: [Text] -> DataFrame -> DataFrame Source #

O(k * n) groups the dataframe by the given rows aggregating the remaining rows into vector that should be reduced later.

uncons :: Vector a -> Maybe (a, Vector a) Source #

fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame Source #

select :: [Text] -> DataFrame -> DataFrame Source #

O(n) Selects a number of columns in a given dataframe.

select ["name", "age"] df

clip :: Int -> Int -> Int -> Int Source #

selectBy :: (Text -> Bool) -> DataFrame -> DataFrame Source #

O(n) select columns by column predicate name.

columnLength :: Column -> Int Source #

O(1) Gets the number of elements in the column.

dimensions :: DataFrame -> (Int, Int) Source #

O(1) Get DataFrame dimensions i.e. (rows, columns)

toRowValue :: forall a. Columnable a => a -> RowValue Source #

itransform :: forall b c. (Columnable b, Columnable c) => (Int -> b -> c) -> Column -> Maybe Column Source #

tranform with index.

ifoldrColumn :: forall a b. (Columnable a, Columnable b) => (Int -> a -> b -> b) -> b -> Column -> Maybe b Source #

funcApply :: forall c. Columnable c => Vector RowValue -> Function -> c Source #

getIndicesUnboxed :: Unbox a => Vector Int -> Vector a -> Vector a Source #

getIndices :: Vector Int -> Vector a -> Vector a Source #

func :: forall fn. WrapFunction fn => fn -> Function Source #

matchesAnyType :: forall a xs. Typeable a => TypeRepList xs -> TypeRep a -> Bool Source #

testUnboxable :: forall a. Typeable a => TypeRep a -> Bool Source #

testNumeric :: forall a. Typeable a => TypeRep a -> Bool Source #

toColumnUnboxed :: forall a. (Columnable a, Unbox a) => Vector a -> Column Source #

Converts a an unboxed vector to a column making sure to put the vector into an appropriate column type by reflection on the vector's type parameter.

transformUnboxed :: forall a b. (Columnable a, Unbox a, Columnable b) => (a -> b) -> Vector a -> Column Source #

Applies a function that returns an unboxed result to an unboxed vector, storing the result in a column.

itransformUnboxed :: forall a b. (Columnable a, Unbox a, Columnable b) => (Int -> a -> b) -> Vector a -> Column Source #

ifilterColumn :: forall a. Columnable a => (Int -> a -> Bool) -> Column -> Maybe Column Source #

Filter column with index.

ifoldlColumn :: forall a b. (Columnable a, Columnable b) => (b -> Int -> a -> b) -> b -> Column -> Maybe b Source #

reduceColumn :: forall a b. Columnable a => (a -> b) -> Column -> b Source #

safeReduceColumn :: forall a b. Typeable a => (a -> b) -> Column -> Maybe b Source #

toRowVector :: [Text] -> DataFrame -> Vector Row Source #

columnNames :: DataFrame -> [Text] Source #

O(k) Get column names of the DataFrame in order of insertion.

insertColumn Source #

Arguments

:: forall a. Columnable a 
=> Text

Column Name

-> Vector a

Vector to add to column

-> DataFrame

DataFrame to add column to

-> DataFrame 

O(n) Adds a vector to the dataframe.

insertColumn' Source #

Arguments

:: Text

Column Name

-> Maybe Column

Column to add

-> DataFrame

DataFrame to add to column

-> DataFrame 

insertUnboxedColumn Source #

Arguments

:: forall a. (Columnable a, Unbox a) 
=> Text

Column Name

-> Vector a

Unboxed vector to add to column

-> DataFrame

DataFrame to add to column

-> DataFrame 

O(n) Adds an unboxed vector to the dataframe.

insertColumnWithDefault Source #

Arguments

:: forall a. Columnable a 
=> a

Default Value

-> Text

Column name

-> Vector a

Data to add to column

-> DataFrame

DataFrame to add to column

-> DataFrame 

O(k) Add a column to the dataframe providing a default. This constructs a new vector and also may convert it to an unboxed vector if necessary. Since columns are usually large the runtime is dominated by the length of the list, k.

columnSize :: Text -> DataFrame -> Maybe Int Source #

O(1) Get the number of elements in a given column.

columnInfo :: DataFrame -> DataFrame Source #

O(n) Returns the number of non-null columns in the dataframe and the type associated with each column.

valueCounts :: forall a. Columnable a => Text -> DataFrame -> [(a, Int)] Source #

O (k * n) Counts the occurences of each value in a given column.

countOccurrences :: Ord a => Vector a -> [(a, Int)] Source #

numericHistogram :: forall a. (HasCallStack, Columnable a) => Text -> Vector a -> String Source #

smallestPartition :: Ord a => a -> [a] -> a Source #

largestPartition :: Ord a => a -> [a] -> a Source #

deriveFrom :: ([Text], Function) -> Text -> DataFrame -> DataFrame Source #

O(k) Apply a function to a combination of columns in a dataframe and add the result into alias column.

derive Source #

Arguments

:: forall b c. (Columnable b, Columnable c) 
=> Text

New name

-> (b -> c)

function to apply

-> Text

Derivative column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to a given column in a dataframe and add the result into alias column.

applyMany :: (Columnable b, Columnable c) => (b -> c) -> [Text] -> DataFrame -> DataFrame Source #

O(k * n) Apply a function to given column names in a dataframe.

applyInt Source #

Arguments

:: Columnable b 
=> (Int -> b)

Column name | function to apply

-> Text 
-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Convenience function that applies to an int column.

applyDouble Source #

Arguments

:: Columnable b 
=> (Double -> b)

Column name | function to apply

-> Text 
-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Convenience function that applies to an double column.

applyWhere :: forall a b. (Columnable a, Columnable b) => (a -> Bool) -> Text -> (b -> b) -> Text -> DataFrame -> DataFrame Source #

O(k * n) Apply a function to a column only if there is another column value that matches the given criterion.

applyWhere "Age" (<20) "Generation" (const "Gen-Z")

applyAtIndex Source #

Arguments

:: forall a. Columnable a 
=> Int

Index

-> (a -> a)

function to apply

-> Text

Column name

-> DataFrame

DataFrame to apply operation to

-> DataFrame 

O(k) Apply a function to the column at a given index.

cube :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k) cuts the dataframe in a cube of size (a, b) where a is the length and b is the width.

cube (10, 5) df

selectIntRange :: (Int, Int) -> DataFrame -> DataFrame Source #

O(n) select columns by index range of column names.

selectRange :: (Text, Text) -> DataFrame -> DataFrame Source #

O(n) select columns by index range of column names.

exclude :: [Text] -> DataFrame -> DataFrame Source #

O(n) inverse of select

exclude ["Name"] df

appendWithFrontMin :: Ord a => a -> [a] -> [a] Source #

hash' :: Columnable a => a -> Double Source #

This hash function returns the hash when given a non numeric type but the value when given a numeric.

reduceBy :: forall a b. (Columnable a, Columnable b) => (forall v. Vector v a => v a -> b) -> Text -> DataFrame -> DataFrame Source #

readCsv :: String -> IO DataFrame Source #

Reads a CSV file from the given path. Note this file stores intermediate temporary files while converting the CSV from a row to a columnar format.

readSeparated :: Char -> ReadOptions -> String -> IO DataFrame Source #

Reads a character separated file into a dataframe using mutable vectors.

readTsv :: String -> IO DataFrame Source #

Reads a tab separated file from the given path. Note this file stores intermediate temporary files while converting the CSV from a row to a columnar format.

countRows :: Char -> FilePath -> IO Int Source #

First pass to count rows for exact allocation

getInitialDataVectors :: Int -> IOVector Column -> [Text] -> IO () Source #

fillColumns :: Int -> Char -> IOVector Column -> IOVector [(Int, Text)] -> Handle -> IO () Source #

Reads rows from the handle and stores values in mutable vectors.

freezeColumn :: IOVector Column -> Vector [(Int, Text)] -> ReadOptions -> Int -> IO (Maybe Column) Source #

Freezes a mutable vector into an immutable one, trimming it to the actual row count.

writeValue :: IOVector Column -> IOVector [(Int, Text)] -> Int -> Int -> Text -> IO () Source #

Writes a value into the appropriate column, resizing the vector if necessary.

writeSeparated Source #

Arguments

:: Char

Separator

-> String

Path to write to

-> DataFrame 
-> IO () 

(|>) :: a -> (a -> b) -> b Source #