9
9
from pandas .core .index import Index , MultiIndex
10
10
from pandas .core .frame import DataFrame
11
11
12
- def read_csv (filepath_or_buffer , sep = None , header = 0 , skiprows = None , index_col = 0 ,
13
- na_values = None , date_parser = None , names = None ):
14
- """
15
- Read CSV file into DataFrame
16
12
17
- Parameters
18
- ----------
19
- filepath_or_buffer : string or file handle / StringIO
20
- sep : string, default None
21
- Delimiter to use. By default will try to automatically determine
22
- this
23
- header : int, default 0
24
- Row to use for the column labels of the parsed DataFrame
25
- skiprows : list-like
26
- Row numbers to skip (0-indexed)
27
- index_col : int or sequence., default 0
28
- Column to use as the row labels of the DataFrame. Pass None if there is
29
- no such column. If a sequence is given, a MultiIndex is used.
30
- na_values : list-like, default None
31
- List of additional strings to recognize as NA/NaN
32
- date_parser : function
33
- Function to use for converting dates to strings. Defaults to
34
- dateutil.parser
35
- names : array-like
36
- List of column names
37
-
38
- Returns
39
- -------
40
- parsed : DataFrame
41
- """
13
+ def read_csv (filepath_or_buffer , sep = None , header = 0 , index_col = None , names = None ,
14
+ skiprows = None , na_values = None , parse_dates = False ,
15
+ date_parser = None ):
42
16
import csv
43
17
44
18
if hasattr (filepath_or_buffer , 'read' ):
@@ -71,43 +45,77 @@ def read_csv(filepath_or_buffer, sep=None, header=0, skiprows=None, index_col=0,
71
45
else :
72
46
lines = [l for l in reader ]
73
47
f .close ()
74
- return _simple_parser (lines , header = header , indexCol = index_col ,
75
- colNames = names , na_values = na_values ,
76
- date_parser = date_parser )
77
48
78
- def read_table (filepath_or_buffer , sep = '\t ' , header = 0 , skiprows = None ,
79
- index_col = 0 , na_values = None , date_parser = None , names = None ):
80
- """
81
- Read delimited file into DataFrame
49
+ if date_parser is not None :
50
+ parse_dates = True
82
51
83
- Parameters
84
- ----------
85
- filepath_or_buffer : string or file handle
86
- sep : string, default '\t '
87
- Delimiter to use
88
- header : int, default 0
89
- Row to use for the column labels of the parsed DataFrame
90
- skiprows : list-like
91
- Row numbers to skip (0-indexed)
92
- index_col : int or sequence, default 0
93
- Column to use as the row labels of the DataFrame. Pass None if there is
94
- no such column. If a sequence is given, a MultiIndex is used.
95
- na_values : list-like, default None
96
- List of additional strings to recognize as NA/NaN
97
- date_parser : function
98
- Function to use for converting dates to strings. Defaults to
99
- dateutil.parser
100
- names : array-like
101
- List of column names
102
-
103
- Returns
104
- -------
105
- parsed : DataFrame
106
- """
107
- return read_csv (filepath_or_buffer , sep , header , skiprows ,
108
- index_col , na_values , date_parser , names )
52
+ return _simple_parser (lines ,
53
+ header = header ,
54
+ index_col = index_col ,
55
+ colNames = names ,
56
+ na_values = na_values ,
57
+ parse_dates = parse_dates ,
58
+ date_parser = date_parser )
109
59
110
- def _simple_parser (lines , colNames = None , header = 0 , indexCol = 0 ,
60
+
61
+ def read_table (filepath_or_buffer , sep = '\t ' , header = 0 , index_col = None ,
62
+ names = None , skiprows = None , na_values = None , parse_dates = False ,
63
+ date_parser = None ):
64
+ return read_csv (filepath_or_buffer , sep = sep , header = header ,
65
+ skiprows = skiprows , index_col = index_col ,
66
+ na_values = na_values , date_parser = date_parser ,
67
+ names = names , parse_dates = parse_dates )
68
+
69
+ _parser_params = """Parameters
70
+ ----------
71
+ filepath_or_buffer : string or file handle / StringIO
72
+ %s
73
+ header : int, default 0
74
+ Row to use for the column labels of the parsed DataFrame
75
+ skiprows : list-like
76
+ Row numbers to skip (0-indexed)
77
+ index_col : int or sequence, default None
78
+ Column to use as the row labels of the DataFrame. If a sequence is
79
+ given, a MultiIndex is used.
80
+ na_values : list-like, default None
81
+ List of additional strings to recognize as NA/NaN
82
+ parse_dates : boolean, default False
83
+ Attempt to parse dates in the index column(s)
84
+ date_parser : function
85
+ Function to use for converting strings to dates. Defaults to
86
+ dateutil.parser
87
+ names : array-like
88
+ List of column names"""
89
+
90
+ _csv_sep = """sep : string, default None
91
+ Delimiter to use. By default will try to automatically determine
92
+ this"""
93
+
94
+ _table_sep = """sep : string, default \\ t (tab-stop)
95
+ Delimiter to use"""
96
+
97
+ read_csv .__doc__ = """
98
+ Read CSV (comma-separated) file into DataFrame
99
+
100
+ %s
101
+
102
+ Returns
103
+ -------
104
+ parsed : DataFrame
105
+ """ % (_parser_params % _csv_sep )
106
+
107
+ read_table .__doc__ = """
108
+ Read delimited file into DataFrame
109
+
110
+ %s
111
+
112
+ Returns
113
+ -------
114
+ parsed : DataFrame
115
+ """ % (_parser_params % _table_sep )
116
+
117
+
118
+ def _simple_parser (lines , colNames = None , header = 0 , index_col = 0 ,
111
119
na_values = None , date_parser = None , parse_dates = True ):
112
120
"""
113
121
Workhorse function for processing nested list into DataFrame
@@ -142,30 +150,48 @@ def _simple_parser(lines, colNames=None, header=0, indexCol=0,
142
150
zipped_content = zip (* content )
143
151
144
152
if len (content ) == 0 : # pragma: no cover
145
- raise Exception ('No content to parse' )
153
+ if index_col is not None :
154
+ if np .isscalar (index_col ):
155
+ index = Index ([], name = columns .pop (index_col ))
156
+ else :
157
+ cp_cols = list (columns )
158
+ names = []
159
+ for i in index_col :
160
+ name = cp_cols [i ]
161
+ columns .remove (name )
162
+ names .append (name )
163
+ index = MultiIndex .fromarrays ([[]] * len (index_col ),
164
+ names = names )
165
+ else :
166
+ index = Index ([])
167
+
168
+ return DataFrame (index = index , columns = columns )
169
+
170
+ if index_col is None and len (content [0 ]) == len (columns ) + 1 :
171
+ index_col = 0
146
172
147
173
# no index column specified, so infer that's what is wanted
148
- if indexCol is not None :
149
- if np .isscalar (indexCol ):
150
- if indexCol == 0 and len (content [0 ]) == len (columns ) + 1 :
174
+ if index_col is not None :
175
+ if np .isscalar (index_col ):
176
+ if index_col == 0 and len (content [0 ]) == len (columns ) + 1 :
151
177
index = zipped_content [0 ]
152
178
zipped_content = zipped_content [1 :]
153
179
else :
154
- index = zipped_content .pop (indexCol )
155
- columns .pop (indexCol )
180
+ index = zipped_content .pop (index_col )
181
+ columns .pop (index_col )
156
182
else : # given a list of index
157
183
idx_names = []
158
184
index = []
159
- for idx in indexCol :
185
+ for idx in index_col :
160
186
idx_names .append (columns [idx ])
161
187
index .append (zipped_content [idx ])
162
188
#remove index items from content and columns, don't pop in loop
163
- for i in range (len (indexCol )):
189
+ for i in range (len (index_col )):
164
190
columns .remove (idx_names [i ])
165
191
zipped_content .remove (index [i ])
166
192
167
193
168
- if np .isscalar (indexCol ):
194
+ if np .isscalar (index_col ):
169
195
if parse_dates :
170
196
index = _try_parse_dates (index , parser = date_parser )
171
197
index = Index (_maybe_convert_int (np .array (index , dtype = object )))
@@ -232,9 +258,6 @@ def _maybe_convert_int(arr):
232
258
return arr
233
259
234
260
def _maybe_convert_int_mindex (index , parse_dates , date_parser ):
235
- if len (index ) == 0 :
236
- return index
237
-
238
261
for i in range (len (index )):
239
262
try :
240
263
int (index [i ][0 ])
@@ -298,8 +321,8 @@ def __init__(self, path):
298
321
def __repr__ (self ):
299
322
return object .__repr__ (self )
300
323
301
- def parse (self , sheetname , header = 0 , skiprows = None , index_col = 0 ,
302
- na_values = None ):
324
+ def parse (self , sheetname , header = 0 , skiprows = None , index_col = None ,
325
+ parse_dates = False , date_parser = None , na_values = None ):
303
326
"""
304
327
Read Excel table into DataFrame
305
328
@@ -348,7 +371,8 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=0,
348
371
value = datetime (* dt )
349
372
row .append (value )
350
373
data .append (row )
351
- return _simple_parser (data , header = header , indexCol = index_col ,
374
+ return _simple_parser (data , header = header , index_col = index_col ,
375
+ parse_dates = parse_dates , date_parser = date_parser ,
352
376
na_values = na_values )
353
377
354
378
#-------------------------------------------------------------------------------
@@ -363,7 +387,8 @@ def parseCSV(filepath, header=0, skiprows=None, indexCol=0,
363
387
"""
364
388
warnings .warn ("parseCSV is deprecated. Use read_csv instead" , FutureWarning )
365
389
return read_csv (filepath , header = header , skiprows = skiprows ,
366
- index_col = indexCol , na_values = na_values )
390
+ index_col = indexCol , na_values = na_values ,
391
+ parse_dates = True )
367
392
368
393
def parseText (filepath , sep = '\t ' , header = 0 ,
369
394
indexCol = 0 , colNames = None ): # pragma: no cover
@@ -374,7 +399,7 @@ def parseText(filepath, sep='\t', header=0,
374
399
warnings .warn ("parseText is deprecated. Use read_table instead" ,
375
400
FutureWarning )
376
401
return read_table (filepath , sep = sep , header = header , index_col = indexCol ,
377
- names = colNames )
402
+ names = colNames , parse_dates = True )
378
403
379
404
380
405
def parseExcel (filepath , header = None , indexCol = 0 ,
@@ -385,6 +410,7 @@ def parseExcel(filepath, header=None, indexCol=0,
385
410
warnings .warn ("parseExcel is deprecated. Use the ExcelFile class instead" ,
386
411
FutureWarning )
387
412
excel_file = ExcelFile (filepath )
388
- return excel_file .parse (sheetname , header = header , index_col = indexCol )
413
+ return excel_file .parse (sheetname , header = header , index_col = indexCol ,
414
+ parse_dates = True )
389
415
390
416
0 commit comments