Skip to content

Exception with read_html and header #5129

Closed
@cancan101

Description

@cancan101
In [5]: pd.read_html("http://pastebin.com/raw.php?i=7mAF0Ei6",infer_types=False, header=[0,1])[0]
---------------------------------------------------------------------------

----> 1 pd.read_html("http://pastebin.com/raw.php?i=7mAF0Ei6",infer_types=False, header=[0,1])[0]

/home/alex/git/pandas/pandas/io/html.pyc in read_html(io, match, flavor, header, index_col, skiprows, infer_types, attrs, parse_dates, tupleize_cols, thousands)
    838                          'data (you passed a negative value)')
    839     return _parse(flavor, io, match, header, index_col, skiprows, infer_types,
--> 840                   parse_dates, tupleize_cols, thousands, attrs)

/home/alex/git/pandas/pandas/io/html.pyc in _parse(flavor, io, match, header, index_col, skiprows, infer_types, parse_dates, tupleize_cols, thousands, attrs)
    710     return [_data_to_frame(table, header, index_col, skiprows, infer_types,
    711                            parse_dates, tupleize_cols, thousands)
--> 712             for table in tables]
    713 
    714 

/home/alex/git/pandas/pandas/io/html.pyc in _data_to_frame(data, header, index_col, skiprows, infer_types, parse_dates, tupleize_cols, thousands)
    600                     skiprows=_get_skiprows(skiprows),
    601                     parse_dates=parse_dates, tupleize_cols=tupleize_cols,
--> 602                     thousands=thousands)
    603     df = tp.read()
    604 

/home/alex/git/pandas/pandas/io/parsers.pyc in TextParser(*args, **kwds)
   1171     """
   1172     kwds['engine'] = 'python'
-> 1173     return TextFileReader(*args, **kwds)
   1174 
   1175 

/home/alex/git/pandas/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    481             self.options['has_index_names'] = kwds['has_index_names']
    482 
--> 483         self._make_engine(self.engine)
    484 
    485     def _get_options_with_defaults(self, engine):

/home/alex/git/pandas/pandas/io/parsers.pyc in _make_engine(self, engine)
    596             elif engine == 'python-fwf':
    597                 klass = FixedWidthFieldParser
--> 598             self._engine = klass(self.f, **self.options)
    599 
    600     def _failover_to_python(self):

/home/alex/git/pandas/pandas/io/parsers.pyc in __init__(self, f, **kwds)
   1294         if len(self.columns) > 1:
   1295             self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns(
-> 1296                 self.columns, self.index_names, self.col_names)
   1297         else:
   1298             self.columns = self.columns[0]

/home/alex/git/pandas/pandas/io/parsers.pyc in _extract_multi_indexer_columns(self, header, index_names, col_names, passed_names)
    734         # if we find 'Unnamed' all of a single level, then our header was too long
    735         for n in range(len(columns[0])):
--> 736             if all([ 'Unnamed' in c[n] for c in columns ]):
    737                 raise _parser.CParserError("Passed header=[%s] are too many rows for this "
    738                                            "multi_index of columns" % ','.join([ str(x) for x in self.header ]))

TypeError: argument of type 'float' is not iterable

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug · IO Data (IO issues that don't fit into a more specific label) · IO HTML (read_html, to_html, Styler.apply, Styler.applymap)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions