Skip to content

TST: Fix Py2 test_to_hdf_with_object_column_names test #20907

Closed
@TomAugspurger

Description

@TomAugspurger

This "worked" before because we used pytest.raises(..., msg=), but msg isn't an argument for pytest.raises, so the expected message was never validated.

Skipping for now

pytest pandas/tests/io/test_pytables.py -k test_to_hdf_with_object_column_names  -x --pdb
==================================================================== test session starts =====================================================================
platform darwin -- Python 2.7.14, pytest-3.5.1, py-1.5.3, pluggy-0.6.0
rootdir: /Users/taugspurger/sandbox/pandas-27, inifile: setup.cfg
plugins: xdist-1.22.2, forked-0.2
collected 177 items / 176 deselected

pandas/tests/io/test_pytables.py F
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> traceback >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

self = <pandas.tests.io.test_pytables.TestHDFStore object at 0x1f1f92bed0>

    def test_to_hdf_with_object_column_names(self):
        # GH9057
        # Writing HDF5 table format should only work for string-like
        # column types

        types_should_fail = [tm.makeIntIndex, tm.makeFloatIndex,
                             tm.makeDateIndex, tm.makeTimedeltaIndex,
                             tm.makePeriodIndex]
        types_should_run = [tm.makeStringIndex, tm.makeCategoricalIndex]

        if compat.PY3:
            types_should_run.append(tm.makeUnicodeIndex)
        else:
            types_should_fail.append(tm.makeUnicodeIndex)

        for index in types_should_fail:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(self.path) as path:
                with catch_warnings(record=True):
                    with tm.assert_raises_regex(
                        ValueError, ("cannot have non-object label "
                                     "DataIndexableCol")):
                        df.to_hdf(path, 'df', format='table',
>                                 data_columns=True)

pandas/tests/io/test_pytables.py:4890:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:2393: in __exit__
    return self.exception_matches(exc_type, exc_value, trace_back)
pandas/util/testing.py:2429: in exception_matches
    raise_with_traceback(e, trace_back)
pandas/tests/io/test_pytables.py:4890: in test_to_hdf_with_object_column_names
    data_columns=True)
pandas/core/generic.py:1990: in to_hdf
    return pytables.to_hdf(path_or_buf, key, self, **kwargs)
pandas/io/pytables.py:279: in to_hdf
    f(store)
pandas/io/pytables.py:273: in <lambda>
    f = lambda store: store.put(key, value, **kwargs)
pandas/io/pytables.py:886: in put
    self._write_to_group(key, value, append=append, **kwargs)
pandas/io/pytables.py:1363: in _write_to_group
    s.write(obj=value, append=append, complib=complib, **kwargs)
pandas/io/pytables.py:3945: in write
    self._handle.create_table(self.group, **options)
../../miniconda3/envs/travis-27/lib/python2.7/site-packages/tables/file.py:1055: in create_table
    chunkshape=chunkshape, byteorder=byteorder)
../../miniconda3/envs/travis-27/lib/python2.7/site-packages/tables/table.py:773: in __init__
    self.description = Description(description)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <[UnicodeEncodeError("'ascii' codec can't encode character u'\u05db' in position 51: ordinal not in range(128)") raised in repr()] SafeRepr object at 0x1f216c58c0>
classdict = {'35כזכקל01ט': Float64Col(shape=(), dflt=0.0, pos=2), 'index': Int64Col(shape=(), dflt=0, pos=0), 'כ9כחגההי86': Float64Col(shape=(), dflt=0.0, pos=1)}
nestedlvl = -1, validate = True

    def __init__(self, classdict, nestedlvl=-1, validate=True):

        if not classdict:
            raise ValueError("cannot create an empty data type")

        # Do a shallow copy of classdict just in case this is going to
        # be shared by other instances
        newdict = self.__dict__
        newdict["_v_name"] = "/"   # The name for root descriptor
        newdict["_v_names"] = []
        newdict["_v_dtypes"] = {}
        newdict["_v_types"] = {}
        newdict["_v_dflts"] = {}
        newdict["_v_colobjects"] = {}
        newdict["_v_is_nested"] = False
        nestedFormats = []
        nestedDType = []

        if not hasattr(newdict, "_v_nestedlvl"):
            newdict["_v_nestedlvl"] = nestedlvl + 1

        cols_with_pos = []  # colum (position, name) pairs
        cols_no_pos = []  # just column names

        # Check for special variables and convert column descriptions
        for (name, descr) in six.iteritems(classdict):
            if name.startswith('_v_'):
                if name in newdict:
                    # print("Warning!")
                    # special methods &c: copy to newdict, warn about conflicts
                    warnings.warn("Can't set attr %r in description class %r"
                                  % (name, self))
                else:
                    # print("Special variable!-->", name, classdict[name])
                    newdict[name] = descr
                continue  # This variable is not needed anymore

            columns = None
            if (type(descr) == type(IsDescription) and
                    issubclass(descr, IsDescription)):
                # print("Nested object (type I)-->", name)
                columns = descr().columns
            elif (type(descr.__class__) == type(IsDescription) and
                  issubclass(descr.__class__, IsDescription)):
                # print("Nested object (type II)-->", name)
                columns = descr.columns
            elif isinstance(descr, dict):
                # print("Nested object (type III)-->", name)
                columns = descr
            else:
                # print("Nested object (type IV)-->", name)
                descr = copy.copy(descr)
            # The copies above and below ensure that the structures
            # provided by the user will remain unchanged even if we
            # tamper with the values of ``_v_pos`` here.
            if columns is not None:
                descr = Description(copy.copy(columns), self._v_nestedlvl)
            classdict[name] = descr

            pos = getattr(descr, '_v_pos', None)
            if pos is None:
                cols_no_pos.append(name)
            else:
                cols_with_pos.append((pos, name))

        # Sort field names:
        #
        # 1. Fields with explicit positions, according to their
        #    positions (and their names if coincident).
        # 2. Fields with no position, in alfabetical order.
        cols_with_pos.sort()
        cols_no_pos.sort()
        keys = [name for (pos, name) in cols_with_pos] + cols_no_pos

        pos = 0
        # Get properties for compound types
        for k in keys:
            if validate:
                # Check for key name validity
                check_name_validity(k)
            # Class variables
            object = classdict[k]
            newdict[k] = object    # To allow natural naming
            if not (isinstance(object, Col) or
                    isinstance(object, Description)):
                raise TypeError('Passing an incorrect value to a table column.'
                                ' Expected a Col (or subclass) instance and '
                                'got: "%s". Please make use of the Col(), or '
                                'descendant, constructor to properly '
                                'initialize columns.' % object)
            object._v_pos = pos  # Set the position of this object
            object._v_parent = self  # The parent description
            pos += 1
            newdict['_v_colobjects'][k] = object
            newdict['_v_names'].append(k)
            object.__dict__['_v_name'] = k

            if not isinstance(k, str):
                # numpy only accepts "str" for field names
                if sys.version_info[0] < 3:
                    # Python 2.x: unicode --> str
>                   kk = k.encode()  # use the default encoding
E                   AssertionError: "cannot have non-object label DataIndexableCol" does not match "'ascii' codec can't encode character u'\u05db' in position 0: ordinal not in range(128)"


Working around this for now.

Metadata

Metadata

Assignees

No one assigned

    Labels

    IO HDF5 (read_hdf, HDFStore), Testing (pandas testing functions or related to the test suite)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions