Skip to content

Excel output in non-ascii encodings #3710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1470,14 +1470,15 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,

def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, startrow=0, startcol=0):
index_label=None, startrow=0, startcol=0, encoding = 'ascii'):
"""
Write DataFrame to a excel sheet

Parameters
----------
excel_writer : string or ExcelWriter object
File path or existing ExcelWriter
encoding: Encoding used for the worksheet
sheet_name : string, default 'sheet1'
Name of sheet which will contain DataFrame
na_rep : string, default ''
Expand Down Expand Up @@ -1512,7 +1513,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
from pandas.io.parsers import ExcelWriter
need_save = False
if isinstance(excel_writer, basestring):
excel_writer = ExcelWriter(excel_writer)
excel_writer = ExcelWriter(excel_writer, encoding = encoding)
need_save = True

formatter = fmt.ExcelFormatter(self,
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1996,7 +1996,6 @@ class ExcelFile(object):
"""
def __init__(self, path_or_buf, kind=None, **kwds):
self.kind = kind

import xlrd # throw an ImportError if we need to
ver = tuple(map(int,xlrd.__VERSION__.split(".")[:2]))
if ver < (0, 9):
Expand All @@ -2009,7 +2008,7 @@ def __init__(self, path_or_buf, kind=None, **kwds):
self.book = xlrd.open_workbook(path_or_buf)
else:
data = path_or_buf.read()
self.book = xlrd.open_workbook(file_contents=data)
self.book = xlrd.open_workbook(file_contents = data)

def __repr__(self):
return object.__repr__(self)
Expand Down Expand Up @@ -2264,12 +2263,13 @@ class ExcelWriter(object):
path : string
Path to xls file
"""
def __init__(self, path):
def __init__(self, path, encoding = 'ascii'):
self.use_xlsx = True
self.encoding = encoding
if path.endswith('.xls'):
self.use_xlsx = False
import xlwt
self.book = xlwt.Workbook()
self.book = xlwt.Workbook(encoding = self.encoding)
self.fm_datetime = xlwt.easyxf(
num_format_str='YYYY-MM-DD HH:MM:SS')
self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD')
Expand Down
Binary file added pandas/io/tests/data/excel_test_ascii.xls
Binary file not shown.
Binary file added pandas/io/tests/data/excel_test_noascii.xls
Binary file not shown.
Binary file added pandas/io/tests/data/excel_writer_ascii.xls
Binary file not shown.
Binary file added pandas/io/tests/data/excel_writer_noascii.xls
Binary file not shown.
8 changes: 7 additions & 1 deletion pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,11 @@ def test_to_excel_float_format(self):
[12.32, 123123.20, 321321.20]],
index=['A', 'B'], columns=['X', 'Y', 'Z'])
tm.assert_frame_equal(rs, xp)






def test_to_excel_unicode_filename(self):
_skip_if_no_excelsuite()

Expand Down Expand Up @@ -858,7 +862,9 @@ def roundtrip(df, header=True, parser_hdr=0):
res = roundtrip(DataFrame([0]), False, None)
self.assertEqual(res.shape, (1, 2))
self.assertTrue(res.ix[0, 0] is not np.nan)



if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
213 changes: 213 additions & 0 deletions pandas/io/tests/test_excel_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
# pylint: disable=E1101
# -*- coding: utf-8 -*-


from pandas.util.py3compat import StringIO, BytesIO, PY3
from datetime import datetime
from os.path import split as psplit
import csv
import os
import sys
import re
import unittest

import nose

from numpy import nan
import numpy as np

from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex
import pandas.io.parsers as parsers
from pandas.io.parsers import (read_csv, read_table, read_fwf,
ExcelFile, TextFileReader, TextParser)
from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
network,
ensure_clean)
import pandas.util.testing as tm
import pandas as pd

import pandas.lib as lib
from pandas.util import py3compat
from pandas.lib import Timestamp
from pandas.tseries.index import date_range
import pandas.tseries.tools as tools

from numpy.testing.decorators import slow

from pandas._parser import OverflowError

from pandas.io.parsers import (ExcelFile, ExcelWriter, read_csv)


def _skip_if_no_xlrd():
    """Skip the calling test unless a usable ``xlrd`` (>= 0.9) is importable.

    Raises
    ------
    nose.SkipTest
        If ``xlrd`` cannot be imported, or if the installed version is
        older than 0.9.
    """
    try:
        import xlrd
    except ImportError:
        raise nose.SkipTest('xlrd not installed, skipping')

    # Only the major/minor components matter for the minimum-version check.
    ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
    if ver < (0, 9):
        # xlrd *is* installed here, so report the real reason for skipping.
        raise nose.SkipTest('xlrd >= 0.9 required, skipping')


def _skip_if_no_xlwt():
    """Skip the calling test when ``xlwt`` cannot be imported."""
    try:
        # Only importability matters; the module object itself is not used.
        __import__('xlwt')
    except ImportError:
        raise nose.SkipTest('xlwt not installed, skipping')


def _skip_if_no_openpyxl():
    """Skip the calling test when ``openpyxl`` cannot be imported."""
    try:
        # Only importability matters; the module object itself is not used.
        __import__('openpyxl')
    except ImportError:
        raise nose.SkipTest('openpyxl not installed, skipping')


def _skip_if_no_excelsuite():
    """Skip the calling test unless xlrd, xlwt and openpyxl all pass their checks."""
    # Checks run in the same order as before; the first failing one raises.
    for check in (_skip_if_no_xlrd, _skip_if_no_xlwt, _skip_if_no_openpyxl):
        check()


# Module-level sample data built from the pandas.util.testing helpers.
# NOTE(review): none of these names is referenced by the ExcelTests class in
# this file; they look like leftovers copied from test_excel.py -- confirm
# before removing.
_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
# First 10 rows of a frame built from the series data.
_frame = DataFrame(_seriesd)[:10]
# Same series data with the columns requested in the order D, C, B, A.
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10]
# First 5 rows of tm.makeTimeDataFrame().
_tsframe = tm.makeTimeDataFrame()[:5]
# Copy of _frame with an added column 'foo' set to the string 'bar'.
_mixed_frame = _frame.copy()
_mixed_frame['foo'] = 'bar'


class ExcelTests(unittest.TestCase):
    """Round-trip tests for the ``encoding`` option of the xls writer.

    Every test writes one or more frames to an ``.xls`` workbook, reads the
    sheets back through ``ExcelFile`` and compares them with the originals.
    Each scenario is exercised twice: with ASCII-only data and the default
    encoding, and with UTF-8 encoded byte strings and ``encoding='utf8'``.
    """

    # Sheet names used for the first and second frame of a workbook.
    SHEET = 'DataFrame_TEST'
    SHEET_2 = 'DataFrame_TEST_2'

    # Sample data.  DataFrame() copies these, so sharing them is safe.
    ASCII_DATA = {
        'index': ['A', 'B', 'C', 'C', 'B', 'A'],
        'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
        'values': [1., 2., 3., 3., 2., 1.],
    }
    NOASCII_DATA = {
        'index': ['Año', 'Baldío', 'Trócola', 'Mínimo', 'Barça', 'Cigüeña'],
        'columns': ['Año', 'Narices', 'Búlgaro', 'Libélula', 'Cínico', '1º'],
        'values': ['Céfiro', 'Tarugo', 'Déspota', 'Camión', 'Añejo', 'º'],
    }
    NOASCII_DATA_ALT = {
        'index': ['Puño', 'Mísero', 'Brújula', 'Pájaro', 'Barça', 'Cigüeña'],
        'columns': ['Años', 'Nariz', 'Bígaro', 'Céfiro', '2º', '2€'],
        'values': ['Tímido', 'Variado', 'Efímero', 'Trágico', 'Compañero',
                   '5º'],
    }

    def setUp(self):
        """Compute the paths of the workbooks written by the tests."""
        self.dirpath = tm.get_data_path()
        self.xls_ta = os.path.join(self.dirpath, 'excel_test_ascii.xls')
        self.xls_tna = os.path.join(self.dirpath, 'excel_test_noascii.xls')
        self.xls_wa = os.path.join(self.dirpath, 'excel_writer_ascii.xls')
        self.xls_wna = os.path.join(self.dirpath, 'excel_writer_noascii.xls')

    @staticmethod
    def _read_sheets(path, sheet_names, **kwds):
        """Open the workbook at ``path`` and parse each named sheet.

        Extra keywords are forwarded to ``ExcelFile``.  Returns the parsed
        frames as a list, in the order of ``sheet_names``.
        """
        book = ExcelFile(path, **kwds)
        return [book.parse(name, index_col=None, na_values=['NA'])
                for name in sheet_names]

    def _check_to_excel(self, data, path, **encoding_kwds):
        """Write ``data`` with ``DataFrame.to_excel`` given a path, read it
        back and assert that the frame round-trips unchanged."""
        original = DataFrame(data)
        original.to_excel(path, sheet_name=self.SHEET, **encoding_kwds)
        saved, = self._read_sheets(path, [self.SHEET], **encoding_kwds)
        tm.assert_frame_equal(original, saved)

    def _check_excel_writer(self, data_1, data_2, path, **encoding_kwds):
        """Write two frames to separate sheets through one ``ExcelWriter``,
        read both back and assert that each round-trips unchanged."""
        writer = ExcelWriter(path, **encoding_kwds)
        original_1 = DataFrame(data_1)
        original_2 = DataFrame(data_2)
        original_1.to_excel(writer, sheet_name=self.SHEET)
        original_2.to_excel(writer, sheet_name=self.SHEET_2)
        writer.save()

        saved_1, saved_2 = self._read_sheets(
            path, [self.SHEET, self.SHEET_2], **encoding_kwds)
        tm.assert_frame_equal(original_1, saved_1)
        tm.assert_frame_equal(original_2, saved_2)

    def test_excel_output_encoding(self):
        """``DataFrame.to_excel`` accepts an ``encoding`` parameter, making
        it possible to write data in encodings other than ASCII."""
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        # First with ASCII-only data and the default encoding.
        self._check_to_excel(self.ASCII_DATA, self.xls_ta)

        # Now with non-ASCII characters, supplying ``encoding``.  The same
        # (correctly spelled) name is forwarded to ExcelFile when reading.
        self._check_to_excel(self.NOASCII_DATA, self.xls_tna,
                             encoding='utf8')

    def test_excel_writer_encoding(self):
        """``ExcelWriter`` accepts an ``encoding`` parameter, making it
        possible to write data in encodings other than ASCII."""
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        # First with ASCII-only data and the default encoding.
        self._check_excel_writer(self.ASCII_DATA, self.ASCII_DATA,
                                 self.xls_wa)

        # Now with non-ASCII characters, supplying ``encoding``.
        self._check_excel_writer(self.NOASCII_DATA_ALT, self.NOASCII_DATA,
                                 self.xls_wna, encoding='utf8')

if __name__ == '__main__':
    # Run this module's tests through nose with the same options as before.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
Loading