pandas-dev · jtornero · May 29, 2013 · May 29, 2013 · May 29, 2013 · May 30, 2013
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1470,14 +1470,15 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
 
     def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
                  float_format=None, cols=None, header=True, index=True,
-                 index_label=None, startrow=0, startcol=0):
+                 index_label=None, startrow=0, startcol=0,  encoding = 'ascii'):
         """
         Write DataFrame to a excel sheet
 
         Parameters
         ----------
         excel_writer : string or ExcelWriter object
             File path or existing ExcelWriter
+        encoding : string, default 'ascii'; encoding used for the worksheet
         sheet_name : string, default 'sheet1'
             Name of sheet which will contain DataFrame
         na_rep : string, default ''
@@ -1512,7 +1513,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
         from pandas.io.parsers import ExcelWriter
         need_save = False
         if isinstance(excel_writer, basestring):
-            excel_writer = ExcelWriter(excel_writer)
+            excel_writer = ExcelWriter(excel_writer, encoding = encoding)
             need_save = True
 
         formatter = fmt.ExcelFormatter(self,

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1996,7 +1996,6 @@ class ExcelFile(object):
     """
     def __init__(self, path_or_buf, kind=None, **kwds):
         self.kind = kind
-
         import xlrd # throw an ImportError if we need to
         ver = tuple(map(int,xlrd.__VERSION__.split(".")[:2]))
         if ver < (0, 9):
@@ -2009,7 +2008,7 @@ def __init__(self, path_or_buf, kind=None, **kwds):
             self.book = xlrd.open_workbook(path_or_buf)
         else:
             data = path_or_buf.read()
-            self.book = xlrd.open_workbook(file_contents=data)
+            self.book = xlrd.open_workbook(file_contents = data)
 
     def __repr__(self):
         return object.__repr__(self)
@@ -2264,12 +2263,13 @@ class ExcelWriter(object):
     path : string
         Path to xls file
     """
-    def __init__(self, path):
+    def __init__(self, path, encoding = 'ascii'):
         self.use_xlsx = True
+	self.encoding = encoding
         if path.endswith('.xls'):
             self.use_xlsx = False
             import xlwt
-            self.book = xlwt.Workbook()
+            self.book = xlwt.Workbook(encoding = self.encoding)
             self.fm_datetime = xlwt.easyxf(
                 num_format_str='YYYY-MM-DD HH:MM:SS')
             self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD')

diff --git a/pandas/io/tests/data/excel_test_ascii.xls b/pandas/io/tests/data/excel_test_ascii.xls
diff --git a/pandas/io/tests/data/excel_test_noascii.xls b/pandas/io/tests/data/excel_test_noascii.xls
diff --git a/pandas/io/tests/data/excel_writer_ascii.xls b/pandas/io/tests/data/excel_writer_ascii.xls
diff --git a/pandas/io/tests/data/excel_writer_noascii.xls b/pandas/io/tests/data/excel_writer_noascii.xls
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -639,7 +639,11 @@ def test_to_excel_float_format(self):
                                 [12.32, 123123.20, 321321.20]],
                                index=['A', 'B'], columns=['X', 'Y', 'Z'])
                 tm.assert_frame_equal(rs, xp)
-
+
+
+
+
+
     def test_to_excel_unicode_filename(self):
         _skip_if_no_excelsuite()
 
@@ -858,7 +862,9 @@ def roundtrip(df, header=True, parser_hdr=0):
         res = roundtrip(DataFrame([0]), False, None)
         self.assertEqual(res.shape, (1, 2))
         self.assertTrue(res.ix[0, 0] is not np.nan)
+
 
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/io/tests/test_excel_encoding.py b/pandas/io/tests/test_excel_encoding.py
@@ -0,0 +1,213 @@
+# pylint: disable=E1101
+# -*- coding: utf-8 -*-
+
+
+from pandas.util.py3compat import StringIO, BytesIO, PY3
+from datetime import datetime
+from os.path import split as psplit
+import csv
+import os
+import sys
+import re
+import unittest
+
+import nose
+
+from numpy import nan
+import numpy as np
+
+from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex
+import pandas.io.parsers as parsers
+from pandas.io.parsers import (read_csv, read_table, read_fwf,
+                               ExcelFile, TextFileReader, TextParser)
+from pandas.util.testing import (assert_almost_equal,
+                                 assert_series_equal, 
+                                 network,
+                                 ensure_clean)
+import pandas.util.testing as tm
+import pandas as pd
+
+import pandas.lib as lib
+from pandas.util import py3compat
+from pandas.lib import Timestamp
+from pandas.tseries.index import date_range
+import pandas.tseries.tools as tools
+
+from numpy.testing.decorators import slow
+
+from pandas._parser import OverflowError
+
+from pandas.io.parsers import (ExcelFile, ExcelWriter, read_csv)
+
+
+def _skip_if_no_xlrd():
+    """Skip the calling test unless xlrd >= 0.9 can be imported."""
+    try:
+        import xlrd
+    except ImportError:
+        raise nose.SkipTest('xlrd not installed, skipping')
+    if tuple(map(int, xlrd.__VERSION__.split(".")[:2])) < (0, 9):
+        raise nose.SkipTest('xlrd older than 0.9, skipping')
+
+
+def _skip_if_no_xlwt():  # skips the calling test when xlwt is missing
+    try:
+        import xlwt  # imported only to probe availability
+    except ImportError:
+        raise nose.SkipTest('xlwt not installed, skipping')
+
+
+def _skip_if_no_openpyxl():  # skips the calling test when openpyxl is missing
+    try:
+        import openpyxl  # imported only to probe availability
+    except ImportError:
+        raise nose.SkipTest('openpyxl not installed, skipping')
+
+
+def _skip_if_no_excelsuite():
+    # Skip the calling test unless xlrd, xlwt and openpyxl are all usable.
+    for guard in (_skip_if_no_xlrd, _skip_if_no_xlwt, _skip_if_no_openpyxl):
+        guard()
+
+
+_seriesd = tm.getSeriesData()  # sample data built once, at import time
+_tsd = tm.getTimeSeriesData()  # NOTE(review): these fixtures look unused here
+_frame = DataFrame(_seriesd)[:10]  # first ten rows only
+_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10]  # explicit column order
+_tsframe = tm.makeTimeDataFrame()[:5]  # first five rows only
+_mixed_frame = _frame.copy()  # copy, so _frame itself is left unchanged
+_mixed_frame['foo'] = 'bar'  # adds a string column to the copy
+
+
+class ExcelTests(unittest.TestCase):
+    """Round-trip tests for the ``encoding`` argument of Excel output.
+
+    Frames are written to ``.xls`` workbooks with ``DataFrame.to_excel`` and
+    ``ExcelWriter``, read back through ``ExcelFile`` and compared with the
+    originals.
+    """
+
+    def setUp(self):
+        # Paths of the workbooks the test writes; all of them live in the
+        # directory returned by tm.get_data_path().
+        self.dirpath = tm.get_data_path()
+        self.xls_ta = os.path.join(self.dirpath, 'excel_test_ascii.xls')
+        self.xls_tna = os.path.join(self.dirpath, 'excel_test_noascii.xls')
+        self.xls_wa = os.path.join(self.dirpath, 'excel_writer_ascii.xls')
+        self.xls_wna = os.path.join(self.dirpath, 'excel_writer_noascii.xls')
+
+    def _read_sheet(self, xls, sheet_name):
+        # Parse one sheet with the options shared by every read-back below.
+        return xls.parse(sheet_name, index_col=None, na_values=['NA'])
+
+    def test_excel_output_encoding(self):
+        """Check ASCII and non-ASCII round trips through both write paths.
+
+        The test is skipped when xlrd or xlwt is not available.
+        """
+        _skip_if_no_xlrd()
+        _skip_if_no_xlwt()
+
+        # --- DataFrame.to_excel() called with a file path ---
+
+        # ASCII-only data, relying on the default encoding.
+        data_ascii = {
+            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
+            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
+            'values': [1., 2., 3., 3., 2., 1.]
+        }
+
+        original_ascii = DataFrame(data_ascii)
+        original_ascii.to_excel(self.xls_ta, sheet_name='DataFrame_TEST')
+
+        saved_ascii = self._read_sheet(ExcelFile(self.xls_ta),
+                                       'DataFrame_TEST')
+
+        # Non-ASCII data, passing ``encoding`` to to_excel().
+        # NOTE(review): on Python 2 these literals are UTF-8 byte strings;
+        # presumably the frame read back holds unicode -- confirm that
+        # assert_frame_equal treats the two as equal.
+        data_noascii = {
+            'index': ['Año', 'Baldío', 'Trócola', 'Mínimo', 'Barça', 'Cigüeña'],
+            'columns': ['Año', 'Narices', 'Búlgaro', 'Libélula', 'Cínico', '1º'],
+            'values': ['Céfiro', 'Tarugo', 'Déspota', 'Camión', 'Añejo', 'º']
+        }
+
+        original_noascii = DataFrame(data_noascii)
+        original_noascii.to_excel(self.xls_tna, sheet_name='DataFrame_TEST',
+                                  encoding='utf8')
+
+        # ``encoding`` reaches ExcelFile through its **kwds.
+        # NOTE(review): confirm the reading side actually consumes it.
+        saved_noascii = self._read_sheet(
+            ExcelFile(self.xls_tna, encoding='utf8'), 'DataFrame_TEST')
+
+        # Every frame read back must equal the frame that was written.
+        tm.assert_frame_equal(original_ascii, saved_ascii)
+        tm.assert_frame_equal(original_noascii, saved_noascii)
+
+        # --- ExcelWriter holding two sheets ---
+
+        # ASCII-only data, relying on the writer's default encoding.
+        data_ascii_1 = {
+            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
+            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
+            'values': [1., 2., 3., 3., 2., 1.]
+        }
+
+        data_ascii_2 = {
+            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
+            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
+            'values': [1., 2., 3., 3., 2., 1.]
+        }
+
+        original_ascii_1 = DataFrame(data_ascii_1)
+        original_ascii_2 = DataFrame(data_ascii_2)
+
+        # Both frames become separate sheets of one writer.
+        writer_ascii = ExcelWriter(self.xls_wa)
+        original_ascii_1.to_excel(writer_ascii, sheet_name='DataFrame_TEST')
+        original_ascii_2.to_excel(writer_ascii, sheet_name='DataFrame_TEST_2')
+        writer_ascii.save()
+
+        xls_ascii = ExcelFile(self.xls_wa)
+        saved_ascii_1 = self._read_sheet(xls_ascii, 'DataFrame_TEST')
+        saved_ascii_2 = self._read_sheet(xls_ascii, 'DataFrame_TEST_2')
+
+        # Non-ASCII data, passing ``encoding`` to ExcelWriter.
+        data_noascii_1 = {
+            'index': ['Puño', 'Mísero', 'Brújula', 'Pájaro', 'Barça', 'Cigüeña'],
+            'columns': ['Años', 'Nariz', 'Bígaro', 'Céfiro', '2º', '2€'],
+            'values': ['Tímido', 'Variado', 'Efímero', 'Trágico', 'Compañero', '5º']
+        }
+
+        data_noascii_2 = {
+            'index': ['Año', 'Baldío', 'Trócola', 'Mínimo', 'Barça', 'Cigüeña'],
+            'columns': ['Año', 'Narices', 'Búlgaro', 'Libélula', 'Cínico', '1º'],
+            'values': ['Céfiro', 'Tarugo', 'Déspota', 'Camión', 'Añejo', 'º']
+        }
+
+        original_noascii_1 = DataFrame(data_noascii_1)
+        original_noascii_2 = DataFrame(data_noascii_2)
+
+        writer_noascii = ExcelWriter(self.xls_wna, encoding='utf8')
+        original_noascii_1.to_excel(writer_noascii,
+                                    sheet_name='DataFrame_TEST')
+        original_noascii_2.to_excel(writer_noascii,
+                                    sheet_name='DataFrame_TEST_2')
+        writer_noascii.save()
+
+        xls_noascii = ExcelFile(self.xls_wna, encoding='utf8')
+        saved_noascii_1 = self._read_sheet(xls_noascii, 'DataFrame_TEST')
+        saved_noascii_2 = self._read_sheet(xls_noascii, 'DataFrame_TEST_2')
+
+        # Every frame read back must equal the frame that was written.
+        tm.assert_frame_equal(original_ascii_1, saved_ascii_1)
+        tm.assert_frame_equal(original_ascii_2, saved_ascii_2)
+
+        tm.assert_frame_equal(original_noascii_1, saved_noascii_1)
+        tm.assert_frame_equal(original_noascii_2, saved_noascii_2)
+
+if __name__ == '__main__':  # direct execution: hand this module to nose
+    nose_args = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
+    nose.runmodule(argv=nose_args, exit=False)