Hi,
I wrote the program below to merge all xls files in a given directory into one multisheet xls file. It uses xlwt and xlrd. The xls files I use for input are generated by Spss. When I open and re-save the files in Excel, the program works, but when I use the xls files as they were created by Spss, the program returns an error message (see below). Some details: Python 2.4, xlwt version 0.7.0, xlrd version 0.5.2, Win NT.
Any ideas?
Thanks in advance!
Albert-Jan
"""
Merge all xls files in a given directory into one multisheet xls file.
The sheets get the orginal file name, without the extension.
File names should not exceed 31 characters, as this is the maximum
sheet name length
"""
import xlrd, xlwt
import glob, os.path
def merge_xls (in_dir, out_file="d:/merged_output.x ls"):
xls_files = glob.glob(in_di r + "*.xls")
sheet_names = []
merged_book = xlwt.Workbook()
[sheet_names.app end(os.path.bas ename(v)[:-4]) for k, v in enumerate(xls_f iles)]
for k, xls_file in enumerate(xls_f iles):
if len (sheet_names[k]) <= 31:
book = xlrd.open_workb ook(xls_file)
ws = merged_book.add _sheet(sheet_na mes[k])
for sheetx in range(book.nshe ets):
sheet = book.sheet_by_i ndex(sheetx)
for rx in range(sheet.nro ws):
for cx in range(sheet.nco ls):
ws.write(rx, cx, sheet.cell_valu e(rx, cx))
else:
print "File name too long: <%s.xls(maxim um is 31 chars) " % (sheet_names[k])
print "File <%s.xlsis *not* included in the merged xls file." % (sheet_names[k])
merged_book.sav e(out_file)
print "---Merged xls file written to %s using the following source files: " % (out_file)
for k, v in enumerate(sheet _names):
if len(v) <= 31: print "\t", str(k+1).zfill( 3), "%s.xls" % (v)
merge_xls(in_di r="d:/temp/")
*** WARNING: OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero ***
put_cell 0 1
Traceback (most recent call last):
File "G:\generic_syn taxes\merge_xls .py", line 37, in -toplevel-
merge_xls(in_di r="d:/temp/")
File "G:\generic_syn taxes\merge_xls .py", line 21, in merge_xls
book = xlrd.open_workb ook(xls_file)
File "C:\Python24\li b\site-packages\xlrd\_ _init__.py", line 139, in open_workbook
bk.get_sheets()
File "C:\Python24\li b\site-packages\xlrd\_ _init__.py", line 389, in get_sheets
sht = self.get_sheet( sheetno)
File "C:\Python24\li b\site-packages\xlrd\_ _init__.py", line 379, in get_sheet
sh.read(self)
File "C:\Python24\li b\site-packages\xlrd\s heet.py", line 285, in read
self.put_cell(r owx, colx, XL_CELL_TEXT, bk._sharedstrin gs[index])
File "C:\Python24\li b\site-packages\xlrd\s heet.py", line 214, in put_cell
self._cell_type s[rowx][colx] = ctype
IndexError: list assignment index out of range
I wrote the program below to merge all xls files in a given directory into one multisheet xls file. It uses xlwt and xlrd. The xls files I use for input are generated by Spss. When I open and re-save the files in Excel, the program works, but when I use the xls files as they were created by Spss, the program returns an error message (see below). Some details: Python 2.4, xlwt version 0.7.0, xlrd version 0.5.2, Win NT.
Any ideas?
Thanks in advance!
Albert-Jan
"""
Merge all xls files in a given directory into one multisheet xls file.
The sheets get the orginal file name, without the extension.
File names should not exceed 31 characters, as this is the maximum
sheet name length
"""
import xlrd, xlwt
import glob, os.path
def merge_xls (in_dir, out_file="d:/merged_output.x ls"):
xls_files = glob.glob(in_di r + "*.xls")
sheet_names = []
merged_book = xlwt.Workbook()
[sheet_names.app end(os.path.bas ename(v)[:-4]) for k, v in enumerate(xls_f iles)]
for k, xls_file in enumerate(xls_f iles):
if len (sheet_names[k]) <= 31:
book = xlrd.open_workb ook(xls_file)
ws = merged_book.add _sheet(sheet_na mes[k])
for sheetx in range(book.nshe ets):
sheet = book.sheet_by_i ndex(sheetx)
for rx in range(sheet.nro ws):
for cx in range(sheet.nco ls):
ws.write(rx, cx, sheet.cell_valu e(rx, cx))
else:
print "File name too long: <%s.xls(maxim um is 31 chars) " % (sheet_names[k])
print "File <%s.xlsis *not* included in the merged xls file." % (sheet_names[k])
merged_book.sav e(out_file)
print "---Merged xls file written to %s using the following source files: " % (out_file)
for k, v in enumerate(sheet _names):
if len(v) <= 31: print "\t", str(k+1).zfill( 3), "%s.xls" % (v)
merge_xls(in_di r="d:/temp/")
*** WARNING: OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero ***
put_cell 0 1
Traceback (most recent call last):
File "G:\generic_syn taxes\merge_xls .py", line 37, in -toplevel-
merge_xls(in_di r="d:/temp/")
File "G:\generic_syn taxes\merge_xls .py", line 21, in merge_xls
book = xlrd.open_workb ook(xls_file)
File "C:\Python24\li b\site-packages\xlrd\_ _init__.py", line 139, in open_workbook
bk.get_sheets()
File "C:\Python24\li b\site-packages\xlrd\_ _init__.py", line 389, in get_sheets
sht = self.get_sheet( sheetno)
File "C:\Python24\li b\site-packages\xlrd\_ _init__.py", line 379, in get_sheet
sh.read(self)
File "C:\Python24\li b\site-packages\xlrd\s heet.py", line 285, in read
self.put_cell(r owx, colx, XL_CELL_TEXT, bk._sharedstrin gs[index])
File "C:\Python24\li b\site-packages\xlrd\s heet.py", line 214, in put_cell
self._cell_type s[rowx][colx] = ctype
IndexError: list assignment index out of range
Comment