|
Form extractor from word to Excel |
I had a bunch of filled-out Word documents with Word forms in them and needed them in Excel. Initially I tried CSV, but it didn't play nicely with encodings, so I decided to write directly to XLS.
"""
Copyright 2009 Konrads Smelkovs <konrads [at] smelkovs.com>
UTF8Recorder and UnicodeWriter come from python docs
"""
import sys,os,csv
import win32com.client
import pywintypes
class ExcelWriter(object):
    """Append rows of values to the first worksheet of a new Excel workbook.

    Excel is driven through COM automation (``win32com.client``). The
    workbook is created and saved to ``excelfile`` on construction, rows are
    added with ``writerow``, and the workbook is saved and Excel is told to
    quit when the writer object is destroyed.
    """

    def __init__(self, excelfile):
        """Start Excel with its window hidden and create the output workbook.

        excelfile -- path the new workbook is saved to. A file already
                     present at that path is deleted first.
        """
        self.excelapp = win32com.client.DispatchEx('Excel.Application')
        self.excelapp.Visible = 0
        self.excelapp.Application.AskToUpdateLinks = 0
        self.workbook = self.excelapp.Workbooks.Add()
        # Only delete a file that is really there: an unconditional unlink
        # raises OSError the first time the script runs for a new output path.
        if os.path.exists(excelfile):
            os.unlink(excelfile)  # TODO: remove for release
        self.workbook.SaveAs(excelfile)
        # Only worksheet 1 is used.
        self.worksheet = self.workbook.Worksheets.Item(1)
        self.currentrow = 1

    def _getrow(self, row):
        """Convert a 1-based column index to its alphabetical column name.

        1 -> A, 2 -> B, ..., 26 -> Z, 27 -> AA, 52 -> AZ, 703 -> AAA

        Despite the method name, the result is used by ``writerow`` as the
        column part of a cell address.

        Raises ValueError if ``row`` is smaller than 1.
        """
        if row < 1:
            raise ValueError("column index must be >= 1, got %r" % (row,))
        letters = []
        remaining = row
        while remaining > 0:
            # Column names have no "zero" letter, so shift to 0-based before
            # each division; this is what makes 26 -> Z and 52 -> AZ work.
            remaining, offset = divmod(remaining - 1, 26)
            letters.append(chr(ord('A') + offset))
        return ''.join(reversed(letters))

    def __del__(self):
        """Save and close the workbook, then quit Excel.

        Attributes are looked up defensively because ``__del__`` also runs
        for instances whose ``__init__`` did not finish.
        """
        workbook = getattr(self, 'workbook', None)
        excelapp = getattr(self, 'excelapp', None)
        try:
            if workbook is not None:
                workbook.Save()
                workbook.Close()
        finally:
            # Quit even when saving fails, so Excel is still asked to exit.
            if excelapp is not None:
                excelapp.Quit()

    def writerow(self, data):
        """Write ``data`` into the current row, one value per column.

        data -- iterable of cell values; the first value goes to column A.

        The current row advances by one afterwards, also for empty ``data``.
        Each cell address is reported on stderr as it is written.
        """
        for col, value in enumerate(data, 1):
            address = self._getrow(col) + str(self.currentrow)
            sys.stderr.write("Range: %s\n" % address)
            self.worksheet.Range(address).Value = value
        self.currentrow += 1
def main():
    """Collect form-field values from Word documents and write them to Excel.

    Command line: ``<directory> <outfile>``. Every ``.doc``/``.docx`` file in
    ``<directory>`` is opened through Word COM automation, the ``Result`` of
    each form field is collected, and one spreadsheet row per document is
    written through ``ExcelWriter``. Progress and errors go to stderr.

    Prints usage and exits with status -1 when fewer than two arguments are
    given.
    """
    if len(sys.argv) < 3:
        # NOTE(review): the usage text names <outfile.csv>, yet the file is
        # written through ExcelWriter below -- confirm the intended wording.
        sys.stdout.write("Usage: %s <directory> <outfile.csv>\n" % sys.argv[0])
        sys.stdout.write(
            "Where <directory> - directory containing word docs with forms\n")
        sys.stdout.write("and <outfile.csv> - file where to put results\n")
        sys.exit(-1)

    directory = os.path.abspath(sys.argv[1])
    wordapp = win32com.client.Dispatch("Word.Application")
    results = []
    try:
        wordapp.Visible = 0  # Hide word app
        for docfile in os.listdir(directory):
            # Compare case-insensitively so names such as REPORT.DOC match.
            if not docfile.lower().endswith((".doc", ".docx")):
                continue
            sys.stderr.write("Processing %s\n" % docfile)
            worddoc = wordapp.Documents.Open(os.path.join(directory, docfile))
            thisdocresults = []
            try:
                # FormFields is indexed from 1 here, hence the shifted range.
                for i in range(1, worddoc.FormFields.Count + 1):
                    try:
                        form = worddoc.FormFields.Item(i)
                        name = form.Name
                        value = form.Result
                        thisdocresults.append((name, value))
                        try:
                            sys.stderr.write("%s: %s\n" % (name, value))
                        except UnicodeEncodeError as e:
                            # Logging is best effort; the value is already
                            # stored, so only the console echo is lost.
                            sys.stderr.write(
                                "Error decoding charset,%s\n" % e)
                    except pywintypes.com_error as e:
                        # A field that cannot be read is reported and skipped.
                        sys.stderr.write("Exception: %s\n" % str(e))
                results.append(thisdocresults)
            finally:
                # Close the document even if reading it failed.
                worddoc.Close()
    finally:
        # Always ask Word to exit so no hidden instance is left running.
        wordapp.Quit()

    writer = ExcelWriter(os.path.abspath(sys.argv[2]))
    sys.stderr.write("Writing to Excel\n")
    for docres in results:
        # Only the field values are written; field names are dropped.
        writer.writerow([value for (_name, value) in docres])
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
Last Updated ( Thursday, 18 March 2010 )
|