Como processar em lote arquivos com formulários do Word?

2

Eu tenho um monte de formulários do Word preenchidos e preciso extrair esses dados para Excel / CSV / qualquer formato estruturado. Eu vi soluções na web sobre como fazer isso com um arquivo de cada vez, mas existem métodos estabelecidos para fazer isso em lote?

Eu queria perguntar antes de escrever um script em PowerShell.

    
por Konrads 06.03.2010 / 21:01

1 resposta

0

Aqui está a minha solução em Python:

"""
Copyright 2009 Konrads Smelkovs <[email protected]>
UTF8Recoder and UnicodeWriter come from the Python docs
"""

import sys,os,csv
import win32com.client
import pywintypes


import codecs, cStringIO

class UTF8Recoder:
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    f        -- byte stream to read from
    encoding -- name of the codec that "f" is encoded in

    Each iteration step returns the next item produced by the codec's
    stream reader, re-encoded as a UTF-8 byte string.  StopIteration from
    the underlying reader ends the iteration.
    """
    def __init__(self, f, encoding):
        # Wrap the raw stream in a decoding reader for the source encoding.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        # Use the builtin next() (Python 2.6+) rather than calling
        # self.reader.next() directly: stream readers only expose a
        # .next() method on Python 2, while the builtin works on both
        # Python 2 and Python 3.
        return next(self.reader).encode("utf-8")

    # Python 3 spells the iterator method __next__; keep both names so the
    # class can be used in a for-loop under either major version.
    __next__ = next

class UnicodeWriter:
    """
    CSV writer for rows of unicode strings.

    Rows are formatted by the csv module into an in-memory buffer as UTF-8
    and then transcoded into "encoding" before being written to the
    stream "f".  Extra keyword arguments are forwarded to csv.writer.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        self.stream = f
        # csv.writer is pointed at a scratch buffer, not at "f", so each
        # formatted row can be re-encoded before it reaches the target.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Format one row of unicode cells and write it to the stream."""
        utf8_cells = []
        for cell in row:
            utf8_cells.append(cell.encode("utf-8"))
        self.writer.writerow(utf8_cells)
        # Pull the formatted UTF-8 line back out of the scratch buffer,
        # transcode it to the target encoding and emit it.
        formatted = self.queue.getvalue().decode("utf-8")
        self.stream.write(self.encoder.encode(formatted))
        # Reset the scratch buffer for the next row.
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of "rows" in order, via writerow()."""
        for single_row in rows:
            self.writerow(single_row)

def main():
 if len(sys.argv)<3:
    print "Usage: %s <directory> <outfile.csv>" % sys.argv[0]
    print "Where <directory> - directory containing word docs with forms"
    print "and <outfile.csv> - file where to put results"
    sys.exit(-1)
 directory=os.path.abspath(sys.argv[1])
 wordapp = win32com.client.Dispatch("Word.Application")
 wordapp.Visible=0 # Hide word app
 results=[]
 for docfile in os.listdir(directory):
     thisdocresults=[]
     if docfile.endswith(".doc") or docfile.endswith(".docx"):
         print >> sys.stderr, "Processing %s" % docfile
         worddoc=wordapp.Documents.Open(os.path.join(directory,docfile))
         for i in range(1,worddoc.FormFields.Count+1):
            try:
                form=worddoc.FormFields.Item(i)
                name=form.Name
                value=form.Result
                thisdocresults.append((name,value))
                try:
                    print >>sys.stderr, "%s: %s" % (name,value)
                except UnicodeEncodeError,e:
                    print >>sys.stderr, "Error decoding charset,%s" % e
            except pywintypes.com_error,e:
                print >>sys.stderr, "Exception: %s" % str(e)
         results.append(thisdocresults)
         worddoc.Close()
 csvfile=file(sys.argv[2],"wb")
 csvwriter=UnicodeWriter(csvfile,quoting=csv.QUOTE_ALL)
 for docres in results:
     data=[]
     for (n,v) in docres:
         data.append(v)
     csvwriter.writerow(data)
 wordapp.Quit()

# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__=="__main__":
    main()

Converter isso para PowerShell é trivial. Se alguém realmente precisar, escreva-me um e-mail.

    
por 07.03.2010 / 13:01