标签:ica stack 链接 nta shu end false extension enum
暂时记录,改天再整理
import re
import os
import pandas as pd
from requests import get
from docx import Document
import win32com.client as win
import subprocess

# Disabled draft: walk a folder of Word files, read the first two rows of the
# second table in each document, and collect the values into an Excel sheet.
# Three alternative ways of handling the .doc files are mixed together below
# (LibreOffice `soffice` conversion, python-docx, and Word COM automation).
#
# NOTE(review): this snippet had been collapsed onto a single line with
# typographic quotes; the line breaks and indentation below are reconstructed
# from the syntax, so confirm the block nesting before re-enabling any of it.
# NOTE(review): `dir` (as opposed to `dir2`), `dn` and `label` are used but
# never assigned in this snippet -- `dir` would resolve to the builtin.
# NOTE(review): the `for j` loop indexes columns with `i + 1`; `j + 1` looks
# intended -- confirm.
#
# dir2 = 'C:/Users/User/Documents/gzzw/'
# names = os.listdir(dir2)
# data = {}
# word = win.Dispatch('Word.Application')
# for (i, n) in enumerate(names):
#     subprocess.call(['soffice', '--headless', '--convert-to', 'docx', '--outdir', dir2 + str(i) + '.docx', dir + n])
#     doc = Document(docx=dir2 + n)
#     table = doc.tables[1]
#     row = table.rows[0]
#     row1 = table.rows[1]
#     for k, v in zip(row.cells, row1.cells):
#         if i == 0:
#             data[k.text] = [v.text]
#         else:
#             data[k.text].append(v.text)
#     gs = re.match(r'.*_(.*)_.*', n)
#     dn.append(gs.group(1))
#     os.rename(dir + n, dir + str(i) + '.doc')
#     doc = word.Documents.Open(dir + n)
#     doc.SaveAs(dir2 + str(i) + '.docx', FileFormat=12)
#     table = doc.Tables(2)
#     for j in range(table.Columns.Count):
#         print(table.Cell(Row=1, Column=i + 1).Range.Text)
#         label.append(table.Cell(Row=1, Column=i + 1).Range.Text.encode('utf8'))
#         dn.append(table.Cell(Row=2, Column=i + 1).Range.Text.encode('utf8'))
# word.Quit()
# sheel = pd.DataFrame(data)
# sheel.to_excel(dir2 + 'statics.xlsx', index=False, encoding='utf8')
一些参考链接:
1.https://code.activestate.com/recipes/279003-converting-word-documents-to-text/
2.https://stackoverflow.com/questions/1468099/python-win32-extensions-documentation
4.https://stackoverflow.com/questions/38468442/multiple-doc-to-docx-file-conversion-using-python
5.https://www.jianshu.com/p/4fa504c720c1
标签:ica stack 链接 nta shu end false extension enum
原文地址:https://www.cnblogs.com/darkchii/p/12051950.html