讀取 Word 文檔並提取和寫入數據(基於 Python 3.6)

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @File    : delete_file
# @Author  : moucong
# @Date    : 2018/4/1 16:33
# @Software: PyCharm
"""Example: read the text of a .docx file and write the digits it contains.

Every paragraph of the source document is split on single spaces; the digits
found in each non-empty token are concatenated into one field, and all fields
are written as a single paragraph of a new document.
"""
import re

# Input document to read and output document to create.
SOURCE_PATH = "E:\\python_word\\word.docx"
OUTPUT_PATH = "E:\\python_word\\number.docx"

# Appended after every extracted field so fields stay visually separated.
FIELD_SEPARATOR = '    '

# Compiled once instead of on every token.
_DIGIT = re.compile(r"\d")


def extract_digit_fields(text):
    """Return one digit field per space-separated token of *text*.

    Args:
        text: The text of a single paragraph.

    Returns:
        A list with one string per non-empty token (tokens are produced by
        splitting on a single space). Each string is the token's digits, in
        order, followed by ``FIELD_SEPARATOR``. A token without any digit
        still yields a field holding only the separator.
    """
    return [
        ''.join(_DIGIT.findall(token)) + FIELD_SEPARATOR
        # Runs of spaces produce empty tokens; skip them.
        for token in text.split(' ')
        if token
    ]


def main(source_path=SOURCE_PATH, output_path=OUTPUT_PATH):
    """Print the source document's paragraphs and save their digit fields.

    Args:
        source_path: Path of the .docx file to read.
        output_path: Path of the .docx file to write.
    """
    # Imported here so extract_digit_fields stays importable without
    # python-docx installed.
    import docx

    paragraphs = docx.Document(source_path).paragraphs

    # Report the number of paragraphs, then the text of each one.
    print("段落數:" + str(len(paragraphs)))
    for para in paragraphs:
        print(para.text)

    # Collect the digit fields of all paragraphs, in document order.
    fields = []
    for para in paragraphs:
        fields.extend(extract_digit_fields(para.text))

    output = docx.Document()
    # add_paragraph takes the paragraph text as a string, so join the fields
    # instead of passing the list itself.
    output.add_paragraph(''.join(fields))
    output.save(output_path)


if __name__ == "__main__":
    main()
相關文章
相關標籤/搜索