"""Print each whitespace-separated token of a .docx file with symbols removed.

Every paragraph of the document at ``path`` is split on whitespace and each
token is printed on its own line after stripping every character that is not
an ASCII letter, an ASCII digit, or a CJK ideograph in U+4E00-U+9FA5.
"""
import docx
import re
from docx import Document
import string

path = '/Users/ya/Desktop/11246441.docx'

# Matches any character that is NOT an ASCII letter, an ASCII digit, or a CJK
# ideograph in U+4E00-U+9FA5.  The backslashes of the \u escapes are required:
# without them the class degenerates to a handful of literal characters plus
# the ASCII range '0'-'u', which keeps punctuation such as ':' or '_' and
# removes all Chinese text.  Compiled once here instead of once per token.
rule = re.compile(r"[^a-zA-Z0-9\u4e00-\u9fa5]")

document = Document(path)
for paragraph in document.paragraphs:
    # str.split() with no arguments ignores leading/trailing whitespace and
    # returns [] for a blank paragraph, so no explicit strip/emptiness check
    # is needed.
    for token in paragraph.text.split():
        # A token made up solely of removed characters prints as an empty line.
        print(rule.sub('', token))