def extract_pictures(word_path, result_path):
    """Extract every embedded picture from a Word (.docx) document.

    A .docx file is a ZIP archive whose embedded images are stored under
    ``word/media/``. The archive is read in place, so no temporary copy or
    extraction directory is created next to the source document (and no
    pre-existing sibling file or directory can be overwritten or removed).

    :param word_path: path of the .docx file to read
    :param result_path: directory that receives the pictures; it is created
        when missing, but only if the document actually contains pictures
    :return: the string ``'no pictures found'`` when the document has no
        embedded pictures; otherwise a generator yielding the path of each
        picture written by this call. Files are named
        ``<document stem>_<original picture name>``.
    :raises zipfile.BadZipFile: if ``word_path`` is not a valid ZIP archive
    """
    media_prefix = 'word/media/'
    word_name = os.path.splitext(os.path.basename(word_path))[0]

    with zipfile.ZipFile(word_path, 'r') as zf:
        # Directory entries end with '/'; only real files are pictures.
        members = [
            name for name in zf.namelist()
            if name.startswith(media_prefix) and not name.endswith('/')
        ]
        if not members:
            return 'no pictures found'

        os.makedirs(result_path, exist_ok=True)
        saved = []
        for member in members:
            # Keep only the last path component so a crafted entry name
            # cannot write outside result_path.
            picture = member.rsplit('/', 1)[-1]
            target = os.path.join(result_path, f'{word_name}_{picture}')
            with zf.open(member) as src, open(target, 'wb') as dst:
                shutil.copyfileobj(src, dst)
            saved.append(target)

    # Report only what this call wrote, not whatever else is in result_path.
    return (path for path in saved)
#### 方法二:使用 `python-docx` 库

`python-docx` 是一个用于处理 Word 文档的 Python 库,支持更简便地提取图片。
1. **安装库**

   ```bash
   pip install python-docx
   ```
2. **编写代码实现提取**

   以下是使用 `python-docx` 库提取图片的示例代码:
```python import docx import os import re
def extract_pictures_with_docx(word_path, result_path):
    """Extract the pictures embedded in a Word document using python-docx.

    Walks the relationships of the main document part and writes the bytes
    of every internal image relationship into ``result_path``. Files are
    named ``<document stem>_<image file name>``.

    :param word_path: path of the Word file to read
    :param result_path: directory that receives the pictures; created if it
        does not exist
    :return: None
    """
    doc = docx.Document(word_path)
    os.makedirs(result_path, exist_ok=True)
    word_name = os.path.splitext(os.path.basename(word_path))[0]

    for rel in doc.part.rels.values():
        # Select by relationship type rather than by a substring of the
        # target, and skip linked (external) pictures: they have no target
        # part inside the package to read bytes from.
        if rel.is_external or not rel.reltype.endswith('/image'):
            continue
        # target_ref is a package-relative reference such as
        # "media/image1.png"; only the file name is kept.
        img_name = os.path.basename(rel.target_ref)
        out_name = f'{word_name}_{img_name}'
        with open(os.path.join(result_path, out_name), 'wb') as f:
            f.write(rel.target_part.blob)