操作系统:windows 10
安装模块:Pillow pytesseract tesseract-ocr
1、首先安装Pillow,可以选择安装windows版的安装包
下载地址:https://pypi.org/project/Pillow/#files
此处我选择安装 Pillow-6.1.0.win-amd64-py2.7.exe
2、安装完成后再进行安装 pytesseract
下载地址:https://pypi.org/project/pytesseract/#files
解压安装包,然后切换到解压后的根目录,执行:python setup.py install
3、接着继续安装 tesseract-ocr
下载地址:https://download.csdn.net/download/wangleittt/10358361
安装完成后,设置环境变量
TESSDATA_PREFIX = C:\Program Files (x86)\Tesseract-OCR
然后在Path 新增 C:\Program Files (x86)\Tesseract-OCR
以上即完成了环境的安装,接下来就可以愉快地写代码了!
附上我的代码:
#!/usr/bin/python
#-*- coding:utf8 -*-
# encoding: utf-8
# auth by huwj 2019-07-29
from selenium import webdriver
import time
from PIL import Image, ImageEnhance
from pytesseract import image_to_string
# Address of the login page opened by login() (looks like a placeholder).
URL = 'http://www.xxxx.com'
# Credentials that login() types into the form fields.
user = "hu_wen"
password = "123456"
def login():
    """Log in through the web form, solving the image captcha with OCR.

    Starts a Chrome session, loads ``URL``, types ``user`` / ``password``
    into the form, takes a full-page screenshot, crops the captcha element
    out of it, runs tesseract on the crop and submits the form with the
    recognised text.

    Returns:
        The live Chrome driver on success (the caller is then responsible
        for calling ``quit()`` on it), or ``None`` if any step raised. On
        failure the error is printed and the browser, if one was started,
        is closed.
    """
    screenshot_path = 'D://360Downloads/All.png'
    captcha_path = 'D://360Downloads/result.png'

    # Bound before the try block so the handler can tell whether a browser
    # was actually started; otherwise a failing webdriver.Chrome() would
    # trigger a NameError inside the except clause.
    driver = None
    try:
        # To run without a visible window, create a webdriver.ChromeOptions(),
        # call add_argument('headless') on it and pass it to webdriver.Chrome.
        driver = webdriver.Chrome()
        driver.maximize_window()
        driver.get(URL)
        time.sleep(2)  # fixed pause after navigation (presumably page load)

        # Fill in the credentials; each field is looked up once and reused.
        user_input = driver.find_element_by_id('txt_user')
        user_input.clear()
        user_input.send_keys(user)
        pwd_input = driver.find_element_by_id('txt_pwd')
        pwd_input.clear()
        pwd_input.send_keys(password)

        # Screenshot the whole page, then cut the captcha out of it using
        # the element's reported position and size.
        driver.save_screenshot(screenshot_path)
        captcha = driver.find_element_by_id('verifyImg')
        location = captcha.location  # top-left corner (x, y) of the captcha
        size = captcha.size  # width / height of the captcha
        box = (
            int(location['x']),
            int(location['y']),
            int(location['x'] + size['width']),
            int(location['y'] + size['height']),
        )  # (left, upper, right, lower) as expected by Image.crop

        # The context manager closes the screenshot file once the crop has
        # been written to disk.
        with Image.open(screenshot_path) as page_image:
            page_image.crop(box).save(captcha_path)

        text = image_to_string(captcha_path, lang='eng').strip()
        # Single formatted argument so the output is the same on Python 2
        # and Python 3 (a two-argument print() shows a tuple on Python 2).
        print("pic_text: %s" % text)

        driver.find_element_by_id("txt_verifyCode").send_keys(text)
        driver.find_element_by_id('btn_login').click()
        time.sleep(6)  # fixed pause after submitting (presumably login wait)
        return driver
    except Exception as e:
        # Best-effort behaviour kept from the original: report and give up.
        print(e)
        if driver is not None:
            driver.quit()
        return None