1 package com.github.wycm;
2
3 import org.apache.commons.io.FileUtils;
4 import org.jsoup.Jsoup;
5 import org.jsoup.nodes.Document;
6 import org.jsoup.nodes.Element;
7 import org.jsoup.select.Elements;
8 import org.openqa.selenium.By;
9 import org.openqa.selenium.Point;
10 import org.openqa.selenium.WebDriver;
11 import org.openqa.selenium.WebElement;
12 import org.openqa.selenium.chrome.ChromeDriver;
13 import org.openqa.selenium.interactions.Actions;
14 import org.openqa.selenium.support.ui.ExpectedCondition;
15 import org.openqa.selenium.support.ui.WebDriverWait;
16
17 import javax.imageio.ImageIO;
18 import javax.imageio.ImageReadParam;
19 import javax.imageio.ImageReader;
20 import javax.imageio.stream.ImageInputStream;
21 import java.awt.*;
22 import java.awt.image.BufferedImage;
23 import java.io.File;
24 import java.io.FileInputStream;
25 import java.io.IOException;
26 import java.net.URL;
27 import java.util.Iterator;
28 import java.util.Random;
29 import java.util.regex.Matcher;
30 import java.util.regex.Pattern;
31
32 public class GeettestCrawler {
33 private static String basePath = "src/main/resources/";
34 private static String FULL_IMAGE_NAME = "full-image";
35 private static String BG_IMAGE_NAME = "bg-image";
36 private static int[][] moveArray = new int[52][2];
37 private static boolean moveArrayInit = false;
38 private static String INDEX_URL = "https://passport.feng.com/?r=user/register";
39 private static WebDriver driver;
40
41 static {
42 System.setProperty("webdriver.chrome.driver", "D:/dev/selenium/chromedriver_V2.30/chromedriver_win32/chromedriver.exe");
43 if (!System.getProperty("os.name").toLowerCase().contains("windows")){
44 System.setProperty("webdriver.chrome.driver", "/Users/wangyang/workspace/selenium/chromedriver_V2.30/chromedriver");
45 }
46 driver = new ChromeDriver();
47 }
48
49 public static void main(String[] args) throws InterruptedException {
50 for (int i = 0; i <10; i++){
51 try {
52 invoke();
53 } catch (IOException e) {
54 e.printStackTrace();
55 } catch (InterruptedException e) {
56 e.printStackTrace();
57 }
58 }
59 driver.quit();
60 }
61 private static void invoke() throws IOException, InterruptedException {
62 //设置input参数
63 driver.get(INDEX_URL);
64
65 //通过[class=gt_slider_knob gt_show]
66 By moveBtn = By.cssSelector(".gt_slider_knob.gt_show");
67 waitForLoad(driver, moveBtn);
68 WebElement moveElemet = driver.findElement(moveBtn);
69 int i = 0;
70 while (i++ <15){
71 int distance = getMoveDistance(driver);
72 move(driver, moveElemet, distance - 6);
73 By gtTypeBy = By.cssSelector(".gt_info_type");
74 By gtInfoBy = By.cssSelector(".gt_info_content");
75 waitForLoad(driver, gtTypeBy);
76 waitForLoad(driver, gtInfoBy);
77 String gtType = driver.findElement(gtTypeBy).getText();
78 String gtInfo = driver.findElement(gtInfoBy).getText();
79 System.out.println(gtType + "---" + gtInfo);
80 /**
81 * 再来一次:
82 * 验证失败:
83 */
84 if(!gtType.equals("再来一次:") && !gtType.equals("验证失败:")){
85 Thread.sleep(4000);
86 System.out.println(driver);
87 break;
88 }
89 Thread.sleep(4000);
90 }
91 }
92
93 /**
94 * 移动
95 * @param driver
96 * @param element
97 * @param distance
98 * @throws InterruptedException
99 */
100 public static void move(WebDriver driver, WebElement element, int distance) throws InterruptedException {
101 int xDis = distance + 11;
102 System.out.println("应平移距离:" + xDis);
103 int moveX = new Random().nextInt(8) - 5;
104 int moveY = 1;
105 Actions actiOns= new Actions(driver);
106 new Actions(driver).clickAndHold(element).perform();
107 Thread.sleep(200);
108 printLocation(element);
109 actions.moveToElement(element, moveX, moveY).perform();
110 System.out.println(moveX + "--" + moveY);
111 printLocation(element);
112 for (int i = 0; i <22; i++){
113 int s = 10;
114 if (i % 2 == 0){
115 s = -10;
116 }
117 actions.moveToElement(element, s, 1).perform();
118 // printLocation(element);
119 Thread.sleep(new Random().nextInt(100) + 150);
120 }
121
122 System.out.println(xDis + "--" + 1);
123 actions.moveByOffset(xDis, 1).perform();
124 printLocation(element);
125 Thread.sleep(200);
126 actions.release(element).perform();
127 }
128 private static void printLocation(WebElement element){
129 Point point = element.getLocation();
130 System.out.println(point.toString());
131 }
132 /**
133 * 等待元素加载,10s超时
134 * @param driver
135 * @param by
136 */
137 public static void waitForLoad(final WebDriver driver, final By by){
138 new WebDriverWait(driver, 10).until(new ExpectedCondition() {
139 public Boolean apply(WebDriver d) {
140 WebElement element = driver.findElement(by);
141 if (element != null){
142 return true;
143 }
144 return false;
145 }
146 });
147 }
148
149 /**
150 * 计算需要平移的距离
151 * @param driver
152 * @return
153 * @throws IOException
154 */
155 public static int getMoveDistance(WebDriver driver) throws IOException {
156 String pageSource = driver.getPageSource();
157 String fullImageUrl = getFullImageUrl(pageSource);
158 FileUtils.copyURLToFile(new URL(fullImageUrl), new File(basePath + FULL_IMAGE_NAME + ".jpg"));
159 String getBgImageUrl = getBgImageUrl(pageSource);
160 FileUtils.copyURLToFile(new URL(getBgImageUrl), new File(basePath + BG_IMAGE_NAME + ".jpg"));
161 initMoveArray(driver);
162 restoreImage(FULL_IMAGE_NAME);
163 restoreImage(BG_IMAGE_NAME);
164 BufferedImage fullBI = ImageIO.read(new File(basePath + "result/" + FULL_IMAGE_NAME + "result3.jpg"));
165 BufferedImage bgBI = ImageIO.read(new File(basePath + "result/" + BG_IMAGE_NAME + "result3.jpg"));
166 for (int i = 0; i ){
167 for (int j = 0; j ) {
168 int[] fullRgb = new int[3];
169 fullRgb[0] = (fullBI.getRGB(i, j) & 0xff0000) >> 16;
170 fullRgb[1] = (fullBI.getRGB(i, j) & 0xff00) >> 8;
171 fullRgb[2] = (fullBI.getRGB(i, j) & 0xff);
172
173 int[] bgRgb = new int[3];
174 bgRgb[0] = (bgBI.getRGB(i, j) & 0xff0000) >> 16;
175 bgRgb[1] = (bgBI.getRGB(i, j) & 0xff00) >> 8;
176 bgRgb[2] = (bgBI.getRGB(i, j) & 0xff);
177 if(difference(fullRgb, bgRgb) > 255){
178 return i;
179 }
180 }
181 }
182 throw new RuntimeException("未找到需要平移的位置");
183 }
184 private static int difference(int[] a, int[] b){
185 return Math.abs(a[0] - b[0]) + Math.abs(a[1] - b[1]) + Math.abs(a[2] - b[2]);
186 }
187 /**
188 * 获取move数组
189 * @param driver
190 */
191 private static void initMoveArray(WebDriver driver){
192 if (moveArrayInit){
193 return;
194 }
195 Document document = Jsoup.parse(driver.getPageSource());
196 Elements elements = document.select("[class=gt_cut_bg gt_show]").first().children();
197 int i = 0;
198 for(Element element : elements){
199 Pattern pattern = Pattern.compile(".*background-position: (.*?)px (.*?)px.*");
200 Matcher matcher = pattern.matcher(element.toString());
201 if (matcher.find()){
202 String width = matcher.group(1);
203 String height = matcher.group(2);
204 moveArray[i][0] = Integer.parseInt(width);
205 moveArray[i++][1] = Integer.parseInt(height);
206 } else {
207 throw new RuntimeException("解析异常");
208 }
209 }
210 moveArrayInit = true;
211 }
212 /**
213 *还原图片
214 * @param type
215 */
216 private static void restoreImage(String type) throws IOException {
217 //把图片裁剪为2 * 26份
218 for(int i = 0; i <52; i++){
219 cutPic(basePath + type +".jpg"
220 ,basePath + "result/" + type + i + ".jpg", -moveArray[i][0], -moveArray[i][1], 10, 58);
221 }
222 //拼接图片
223 String[] b = new String[26];
224 for(int i = 0; i <26; i++){
225 b[i] = String.format(basePath + "result/" + type + "%d.jpg", i);
226 }
227 mergeImage(b, 1, basePath + "result/" + type + "result1.jpg");
228 //拼接图片
229 String[] c = new String[26];
230 for(int i = 0; i <26; i++){
231 c[i] = String.format(basePath + "result/" + type + "%d.jpg", i + 26);
232 }
233 mergeImage(c, 1, basePath + "result/" + type + "result2.jpg");
234 mergeImage(new String[]{basePath + "result/" + type + "result1.jpg",
235 basePath + "result/" + type + "result2.jpg"}, 2, basePath + "result/" + type + "result3.jpg");
236 //删除产生的中间图片
237 for(int i = 0; i <52; i++){
238 new File(basePath + "result/" + type + i + ".jpg").deleteOnExit();
239 }
240 new File(basePath + "result/" + type + "result1.jpg").deleteOnExit();
241 new File(basePath + "result/" + type + "result2.jpg").deleteOnExit();
242 }
243 /**
244 * 获取原始图url
245 * @param pageSource
246 * @return
247 */
248 private static String getFullImageUrl(String pageSource){
249 String url = null;
250 Document document = Jsoup.parse(pageSource);
251 String style = document.select("[class=gt_cut_fullbg_slice]").first().attr("style");
252 Pattern pattern = Pattern.compile("url\\(\"(.*)\"\\)");
253 Matcher matcher = pattern.matcher(style);
254 if (matcher.find()){
255 url = matcher.group(1);
256 }
257 url = url.replace(".webp", ".jpg");
258 System.out.println(url);
259 return url;
260 }
261 /**
262 * 获取带背景的url
263 * @param pageSource
264 * @return
265 */
266 private static String getBgImageUrl(String pageSource){
267 String url = null;
268 Document document = Jsoup.parse(pageSource);
269 String style = document.select(".gt_cut_bg_slice").first().attr("style");
270 Pattern pattern = Pattern.compile("url\\(\"(.*)\"\\)");
271 Matcher matcher = pattern.matcher(style);
272 if (matcher.find()){
273 url = matcher.group(1);
274 }
275 url = url.replace(".webp", ".jpg");
276 System.out.println(url);
277 return url;
278 }
279 public static boolean cutPic(String srcFile, String outFile, int x, int y,
280 int width, int height) {
281 FileInputStream is = null;
282 ImageInputStream iis = null;
283 try {
284 if (!new File(srcFile).exists()) {
285 return false;
286 }
287 is = new FileInputStream(srcFile);
288 String ext = srcFile.substring(srcFile.lastIndexOf(".") + 1);
289 Iterator it = ImageIO.getImageReadersByFormatName(ext);
290 ImageReader reader = it.next();
291 iis = ImageIO.createImageInputStream(is);
292 reader.setInput(iis, true);
293 ImageReadParam param = reader.getDefaultReadParam();
294 Rectangle rect = new Rectangle(x, y, width, height);
295 param.setSourceRegion(rect);
296 BufferedImage bi = reader.read(0, param);
297 File tempOutFile = new File(outFile);
298 if (!tempOutFile.exists()) {
299 tempOutFile.mkdirs();
300 }
301 ImageIO.write(bi, ext, new File(outFile));
302 return true;
303 } catch (Exception e) {
304 e.printStackTrace();
305 return false;
306 } finally {
307 try {
308 if (is != null) {
309 is.close();
310 }
311 if (iis != null) {
312 iis.close();
313 }
314 } catch (IOException e) {
315 e.printStackTrace();
316 return false;
317 }
318 }
319 }
320 /**
321 * 图片拼接 (注意:必须两张图片长宽一致哦)
322 * @param files 要拼接的文件列表
323 * @param type 1横向拼接,2 纵向拼接
324 * @param targetFile 输出文件
325 */
326 private static void mergeImage(String[] files, int type, String targetFile) {
327 int length = files.length;
328 File[] src = new File[length];
329 BufferedImage[] images = new BufferedImage[length];
330 int[][] ImageArrays = new int[length][];
331 for (int i = 0; i ) {
332 try {
333 src[i] = new File(files[i]);
334 images[i] = ImageIO.read(src[i]);
335 } catch (Exception e) {
336 throw new RuntimeException(e);
337 }
338 int width = images[i].getWidth();
339 int height = images[i].getHeight();
340 ImageArrays[i] = new int[width * height];
341 ImageArrays[i] = images[i].getRGB(0, 0, width, height, ImageArrays[i], 0, width);
342 }
343 int newHeight = 0;
344 int newWidth = 0;
345 for (int i = 0; i ) {
346 // 横向
347 if (type == 1) {
348 newHeight = newHeight > images[i].getHeight() ? newHeight : images[i].getHeight();
349 newWidth += images[i].getWidth();
350 } else if (type == 2) {// 纵向
351 newWidth = newWidth > images[i].getWidth() ? newWidth : images[i].getWidth();
352 newHeight += images[i].getHeight();
353 }
354 }
355 if (type == 1 && newWidth <1) {
356 return;
357 }
358 if (type == 2 && newHeight <1) {
359 return;
360 }
361 // 生成新图片
362 try {
363 BufferedImage ImageNew = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
364 int height_i = 0;
365 int width_i = 0;
366 for (int i = 0; i ) {
367 if (type == 1) {
368 ImageNew.setRGB(width_i, 0, images[i].getWidth(), newHeight, ImageArrays[i], 0,
369 images[i].getWidth());
370 width_i += images[i].getWidth();
371 } else if (type == 2) {
372 ImageNew.setRGB(0, height_i, newWidth, images[i].getHeight(), ImageArrays[i], 0, newWidth);
373 height_i += images[i].getHeight();
374 }
375 }
376 //输出想要的图片
377 ImageIO.write(ImageNew, targetFile.split("\\.")[1], new File(targetFile));
378
379 } catch (Exception e) {
380 throw new RuntimeException(e);
381 }
382 }
383 }
384
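/*
 * Author: 卧颜沉默
 * Link: https://www.jianshu.com/p/1466f1ba3275
 * Source: Jianshu (简书)
 * Copyright belongs to the author. For commercial reprints, contact the author for
 * authorization; for non-commercial reprints, please credit the source.
 */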