作者:未来不是梦r | 来源:互联网 | 2024-11-28 17:04
在开发Android应用程序时,有时需要从互联网上抓取特定网站的信息或图片。下面的代码示例展示了如何使用Java实现这一功能。该代码首先定义了一个名为GetContentPicture的类,该类包含了从网页获取HTML内容、解析HTML以提取图片链接以及根据这些链接下载图片的方法。
示例代码
package cn.mypic;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class GetContentPicture {
private int fileCount;
public GetContentPicture() {
FileNumber fileNumber = new FileNumber();
fileCount = fileNumber.readFromFile();
}
public void downloadImage(String imageUrl) {
try {
URL url = new URL(imageUrl);
BufferedInputStream in = new BufferedInputStream(url.openStream());
File imageFile = new File("d:/image/" + fileCount + imageUrl.substring(imageUrl.lastIndexOf(".")));
FileOutputStream fos = new FileOutputStream(imageFile);
byte[] buffer = new byte[1024];
int read;
while ((read = in.read(buffer, 0, 1024)) != -1) {
fos.write(buffer, 0, read);
}
fos.close();
in.close();
fileCount++;
new FileNumber().writeToFile(fileCount);
System.out.println("图片下载完成:" + imageUrl);
} catch (Exception e) {
e.printStackTrace();
}
}
public String fetchWebContent(String webUrl) throws IOException {
StringBuilder cOntent= new StringBuilder();
URL url = new URL(webUrl);
BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream()));
String line;
while ((line = reader.readLine()) != null) {
content.append(line);
}
reader.close();
return content.toString();
}
public void extractAndDownloadImages(String webUrl) throws IOException {
String webCOntent= fetchWebContent(webUrl);
Pattern pattern = Pattern.compile("(?i)(src|background)=(['"])?/?(\S+(?:jpg|jpeg|png|gif))(['"])?");
Matcher matcher = pattern.matcher(webContent);
while (matcher.find()) {
String imgPath = matcher.group(3);
if (!imgPath.startsWith("http")) {
imgPath = webUrl + imgPath;
}
downloadImage(imgPath);
}
}
public static void main(String[] args) throws IOException {
String targetUrl = "http://example.com";
GetContentPicture gcp = new GetContentPicture();
gcp.extractAndDownloadImages(targetUrl);
}
}
/**
 * File helper that persists the running image counter as a single 4-byte integer.
 *
 * <p>By default the counter lives in {@code d:/image/number.txt}; a different file can be
 * supplied through {@link #FileNumber(File)}.
 *
 * <p>Note: javac requires a public top-level class to be declared in a source file of the same
 * name, so this class has to be compiled from its own {@code FileNumber.java}.
 */
public class FileNumber {

    /** Location used when no counter file is given explicitly. */
    private static final String DEFAULT_COUNTER_PATH = "d:/image/number.txt";

    /** File that holds the counter value. */
    private final File counterFile;

    /** Creates a helper backed by the default counter file. */
    public FileNumber() {
        this(new File(DEFAULT_COUNTER_PATH));
    }

    /**
     * Creates a helper backed by the given counter file.
     *
     * @param counterFile file that stores the counter; it does not have to exist yet
     * @throws NullPointerException if {@code counterFile} is {@code null}
     */
    public FileNumber(File counterFile) {
        if (counterFile == null) {
            throw new NullPointerException("counterFile");
        }
        this.counterFile = counterFile;
    }

    /**
     * Stores the counter, replacing any previous value. Missing parent directories are created
     * first. Failures are reported on standard error and otherwise ignored.
     *
     * @param number value to store
     */
    public void writeToFile(int number) {
        File parent = counterFile.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            System.err.println("Cannot create directory " + parent);
            return;
        }
        // Closing the DataOutputStream also closes the wrapped FileOutputStream.
        try (DataOutputStream dos = new DataOutputStream(new FileOutputStream(counterFile))) {
            dos.writeInt(number);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the stored counter.
     *
     * @return the stored value, or {@code 0} when the counter file does not exist or cannot be
     *         read
     */
    public int readFromFile() {
        // A missing file is the normal first-run state, not an error worth a stack trace.
        if (!counterFile.isFile()) {
            return 0;
        }
        try (DataInputStream dis = new DataInputStream(new FileInputStream(counterFile))) {
            return dis.readInt();
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        }
    }
}