java jsoup 爬取1688网站信息,详细例子
Java jsoup 爬取 1688 网站信息的详细例子,采用 Java + jsoup 实现

java jsoup 爬取1688网站信息,详细例子
提供附件下载,附件中包含了 Java 的 jsoup jar 包,下载后可以直接使用。代码如下:
package com.test;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Detailed example of scraping 1688.com pages with jsoup.
 *
 * <p>Every method fetches a page over HTTP, picks elements with CSS
 * selectors and prints what it finds to standard output; nothing is
 * persisted.
 *
 * <p>Dated 2018-8-19.
 *
 * @author original work, http://www.javakcsj.com/
 */
public class DoGet {

    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; BIDUBrowser 2.x)";

    /** Timeout, in milliseconds, passed to jsoup for every request. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Separator placed between values that are collected into one string. */
    private static final String SEPARATOR = "@";

    /**
     * Runs the shop-listing example.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        DoGet t = new DoGet();
        t.beginOne();
        // t.beginTwo();
    }

    /**
     * Fetches each shop offer-list page and prints, for every
     * {@code div.image} element, the {@code title} and {@code href} of its link.
     *
     * @throws IOException if a page cannot be fetched
     */
    public void beginOne() throws IOException {
        String[] listingUrls = new String[] {
            "https://weiqufood.1688.com/page/offerlist.htm?spm=a2615.2177701.0.0.3ce1bb3dwMLc39"
        };
        for (String listingUrl : listingUrls) {
            Document doc = fetch(listingUrl, false);
            for (Element image : doc.select("div.image")) {
                Elements links = image.select("a");
                System.out.println(links.attr("title"));
                System.out.println(links.attr("href"));
            }
        }
    }

    /**
     * Fetches one product-detail page and prints its price tiers, the
     * quantity ranges that go with them, the variant names and their
     * quantities, the attribute table, the carousel image URLs and the URL
     * of the lazily loaded description.
     *
     * @throws IOException if the page cannot be fetched
     */
    public void beginTwo() throws IOException {
        Document doc = fetch(
                "https://detail.1688.com/offer/535597877009.html?spm=a2615.7691456.0.0.QyqIYA",
                false);

        // Price tiers, then the quantity range each tier applies to.
        String jiage = printAndCollect(doc.select("tr.price").select("span.value"));
        String shuliangfw = printAndCollect(doc.select("tr.amount").select("span.value"));
        System.out.println("--" + jiage);
        System.out.println("--" + shuliangfw);

        // Product variants and the quantity shown for each.
        // NOTE(review): values inside one div.d-content are joined with "@",
        // but nothing separates one div.d-content from the next; kept as in
        // the original output format - confirm whether that is intended.
        StringBuilder guige = new StringBuilder();
        StringBuilder shuliang = new StringBuilder();
        for (Element content : doc.select("div.d-content")) {
            // Packaging spec names of this section.
            StringBuilder names = new StringBuilder();
            for (Element span : content.select("td.name").select("span")) {
                if (names.length() > 0) {
                    names.append(SEPARATOR);
                }
                names.append(span.html());
            }
            if (names.length() == 0) {
                // No spec names in this section, so its counts are ignored too.
                continue;
            }
            guige.append(names);

            boolean firstCount = true;
            for (Element count : content.select("td.count")) {
                if (!firstCount) {
                    shuliang.append(SEPARATOR);
                }
                shuliang.append(count.select("em.value").html());
                firstCount = false;
            }
        }
        System.out.println("规格:" + guige);
        System.out.println("数量:" + shuliang);

        // Detailed attribute table. first() returns null when the div is
        // missing (layout change, blocked request), so the table is skipped
        // instead of failing with an IndexOutOfBoundsException.
        Element attributes = doc.select("div.offerdetail_ditto_attributes").first();
        if (attributes != null) {
            System.out.println(attributes.select("table").html());
        }

        // Carousel images: swap the 60x60 thumbnail suffix for the 400x400 one.
        for (Element img : doc.select("div.tab-content-container").select("img")) {
            System.out.println(img.attr("src").replace(".60x60", ".400x400"));
        }

        // URL from which the page loads its description asynchronously.
        String lazyUrl = doc.select("div.desc-lazyload-container").attr("data-tfs-url");
        System.out.println(lazyUrl);
    }

    /**
     * Fetches the lazily loaded content a detail page points to, accepting
     * any response content type.
     *
     * @param lazyUrl URL taken from the page's {@code data-tfs-url} attribute
     * @return the response parsed as a document
     * @throws IOException if the request fails
     */
    public Document moni(String lazyUrl) throws IOException {
        return fetch(lazyUrl, true);
    }

    /**
     * Walks the result pages of a listing URL and hands each page URL to
     * {@link #parseUrl(String, String)}.
     *
     * <p>The base URL is everything before {@code "&offset="}; when that
     * marker is absent the whole URL is used as the base. Each page URL is
     * built from the base alone, so parameters of earlier pages do not pile
     * up in later requests.
     *
     * @param url     listing URL, normally containing {@code "&offset="}
     * @param yeshu   page count as a decimal string
     * @param timesId identifier passed through to {@code parseUrl}
     * @throws IOException           if a page cannot be fetched
     * @throws NumberFormatException if {@code yeshu} is not an integer
     */
    public static void done(String url, String yeshu, String timesId)
            throws IOException {
        int size = Integer.parseInt(yeshu);
        int offsetIndex = url.indexOf("&offset=");
        String baseUrl = offsetIndex >= 0 ? url.substring(0, offsetIndex) : url;
        // NOTE(review): pages 1 .. size-1 are requested, as in the original
        // loop; confirm whether the last page is meant to be included.
        for (int page = 1; page < size; page++) {
            parseUrl(baseUrl + "&page=" + page + "&click=0", timesId);
        }
    }

    /**
     * Fetches one listing page and prints title, price and comment text of
     * every {@code div.gl-i-wrap} item.
     *
     * @param aa      page URL to fetch
     * @param timesId currently unused; kept so existing callers still compile
     * @throws IOException if the page cannot be fetched
     */
    public static void parseUrl(String aa, String timesId) throws IOException {
        Document doc = fetch(aa, false);
        for (Element item : doc.select("div.gl-i-wrap")) {
            String title = item.select("div.p-name").select("a").attr("title");
            String price = item.select("div.p-price").select("strong").attr("data-price");
            String comments = item.select("div.p-commit").select("a").text();
            System.out.println(title + "*************" + price + "****" + comments);
        }
    }

    /**
     * Issues the GET request shared by all examples.
     *
     * @param url               address to fetch
     * @param ignoreContentType whether jsoup should accept any content type
     * @return the parsed response
     * @throws IOException if the request fails
     */
    private static Document fetch(String url, boolean ignoreContentType) throws IOException {
        return Jsoup
                .connect(url)
                .ignoreContentType(ignoreContentType)
                // NOTE(review): "query=Java" is sent with every request as a
                // data parameter; presumably a leftover - confirm it is needed.
                .data("query", "Java")
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_MILLIS)
                .get();
    }

    /**
     * Prints the inner HTML of every element on its own line and returns the
     * values concatenated, each one prefixed with {@code "@"}.
     */
    private static String printAndCollect(Elements values) {
        StringBuilder joined = new StringBuilder();
        for (Element value : values) {
            String html = value.html();
            System.out.println(html);
            joined.append(SEPARATOR).append(html);
        }
        return joined.toString();
    }
}
大家都在看
java批量压缩文件

利用zip工具,批量将文件夹或者文件压缩,提供java源代码下载(java课程设计网原创)...查看更多
java图片切割

java课程设计之java图片切割,提供java源代码下载(java课程设计网原创)...查看更多
在图片上加水印,支持字体样式,位置,大小,颜色

在图片上加水印,支持字体样式,位置,大小,颜色...查看更多
java_jsoup爬虫获取百度新闻接口

java_jsoup爬虫获取百度新闻接口...查看更多
发送邮件客户端,带发送附件

java课程设计之发送邮件客户端,带发送附件,提供java源代码下载(java课程设计网原创)...查看更多
java经纬度算两点之间的距离

java课程设计之java经纬度算两点之间的距离,提供java源代码下载(java课程设计网原创)...查看更多
java jsoup 爬取1688网站信息,详细例子

Java jsoup 爬取 1688 网站信息的详细例子,采用 Java + jsoup 实现...查看更多
java对mp3的播放功能

java课程设计之java对mp3的播放功能,提供java源代码下载(java课程设计网原创)...查看更多
(0) 回复