java课程设计不会做?

java课程设计网提供java相关课程设计的学习方法以及java案例、javaweb案例、java课程设计的源代码、题目、报告论文、操作步骤等,可供您免费下载学习,您可以根据您的需求查询您要的java课程设计案例或者java课程设计报告、论文,试试吧!

java jsoup 爬取1688网站信息,详细例子

java jsoup 爬取1688网站信息,详细例子,采用 Java + jsoup 实现。

java jsoup 爬取1688网站信息,详细例子

java jsoup 爬取1688网站信息,详细例子

提供附件下载,里面包含了java的jsoup jar包,下载后可以直接使用。看代码:

 


package com.test;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Example crawler that uses jsoup to scrape pages from 1688.com and print the
 * extracted fields to standard output.
 *
 * <p>
 * Every request is sent with the same browser-like User-Agent, a 30 second
 * timeout and a {@code query=Java} request parameter.
 *
 * <p>
 * Date: 2018-08-19
 *
 * @author Original work, http://www.javakcsj.com/
 */
public class DoGet {

	/** User-Agent header sent with every request. */
	private static final String USER_AGENT = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; BIDUBrowser 2.x)";

	/** Timeout handed to jsoup for every request, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 30000;

	/** Separator placed between the individual values of a collected field. */
	private static final String SEPARATOR = "@";

	/**
	 * Entry point: runs the listing-page example.
	 *
	 * @param args ignored
	 * @throws IOException if a page cannot be fetched
	 */
	public static void main(String[] args) throws IOException {
		DoGet t = new DoGet();

		t.beginOne();
		// t.beginTwo();
	}

	/**
	 * Fetches each shop listing page and prints, for every {@code div.image}
	 * element, the {@code title} and {@code href} of its link.
	 *
	 * @throws IOException if a page cannot be fetched
	 */
	public void beginOne() throws IOException {
		String[] listingUrls = { "https://weiqufood.1688.com/page/offerlist.htm?spm=a2615.2177701.0.0.3ce1bb3dwMLc39" };
		for (String listingUrl : listingUrls) {
			Document doc = fetch(listingUrl, false);
			for (Element imageBox : doc.select("div.image")) {
				Elements links = imageBox.select("a");
				System.out.println(links.attr("title"));
				System.out.println(links.attr("href"));
			}
		}
	}

	/**
	 * Fetches one product detail page and prints its price tiers, the
	 * quantities those tiers apply to, the specification names with their
	 * counts, the attribute table, the gallery image URLs and the URL of the
	 * lazily loaded description.
	 *
	 * @throws IOException if the page cannot be fetched
	 */
	public void beginTwo() throws IOException {
		Document doc = fetch(
				"https://detail.1688.com/offer/535597877009.html?spm=a2615.7691456.0.0.QyqIYA",
				false);

		// Price tiers, then the quantity range each tier applies to.
		String prices = printAndCollect(doc.select("tr.price").select("span.value"));
		String amounts = printAndCollect(doc.select("tr.amount").select("span.value"));
		System.out.println("--" + prices);
		System.out.println("--" + amounts);

		printSpecifications(doc);

		// Detailed attribute table; skipped when the page has no such section
		// (an unguarded get(0) would abort the remaining output).
		Elements attributeBlocks = doc.select("div.offerdetail_ditto_attributes");
		if (!attributeBlocks.isEmpty()) {
			System.out.println(attributeBlocks.get(0).select("table").html());
		}

		// Gallery images: the ".60x60" size marker in each src is replaced
		// with ".400x400" before printing.
		for (Element image : doc.select("div.tab-content-container").select("img")) {
			System.out.println(image.attr("src").replace(".60x60", ".400x400"));
		}

		// URL from which the description is loaded asynchronously.
		String lazyUrl = doc.select("div.desc-lazyload-container").attr("data-tfs-url");
		System.out.println(lazyUrl);
	}

	/**
	 * Prints the HTML of every element on its own line and returns all values
	 * concatenated, each one preceded by {@link #SEPARATOR}.
	 */
	private static String printAndCollect(Elements values) {
		StringBuilder collected = new StringBuilder();
		for (Element value : values) {
			String html = value.html();
			System.out.println(html);
			collected.append(SEPARATOR).append(html);
		}
		return collected.toString();
	}

	/**
	 * Prints the specification names and their counts found in the
	 * {@code div.d-content} blocks of the page. Blocks without any
	 * specification name are skipped entirely.
	 */
	private static void printSpecifications(Document doc) {
		StringBuilder specNames = new StringBuilder();
		StringBuilder specCounts = new StringBuilder();
		int countEntries = 0;
		for (Element block : doc.select("div.d-content")) {
			StringBuilder blockNames = new StringBuilder();
			Elements nameSpans = block.select("td.name").select("span");
			for (int j = 0; j < nameSpans.size(); j++) {
				if (j > 0) {
					blockNames.append(SEPARATOR);
				}
				blockNames.append(nameSpans.get(j).html());
			}
			if (blockNames.length() == 0) {
				continue;
			}
			// Separate the values of consecutive blocks as well, so that the
			// last value of one block does not run into the first of the next.
			if (specNames.length() > 0) {
				specNames.append(SEPARATOR);
			}
			specNames.append(blockNames);

			for (Element countCell : block.select("td.count")) {
				if (countEntries++ > 0) {
					specCounts.append(SEPARATOR);
				}
				specCounts.append(countCell.select("em.value").html());
			}
		}
		System.out.println("规格:" + specNames);
		System.out.println("数量:" + specCounts);
	}

	/**
	 * Fetches the given URL the same way as the other requests, but without
	 * rejecting non-HTML content types; used to imitate the page's
	 * asynchronous load.
	 *
	 * @param lazyUrl URL to fetch
	 * @return the parsed response
	 * @throws IOException if the request fails
	 */
	public Document moni(String lazyUrl) throws IOException {
		return fetch(lazyUrl, true);
	}

	/**
	 * Calls {@link #parseUrl(String, String)} once per page number, starting
	 * at page 1.
	 *
	 * <p>
	 * Each page URL is built from the part of {@code url} that precedes
	 * {@code "&offset="} (the whole URL when that marker is absent) followed by
	 * {@code "&page=<n>&click=0"}.
	 *
	 * @param url     URL to derive the page URLs from
	 * @param yeshu   page count as a decimal string
	 * @param timesId passed through to {@code parseUrl}
	 * @throws IOException           if a page cannot be fetched
	 * @throws NumberFormatException if {@code yeshu} is not a valid integer
	 */
	public static void done(String url, String yeshu, String timesId)
			throws IOException {
		int pageCount = Integer.parseInt(yeshu);
		int offsetIndex = url.indexOf("&offset=");
		String baseUrl = offsetIndex >= 0 ? url.substring(0, offsetIndex) : url;
		// NOTE(review): the bound is exclusive, so page number pageCount itself
		// is never requested - confirm whether that is intended.
		for (int page = 1; page < pageCount; page++) {
			// Build every page URL from the base; appending to a running
			// string would pile up the parameters of all earlier pages.
			parseUrl(baseUrl + "&page=" + page + "&click=0", timesId);
		}
	}

	/**
	 * Fetches the given page and prints, for every {@code div.gl-i-wrap}
	 * element, its title, price and comment text on one line.
	 *
	 * @param aa      URL of the page to fetch
	 * @param timesId currently unused; nothing is persisted by this method
	 * @throws IOException if the page cannot be fetched
	 */
	public static void parseUrl(String aa, String timesId) throws IOException {
		Document doc = fetch(aa, false);
		for (Element item : doc.select("div.gl-i-wrap")) {
			String title = item.select("div.p-name").select("a").attr("title");
			String price = item.select("div.p-price").select("strong")
					.attr("data-price");
			String comments = item.select("div.p-commit").select("a").text();
			System.out.println(title + "*************" + price + "****" + comments);
		}
	}

	/**
	 * Performs a GET request with the shared User-Agent, timeout and
	 * {@code query=Java} parameter and returns the parsed response.
	 *
	 * @param url               URL to fetch
	 * @param ignoreContentType whether jsoup should accept responses whose
	 *                          content type it would otherwise reject
	 */
	private static Document fetch(String url, boolean ignoreContentType)
			throws IOException {
		return Jsoup.connect(url)
				.ignoreContentType(ignoreContentType)
				.data("query", "Java")
				.userAgent(USER_AGENT)
				.timeout(TIMEOUT_MILLIS)
				.get();
	}

}


大家都在看

(0) 回复

留言评论

*请勿填写与java课程设计无关的内容。如内容中包含有关政治、色情等不健康内容,且违反《中华人民共和国网络安全法》,本站将第一时间将其移交公安部门。