相关文章推荐
单身的麦片  ·  R ...·  1 年前    · 
爱看书的钥匙  ·  overflow:hidden属性 - ...·  1 年前    · 
深情的水桶  ·  FreeCAD does not run ...·  1 年前    · 
import java.io.BufferedInputStream; import java.io.InputStream; import java.net.HttpURLConnection; import java.util.ArrayList; import java.util.List; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.Page; import com.gargoylesoftware.htmlunit.ProxyConfig; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.WebResponse; import com.gargoylesoftware.htmlunit.html.HtmlPage; import com.gargoylesoftware.htmlunit.util.NameValuePair; * 这个DEMO主要为了测试爬虫(动态)代理IP的稳定性 * 完美支持企业信息天眼查、电商Ebay、亚马逊、新浪微博、法院文书、分类信息等 * 也可以作为爬虫参考项目,如需使用,请自行修改webParseHtml方法 public class TestDynamicIpContinue { public static List ipList = new ArrayList<>(); public static boolean gameOver = false; public static void main(String[] args) throws Exception { // 每隔几秒提取一次IP long fetchIpSeconds = 5; int testTime = 3; // 请填写无忧代理IP订单号,填写之后才可以提取到IP哦 String order = "88888888888888888888888888888"; // 你要抓去的目标网址 // 企业信息天眼查 http://www.tianyancha.com/company/1184508115 // 企业信息工商系统 http://www.gsxt.gov.cn/%7BLtkX_Us_Uuw_QRrZ9mfv2cbf8ANpkJNT8_EzigHHLIvfwbsXfxY0o15JwumCNmvtm_nv9Wtm2Iy_ptgrdpD7p-dP6C8an4IYel_Bx4EnhQhxk8Q4jptLj9IMw9N0lCP-4i0Q4MN55e0wtKOgDy4GEw-1493711400352%7D // 电商Ebay http://www.ebay.com/sch/tenco-tech/m.html?_ipg=200&_sop=12&_rdc=1 // 电商天猫 https://list.tmall.com/search_product.htm?cat=56594003&brand=97814105&sort=s&style=g&search_condition=23&from=sn_1_cat&industryCatId=50025174#J_crumbs // 电商京东 https://search.jd.com/Search?keyword=%E8%8B%8F%E6%89%93%E7%B2%89&enc=utf-8&suggest=1.def.0.T15&wq=s%27d%27f&pvid=1d962d789b81461aa6cce40b26a90429 // IP检测 http://ip.chinaz.com/getip.aspx // 匿名度检测 http://www.xxorg.com/tools/checkproxy/ // 新浪微博 https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D3%26q%3D%E6%B1%BD%E8%BD%A6&queryVal=%E6%B1%BD%E8%BD%A6&type=user&page=2 // 法院文书 https://m.itslaw.com/mobile // 分类信息百姓网 http://china.baixing.com/cheliang/ String targetUrl = "http://pv.sohu.com/cityjson?ie=utf-8"; // 设置referer信息,如果抓取淘宝、天猫需要设置 String referer = ""; // 开启对https的支持 boolean https = true; // 是否输出Header信息 boolean outputHeaderInfo = false; // 是否加载JS,加载JS会导致速度变慢 boolean useJS = false; // 请求超时时间,单位毫秒,默认5秒 int timeOut = 10000; if (order == null || "".equals(order)) { System.err.println("请输入爬虫(动态)代理订单号"); return; System.out.println(">>>>>>>>>>>>>>动态IP测试开始<<<<<<<<<<<<<>>>>>>>>>>>>>动态IP测试结束<<<<<<<<<<<<< headers = response.getResponseHeaders(); for (NameValuePair nameValuePair : headers) { System.out.println(nameValuePair.getName() + "-->" + nameValuePair.getValue()); boolean isJson = false ; if (response.getContentType().equals("application/json")) { html = response.getContentAsString(); isJson = true ; }else if(page.isHtmlPage()){ html = ((HtmlPage)page).asXml(); long endMs = System.currentTimeMillis(); if (url.indexOf("2017.ip138.com") != -1) { System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" + Jsoup.parse(html).select("center").text()); }else if(url.equals("http://www.xxorg.com/tools/checkproxy/")) { System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" + Jsoup.parse(html).select("#result .jiacu").text()); }else if(isJson) { System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" +html); }else if(url.indexOf("tianyancha.com") != -1) { Document doc = Jsoup.parse(html); Elements els = doc.select(".c8"); System.out.println(getName() + "企业基本信息:"); for (Element element : els) { System.out.println("\t*" + element.text()); els = doc.select(".companyInfo-table tr"); System.out.println(getName() + "企业股东信息:"); for (Element element : els) { System.out.println("\t*" + element.text()); els = doc.select("#_container_check tr"); System.out.println(getName() + "企业抽查息:"); for (Element element : els) { System.out.println("\t*" + element.text()); }else{ Document doc = Jsoup.parse(html); System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" + doc.select("title").text()); } catch (Exception e) { System.err.println(ipport + ":" + e.getMessage()); } finally { client.close(); return html; // 定时获取动态IP public class GetIP implements Runnable{ long sleepMs = 1000; int maxTime = 3; String order = ""; String targetUrl; boolean useJs; int timeOut; String referer; boolean https; boolean outputHeaderInfo; public GetIP(long sleepMs, int maxTime, String order, String targetUrl, boolean useJs, int timeOut, String referer, boolean https, boolean outputHeaderInfo) { this.sleepMs = sleepMs; this.maxTime = maxTime; this.order = order; this.targetUrl = targetUrl; this.useJs = useJs; this.timeOut = timeOut; this.referer=referer; this.https=https; this.outputHeaderInfo=outputHeaderInfo; @Override public void run() { int time = 1; while(!gameOver){ if(time >= 4){ gameOver = true; break; try { java.net.URL url = new java.net.URL("http://api.ip.data5u.com/dynamic/get.html?order=" + order + "&ttl&random=true"); HttpURLConnection connection = (HttpURLConnection)url.openConnection(); connection.setConnectTimeout(3000); connection = (HttpURLConnection)url.openConnection(); InputStream raw = connection.getInputStream(); InputStream in = new BufferedInputStream(raw); byte[] data = new byte[in.available()]; int bytesRead = 0; int offset = 0; while(offset < data.length) { bytesRead = in.read(data, offset, data.length - offset); if(bytesRead == -1) { break; offset += bytesRead; in.close(); raw.close(); String[] res = new String(data, "UTF-8").split("\n"); System.out.println(">>>>>>>>>>>>>>当前返回IP量 " + res.length); for (String ip : res) { new Crawler(100, targetUrl, useJs, timeOut, ip, referer, https, outputHeaderInfo).start(); } catch (Exception e) { System.err.println(">>>>>>>>>>>>>>获取IP出错, " + e.getMessage()); try { Thread.sleep(sleepMs); } catch (InterruptedException e) { e.printStackTrace(); public String joinList(List list){ StringBuilder re = new StringBuilder(); for (String string : list) { re.append(string).append(","); return re.toString(); public String trim(String html) { if (html != null) { return html.replaceAll(" ", "").replaceAll("\n", ""); return null; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.Authenticator; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.PasswordAuthentication; import java.net.Proxy; import java.net.URL; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLSession; * 测试无忧代理动态转发代理,本段代码支持请求HTTP和HTTPS协议的网址,比如http://www.example.com、https://www.example.com * @author www.data5u.com public class TestHttps { private static void trustAllHttpsCertificates() throws Exception { javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[1]; javax.net.ssl.TrustManager tm = new miTM(); trustAllCerts[0] = tm; javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS"); sc.init(null, trustAllCerts, null); javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory()); static class miTM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager { public java.security.cert.X509Certificate[] getAcceptedIssuers() { return null; public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) { return true; public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) { return true; public void checkServerTrusted(java.security.cert.X509Certificate[] certs, String authType) throws java.security.cert.CertificateException { return; public void checkClientTrusted(java.security.cert.X509Certificate[] certs, String authType) throws java.security.cert.CertificateException { return; public static void main(String[] args) { // 如果爬虫请求HTTPS网址,必须加入这两行 System.setProperty("jdk.http.auth.proxying.disabledSchemes", ""); System.setProperty("jdk.http.auth.tunneling.disabledSchemes", ""); // 固定为tunnel.data5u.com:56789 final String httpsIpport = "tunnel.data5u.com:56789"; final String order = "【把这里换成你的IP提取码】"; // 用户名 final String pwd = "【把这里换成你的动态转发密码】"; // 密码 final String targetUrl = "http://myip.ipip.net/"; // 要抓取的目标网址 int requestTime = 5; for(int i = 0; i < requestTime; i++) { final int x = i; new Thread(new Runnable() { @Override public void run() { try { long startTime = System.currentTimeMillis(); // 如果爬虫请求HTTPS网址,必须加入这两行 System.setProperty("jdk.http.auth.proxying.disabledSchemes", ""); System.setProperty("jdk.http.auth.tunneling.disabledSchemes", ""); // 信任所有证书,当请求HTTPS网址时需要 // 该部分必须在获取connection前调用 trustAllHttpsCertificates(); HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() { public boolean verify(String urlHostName, SSLSession session) { return true; URL link = new URL(targetUrl); Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress((httpsIpport.split(":"))[0], Integer.parseInt((httpsIpport.split(":"))[1]))); HttpURLConnection connection = (HttpURLConnection)link.openConnection(proxy); // Java系统自带的鉴权模式,请求HTTPS网址时需要 Authenticator.setDefault(new Authenticator() { public PasswordAuthentication getPasswordAuthentication() { return new PasswordAuthentication(order, pwd.toCharArray()); connection.setRequestMethod("GET"); connection.setDoInput(true); connection.setDoOutput(true); connection.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"); connection.setUseCaches(false); connection.setConnectTimeout(60000); connection.connect(); String line = null; StringBuilder html = new StringBuilder(); BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8")); while((line = reader.readLine()) != null){ html.append(line); try { if (reader != null) { reader.close(); } catch (Exception e) { connection.disconnect(); long endTime = System.currentTimeMillis(); System.out.println(x + " [OK]" + "→→→→→" + targetUrl + " " + (endTime - startTime) + "ms " + connection.getResponseCode() + " " + html.toString()); } catch (Exception e) { e.printStackTrace(); System.err.println(x + " [ERR]" + "→→→→→" + e.getMessage()); }).start(); import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.Proxy; import java.net.URL; import java.net.Authenticator; import java.net.PasswordAuthentication; /**因Socks5代理需要密码验证,所以本DEMO带验证逻辑**/ public class Socks5IpDemo { /**内置的密码验证类**/ class BasicAuthenticator extends Authenticator { String userName; String password; public BasicAuthenticator(String userName, String password) { this.userName = userName; this.password = password; @Override protected PasswordAuthentication getPasswordAuthentication() { return new PasswordAuthentication(userName, password.toCharArray()); public static void main(String[] args) { try { String targetUrl = "http://pv.sohu.com/cityjson?ie=utf-8"; HttpURLConnection connection = null; URL link = new URL(targetUrl); // 这个IP要换 成可用的IP哦,这里案例只是随便写的一个IP String ipport = "218.26.204.66:8080"; String charset = "UTF-8"; // 设置代理 Proxy proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress((ipport.split(":"))[0], Integer.parseInt((ipport.split(":"))[1]))); connection = (HttpURLConnection)link.openConnection(proxy); // 密码验证,用户名和密码要改为正确的哦 Authenticator.setDefault(new BasicAuthenticator("data5u", "123321")); connection.setDoOutput(true); connection.setRequestProperty("User-agent", ""); connection.setRequestProperty("Accept", "*/*"); connection.setRequestProperty("Accept-Charset", charset); connection.setRequestProperty("Referer", ""); connection.setRequestProperty("Upgrade-Insecure-Requests", "1"); connection.setRequestProperty("Cookie", ""); connection.setUseCaches(false); connection.setReadTimeout(10000); int rcode = connection.getResponseCode(); if (rcode != 200) { System.out.println("使用代理IP连接网络失败,状态码:" + connection.getResponseCode()); }else { String line = null; StringBuilder html = new StringBuilder(); BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), charset)); while((line = reader.readLine()) != null){ html.append(line); try { if (reader != null) { reader.close(); } catch (Exception e) { System.out.println("请求" + targetUrl + ", 得到如下信息:"); System.out.println(html.toString()); } catch (Exception e) { System.err.println("发生异常:" + e.getMessage());
  • Https代理IP是什么?适用范围有哪些?
  • ① Https代理IP是数据无忧_无忧代理IP_DATA5U提供的高效、稳定的代理IP,具体参考 动态代理IP
  • ② Https代理IP,IP有效期最长60秒。
  • ③ Https代理IP适用于做数据爬虫、大数据业务。
  • 数据无忧_无忧代理IP_DATA5U·专业的代理IP服务商 电话:18210476952
    备案号: 京ICP备16045418号 ICP经营许可:京B2-20192105     国内互联网虚拟专用网业务许可:B1-20200383
    声明:本站资源仅限用来计算机技术学习研究,所有IP都是中国大陆(内地)的机房IP,不支持访问国外网站。

    京公网安备 11011402011314号