首页 > 编程知识 正文

实现了几何代数结合的事,讯代理ip怎么提取

时间:2023-05-03 07:13:53 阅读:243616 作者:2949


遇见问题


        嘘嘘,不要让太多的人知道!!!! 

        代理IP是爬虫工作必要的开销,那么如何很好地利用各家服务商提供的免费代理IP呢?


使用方案

       


     讲解一下,就是在爬取之前先到各家服务商那里爬取最新的可利用代理IP,然后放在ip池里,然后再去爬取目标网站。如此一来,维护好这个ip池,就可以源源不断地接收新的可以使用的代理ip,剩下的工作就是从网上四处寻找代理网站了。


给个demo吧

import com.alibaba.fastjson.JSON;import com.alibaba.fastjson.JSONArray;import com.alibaba.fastjson.JSONObject;import org.apache.http.HttpEntity;import org.apache.http.HttpHost;import org.apache.http.client.methods.CloseableHttpResponse;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.CloseableHttpClient;import org.apache.http.impl.client.HttpClientBuilder;import org.apache.http.impl.client.HttpClients;import org.apache.http.util.EntityUtils;import java.util.ArrayList;import java.util.List;/** * 使用免费代理demo * * Created by zc on 2017/8/11. */public class HttpClientUtilTest { public static void main(String[] args) throws Exception { //第一步,爬取ip代理池 List<ProxyModel> proxyModelList = spriderProxyIp(); String ip = proxyModelList.get(2).getIp(); int port = proxyModelList.get(2).getPort(); System.out.println(ip + " " + port + " " + proxyModelList.get(2).getAnony()); //添加白名单 whilteList(); //请求目标地址 reqWeb(ip, port); } /** * 请求目标地址 * * @param ip 代理Ip * @param port 端口 * @throws Exception 异常 */ private static void reqWeb(String ip, int port) throws Exception { HttpClientBuilder build = HttpClients.custom(); HttpHost proxy = new HttpHost(ip, port); CloseableHttpClient client = build.setProxy(proxy).build(); String url = "http://write.blog.csdn.net/postedit/77099632"; HttpGet request = new HttpGet(url); CloseableHttpResponse response = client.execute(request); HttpEntity entity = response.getEntity(); System.out.println(EntityUtils.toString(entity)); } /** * 添加自己外网ip到讯代理白名单 * * @throws Exception 异常 */ private static void whilteList() throws Exception { String url = "http://www.xdaili.cn/ipagent/whilteList/addIp?spiderId=dce0442efaac42618205f177c2xxxxip=xx.xx.xx.xx"; HttpGet request = new HttpGet(url); CloseableHttpClient client = HttpClients.custom().build(); CloseableHttpResponse response = client.execute(request); System.out.println(response.getStatusLine()); } /** * 爬取讯代理IP池 * * @return ip集合 * @throws Exception 异常 */ private static List<ProxyModel> 
spriderProxyIp() throws Exception { List<ProxyModel> proxyModelList = new ArrayList<>(); String url = "http://www.xdaili.cn/ipagent//freeip/getFreeIps?page=1&rows=10"; HttpGet request = new HttpGet(url); CloseableHttpClient client = HttpClients.custom().build(); CloseableHttpResponse response = client.execute(request); HttpEntity entity = response.getEntity(); String resTxt = EntityUtils.toString(entity); JSONObject jsonObject = JSON.parseObject(resTxt); JSONArray rows = jsonObject.getJSONArray("rows"); rows.stream().map(v -> (JSONObject) v).filter(v -> v.getString("anony").equals("高匿")).forEach(v -> { ProxyModel model = new ProxyModel(); model.setIp(v.getString("ip")); model.setPort(Integer.parseInt(v.getString("port"))); model.setResponsetime(v.getString("responsetime")); model.setAnony(v.getString("anony")); proxyModelList.add(model); }); return proxyModelList; } public static class ProxyModel { private String ip; private int port; private String responsetime; private String anony; public String getIp() { return ip; } public void setIp(String ip) { this.ip = ip; } public int getPort() { return port; } public void setPort(int port) { this.port = port; } public String getResponsetime() { return responsetime; } public void setResponsetime(String responsetime) { this.responsetime = responsetime; } public String getAnony() { return anony; } public void setAnony(String anony) { this.anony = anony; } }



版权声明:该文观点仅代表作者本人。处理文章:请发送邮件至 三1五14八八95#扣扣.com 举报,一经查实,本站将立刻删除。