做运动鞋的网站视频,网络策划是什么,网站排名软件 利搜,网站详细设计

一、xxxxxx获取指定任务爬取的所有url的接口 接口名称：xxxxxx获取指定任务爬取的所有url的接口 访问链接：http://IP:PORT/crwalTask/findUrlExceptionById?ctId=ctIdVal&time=timeVal&limit=limitVal 传入参数类型：String,int 参数内容: 返回…

一、xxxxxx获取指定任务爬取的所有url的接口
接口名称：xxxxxx获取指定任务爬取的所有url的接口
访问链接：http://IP:PORT/crwalTask/findUrlExceptionById?ctId=ctIdVal&time=timeVal&limit=limitVal
传入参数类型：String,int
参数内容:
返回类型：JSONArray
返回内容：
调用方法Demo

public static void main(String[] args) throws Exception {
    //爬虫访问接口地址
    String req_url = "http://192.168.1.105:8080/crwalTask/findUrlExceptionById?ctId=ctIdVal&time=timeVal&limit=limitVal";
    JSONArray jsonArray = httpRequest(req_url,"ba716af7-105c-481b-bf28-2e9231529947",SelectUtil.time,SelectUtil.number);//200
    System.out.println(jsonArray);
}

public class SelectUtil {
    public static final String time = "2018-03-05".replaceAll(" ","%20");//按时间筛选 格式yyyy-mm-dd或yyyy-mm-dd HH:mm:ss
    public static final int number = 162;//查询限制数量
}
/**
 * 获取指定任务爬取的所有url信息
 * @param req_url 访问指定任务爬取的url的链接地址
 * @param ctId 指定的任务Id
 * @param time 查询时间
 * @param limit 查询限制的条数
 * @return
 */
public static JSONArray httpRequest(String req_url,String ctId,String time,int limit) {
    req_url = req_url.replace("ctIdVal",ctId);
    req_url = req_url.replace("timeVal",time);
    req_url = req_url.replace("limitVal",String.valueOf(limit));
    StringBuffer buffer = new StringBuffer();
    JSONArray jsonArray = null;
    try {
        URL url = new URL(req_url);
        HttpURLConnection httpUrlConn = (HttpURLConnection) url.openConnection();

        httpUrlConn.setDoOutput(false);
        httpUrlConn.setDoInput(true);
        httpUrlConn.setUseCaches(false);

        httpUrlConn.setRequestMethod("POST");
        httpUrlConn.connect();

        // 将返回的输入流转换成字符串
        InputStream inputStream = httpUrlConn.getInputStream();
        InputStreamReader inputStreamReader = new InputStreamReader(inputStream, "utf-8");
        BufferedReader bufferedReader = new BufferedReader(inputStreamReader);

        String str = null;
        while ((str = bufferedReader.readLine()) !=
                null) {
            buffer.append(str);
        }
        bufferedReader.close();
        inputStreamReader.close();
        // 释放资源
        inputStream.close();
        inputStream = null;
        httpUrlConn.disconnect();
        if("".equals(buffer.toString())){
            String exception = "[\"exception\",\"查询的记录数超过240\"]";

            jsonArray = JSONArray.fromObject(exception);
        }else{
            jsonArray = JSONArray.fromObject(buffer.toString());
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }

    return jsonArray;
}
View Code

需要的Jar包
commons-beanutils-1.9.3.jar
commons-collections-3.2.2.jar
commons-lang-2.6.jar
commons-logging-1.2.jar
ezmorph-1.0.6.jar
json-lib-2.4-jdk15.jar

Sql脚本
alter table urlpathmapper add exceptionInfo varchar(2048) comment 'URL运行错误信息'
alter table urlpathmapper add title varchar(256) comment '爬取标题'
alter table crawltaskmanage add checkFile varchar(8) comment '文件是否校验0是1否'
alter table crawltaskmanage add SimHashValue int(8) comment 'SimHash算法重复度比较值'

希望与广大网友互动点此进行留言吧