Scripts 学盟

标题: 关于 Java 的 HTTP 抓取 [GET 方式请求] [打印本页]

作者: Alvin    时间: 2011-4-29 22:12:14     标题: 关于 Java 的 HTTP 抓取 [GET 方式请求]

下面是一段简单的示例代码,可供参考。
  1. /**
  2. * (#)HttpGet.java    创建时间:Apr 29, 2011 9:36:43 PM<br />
  3. */
  4. package org.iscripts.common.net;

  5. import java.io.BufferedReader;
  6. import java.io.IOException;
  7. import java.io.InputStream;
  8. import java.io.InputStreamReader;
  9. import java.net.URL;
  10. import java.net.URLConnection;
  11. import java.util.regex.Matcher;
  12. import java.util.regex.Pattern;

  /**
   * Minimal helper for fetching the content behind a URL as text.
   *
   * <p>The text encoding is either supplied by the caller or taken from the
   * {@code charset} parameter of the response's Content-Type, falling back to
   * UTF-8 when neither is available.
   *
   * @author 林俊海(ialvin.cn) 广东·普宁·里湖
   */
  public class HttpGet {

    /** Encoding used when the caller and the response both fail to name one. */
    private static final String DEFAULT_ENCODING = "utf-8";

    /**
     * Extracts the {@code charset} parameter of a Content-Type value. The value
     * may be bare ({@code charset=utf-8}) or quoted ({@code charset="utf-8"});
     * surrounding quotes are not part of the captured group.
     */
    private static final Pattern CHARSET_PATTERN =
        Pattern.compile("(?i)\\bcharset=[\"']?([^\\s;\"']+)");

    /**
     * Demo entry point: fetches a fixed page and prints it to standard output.
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched or decoded
     */
    public static void main(String[] args) throws IOException {
      System.out.println(HttpGet.get("http://www.iscripts.org/", "utf-8"));
    }

    /**
     * Fetches {@code strURL} and decodes the body using the charset announced in
     * the response's Content-Type, or UTF-8 when none is announced.
     *
     * @param strURL the URL to fetch
     * @return the decoded response body
     * @throws IOException if the URL is malformed, the connection fails, or the
     *     announced charset is not supported
     */
    public static String get(String strURL) throws IOException {
      URLConnection conn = HttpGet.openURL(strURL);
      // Obtain the stream first so connection failures surface before the
      // Content-Type is inspected.
      InputStream in = conn.getInputStream();
      return HttpGet.read(in, HttpGet.getContentEncoding(conn));
    }

    /**
     * Fetches {@code strURL} and decodes the body with the given encoding.
     *
     * @param strURL the URL to fetch
     * @param encoding charset name to decode with; {@code null} means UTF-8
     * @return the decoded response body
     * @throws IOException if the URL is malformed, the connection fails, or
     *     {@code encoding} is not supported
     */
    public static String get(String strURL, String encoding) throws IOException {
      return HttpGet.read(HttpGet.openURL(strURL).getInputStream(), encoding);
    }

    /**
     * Opens and connects a {@link URLConnection} for the given URL string.
     *
     * @param strURL the URL to connect to
     * @return the connected connection
     * @throws IOException if the URL is malformed or connecting fails
     */
    private static URLConnection openURL(String strURL) throws IOException {
      URLConnection conn = new URL(strURL).openConnection();
      conn.connect();
      return conn;
    }

    /**
     * Reads {@code in} to the end, decodes it, and closes it.
     *
     * @param in the stream to drain; always closed before this method returns,
     *     whether it completes normally or throws
     * @param encoding charset name to decode with; {@code null} means UTF-8
     * @return everything read from the stream, as text
     * @throws IOException if reading fails or the charset is not supported
     */
    private static String read(InputStream in, String encoding) throws IOException {
      String charsetName = (encoding == null) ? DEFAULT_ENCODING : encoding;
      // The raw stream is its own resource so it is still closed when the
      // reader cannot be constructed (e.g. unsupported charset name).
      try (InputStream stream = in;
          BufferedReader reader =
              new BufferedReader(new InputStreamReader(stream, charsetName))) {
        char[] buffer = new char[4096];
        StringBuilder builder = new StringBuilder();
        for (int len; (len = reader.read(buffer)) > -1;) {
          builder.append(buffer, 0, len);
        }
        return builder.toString();
      }
    }

    /**
     * Returns the charset named in the connection's Content-Type.
     *
     * @param conn the connection to inspect
     * @return the charset name without surrounding quotes, or {@code null} when
     *     there is no Content-Type or it carries no charset parameter
     */
    private static String getContentEncoding(URLConnection conn) {
      String contentType = conn.getContentType();
      if (contentType == null) {
        return null;
      }
      Matcher m = CHARSET_PATTERN.matcher(contentType);
      return m.find() ? m.group(1) : null;
    }
  }
复制代码





欢迎光临 Scripts 学盟 (http://www.iscripts.org/) Powered by Discuz! X2