提交 d6d86265 编写于 作者: 亦蔚然's avatar 亦蔚然

对代码进行重构

上级 583c9698
...@@ -82,4 +82,8 @@ ...@@ -82,4 +82,8 @@
- 算法 - 算法
- DFS 深度优先算法 - DFS 深度优先算法
- BFS 广度优先 - BFS 广度优先
- 重构
- 短方法:
- a.便于人脑理解
- b.越短越容易复用
- c.对于Java来说可以方便地对方法进行覆盖
...@@ -37,50 +37,80 @@ public class Main { ...@@ -37,50 +37,80 @@ public class Main {
continue; continue;
} }
// 判断是否是感兴趣滴内容【新浪站内的网页】 // 判断是否是感兴趣滴内容【新浪站内的网页】
// link.contains("sina.cn") && !link.contains("passport.sina.cn") && if (isInterestingLink(link)) {
if ((link.contains("news.sina.cn")) || "https://sina.cn".equals(link)) { Document doc = httpGetAndParseHtml(link);
try (CloseableHttpClient httpclient = HttpClients.createDefault()) { // 使用CSS选择器,html中去获取
if (link.startsWith("//")) { ArrayList<Element> links = doc.select("a");
link = "https:" + link; links.stream().map(aTag -> aTag.attr("href")).forEach(linkPool::add);
} // 假设这是一个新闻的详情页,就存入数据库,否则,就什么都不做
HttpGet httpGet = new HttpGet(link); storeIntoDatabaseIfItIsNewsPage(doc);
httpGet.addHeader("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"); processedLinks.add(link);
try (CloseableHttpResponse response1 = httpclient.execute(httpGet)) {
System.out.println(response1.getStatusLine());
System.out.println(link);
HttpEntity entity1 = response1.getEntity();
String html = EntityUtils.toString(entity1);
Document doc = Jsoup.parse(html);
// 使用CSS选择器,html中去获取
ArrayList<Element> links = doc.select("a");
for (Element aTag : links) {
// 获取href属性
linkPool.add(aTag.attr("href"));
}
// 假设这是一个新闻的详情页,就存入数据库,否则,就什么都不做
ArrayList<Element> articleTags = doc.select("article");
if (!articleTags.isEmpty()) {
for (Element articleTag : articleTags) {
String titile = articleTags.get(0).child(0).text();
System.out.println(titile);
}
}
processedLinks.add(link);
}
}
} else { } else {
// 不感兴趣 // 不感兴趣
continue; continue;
} }
} }
// try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
// HttpGet httpGet = new HttpGet("https://sina.cn/");
// try (CloseableHttpResponse response1 = httpclient.execute(httpGet)) {
// System.out.println(response1.getStatusLine());
// HttpEntity entity1 = response1.getEntity();
// System.out.println(EntityUtils.toString(entity1));
// }
// }
} }
/*
* 2、将表达不同逻辑的代码抽象为短方法
* 优点:
* a.便于人脑理解
* b.越短越容易复用
* c.对于Java来说可以方便地对方法进行覆盖
*/
/**
 * Fetches {@code link} with an HTTP GET and parses the response body into a jsoup document.
 *
 * <p>The HTTP status line and the requested URL are printed to standard output.
 *
 * @param link absolute URL, or a protocol-relative one starting with {@code "//"}
 * @return the parsed HTML document
 * @throws IOException if the request fails or the response body cannot be read
 */
private static Document httpGetAndParseHtml(String link) throws IOException {
    // Protocol-relative links ("//host/path") carry no scheme, so default them to https.
    String url = link.startsWith("//") ? "https:" + link : link;

    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
        HttpGet httpGet = new HttpGet(url);
        // Send a desktop-browser user agent with the request.
        httpGet.addHeader("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36");
        try (CloseableHttpResponse response = httpclient.execute(httpGet)) {
            System.out.println(response.getStatusLine());
            System.out.println(url);
            HttpEntity entity = response.getEntity();
            // Without an explicit fallback, EntityUtils decodes as ISO-8859-1 whenever the
            // response declares no charset, which garbles Chinese text. A charset declared
            // in the Content-Type header still takes precedence over this default.
            String html = EntityUtils.toString(entity, "UTF-8");
            return Jsoup.parse(html);
        }
    }
}
/**
 * Treats {@code doc} as a news page if it contains {@code <article>} elements and handles
 * each article's title.
 *
 * <p>Nothing is persisted yet: the title, taken as the text of the article's first child
 * element, is only printed to standard output. A document without {@code <article>}
 * elements is ignored.
 *
 * @param doc the parsed page to inspect
 */
private static void storeIntoDatabaseIfItIsNewsPage(Document doc) {
    ArrayList<Element> articleTags = doc.select("article");
    for (Element articleTag : articleTags) {
        // An article with no child elements has no title node; child(0) would throw.
        if (articleTag.children().isEmpty()) {
            continue;
        }
        // Read the title from the article being visited, not always from the first one.
        String title = articleTag.child(0).text();
        System.out.println(title);
    }
}
/*
* 1、将长的判断条件抽取为不同的方法
*/
/**
 * Decides whether a link should be crawled.
 *
 * <p>A link is interesting when it is a news page or the index page, and it is not a
 * login page. The explicit parentheses matter: {@code &&} binds tighter than {@code ||},
 * so without them the login-page exclusion would apply to the index page only and a
 * login URL that merely mentions the news host would be accepted.
 *
 * @param link the URL to classify
 * @return {@code true} if the link should be fetched and processed
 */
private static boolean isInterestingLink(String link) {
    return (isNewsPage(link) || isIndexPage(link)) && isNotLoginPage(link);
}
/**
 * Tells whether {@code link} is exactly the site's index page URL.
 *
 * @param link the URL to test; {@code null} yields {@code false}
 * @return {@code true} only for the exact string {@code "https://sina.cn"}
 */
private static boolean isIndexPage(String link) {
    final String indexUrl = "https://sina.cn";
    boolean matchesIndex = indexUrl.equals(link);
    return matchesIndex;
}
/**
 * Tells whether {@code link} points into the news section.
 *
 * @param link the URL to test
 * @return {@code true} if the link contains the substring {@code "news.sina.cn"}
 */
private static boolean isNewsPage(String link) {
    final String newsHost = "news.sina.cn";
    return link.indexOf(newsHost) >= 0;
}
/**
 * Tells whether {@code link} is anything other than a login page.
 *
 * @param link the URL to test
 * @return {@code false} if the link contains {@code "passport.sina.cn"}, {@code true} otherwise
 */
private static boolean isNotLoginPage(String link) {
    boolean isLoginPage = link.contains("passport.sina.cn");
    if (isLoginPage) {
        return false;
    }
    return true;
}
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册