提交 d6d86265 编写于 作者: 亦蔚然's avatar 亦蔚然

对代码进行重构

上级 583c9698
......@@ -82,4 +82,8 @@
- 算法
- DFS 深度优先算法
- BFS 广度优先算法
- 重构
- 短方法:
- a.便于人脑理解
- b.越短越容易复用
- c.对于Java来说可以方便地对方法进行覆盖
......@@ -37,50 +37,80 @@ public class Main {
continue;
}
// 判断是否是感兴趣滴内容【新浪站内的网页】
// link.contains("sina.cn") && !link.contains("passport.sina.cn") &&
if ((link.contains("news.sina.cn")) || "https://sina.cn".equals(link)) {
try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
if (link.startsWith("//")) {
link = "https:" + link;
}
HttpGet httpGet = new HttpGet(link);
httpGet.addHeader("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36");
try (CloseableHttpResponse response1 = httpclient.execute(httpGet)) {
System.out.println(response1.getStatusLine());
System.out.println(link);
HttpEntity entity1 = response1.getEntity();
String html = EntityUtils.toString(entity1);
if (isInterestingLink(link)) {
Document doc = httpGetAndParseHtml(link);
// 使用CSS选择器,html中去获取
ArrayList<Element> links = doc.select("a");
links.stream().map(aTag -> aTag.attr("href")).forEach(linkPool::add);
// 假设这是一个新闻的详情页,就存入数据库,否则,就什么都不做
storeIntoDatabaseIfItIsNewsPage(doc);
processedLinks.add(link);
Document doc = Jsoup.parse(html);
// 使用CSS选择器,html中去获取
ArrayList<Element> links = doc.select("a");
for (Element aTag : links) {
// 获取href属性
linkPool.add(aTag.attr("href"));
}
// 假设这是一个新闻的详情页,就存入数据库,否则,就什么都不做
ArrayList<Element> articleTags = doc.select("article");
if (!articleTags.isEmpty()) {
for (Element articleTag : articleTags) {
String titile = articleTags.get(0).child(0).text();
System.out.println(titile);
}
}
processedLinks.add(link);
}
}
} else {
// 不感兴趣
continue;
}
}
// try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
// HttpGet httpGet = new HttpGet("https://sina.cn/");
// try (CloseableHttpResponse response1 = httpclient.execute(httpGet)) {
// System.out.println(response1.getStatusLine());
// HttpEntity entity1 = response1.getEntity();
// System.out.println(EntityUtils.toString(entity1));
// }
// }
}
/*
* 2、将表达不同逻辑的代码抽象为短方法
* 优点:
* a.便于人脑理解
* b.越短越容易复用
* c.对于Java来说可以方便地对方法进行覆盖
*/
// 通过http请求拿到HTML文档
/**
 * Issues an HTTP GET for {@code link} and parses the response body with jsoup.
 *
 * <p>Links beginning with {@code //} are prefixed with {@code https:} before the
 * request is made. The response status line and the requested URL are printed to
 * standard output.
 *
 * @param link the address to fetch
 * @return the document produced by {@code Jsoup.parse} from the response body
 * @throws IOException propagated from the HTTP client or from reading the body
 */
private static Document httpGetAndParseHtml(String link) throws IOException {
    // Give scheme-less links ("//host/path") an explicit https scheme.
    String url = link.startsWith("//") ? "https:" + link : link;
    final String userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36";

    try (CloseableHttpClient client = HttpClients.createDefault()) {
        HttpGet request = new HttpGet(url);
        request.addHeader("user-agent", userAgent);

        try (CloseableHttpResponse response = client.execute(request)) {
            System.out.println(response.getStatusLine());
            System.out.println(url);
            return Jsoup.parse(EntityUtils.toString(response.getEntity()));
        }
    }
}
// 若是新闻页面就存到数据库中
/**
 * Prints the title of every {@code <article>} element found in {@code doc}.
 *
 * <p>The title is read as the text of the article's first child element. Despite
 * the method name, nothing is persisted here; titles are only written to standard
 * output. A document with no {@code <article>} elements produces no output.
 *
 * @param doc the parsed page to inspect
 */
private static void storeIntoDatabaseIfItIsNewsPage(Document doc) {
    ArrayList<Element> articleTags = doc.select("article");
    for (Element articleTag : articleTags) {
        // An <article> with no child elements has no title node; child(0) would throw.
        if (articleTag.children().isEmpty()) {
            continue;
        }
        // Read from the article being visited rather than always from the first one,
        // which printed the first title once per article.
        String title = articleTag.child(0).text();
        System.out.println(title);
    }
}
/*
* 1、将长的判断条件抽取为不同的方法
*/
// 感兴趣的链接
/**
 * Decides whether a link should be crawled.
 *
 * <p>A link qualifies when it is a news page or the index page, and it is not a
 * login page.
 *
 * @param link the link to classify
 * @return {@code true} if the link should be fetched and processed
 */
private static boolean isInterestingLink(String link) {
    // Explicit grouping: && binds tighter than ||, so without these parentheses the
    // login-page exclusion applied only to the index-page branch and news links on
    // login URLs were still accepted.
    return (isNewsPage(link) || isIndexPage(link)) && isNotLoginPage(link);
}
// 首页
/**
 * Tells whether {@code link} is exactly the site index URL.
 *
 * @param link the link to test; {@code null} yields {@code false}
 * @return {@code true} only when the link equals {@code https://sina.cn}
 */
private static boolean isIndexPage(String link) {
    final String indexUrl = "https://sina.cn";
    return indexUrl.equals(link);
}
// 新闻页
/**
 * Tells whether {@code link} contains the news host name.
 *
 * @param link the link to test
 * @return {@code true} when {@code news.sina.cn} occurs anywhere in the link
 */
private static boolean isNewsPage(String link) {
    final String newsHost = "news.sina.cn";
    return link.indexOf(newsHost) >= 0;
}
// 登录页
/**
 * Tells whether {@code link} is free of the login host name.
 *
 * @param link the link to test
 * @return {@code true} when {@code passport.sina.cn} does not occur in the link
 */
private static boolean isNotLoginPage(String link) {
    final String loginHost = "passport.sina.cn";
    return link.indexOf(loginHost) < 0;
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册