提交 0901951a 编写于 作者: 亦蔚然's avatar 亦蔚然

v4_使用了多线程

上级 a2980969
......@@ -16,34 +16,40 @@ import java.util.ArrayList;
import java.util.stream.Collectors;
public class Crawler {
CrawlerDao dao = new MyBatisCrawlerDao();
public class Crawler extends Thread {
private CrawlerDao dao;
public void run() throws SQLException, IOException {
String link = null;
// 从数据库中加载下一个链接,若能加载到则进行下一个循环
while ((link = dao.getNextLinkThenDelete()) != null) {
// 若链接已经处理过了就跳到下一次循环
if (dao.isLinkProcessed(link)) {
continue;
}
// 判断是否是感兴趣滴内容【新浪站内的网页】
if (isInterestingLink(link)) {
Document doc = httpGetAndParseHtml(link);
// 分析页面url将它们放到即将处理的url池子中去
parseUrlsFromAndStoreIntoDatabase(doc);
storeIntoDatabaseIfItIsNewsPage(doc, link);
dao.insertProcessedLinked(link);
// dao.updataDatabase(link, "insert into LINKS_ALREADY_PROCESSED(link) values (?)");
} else {
// 不感兴趣
continue;
}
}
/**
 * Creates a crawler that uses the supplied DAO for its link bookkeeping.
 *
 * @param dao data-access object stored in this crawler's {@code dao} field
 */
public Crawler(CrawlerDao dao) {
// The DAO is passed in rather than created here so that every crawler thread
// can be handed the same instance (original note: "each thread shares the same connection").
this.dao = dao;
}
public static void main(String[] args) throws IOException, SQLException {
new Crawler().run();
/**
 * Thread body: keeps taking the next pending link from the DAO and crawling it
 * until the DAO reports that no link is left ({@code null}).
 *
 * <p>A link is skipped when the DAO says it was already processed or when
 * {@code isInterestingLink} rejects it. Any exception raised along the way is
 * rethrown wrapped in a {@link RuntimeException}, which ends this loop.
 */
@Override
public void run() {
    try {
        // Fetch-and-delete the next link on every iteration; a null result ends the loop.
        for (String nextLink = dao.getNextLinkThenDelete();
                nextLink != null;
                nextLink = dao.getNextLinkThenDelete()) {
            // Checked in this order on purpose: first "already processed?", then
            // "interesting?" (per the original note: pages inside the Sina site).
            boolean shouldCrawl = !dao.isLinkProcessed(nextLink) && isInterestingLink(nextLink);
            if (shouldCrawl) {
                Document page = httpGetAndParseHtml(nextLink);
                // Queue the URLs found on the page into the to-be-processed pool.
                parseUrlsFromAndStoreIntoDatabase(page);
                storeIntoDatabaseIfItIsNewsPage(page, nextLink);
                // Record the link as processed.
                dao.insertProcessedLinked(nextLink);
            }
        }
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}
private void parseUrlsFromAndStoreIntoDatabase(Document doc) throws SQLException {
......
package com.github.weiranyi;
import java.io.IOException;
import java.sql.SQLException;
/**
 * Program entry point: creates one {@code MyBatisCrawlerDao} and starts several
 * {@code Crawler} threads that are all constructed with that same DAO instance.
 */
public class Main {
    /** Number of crawler threads started by {@link #main(String[])}. */
    private static final int CRAWLER_THREAD_COUNT = 5;

    /**
     * Starts the crawler threads and returns without waiting for them.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared for callers; kept from the original signature
     * @throws SQLException declared for callers; kept from the original signature
     */
    public static void main(String[] args) throws IOException, SQLException {
        CrawlerDao sharedDao = new MyBatisCrawlerDao();
        int started = 0;
        while (started < CRAWLER_THREAD_COUNT) {
            Crawler worker = new Crawler(sharedDao);
            worker.start();
            started++;
        }
    }
}
......@@ -26,9 +26,9 @@ public class MyBatisCrawlerDao implements CrawlerDao {
sqlSessionFactory = new SqlSessionFactoryBuilder().build(inputStream);
}
// 获取下一个链接再删除
// 【synchronized转原子操作】获取下一个链接再删除
@Override
public String getNextLinkThenDelete() throws SQLException {
public synchronized String getNextLinkThenDelete() throws SQLException {
// SqlSession openSession(boolean autoCommit);这里涉及事务,必须提交才生效,要设置参数为true
try (SqlSession session = sqlSessionFactory.openSession(true)) {
String url = session.selectOne("com.github.weiranyi.MyMapper.selectNextAvailableLink");
......@@ -47,7 +47,6 @@ public class MyBatisCrawlerDao implements CrawlerDao {
}
}
//
@Override
public boolean isLinkProcessed(String link) throws SQLException {
try (SqlSession session = sqlSessionFactory.openSession(true)) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册