提交 9e982de7 编写于 作者: S shi.zeyuan

1.历史数据爬取开发完成,可通过配置指定赛季爬取

上级 3a45f12f
......@@ -100,6 +100,13 @@ func main() {
base.Log.Info("--------数据更新周期结束--------")
time.Sleep(time.Duration(configService.GetSpiderCycleTime()) * time.Minute)
}
case "spiderHistory\n", "spiderHistory":
for {
base.Log.Info("--------开始抓取历史数据--------")
launch.Spider_History()
base.Log.Info("--------结束抓取历史数据--------")
time.Sleep(58 * time.Minute)
}
case "autoleisu\n", "autoleisu":
for {
base.Log.Info("--------发布开始运行--------")
......
......@@ -15,9 +15,7 @@ func main() {
fmt.Println(parse)
fmt.Println(parse2)
//launch.Spider_league()
//launch.Spider_leagueSeason()
launch.Spider_match_his()
launch.Spider_History()
}
[mysql]
url=root:Meta.123@tcp(mysql.io:3306)/foot?charset=utf8
#url=root:awe.FOG5188@tcp(39.108.212.245:3306)/foot?charset=utf8
#url=root:Meta.123@tcp(mysql.io:3306)/foot?charset=utf8
url=root:awe.FOG5188@tcp(39.108.212.245:3306)/foot?charset=utf8
maxIdle=10
maxConn=50
[cookies]
......@@ -23,6 +23,8 @@ cycle_time=48
euro_comp_ids=81,616,104,281,1129
###配置要爬取亚赔的博彩公司ID,使用,间隔,为空或不配置则爬取全部(暂不必,因目前win007提供的新api就是抓取全部)
asia_comp_id=
#配置要抓取的历史数据的赛季年份,使用,间隔
history_season=2018,2019
[analy]
###确认命中次数
hit_count=3
......
......@@ -22,6 +22,8 @@ cycle_time=48
euro_comp_ids=81,616,104,281,1129
###配置要爬取亚赔的博彩公司ID,使用,间隔,为空或不配置则爬取全部(暂不必,因目前win007提供的新api就是抓取全部)
asia_comp_id=
#配置要抓取的历史数据的赛季年份,使用,间隔
history_season=2018,2019
[analy]
###确认命中次数
hit_count=3
......
......@@ -10,7 +10,6 @@ type MatchHisService struct {
mysql.BaseService
}
func (this *MatchHisService) Exist(v *pojo.MatchHis) bool {
has, err := mysql.GetEngine().Table("`t_match_his`").Where(" `Id` = ? ", v.Id).Exist()
if err != nil {
......@@ -25,8 +24,6 @@ func (this *MatchHisService) FindAll() []*pojo.MatchHis {
return dataList
}
func (this *MatchHisService) FindById(matchId string) *pojo.MatchHis {
data := new(pojo.MatchHis)
data.Id = matchId
......@@ -36,3 +33,25 @@ func (this *MatchHisService) FindById(matchId string) *pojo.MatchHis {
}
return data
}
// FindBySeason returns the rows of foot.t_match_his (joined to foot.t_league
// on LeagueId) whose MatchDate is on or after January 1st of the given season
// year.
//
// season must be a plain year consisting only of ASCII digits, e.g. "2019".
// Because the value is concatenated into the SQL text, any other value is
// rejected and an empty list is returned without running a query.
//
// NOTE(review): the query has no upper bound on MatchDate, so a season also
// returns matches from later years — confirm this is intended.
func (this *MatchHisService) FindBySeason(season string) []*pojo.MatchLast {
	// Result list; stays empty when the season is rejected or nothing matches.
	dataList := make([]*pojo.MatchLast, 0)

	// Only digits may reach the SQL text below.
	if len(season) == 0 {
		return dataList
	}
	for i := 0; i < len(season); i++ {
		if season[i] < '0' || season[i] > '9' {
			return dataList
		}
	}

	sql_build := `
SELECT
  la.*
FROM
  foot.t_match_his la,
  foot.t_league l
WHERE la.LeagueId = l.Id
  AND 1=1
`
	// Well-formed datetime literal for the first instant of the year. The
	// previous literal used month 00 and a stray ':' before the time; ">=" keeps
	// a match at exactly midnight on January 1st included, as it was before.
	sql_build = sql_build + " AND la.MatchDate >= '" + season + "-01-01 00:00:00'"

	// Run the query; the rows are loaded into dataList.
	this.FindBySQL(sql_build, &dataList)
	return dataList
}
......@@ -2,12 +2,13 @@ package launch
import (
"strconv"
"strings"
"tesou.io/platform/foot-parent/foot-core/common/utils"
"tesou.io/platform/foot-parent/foot-core/module/spider/constants"
"time"
)
func Clean(){
func Clean() {
//清空数据表
//Before_spider_match()
//Before_spider_baseFace()
......@@ -15,8 +16,6 @@ func Clean(){
Before_spider_euroLast()
}
func Spider() {
//记录数据爬取时间
constants.SpiderDateStr = time.Now().Format("2006-01-02 15:04:05")
......@@ -39,7 +38,6 @@ func Spider() {
Spider_euroHis_Incomplete()
}
func Spider_Near() {
//记录数据爬取时间
constants.SpiderDateStr = time.Now().Format("2006-01-02 15:04:05")
......@@ -57,18 +55,27 @@ func Spider_Near() {
Spider_euroHis_near()
}
// Spider_History crawls historical data. It calls Spider_league and
// Spider_leagueSeason, reads the comma-separated "history_season" value from
// the "spider" config section (falling back to the single season "2019" when
// the value is empty), and then runs the match, base-face, Asian odds,
// European odds and European odds history spiders once per season.
//
// NOTE(review): this span also contains the season-less calls
// (Spider_match_his() through Spider_euroHis_Incomplete()) and a closing brace
// ahead of the season loop — presumably the pre-change body kept by the diff
// view; confirm which lines belong to the current file.
func Spider_History() {
Spider_league()
Spider_leagueSeason()
// Crawl match data.
// Crawl European odds for the matches.
// Crawl Asian odds.
// Crawl European odds history.
Spider_match_his()
Spider_baseFace(false)
Spider_asiaLastNew(false)
Spider_euroLast()
Spider_euroHis()
// Then crawl again, twice, the matches whose European odds data is incomplete.
Spider_euroHis_Incomplete()
}
// Seasons to crawl, taken from config key "history_season" in section "spider".
var seasons []string
season_str := utils.GetVal("spider", "history_season")
if len(season_str) <= 0 {
// Nothing configured: default to the 2019 season.
seasons = []string{"2019"}
} else {
seasons = strings.Split(season_str, ",")
}
// Run every history spider for each configured season, in order.
for _, v := range seasons {
Spider_match_his(v)
Spider_baseFace_his(v)
Spider_asiaLastNew_his(v)
Spider_euroLast_his(v)
Spider_euroHis_his(v)
}
}
......@@ -3,12 +3,12 @@ package launch
import "tesou.io/platform/foot-parent/foot-spider/module/win007/proc"
// Spider_match_his crawls match data: it obtains the match-history processer,
// sets its Season field and starts it.
// NOTE(review): both the parameterless signature with the hard-coded "2019"
// season and the season-parameter variant appear below — presumably the old
// and new sides of the diff; confirm which lines belong to the current file.
func Spider_match_his() {
func Spider_match_his(season string) {
processer := proc.GetMatchHisProcesser()
// URL layout: /<league season>/<league id>_<league sub id>_<round number>.htm
//http://m.win007.com/info/fixture/2019-2020/36_0_1.htm
processer.Season = "2019"
processer.Season = season
processer.Startup()
}
......@@ -7,21 +7,20 @@ import (
"tesou.io/platform/foot-parent/foot-spider/module/win007/proc"
)
// Before_spider_baseFace truncates the tables t_b_f_battle,
// t_b_f_future_event and t_b_f_score through DBOpsService.TruncateTable.
// NOTE(review): the signature and the TruncateTable call each appear twice,
// differing only in spacing — presumably the old and new sides of the diff.
func Before_spider_baseFace(){
func Before_spider_baseFace() {
// Clear the current comparison tables before crawling.
opsService := new(mysql.DBOpsService)
// Tables to be truncated.
opsService.TruncateTable([]string{"t_b_f_battle","t_b_f_future_event","t_b_f_score"})
opsService.TruncateTable([]string{"t_b_f_battle", "t_b_f_future_event", "t_b_f_score"})
}
//查询标识为win007,且欧赔未抓取的配置数据,指定菠菜公司
func Spider_baseFace(spiderAll bool) {
matchLastService := new(service2.MatchLastService)
var matchLasts []*pojo.MatchLast
if spiderAll{
if spiderAll {
matchLasts = matchLastService.FindAll()
}else{
} else {
matchLasts = matchLastService.FindNotFinished()
}
......@@ -30,7 +29,16 @@ func Spider_baseFace(spiderAll bool) {
processer.Startup()
}
// Spider_baseFace_his loads the matches returned by
// MatchHisService.FindBySeason for the given season and runs the base-face
// processer over that list.
func Spider_baseFace_his(season string) {
	hisService := new(service2.MatchHisService)
	var seasonMatches []*pojo.MatchLast = hisService.FindBySeason(season)

	// Hand the season's matches to the processer and start it.
	baseFace := proc.GetBaseFaceProcesser()
	baseFace.MatchLastList = seasonMatches
	baseFace.Startup()
}
func Spider_baseFace_near() {
matchLastService := new(service2.MatchLastService)
......@@ -40,4 +48,3 @@ func Spider_baseFace_near() {
processer.MatchLastList = matchLasts
processer.Startup()
}
......@@ -45,6 +45,16 @@ func Spider_asiaLastNew(spiderAll bool) {
processer.Startup()
}
// Spider_asiaLastNew_his loads the matches returned by
// MatchHisService.FindBySeason for the given season and runs the
// AsiaLastNew processer over that list.
func Spider_asiaLastNew_his(season string) {
	hisService := new(service2.MatchHisService)
	var seasonMatches []*pojo.MatchLast = hisService.FindBySeason(season)

	// Hand the season's matches to the processer and start it.
	asiaLast := proc.GetAsiaLastNewProcesser()
	asiaLast.MatchLastList = seasonMatches
	asiaLast.Startup()
}
func Spider_asiaLastNew_near() {
matchLastService := new(service2.MatchLastService)
matchLasts := matchLastService.FindNear()
......
......@@ -2,6 +2,7 @@ package launch
import (
"strings"
"tesou.io/platform/foot-parent/foot-api/module/match/pojo"
"tesou.io/platform/foot-parent/foot-core/common/base/service/mysql"
"tesou.io/platform/foot-parent/foot-core/common/utils"
"tesou.io/platform/foot-parent/foot-core/module/elem/service"
......@@ -33,7 +34,7 @@ func Spider_euroLast() {
//为空会抓取所有,这里没有必要配置所有的波菜公司ID
compService := new(service.CompService)
compIds = compService.FindEuroIds()
}else{
} else {
compIds = strings.Split(val, ",")
}
......@@ -43,6 +44,21 @@ func Spider_euroLast() {
processer.Startup()
}
// Spider_euroLast_his loads the matches returned by
// MatchHisService.FindBySeason for the given season and runs the EuroLast
// processer over them, using the company ids from CompService.FindEuroIds.
func Spider_euroLast_his(season string) {
	hisService := new(service2.MatchHisService)
	var seasonMatches []*pojo.MatchLast = hisService.FindBySeason(season)

	// An empty id list would crawl every company, and there is no need to
	// configure all bookmaker ids here, so the ids come from CompService.
	var euroCompIds []string = new(service.CompService).FindEuroIds()

	// Configure the processer with the matches and company ids, then start it.
	euroLast := proc.GetEuroLastProcesser()
	euroLast.MatchLastList = seasonMatches
	euroLast.CompWin007Ids = euroCompIds
	euroLast.Startup()
}
//查询标识为win007,且欧赔未抓取的配置数据,指定菠菜公司
func Spider_euroLast_near() {
......@@ -58,7 +74,7 @@ func Spider_euroLast_near() {
//为空会抓取所有,这里没有必要配置所有的波菜公司ID
compService := new(service.CompService)
compIds = compService.FindEuroIds()
}else{
} else {
compIds = strings.Split(val, ",")
}
......@@ -66,4 +82,4 @@ func Spider_euroLast_near() {
processer.MatchLastList = matchLasts
processer.CompWin007Ids = compIds
processer.Startup()
}
\ No newline at end of file
}
......@@ -2,6 +2,7 @@ package launch
import (
"strings"
"tesou.io/platform/foot-parent/foot-api/module/match/pojo"
"tesou.io/platform/foot-parent/foot-core/common/utils"
"tesou.io/platform/foot-parent/foot-core/module/elem/service"
service2 "tesou.io/platform/foot-parent/foot-core/module/match/service"
......@@ -34,7 +35,7 @@ func Spider_euroHis() {
//为空会抓取所有,这里没有必要配置所有的波菜公司ID
compService := new(service.CompService)
compIds = compService.FindEuroIds()
}else{
} else {
compIds = strings.Split(val, ",")
}
......@@ -45,6 +46,22 @@ func Spider_euroHis() {
}
// Spider_euroHis_his loads the matches returned by
// MatchHisService.FindBySeason for the given season and runs the EuroTrack
// processer over them, using the company ids from CompService.FindEuroIds.
func Spider_euroHis_his(season string) {
	hisService := new(service2.MatchHisService)
	var seasonMatches []*pojo.MatchLast = hisService.FindBySeason(season)

	// Company ids are looked up after the matches, as before.
	var euroCompIds []string = new(service.CompService).FindEuroIds()

	// Configure the processer with the company ids and matches, then start it.
	euroTrack := proc.GetEuroTrackProcesser()
	euroTrack.CompWin007Ids = euroCompIds
	euroTrack.MatchLastList = seasonMatches
	euroTrack.Startup()
}
//查询标识为win007,且欧赔未抓取的配置数据,指定菠菜公司
func Spider_euroHis_near() {
......@@ -60,7 +77,7 @@ func Spider_euroHis_near() {
//为空会抓取所有,这里没有必要配置所有的波菜公司ID
compService := new(service.CompService)
compIds = compService.FindEuroIds()
}else{
} else {
compIds = strings.Split(val, ",")
}
......@@ -78,7 +95,7 @@ func Spider_euroHis_Incomplete() {
//为空会抓取所有,这里没有必要配置所有的波菜公司ID
compService := new(service.CompService)
compIds = compService.FindEuroIds()
}else{
} else {
compIds = strings.Split(val, ",")
}
......
......@@ -70,7 +70,7 @@ func (this *MatchHisProcesser) Startup() {
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 100, 2000)
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
......@@ -115,7 +115,6 @@ func (this *MatchHisProcesser) Process(p *page.Page) {
his := new(pojo2.MatchHis)
index := 0
//比赛时间
index++
temp_matchDate := val_arr[index]
seasonYear := season.Season
if strings.Contains(season.Season, "-") {
......@@ -147,8 +146,8 @@ func (this *MatchHisProcesser) Process(p *page.Page) {
half_arr := strings.Split(score_arr[1], ":")
his.MainTeamGoals, _ = strconv.Atoi(full_arr[0])
his.MainTeamHalfGoals, _ = strconv.Atoi(half_arr[0])
his.GuestTeamGoals, _ = strconv.Atoi(full_arr[0])
his.GuestTeamHalfGoals, _ = strconv.Atoi(half_arr[0])
his.GuestTeamGoals, _ = strconv.Atoi(full_arr[1])
his.GuestTeamHalfGoals, _ = strconv.Atoi(half_arr[1])
//客队名称
index++
temp_guestTeam := val_arr[index]
......@@ -187,5 +186,4 @@ func (this *MatchHisProcesser) Finish() {
}
this.MatchHisService.SaveList(matchHis_list_slice)
this.MatchHisService.ModifyList(matchHis_modify_list_slice)
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册