Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
东方怂天
Stock-forecast
提交
f52bac0d
S
Stock-forecast
项目概览
东方怂天
/
Stock-forecast
通知
6
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
Stock-forecast
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
f52bac0d
编写于
6月 30, 2020
作者:
东方怂天
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
略作修改
上级
29cc6ce7
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
92 addition
and
77 deletion
+92
-77
一班-1825101045-杨祉.ipynb
一班-1825101045-杨祉.ipynb
+92
-77
未找到文件。
一班-1825101045-杨祉.ipynb
浏览文件 @
f52bac0d
...
@@ -95,8 +95,8 @@
...
@@ -95,8 +95,8 @@
"execution_count": 1,
"execution_count": 1,
"metadata": {
"metadata": {
"ExecuteTime": {
"ExecuteTime": {
"end_time": "2020-06-
22T00:57:31.811677
Z",
"end_time": "2020-06-
30T02:54:55.500263
Z",
"start_time": "2020-06-
22T00:57:31.407717
Z"
"start_time": "2020-06-
30T02:54:54.444909
Z"
}
}
},
},
"outputs": [],
"outputs": [],
...
@@ -157,8 +157,8 @@
...
@@ -157,8 +157,8 @@
"execution_count": 2,
"execution_count": 2,
"metadata": {
"metadata": {
"ExecuteTime": {
"ExecuteTime": {
"end_time": "2020-06-
22T00:57:31.901434
Z",
"end_time": "2020-06-
30T02:54:55.580119
Z",
"start_time": "2020-06-
22T00:57:31.812674
Z"
"start_time": "2020-06-
30T02:54:55.501297
Z"
}
}
},
},
"outputs": [
"outputs": [
...
@@ -301,8 +301,8 @@
...
@@ -301,8 +301,8 @@
"execution_count": 3,
"execution_count": 3,
"metadata": {
"metadata": {
"ExecuteTime": {
"ExecuteTime": {
"end_time": "2020-06-
22T00:57:32.040039
Z",
"end_time": "2020-06-
30T02:54:55.705015
Z",
"start_time": "2020-06-
22T00:57:31.903398
Z"
"start_time": "2020-06-
30T02:54:55.581124
Z"
}
}
},
},
"outputs": [
"outputs": [
...
@@ -421,7 +421,7 @@
...
@@ -421,7 +421,7 @@
" lambda x: x.split(\"Includes:\")[0].replace(\"| About: \", \"\").split(\",\"))\n",
" lambda x: x.split(\"Includes:\")[0].replace(\"| About: \", \"\").split(\",\"))\n",
"# 提取【针对公司】中的简写\n",
"# 提取【针对公司】中的简写\n",
"Content_data[\"针对公司\"] = Content_data[\"针对公司\"].map(\n",
"Content_data[\"针对公司\"] = Content_data[\"针对公司\"].map(\n",
" lambda x: [\"\".join(re.findall('[(](.*?)[)]', i, re.S)) for i in x if not i.find(\"(\")
==
-1])\n",
" lambda x: [\"\".join(re.findall('[(](.*?)[)]', i, re.S)) for i in x if not i.find(\"(\")
==
-1])\n",
"\n",
"\n",
"# 显示前五行内容\n",
"# 显示前五行内容\n",
"Content_data.head()"
"Content_data.head()"
...
@@ -436,11 +436,11 @@
...
@@ -436,11 +436,11 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
5
,
"execution_count":
10
,
"metadata": {
"metadata": {
"ExecuteTime": {
"ExecuteTime": {
"end_time": "2020-06-
22T00:58:16.698088
Z",
"end_time": "2020-06-
30T02:58:07.744754
Z",
"start_time": "2020-06-
22T00:58:14.47284
2Z"
"start_time": "2020-06-
30T02:58:06.46582
2Z"
}
}
},
},
"outputs": [
"outputs": [
...
@@ -466,76 +466,58 @@
...
@@ -466,76 +466,58 @@
" <tr style=\"text-align: right;\">\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>标题</th>\n",
" <th>标题</th>\n",
" <th>针对公司_x</th>\n",
" <th>发布时间_x</th>\n",
" <th>作者</th>\n",
" <th>作者</th>\n",
" <th>评论数</th>\n",
" <th>评论数</th>\n",
" <th>发布时间_y</th>\n",
" <th>针对公司_y</th>\n",
" <th>摘要</th>\n",
" <th>摘要</th>\n",
" <th>针对公司</th>\n",
" <th>针对公司</th>\n",
" <th>发布时间</th>\n",
" </tr>\n",
" </tr>\n",
" </thead>\n",
" </thead>\n",
" <tbody>\n",
" <tbody>\n",
" <tr>\n",
" <tr>\n",
" <th>Micron Technology: Insanely Cheap Stock Given Its High Earnings Quality</th>\n",
" <th>Micron Technology: Insanely Cheap Stock Given Its High Earnings Quality</th>\n",
" <td>Micron Technology: Insanely Cheap Stock Given ...</td>\n",
" <td>Micron Technology: Insanely Cheap Stock Given ...</td>\n",
" <td>MU</td>\n",
" <td>Dec. 31, 2018, 7:57 PM</td>\n",
" <td>Ruerd Heeg</td>\n",
" <td>Ruerd Heeg</td>\n",
" <td>75</td>\n",
" <td>75</td>\n",
" <td>Dec. 31, 2018 7:57 PM</td>\n",
" <td>[MU, MU, MU, MU, MU]</td>\n",
" <td>SummaryLast year, a combination of relatively ...</td>\n",
" <td>SummaryLast year, a combination of relatively ...</td>\n",
" <td>[None]</td>\n",
" <td>MU</td>\n",
" <td>Dec. 31, 2018 7:57 PM</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>Molson Coors Seems Attractive At These Valuations</th>\n",
" <th>Molson Coors Seems Attractive At These Valuations</th>\n",
" <td>Molson Coors Seems Attractive At These Valuations</td>\n",
" <td>Molson Coors Seems Attractive At These Valuations</td>\n",
" <td>TAP</td>\n",
" <td>Dec. 31, 2018, 7:44 PM</td>\n",
" <td>Sanjit Deepalam</td>\n",
" <td>Sanjit Deepalam</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>Dec. 31, 2018 7:44 PM</td>\n",
" <td>[TAP, TAP, TAP]</td>\n",
" <td>SummaryMolson Coors's stock has fallen over 30...</td>\n",
" <td>SummaryMolson Coors's stock has fallen over 30...</td>\n",
" <td>[None]</td>\n",
" <td>TAP</td>\n",
" <td>Dec. 31, 2018 7:44 PM</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>Gerdau: The Brazilian Play On U.S. Steel</th>\n",
" <th>Gerdau: The Brazilian Play On U.S. Steel</th>\n",
" <td>Gerdau: The Brazilian Play On U.S. Steel</td>\n",
" <td>Gerdau: The Brazilian Play On U.S. Steel</td>\n",
" <td>GGB</td>\n",
" <td>Dec. 31, 2018, 7:10 PM</td>\n",
" <td>Shannon Bruce</td>\n",
" <td>Shannon Bruce</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Dec. 31, 2018 7:10 PM</td>\n",
" <td>[GGB, GGB, GGB]</td>\n",
" <td>SummaryGerdau is delivering good results, incl...</td>\n",
" <td>SummaryGerdau is delivering good results, incl...</td>\n",
" <td>[None]</td>\n",
" <td>GGB</td>\n",
" <td>Dec. 31, 2018 7:10 PM</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>Will Apple Get Its Mojo Back?</th>\n",
" <th>Will Apple Get Its Mojo Back?</th>\n",
" <td>Will Apple Get Its Mojo Back?</td>\n",
" <td>Will Apple Get Its Mojo Back?</td>\n",
" <td>AAPL</td>\n",
" <td>Dec. 31, 2018, 5:36 PM</td>\n",
" <td>TipRanks</td>\n",
" <td>TipRanks</td>\n",
" <td>68</td>\n",
" <td>68</td>\n",
" <td>Dec. 31, 2018 5:36 PM</td>\n",
" <td>[AAPL, AAPL, AAPL]</td>\n",
" <td>SummaryApple has been resting on a reputation ...</td>\n",
" <td>SummaryApple has been resting on a reputation ...</td>\n",
" <td>[None]</td>\n",
" <td>AAPL</td>\n",
" <td>Dec. 31, 2018 5:36 PM</td>\n",
" </tr>\n",
" </tr>\n",
" <tr>\n",
" <tr>\n",
" <th>Lululemon Stock Looks Compelling On This Dip</th>\n",
" <th>Lululemon Stock Looks Compelling On This Dip</th>\n",
" <td>Lululemon Stock Looks Compelling On This Dip</td>\n",
" <td>Lululemon Stock Looks Compelling On This Dip</td>\n",
" <td>LULU</td>\n",
" <td>Dec. 31, 2018, 5:26 PM</td>\n",
" <td>L&F Capital Management</td>\n",
" <td>L&F Capital Management</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>Dec. 31, 2018 5:26 PM</td>\n",
" <td>[LULU, LULU, LULU]</td>\n",
" <td>SummaryLululemon stock had a strong 2018 but f...</td>\n",
" <td>SummaryLululemon stock had a strong 2018 but f...</td>\n",
" <td>[None]</td>\n",
" <td>LULU</td>\n",
" <td>Dec. 31, 2018 5:26 PM</td>\n",
" </tr>\n",
" </tr>\n",
" </tbody>\n",
" </tbody>\n",
"</table>\n",
"</table>\n",
...
@@ -549,20 +531,6 @@
...
@@ -549,20 +531,6 @@
"Will Apple Get Its Mojo Back? Will Apple Get Its Mojo Back? \n",
"Will Apple Get Its Mojo Back? Will Apple Get Its Mojo Back? \n",
"Lululemon Stock Looks Compelling On This Dip Lululemon Stock Looks Compelling On This Dip \n",
"Lululemon Stock Looks Compelling On This Dip Lululemon Stock Looks Compelling On This Dip \n",
"\n",
"\n",
" 针对公司_x \\\n",
"Micron Technology: Insanely Cheap Stock Given I... MU \n",
"Molson Coors Seems Attractive At These Valuations TAP \n",
"Gerdau: The Brazilian Play On U.S. Steel GGB \n",
"Will Apple Get Its Mojo Back? AAPL \n",
"Lululemon Stock Looks Compelling On This Dip LULU \n",
"\n",
" 发布时间_x \\\n",
"Micron Technology: Insanely Cheap Stock Given I... Dec. 31, 2018, 7:57 PM \n",
"Molson Coors Seems Attractive At These Valuations Dec. 31, 2018, 7:44 PM \n",
"Gerdau: The Brazilian Play On U.S. Steel Dec. 31, 2018, 7:10 PM \n",
"Will Apple Get Its Mojo Back? Dec. 31, 2018, 5:36 PM \n",
"Lululemon Stock Looks Compelling On This Dip Dec. 31, 2018, 5:26 PM \n",
"\n",
" 作者 \\\n",
" 作者 \\\n",
"Micron Technology: Insanely Cheap Stock Given I... Ruerd Heeg \n",
"Micron Technology: Insanely Cheap Stock Given I... Ruerd Heeg \n",
"Molson Coors Seems Attractive At These Valuations Sanjit Deepalam \n",
"Molson Coors Seems Attractive At These Valuations Sanjit Deepalam \n",
...
@@ -577,20 +545,6 @@
...
@@ -577,20 +545,6 @@
"Will Apple Get Its Mojo Back? 68 \n",
"Will Apple Get Its Mojo Back? 68 \n",
"Lululemon Stock Looks Compelling On This Dip 4 \n",
"Lululemon Stock Looks Compelling On This Dip 4 \n",
"\n",
"\n",
" 发布时间_y \\\n",
"Micron Technology: Insanely Cheap Stock Given I... Dec. 31, 2018 7:57 PM \n",
"Molson Coors Seems Attractive At These Valuations Dec. 31, 2018 7:44 PM \n",
"Gerdau: The Brazilian Play On U.S. Steel Dec. 31, 2018 7:10 PM \n",
"Will Apple Get Its Mojo Back? Dec. 31, 2018 5:36 PM \n",
"Lululemon Stock Looks Compelling On This Dip Dec. 31, 2018 5:26 PM \n",
"\n",
" 针对公司_y \\\n",
"Micron Technology: Insanely Cheap Stock Given I... [MU, MU, MU, MU, MU] \n",
"Molson Coors Seems Attractive At These Valuations [TAP, TAP, TAP] \n",
"Gerdau: The Brazilian Play On U.S. Steel [GGB, GGB, GGB] \n",
"Will Apple Get Its Mojo Back? [AAPL, AAPL, AAPL] \n",
"Lululemon Stock Looks Compelling On This Dip [LULU, LULU, LULU] \n",
"\n",
" 摘要 \\\n",
" 摘要 \\\n",
"Micron Technology: Insanely Cheap Stock Given I... SummaryLast year, a combination of relatively ... \n",
"Micron Technology: Insanely Cheap Stock Given I... SummaryLast year, a combination of relatively ... \n",
"Molson Coors Seems Attractive At These Valuations SummaryMolson Coors's stock has fallen over 30... \n",
"Molson Coors Seems Attractive At These Valuations SummaryMolson Coors's stock has fallen over 30... \n",
...
@@ -598,15 +552,22 @@
...
@@ -598,15 +552,22 @@
"Will Apple Get Its Mojo Back? SummaryApple has been resting on a reputation ... \n",
"Will Apple Get Its Mojo Back? SummaryApple has been resting on a reputation ... \n",
"Lululemon Stock Looks Compelling On This Dip SummaryLululemon stock had a strong 2018 but f... \n",
"Lululemon Stock Looks Compelling On This Dip SummaryLululemon stock had a strong 2018 but f... \n",
"\n",
"\n",
" 针对公司 \n",
" 针对公司 \\\n",
"Micron Technology: Insanely Cheap Stock Given I... [None] \n",
"Micron Technology: Insanely Cheap Stock Given I... MU \n",
"Molson Coors Seems Attractive At These Valuations [None] \n",
"Molson Coors Seems Attractive At These Valuations TAP \n",
"Gerdau: The Brazilian Play On U.S. Steel [None] \n",
"Gerdau: The Brazilian Play On U.S. Steel GGB \n",
"Will Apple Get Its Mojo Back? [None] \n",
"Will Apple Get Its Mojo Back? AAPL \n",
"Lululemon Stock Looks Compelling On This Dip [None] "
"Lululemon Stock Looks Compelling On This Dip LULU \n",
"\n",
" 发布时间 \n",
"Micron Technology: Insanely Cheap Stock Given I... Dec. 31, 2018 7:57 PM \n",
"Molson Coors Seems Attractive At These Valuations Dec. 31, 2018 7:44 PM \n",
"Gerdau: The Brazilian Play On U.S. Steel Dec. 31, 2018 7:10 PM \n",
"Will Apple Get Its Mojo Back? Dec. 31, 2018 5:36 PM \n",
"Lululemon Stock Looks Compelling On This Dip Dec. 31, 2018 5:26 PM "
]
]
},
},
"execution_count":
5
,
"execution_count":
10
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
...
@@ -616,17 +577,71 @@
...
@@ -616,17 +577,71 @@
" Title_data, Content_data, right_on='标题', left_index=True, how='outer')\n",
" Title_data, Content_data, right_on='标题', left_index=True, how='outer')\n",
"\n",
"\n",
"\n",
"\n",
"def Update(x):\n",
"def Update
Company
(x):\n",
" try:\n",
" try:\n",
" return np.unique(x[\"针对公司_y\"].append(x[\"针对公司_x\"]))\n",
" if(x[\"针对公司_x\"]):\n",
" return x[\"针对公司_x\"]\n",
" else:\n",
" return x[\"针对公司_y\"][0]\n",
" except:\n",
" except:\n",
" return np.NAN\n",
" return np.NAN\n",
"\n",
"\n",
"\n",
"def UpdateTime(x):\n",
" try:\n",
" return np.unique([x[\"发布时间_x\"].strip(), x[\"发布时间_y\"].strip()])[0]\n",
" except:\n",
" return np.NAN\n",
"\n",
"\n",
"Title_Content_data['针对公司'] = Title_Content_data[['针对公司_x', '针对公司_y']].apply(\n",
"Title_Content_data['针对公司'] = Title_Content_data[['针对公司_x', '针对公司_y']].apply(\n",
" lambda x: Update(x), axis=1)\n",
" lambda x: UpdateCompany(x), axis=1)\n",
"\n",
"Title_Content_data['发布时间'] = Title_Content_data[[\"发布时间_x\", \"发布时间_y\"]].apply(\n",
" lambda x: UpdateTime(x), axis=1)\n",
"\n",
"# pd.to_datetime(df)\n",
"\n",
"# 删除无用列\n",
"Title_Content_data.drop([\"发布时间_x\", \"发布时间_y\"], axis=1, inplace=True)\n",
"Title_Content_data.drop([\"针对公司_x\", \"针对公司_y\"], axis=1, inplace=True)\n",
"\n",
"# 删除无用行\n",
"Title_Content_data.dropna(axis=0, how='any', subset=[\n",
" \"发布时间\", \"针对公司\", \"评论数\"], inplace=True)\n",
"\n",
"\n",
"Title_Content_data.head()"
"Title_Content_data.head()"
]
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2.2 回帖聚合\n",
"这是网友在各文章下的回复内容 \n",
"Title:各文章的标题;空标题的,用最靠近的有内容的下方标题 \n",
"Content:回复的全文字内容"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 创建一个空的 DataFrame\n",
"Reply_data = pd.DataFrame(columns=['字段', '标题1'])\n",
"\n",
"for root, dirs, files in os.walk(os.path.join(os.getcwd(), \"\")):\n",
" for file in files:\n",
" if(file.endswith('.xlsx')):\n",
" # 获取文件路径\n",
" data_xls = pd.read_excel(os.path.join(root, file), index_col=0)\n",
" data_xls.to_csv(os.path.join(root, file).replace(\n",
" '.xlsx', '.csv'), encoding='utf-8')\n",
" print(os.path.join(root, file), \"转化成功\")\n",
" os.remove(os.path.join(root, file))"
]
}
}
],
],
"metadata": {
"metadata": {
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录