Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Hypo
SZU_CSSE_master
提交
24336635
S
SZU_CSSE_master
项目概览
Hypo
/
SZU_CSSE_master
通知
0
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
SZU_CSSE_master
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
24336635
编写于
12月 06, 2019
作者:
H
hypox64
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
0.12229 by KernelRidge
上级
4049f4a5
变更
8
展开全部
隐藏空白更改
内联
并排
Showing
8 changed file
with
1833 addition
and
94 deletion
+1833
-94
dataloader.py
dataloader.py
+146
-85
description_map.py
description_map.py
+163
-0
evaluation.py
evaluation.py
+6
-1
ml.py
ml.py
+35
-0
model.py
model.py
+1
-1
result/0.02839_0.12229.csv
result/0.02839_0.12229.csv
+1460
-0
train.py
train.py
+11
-7
transformer.py
transformer.py
+11
-0
未找到文件。
dataloader.py
浏览文件 @
24336635
...
...
@@ -2,7 +2,12 @@ import os
import
csv
import
numpy
as
np
import
random
# import matplotlib.pyplot as plt
from
sklearn.decomposition
import
PCA
from
sklearn.model_selection
import
GridSearchCV
from
sklearn.kernel_ridge
import
KernelRidge
from
sklearn.svm
import
SVR
import
evaluation
from
description_map
import
value_map
,
fix_key
,
fix_miss
,
add_future
# load description_txt
description_txt
=
[]
...
...
@@ -14,38 +19,46 @@ for i,line in enumerate(open('./datasets/data_description.txt'),0):
description_txt
.
append
(
line
)
colon_indexs
.
append
(
524
)
#the end of description
description_length
=
len
(
colon_indexs
)
-
1
print
(
'Description length:'
,
description_length
)
desc
ription
s
=
[]
Full_map
=
{}
desc
_key
s
=
[]
for
i
in
range
(
len
(
colon_indexs
)
-
1
):
mapping
=
{}
description_title
=
description_txt
[
colon_indexs
[
i
]]
ori_map
=
{}
my_map
=
{}
desc_key
=
description_txt
[
colon_indexs
[
i
]]
desc_key
=
desc_key
[:
desc_key
.
find
(
':'
)]
desc_keys
.
append
(
desc_key
)
# print(desc_key)
interspace
=
colon_indexs
[
i
+
1
]
-
colon_indexs
[
i
]
-
2
#two space line
if
interspace
==
0
:
mapping
[
'Just_num'
]
=
'None'
descriptions
.
append
(
mapping
)
ori_map
[
'Just_num'
]
=
'None'
Full_map
[
desc_key
]
=
ori_map
else
:
for
j
in
range
(
interspace
-
1
):
#del low space line
line
=
description_txt
[
colon_indexs
[
i
]
+
j
+
2
]
mapping_
key
=
line
[:
line
.
find
(
'
\t
'
)]
key
=
line
[:
line
.
find
(
'
\t
'
)]
#data_description.txt is wrong here
if
mapping_key
==
'NA '
:
mapping_key
=
'NA'
if
mapping_key
==
'WD '
:
mapping_key
=
'WD'
if
mapping_key
==
'BrkComm'
or
mapping_key
==
'Brk Cmn'
:
mapping_key
=
'BrkCmn'
mapping
[
mapping_key
]
=
j
descriptions
.
append
(
mapping
)
# print(descriptions)
def
match_random
(
a
,
b
):
state
=
np
.
random
.
get_state
()
np
.
random
.
shuffle
(
a
)
np
.
random
.
set_state
(
state
)
np
.
random
.
shuffle
(
b
)
if
key
==
'NA '
:
key
=
'NA'
if
key
==
'WD '
:
key
=
'WD'
if
key
==
'BrkComm'
or
key
==
'Brk Cmn'
:
key
=
'BrkCmn'
if
desc_key
in
value_map
:
my_map
[
key
]
=
value_map
[
desc_key
][
key
]
Full_map
[
'my_'
+
desc_key
]
=
my_map
ori_map
[
key
]
=
interspace
-
j
-
1
#change word to vector
Full_map
[
desc_key
]
=
ori_map
# def normlize(npdata,justprice = False):
# _mean = np.mean(npdata)
# _std = np.std(npdata)
# if justprice:
# _mean = 180921.195
# _std = 79415.2918
# return (npdata-_mean)/_std
def
normlize
(
npdata
,
justprice
=
False
):
_min
=
np
.
min
(
npdata
)
...
...
@@ -55,92 +68,124 @@ def normlize(npdata,justprice = False):
_max
=
755000.0
return
(
npdata
-
_min
)
/
(
_max
-
_min
)
# def convert2price(tensor):
# return tensor*79415.2918+180921.195
def convert2price(tensor):
    """Map a scaled price value back onto the 34900-755000 price range.

    Multiplies ``tensor`` by the width of that range and adds the lower
    bound.  NOTE(review): this looks like the inverse of
    ``normlize(..., justprice=True)``; confirm against that function.
    """
    price_span = 755000.0 - 34900.0
    scaled = tensor * price_span
    return scaled + 34900
def
fix_key
(
key
):
#csv is wrong here
if
key
==
'Wd Shng'
:
key
=
'WdShing'
if
key
==
'2fmCon'
:
key
=
'2FmCon'
if
key
==
'NAmes'
:
key
=
'Names'
if
key
==
'Duplex'
:
key
=
'Duplx'
if
key
==
'CmentBd'
:
key
=
'CemntBd'
if
key
==
'C (all)'
:
key
=
'C'
if
key
==
'Twnhs'
:
key
=
'TwnhsI'
if
key
==
'Brk Cmn'
or
key
==
'BrkComm'
:
key
=
'BrkCmn'
else
:
key
=
key
return
key
def
load_train
():
##load train csv
desc_map
=
{}
price_map
=
{}
csv_data
=
[]
#train_del_1299_524.csv
reader
=
csv
.
reader
(
open
(
'./datasets/train.csv'
))
for
line
in
reader
:
csv_data
.
append
(
line
)
id_length
=
len
(
csv_data
)
-
1
data
=
np
.
zeros
((
id_length
,
description_length
+
1
))
for
i
in
range
(
id_length
):
for
j
in
range
(
description_length
+
1
):
key
=
csv_data
[
i
+
1
][
j
+
1
]
for
i
in
range
(
80
):
arr
=
np
.
zeros
(
id_length
)
my_arr
=
np
.
zeros
(
id_length
)
for
j
in
range
(
id_length
):
key
=
csv_data
[
j
+
1
][
i
+
1
]
key
=
fix_key
(
key
)
if
j
==
description_length
:
data
[
i
]
[
j
]
=
float
(
key
)
if
i
==
79
:
arr
[
j
]
=
float
(
key
)
else
:
if
key
in
descriptions
[
j
]:
#SalePrice
data
[
i
][
j
]
=
float
(
descriptions
[
j
][
key
])
else
:
#just num here
# print(i,j)
#my map
if
desc_keys
[
i
]
in
value_map
:
if
key
==
'NA'
:
my_arr
[
j
]
=
fix_miss
(
desc_keys
[
i
])
else
:
my_arr
[
j
]
=
Full_map
[
'my_'
+
desc_keys
[
i
]][
key
]
#auto map
if
key
in
Full_map
[
desc_keys
[
i
]]:
arr
[
j
]
=
Full_map
[
desc_keys
[
i
]][
key
]
else
:
if
key
==
'NA'
:
key
=
0
;
data
[
i
][
j
]
=
float
(
key
)
return
data
arr
[
j
]
=
fix_miss
(
desc_keys
[
i
])
else
:
arr
[
j
]
=
float
(
key
)
if
i
==
79
:
price_map
[
'price'
]
=
arr
else
:
if
desc_keys
[
i
]
in
value_map
:
desc_map
[
'my_'
+
desc_keys
[
i
]]
=
my_arr
# else:
desc_map
[
desc_keys
[
i
]]
=
arr
return
desc_map
,
price_map
def
load_test
():
##load train csv
desc_map
=
{}
csv_data
=
[]
reader
=
csv
.
reader
(
open
(
'./datasets/test.csv'
))
for
line
in
reader
:
csv_data
.
append
(
line
)
id_length
=
len
(
csv_data
)
-
1
data
=
np
.
zeros
((
id_length
,
description_length
))
for
i
in
range
(
id_length
):
for
j
in
range
(
description_length
):
key
=
csv_data
[
i
+
1
][
j
+
1
]
for
i
in
range
(
79
):
arr
=
np
.
zeros
(
id_length
)
my_arr
=
np
.
zeros
(
id_length
)
for
j
in
range
(
id_length
):
key
=
csv_data
[
j
+
1
][
i
+
1
]
key
=
fix_key
(
key
)
if
j
==
description_length
:
data
[
i
][
j
]
=
float
(
key
)
#my map
if
desc_keys
[
i
]
in
value_map
:
if
key
==
'NA'
:
my_arr
[
j
]
=
fix_miss
(
desc_keys
[
i
])
else
:
my_arr
[
j
]
=
Full_map
[
'my_'
+
desc_keys
[
i
]][
key
]
#auto map
if
key
in
Full_map
[
desc_keys
[
i
]]:
arr
[
j
]
=
Full_map
[
desc_keys
[
i
]][
key
]
else
:
if
key
in
descriptions
[
j
]:
#SalePrice
data
[
i
][
j
]
=
float
(
descriptions
[
j
][
key
])
else
:
#just num here
# print(i,j)
if
key
==
'NA'
:
key
=
0
;
data
[
i
][
j
]
=
float
(
key
)
return
data
def
load_all
():
train_desc
=
load_train
()[:,:
79
]
train_price
=
load_train
()[:,
79
]
test_desc
=
load_test
()
if
key
==
'NA'
:
arr
[
j
]
=
fix_miss
(
desc_keys
[
i
])
else
:
arr
[
j
]
=
float
(
key
)
if
desc_keys
[
i
]
in
value_map
:
desc_map
[
'my_'
+
desc_keys
[
i
]]
=
my_arr
# else:
desc_map
[
desc_keys
[
i
]]
=
arr
return
desc_map
# for i,word in enumerate(wordlist,0):
def dict2numpy(dict_data):
    """Stack the values of ``dict_data`` as the columns of a 2-D array.

    Column ``i`` holds the i-th value in the dict's iteration order; the
    row count is the length of the first value.  The result is a new
    ``np.zeros`` array (NumPy's default float dtype).
    """
    columns = list(dict_data.values())
    row_count = len(columns[0])
    np_data = np.zeros((row_count, len(dict_data)))
    for col_index, column in enumerate(columns):
        np_data[:, col_index] = np.array(column)
    return np_data
def
load_all
(
dimension
):
desc_map
,
price_map
=
load_train
()
desc_map
=
add_future
(
desc_map
)
# print(len(desc_map))
# print(desc_map)
# print(desc_map)
train_price
=
np
.
array
(
price_map
[
'price'
])
train_desc
=
dict2numpy
(
desc_map
)
desc_map
=
load_test
()
desc_map
=
add_future
(
desc_map
)
test_desc
=
dict2numpy
(
desc_map
)
desc_all
=
np
.
concatenate
((
train_desc
,
test_desc
),
axis
=
0
)
for
i
in
range
(
description_length
):
for
i
in
range
(
len
(
desc_all
[
0
])
):
desc_all
[:,
i
]
=
normlize
(
desc_all
[:,
i
])
train_price
=
normlize
(
train_price
)
# print(desc_all)
pca
=
PCA
(
n_components
=
dimension
)
#加载PCA算法,设置降维后主成分数目为
desc_all
=
pca
.
fit_transform
(
desc_all
)
#对样本进行降维
train_price
=
normlize
(
train_price
,
True
)
train_desc
=
desc_all
[:
len
(
train_desc
)]
test_desc
=
desc_all
[
len
(
train_desc
):]
return
train_desc
.
astype
(
np
.
float32
),
train_price
.
astype
(
np
.
float32
),
test_desc
.
astype
(
np
.
float32
)
def
write_csv
(
prices
,
path
):
...
...
@@ -152,9 +197,25 @@ def write_csv(prices,path):
csvFile
.
close
()
def
main
():
train_desc
,
train_price
,
test_desc
=
load_all
()
print
(
len
(
test_desc
))
dimension
=
80
train_desc
,
train_price
,
test_desc
=
load_all
(
dimension
)
# # KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
# kr = GridSearchCV(KernelRidge(kernel='polynomial', gamma=0.1),
# param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3],
# "gamma": np.logspace(-2, 2, 5)})
# kr.fit(train_desc, train_price)
# y_kr = kr.predict(test_desc)
# for i in range(len(y_kr)):
# y_kr[i] = convert2price(y_kr[i])
# # print(y_kr.shape)
# print(dimension,evaluation.eval_test(y_kr))
# write_csv(train_price, './result.csv')
# # print(data)
...
...
description_map.py
0 → 100644
浏览文件 @
24336635
import
numpy
as
np
# Hand-written encoding tables: feature name -> {raw category string: integer code}.
# Each feature gets its own dict object, even where two tables hold equal contents.
value_map = {
    "MSSubClass": {
        '180': 1,
        '30': 2, '45': 2,
        '190': 3, '50': 3, '90': 3,
        '85': 4, '40': 4, '160': 4,
        '70': 5, '20': 5, '75': 5, '80': 5, '150': 5,
        '120': 6, '60': 6,
    },
    "MSZoning": {
        'A': 1, 'C': 4, 'FV': 1, 'I': 3, 'RH': 3, 'RL': 2, 'RP': 3, 'RM': 2,
    },
    "Neighborhood": {
        'MeadowV': 1,
        'IDOTRR': 2, 'BrDale': 2,
        'OldTown': 3, 'Edwards': 3, 'BrkSide': 3,
        'Sawyer': 4, 'Blueste': 4, 'SWISU': 4, 'Names': 4,
        'NPkVill': 5, 'Mitchel': 5,
        'SawyerW': 6, 'Gilbert': 6, 'NWAmes': 6,
        'Blmngtn': 7, 'CollgCr': 7, 'ClearCr': 7, 'Crawfor': 7,
        'Veenker': 8, 'Somerst': 8, 'Timber': 8,
        'StoneBr': 9,
        'NoRidge': 10, 'NridgHt': 10,
    },
    "Condition1": {
        'Artery': 1,
        'Feedr': 2, 'RRAe': 2,
        'Norm': 3, 'RRAn': 3,
        'PosN': 4, 'RRNe': 4,
        'PosA': 5, 'RRNn': 5,
    },
    "BldgType": {
        '2FmCon': 1, 'Duplx': 1, 'TwnhsI': 1,
        '1Fam': 2, 'TwnhsE': 2,
    },
    "HouseStyle": {
        '1.5Unf': 1,
        '1.5Fin': 2, '2.5Unf': 2, 'SFoyer': 2,
        '1Story': 3, 'SLvl': 3,
        '2Story': 4, '2.5Fin': 4,
    },
    "Exterior1st": {
        'BrkCmn': 1,
        'AsphShn': 2, 'CBlock': 2, 'AsbShng': 2,
        'WdShing': 3, 'Wd Sdng': 3, 'MetalSd': 3, 'Stucco': 3, 'HdBoard': 3, 'Other': 3,
        'BrkFace': 4, 'Plywood': 4, 'PreCast': 4,
        'VinylSd': 5,
        'CemntBd': 6,
        'Stone': 7, 'ImStucc': 7,
    },
    "MasVnrType": {
        'BrkCmn': 1, 'None': 1, 'CBlock': 1,
        'BrkFace': 2,
        'Stone': 3,
    },
    "ExterQual": {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    "Foundation": {
        'Slab': 1,
        'BrkTil': 2, 'CBlock': 2, 'Stone': 2,
        'Wood': 3,
        'PConc': 4,
    },
    "BsmtQual": {'NA': 1, 'Po': 2, 'Fa': 3, 'TA': 4, 'Gd': 5, 'Ex': 6},
    "BsmtExposure": {'NA': 1, 'No': 2, 'Av': 3, 'Mn': 3, 'Gd': 4},
    "Heating": {
        'Floor': 1, 'Grav': 1,
        'Wall': 2,
        'OthW': 3,
        'GasW': 4,
        'GasA': 5,
    },
    "HeatingQC": {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    "KitchenQual": {'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    "Functional": {
        'Sal': 1,
        'Sev': 2,
        'Maj2': 3, 'Maj1': 3,
        'Mod': 4,
        'Min2': 5, 'Min1': 5,
        'Typ': 6,
    },
    "FireplaceQu": {'NA': 1, 'Po': 1, 'Fa': 2, 'TA': 3, 'Gd': 4, 'Ex': 5},
    "GarageType": {
        'CarPort': 1, 'NA': 1,
        'Detchd': 2,
        '2Types': 3, 'Basment': 3,
        'Attchd': 4,
        'BuiltIn': 5,
    },
    "GarageFinish": {'NA': 1, 'Unf': 2, 'RFn': 3, 'Fin': 4},
    "PavedDrive": {'N': 1, 'P': 2, 'Y': 3},
    "SaleType": {
        'COD': 1, 'ConLD': 1, 'ConLI': 1, 'ConLw': 1, 'Oth': 1, 'WD': 1,
        'CWD': 2, 'VWD': 2,
        'Con': 3, 'New': 3,
    },
    "SaleCondition": {
        'AdjLand': 1,
        'Abnorml': 2, 'Alloca': 2, 'Family': 2,
        'Normal': 3,
        'Partial': 4,
    },
}
# (label to replace, replacement) pairs; the original note on this code
# says the CSV is wrong for these labels.
_KEY_FIXES = (
    ('Wd Shng', 'WdShing'),
    ('2fmCon', '2FmCon'),
    ('NAmes', 'Names'),
    ('Duplex', 'Duplx'),
    ('CmentBd', 'CemntBd'),
    ('C (all)', 'C'),
    ('Twnhs', 'TwnhsI'),
    ('Brk Cmn', 'BrkCmn'),
    ('BrkComm', 'BrkCmn'),
)


def fix_key(key):
    """Return the corrected spelling of a category label.

    Labels listed in ``_KEY_FIXES`` are replaced by their counterpart;
    every other value is returned unchanged.
    """
    for wrong, right in _KEY_FIXES:
        if key == wrong:
            return right
    return key
# Features for which fix_miss returns 1.
miss_0 = [
    "PoolQC", "MiscFeature", "Alley", "Fence",
    "FireplaceQu",
    "GarageQual", "GarageCond", "GarageFinish", "GarageYrBlt", "GarageType",
    "BsmtExposure", "BsmtCond", "BsmtQual", "BsmtFinType2", "BsmtFinType1",
    "MasVnrType",
]
# fix_miss does not consult the next two lists, so it returns 0 for these names.
miss_1 = [
    "MasVnrArea",
    "BsmtUnfSF", "TotalBsmtSF",
    "GarageCars",
    "BsmtFinSF2", "BsmtFinSF1",
    "GarageArea",
]
miss_2 = ['LotFrontage']


def fix_miss(name):
    """Return 1 when ``name`` is listed in ``miss_0``, otherwise 0."""
    return 1 if name in miss_0 else 0
# def fix_LotFrontage(Full_map):
# a = np.zeros(25)
# for i in range(25):
# a[Full_map['Neighborhood'][i]-1] +=
def add_future(features):
    """Add derived entries to ``features`` in place and return the same dict.

    Each new entry is a sum or product of entries already present (either
    inputs or entries derived earlier in this function).  New keys are
    inserted in a fixed order, and every sum is evaluated left to right.
    """

    def total(*names):
        # Left-to-right sum of the named entries; always builds a new value.
        running = features[names[0]]
        for name in names[1:]:
            running = running + features[name]
        return running

    def product(left, right):
        # Element-wise product of two named entries.
        return features[left] * features[right]

    floors = ("TotalBsmtSF", "1stFlrSF", "2ndFlrSF")
    porches = ("OpenPorchSF", "EnclosedPorch", "3SsnPorch", "ScreenPorch")

    features["TotalHouse"] = total(*floors)
    features["TotalArea"] = total(*floors, "GarageArea")

    features["TotalHouse_OverallQual"] = product("TotalHouse", "OverallQual")
    features["GrLivArea_OverallQual"] = product("GrLivArea", "OverallQual")

    # Same three combinations for each of these encoded features.
    for encoded in ("my_MSZoning", "my_Neighborhood"):
        features[encoded + "_TotalHouse"] = product(encoded, "TotalHouse")
        features[encoded + "_OverallQual"] = total(encoded, "OverallQual")
        features[encoded + "_YearBuilt"] = total(encoded, "YearBuilt")

    features["BsmtFinSF1_OverallQual"] = product("BsmtFinSF1", "OverallQual")

    features["my_Functional_TotalHouse"] = product("my_Functional", "TotalHouse")
    features["my_Functional_OverallQual"] = total("my_Functional", "OverallQual")
    features["LotArea_OverallQual"] = product("LotArea", "OverallQual")
    features["TotalHouse_LotArea"] = total("TotalHouse", "LotArea")
    features["my_Condition1_TotalHouse"] = product("my_Condition1", "TotalHouse")
    features["my_Condition1_OverallQual"] = total("my_Condition1", "OverallQual")

    features["Bsmt"] = total("BsmtFinSF1", "BsmtFinSF2", "BsmtUnfSF")
    features["Rooms"] = total("FullBath", "TotRmsAbvGrd")
    features["PorchArea"] = total(*porches)
    features["TotalPlace"] = total(*floors, "GarageArea", *porches)

    features['YrBltAndRemod'] = total('YearBuilt', 'YearRemodAdd')
    features['TotalSF'] = total(*floors)
    features['Total_sqr_footage'] = total(
        'BsmtFinSF1', 'BsmtFinSF2', '1stFlrSF', '2ndFlrSF')

    # Half baths count as 0.5 each.
    baths = features['FullBath'] + 0.5 * features['HalfBath']
    baths = baths + features['BsmtFullBath']
    features['Total_Bathrooms'] = baths + 0.5 * features['BsmtHalfBath']

    features['Total_porch_sf'] = total(
        'OpenPorchSF', '3SsnPorch', 'EnclosedPorch', 'ScreenPorch', 'WoodDeckSF')

    # The haspool / has2ndfloor / hasgarage / hasbsmt / hasfireplace
    # indicator features remain disabled.
    return features
\ No newline at end of file
evaluation.py
浏览文件 @
24336635
...
...
@@ -2,6 +2,7 @@ import csv
import
math
import
numpy
as
np
import
dataloader
import
transformer
def
load_submission
(
path
):
csv_data
=
[]
...
...
@@ -19,6 +20,8 @@ def eval_test(records_predict):
return
RMSE
(
records_real
,
records_predict
)
def
RMSE
(
records_real
,
records_predict
):
# records_real = np.log1p(records_real)
# records_predict = np.log1p(records_predict)
records_real
=
dataloader
.
normlize
(
np
.
array
(
records_real
),
True
)
records_predict
=
dataloader
.
normlize
(
np
.
array
(
records_predict
),
True
)
if
len
(
records_real
)
==
len
(
records_predict
):
...
...
@@ -28,7 +31,9 @@ def RMSE(records_real,records_predict):
return
None
def
main
():
my_price
=
load_submission
(
'./datasets/sample_submission.csv'
)
# my_price = load_submission('./datasets/sample_submission.csv')
my_price
=
load_submission
(
'./result/0.03688_0.14435.csv'
)
print
(
eval_test
(
my_price
))
if
__name__
==
'__main__'
:
main
()
\ No newline at end of file
ml.py
0 → 100644
浏览文件 @
24336635
from
sklearn.linear_model
import
ElasticNet
,
Lasso
,
BayesianRidge
,
LassoLarsIC
from
sklearn.ensemble
import
RandomForestRegressor
,
GradientBoostingRegressor
from
sklearn.kernel_ridge
import
KernelRidge
from
sklearn.pipeline
import
make_pipeline
from
sklearn.preprocessing
import
RobustScaler
from
sklearn.base
import
BaseEstimator
,
TransformerMixin
,
RegressorMixin
,
clone
from
sklearn.model_selection
import
KFold
,
cross_val_score
,
train_test_split
from
sklearn.metrics
import
mean_squared_error
from
sklearn.model_selection
import
GridSearchCV
# import xgboost as xgb
# import lightgbm as lgb
import
numpy
as
np
import
torch
import
dataloader
import
evaluation
import
time
import
transformer
# Value passed to dataloader.load_all, which uses it as the PCA component count.
dimension = 85
train_desc, train_price, test_desc = dataloader.load_all(dimension)

# Grid-search a polynomial-kernel ridge regressor over alpha and gamma.
kr = GridSearchCV(KernelRidge(kernel='polynomial'),
                  param_grid={"alpha": np.logspace(-3, 2, 6),
                              "gamma": np.logspace(-2, 2, 5)})
# print(np.logspace(-2, 2, 5))
kr.fit(train_desc, train_price)
y_kr = kr.predict(test_desc)
# Overwrite each prediction with its dataloader.convert2price value.
for i in range(len(y_kr)):
    y_kr[i] = dataloader.convert2price(y_kr[i])
# print(y_kr.shape)
# Report the component count next to the score from evaluation.eval_test.
print(dimension, evaluation.eval_test(y_kr))
dataloader.write_csv(y_kr, './result/result.csv')
\ No newline at end of file
model.py
浏览文件 @
24336635
...
...
@@ -19,4 +19,4 @@ class Linear(nn.Module):
x
=
self
.
relu
(
x
)
x
=
self
.
dropout
(
x
)
x
=
self
.
fc2
(
x
)
return
x
\ No newline at end of file
return
x
result/0.02839_0.12229.csv
0 → 100644
浏览文件 @
24336635
此差异已折叠。
点击以展开。
train.py
浏览文件 @
24336635
...
...
@@ -5,7 +5,7 @@ import model
import
evaluation
from
torch
import
nn
,
optim
import
time
import
transformer
#parameter
LR
=
0.0001
...
...
@@ -14,14 +14,14 @@ BATCHSIZE = 64
CONTINUE
=
False
use_gpu
=
True
SAVE_FRE
=
5
Dimension
=
120
#load data
train_desc
,
train_price
,
test_desc
=
dataloader
.
load_all
()
train_desc
,
train_price
,
test_desc
=
dataloader
.
load_all
(
Dimension
)
train_desc
.
tolist
()
train_price
.
tolist
()
#def network
net
=
model
.
Linear
(
79
,
256
,
1
)
net
=
model
.
Linear
(
Dimension
,
256
,
1
)
print
(
net
)
if
CONTINUE
:
...
...
@@ -43,9 +43,13 @@ for epoch in range(EPOCHS):
price_pres
=
[]
price_trues
=
[]
dataloader
.
match_random
(
train_desc
,
train_price
)
transformer
.
match_random
(
train_desc
,
train_price
)
train_desc
=
np
.
array
(
train_desc
)
train_price
=
np
.
array
(
train_price
)
# train_desc = transformer.random_transform(train_desc, 0.02)
# train_price = transformer.random_transform(train_price, 0.02)
for
i
in
range
(
int
(
len
(
train_desc
)
/
BATCHSIZE
)):
desc
=
np
.
zeros
((
BATCHSIZE
,
79
),
dtype
=
np
.
float32
)
desc
=
np
.
zeros
((
BATCHSIZE
,
Dimension
),
dtype
=
np
.
float32
)
price
=
np
.
zeros
((
BATCHSIZE
,
1
),
dtype
=
np
.
float32
)
for
j
in
range
(
BATCHSIZE
):
desc
[
j
]
=
train_desc
[
i
*
BATCHSIZE
+
j
:
i
*
BATCHSIZE
+
j
+
1
]
...
...
@@ -69,7 +73,7 @@ for epoch in range(EPOCHS):
net
.
eval
()
price_pres
=
[]
for
i
in
range
(
len
(
test_desc
)):
desc
=
(
test_desc
[
i
]).
reshape
(
1
,
79
)
desc
=
(
test_desc
[
i
]).
reshape
(
1
,
Dimension
)
desc
=
torch
.
from_numpy
(
desc
).
cuda
()
price_pre
=
net
(
desc
)
price_pres
.
append
(
dataloader
.
convert2price
(
price_pre
.
cpu
().
detach
().
numpy
()[
0
][
0
]))
...
...
transformer.py
0 → 100644
浏览文件 @
24336635
import
numpy
as
np
import
random
def match_random(a, b):
    """Shuffle ``a`` and ``b`` in place from the same NumPy RNG state.

    The global ``np.random`` state is captured once and restored before
    each shuffle, so both shuffles consume the same random stream;
    equal-length inputs are therefore permuted alike.  Returns ``None``.
    """
    rng_snapshot = np.random.get_state()
    for sequence in (a, b):
        # Restoring before the first shuffle is a no-op; before the second
        # it rewinds the generator to where the first shuffle started.
        np.random.set_state(rng_snapshot)
        np.random.shuffle(sequence)
def random_transform(a, alpha):
    """Scale ``a`` by one random factor drawn from [1 - alpha, 1 + alpha].

    A single ``random.uniform`` draw is made per call and applied to the
    whole of ``a``.
    """
    lower = 1 - alpha
    upper = 1 + alpha
    factor = random.uniform(lower, upper)
    return a * factor
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录