Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
pycaret
提交
bb8db3a1
pycaret
项目概览
OpenDocCN
/
pycaret
通知
2
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
pycaret
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
bb8db3a1
编写于
12月 31, 2019
作者:
P
pycaret
提交者:
GitHub
12月 31, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add files via upload
上级
b45a03b9
变更
2
展开全部
隐藏空白更改
内联
并排
Showing
2 changed file
with
1920 addition
and
29 deletion
+1920
-29
anomaly.py
anomaly.py
+152
-29
clustering.py
clustering.py
+1768
-0
未找到文件。
anomaly.py
浏览文件 @
bb8db3a1
def
setup
(
data
,
session_id
=
None
,
verbose
=
True
):
def
setup
(
data
,
session_id
=
None
,
normalize
=
False
,
verbose
=
True
):
"""
...
...
@@ -26,7 +29,9 @@ def setup(data, session_id = None, verbose=True):
is then distributed as a seed in all other functions used during experiment. This can
be used later for reproducibility of entire experiment.
normalize: bool, default = False
scaling of feature set using MinMaxScaler. by default normalize is set to False.
Returns:
--------
...
...
@@ -107,7 +112,7 @@ def setup(data, session_id = None, verbose=True):
import
random
#defining global variables
global
data_
,
experiment__
,
seed
global
X
,
data_
,
experiment__
,
seed
#copying data
data_
=
data
.
copy
()
...
...
@@ -128,14 +133,39 @@ def setup(data, session_id = None, verbose=True):
progress
.
value
+=
1
#scaling
if
normalize
:
from
sklearn.preprocessing
import
MinMaxScaler
scaler
=
MinMaxScaler
()
X
=
pd
.
get_dummies
(
data_
)
scaler
=
scaler
.
fit
(
X
)
#append to experiment__
experiment__
.
append
((
'Scaler'
,
scaler
))
X
=
scaler
.
transform
(
X
)
X
=
pd
.
DataFrame
(
X
)
else
:
X
=
data_
.
copy
()
X
=
pd
.
get_dummies
(
data_
)
'''
Final display Starts
'''
shape
=
data
.
shape
#normalize param
if
normalize
:
scaling
=
'True'
else
:
scaling
=
'False'
functions
=
pd
.
DataFrame
(
[
[
'session_id'
,
seed
],
[
'Scaling'
,
scaling
],
[
'Shape'
,
shape
],
],
columns
=
[
'Description'
,
'Value'
]
)
...
...
@@ -150,16 +180,16 @@ def setup(data, session_id = None, verbose=True):
'''
#log into experiment
if
len
(
experiment__
)
==
0
:
experiment__
.
append
((
'Info'
,
functions
))
if
verbose
:
experiment__
.
append
((
'
Anomaly
Info'
,
functions
))
experiment__
.
append
((
'Dataset'
,
data_
))
experiment__
.
append
((
'Scaled Dataset'
,
X
))
return
data_
,
seed
,
experiment__
return
X
,
data_
,
seed
,
experiment__
def
create_model
(
model
=
None
,
fraction
=
0.05
,
verbose
=
True
):
verbose
=
True
):
...
...
@@ -211,7 +241,7 @@ def create_model(model = None,
Returns:
--------
model:
trained model object
model: trained model object
------
Warnings:
...
...
@@ -285,17 +315,10 @@ def create_model(model = None,
"""
#monitor update
monitor
.
iloc
[
1
,
1
:]
=
'
Scaling the Data
'
monitor
.
iloc
[
1
,
1
:]
=
'
Importing the Model
'
if
verbose
:
update_display
(
monitor
,
display_id
=
'monitor'
)
#scaling data
from
sklearn.preprocessing
import
MinMaxScaler
scaler
=
MinMaxScaler
()
X
=
pd
.
get_dummies
(
data_
)
X
=
scaler
.
fit_transform
(
X
)
X
=
pd
.
DataFrame
(
X
)
progress
.
value
+=
1
#create model
...
...
@@ -446,7 +469,7 @@ def assign_model(model,
#checking for allowed models
allowed_type
=
[
'pyod'
]
if
'pyod'
not
in
allowe
d_type
:
if
'pyod'
not
in
mo
d_type
:
sys
.
exit
(
'(Value Error): Model Not Recognized. Please see docstring for list of available models.'
)
#checking verbose parameter
...
...
@@ -670,7 +693,7 @@ def tune_model(model=None,
"""
#testing
#no active test
global
master
,
master_df
#ignore warnings
import
warnings
...
...
@@ -820,6 +843,7 @@ def tune_model(model=None,
progress
.
value
+=
1
#defining tuning grid
param_grid_with_zero
=
[
0
,
0.01
,
0.02
,
0.03
,
0.04
,
0.05
,
0.06
,
0.07
,
0.08
,
0.09
,
0.10
]
param_grid
=
[
0.01
,
0.02
,
0.03
,
0.04
,
0.05
,
0.06
,
0.07
,
0.08
,
0.09
,
0.10
]
master
=
[];
master_df
=
[]
...
...
@@ -827,6 +851,10 @@ def tune_model(model=None,
monitor
.
iloc
[
1
,
1
:]
=
'Creating Outlier Detection Model'
update_display
(
monitor
,
display_id
=
'monitor'
)
#adding dummy model in master
master
.
append
(
'No Model Required'
)
master_df
.
append
(
'No Model Required'
)
#removing target variable from data by defining new setup
target_
=
pd
.
DataFrame
(
data_
[
supervised_target
])
data_without_target
=
data_
.
copy
()
...
...
@@ -975,9 +1003,57 @@ def tune_model(model=None,
acc
=
[];
auc
=
[];
recall
=
[];
prec
=
[];
kappa
=
[];
f1
=
[]
for
i
in
range
(
0
,
len
(
master_df
)):
#build model without anomaly detection
monitor
.
iloc
[
2
,
1
:]
=
'Evaluating Classifier Without Anomaly Detector'
update_display
(
monitor
,
display_id
=
'monitor'
)
d
=
master_df
[
1
].
copy
()
d
.
drop
([
'Label'
,
'Score'
],
axis
=
1
,
inplace
=
True
)
#drop NA's caution
d
.
dropna
(
axis
=
0
,
inplace
=
True
)
#get_dummies to caste categorical variables for supervised learning
d
=
pd
.
get_dummies
(
d
)
#split the dataset
X
=
d
.
drop
(
supervised_target
,
axis
=
1
)
y
=
d
[
supervised_target
]
#fit the model
model
.
fit
(
X
,
y
)
#generate the prediction and evaluate metric
pred
=
cross_val_predict
(
model
,
X
,
y
,
cv
=
fold
,
method
=
'predict'
)
acc_
=
metrics
.
accuracy_score
(
y
,
pred
)
acc
.
append
(
acc_
)
recall_
=
metrics
.
recall_score
(
y
,
pred
)
recall
.
append
(
recall_
)
precision_
=
metrics
.
precision_score
(
y
,
pred
)
prec
.
append
(
precision_
)
kappa_
=
metrics
.
cohen_kappa_score
(
y
,
pred
)
kappa
.
append
(
kappa_
)
f1_
=
metrics
.
f1_score
(
y
,
pred
)
f1
.
append
(
f1_
)
if
hasattr
(
model
,
'predict_proba'
):
pred_
=
cross_val_predict
(
model
,
X
,
y
,
cv
=
fold
,
method
=
'predict_proba'
)
pred_prob
=
pred_
[:,
1
]
auc_
=
metrics
.
roc_auc_score
(
y
,
pred_prob
)
auc
.
append
(
auc_
)
else
:
auc
.
append
(
0
)
for
i
in
range
(
1
,
len
(
master_df
)):
progress
.
value
+=
1
param_grid_val
=
param_grid
[
i
]
param_grid_val
=
param_grid
[
i
-
1
]
monitor
.
iloc
[
2
,
1
:]
=
'Evaluating Classifier With '
+
str
(
param_grid_val
)
+
' Fraction'
update_display
(
monitor
,
display_id
=
'monitor'
)
...
...
@@ -1036,7 +1112,7 @@ def tune_model(model=None,
monitor
.
iloc
[
1
,
1
:]
=
'Finalizing'
update_display
(
monitor
,
display_id
=
'monitor'
)
df
=
pd
.
DataFrame
({
'Fraction'
:
param_grid
,
'Accuracy'
:
acc
,
'AUC'
:
auc
,
'Recall'
:
recall
,
df
=
pd
.
DataFrame
({
'Fraction'
:
param_grid
_with_zero
,
'Accuracy'
:
acc
,
'AUC'
:
auc
,
'Recall'
:
recall
,
'Precision'
:
prec
,
'F1'
:
f1
,
'Kappa'
:
kappa
})
sorted_df
=
df
.
sort_values
(
by
=
optimize
,
ascending
=
False
)
...
...
@@ -1226,9 +1302,56 @@ def tune_model(model=None,
score
=
[]
metric
=
[]
for
i
in
range
(
0
,
len
(
master_df
)):
#build model without clustering
monitor
.
iloc
[
2
,
1
:]
=
'Evaluating Regressor Without Clustering'
update_display
(
monitor
,
display_id
=
'monitor'
)
d
=
master_df
[
1
].
copy
()
d
.
drop
([
'Label'
,
'Score'
],
axis
=
1
,
inplace
=
True
)
#drop NA's caution
d
.
dropna
(
axis
=
0
,
inplace
=
True
)
#get_dummies to caste categorical variables for supervised learning
d
=
pd
.
get_dummies
(
d
)
#split the dataset
X
=
d
.
drop
(
supervised_target
,
axis
=
1
)
y
=
d
[
supervised_target
]
#fit the model
model
.
fit
(
X
,
y
)
#generate the prediction and evaluate metric
pred
=
cross_val_predict
(
model
,
X
,
y
,
cv
=
fold
,
method
=
'predict'
)
if
optimize
==
'R2'
:
r2_
=
metrics
.
r2_score
(
y
,
pred
)
score
.
append
(
r2_
)
elif
optimize
==
'MAE'
:
mae_
=
metrics
.
mean_absolute_error
(
y
,
pred
)
score
.
append
(
mae_
)
elif
optimize
==
'MSE'
:
mse_
=
metrics
.
mean_squared_error
(
y
,
pred
)
score
.
append
(
mse_
)
elif
optimize
==
'RMSE'
:
mse_
=
metrics
.
mean_squared_error
(
y
,
pred
)
rmse_
=
np
.
sqrt
(
mse_
)
score
.
append
(
rmse_
)
elif
optimize
==
'ME'
:
max_error_
=
metrics
.
max_error
(
y
,
pred
)
score
.
append
(
max_error_
)
metric
.
append
(
str
(
optimize
))
for
i
in
range
(
1
,
len
(
master_df
)):
progress
.
value
+=
1
param_grid_val
=
param_grid
[
i
]
param_grid_val
=
param_grid
[
i
-
1
]
monitor
.
iloc
[
2
,
1
:]
=
'Evaluating Regressor With '
+
str
(
param_grid_val
)
+
' Fraction'
update_display
(
monitor
,
display_id
=
'monitor'
)
...
...
@@ -1248,8 +1371,6 @@ def tune_model(model=None,
d
=
pd
.
get_dummies
(
d
)
#clean.append(d) #for testing only
#split the dataset
X
=
d
.
drop
(
supervised_target
,
axis
=
1
)
y
=
d
[
supervised_target
]
...
...
@@ -1287,7 +1408,7 @@ def tune_model(model=None,
monitor
.
iloc
[
1
,
1
:]
=
'Finalizing'
update_display
(
monitor
,
display_id
=
'monitor'
)
df
=
pd
.
DataFrame
({
'Fraction'
:
param_grid
,
'Score'
:
score
,
'Metric'
:
metric
})
df
=
pd
.
DataFrame
({
'Fraction'
:
param_grid
_with_zero
,
'Score'
:
score
,
'Metric'
:
metric
})
df
.
columns
=
[
'Fraction'
,
optimize
,
'Metric'
]
#sorting to return best model
...
...
@@ -1409,6 +1530,8 @@ def plot_model(model,
Label
=
pd
.
DataFrame
(
b
[
'Label'
])
b
.
dropna
(
axis
=
0
,
inplace
=
True
)
#droping rows with NA's
b
.
drop
([
'Label'
],
axis
=
1
,
inplace
=
True
)
b
=
pd
.
get_dummies
(
b
)
#casting categorical variables
from
sklearn.manifold
import
TSNE
X_embedded
=
TSNE
(
n_components
=
3
).
fit_transform
(
b
)
...
...
@@ -1428,6 +1551,7 @@ def plot_model(model,
Label
=
pd
.
DataFrame
(
b
[
'Label'
])
b
.
dropna
(
axis
=
0
,
inplace
=
True
)
#droping rows with NA's
b
.
drop
([
'Label'
],
axis
=
1
,
inplace
=
True
)
b
=
pd
.
get_dummies
(
b
)
#casting categorical variables
import
umap
reducer
=
umap
.
UMAP
()
...
...
@@ -1637,4 +1761,3 @@ def load_experiment(experiment_name):
display
(
ind
)
return
exp
clustering.py
0 → 100644
浏览文件 @
bb8db3a1
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录