未验证 提交 39b8adbc 编写于 作者: P PyCaret 提交者: GitHub

Add files via upload

上级 eaf9a7fc
......@@ -2,10 +2,10 @@
PyCaret is an end-to-end open source machine learning library for the Python programming language. Its primary objective is to reduce the cycle time from hypothesis to insights by providing an easy-to-use, high-level unified API. PyCaret's vision is to become the de facto standard for teaching machine learning and data science. Our strength is our easy-to-use unified interface for both supervised and unsupervised learning. It saves the time and effort that citizen data scientists, students, and researchers spend on coding or learning to code with different interfaces, so that they can focus on the business problem.
## Current Release
The current release is beta 0.0.33 (as of 04/02/2020). A full release is targetted in the first week of February 2020.
The current release is beta 0.0.34 (as of 05/02/2020). A full release is targeted in the first week of February 2020.
## Features Currently Available
As per beta 0.0.33 following modules are generally available:
As per beta 0.0.34 following modules are generally available:
* pycaret.datasets <br/>
* pycaret.classification (binary and multiclass) <br/>
* pycaret.regression <br/>
......@@ -31,7 +31,7 @@ pip install pycaret
```
## Quick Start
As of beta 0.0.33 classification, regression, nlp, arules, anomaly and clustering modules are available.
As of beta 0.0.34 classification, regression, nlp, arules, anomaly and clustering modules are available.
### Classification / Regression
......
......@@ -3,12 +3,13 @@
# License: MIT
def setup(data,
categorical_features = None,
categorical_imputation = 'constant',
ordinal_features = None, #new
numeric_features = None,
numeric_imputation = 'mean',
numeric_imputation = 'mean',
date_features = None,
ignore_features = None,
normalize = False,
......@@ -194,8 +195,8 @@ def setup(data,
remove_multicollinearity: bool, default = False
When set to True, the variables with inter-correlations higher than the threshold
defined under the multicollinearity_threshold param are dropped. When two features
are highly correlated with each other, the feature with less average correlation in
the feature space is dropped.
are highly correlated with each other, the feature with higher average correlation
in the feature space is dropped.
multicollinearity_threshold: float, default = 0.9
Threshold used for dropping the correlated features. Only comes into effect when
......@@ -434,6 +435,10 @@ def setup(data,
from IPython.display import display, HTML, clear_output, update_display
import datetime, time
#pandas option
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
#progress bar
max_steps = 4
......@@ -753,8 +758,8 @@ def setup(data,
"""
#reset pandas option
#pd.reset_option("display.max_rows")
#pd.reset_option("display.max_columns")
pd.reset_option("display.max_rows")
pd.reset_option("display.max_columns")
#create an empty list for pickling later.
if supervised is False:
......
......@@ -613,6 +613,10 @@ def setup(data,
import ipywidgets as ipw
from IPython.display import display, HTML, clear_output, update_display
import datetime, time
#pandas option
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
#progress bar
if sampling:
......@@ -1006,8 +1010,8 @@ def setup(data,
"""
#reset pandas option
#pd.reset_option("display.max_rows")
#pd.reset_option("display.max_columns")
pd.reset_option("display.max_rows")
pd.reset_option("display.max_columns")
#create an empty list for pickling later.
experiment__ = []
......@@ -1453,7 +1457,6 @@ def setup(data,
def create_model(estimator = None,
ensemble = False,
method = None,
......@@ -5069,8 +5072,8 @@ def stack_models(estimator_list,
meta_model = None,
fold = 10,
round = 4,
method = 'hard',
restack = False,
method = 'soft',
restack = True,
plot = False,
finalize = False,
verbose = True):
......@@ -5120,11 +5123,11 @@ def stack_models(estimator_list,
round: integer, default = 4
Number of decimal places the metrics in the score grid will be rounded to.
method: string, default = 'hard'
'hard', uses predicted class labels as an input to the meta model.
method: string, default = 'soft'
'soft', uses predicted probabilities as an input to the meta model.
'hard', uses predicted class labels as an input to the meta model.
restack: Boolean, default = False
restack: Boolean, default = True
When restack is set to True, raw data will be exposed to meta model when
making predictions, otherwise when False, only the predicted label or
probabilities is passed to meta model when making final predictions.
......@@ -5236,6 +5239,7 @@ def stack_models(estimator_list,
from IPython.display import display, HTML, clear_output, update_display
import time, datetime
from copy import deepcopy
from sklearn.base import clone
#copy estimator_list
estimator_list = deepcopy(estimator_list)
......@@ -5606,8 +5610,8 @@ def create_stacknet(estimator_list,
meta_model = None,
fold = 10,
round = 4,
method = 'hard',
restack = False,
method = 'soft',
restack = True,
finalize = False,
verbose = True):
......@@ -5654,11 +5658,11 @@ def create_stacknet(estimator_list,
round: integer, default = 4
Number of decimal places the metrics in the score grid will be rounded to.
method: string, default = 'hard'
'hard', uses predicted class labels as an input to the meta model.
method: string, default = 'soft'
'soft', uses predicted probabilities as an input to the meta model.
'hard', uses predicted class labels as an input to the meta model.
restack: Boolean, default = False
restack: Boolean, default = True
When restack is set to True, raw data and prediction of all layers will be
exposed to the meta model when making predictions. When set to False, only
the predicted label or probabilities of last layer is passed to meta model
......@@ -5709,7 +5713,7 @@ def create_stacknet(estimator_list,
'''
#testing
#no active test
#global inter_level_names
#exception checking
import sys
......@@ -5717,13 +5721,17 @@ def create_stacknet(estimator_list,
#checking estimator_list
if type(estimator_list[0]) is not list:
sys.exit("(Type Error): estimator_list parameter must be list of list. ")
#blocking stack_models usecase
if len(estimator_list) == 1:
sys.exit("(Type Error): Single Layer stacking must be performed using stack_models(). ")
#checking error for estimator_list
for i in estimator_list:
for j in i:
if 'sklearn' not in str(type(j)) and 'CatBoostClassifier' not in str(type(j)):
sys.exit("(Value Error): estimator_list parameter only trained model object")
#checking meta model
if meta_model is not None:
if 'sklearn' not in str(type(meta_model)) and 'CatBoostClassifier' not in str(type(meta_model)):
......@@ -5767,6 +5775,7 @@ def create_stacknet(estimator_list,
from IPython.display import display, HTML, clear_output, update_display
import time, datetime
from copy import deepcopy
from sklearn.base import clone
#copy estimator_list
estimator_list = deepcopy(estimator_list)
......@@ -5845,8 +5854,13 @@ def create_stacknet(estimator_list,
#defining inter_level names
for item in inter_level:
level_list=[]
for m in item:
inter_level_names = np.append(inter_level_names, str(m).split("(")[0])
if 'CatBoostClassifier' in str(m).split("(")[0]:
level_list.append('CatBoostClassifier')
else:
level_list.append(str(m).split("(")[0])
inter_level_names.append(level_list)
#defining data_X and data_y
if finalize:
......@@ -5931,15 +5945,15 @@ def create_stacknet(estimator_list,
MONITOR UPDATE STARTS
'''
monitor.iloc[1,1:] = 'Evaluating ' + inter_level_names[inter_counter]
monitor.iloc[1,1:] = 'Evaluating ' + inter_level_names[inter_counter][model_counter]
update_display(monitor, display_id = 'monitor')
'''
MONITOR UPDATE ENDS
'''
model = model.fit(X = base_array_df, y = data_y) #changed to data_y
inter_inner.append(model)
model = clone(model)
inter_inner.append(model.fit(X = base_array_df, y = data_y)) #changed to data_y
if method == 'soft':
try:
......@@ -5983,6 +5997,7 @@ def create_stacknet(estimator_list,
base_array_df = base_array_df.iloc[:,i:]
inter_counter += 1
progress.value += 1
model = meta_model
......@@ -6180,7 +6195,7 @@ def create_stacknet(estimator_list,
else:
clear_output()
return models_
return models_
......@@ -7295,7 +7310,7 @@ def predict_model(estimator,
"""
#testing
#no active tests
#global base_pred_df, base_pred_df_no_restack, df, df_restack, stacker_method, combined_df, inter_pred_df
#ignore warnings
import warnings
......@@ -7437,7 +7452,7 @@ def predict_model(estimator,
"""
base_pred = []
for i in stacker_base:
if stacker_method == 'soft':
if 'soft' in stacker_method:
try:
a = i.predict_proba(Xtest) #change
a = a[:,1]
......@@ -7456,7 +7471,7 @@ def predict_model(estimator,
base_pred_df_no_restack = base_pred_df.copy()
base_pred_df = pd.concat([Xtest,base_pred_df], axis=1)
"""
inter level predictions
......@@ -7468,15 +7483,16 @@ def predict_model(estimator,
inter_counter = 0
for level in stacker:
inter_pred_df = pd.DataFrame()
model_counter = 0
for model in level:
try:
if inter_counter == 0:
if stacker_method == 'soft':
if 'soft' in stacker_method: #changed
try:
p = model.predict_proba(base_pred_df)
p = p[:,1]
......@@ -7495,7 +7511,7 @@ def predict_model(estimator,
except:
p = model.predict(base_pred_df_no_restack)
else:
if stacker_method == 'soft':
if 'soft' in stacker_method:
try:
p = model.predict_proba(last_level_df)
p = p[:,1]
......@@ -7504,14 +7520,15 @@ def predict_model(estimator,
else:
p = model.predict(last_level_df)
except:
if stacker_method == 'soft':
if 'soft' in stacker_method:
try:
p = model.predict_proba(combined_df)
p = p[:,1]
except:
p = model.predict(combined_df)
p = model.predict(combined_df)
p = pd.DataFrame(p)
col = str(model).split("(")[0]
if 'CatBoostClassifier' in col:
col = 'CatBoostClassifier'
......@@ -7533,6 +7550,7 @@ def predict_model(estimator,
"""
#final meta predictions
try:
pred_ = stacker_meta.predict(combined_df)
except:
......@@ -7644,6 +7662,7 @@ def predict_model(estimator,
p = i.predict(Xtest) #change
else:
try:
p = i.predict_proba(Xtest) #change
p = p[:,1]
......@@ -7667,7 +7686,7 @@ def predict_model(estimator,
df.fillna(value=0,inplace=True)
df_restack.fillna(value=0,inplace=True)
#restacking check
try:
pred_ = meta_model.predict(df)
......@@ -7684,7 +7703,7 @@ def predict_model(estimator,
pred_prob = pred_prob[:,1]
except:
pass
if data is None:
sca = metrics.accuracy_score(ytest,pred_)
......@@ -7815,6 +7834,7 @@ def predict_model(estimator,
return X_test_
def deploy_model(model,
model_name,
authentication,
......
......@@ -195,8 +195,8 @@ def setup(data,
remove_multicollinearity: bool, default = False
When set to True, the variables with inter-correlations higher than the threshold
defined under the multicollinearity_threshold param are dropped. When two features
are highly correlated with each other, the feature with less average correlation in
the feature space is dropped.
are highly correlated with each other, the feature with higher average correlation
in the feature space is dropped.
multicollinearity_threshold: float, default = 0.9
Threshold used for dropping the correlated features. Only comes into effect when
......@@ -435,6 +435,10 @@ def setup(data,
from IPython.display import display, HTML, clear_output, update_display
import datetime, time
#pandas option
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
#progress bar
max_steps = 4
......@@ -754,8 +758,8 @@ def setup(data,
"""
#reset pandas option
#pd.reset_option("display.max_rows")
#pd.reset_option("display.max_columns")
pd.reset_option("display.max_rows")
pd.reset_option("display.max_columns")
#create an empty list for pickling later.
if supervised is False:
......
......@@ -635,7 +635,11 @@ def setup(data,
import ipywidgets as ipw
from IPython.display import display, HTML, clear_output, update_display
import datetime, time
#pandas option
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
#progress bar
if sampling:
max = 10 + 3
......@@ -1045,8 +1049,8 @@ def setup(data,
"""
#reset pandas option
#pd.reset_option("display.max_rows") #switch back on
#pd.reset_option("display.max_columns")
pd.reset_option("display.max_rows") #switch back on
pd.reset_option("display.max_columns")
#create an empty list for pickling later.
experiment__ = []
......@@ -4555,12 +4559,11 @@ def tune_model(estimator = None,
def stack_models(estimator_list,
meta_model = None,
fold = 10,
round = 4,
restack = False,
restack = True,
plot = False,
finalize = False,
verbose = True):
......@@ -4611,7 +4614,7 @@ def stack_models(estimator_list,
round: integer, default = 4
Number of decimal places the metrics in the score grid will be rounded to.
restack: Boolean, default = False
restack: Boolean, default = True
When restack is set to True, raw data will be exposed to meta model when
making predictions, otherwise when False, only the predicted label is passed
to meta model when making final predictions.
......@@ -5049,12 +5052,11 @@ def stack_models(estimator_list,
def create_stacknet(estimator_list,
meta_model = None,
fold = 10,
round = 4,
restack = False,
restack = True,
finalize = False,
verbose = True):
......@@ -5100,7 +5102,7 @@ def create_stacknet(estimator_list,
round: integer, default = 4
Number of decimal places the metrics in the score grid will be rounded to.
restack: Boolean, default = False
restack: Boolean, default = True
When restack is set to True, raw data and prediction of all layers will be
exposed to the meta model when making predictions. When set to False, only
the predicted label of last layer is passed to meta model when making final
......@@ -5143,7 +5145,7 @@ def create_stacknet(estimator_list,
'''
#for checking only
#No active test
global inter_level_names
#exception checking
import sys
......@@ -5151,7 +5153,11 @@ def create_stacknet(estimator_list,
#checking estimator_list
if type(estimator_list[0]) is not list:
sys.exit("(Type Error): estimator_list parameter must be list of list. ")
#blocking stack_models usecase
if len(estimator_list) == 1:
sys.exit("(Type Error): Single Layer stacking must be performed using stack_models(). ")
#checking error for estimator_list
for i in estimator_list:
for j in i:
......@@ -5185,12 +5191,15 @@ def create_stacknet(estimator_list,
'''
global inter_level_names
#pre-load libraries
import pandas as pd
import ipywidgets as ipw
from IPython.display import display, HTML, clear_output, update_display
import time, datetime
from copy import deepcopy
from sklearn.base import clone
#copy estimator_list
estimator_list = deepcopy(estimator_list)
......@@ -5238,7 +5247,6 @@ def create_stacknet(estimator_list,
progress.value += 1
base_level = estimator_list[0]
base_level_names = []
......@@ -5273,13 +5281,14 @@ def create_stacknet(estimator_list,
#defining inter_level names
for item in inter_level:
level_list=[]
for m in item:
inter_level_names = np.append(inter_level_names, str(m).split("(")[0])
if 'CatBoostRegressor' in str(m).split("(")[0]:
level_list.append('CatBoostRegressor')
else:
level_list.append(str(m).split("(")[0])
inter_level_names.append(level_list)
#defining inter_level names
for item in inter_level:
for m in item:
inter_level_names = np.append(inter_level_names, str(m).split("(")[0])
#defining data_X and data_y
if finalize:
......@@ -5350,15 +5359,17 @@ def create_stacknet(estimator_list,
MONITOR UPDATE STARTS
'''
monitor.iloc[1,1:] = 'Evaluating ' + inter_level_names[inter_counter]
monitor.iloc[1,1:] = 'Evaluating ' + inter_level_names[inter_counter][model_counter]
update_display(monitor, display_id = 'monitor')
'''
MONITOR UPDATE ENDS
'''
model = model.fit(X = base_array_df, y = data_y) #changed to data_y
inter_inner.append(model)
model = clone(model)
inter_inner.append(model.fit(X = base_array_df, y = data_y)) #changed to data_y
#model = model.fit(X = base_array_df, y = data_y) #changed to data_y
#inter_inner.append(model)
base_array = cross_val_predict(model,X = base_array_df, y = data_y,cv=fold, method='predict')
base_array = pd.DataFrame(base_array)
......@@ -5391,6 +5402,7 @@ def create_stacknet(estimator_list,
base_array_df = base_array_df.iloc[:,i:]
inter_counter += 1
progress.value += 1
model = meta_model
......@@ -5587,6 +5599,7 @@ def create_stacknet(estimator_list,
def plot_model(estimator,
plot = 'residuals'):
......@@ -6467,6 +6480,7 @@ def load_experiment(experiment_name):
def predict_model(estimator,
data=None,
platform=None,
......@@ -6992,6 +7006,7 @@ def predict_model(estimator,
def deploy_model(model,
model_name,
authentication,
......
......@@ -28,5 +28,4 @@ datefinder==0.7.0
datetime
DateTime==4.3
tqdm==4.36.1
awscli
boto3
\ No newline at end of file
awscli
\ No newline at end of file
......@@ -27,7 +27,7 @@ def readme():
setup(
name="pycaret",
version="0.0.33",
version="0.0.34",
description="A Python package for supervised and unsupervised machine learning.",
long_description=readme(),
long_description_content_type="text/markdown",
......@@ -47,5 +47,5 @@ setup(
"wordcloud", "textblob", "plotly==4.4.1", "cufflinks==0.17.0", "umap-learn",
"lightgbm==2.3.1", "pyLDAvis", "gensim", "spacy", "nltk", "mlxtend",
"pyod", "catboost==0.20.2", "pandas-profiling==2.3.0", "kmodes==0.10.1",
"datefinder==0.7.0", "datetime", "DateTime==4.3", "tqdm==4.36.1", "awscli", "boto3"]
"datefinder==0.7.0", "datetime", "DateTime==4.3", "tqdm==4.36.1", "awscli"]
)
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册