未验证 提交 bb8db3a1 编写于 作者: P pycaret 提交者: GitHub

Add files via upload

上级 b45a03b9
def setup(data, session_id = None, verbose=True):
def setup(data,
session_id = None,
normalize = False,
verbose=True):
"""
......@@ -26,7 +29,9 @@ def setup(data, session_id = None, verbose=True):
is then distributed as a seed in all other functions used during experiment. This can
be used later for reproducibility of entire experiment.
normalize: bool, default = False
When set to True, the feature set is scaled using MinMaxScaler. By default, normalize is set to False.
Returns:
--------
......@@ -107,7 +112,7 @@ def setup(data, session_id = None, verbose=True):
import random
#defining global variables
global data_, experiment__, seed
global X, data_, experiment__, seed
#copying data
data_ = data.copy()
......@@ -128,14 +133,39 @@ def setup(data, session_id = None, verbose=True):
progress.value += 1
#scaling
if normalize:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X = pd.get_dummies(data_)
scaler = scaler.fit(X)
#append to experiment__
experiment__.append(('Scaler',scaler))
X = scaler.transform(X)
X = pd.DataFrame(X)
else:
X = data_.copy()
X = pd.get_dummies(data_)
'''
Final display Starts
'''
shape = data.shape
#normalize param
if normalize:
scaling = 'True'
else:
scaling = 'False'
functions = pd.DataFrame ( [ ['session_id', seed ],
['Scaling', scaling],
['Shape', shape ],
], columns = ['Description', 'Value'] )
......@@ -150,16 +180,16 @@ def setup(data, session_id = None, verbose=True):
'''
#log into experiment
if len(experiment__) == 0:
experiment__.append(('Info', functions))
if verbose:
experiment__.append(('Anomaly Info', functions))
experiment__.append(('Dataset', data_))
experiment__.append(('Scaled Dataset', X))
return data_, seed, experiment__
return X, data_, seed, experiment__
def create_model(model = None,
fraction = 0.05,
verbose=True):
verbose = True):
......@@ -211,7 +241,7 @@ def create_model(model = None,
Returns:
--------
model: trained model object
model: trained model object
------
Warnings:
......@@ -285,17 +315,10 @@ def create_model(model = None,
"""
#monitor update
monitor.iloc[1,1:] = 'Scaling the Data'
monitor.iloc[1,1:] = 'Importing the Model'
if verbose:
update_display(monitor, display_id = 'monitor')
#scaling data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X = pd.get_dummies(data_)
X = scaler.fit_transform(X)
X = pd.DataFrame(X)
progress.value += 1
#create model
......@@ -446,7 +469,7 @@ def assign_model(model,
#checking for allowed models
allowed_type = ['pyod']
if 'pyod' not in allowed_type:
if 'pyod' not in mod_type:
sys.exit('(Value Error): Model Not Recognized. Please see docstring for list of available models.')
#checking verbose parameter
......@@ -670,7 +693,7 @@ def tune_model(model=None,
"""
#testing
#no active test
global master, master_df
#ignore warnings
import warnings
......@@ -820,6 +843,7 @@ def tune_model(model=None,
progress.value += 1
#defining tuning grid
param_grid_with_zero = [0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
param_grid = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
master = []; master_df = []
......@@ -827,6 +851,10 @@ def tune_model(model=None,
monitor.iloc[1,1:] = 'Creating Outlier Detection Model'
update_display(monitor, display_id = 'monitor')
#adding dummy model in master
master.append('No Model Required')
master_df.append('No Model Required')
#removing target variable from data by defining new setup
target_ = pd.DataFrame(data_[supervised_target])
data_without_target = data_.copy()
......@@ -975,9 +1003,57 @@ def tune_model(model=None,
acc = []; auc = []; recall = []; prec = []; kappa = []; f1 = []
for i in range(0,len(master_df)):
#build model without anomaly detection
monitor.iloc[2,1:] = 'Evaluating Classifier Without Anomaly Detector'
update_display(monitor, display_id = 'monitor')
d = master_df[1].copy()
d.drop(['Label', 'Score'], axis=1, inplace=True)
#drop NA's caution
d.dropna(axis=0, inplace=True)
#get_dummies to cast categorical variables for supervised learning
d = pd.get_dummies(d)
#split the dataset
X = d.drop(supervised_target, axis=1)
y = d[supervised_target]
#fit the model
model.fit(X,y)
#generate the prediction and evaluate metric
pred = cross_val_predict(model,X,y,cv=fold, method = 'predict')
acc_ = metrics.accuracy_score(y,pred)
acc.append(acc_)
recall_ = metrics.recall_score(y,pred)
recall.append(recall_)
precision_ = metrics.precision_score(y,pred)
prec.append(precision_)
kappa_ = metrics.cohen_kappa_score(y,pred)
kappa.append(kappa_)
f1_ = metrics.f1_score(y,pred)
f1.append(f1_)
if hasattr(model,'predict_proba'):
pred_ = cross_val_predict(model,X,y,cv=fold, method = 'predict_proba')
pred_prob = pred_[:,1]
auc_ = metrics.roc_auc_score(y,pred_prob)
auc.append(auc_)
else:
auc.append(0)
for i in range(1,len(master_df)):
progress.value += 1
param_grid_val = param_grid[i]
param_grid_val = param_grid[i-1]
monitor.iloc[2,1:] = 'Evaluating Classifier With ' + str(param_grid_val) + ' Fraction'
update_display(monitor, display_id = 'monitor')
......@@ -1036,7 +1112,7 @@ def tune_model(model=None,
monitor.iloc[1,1:] = 'Finalizing'
update_display(monitor, display_id = 'monitor')
df = pd.DataFrame({'Fraction': param_grid, 'Accuracy' : acc, 'AUC' : auc, 'Recall' : recall,
df = pd.DataFrame({'Fraction': param_grid_with_zero, 'Accuracy' : acc, 'AUC' : auc, 'Recall' : recall,
'Precision' : prec, 'F1' : f1, 'Kappa' : kappa})
sorted_df = df.sort_values(by=optimize, ascending=False)
......@@ -1226,9 +1302,56 @@ def tune_model(model=None,
score = []
metric = []
for i in range(0,len(master_df)):
#build model without anomaly detection
monitor.iloc[2,1:] = 'Evaluating Regressor Without Clustering'
update_display(monitor, display_id = 'monitor')
d = master_df[1].copy()
d.drop(['Label', 'Score'], axis=1, inplace=True)
#drop NA's caution
d.dropna(axis=0, inplace=True)
#get_dummies to cast categorical variables for supervised learning
d = pd.get_dummies(d)
#split the dataset
X = d.drop(supervised_target, axis=1)
y = d[supervised_target]
#fit the model
model.fit(X,y)
#generate the prediction and evaluate metric
pred = cross_val_predict(model,X,y,cv=fold, method = 'predict')
if optimize == 'R2':
r2_ = metrics.r2_score(y,pred)
score.append(r2_)
elif optimize == 'MAE':
mae_ = metrics.mean_absolute_error(y,pred)
score.append(mae_)
elif optimize == 'MSE':
mse_ = metrics.mean_squared_error(y,pred)
score.append(mse_)
elif optimize == 'RMSE':
mse_ = metrics.mean_squared_error(y,pred)
rmse_ = np.sqrt(mse_)
score.append(rmse_)
elif optimize == 'ME':
max_error_ = metrics.max_error(y,pred)
score.append(max_error_)
metric.append(str(optimize))
for i in range(1,len(master_df)):
progress.value += 1
param_grid_val = param_grid[i]
param_grid_val = param_grid[i-1]
monitor.iloc[2,1:] = 'Evaluating Regressor With ' + str(param_grid_val) + ' Fraction'
update_display(monitor, display_id = 'monitor')
......@@ -1248,8 +1371,6 @@ def tune_model(model=None,
d = pd.get_dummies(d)
#clean.append(d) #for testing only
#split the dataset
X = d.drop(supervised_target, axis=1)
y = d[supervised_target]
......@@ -1287,7 +1408,7 @@ def tune_model(model=None,
monitor.iloc[1,1:] = 'Finalizing'
update_display(monitor, display_id = 'monitor')
df = pd.DataFrame({'Fraction': param_grid, 'Score' : score, 'Metric': metric})
df = pd.DataFrame({'Fraction': param_grid_with_zero, 'Score' : score, 'Metric': metric})
df.columns = ['Fraction', optimize, 'Metric']
#sorting to return best model
......@@ -1409,6 +1530,8 @@ def plot_model(model,
Label = pd.DataFrame(b['Label'])
b.dropna(axis=0, inplace=True) #droping rows with NA's
b.drop(['Label'], axis=1, inplace=True)
b = pd.get_dummies(b) #casting categorical variables
from sklearn.manifold import TSNE
X_embedded = TSNE(n_components=3).fit_transform(b)
......@@ -1428,6 +1551,7 @@ def plot_model(model,
Label = pd.DataFrame(b['Label'])
b.dropna(axis=0, inplace=True) #droping rows with NA's
b.drop(['Label'], axis=1, inplace=True)
b = pd.get_dummies(b) #casting categorical variables
import umap
reducer = umap.UMAP()
......@@ -1637,4 +1761,3 @@ def load_experiment(experiment_name):
display(ind)
return exp
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册