Add files via upload

f4110d1f · pycaret · GitHub · ea33caa0 · f4110d1f · f4110d1f
Hide whitespace changes
Inline · Side-by-side

Showing 3 changed files with 25 additions and 11 deletions

anomaly.py anomaly.py +6 -4

clustering.py clustering.py +6 -4

nlp.py nlp.py +13 -3

No files found.
--- a/anomaly.py
+++ b/anomaly.py
@@ -542,6 +542,7 @@ def assign_model(model,
    return data__


+
 def tune_model(model=None,
               supervised_target=None,
               method = 'drop',
@@ -1126,7 +1127,7 @@ def tune_model(model=None,
        monitor.iloc[1,1:] = 'Finalizing'
        update_display(monitor, display_id = 'monitor')
                             
-        df = pd.DataFrame({'Fraction': param_grid_with_zero, 'Accuracy' : acc, 'AUC' : auc, 'Recall' : recall, 
+        df = pd.DataFrame({'Fraction %': param_grid_with_zero, 'Accuracy' : acc, 'AUC' : auc, 'Recall' : recall, 
                   'Precision' : prec, 'F1' : f1, 'Kappa' : kappa})
        
        sorted_df = df.sort_values(by=optimize, ascending=False)
@@ -1135,10 +1136,10 @@ def tune_model(model=None,
        best_model = master[ival]
        best_model_df = master_df[ival]
        progress.value += 1 
-        sd = pd.melt(df, id_vars=['Fraction'], value_vars=['Accuracy', 'AUC', 'Recall', 'Precision', 'F1', 'Kappa'], 
+        sd = pd.melt(df, id_vars=['Fraction %'], value_vars=['Accuracy', 'AUC', 'Recall', 'Precision', 'F1', 'Kappa'], 
                     var_name='Metric', value_name='Score')

-        fig = px.line(sd, x='Fraction', y='Score', color='Metric', line_shape='linear', range_y = [0,1])
+        fig = px.line(sd, x='Fraction %', y='Score', color='Metric', line_shape='linear', range_y = [0,1])
        fig.update_layout(plot_bgcolor='rgb(245,245,245)')
        title= str(full_name) + ' Metrics and Fraction %'
        fig.update_layout(title={'text': title, 'y':0.95,'x':0.45,'xanchor': 'center','yanchor': 'top'})
@@ -1147,7 +1148,7 @@ def tune_model(model=None,

        fig.show()
        
-        best_k = np.array(sorted_df.head(1)['Fraction'])[0]
+        best_k = np.array(sorted_df.head(1)['Fraction %'])[0]
        best_m = round(np.array(sorted_df.head(1)[optimize])[0],4)
        p = 'Best Model: ' + model_name + ' |' + ' Fraction %: ' + str(best_k) + ' | ' + str(optimize) + ' : ' + str(best_m)
        print(p)
@@ -1462,6 +1463,7 @@ def tune_model(model=None,
    return best_model
    

+
 def plot_model(model,
               plot = 'tsne'):
    

--- a/clustering.py
+++ b/clustering.py
@@ -546,6 +546,7 @@ def assign_model(model,
        
    return data__

+
 def tune_model(model=None,
               supervised_target=None,
               estimator=None,
@@ -1102,7 +1103,7 @@ def tune_model(model=None,
        monitor.iloc[1,1:] = 'Finalizing'
        update_display(monitor, display_id = 'monitor')
                             
-        df = pd.DataFrame({'Clusters': param_grid_with_zero, 'Accuracy' : acc, 'AUC' : auc, 'Recall' : recall, 
+        df = pd.DataFrame({'# of Clusters': param_grid_with_zero, 'Accuracy' : acc, 'AUC' : auc, 'Recall' : recall, 
                   'Precision' : prec, 'F1' : f1, 'Kappa' : kappa})
        
        sorted_df = df.sort_values(by=optimize, ascending=False)
@@ -1111,10 +1112,10 @@ def tune_model(model=None,
        best_model = master[ival]
        best_model_df = master_df[ival]
        progress.value += 1 
-        sd = pd.melt(df, id_vars=['Clusters'], value_vars=['Accuracy', 'AUC', 'Recall', 'Precision', 'F1', 'Kappa'], 
+        sd = pd.melt(df, id_vars=['# of Clusters'], value_vars=['Accuracy', 'AUC', 'Recall', 'Precision', 'F1', 'Kappa'], 
                     var_name='Metric', value_name='Score')

-        fig = px.line(sd, x='Clusters', y='Score', color='Metric', line_shape='linear', range_y = [0,1])
+        fig = px.line(sd, x='# of Clusters', y='Score', color='Metric', line_shape='linear', range_y = [0,1])
        fig.update_layout(plot_bgcolor='rgb(245,245,245)')
        title= str(full_name) + ' Metrics and Number of Clusters'
        fig.update_layout(title={'text': title, 'y':0.95,'x':0.45,'xanchor': 'center','yanchor': 'top'})
@@ -1123,7 +1124,7 @@ def tune_model(model=None,

        fig.show()
        
-        best_k = np.array(sorted_df.head(1)['Clusters'])[0]
+        best_k = np.array(sorted_df.head(1)['# of Clusters'])[0]
        best_m = round(np.array(sorted_df.head(1)[optimize])[0],4)
        p = 'Best Model: ' + model_name + ' |' + ' Number of Clusters : ' + str(best_k) + ' | ' + str(optimize) + ' : ' + str(best_m)
        print(p)
@@ -1431,6 +1432,7 @@ def tune_model(model=None,
    return best_model
    

+
 def plot_model(model, plot='cluster', feature=None):
    
    

--- a/nlp.py
+++ b/nlp.py
@@ -225,11 +225,21 @@ def setup(data,
    try:
        import nltk
        nltk.download('stopwords')
+        from nltk.corpus import stopwords
+        stop_words = stopwords.words('english')
+        
    except:
-        pass
+        stop_words = ['ourselves', 'hers', 'between', 'yourself', 'but', 'again', 'there', 'about', 'once', 'during', 
+                      'out', 'very', 'having', 'with', 'they', 'own', 'an', 'be', 'some', 'for', 'do', 'its', 'yours', 
+                      'such', 'into', 'of', 'most', 'itself', 'other', 'off', 'is', 's', 'am', 'or', 'who', 'as', 'from', 
+                      'him', 'each', 'the', 'themselves', 'until', 'below', 'are', 'we', 'these', 'your', 'his', 'through', 
+                      'don', 'nor', 'me', 'were', 'her', 'more', 'himself', 'this', 'down', 'should', 'our', 'their', 'while', 
+                      'above', 'both', 'up', 'to', 'ours', 'had', 'she', 'all', 'no', 'when', 'at', 'any', 'before', 'them', 
+                      'same', 'and', 'been', 'have', 'in', 'will', 'on', 'does', 'yourselves', 'then', 'that', 'because', 'what', 
+                      'over', 'why', 'so', 'can', 'did', 'not', 'now', 'under', 'he', 'you', 'herself', 'has', 'just', 'where', 
+                      'too', 'only', 'myself', 'which', 'those', 'i', 'after', 'few', 'whom', 't', 'being', 'if', 'theirs', 'my', 
+                      'against', 'a', 'by', 'doing', 'it', 'how', 'further', 'was', 'here', 'than']

-    from nltk.corpus import stopwords
-    stop_words = stopwords.words('english')
    
    if custom_stopwords is not None:
        stop_words = stop_words + custom_stopwords