From b4aa0976f3e4afb303001c025e7655fdd667c732 Mon Sep 17 00:00:00 2001 From: jiangzhonglian Date: Wed, 1 Apr 2020 23:55:41 +0800 Subject: [PATCH] =?UTF-8?q?=E7=A7=BB=E5=8A=A8=20=E6=A1=88=E4=BE=8B?= =?UTF-8?q?=E5=88=B0=20docs/examples=20=E7=9B=AE=E5=BD=95=E4=B8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...ot_document_classification_20newsgroups.md | 646 ------------------ .../plot_lasso_and_elasticnet.md | 105 --- .../plot_lasso_coordinate_descent_path.md | 138 ---- .../plot_lasso_model_selection.md | 172 ----- .../plot_multi_task_lasso_support.md | 80 --- auto_examples/Linear-Models/plot_ols.md | 85 --- .../Linear-Models/plot_ridge_path.md | 59 -- .../plot_tomography_l1_reconstruction.md | 146 ---- 8 files changed, 1431 deletions(-) delete mode 100644 auto_examples/Linear-Models/plot_document_classification_20newsgroups.md delete mode 100644 auto_examples/Linear-Models/plot_lasso_and_elasticnet.md delete mode 100644 auto_examples/Linear-Models/plot_lasso_coordinate_descent_path.md delete mode 100644 auto_examples/Linear-Models/plot_lasso_model_selection.md delete mode 100644 auto_examples/Linear-Models/plot_multi_task_lasso_support.md delete mode 100644 auto_examples/Linear-Models/plot_ols.md delete mode 100644 auto_examples/Linear-Models/plot_ridge_path.md delete mode 100644 auto_examples/Linear-Models/plot_tomography_l1_reconstruction.md diff --git a/auto_examples/Linear-Models/plot_document_classification_20newsgroups.md b/auto_examples/Linear-Models/plot_document_classification_20newsgroups.md deleted file mode 100644 index e9fcf39..0000000 --- a/auto_examples/Linear-Models/plot_document_classification_20newsgroups.md +++ /dev/null @@ -1,646 +0,0 @@ - -# 分类特征稀疏的文本 - ->翻译者:[@Loopy](https://github.com/loopyme) ->校验者:[@barrycg](https://github.com/barrycg) - -这个例子展示了如何使用scikit-learn中的单词包方法,根据主题对文档进行分类。本例使用scipy.sparse中的矩阵来存储特征,并演示各种能够有效处理稀疏矩阵的分类器。 - -本例中使用的数据集是20条新闻组数据集。通过scikit-learn可以自动下载该数据集,并进行缓存。 - -下述条形图展示了各个不同分类器,其信息包括精度、训练时间(已归一化)和测试时间(已归一化)。 - - -```python -import logging -import numpy as np -from optparse import OptionParser -import sys -from time import time -import matplotlib.pyplot as plt - -from sklearn.datasets import fetch_20newsgroups -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.feature_selection import SelectFromModel -from sklearn.feature_selection import SelectKBest, chi2 -from sklearn.linear_model import RidgeClassifier -from sklearn.pipeline import Pipeline -from sklearn.svm import LinearSVC -from sklearn.linear_model import SGDClassifier -from sklearn.linear_model import Perceptron -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB -from sklearn.neighbors import KNeighborsClassifier -from sklearn.neighbors import NearestCentroid -from sklearn.ensemble import RandomForestClassifier -from sklearn.utils.extmath import density -from sklearn import metrics -``` - - -```python -# 在stdout上显示进度日志 -logging.basicConfig(level=logging.INFO,format='%(asctime)s %(levelname)s %(message)s') -``` - - -```python -# 解析命令行参数 -op = OptionParser() -op.add_option("--report", - action="store_true", dest="print_report", - help="Print a detailed classification report.") -op.add_option("--chi2_select", - action="store", type="int", dest="select_chi2", - help="Select some number of features using a chi-squared test") -op.add_option("--confusion_matrix", - action="store_true", dest="print_cm", - help="Print the confusion matrix.") -op.add_option("--top10", - action="store_true", dest="print_top10", - help="Print ten most discriminative terms per class" - " for every classifier.") -op.add_option("--all_categories", - action="store_true", dest="all_categories", - help="Whether to use all categories or not.") -op.add_option("--use_hashing", - action="store_true", - help="Use a hashing vectorizer.") -op.add_option("--n_features", - action="store", type=int, default=2 ** 16, - help="n_features when using the hashing vectorizer.") -op.add_option("--filtered", - action="store_true", - help="Remove newsgroup information that is easily overfit: " - "headers, signatures, and quoting.") -``` - - - - -