2019-09-03 15:04:23

51922645 · wizardforcel · f750d867 · 51922645 · 51922645 · 51922645
隐藏空白更改
内联并排

Showing with 152 additions and 1 deletion

.gitignore .gitignore +115 -0

README.md README.md +3 -1

src/CCF2019-discrete-manufacturing/644baseline.py src/CCF2019-discrete-manufacturing/644baseline.py +34 -0

未找到文件。
--- a/.gitignore
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.DS_Store
+
+# gitbook
+_book
+
+# node.js
+node_modules
+
+# windows
+Thumbs.db
+
+# word
+~$*.docx
+~$*.doc
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@
    EDA: http://lambda-xmu.club/2018/08/25/2019CCF-Work-Piece-EDA/

    Baseline 0.6777: https://github.com/destiny19960207/CCF_BDCI2019_discrete-manufacturing
+    Baseline 0.644: <src/CCF2019-discrete-manufacturing/644baseline.py>

 1.  2018科大讯飞AI营销算法大赛

@@ -288,4 +289,5 @@

 +   [Smile](https://github.com/Smilexuhc)
 +   [飞龙](https://github.com/wizardforcel)
-+   [SlideLucask](https://github.com/SlideLucask)
\ No newline at end of file
+   [SlideLucask](https://github.com/SlideLucask)
+   [知识星球：Kaggle 数据竞赛免费版](https://t.zsxq.com/IAQvF2F)
\ No newline at end of file
--- a/src/CCF2019-discrete-manufacturing/644baseline.py
+++ b/src/CCF2019-discrete-manufacturing/644baseline.py
+# 开源来自：知识星球-Kaggle数据竞赛免费版，转载请注明出处
+
+import pandas as pd
+import numpy as np
+from sklearn.ensemble import GradientBoostingClassifier
+
+# Load the first-round training and testing tables from the working directory.
+train, test = (
+    pd.read_csv(csv_name)
+    for csv_name in ('first_round_training_data.csv', 'first_round_testing_data.csv')
+)
+
+# Model inputs: the ten columns named Parameter1 .. Parameter10.
+features = ['Parameter%d' % idx for idx in range(1, 11)]
+
+def encoder(x):
+    """Map a quality label string to its integer class code.
+
+    'Excellent' -> 0, 'Good' -> 1, 'Pass' -> 2, 'Fail' -> 3.
+    Any other value raises KeyError.
+    """
+    label_codes = dict(Excellent=0, Good=1, Pass=2, Fail=3)
+    return label_codes[x]
+
+# Integer-encode the quality label; this column is the training target.
+train['label'] = train.Quality_label.apply(encoder)
+# 0/1 indicator columns, one per class (not used later in this script).
+train['label_Excellent'] = 1*(train['label'] == 0)
+train['label_Good'] = 1*(train['label'] == 1)
+train['label_Pass'] = 1*(train['label'] == 2)
+train['label_Fail'] = 1*(train['label'] == 3)
+
+# Multi-class gradient boosting on the parameter columns, with a fixed seed.
+model = GradientBoostingClassifier(max_depth=3,learning_rate=0.1,n_estimators=100,random_state=2019)
+model.fit(train.loc[:,features],train.label)
+
+# Hard class prediction for every test row.
+test['prediction'] = model.predict(test.loc[:,features])
+# Per-class probabilities. predict_proba orders its columns by model.classes_
+# (the sorted label codes), which lines up with Excellent/Good/Pass/Fail as
+# long as all four classes occur in the training data.
+prob_columns = ['prob_Excellent','prob_Good','prob_Pass','prob_Fail']
+for column in prob_columns:
+    test[column] = 0.0
+test.loc[:,prob_columns] = model.predict_proba(test.loc[:,features])
+
+
+# Average the row-level probabilities within each Group. The columns are
+# selected with a list: selecting them with a bare tuple
+# (groupby(...)['a','b']) is deprecated and rejected by newer pandas releases.
+prediction = test.groupby(['Group'],as_index=False)[prob_columns].mean()
+prediction.columns = ['Group','Excellent ratio','Good ratio','Pass ratio','Fail ratio']
+prediction.to_csv('baseline.csv',index=False)
\ No newline at end of file