提交 51922645 编写于 作者: W wizardforcel

2019-09-03 15:04:23

上级 f750d867
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.DS_Store
# gitbook
_book
# node.js
node_modules
# windows
Thumbs.db
# word
~$*.docx
~$*.doc
......@@ -15,6 +15,7 @@
EDA: http://lambda-xmu.club/2018/08/25/2019CCF-Work-Piece-EDA/
Baseline 0.6777: https://github.com/destiny19960207/CCF_BDCI2019_discrete-manufacturing
Baseline 0.644: <src/CCF2019-discrete-manufacturing/644baseline.py>
1. 2018科大讯飞AI营销算法大赛
......@@ -288,4 +289,5 @@
+ [Smile](https://github.com/Smilexuhc)
+ [飞龙](https://github.com/wizardforcel)
+ [SlideLucask](https://github.com/SlideLucask)
\ No newline at end of file
+ [SlideLucask](https://github.com/SlideLucask)
+ [知识星球:Kaggle 数据竞赛免费版](https://t.zsxq.com/IAQvF2F)
\ No newline at end of file
# 开源来自:知识星球-Kaggle数据竞赛免费版,转载请注明出处
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
train = pd.read_csv('first_round_training_data.csv')
test = pd.read_csv('first_round_testing_data.csv')
features = ["Parameter1","Parameter2","Parameter3","Parameter4","Parameter5","Parameter6","Parameter7","Parameter8","Parameter9","Parameter10"]
def encoder(x):
return {'Excellent':0,'Good':1,'Pass':2,'Fail':3}[x]
train['label'] = train.Quality_label.apply(encoder)
train['label_Excellent'] = 1*(train['label'] == 0)
train['label_Good'] = 1*(train['label'] == 1)
train['label_Pass'] = 1*(train['label'] == 2)
train['label_Fail'] = 1*(train['label'] == 3)
model = GradientBoostingClassifier(max_depth=3,learning_rate=0.1,n_estimators=100,random_state=2019)
model.fit(train.loc[:,features],train.label)
test['prediction'] = model.predict(test.loc[:,features])
test['prob_Excellent'] = 0.0
test['prob_Good'] = 0.0
test['prob_Pass'] = 0.0
test['prob_Fail'] = 0.0
test.loc[:,['prob_Excellent','prob_Good','prob_Pass','prob_Fail']] = model.predict_proba(test.loc[:,features])
prediction = test.groupby(['Group'],as_index=False)['prob_Excellent','prob_Good','prob_Pass','prob_Fail'].mean()
prediction.columns = ['Group','Excellent ratio','Good ratio','Pass ratio','Fail ratio']
prediction.to_csv('baseline.csv',index=False)
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册