Stevezhangz / BERT Pytorch
Commit c7a1481a
Authored April 26, 2021 by Stevezhangz
Parent: cc64bb13

Showing 1 changed file with 236 additions and 0 deletions.

Bert_finetune.py  ·  new file (mode 100644)  ·  +236 −0
from bert import *
from Config_load import *
# nn, torch, os, Embedding, basic_block and Grelu are expected to come from the
# star imports above; the explicit imports below make that dependence clear.
import os
import torch
import torch.nn as nn
class Bert_word_pre(nn.Module):
    def __init__(self, n_layers, vocab_size, emb_size, max_len, seg_size,
                 dff, dk, dv, n_head, n_class, drop):
        super(Bert_word_pre, self).__init__()
        self.vocab_size = vocab_size
        self.emb_size = emb_size
        self.emb_layer = Embedding(vocab_size, emb_size, max_len, seg_size)
        self.encoder_layer = nn.Sequential(
            *[basic_block(emb_size, dff, dk, dv, n_head) for i in range(n_layers)])
        self.fc1 = nn.Sequential(
            nn.Linear(emb_size, vocab_size),
            nn.Dropout(drop),
            nn.Tanh(),
            nn.Linear(vocab_size, n_class)
        )
        # Word-prediction head: its output projection shares weights with the
        # input token embedding (standard weight tying).
        fc2 = nn.Linear(emb_size, vocab_size)
        fc2.weight = self.emb_layer.emb_x.weight
        self.fc2 = nn.Sequential(
            nn.Linear(emb_size, emb_size),
            Grelu(),
            fc2
        )
    def get_mask(self, In):
        # Padding mask: positions whose token id is 0 are masked out.
        batch_size, length, mask = In.size()[0], In.size()[1], In
        mask = mask.eq(0).unsqueeze(1)  # (batch, 1, length)
        return mask.data.expand(batch_size, length, length)
    def forward(self, x):
        mask = self.get_mask(x)
        output = self.emb_layer(x=x, seg=None)
        for layer in self.encoder_layer:
            output = layer(output, mask)
        # Per-position vocabulary logits; position 0 ([CLS]) is skipped.
        cls = self.fc2(output[:, 1:])
        return cls
    def display(self, batch, load_dir, map_dir):
        import json
        if load_dir is not None:
            if os.path.exists(load_dir):
                checkpoint = torch.load(load_dir)
                try:
                    self.load_state_dict(checkpoint['model'])
                except Exception:
                    print("fail to load the state_dict")
        # idx2word maps predicted token ids back to words.
        map_file = json.load(open(map_dir, "r"))["idx2word"]
        for x in batch:
            pre = self(x)
            # argmax over the vocabulary dimension, first sample of the batch
            pre = pre.data.max(2)[1][0].data.numpy()
            transform = []
            for i in pre:
                try:
                    word_pre = map_file[int(i)]
                except (KeyError, IndexError):
                    word_pre = "mistake"
                transform.append(word_pre)
            print("prediction_words:", transform)
            print("prediction_token:", pre)
    def Train(self, epoches, criterion, optimizer, train_data_loader, use_gpu,
              device, eval_data_loader=None, save_dir="./checkpoint",
              load_dir=None, save_freq=5):
        import tqdm
        # Optionally resume model and optimizer state from a checkpoint.
        if load_dir is not None:
            if os.path.exists(load_dir):
                checkpoint = torch.load(load_dir)
                try:
                    self.load_state_dict(checkpoint['model'])
                    optimizer.load_state_dict(checkpoint['optimizer'])
                except Exception:
                    print("fail to load the state_dict")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        for epc in range(epoches):
            tq = tqdm.tqdm(train_data_loader)
            for seq, (input_ids, classi) in enumerate(tq):
                if use_gpu:
                    input_ids, classi = input_ids.to(device), classi.to(device)
                logits_clsf = self(x=input_ids)
                # Flatten per-position vocabulary logits to (batch * length, vocab)
                # and the token targets to a matching 1-D tensor for cross-entropy.
                loss_word = criterion(logits_clsf.view(-1, self.vocab_size),
                                      classi.view(-1))
                optimizer.zero_grad()
                loss_word.backward()
                optimizer.step()
                tq.set_description(f"train Epoch {epc + 1}, Batch {seq}")
                tq.set_postfix(train_loss=loss_word.item())
            # Evaluation pass: same loss, but no gradients or parameter updates.
            if eval_data_loader is not None:
                tq = tqdm.tqdm(eval_data_loader)
                with torch.no_grad():
                    for seq, (input_ids, classi) in enumerate(tq):
                        if use_gpu:
                            input_ids, classi = input_ids.to(device), classi.to(device)
                        logits_clsf = self(x=input_ids)
                        loss_word = criterion(logits_clsf.view(-1, self.vocab_size),
                                              classi.view(-1))
                        tq.set_description(f"Eval Epoch {epc + 1}, Batch {seq}")
                        tq.set_postfix(eval_loss=loss_word.item())
            if (epc + 1) % save_freq == 0:
                checkpoint = {
                    'epoch': epc,
                    'best_loss': loss_word.item(),  # last observed loss value
                    'model': self.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(checkpoint, save_dir + f"/checkpoint_{epc}.pth")
class Bert_classify(nn.Module):
    def __init__(self, n_layers, vocab_size, emb_size, max_len, seg_size,
                 dff, dk, dv, n_head, n_class, drop):
        super(Bert_classify, self).__init__()
        self.vocab_size = vocab_size
        self.emb_size = emb_size
        self.emb_layer = Embedding(vocab_size, emb_size, max_len, seg_size)
        self.encoder_layer = nn.Sequential(
            *[basic_block(emb_size, dff, dk, dv, n_head) for i in range(n_layers)])
        # Classification head over the [CLS] position.
        self.fc1 = nn.Sequential(
            nn.Linear(emb_size, vocab_size),
            nn.Dropout(drop),
            nn.Tanh(),
            nn.Linear(vocab_size, n_class)
        )
        # Weight-tied word-prediction head (not used by forward).
        fc2 = nn.Linear(emb_size, vocab_size)
        fc2.weight = self.emb_layer.emb_x.weight
        self.fc2 = nn.Sequential(
            nn.Linear(emb_size, emb_size),
            Grelu(),
            fc2
        )
    def get_mask(self, In):
        # Padding mask: positions whose token id is 0 are masked out.
        batch_size, length, mask = In.size()[0], In.size()[1], In
        mask = mask.eq(0).unsqueeze(1)  # (batch, 1, length)
        return mask.data.expand(batch_size, length, length)

    def forward(self, x):
        mask = self.get_mask(x)
        output = self.emb_layer(x=x, seg=None)
        for layer in self.encoder_layer:
            output = layer(output, mask)
        # Classify from the encoder state at position 0 ([CLS]).
        cls = self.fc1(output[:, 0])
        return cls
    def display(self, batch, load_dir):
        if load_dir is not None:
            if os.path.exists(load_dir):
                checkpoint = torch.load(load_dir)
                try:
                    self.load_state_dict(checkpoint['model'])
                except Exception:
                    print("fail to load the state_dict")
        for i in batch:
            logits_clsf = self(x=i)
            print(logits_clsf)
    def Train(self, epoches, criterion, optimizer, train_data_loader, use_gpu,
              device, eval_data_loader=None, save_dir="./checkpoint",
              load_dir=None, save_freq=5):
        import tqdm
        # Optionally resume model and optimizer state from a checkpoint.
        if load_dir is not None:
            if os.path.exists(load_dir):
                checkpoint = torch.load(load_dir)
                try:
                    self.load_state_dict(checkpoint['model'])
                    optimizer.load_state_dict(checkpoint['optimizer'])
                except Exception:
                    print("fail to load the state_dict")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        for epc in range(epoches):
            tq = tqdm.tqdm(train_data_loader)
            for seq, (input_ids, classi) in enumerate(tq):
                if use_gpu:
                    input_ids, classi = input_ids.to(device), classi.to(device)
                logits_clsf = self(x=input_ids)
                # Cross-entropy between the (batch, n_class) logits and the labels.
                loss_cls = criterion(logits_clsf, classi)
                optimizer.zero_grad()
                loss_cls.backward()
                optimizer.step()
                tq.set_description(f"train Epoch {epc + 1}, Batch {seq}")
                tq.set_postfix(train_loss=loss_cls.item())
            # Evaluation pass: same loss, but no gradients or parameter updates.
            if eval_data_loader is not None:
                tq = tqdm.tqdm(eval_data_loader)
                with torch.no_grad():
                    for seq, (input_ids, classi) in enumerate(tq):
                        if use_gpu:
                            input_ids, classi = input_ids.to(device), classi.to(device)
                        logits_clsf = self(x=input_ids)
                        loss_cls = criterion(logits_clsf, classi)
                        tq.set_description(f"Eval Epoch {epc + 1}, Batch {seq}")
                        tq.set_postfix(eval_loss=loss_cls.item())
            if (epc + 1) % save_freq == 0:
                checkpoint = {
                    'epoch': epc,
                    'best_loss': loss_cls.item(),  # last observed loss value
                    'model': self.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(checkpoint, save_dir + f"/checkpoint_{epc}.pth")
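
A minimal smoke-test sketch for the classifier added in this commit. It assumes the Embedding, basic_block, and Grelu modules exported by bert.py behave as the constructors above expect; every hyperparameter value and the toy dataset are illustrative assumptions, not values taken from this repository's Config_load.

# Hypothetical usage of Bert_classify.Train; all values below are made up.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from Bert_finetune import Bert_classify

model = Bert_classify(n_layers=2, vocab_size=1000, emb_size=128, max_len=64,
                      seg_size=2, dff=512, dk=32, dv=32, n_head=4,
                      n_class=2, drop=0.1)

# Toy data: random token ids (id 0 is treated as padding by get_mask) and labels.
input_ids = torch.randint(1, 1000, (32, 64))
labels = torch.randint(0, 2, (32,))
loader = DataLoader(TensorDataset(input_ids, labels), batch_size=8)

model.Train(epoches=1,
            criterion=nn.CrossEntropyLoss(),
            optimizer=torch.optim.Adam(model.parameters(), lr=1e-4),
            train_data_loader=loader,
            use_gpu=False,
            device=torch.device("cpu"),
            save_freq=1)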
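
Each checkpoint that Train writes is a plain dict with 'epoch', 'best_loss', 'model', and 'optimizer' keys, so resuming or inspecting one needs no extra machinery. A sketch, assuming a file written by the run above (the path is hypothetical):

# Inspect a saved checkpoint and resume training from it.
checkpoint = torch.load("./checkpoint/checkpoint_0.pth", map_location="cpu")
print("epoch:", checkpoint["epoch"], "loss:", checkpoint["best_loss"])
model.load_state_dict(checkpoint["model"])

# Train also restores both model and optimizer state itself when load_dir is set.
model.Train(epoches=5,
            criterion=nn.CrossEntropyLoss(),
            optimizer=torch.optim.Adam(model.parameters(), lr=1e-4),
            train_data_loader=loader,
            use_gpu=False,
            device=torch.device("cpu"),
            load_dir="./checkpoint/checkpoint_0.pth",
            save_freq=1)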