提交 fd83e025 编写于 作者: M Megvii Engine Team

fix(mge/test): fix reproducibility of test_correctness.py

GitOrigin-RevId: 70bd43bbabd022ce9538d92291ddb1c93a596e0a
上级 b8d27934
......@@ -7,6 +7,8 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os
import re
import subprocess
import sys
import numpy as np
......@@ -15,18 +17,48 @@ import megengine as mge
import megengine.functional as F
from megengine import jit, tensor
from megengine.functional.debug_param import set_conv_execution_strategy
from megengine.module import BatchNorm2d, Conv2d, Linear, MaxPool2d, Module
from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module
from megengine.optimizer import SGD
from megengine.test import assertTensorClose
def get_gpu_name():
try:
gpu_info = subprocess.check_output(
["nvidia-smi", "--query-gpu=gpu_name", "--format=csv,noheader"]
)
gpu_info = gpu_info.decode("ascii").split("\n")[0]
except:
gpu_info = "None"
return gpu_info
def get_cpu_name():
cpu_info = "None"
try:
cpu_info = subprocess.check_output(["cat", "/proc/cpuinfo"]).decode("ascii")
for line in cpu_info.split("\n"):
if "model name" in line:
return re.sub(".*model name.*:", "", line, 1).strip()
except:
pass
return cpu_info
def get_xpu_name():
if mge.is_cuda_available():
return get_gpu_name()
else:
return get_cpu_name()
class MnistNet(Module):
def __init__(self, has_bn=False):
super().__init__()
self.conv0 = Conv2d(1, 20, kernel_size=5, bias=True)
self.pool0 = MaxPool2d(2)
self.pool0 = AvgPool2d(2)
self.conv1 = Conv2d(20, 20, kernel_size=5, bias=True)
self.pool1 = MaxPool2d(2)
self.pool1 = AvgPool2d(2)
self.fc0 = Linear(20 * 4 * 4, 500, bias=True)
self.fc1 = Linear(500, 10, bias=True)
self.bn0 = None
......@@ -67,6 +99,13 @@ def update_model(model_path):
The model with pre-trained weights is trained for one iter with the test data attached.
The loss and updated net state dict is dumped.
.. code-block:: python
from test_correctness import update_model
update_model('mnist_model_with_test.mge') # for gpu
update_model('mnist_model_with_test_cpu.mge') # for cpu
"""
net = MnistNet(has_bn=True)
checkpoint = mge.load(model_path)
......@@ -83,7 +122,11 @@ def update_model(model_path):
loss = train(data, label, net=net, opt=opt)
opt.step()
checkpoint.update({"net_updated": net.state_dict(), "loss": loss.numpy()})
xpu_name = get_xpu_name()
checkpoint.update(
{"net_updated": net.state_dict(), "loss": loss.numpy(), "xpu": xpu_name}
)
mge.save(checkpoint, model_path)
......@@ -92,9 +135,9 @@ def run_test(model_path, use_jit, use_symbolic):
"""
Load the model with test cases and run the training for one iter.
The loss and updated weights are compared with reference value to verify the correctness.
Dump a new file with updated result by calling update_model
if you think the test fails due to numerical rounding errors instead of bugs.
Dump a new file with updated result by calling update_model
if you think the test fails due to numerical rounding errors instead of bugs.
Please think twice before you do so.
"""
......@@ -109,7 +152,7 @@ def run_test(model_path, use_jit, use_symbolic):
data.set_value(checkpoint["data"])
label.set_value(checkpoint["label"])
max_err = 1e-1
max_err = 1e-5
train_func = train
if use_jit:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册