未验证 提交 362338cb 编写于 作者: W wuzewu 提交者: GitHub

Merge pull request #350 from wuyefeilin/dygraph

add environment information collection
......@@ -21,6 +21,7 @@ from dygraph.datasets import DATASETS
import dygraph.transforms as T
from dygraph.models import MODELS
from dygraph.utils import get_environ_info
from dygraph.utils import logger
from dygraph.core import train
......@@ -129,8 +130,12 @@ def parse_args():
def main(args):
env_info = get_environ_info()
info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info +
['-' * 48])
logger.info(info)
places = fluid.CUDAPlace(ParallelEnv().dev_id) \
if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
else fluid.CPUPlace()
if args.dataset not in DATASETS:
......
......@@ -21,6 +21,7 @@ from dygraph.datasets import DATASETS
import dygraph.transforms as T
from dygraph.models import MODELS
from dygraph.utils import get_environ_info
from dygraph.utils import logger
from dygraph.core import train
......@@ -129,8 +130,12 @@ def parse_args():
def main(args):
env_info = get_environ_info()
info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info +
['-' * 48])
logger.info(info)
places = fluid.CUDAPlace(ParallelEnv().dev_id) \
if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
else fluid.CPUPlace()
if args.dataset not in DATASETS:
......
......@@ -21,7 +21,7 @@ import cv2
import tqdm
from dygraph import utils
import dygraph.utils.logging as logging
import dygraph.utils.logger as logger
def mkdir(path):
......@@ -39,7 +39,7 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
added_saved_dir = os.path.join(save_dir, 'added')
pred_saved_dir = os.path.join(save_dir, 'prediction')
logging.info("Start to predict...")
logger.info("Start to predict...")
for im, im_info, im_path in tqdm.tqdm(test_dataset):
im = to_variable(im)
pred, _ = model(im)
......
......@@ -19,7 +19,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import DataLoader
from paddle.incubate.hapi.distributed import DistributedBatchSampler
import dygraph.utils.logging as logging
import dygraph.utils.logger as logger
from dygraph.utils import load_pretrained_model
from dygraph.utils import resume
from dygraph.utils import Timer, calculate_eta
......@@ -111,7 +111,7 @@ def train(model,
train_batch_cost = 0.0
remain_steps = total_steps - num_steps
eta = calculate_eta(remain_steps, avg_train_batch_cost)
logging.info(
logger.info(
"[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
.format(epoch + 1, num_epochs, step + 1, steps_per_epoch,
avg_loss * nranks, lr, avg_train_batch_cost,
......@@ -152,7 +152,7 @@ def train(model,
best_model_dir = os.path.join(save_dir, "best_model")
fluid.save_dygraph(model.state_dict(),
os.path.join(best_model_dir, 'model'))
logging.info(
logger.info(
'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
.format(best_model_epoch, best_mean_iou))
......
......@@ -20,7 +20,7 @@ import cv2
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
import dygraph.utils.logging as logging
import dygraph.utils.logger as logger
from dygraph.utils import ConfusionMatrix
from dygraph.utils import Timer, calculate_eta
......@@ -39,7 +39,7 @@ def evaluate(model,
total_steps = len(eval_dataset)
conf_mat = ConfusionMatrix(num_classes, streaming=True)
logging.info(
logger.info(
"Start to evaluating(total_samples={}, total_steps={})...".format(
len(eval_dataset), total_steps))
timer = Timer()
......@@ -69,7 +69,7 @@ def evaluate(model,
time_step = timer.elapsed_time()
remain_step = total_steps - step - 1
logging.debug(
logger.debug(
"[EVAL] Epoch={}, Step={}/{}, iou={:4f}, sec/step={:.4f} | ETA {}".
format(epoch_id, step + 1, total_steps, iou, time_step,
calculate_eta(remain_step, time_step)))
......@@ -77,9 +77,9 @@ def evaluate(model,
category_iou, miou = conf_mat.mean_iou()
category_acc, macc = conf_mat.accuracy()
logging.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
len(eval_dataset), macc, miou))
logging.info("[EVAL] Category IoU: " + str(category_iou))
logging.info("[EVAL] Category Acc: " + str(category_acc))
logging.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
logger.info("[EVAL] Category IoU: " + str(category_iou))
logger.info("[EVAL] Category Acc: " + str(category_acc))
logger.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
return miou, macc
......@@ -84,7 +84,7 @@ def parse_args():
def main(args):
env_info = get_environ_info()
places = fluid.CUDAPlace(ParallelEnv().dev_id) \
if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
else fluid.CPUPlace()
if args.dataset not in DATASETS:
......
......@@ -22,6 +22,7 @@ import dygraph.transforms as T
#from dygraph.models import MODELS
from dygraph.cvlibs import manager
from dygraph.utils import get_environ_info
from dygraph.utils import logger
from dygraph.core import train
......@@ -130,8 +131,13 @@ def parse_args():
def main(args):
env_info = get_environ_info()
info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info +
['-' * 48])
logger.info(info)
places = fluid.CUDAPlace(ParallelEnv().dev_id) \
if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
else fluid.CPUPlace()
if args.dataset not in DATASETS:
......
......@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import logging
from . import logger
from . import download
from .metrics import ConfusionMatrix
from .utils import *
from .timer import Timer, calculate_eta
from .get_environ_info import get_environ_info
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from collections import OrderedDict
import subprocess
import glob
import paddle
import paddle.fluid as fluid
import cv2
IS_WINDOWS = sys.platform == 'win32'
def _find_cuda_home():
'''Finds the CUDA install path. It refers to the implementation of
pytorch <https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py>.
'''
# Guess #1
cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
if cuda_home is None:
# Guess #2
try:
which = 'where' if IS_WINDOWS else 'which'
nvcc = subprocess.check_output([which,
'nvcc']).decode().rstrip('\r\n')
cuda_home = os.path.dirname(os.path.dirname(nvcc))
except Exception:
# Guess #3
if IS_WINDOWS:
cuda_homes = glob.glob(
'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
if len(cuda_homes) == 0:
cuda_home = ''
else:
cuda_home = cuda_homes[0]
else:
cuda_home = '/usr/local/cuda'
if not os.path.exists(cuda_home):
cuda_home = None
return cuda_home
def _get_nvcc_info(cuda_home):
if cuda_home is not None and os.path.isdir(cuda_home):
try:
nvcc = os.path.join(cuda_home, 'bin/nvcc')
nvcc = subprocess.check_output(
"{} -V".format(nvcc), shell=True).decode()
nvcc = nvcc.strip().split('\n')[-1]
except subprocess.SubprocessError:
nvcc = "Not Available"
return nvcc
def _get_gpu_info():
try:
gpu_info = subprocess.check_output(['nvidia-smi',
'-L']).decode().strip()
gpu_info = gpu_info.split('\n')
for i in range(len(gpu_info)):
gpu_info[i] = ' '.join(gpu_info[i].split(' ')[:4])
except:
gpu_info = ' Can not get GPU information. Please make sure CUDA have been installed successfully.'
return gpu_info
def get_environ_info():
"""collect environment information"""
env_info = {}
env_info['System Platform'] = sys.platform
if env_info['System Platform'] == 'linux':
lsb_v = subprocess.check_output(['lsb_release', '-v']).decode().strip()
lsb_v = lsb_v.replace('\t', ' ')
lsb_d = subprocess.check_output(['lsb_release', '-d']).decode().strip()
lsb_d = lsb_d.replace('\t', ' ')
env_info['LSB'] = [lsb_v, lsb_d]
env_info['Python'] = sys.version.replace('\n', '')
compiled_with_cuda = paddle.fluid.is_compiled_with_cuda()
env_info['Paddle compiled with cuda'] = compiled_with_cuda
if compiled_with_cuda:
cuda_home = _find_cuda_home()
env_info['NVCC'] = _get_nvcc_info(cuda_home)
gpu_nums = fluid.core.get_cuda_device_count()
env_info['GPUs used'] = gpu_nums
env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get(
'CUDA_VISIBLE_DEVICES')
env_info['GPU'] = _get_gpu_info()
gcc = subprocess.check_output(['gcc', '--version']).decode()
gcc = gcc.strip().split('\n')[0]
env_info['GCC'] = gcc
env_info['PaddlePaddle'] = paddle.__version__
env_info['OpenCV'] = cv2.__version__
return env_info
......@@ -18,7 +18,7 @@ import math
import cv2
import paddle.fluid as fluid
from . import logging
from . import logger
def seconds_to_hms(seconds):
......@@ -29,27 +29,9 @@ def seconds_to_hms(seconds):
return hms_str
def get_environ_info():
info = dict()
info['place'] = 'cpu'
info['num'] = int(os.environ.get('CPU_NUM', 1))
if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "":
if hasattr(fluid.core, 'get_cuda_device_count'):
gpu_num = 0
try:
gpu_num = fluid.core.get_cuda_device_count()
except:
os.environ['CUDA_VISIBLE_DEVICES'] = ''
pass
if gpu_num > 0:
info['place'] = 'cuda'
info['num'] = fluid.core.get_cuda_device_count()
return info
def load_pretrained_model(model, pretrained_model):
if pretrained_model is not None:
logging.info('Load pretrained model from {}'.format(pretrained_model))
logger.info('Load pretrained model from {}'.format(pretrained_model))
if os.path.exists(pretrained_model):
ckpt_path = os.path.join(pretrained_model, 'model')
try:
......@@ -62,10 +44,10 @@ def load_pretrained_model(model, pretrained_model):
num_params_loaded = 0
for k in keys:
if k not in para_state_dict:
logging.warning("{} is not in pretrained model".format(k))
logger.warning("{} is not in pretrained model".format(k))
elif list(para_state_dict[k].shape) != list(
model_state_dict[k].shape):
logging.warning(
logger.warning(
"[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
.format(k, para_state_dict[k].shape,
model_state_dict[k].shape))
......@@ -73,7 +55,7 @@ def load_pretrained_model(model, pretrained_model):
model_state_dict[k] = para_state_dict[k]
num_params_loaded += 1
model.set_dict(model_state_dict)
logging.info("There are {}/{} varaibles are loaded.".format(
logger.info("There are {}/{} varaibles are loaded.".format(
num_params_loaded, len(model_state_dict)))
else:
......@@ -81,12 +63,12 @@ def load_pretrained_model(model, pretrained_model):
'The pretrained model directory is not Found: {}'.format(
pretrained_model))
else:
logging.info('No pretrained model to load, train from scratch')
logger.info('No pretrained model to load, train from scratch')
def resume(model, optimizer, resume_model):
if resume_model is not None:
logging.info('Resume model from {}'.format(resume_model))
logger.info('Resume model from {}'.format(resume_model))
if os.path.exists(resume_model):
resume_model = os.path.normpath(resume_model)
ckpt_path = os.path.join(resume_model, 'model')
......@@ -102,7 +84,7 @@ def resume(model, optimizer, resume_model):
'The resume model directory is not Found: {}'.format(
resume_model))
else:
logging.info('No model need to resume')
logger.info('No model need to resume')
def visualize(image, result, save_dir=None, weight=0.6):
......
......@@ -72,7 +72,7 @@ def parse_args():
def main(args):
env_info = get_environ_info()
places = fluid.CUDAPlace(ParallelEnv().dev_id) \
if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
else fluid.CPUPlace()
if args.dataset not in DATASETS:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册