Unverified commit 79b2287d authored by lzzyzlbb, committed by GitHub

Add profile (#463)

* fix benchmark

* Add profile
Parent bc27b744
@@ -42,6 +42,13 @@ nvidia-docker run --name test_paddlegan -i \
${ImageName} /bin/bash -c "${run_cmd}"
```
If you need to enable the profile option, replace `run_cmd` directly with:
```
run_cmd="set -xe;
cd /workspace ;
bash -x benchmark/run_all.sh on"
```
## Output
After the run completes, files containing model training performance data, such as `esrgan_mp_bs32_fp32_8`, are produced in the PaddleGAN directory.
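Besides that raw log, run_benchmark.sh (see the script diff below) also writes a parsed speed file and, when profiling is enabled, a profiler dump. A quick check for an 8-GPU fp32 esrgan run, assuming the default log directory (the PaddleGAN root), might look like:
```
ls esrgan_mp_bs32_fp32_8          # raw training log (log_file)
ls esrgan_mp_bs32_fp32_8_speed    # parsed speed numbers (res_log_file)
ls esrgan_model.profile           # profiler output, written only when profiling is on
```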
@@ -53,6 +53,7 @@ function parse_yaml {
}
eval $(parse_yaml "benchmark/benchmark.yaml")
profile=${1:-"off"}
for model_mode in ${model_mode_list[@]}; do
eval fp_item_list='$'"${model_mode}_fp_item"
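For reference, `profile=${1:-"off"}` is plain Bash default expansion: the first positional argument when given, otherwise the literal `off`. A minimal standalone sketch (`demo.sh` is a hypothetical file name):
```
# Sketch of the default-expansion idiom used by run_all.sh above.
profile=${1:-"off"}
echo "profile=${profile}"
# bash demo.sh       -> profile=off
# bash demo.sh on    -> profile=on
```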
@@ -82,15 +83,15 @@ for model_mode in ${model_mode_list[@]}; do
do
echo "index is speed, 1gpus, begin, ${model_name}"
run_mode=sp
CUDA_VISIBLE_DEVICES=0 benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} # (5min)
CUDA_VISIBLE_DEVICES=0 benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} # (5min)
sleep 60
echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
run_mode=mp
basicvsr_name=basicvsr
if [ ${model_mode} = ${basicvsr_name} ]; then
CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval}
CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile}
else
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval}
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile}
fi
sleep 60
done
......
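Putting the call sites above next to `_set_params` below, the positional interface of run_benchmark.sh can be read off the diff. The sketch below is inferred from it; the final call uses purely illustrative values:
```
# Inferred positional arguments of benchmark/run_benchmark.sh (sketch):
#   $1 run_mode      sp | mp
#   $2 bs_item       batch size
#   $3 fp_item       precision tag, e.g. fp32
#   $4 mode          name of the iteration option (used as ${mode}=${max_iter})
#   $5 max_iter      number of iterations for the benchmark
#   $6 model_mode    model name, e.g. esrgan or basicvsr
#   $7 config        path to the training config file
#   $8 log_interval  logging interval
#   $9 profile       "on" enables the profiler, defaults to "off"
CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh sp 32 fp32 total_iters 300 esrgan ${config} 1 on  # illustrative values
```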
@@ -12,6 +12,7 @@ function _set_params(){
config=${7:-"config"}
log_interval=${8:-"1"}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR is set later by QA
need_profile=${9:-"off"}
# No changes needed below
device=${CUDA_VISIBLE_DEVICES//,/ }
@@ -19,6 +20,7 @@ function _set_params(){
num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
res_log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}_speed
log_profile=${run_log_path}/${model_name}_model.profile
}
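The `_8` / `_1` suffix in those file names is the GPU count, derived by splitting CUDA_VISIBLE_DEVICES. A minimal sketch follows; the intermediate `arr` assignment sits in a collapsed line of this diff and is assumed here:
```
CUDA_VISIBLE_DEVICES=0,1,2,3
device=${CUDA_VISIBLE_DEVICES//,/ }   # "0 1 2 3": commas replaced by spaces
arr=(${device})                       # assumed: split into an array of device ids
num_gpu_devices=${#arr[*]}            # 4
echo "num_gpu_devices=${num_gpu_devices}"
```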
function _analysis_log(){
@@ -29,7 +31,14 @@ function _train(){
echo "Train on ${num_gpu_devices} GPUs"
echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
train_cmd="--config-file=${config}
profiler_cmd=""
profiler_options="batch_range=[10,20];profile_path=${log_profile}"
if [ $need_profile = "on" ]; then
profiler_cmd="--profiler_options=${profiler_options}"
fi
train_cmd="${profiler_cmd}
--config-file=${config}
-o dataset.train.batch_size=${batch_size}
log_config.interval=${log_interval}
${mode}=${max_iter} "
......
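The line of `_train` that actually launches the trainer is collapsed above, so the following is only a hypothetical sketch of the assembled command when profiling is on; the entry point (tools/main.py), config path, and iteration option are assumptions, while the flags themselves come from the diff:
```
# Hypothetical assembled training command; entry point, config path and
# numeric values are illustrative, not taken from the script.
python -u tools/main.py \
    --profiler_options="batch_range=[10,20];profile_path=./esrgan_model.profile" \
    --config-file=configs/esrgan_psnr_x4_div2k.yaml \
    -o dataset.train.batch_size=32 \
       log_config.interval=1 \
       total_iters=300
```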