scripts: improve pegasus_manual_compact.sh; add PID in tmp file name for all scripts (#71)

08548f79 · QinZuoyan · GitHub · d31c1431 · 08548f79 · 08548f79
9 changed files
--- a/scripts/downgrade_node.sh
+++ b/scripts/downgrade_node.sh
 #!/bin/bash

+PID=$$
+
 if [ $# -ne 4 ]
 then
  echo "This tool is for downgrading replicas of specified node."
@@ -24,9 +26,13 @@ then
  exit -1
 fi

-echo "set_meta_level steady" | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.set_meta_level
+echo "UID=$UID"
+echo "PID=$PID"
+echo
+
+echo "set_meta_level steady" | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.set_meta_level

-echo ls | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.ls
+echo ls | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.ls

 while read app_line
 do
@@ -40,7 +46,7 @@ do
      continue
    fi

-    echo "app $app -d" | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.app.$app
+    echo "app $app -d" | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.app.$app

    while read line
    do
@@ -66,17 +72,18 @@ do
        fi
        echo "propose --gpid ${gid}.${pid} --type DOWNGRADE_TO_INACTIVE -t $pri -n $node"
      fi
-    done </tmp/$UID.pegasus.app.$app >/tmp/$UID.pegasus.cmd.$app
+    done </tmp/$UID.$PID.pegasus.app.$app >/tmp/$UID.$PID.pegasus.cmd.$app

    if [ "$type" = "run" ]
    then
-      cat /tmp/$UID.pegasus.cmd.$app
-      cat /tmp/$UID.pegasus.cmd.$app | ./run.sh shell --cluster $cluster 2>/dev/null
+      cat /tmp/$UID.$PID.pegasus.cmd.$app
+      cat /tmp/$UID.$PID.pegasus.cmd.$app | ./run.sh shell --cluster $cluster 2>/dev/null
      echo
      echo
    else
-      cat /tmp/$UID.pegasus.cmd.$app
+      cat /tmp/$UID.$PID.pegasus.cmd.$app
    fi
  fi
-done </tmp/$UID.pegasus.ls
+done </tmp/$UID.$PID.pegasus.ls

+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/migrate_node.sh
+++ b/scripts/migrate_node.sh
 #!/bin/bash

+PID=$$
+
 if [ $# -ne 4 ]
 then
  echo "This tool is for migrating primary replicas out of specified node."
@@ -24,9 +26,13 @@ then
  exit -1
 fi

-echo "set_meta_level steady" | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.set_meta_level
+echo "UID=$UID"
+echo "PID=$PID"
+echo
+
+echo "set_meta_level steady" | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.set_meta_level

-echo ls | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.ls
+echo ls | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.ls

 while read app_line
 do
@@ -40,7 +46,7 @@ do
      continue
    fi

-    echo "app $app -d" | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.app.$app
+    echo "app $app -d" | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.app.$app

    while read line
    do
@@ -51,16 +57,17 @@ do
        to=`echo $line | awk '{print $5}' | grep -o '\[.*\]' | grep -o '[0-9.:,]*' | cut -d, -f$((RANDOM%2+1))`
        echo "balance --gpid ${gid}.${pid} --type move_pri -f $node -t $to"
      fi
-    done </tmp/$UID.pegasus.app.$app >/tmp/$UID.pegasus.cmd.$app
+    done </tmp/$UID.$PID.pegasus.app.$app >/tmp/$UID.$PID.pegasus.cmd.$app

    if [ "$type" = "run" ]
    then
-      cat /tmp/$UID.pegasus.cmd.$app | ./run.sh shell --cluster $cluster 2>/dev/null
+      cat /tmp/$UID.$PID.pegasus.cmd.$app | ./run.sh shell --cluster $cluster 2>/dev/null
      echo
      echo
    else
-      cat /tmp/$UID.pegasus.cmd.$app
+      cat /tmp/$UID.$PID.pegasus.cmd.$app
    fi
  fi
-done </tmp/$UID.pegasus.ls
+done </tmp/$UID.$PID.pegasus.ls

+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/pegasus_manual_compact.sh
+++ b/scripts/pegasus_manual_compact.sh
 #!/bin/bash

+PID=$$
+
 function usage()
 {
    echo "This tool is for manual compact specified table(app)."
@@ -50,7 +52,7 @@ function get_env()
    app_name=$2
    key=$3

-    log_file="/tmp/$UID.pegasus.get_app_envs.${app_name}"
+    log_file="/tmp/$UID.$PID.pegasus.get_app_envs.${app_name}"
    echo -e "use ${app_name}\n get_app_envs" | ./run.sh shell --cluster ${cluster} &>${log_file}
    get_ok=`grep 'get app envs succeed' ${log_file} | wc -l`
    if [ ${get_ok} -ne 1 ]; then
@@ -69,7 +71,7 @@ function set_env()
    value=$4

    echo "set_app_envs ${key}=${value}"
-    log_file="/tmp/$UID.pegasus.set_app_envs.${app_name}"
+    log_file="/tmp/$UID.$PID.pegasus.set_app_envs.${app_name}"
    echo -e "use ${app_name}\n set_app_envs ${key} ${value}" | ./run.sh shell --cluster ${cluster} &>${log_file}
    set_ok=`grep 'set app envs succeed' ${log_file} | wc -l`
    if [ ${set_ok} -ne 1 ]; then
@@ -92,16 +94,18 @@ function wait_manual_compact()
    slept=0
    while true
    do
-        query_log_file="/tmp/$UID.pegasus.query_compact.${app_id}"
+        query_log_file="/tmp/$UID.$PID.pegasus.query_compact.${app_id}"
        echo "${query_cmd}" | ./run.sh shell --cluster ${cluster} &>${query_log_file}

        queue_count=`grep 'recent enqueue at' ${query_log_file} | grep -v 'recent start at' | wc -l`
        running_count=`grep 'recent start at' ${query_log_file} | wc -l`
-        not_finish_count=$((queue_count+running_count))
+        processing_count=$((queue_count+running_count))
        finish_count=`grep "last finish at" ${query_log_file} | grep -v "recent enqueue at" | grep -v "recent start at" | grep -o 'last finish at [^,]*' | sed 's/\[/,/;s/\]//' | awk -F"," -v date="$earliest_finish_time_ms" 'BEGIN{count=0}{if(length($2)==23 && $2>=date){count++;}}END{print count}'`
+        not_finish_count=$((total_replica_count-finish_count))

-        if [ ${not_finish_count} -eq 0 -a ${finish_count} -eq ${total_replica_count} ]; then
-            echo "All finished."
+        if [ ${processing_count} -eq 0 -a ${finish_count} -eq ${total_replica_count} ]; then
+            echo "[${slept}s] $finish_count finished, $not_finish_count not finished ($queue_count in queue, $running_count in running), estimate remaining 0 seconds."
+            echo "All finished, total $total_replica_count replicas."
            break
        else
            left_time="unknown"
@@ -123,7 +127,7 @@ function create_checkpoint()
    app_id=$2

    echo "Start to create checkpoint..."
-    chkpt_log_file="/tmp/$UID.pegasus.trigger_checkpoint.${app_id}"
+    chkpt_log_file="/tmp/$UID.$PID.pegasus.trigger_checkpoint.${app_id}"
    echo "remote_command -t replica-server replica.trigger-checkpoint ${app_id}" | ./run.sh shell --cluster ${cluster} &>${chkpt_log_file}
    not_found_count=`grep '^    .*not found' ${chkpt_log_file} | wc -l`
    triggered_count=`grep '^    .*triggered' ${chkpt_log_file} | wc -l`
@@ -253,12 +257,14 @@ fi

 # record start time
 all_start_time=`date +%s`
+echo "UID=$UID"
+echo "PID=$PID"
 echo "Start time: `date -d @${all_start_time} +"%Y-%m-%d %H:%M:%S"`"
 echo

 if [ "${type}" == "periodic" ] || [ "${type}" == "once" -a "${wait_only}" == "false" ]; then
    # set steady
-    echo "set_meta_level steady" | ./run.sh shell --cluster ${cluster} &>/tmp/$UID.pegasus.set_meta_level
+    echo "set_meta_level steady" | ./run.sh shell --cluster ${cluster} &>/tmp/$UID.$PID.pegasus.set_meta_level

    # set manual compact envs
    if [ "${target_level}" != "" ]; then
@@ -273,6 +279,7 @@ fi

 # only `once` manual compact will check progress
 if [ "${type}" != "once" ]; then
+    rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
    exit 0
 fi

@@ -282,7 +289,7 @@ if [ "${disabled}" == "true" ]; then
    exit -1
 fi

-ls_log_file="/tmp/$UID.pegasus.ls"
+ls_log_file="/tmp/$UID.$PID.pegasus.ls"
 echo ls | ./run.sh shell --cluster ${cluster} &>${ls_log_file}

 while read app_line
@@ -311,3 +318,5 @@ done <${ls_log_file}
 all_finish_time=`date +%s`
 echo "Finish time: `date -d @${all_finish_time} +"%Y-%m-%d %H:%M:%S"`"
 echo "Manual compact done, elapsed time is $((all_finish_time - all_start_time)) seconds."
+
+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/pegasus_migrate_zookeeper.sh
+++ b/scripts/pegasus_migrate_zookeeper.sh
@@ -3,6 +3,8 @@
 # Migrate zookeeper using minos.
 #

+PID=$$
+
 if [ $# -le 2 ]; then
  echo "USAGE: $0 <cluster-name> <cluster-meta-list> <target-zookeeper-hosts>"
  echo
@@ -49,6 +51,10 @@ if [ $low_version_count -gt 0 ]; then
  exit -1
 fi

+echo "UID=$UID"
+echo "PID=$PID"
+echo
+
 echo ">>>> Backuping app list..."
 echo "ls -o ${cluster}.apps" | ./run.sh shell --cluster $meta_list &>/dev/null
 if [ `cat ${cluster}.apps | wc -l` -eq 0 ]; then
@@ -80,7 +86,7 @@ sed -i "s/ hosts_list = .*/ hosts_list = ${target_zk}/" $minos_config

 echo ">>>> Stopping all meta-servers..."
 cd $minos_client_dir
-./deploy stop pegasus $cluster --skip_confirm --job meta 2>&1 | tee /tmp/$UID.pegasus.migrate_zookeeper.minos.stop.meta.all
+./deploy stop pegasus $cluster --skip_confirm --job meta 2>&1 | tee /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.stop.meta.all
 cd $shell_dir

 echo ">>>> Sleep for 15 seconds..."
@@ -90,9 +96,9 @@ function rolling_update_meta()
 {
  task_id=$1
  cd $minos_client_dir
-  ./deploy rolling_update pegasus $cluster --skip_confirm --time_interval 10 --update_config --job meta --task $task_id 2>&1 | tee /tmp/$UID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id
-  if [ `cat /tmp/$UID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id | grep "Start task $task_id of meta .* success" | wc -l` -ne 1 ]; then
-    echo "ERROR: rolling update meta-servers task $task_id failed, refer to /tmp/$UID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id"
+  ./deploy rolling_update pegasus $cluster --skip_confirm --time_interval 10 --update_config --job meta --task $task_id 2>&1 | tee /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id
+  if [ `cat /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id | grep "Start task $task_id of meta .* success" | wc -l` -ne 1 ]; then
+    echo "ERROR: rolling update meta-servers task $task_id failed, refer to /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id"
    cd $shell_dir
    return 1
  fi
@@ -116,26 +122,26 @@ if [ $? -ne 0 ]; then
 fi

 echo ">>>> Sending recover command..."
-echo "recover -f ${cluster}.recover.nodes" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.migrate_zookeeper.shell.recover
-cat /tmp/$UID.pegasus.migrate_zookeeper.shell.recover
-if [ `cat /tmp/$UID.pegasus.migrate_zookeeper.shell.recover | grep "Recover result: ERR_OK" | wc -l` -ne 1 ]; then
-  echo "ERROR: recover failed, refer to /tmp/$UID.pegasus.migrate_zookeeper.shell.recover"
+echo "recover -f ${cluster}.recover.nodes" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.recover
+cat /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.recover
+if [ `cat /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.recover | grep "Recover result: ERR_OK" | wc -l` -ne 1 ]; then
+  echo "ERROR: recover failed, refer to /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.recover"
  undo
  exit 1
 fi

-echo ">>>> Checking recover result, refer to /tmp/$UID.pegasus.migrate_zookeeper.diff..."
-awk '{print $1,$2,$3}' ${cluster}.nodes >/tmp/$UID.pegasus.migrate_zookeeper.diff.old
+echo ">>>> Checking recover result, refer to /tmp/$UID.$PID.pegasus.migrate_zookeeper.diff..."
+awk '{print $1,$2,$3}' ${cluster}.nodes >/tmp/$UID.$PID.pegasus.migrate_zookeeper.diff.old
 while true
 do
-  rm -f /tmp/$UID.pegasus.migrate_zookeeper.shell.nodes
-  echo "nodes -d -o /tmp/$UID.pegasus.migrate_zookeeper.shell.nodes" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.migrate_zookeeper.shell.nodes.log
-  if [ `cat /tmp/$UID.pegasus.migrate_zookeeper.shell.nodes | wc -l` -eq 0 ]; then
-    echo "ERROR: get node list failed, refer to /tmp/$UID.pegasus.migrate_zookeeper.shell.nodes.log"
+  rm -f /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.nodes
+  echo "nodes -d -o /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.nodes" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.nodes.log
+  if [ `cat /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.nodes | wc -l` -eq 0 ]; then
+    echo "ERROR: get node list failed, refer to /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.nodes.log"
    exit -1
  fi
-  awk '{print $1,$2,$3}' /tmp/$UID.pegasus.migrate_zookeeper.shell.nodes >/tmp/$UID.pegasus.migrate_zookeeper.diff.new
-  diff /tmp/$UID.pegasus.migrate_zookeeper.diff.old /tmp/$UID.pegasus.migrate_zookeeper.diff.new &>/tmp/$UID.pegasus.migrate_zookeeper.diff
+  awk '{print $1,$2,$3}' /tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.nodes >/tmp/$UID.$PID.pegasus.migrate_zookeeper.diff.new
+  diff /tmp/$UID.$PID.pegasus.migrate_zookeeper.diff.old /tmp/$UID.$PID.pegasus.migrate_zookeeper.diff.new &>/tmp/$UID.$PID.pegasus.migrate_zookeeper.diff
  if [ $? -eq 0 ]; then
    break
  fi
@@ -162,3 +168,4 @@ echo "cluster_info" | ./run.sh shell --cluster $meta_list

 echo "Migrate zookeeper done."

+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/pegasus_offline_node.sh
+++ b/scripts/pegasus_offline_node.sh
@@ -3,6 +3,8 @@
 # Offline replica server using minos.
 #

+PID=$$
+
 if [ $# -le 2 ]; then
  echo "USAGE: $0 <cluster-name> <cluster-meta-list> <replica-task-id>"
  echo
@@ -34,55 +36,57 @@ if [ ! -f $minos_client ]; then
  exit -1
 fi

+echo "UID=$UID"
+echo "PID=$PID"
 echo "Start time: `date`"
 all_start_time=$((`date +%s`))
 echo

-echo "Generating /tmp/$UID.pegasus.offline_node.minos.show..."
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node.minos.show..."
 cd $minos_client_dir
-./deploy show pegasus $cluster &>/tmp/$UID.pegasus.offline_node.minos.show
+./deploy show pegasus $cluster &>/tmp/$UID.$PID.pegasus.offline_node.minos.show

-echo "Generating /tmp/$UID.pegasus.offline_node.rs.list..."
-grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.pegasus.offline_node.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.pegasus.offline_node.rs.list
-replica_server_count=`cat /tmp/$UID.pegasus.offline_node.rs.list | wc -l`
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node.rs.list..."
+grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.$PID.pegasus.offline_node.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.$PID.pegasus.offline_node.rs.list
+replica_server_count=`cat /tmp/$UID.$PID.pegasus.offline_node.rs.list | wc -l`
 if [ $replica_server_count -eq 0 ]; then
  echo "ERROR: replica server count is 0 by minos show"
  exit -1
 fi
 cd $shell_dir

-echo "Generating /tmp/$UID.pegasus.offline_node.cluster_info..."
-echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node.cluster_info
-cname=`grep zookeeper_root /tmp/$UID.pegasus.offline_node.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node.cluster_info..."
+echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node.cluster_info
+cname=`grep zookeeper_root /tmp/$UID.$PID.pegasus.offline_node.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
 if [ "$cname" != "$cluster" ]; then
  echo "ERROR: cluster name and meta list not matched"
  exit -1
 fi
-pmeta=`grep primary_meta_server /tmp/$UID.pegasus.offline_node.cluster_info | grep -o '[0-9.:]*$'`
+pmeta=`grep primary_meta_server /tmp/$UID.$PID.pegasus.offline_node.cluster_info | grep -o '[0-9.:]*$'`
 if [ "$pmeta" == "" ]; then
  echo "ERROR: extract primary_meta_server by shell failed"
  exit -1
 fi

-echo "Generating /tmp/$UID.pegasus.offline_node.nodes..."
-echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node.nodes
-rs_port=`grep '^[0-9.]*:' /tmp/$UID.pegasus.offline_node.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node.nodes..."
+echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node.nodes
+rs_port=`grep '^[0-9.]*:' /tmp/$UID.$PID.pegasus.offline_node.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
 if [ "$rs_port" == "" ]; then
  echo "ERROR: extract replica server port by shell failed"
  exit -1
 fi

 echo "Set meta level to steady..."
-echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node.set_meta_level
-set_ok=`grep 'control meta level ok' /tmp/$UID.pegasus.offline_node.set_meta_level | wc -l`
+echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node.set_meta_level
+set_ok=`grep 'control meta level ok' /tmp/$UID.$PID.pegasus.offline_node.set_meta_level | wc -l`
 if [ $set_ok -ne 1 ]; then
  echo "ERROR: set meta level to steady failed"
  exit -1
 fi

 echo "Set lb.assign_delay_ms to 10..."
-echo "remote_command -l $pmeta meta.lb.assign_delay_ms 10" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node.assign_delay_ms
-set_ok=`grep OK /tmp/$UID.pegasus.offline_node.assign_delay_ms | wc -l`
+echo "remote_command -l $pmeta meta.lb.assign_delay_ms 10" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node.assign_delay_ms
+set_ok=`grep OK /tmp/$UID.$PID.pegasus.offline_node.assign_delay_ms | wc -l`
 if [ $set_ok -ne 1 ]; then
  echo "ERROR: set lb.assign_delay_ms to 10 failed"
  exit -1
@@ -110,9 +114,9 @@ do
  echo

  echo "Migrating primary replicas out of node..."
-  ./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/$UID.pegasus.offline_node.migrate_node
+  ./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.offline_node.migrate_node
  echo "Wait [$node] to migrate done..."
-  echo "Refer to /tmp/$UID.pegasus.offline_node.migrate_node for details"
+  echo "Refer to /tmp/$UID.$PID.pegasus.offline_node.migrate_node for details"
  while true
  do
    pri_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $4}'`
@@ -128,9 +132,9 @@ do
  sleep 1

  echo "Downgrading replicas on node..."
-  ./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/$UID.pegasus.offline_node.downgrade_node
+  ./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.offline_node.downgrade_node
  echo "Wait [$node] to downgrade done..."
-  echo "Refer to /tmp/$UID.pegasus.offline_node.downgrade_node for details"
+  echo "Refer to /tmp/$UID.$PID.pegasus.offline_node.downgrade_node for details"
  while true
  do
    rep_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $3}'`
@@ -146,13 +150,13 @@ do
  sleep 1

  echo "Send kill_partition to node..."
-  grep '^propose ' /tmp/$UID.pegasus.offline_node.downgrade_node >/tmp/$UID.pegasus.offline_node.downgrade_node.propose
+  grep '^propose ' /tmp/$UID.$PID.pegasus.offline_node.downgrade_node >/tmp/$UID.$PID.pegasus.offline_node.downgrade_node.propose
  while read line2 
  do
    gpid=`echo $line2 | awk '{print $3}' | sed 's/\./ /'`
-    echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node.kill_partition
-  done </tmp/$UID.pegasus.offline_node.downgrade_node.propose
-  echo "Sent kill_partition to `cat /tmp/$UID.pegasus.offline_node.downgrade_node.propose | wc -l` partitions"
+    echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node.kill_partition
+  done </tmp/$UID.$PID.pegasus.offline_node.downgrade_node.propose
+  echo "Sent kill_partition to `cat /tmp/$UID.$PID.pegasus.offline_node.downgrade_node.propose | wc -l` partitions"
  echo
  sleep 1

@@ -178,11 +182,11 @@ do
  done
  echo
  sleep 1
-done </tmp/$UID.pegasus.offline_node.rs.list
+done </tmp/$UID.$PID.pegasus.offline_node.rs.list

 echo "Set lb.assign_delay_ms to DEFAULT..."
-echo "remote_command -l $pmeta meta.lb.assign_delay_ms DEFAULT" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node.assign_delay_ms
-set_ok=`grep OK /tmp/$UID.pegasus.offline_node.assign_delay_ms | wc -l`
+echo "remote_command -l $pmeta meta.lb.assign_delay_ms DEFAULT" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node.assign_delay_ms
+set_ok=`grep OK /tmp/$UID.$PID.pegasus.offline_node.assign_delay_ms | wc -l`
 if [ $set_ok -ne 1 ]; then
  echo "ERROR: set lb.assign_delay_ms to DEFAULT failed"
  exit -1
@@ -192,3 +196,5 @@ echo
 all_finish_time=$((`date +%s`))
 echo "Offline replica server task $replica_task_id done."
 echo "Elapsed time is $((all_finish_time - all_start_time)) seconds."
+
+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/pegasus_offline_node_list.sh
+++ b/scripts/pegasus_offline_node_list.sh
@@ -3,6 +3,8 @@
 # Offline replica servers using minos.
 #

+PID=$$
+
 if [ $# -le 2 ]; then
  echo "USAGE: $0 <cluster-name> <cluster-meta-list> <replica-task-id-list>"
  echo
@@ -34,39 +36,41 @@ if [ ! -f $minos_client ]; then
  exit -1
 fi

+echo "UID=$UID"
+echo "PID=$PID"
 echo "Start time: `date`"
 all_start_time=$((`date +%s`))
 echo

-echo "Generating /tmp/$UID.pegasus.offline_node_list.minos.show..."
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.minos.show..."
 cd $minos_client_dir
-./deploy show pegasus $cluster &>/tmp/$UID.pegasus.offline_node_list.minos.show
+./deploy show pegasus $cluster &>/tmp/$UID.$PID.pegasus.offline_node_list.minos.show

-echo "Generating /tmp/$UID.pegasus.offline_node_list.rs.list..."
-grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.pegasus.offline_node_list.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.pegasus.offline_node_list.rs.list
-replica_server_count=`cat /tmp/$UID.pegasus.offline_node_list.rs.list | wc -l`
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.rs.list..."
+grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.$PID.pegasus.offline_node_list.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.$PID.pegasus.offline_node_list.rs.list
+replica_server_count=`cat /tmp/$UID.$PID.pegasus.offline_node_list.rs.list | wc -l`
 if [ $replica_server_count -eq 0 ]; then
  echo "ERROR: replica server count is 0 by minos show"
  exit -1
 fi
 cd $shell_dir

-echo "Generating /tmp/$UID.pegasus.offline_node_list.cluster_info..."
-echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.cluster_info
-cname=`grep zookeeper_root /tmp/$UID.pegasus.offline_node_list.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.cluster_info..."
+echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node_list.cluster_info
+cname=`grep zookeeper_root /tmp/$UID.$PID.pegasus.offline_node_list.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
 if [ "$cname" != "$cluster" ]; then
  echo "ERROR: cluster name and meta list not matched"
  exit -1
 fi
-pmeta=`grep primary_meta_server /tmp/$UID.pegasus.offline_node_list.cluster_info | grep -o '[0-9.:]*$'`
+pmeta=`grep primary_meta_server /tmp/$UID.$PID.pegasus.offline_node_list.cluster_info | grep -o '[0-9.:]*$'`
 if [ "$pmeta" == "" ]; then
  echo "ERROR: extract primary_meta_server by shell failed"
  exit -1
 fi

-echo "Generating /tmp/$UID.pegasus.offline_node_list.nodes..."
-echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.nodes
-rs_port=`grep '^[0-9.]*:' /tmp/$UID.pegasus.offline_node_list.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
+echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.nodes..."
+echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node_list.nodes
+rs_port=`grep '^[0-9.]*:' /tmp/$UID.$PID.pegasus.offline_node_list.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
 if [ "$rs_port" == "" ]; then
  echo "ERROR: extract replica server port by shell failed"
  exit -1
@@ -82,9 +86,9 @@ for id in `echo $replica_task_id_list | sed 's/,/ /g'` ; do
      exit -1;
    fi
  fi
-  pair=`grep "^$id " /tmp/$UID.pegasus.offline_node_list.rs.list`
+  pair=`grep "^$id " /tmp/$UID.$PID.pegasus.offline_node_list.rs.list`
  if [ "$pair" == "" ]; then
-    echo "ERROR: replica task id $id not found, refer to /tmp/$UID.pegasus.offline_node_list.minos.show"
+    echo "ERROR: replica task id $id not found, refer to /tmp/$UID.$PID.pegasus.offline_node_list.minos.show"
    exit -1;
  fi
  address=`echo $pair | awk '{print $2}'`
@@ -98,10 +102,10 @@ for id in `echo $replica_task_id_list | sed 's/,/ /g'` ; do
 done

 echo "Set lb.assign_secondary_black_list..."
-echo "remote_command -l $pmeta meta.lb.assign_secondary_black_list $address_list" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list
-set_ok=`grep "set ok" /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list | wc -l`
+echo "remote_command -l $pmeta meta.lb.assign_secondary_black_list $address_list" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list
+set_ok=`grep "set ok" /tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list | wc -l`
 if [ $set_ok -ne 1 ]; then
-  echo "ERROR: set lb.assign_secondary_black_list failed, refer to /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list"
+  echo "ERROR: set lb.assign_secondary_black_list failed, refer to /tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list"
  exit -1
 fi

@@ -120,13 +124,15 @@ do
 done

 echo "Clear lb.assign_secondary_black_list..."
-echo "remote_command -l $pmeta meta.lb.assign_secondary_black_list clear" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list
-set_ok=`grep "clear ok" /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list | wc -l`
+echo "remote_command -l $pmeta meta.lb.assign_secondary_black_list clear" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list
+set_ok=`grep "clear ok" /tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list | wc -l`
 if [ $set_ok -ne 1 ]; then
-  echo "ERROR: clear lb.assign_secondary_black_list failed, refer to /tmp/$UID.pegasus.offline_node_list.assign_secondary_black_list"
+  echo "ERROR: clear lb.assign_secondary_black_list failed, refer to /tmp/$UID.$PID.pegasus.offline_node_list.assign_secondary_black_list"
  exit -1
 fi

 all_finish_time=$((`date +%s`))
 echo "Offline replica server task list done."
 echo "Elapsed time is $((all_finish_time - all_start_time)) seconds."
+
+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/pegasus_rolling_update.sh
+++ b/scripts/pegasus_rolling_update.sh
@@ -3,6 +3,8 @@
 # Rolling update pegasus cluster using minos.
 #

+PID=$$
+
 if [ $# -le 3 ]; then
  echo "USAGE: $0 <cluster-name> <cluster-meta-list> <type> <start_task_id>"
  echo
@@ -45,47 +47,49 @@ if [ ! -f $minos_client ]; then
  exit -1
 fi

+echo "UID=$UID"
+echo "PID=$PID"
 echo "Start time: `date`"
 all_start_time=$((`date +%s`))
 echo

-echo "Generating /tmp/$UID.pegasus.rolling_update.minos.show..."
+echo "Generating /tmp/$UID.$PID.pegasus.rolling_update.minos.show..."
 cd $minos_client_dir
-./deploy show pegasus $cluster &>/tmp/$UID.pegasus.rolling_update.minos.show
+./deploy show pegasus $cluster &>/tmp/$UID.$PID.pegasus.rolling_update.minos.show

-echo "Generating /tmp/$UID.pegasus.rolling_update.rs.list..."
-grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.pegasus.rolling_update.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.pegasus.rolling_update.rs.list
-replica_server_count=`cat /tmp/$UID.pegasus.rolling_update.rs.list | wc -l`
+echo "Generating /tmp/$UID.$PID.pegasus.rolling_update.rs.list..."
+grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.$PID.pegasus.rolling_update.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.$PID.pegasus.rolling_update.rs.list
+replica_server_count=`cat /tmp/$UID.$PID.pegasus.rolling_update.rs.list | wc -l`
 if [ $replica_server_count -eq 0 ]; then
  echo "ERROR: replica server count is 0 by minos show"
  exit -1
 fi
 cd $shell_dir

-echo "Generating /tmp/$UID.pegasus.rolling_update.cluster_info..."
-echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.cluster_info
-cname=`grep zookeeper_root /tmp/$UID.pegasus.rolling_update.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
+echo "Generating /tmp/$UID.$PID.pegasus.rolling_update.cluster_info..."
+echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.cluster_info
+cname=`grep zookeeper_root /tmp/$UID.$PID.pegasus.rolling_update.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
 if [ "$cname" != "$cluster" ]; then
  echo "ERROR: cluster name and meta list not matched"
  exit -1
 fi
-pmeta=`grep primary_meta_server /tmp/$UID.pegasus.rolling_update.cluster_info | grep -o '[0-9.:]*$'`
+pmeta=`grep primary_meta_server /tmp/$UID.$PID.pegasus.rolling_update.cluster_info | grep -o '[0-9.:]*$'`
 if [ "$pmeta" == "" ]; then
  echo "ERROR: extract primary_meta_server by shell failed"
  exit -1
 fi

-echo "Generating /tmp/$UID.pegasus.rolling_update.nodes..."
-echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.nodes
-rs_port=`grep '^[0-9.]*:' /tmp/$UID.pegasus.rolling_update.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
+echo "Generating /tmp/$UID.$PID.pegasus.rolling_update.nodes..."
+echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.nodes
+rs_port=`grep '^[0-9.]*:' /tmp/$UID.$PID.pegasus.rolling_update.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
 if [ "$rs_port" == "" ]; then
  echo "ERROR: extract replica server port by shell failed"
  exit -1
 fi

 echo "Set meta level to steady..."
-echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.set_meta_level
-set_ok=`grep 'control meta level ok' /tmp/$UID.pegasus.rolling_update.set_meta_level | wc -l`
+echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.set_meta_level
+set_ok=`grep 'control meta level ok' /tmp/$UID.$PID.pegasus.rolling_update.set_meta_level | wc -l`
 if [ $set_ok -ne 1 ]; then
  echo "ERROR: set meta level to steady failed"
  exit -1
@@ -114,17 +118,17 @@ do
  echo

  echo "Set lb.add_secondary_max_count_for_one_node to 0..."
-  echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node 0" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.add_secondary_max_count_for_one_node
-  set_ok=`grep OK /tmp/$UID.pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
+  echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node 0" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.add_secondary_max_count_for_one_node
+  set_ok=`grep OK /tmp/$UID.$PID.pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
  if [ $set_ok -ne 1 ]; then
    echo "ERROR: set lb.add_secondary_max_count_for_one_node to 0 failed"
    exit -1
  fi

  echo "Migrating primary replicas out of node..."
-  ./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/$UID.pegasus.rolling_update.migrate_node
+  ./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.rolling_update.migrate_node
  echo "Wait [$node] to migrate done..."
-  echo "Refer to /tmp/$UID.pegasus.rolling_update.migrate_node for details"
+  echo "Refer to /tmp/$UID.$PID.pegasus.rolling_update.migrate_node for details"
  while true
  do
    pri_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $4}'`
@@ -140,9 +144,9 @@ do
  sleep 1

  echo "Downgrading replicas on node..."
-  ./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/$UID.pegasus.rolling_update.downgrade_node
+  ./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/$UID.$PID.pegasus.rolling_update.downgrade_node
  echo "Wait [$node] to downgrade done..."
-  echo "Refer to /tmp/$UID.pegasus.rolling_update.downgrade_node for details"
+  echo "Refer to /tmp/$UID.$PID.pegasus.rolling_update.downgrade_node for details"
  while true
  do
    rep_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $3}'`
@@ -158,13 +162,13 @@ do
  sleep 1

  echo "Send kill_partition commands to node..."
-  grep '^propose ' /tmp/$UID.pegasus.rolling_update.downgrade_node >/tmp/$UID.pegasus.rolling_update.downgrade_node.propose
+  grep '^propose ' /tmp/$UID.$PID.pegasus.rolling_update.downgrade_node >/tmp/$UID.$PID.pegasus.rolling_update.downgrade_node.propose
  while read line2 
  do
    gpid=`echo $line2 | awk '{print $3}' | sed 's/\./ /'`
-    echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.kill_partition
-  done </tmp/$UID.pegasus.rolling_update.downgrade_node.propose
-  echo "Sent to `cat /tmp/$UID.pegasus.rolling_update.downgrade_node.propose | wc -l` partitions."
+    echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.kill_partition
+  done </tmp/$UID.$PID.pegasus.rolling_update.downgrade_node.propose
+  echo "Sent to `cat /tmp/$UID.$PID.pegasus.rolling_update.downgrade_node.propose | wc -l` partitions."
  echo
  sleep 1

@@ -172,10 +176,10 @@ do
  sleeped=0
  while true
  do
-    echo "remote_command -l $node perf-counters '.*replica(Count)'" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.replica_count_perf_counters
-    serving_count=`grep -o 'replica_stub.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
-    opening_count=`grep -o 'replica_stub.opening.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
-    closing_count=`grep -o 'replica_stub.closing.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
+    echo "remote_command -l $node perf-counters '.*replica(Count)'" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.replica_count_perf_counters
+    serving_count=`grep -o 'replica_stub.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.$PID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
+    opening_count=`grep -o 'replica_stub.opening.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.$PID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
+    closing_count=`grep -o 'replica_stub.closing.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/$UID.$PID.pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
    if [ "$serving_count" = "" -o "$opening_count" = "" -o "$closing_count" = "" ]; then
      echo "ERROR: extract replica count from perf counters failed"
      exit -1
@@ -199,8 +203,8 @@ do
  echo "remote_command -l $node flush-log" | ./run.sh shell --cluster $meta_list &>/dev/null

  echo "Set lb.add_secondary_max_count_for_one_node to 100..."
-  echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node 100" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.add_secondary_max_count_for_one_node
-  set_ok=`grep OK /tmp/$UID.pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
+  echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node 100" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.add_secondary_max_count_for_one_node
+  set_ok=`grep OK /tmp/$UID.$PID.pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
  if [ $set_ok -ne 1 ]; then
    echo "ERROR: set lb.add_secondary_max_count_for_one_node to 100 failed"
    exit -1
@@ -250,11 +254,11 @@ do
  if [ "$type" = "one" ]; then
    break
  fi
-done </tmp/$UID.pegasus.rolling_update.rs.list
+done </tmp/$UID.$PID.pegasus.rolling_update.rs.list

 echo "Set lb.add_secondary_max_count_for_one_node to DEFAULT..."
-echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node DEFAULT" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.add_secondary_max_count_for_one_node
-set_ok=`grep OK /tmp/$UID.pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
+echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node DEFAULT" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.add_secondary_max_count_for_one_node
+set_ok=`grep OK /tmp/$UID.$PID.pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
 if [ $set_ok -ne 1 ]; then
  echo "ERROR: set lb.add_secondary_max_count_for_one_node to DEFAULT failed"
  exit -1
@@ -270,8 +274,8 @@ if [ "$type" = "all" ]; then
  echo

  echo "Set meta level to lively..."
-  echo "set_meta_level lively" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.set_meta_level
-  set_ok=`grep 'control meta level ok' /tmp/$UID.pegasus.rolling_update.set_meta_level | wc -l`
+  echo "set_meta_level lively" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.set_meta_level
+  set_ok=`grep 'control meta level ok' /tmp/$UID.$PID.pegasus.rolling_update.set_meta_level | wc -l`
  if [ $set_ok -ne 1 ]; then
    echo "ERROR: set meta level to lively failed"
    exit -1
@@ -283,8 +287,8 @@ if [ "$type" = "all" ]; then
  echo

  echo "Set meta level to steady..."
-  echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.pegasus.rolling_update.set_meta_level
-  set_ok=`grep 'control meta level ok' /tmp/$UID.pegasus.rolling_update.set_meta_level | wc -l`
+  echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rolling_update.set_meta_level
+  set_ok=`grep 'control meta level ok' /tmp/$UID.$PID.pegasus.rolling_update.set_meta_level | wc -l`
  if [ $set_ok -ne 1 ]; then
    echo "ERROR: set meta level to steady failed"
    exit -1
@@ -295,3 +299,5 @@ fi
 echo "Finish time: `date`"
 all_finish_time=$((`date +%s`))
 echo "Rolling update $type done, elasped time is $((all_finish_time - all_start_time)) seconds."
+
+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/pegasus_set_usage_scenario.sh
+++ b/scripts/pegasus_set_usage_scenario.sh
 #!/bin/bash

+PID=$$
+
 if [ $# -ne 3 ]
 then
  echo "This tool is for set usage scenario of specified table(app)."
@@ -21,19 +23,21 @@ if [ "$scenario" != "normal" -a "$scenario" != "prefer_write" -a "$scenario" !=
    exit -1
 fi

+echo "UID=$UID"
+echo "PID=$PID"
 echo "Start time: `date`"
 all_start_time=$((`date +%s`))
 echo

-echo -e "use $app_name\nset_app_envs $scenario_key $scenario" | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.set_app_envs
-set_ok=`grep 'set app envs succeed' /tmp/$UID.pegasus.set_app_envs | wc -l`
+echo -e "use $app_name\nset_app_envs $scenario_key $scenario" | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.set_app_envs
+set_ok=`grep 'set app envs succeed' /tmp/$UID.$PID.pegasus.set_app_envs | wc -l`
 if [ $set_ok -ne 1 ]; then
-  grep ERR /tmp/$UID.pegasus.set_app_envs
-  echo "ERROR: set app envs failed, refer to /tmp/$UID.pegasus.set_app_envs"
+  grep ERR /tmp/$UID.$PID.pegasus.set_app_envs
+  echo "ERROR: set app envs failed, refer to /tmp/$UID.$PID.pegasus.set_app_envs"
  exit -1
 fi

-echo ls | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.ls
+echo ls | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.ls

 while read app_line
 do
@@ -53,8 +57,8 @@ do
    sleeped=0
    while true
    do
-      echo "remote_command -t replica-server replica.query-app-envs $gid" | ./run.sh shell --cluster $cluster &>/tmp/$UID.pegasus.query_app_envs.$app
-      effect_count=`grep "$scenario_key=$scenario" /tmp/$UID.pegasus.query_app_envs.$app | wc -l`
+      echo "remote_command -t replica-server replica.query-app-envs $gid" | ./run.sh shell --cluster $cluster &>/tmp/$UID.$PID.pegasus.query_app_envs.$app
+      effect_count=`grep "$scenario_key=$scenario" /tmp/$UID.$PID.pegasus.query_app_envs.$app | wc -l`
      total_count=$((partition_count * replica_count))
      if [ $effect_count -ge $total_count ]; then
        echo "All finished."
@@ -67,9 +71,10 @@ do
    done
    echo
  fi
-done </tmp/$UID.pegasus.ls
+done </tmp/$UID.$PID.pegasus.ls

 echo "Finish time: `date`"
 all_finish_time=$((`date +%s`))
 echo "Set usage scenario done, elasped time is $((all_finish_time - all_start_time)) seconds."

+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
--- a/scripts/pegasus_stat_available.sh
+++ b/scripts/pegasus_stat_available.sh
 #!/bin/bash

+PID=$$
+
 if [ $# -ne 2 ]; then
  echo "USGAE: $0 <cluster> <filter>"
  echo "   eg: $0 onebox 2017-07"
@@ -29,7 +31,7 @@ if [ "$detect_table" == "" ]; then
 fi

 result_file="pegasus.stat_available.scan_result"
-tmp_file="/tmp/$UID.pegasus.stat_available.scan"
+tmp_file="/tmp/$UID.$PID.pegasus.stat_available.scan"
 echo -e "use $detect_table\nhash_scan detect_available_day '' '' -s prefix -y \"$filter\" -o $result_file" | ./run.sh shell -n $cluster &>$tmp_file
 scan_ok=`grep 'key-value pairs got' $tmp_file | wc -l`
 if [ $scan_ok -ne 1 ]; then
@@ -48,3 +50,5 @@ fi
 available=`cat $result_file | grep -o '[0-9]*,[0-9]*,[0-9]*' | awk -F, '{a+=$1;b+=$2}END{printf("%f\n",(double)b/a);}'`
 rm -f $result_file
 echo "$cluster $filter $days $available"
+
+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null