Commit 6e1281db authored by Q qinzuoyan and committed by WeijieSun

improve onebox layout; improve pegasus_rolling_update.sh; add pegasus_offline_node.sh

Summary: Ref T10570

Test Plan: N/A

Reviewers: sunweijie, cailiuyang

Reviewed By: sunweijie

Subscribers: #pegasus

Maniphest Tasks: T10570

Differential Revision: https://phabricator.d.xiaomi.net/D83607

Conflicts:
	rdsn
Parent 456200bb
-Subproject commit 3c46af2ec013abeb20e6a0d2ec6b16bc6d0e4758
+Subproject commit 2b3cbff75c10fd04998d59ea89b560876ce5567c
@@ -414,39 +414,41 @@ function run_start_onebox()
echo "ERROR: file ${DSN_ROOT}/bin/pegasus_server/pegasus_server not exist"
exit -1
fi
if ps -ef | grep ' \./pegasus_server config.ini' | grep -E 'app_list meta@|app_list replica@'; then
if ps -ef | grep ' /pegasus_server config.ini' | grep -E 'app_list meta|app_list replica'; then
echo "ERROR: some onebox processes are running, start failed"
exit -1
fi
ln -s -f ${DSN_ROOT}/bin/pegasus_server/pegasus_server
run_start_zk
sed "s/@LOCAL_IP@/`hostname -i`/g;s/@META_COUNT@/${META_COUNT}/g;s/@REPLICA_COUNT@/${REPLICA_COUNT}/g;s/@APP_NAME@/${APP_NAME}/g;s/@PARTITION_COUNT@/${PARTITION_COUNT}/g" \
sed "s/@LOCAL_IP@/`hostname -i`/g;s/@APP_NAME@/${APP_NAME}/g;s/@PARTITION_COUNT@/${PARTITION_COUNT}/g" \
${ROOT}/src/server/config-server.ini >${ROOT}/config-server.ini
echo "starting server"
mkdir -p onebox
cd onebox
for i in $(seq ${META_COUNT})
do
meta_port=$((34600+i))
mkdir -p meta$i;
cd meta$i
ln -s -f ${DSN_ROOT}/bin/pegasus_server/pegasus_server pegasus_server
ln -s -f ${ROOT}/config-server.ini config.ini
echo "cd `pwd` && ./pegasus_server config.ini -app_list meta@$i &>result &"
./pegasus_server config.ini -app_list meta@$i &>result &
sed "s/@META_PORT@/$meta_port/;s/@REPLICA_PORT@/34800/" ${ROOT}/config-server.ini >config.ini
echo "cd `pwd` && ../meta$i/pegasus_server config.ini -app_list meta &>result &"
../meta$i/pegasus_server config.ini -app_list meta &>result &
PID=$!
ps -ef | grep ' \./pegasus_server config.ini' | grep "\<$PID\>"
ps -ef | grep '/pegasus_server config.ini' | grep "\<$PID\>"
cd ..
done
for j in $(seq ${REPLICA_COUNT})
do
replica_port=$((34800+j))
mkdir -p replica$j
cd replica$j
ln -s -f ${DSN_ROOT}/bin/pegasus_server/pegasus_server pegasus_server
ln -s -f ${ROOT}/config-server.ini config.ini
echo "cd `pwd` && ./pegasus_server config.ini -app_list replica@$j &>result &"
./pegasus_server config.ini -app_list replica@$j &>result &
sed "s/@META_PORT@/34600/;s/@REPLICA_PORT@/$replica_port/" ${ROOT}/config-server.ini >config.ini
echo "cd `pwd` && ../replica$j/pegasus_server config.ini -app_list replica &>result &"
../replica$j/pegasus_server config.ini -app_list replica &>result &
PID=$!
ps -ef | grep ' \./pegasus_server config.ini' | grep "\<$PID\>"
ps -ef | grep '/pegasus_server config.ini' | grep "\<$PID\>"
cd ..
done
}
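The new layout renders a separate config.ini per instance, with its own ports substituted, instead of symlinking one shared file; this is what lets each meta and replica bind a distinct port. A minimal sketch of that templating step, assuming the @META_PORT@/@REPLICA_PORT@ placeholders from config-server.ini (the three-meta loop is illustrative):

    # Sketch: render one config per meta instance from the shared template.
    # Meta i listens on 34600+i and reaches replicas on 34800, as above.
    for i in 1 2 3; do
        mkdir -p onebox/meta$i
        sed "s/@META_PORT@/$((34600+i))/;s/@REPLICA_PORT@/34800/" \
            config-server.ini >onebox/meta$i/config.ini
    done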
@@ -478,7 +480,7 @@ function run_stop_onebox()
esac
shift
done
ps -ef | grep ' \./pegasus_server config.ini' | grep -E 'app_list meta@|app_list replica@' | awk '{print $2}' | xargs kill &>/dev/null
ps -ef | grep '/pegasus_server config.ini' | grep -E 'app_list meta|app_list replica' | awk '{print $2}' | xargs kill &>/dev/null
}
#####################
@@ -508,7 +510,7 @@ function run_list_onebox()
esac
shift
done
ps -ef | grep ' \./pegasus_server config.ini' | grep -E 'app_list meta@|app_list replica@' | sort -k11
ps -ef | grep '/pegasus_server config.ini' | grep -E 'app_list meta|app_list replica' | sort -k11
}
#####################
@@ -598,17 +600,17 @@ function run_start_onebox_instance()
echo "ERROR: invalid meta_id"
exit -1
fi
if ps -ef | grep ' \./pegasus_server config.ini' | grep "app_list meta@$META_ID\>" ; then
if ps -ef | grep "/meta$META_ID/pegasus_server config.ini" | grep "app_list meta" ; then
echo "INFO: meta@$META_ID already running"
exit -1
fi
cd $dir
echo "cd `pwd` && ./pegasus_server config.ini -app_list meta@$META_ID &>result &"
./pegasus_server config.ini -app_list meta@$META_ID &>result &
echo "cd `pwd` && ../meta$META_ID/pegasus_server config.ini -app_list meta &>result &"
../meta$META_ID/pegasus_server config.ini -app_list meta &>result &
PID=$!
ps -ef | grep ' \./pegasus_server config.ini' | grep "\<$PID\>"
ps -ef | grep '/pegasus_server config.ini' | grep "\<$PID\>"
cd ..
echo "INFO: meta@$META started"
echo "INFO: meta@$META_ID started"
fi
if [ $REPLICA_ID != "0" ]; then
dir=onebox/replica$REPLICA_ID
@@ -616,15 +618,15 @@ function run_start_onebox_instance()
echo "ERROR: invalid replica_id"
exit -1
fi
if ps -ef | grep ' \./pegasus_server config.ini' | grep "app_list replica@$REPLICA_ID\>" ; then
if ps -ef | grep "/replica$REPLICA_ID/pegasus_server config.ini" | grep "app_list replica" ; then
echo "INFO: replica@$REPLICA_ID already running"
exit -1
fi
cd $dir
echo "cd `pwd` && ./pegasus_server config.ini -app_list replica@$REPLICA_ID &>result &"
./pegasus_server config.ini -app_list replica@$REPLICA_ID &>result &
echo "cd `pwd` && ../replica$REPLICA_ID/pegasus_server config.ini -app_list replica &>result &"
../replica$REPLICA_ID/pegasus_server config.ini -app_list replica &>result &
PID=$!
ps -ef | grep ' \./pegasus_server config.ini' | grep "\<$PID\>"
ps -ef | grep '/pegasus_server config.ini' | grep "\<$PID\>"
cd ..
echo "INFO: replica@$REPLICA_ID started"
fi
@@ -685,11 +687,11 @@ function run_stop_onebox_instance()
echo "ERROR: invalid meta_id"
exit -1
fi
if ! ps -ef | grep ' \./pegasus_server config.ini' | grep "app_list meta@$META_ID\>" ; then
if ! ps -ef | grep "/meta$META_ID/pegasus_server config.ini" | grep "app_list meta" ; then
echo "INFO: meta@$META_ID is not running"
exit -1
fi
ps -ef | grep ' \./pegasus_server config.ini' | grep "app_list meta@$META_ID\>" | awk '{print $2}' | xargs kill &>/dev/null
ps -ef | grep "/meta$META_ID/pegasus_server config.ini" | grep "app_list meta" | awk '{print $2}' | xargs kill &>/dev/null
echo "INFO: meta@$META_ID stopped"
fi
if [ $REPLICA_ID != "0" ]; then
@@ -698,11 +700,11 @@ function run_stop_onebox_instance()
echo "ERROR: invalid replica_id"
exit -1
fi
if ! ps -ef | grep ' \./pegasus_server config.ini' | grep "app_list replica@$REPLICA_ID\>" ; then
if ! ps -ef | grep "/replica$REPLICA_ID/pegasus_server config.ini" | grep "app_list replica" ; then
echo "INFO: replica@$REPLICA_ID is not running"
exit -1
fi
ps -ef | grep ' \./pegasus_server config.ini' | grep "app_list replica@$REPLICA_ID\>" | awk '{print $2}' | xargs kill &>/dev/null
ps -ef | grep "/replica$REPLICA_ID/pegasus_server config.ini" | grep "app_list replica" | awk '{print $2}' | xargs kill &>/dev/null
echo "INFO: replica@$REPLICA_ID stopped"
fi
}
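Since the -app_list meta@$i suffix no longer appears on the command line, these functions now tell instances apart by the per-directory binary path that ps reports (../meta$i/pegasus_server). A hedged one-liner showing the idea, with meta2 as an arbitrary example:

    # Sketch: the instance's cwd-specific path identifies it; grep -v grep
    # drops the grep process itself from the listing.
    ps -ef | grep '/meta2/pegasus_server config.ini' | grep -v grep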
......
cluster_info
server_info
ls -d
nodes -d
app_stat
query_backup_policy -p every_day
@@ -70,6 +70,7 @@ do
if [ "$type" = "run" ]
then
cat /tmp/pegasus.cmd.$app
cat /tmp/pegasus.cmd.$app | ./run.sh shell --cluster $cluster 2>/dev/null
echo
echo
......
#!/bin/bash
#
# Offline replica server using minos.
#
if [ $# -le 2 ]; then
echo "USAGE: $0 <cluster-name> <cluster-meta-list> <start_task_id>"
echo
echo "For example:"
echo " $0 onebox 127.0.0.1:34601,127.0.0.1:34602 0"
echo
exit -1
fi
cluster=$1
meta_list=$2
start_task_id=$3
pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd $pwd/.. && pwd )"
minos_config_dir=$(dirname $MINOS_CONFIG_FILE)/xiaomi-config/conf/pegasus
minos_client_dir=/home/work/pegasus/infra/minos/client
cd $shell_dir
minos_config=$minos_config_dir/pegasus-${cluster}.cfg
if [ ! -f $minos_config ]; then
echo "ERROR: minos config \"$minos_config\" not found"
exit -1
fi
minos_client=$minos_client_dir/deploy
if [ ! -f $minos_client ]; then
echo "ERROR: minos client \"$minos_client\" not found"
exit -1
fi
echo "Start time: `date`"
all_start_time=$((`date +%s`))
echo
echo "Generating /tmp/pegasus.offline_node.minos.show..."
cd $minos_client_dir
./deploy show pegasus $cluster &>/tmp/pegasus.offline_node.minos.show
echo "Generating /tmp/pegasus.offline_node.rs.list..."
grep 'Showing task [0-9][0-9]* of replica' /tmp/pegasus.offline_node.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/pegasus.offline_node.rs.list
replica_server_count=`cat /tmp/pegasus.offline_node.rs.list | wc -l`
if [ $replica_server_count -eq 0 ]; then
echo "ERROR: replica server count is 0 by minos show"
exit -1
fi
cd $shell_dir
echo "Generating /tmp/pegasus.offline_node.cluster_info..."
echo cluster_info | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.offline_node.cluster_info
cname=`grep zookeeper_root /tmp/pegasus.offline_node.cluster_info | grep -o '/[^/]*$' | grep -o '[^/]*$'`
if [ "$cname" != "$cluster" ]; then
echo "ERROR: cluster name and meta list not matched"
exit -1
fi
pmeta=`grep primary_meta_server /tmp/pegasus.offline_node.cluster_info | grep -o '[0-9.:]*$'`
if [ "$pmeta" == ""]; then
echo "ERROR: extract primary_meta_server by shell failed"
exit -1
fi
echo "Generating /tmp/pegasus.offline_node.nodes..."
echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.offline_node.nodes
rs_port=`grep '^[0-9.]*:' /tmp/pegasus.offline_node.nodes | head -n 1 | grep -o ':[0-9]*' | grep -o '[0-9]*'`
if [ "$rs_port" == "" ]; then
echo "ERROR: extract replica server port by shell failed"
exit -1
fi
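The extractions above all chain grep -o passes, each narrowing the previous match. A small illustration; the input line is fabricated to mirror the shape of the nodes output:

    # Sketch: first pass keeps ':34801', second pass strips the colon.
    echo '10.0.0.1:34801 ALIVE' | grep -o ':[0-9]*' | grep -o '[0-9]*'
    # prints 34801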
echo "Set meta level to steady..."
echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.offline_node.set_meta_level
set_ok=`grep 'control meta level ok' /tmp/pegasus.offline_node.set_meta_level | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: set meta level to steady failed"
exit -1
fi
echo "Set lb.assign_delay_ms to 10..."
echo "remote_command -l $pmeta meta.lb.assign_delay_ms 10" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.offline_node.assign_delay_ms
set_ok=`grep OK /tmp/pegasus.offline_node.assign_delay_ms | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: set lb.assign_delay_ms to 10 failed"
exit -1
fi
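Both remote_command settings follow the same set-and-verify shape: run the command through the shell, capture the output, and require exactly one acknowledgement line. A sketch of that pattern as a helper; the function name is invented for illustration:

    # Hypothetical helper wrapping the set-and-verify pattern used above.
    set_lb_option() {
        local opt=$1 val=$2 log=/tmp/pegasus.offline_node.$opt
        echo "remote_command -l $pmeta meta.lb.$opt $val" \
            | ./run.sh shell --cluster $meta_list &>$log
        if [ `grep -c OK $log` -ne 1 ]; then
            echo "ERROR: set lb.$opt to $val failed"
            exit -1
        fi
    }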
echo
while read line
do
task_id=`echo $line | awk '{print $1}'`
if [ $task_id -ne $start_task_id ]; then
continue
fi
node_str=`echo $line | awk '{print $2}'`
node_ip=`getent hosts $node_str | awk '{print $1}'`
node_name=`getent hosts $node_str | awk '{print $2}'`
node=${node_ip}:${rs_port}
echo "=================================================================="
echo "=================================================================="
echo "Offline replica server task $task_id of [$node_name] [$node]..."
echo
echo "Getting serving replica count..."
serving_replica_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $3}'`
echo "servicing_replica_count=$serving_replica_count"
echo
echo "Migrating primary replicas out of node..."
./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/pegasus.offline_node.migrate_node
echo "Wait [$node] to migrate done..."
echo "Refer to /tmp/pegasus.offline_node.migrate_node for details"
while true
do
pri_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $4}'`
if [ $pri_count -eq 0 ]; then
echo "Migrate done."
break
else
echo "Still $pri_count primary replicas left on $node"
sleep 1
fi
done
echo
sleep 1
echo "Downgrading replicas on node..."
./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/pegasus.offline_node.downgrade_node
echo "Wait [$node] to downgrade done..."
echo "Refer to /tmp/pegasus.offline_node.downgrade_node for details"
while true
do
rep_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $3}'`
if [ $rep_count -eq 0 ]; then
echo "Downgrade done."
break
else
echo "Still $rep_count replicas left on $node"
sleep 1
fi
done
echo
sleep 1
echo "Send kill_partition to node..."
grep '^propose ' /tmp/pegasus.offline_node.downgrade_node >/tmp/pegasus.offline_node.downgrade_node.propose
while read line2
do
gpid=`echo $line2 | awk '{print $3}' | sed 's/\./ /'`
echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.offline_node.kill_partition
done </tmp/pegasus.offline_node.downgrade_node.propose
echo "Sent kill_partition to `cat /tmp/pegasus.offline_node.downgrade_node.propose | wc -l` partitions"
echo
sleep 1
echo "Stop node by minos..."
cd $minos_client_dir
./deploy stop pegasus $cluster --skip_confirm --job replica --task $task_id
cd $shell_dir
echo "Stop node by minos done."
echo
sleep 1
echo "Wait cluster to become healthy..."
while true
do
unhealthy_count=`echo "ls -d" | ./run.sh shell --cluster $meta_list | awk 'BEGIN{s=0} f{ if($NF<7){f=0} else if($3!=$4){s=s+$5+$6} } /fully_healthy_num/{f=1} END{print s}'`
if [ $unhealthy_count -eq 0 ]; then
echo "Cluster becomes healthy"
break
else
echo "Cluster not healthy, unhealthy_partition_count = $unhealthy_count"
sleep 10
fi
done
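The awk program starts counting only after the fully_healthy_num line and sums the unhealthy columns of every app row whose fully-healthy count differs from its partition count. A schematic check, assuming a column layout in which $3/$4 are the partition/fully-healthy counts and $5/$6 the unhealthy buckets (the sample rows are fabricated):

    printf 'fully_healthy_num : 1\napp1 AVAILABLE 8 8 0 0 7\napp2 AVAILABLE 8 6 1 1 7\n' \
        | awk 'BEGIN{s=0} f{ if($NF<7){f=0} else if($3!=$4){s=s+$5+$6} } /fully_healthy_num/{f=1} END{print s}'
    # prints 2: app2 has 1+1 unhealthy partitions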
echo
sleep 1
done </tmp/pegasus.offline_node.rs.list
echo "Set lb.assign_delay_ms to DEFAULT..."
echo "remote_command -l $pmeta meta.lb.assign_delay_ms DEFAULT" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.offline_node.assign_delay_ms
set_ok=`grep OK /tmp/pegasus.offline_node.assign_delay_ms | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: set lb.assign_delay_ms to DEFAULT failed"
exit -1
fi
echo
all_finish_time=$((`date +%s`))
echo "Offline replica server task $start_task_id done."
echo "Elapsed time is $((all_finish_time - all_start_time)) seconds."
@@ -69,6 +69,11 @@ if [ "$cname" != "$cluster" ]; then
echo "ERROR: cluster name and meta list not matched"
exit -1
fi
pmeta=`grep primary_meta_server /tmp/pegasus.rolling_update.cluster_info | grep -o '[0-9.:]*$'`
if [ "$pmeta" == "" ]; then
echo "ERROR: extract primary_meta_server by shell failed"
exit -1
fi
echo "Generating /tmp/pegasus.rolling_update.nodes..."
echo nodes | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.rolling_update.nodes
@@ -108,6 +113,14 @@ do
echo "servicing_replica_count=$serving_replica_count"
echo
echo "Set lb.add_secondary_max_count_for_one_node to 0..."
echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node 0" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.rolling_update.add_secondary_max_count_for_one_node
set_ok=`grep OK /tmp/pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: set lb.add_secondary_max_count_for_one_node to 0 failed"
exit -1
fi
echo "Migrating primary replicas out of node..."
./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/pegasus.rolling_update.migrate_node
echo "Wait [$node] to migrate done..."
@@ -124,7 +137,7 @@ do
fi
done
echo
sleep 3
sleep 1
echo "Downgrading replicas on node..."
./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/pegasus.rolling_update.downgrade_node
@@ -142,7 +155,52 @@ do
fi
done
echo
sleep 3
sleep 1
echo "Send kill_partition to node..."
grep '^propose ' /tmp/pegasus.rolling_update.downgrade_node >/tmp/pegasus.rolling_update.downgrade_node.propose
while read line2
do
gpid=`echo $line2 | awk '{print $3}' | sed 's/\./ /'`
echo "remote_command -l $node replica.kill_partition $gpid" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.rolling_update.kill_partition
done </tmp/pegasus.rolling_update.downgrade_node.propose
echo "Sent kill_partition to `cat /tmp/pegasus.rolling_update.downgrade_node.propose | wc -l` partitions"
echo
sleep 1
echo "Checking replicas closed on node..."
sleeped=0
while true
do
echo "remote_command -l $node perf-counters '.*replica(Count)'" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.rolling_update.replica_count_perf_counters
serving_count=`grep -o 'replica_stub.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
opening_count=`grep -o 'replica_stub.opening.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
closing_count=`grep -o 'replica_stub.closing.replica(Count)","type":"NUMBER","value":[0-9]*' /tmp/pegasus.rolling_update.replica_count_perf_counters | grep -o '[0-9]*$'`
if [ "$serving_count" = "" -o "$opening_count" = "" -o "$closing_count" = "" ]; then
echo "ERROR: extract replica count from perf counters failed"
exit -1
fi
rep_count=$((serving_count + opening_count + closing_count))
if [ $rep_count -eq 0 -o $sleeped -gt 20 ]; then
break
else
echo "Still $rep_count replicas not closed on $node"
sleep 1
sleeped=$((sleeped+1))
fi
done
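Each counter value is pulled out of the perf-counters response with two grep -o passes: the first matches the counter name plus its value field, the second keeps only the trailing digits. An isolated check with a fabricated JSON fragment mirroring that pattern:

    echo '{"name":"replica_stub.replica(Count)","type":"NUMBER","value":42}' \
        | grep -o 'replica_stub.replica(Count)","type":"NUMBER","value":[0-9]*' \
        | grep -o '[0-9]*$'
    # prints 42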
echo
sleep 1
echo "remote_command -l $node flush-log" | ./run.sh shell --cluster $meta_list &>/dev/null
echo "Set lb.add_secondary_max_count_for_one_node to 100..."
echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node 100" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.rolling_update.add_secondary_max_count_for_one_node
set_ok=`grep OK /tmp/pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: set lb.add_secondary_max_count_for_one_node to 100 failed"
exit -1
fi
echo "Rolling update by minos..."
cd $minos_client_dir
@@ -150,11 +208,7 @@ do
cd $shell_dir
echo "Rolling update by minos done."
echo
echo "Sleep 20 seconds for server restarting..."
sleep 20
echo "Sleep done."
echo
sleep 1
echo "Wait [$node] to become alive..."
while true
@@ -168,21 +222,21 @@ do
fi
done
echo
sleep 1
echo "Wait cluster to become healthy..."
while true
do
unhealthy_count=`echo "ls -d" | ./run.sh shell --cluster $meta_list | awk 'f{ if($NF<7){f=0} else if($3!=$4){print} } /fully_healthy_num/{f=1}' | wc -l`
if [ $unhealthy_count -eq 0 ]; then
echo "Cluster becomes healthy, sleep 10 seconds before stepping next..."
sleep 10
echo "Cluster becomes healthy."
break
else
sleep 1
fi
done
echo "Sleep done."
echo
sleep 1
finish_time=$((`date +%s`))
echo "Rolling update replica server task $task_id of [$node_name] [$node] done."
@@ -190,21 +244,28 @@ do
echo
if [ "$type" = "one" ]; then
echo "Finish time: `date`"
all_finish_time=$((`date +%s`))
echo "Rolling update one done, elasped time is $((all_finish_time - all_start_time)) seconds."
exit 0
break
fi
done </tmp/pegasus.rolling_update.rs.list
echo "=================================================================="
echo "=================================================================="
echo "Rolling update meta servers and collectors..."
cd $minos_client_dir
./deploy rolling_update pegasus $cluster --skip_confirm --time_interval 10 $update_options --job meta collector
cd $shell_dir
echo
echo "Set lb.add_secondary_max_count_for_one_node to DEFAULT..."
echo "remote_command -l $pmeta meta.lb.add_secondary_max_count_for_one_node DEFAULT" | ./run.sh shell --cluster $meta_list &>/tmp/pegasus.rolling_update.add_secondary_max_count_for_one_node
set_ok=`grep OK /tmp/pegasus.rolling_update.add_secondary_max_count_for_one_node | wc -l`
if [ $set_ok -ne 1 ]; then
echo "ERROR: set lb.add_secondary_max_count_for_one_node to DEFAULT failed"
exit -1
fi
if [ "$type" = "all" ]; then
echo "=================================================================="
echo "=================================================================="
echo "Rolling update meta servers and collectors..."
cd $minos_client_dir
./deploy rolling_update pegasus $cluster --skip_confirm --time_interval 10 $update_options --job meta collector
cd $shell_dir
echo
fi
echo "Finish time: `date`"
all_finish_time=$((`date +%s`))
echo "Rolling update all done, elasped time is $((all_finish_time - all_start_time)) seconds."
echo "Rolling update $type done, elasped time is $((all_finish_time - all_start_time)) seconds."
@@ -9,19 +9,19 @@ count = 1
type = meta
name = meta
arguments =
ports = 34601
ports = @META_PORT@
pools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_META_STATE,THREAD_POOL_FD,THREAD_POOL_DLOCK,THREAD_POOL_LOCAL_SERVICE,THREAD_POOL_FDS_SERVICE
run = true
count = @META_COUNT@
count = 1
[apps.replica]
type = replica
name = replica
arguments =
ports = 34801
ports = @REPLICA_PORT@
pools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_LOCAL_SERVICE,THREAD_POOL_FDS_SERVICE
run = true
count = @REPLICA_COUNT@
count = 1
[core]
;tool = simulator
......
@@ -36,7 +36,9 @@ static const char *s_brief_stat_mapper[] = {
"read_qps", "zion*profiler*RPC_L2_CLIENT_READ.qps",
"GET_P99(ns)", "zion*profiler*RPC_RRDB_RRDB_GET.latency.server",
"MULTI_GET_P99(ns)", "zion*profiler*RPC_RRDB_RRDB_MULTI_GET.latency.server",
"replica_count", "replica*eon.replica_stub*replica(Count)",
"serving_replica_count", "replica*eon.replica_stub*replica(Count)",
"opening_replica_count", "replica*eon.replica_stub*opening.replica(Count)",
"closing_replica_count", "replica*eon.replica_stub*closing.replica(Count)",
"commit_throughput", "replica*eon.replica_stub*replicas.commit.qps",
"learning_count", "replica*eon.replica_stub*replicas.learning.count",
"shared_log_size(MB)", "replica*eon.replica_stub*shared.log.size(MB)",
......
@@ -1558,6 +1558,16 @@ DEFINE_TASK_CODE(UPDATING_ROCKSDB_SSTSIZE, TASK_PRIORITY_COMMON, THREAD_POOL_REP
return ::dsn::ERR_OK;
}
if (!clear_state) {
rocksdb::FlushOptions options;
options.wait = true;
auto status = _db->Flush(options);
if (!status.ok()) {
derror("%s: flush memtable failed: %s",
replica_name(), status.ToString().c_str());
}
}
_context_cache.clear();
// when stopping the replica, the timer_task should be stopped as well.
@@ -1584,6 +1594,8 @@ DEFINE_TASK_CODE(UPDATING_ROCKSDB_SSTSIZE, TASK_PRIORITY_COMMON, THREAD_POOL_REP
_pfc_sst_size->set(0);
}
ddebug("%s: close app succeed, clear_state = %s",
replica_name(), clear_state ? "true" : "false");
return ::dsn::ERR_OK;
}
......
@@ -13,26 +13,37 @@ global_env global_env::inst;
global_env::global_env()
{
std::cout << "============" << std::endl << "start global_env()" << std::endl;
get_dirs();
get_hostip();
}
void global_env::get_dirs()
{
const char *cmd = "readlink /proc/`ps aux | grep pegasus_server | grep -v grep | grep @ | sed "
"-n \"1p\" | awk '{print $2}'`/cwd";
std::stringstream ss;
pipe_execute(cmd, ss);
const char *cmd1 = "ps aux | grep pegasus_server | grep meta1 | awk '{print $2}'";
std::stringstream ss1;
pipe_execute(cmd1, ss1);
int meta1_pid;
ss1 >> meta1_pid;
std::cout << "meta1 pid: " << meta1_pid << std::endl;
// get the dir of a process in onebox, say: $PEGASUS/onebox/meta1
char task_target[512];
ss >> task_target;
char cmd2[512];
sprintf(cmd2, "readlink /proc/%d/cwd", meta1_pid);
std::stringstream ss2;
pipe_execute(cmd2, ss2);
std::string meta1_dir;
ss2 >> meta1_dir;
std::cout << "meta1 dir: " << meta1_dir << std::endl;
_pegasus_root = dirname(dirname(task_target));
std::cout << "get project root: " << _pegasus_root << std::endl;
// dirname() may modify its argument, so copy into a mutable buffer first
// rather than casting away const on the string's internal data.
char root_buf[512];
snprintf(root_buf, sizeof(root_buf), "%s", meta1_dir.c_str());
_pegasus_root = dirname(dirname(root_buf));
std::cout << "project root: " << _pegasus_root << std::endl;
assert(_pegasus_root != ".");
char task_target[512];
assert(getcwd(task_target, sizeof(task_target)) != nullptr);
_working_dir = task_target;
std::cout << "working dir: " << _working_dir << std::endl;
}
void global_env::get_hostip()
@@ -40,7 +51,7 @@ void global_env::get_hostip()
std::stringstream output;
pipe_execute("hostname -i", output);
output >> _host_ip;
std::cout << "get host ip: " << _host_ip << std::endl;
std::cout << "host ip: " << _host_ip << std::endl;
}
/*static*/
......
@@ -165,7 +165,7 @@ public:
snprintf(command,
512,
"cd %s && sed -i \"/^recover_from_replica_server/c recover_from_replica_server = "
"true\" config-server.ini",
"true\" onebox/meta1/config.ini",
global_env::instance()._pegasus_root.c_str());
system(command);
}
@@ -175,10 +175,9 @@ public:
char command[512];
snprintf(command,
512,
"cd %s/onebox/replica%d/data/replica%d/reps && rm -rf %d.%d.pegasus",
"cd %s/onebox/replica%d/data/replica/reps && rm -rf %d.%d.pegasus",
global_env::instance()._pegasus_root.c_str(),
replica_id,
replica_id,
app_id,
partition_id);
std::cout << command << std::endl;
@@ -190,10 +189,9 @@ public:
char command[512];
snprintf(command,
512,
"cd %s/onebox/replica%d/data/replica%d/reps && rm -rf %d.*.pegasus",
"cd %s/onebox/replica%d/data/replica/reps && rm -rf %d.*.pegasus",
global_env::instance()._pegasus_root.c_str(),
replica_id,
replica_id,
app_id);
std::cout << command << std::endl;
system(command);
......
@@ -75,18 +75,16 @@ public:
virtual void TearDown() override
{
chdir(global_env::instance()._pegasus_root.c_str());
system("./run.sh clear_onebox");
std::this_thread::sleep_for(std::chrono::seconds(3));
// TODO: teardown must restore config-server.ini
system("git checkout -- src/server/config-server.ini");
system("./run.sh start_onebox");
std::cout << "sleep 10s to restart onebox" << std::endl;
std::this_thread::sleep_for(std::chrono::seconds(10));
std::string cmd = "rm -rf " + backup_data_dir;
system(cmd.c_str());
// go back to working dir
chdir(working_root_dir.c_str());
chdir(global_env::instance()._working_dir.c_str());
}
void write_data()
......
@@ -38,11 +38,22 @@ static void truncate_recent_file(const std::string &path)
std::string file_name;
ss >> file_length >> file_name;
std::cout << "get file with size: (" << file_name << ", " << file_length << ")" << std::endl;
std::cout << "truncate file with size: (" << file_name << ", " << file_length << ")"
<< std::endl;
snprintf(
command, 512, "truncate -s %lu %s/%s", file_length / 3, path.c_str(), file_name.c_str());
std::cout << command << std::endl;
system(command);
snprintf(command, 512, "ls -l %s/%s | awk '{print $5}'", path.c_str(), file_name.c_str());
std::stringstream ss2;
global_env::pipe_execute(command, ss2);
size_t new_file_length;
ss2 >> new_file_length;
ASSERT_LT(new_file_length, file_length);
std::cout << "after truncated file size: " << new_file_length << std::endl;
}
TEST(lost_log, slog)
@@ -84,7 +95,7 @@ TEST(lost_log, slog)
system("./run.sh stop_onebox");
std::cout << "truncate slog for replica1" << std::endl;
truncate_recent_file("onebox/replica1/data/replica1/slog");
truncate_recent_file("onebox/replica1/data/replica/slog");
std::cout << "restart onebox again" << std::endl;
system("./run.sh start_onebox");
......