提交 283e55b4 编写于 作者: Q qinzuoyan 提交者: WeijieSun

fix scripts; add downgrade_node script

Summary: Ref T10174

Test Plan: N/A

Reviewers: sunweijie, cailiuyang, heyuchen, wutao1

Reviewed By: cailiuyang

Subscribers: #pegasus

Maniphest Tasks: T10174

Differential Revision: https://phabricator.d.xiaomi.net/D76405
上级 ec69a42d
......@@ -35,6 +35,7 @@ function usage()
echo " bench run benchmark test"
echo " shell run pegasus shell"
echo " migrate_node migrate primary replicas out of specified node"
echo " downgrade_node downgrade replicas to inactive on specified node"
echo
echo " test run unit test"
echo
......@@ -1275,6 +1276,100 @@ function run_migrate_node()
fi
}
#####################
## downgrade_node
#####################
# Print the option summary of the 'downgrade_node' subcommand to stdout.
# Takes no arguments; one option is described per output line.
usage_downgrade_node() {
    printf '%s\n' \
        "Options for subcommand 'downgrade_node':" \
        " -h|--help print the help info" \
        " -c|--cluster <str> cluster meta lists" \
        " -n|--node <str> the node to downgrade replicas, should be ip:port" \
        " -a|--app <str> the app to downgrade replicas, if not set, means downgrade all apps" \
        " -t|--type <str> type: test or run, default is test"
}
# Entry point of the 'downgrade_node' subcommand.
#
# Parses the subcommand options, validates them, then runs
# ./scripts/downgrade_node.sh from ${ROOT} with the resolved values.
#
# Options (see usage_downgrade_node):
#   -c|--cluster <str>  cluster meta list (required)
#   -n|--node <str>     node whose replicas are downgraded (required)
#   -a|--app <str>      app to handle, defaults to "*"
#   -t|--type <str>     "test" (default) or "run"
#
# Globals: reads ROOT; writes CLUSTER, NODE, APP, TYPE and key. These are kept
#          global, as in the original, so any code reading them after the call
#          keeps working.
# Exits:   0 after --help; -1 (status 255) on an invalid option/value or when
#          the helper script fails. Returns 0 on success.
function run_downgrade_node()
{
    local rc

    CLUSTER=""
    NODE=""
    APP="*"
    TYPE="test"
    # Numeric comparison: '>' inside [[ ]] compares strings lexicographically.
    while [[ $# -gt 0 ]]; do
        key="$1"
        case "$key" in
            -h|--help)
                usage_downgrade_node
                exit 0
                ;;
            -c|--cluster)
                CLUSTER="$2"
                shift
                ;;
            -n|--node)
                NODE="$2"
                shift
                ;;
            -a|--app)
                APP="$2"
                shift
                ;;
            -t|--type)
                TYPE="$2"
                shift
                ;;
            *)
                echo "ERROR: unknown option \"$key\""
                echo
                usage_downgrade_node
                exit -1
                ;;
        esac
        shift
    done

    if [[ -z "$CLUSTER" ]]; then
        echo "ERROR: no cluster specified"
        echo
        usage_downgrade_node
        exit -1
    fi
    if [[ -z "$NODE" ]]; then
        echo "ERROR: no node specified"
        echo
        usage_downgrade_node
        exit -1
    fi
    if [[ "$TYPE" != "test" && "$TYPE" != "run" ]]; then
        echo "ERROR: invalid type $TYPE"
        echo
        usage_downgrade_node
        exit -1
    fi

    echo "CLUSTER=$CLUSTER"
    echo "NODE=$NODE"
    echo "APP=$APP"
    echo "TYPE=$TYPE"
    echo

    # The helper is addressed relative to ${ROOT}; stop if we cannot get there
    # instead of running whatever happens to be in the current directory.
    if ! cd "${ROOT}"; then
        echo "ERROR: can't change directory to \"${ROOT}\""
        exit -1
    fi

    echo "------------------------------"
    # Every argument is quoted: the default APP is "*" and must reach the
    # helper literally rather than being expanded as a glob.
    ./scripts/downgrade_node.sh "$CLUSTER" "$NODE" "$APP" "$TYPE"
    rc=$?
    echo "------------------------------"
    echo

    # Do not report success when the helper refused or failed to downgrade.
    if [[ $rc -ne 0 ]]; then
        echo "ERROR: downgrade_node.sh failed with exit code $rc"
        exit -1
    fi

    if [[ "$TYPE" == "test" ]]; then
        echo "The above is sample downgrade commands."
        echo "Run with option '-t run' to do downgrade actually."
    else
        echo "Done."
        echo "You can run shell command 'nodes -d' to check the result."
        echo
        echo "The cluster's auto migration is disabled now, you can run shell command 'set_meta_level lively' to enable it again."
    fi
}
####################################################################
if [ $# -eq 0 ]; then
......@@ -1358,6 +1453,10 @@ case $cmd in
shift
run_migrate_node $*
;;
downgrade_node)
shift
run_downgrade_node $*
;;
test)
shift
run_test $*
......
#!/bin/bash
#
# Downgrade the replicas hosted on one node to inactive.
#
# USAGE: downgrade_node.sh <cluster-meta-list> <node> <app-name> <run|test>
#   cluster-meta-list  passed to './run.sh shell --cluster'
#   node               node to downgrade; compared against the primary and
#                      secondary addresses printed by 'app <name> -d'
#   app-name           app to handle, or * for every AVAILABLE app
#   run|test           'run' feeds the generated 'propose' commands to the
#                      shell, 'test' only prints them
#
# For every partition that lists <node> among its secondaries, one
# 'propose ... --type DOWNGRADE_TO_INACTIVE' command is generated. The script
# aborts with status -1 (255) when such a partition has no primary, has <node>
# as primary, or has a single secondary.
#
# Intermediate shell output is kept in /tmp/pegasus.set_meta_level,
# /tmp/pegasus.ls, /tmp/pegasus.app.<app> and /tmp/pegasus.cmd.<app>.

if [ $# -ne 4 ]
then
    echo "This tool is for downgrading replicas of specified node."
    echo "USAGE: $0 <cluster-meta-list> <node> <app-name> <run|test>"
    echo " app-name = * means downgrade all apps"
    exit -1
fi

# Run from the parent of the directory that holds this script, because
# ./run.sh is invoked with a relative path below.
pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd "$pwd/.." && pwd )"
cd "$shell_dir" || exit -1

cluster=$1
node=$2
app_name=$3
type=$4

if [[ "$type" != "run" && "$type" != "test" ]]
then
    echo "ERROR: invalid type: $type"
    echo "USAGE: $0 <cluster-meta-list> <node> <app-name> <run|test>"
    exit -1
fi

# An empty node can never identify a replica; reject it before touching the
# cluster.
if [[ -z "$node" ]]
then
    echo "ERROR: no node specified"
    echo "USAGE: $0 <cluster-meta-list> <node> <app-name> <run|test>"
    exit -1
fi

# NOTE(review): the meta level is switched to steady in 'test' mode as well,
# so a dry run also changes cluster state - confirm this is intended.
echo "set_meta_level steady" | ./run.sh shell --cluster "$cluster" &>/tmp/pegasus.set_meta_level

echo ls | ./run.sh shell --cluster "$cluster" &>/tmp/pegasus.ls

# Columns used from the 'ls' output: $1 app id, $2 status, $3 app name.
while read -r app_line
do
    status=$(printf '%s\n' "$app_line" | awk '{print $2}')
    if [[ "$status" != "AVAILABLE" ]]
    then
        continue
    fi

    gid=$(printf '%s\n' "$app_line" | awk '{print $1}')
    app=$(printf '%s\n' "$app_line" | awk '{print $3}')
    if [[ "$app_name" != "*" && "$app_name" != "$app" ]]
    then
        continue
    fi

    echo "app $app -d" | ./run.sh shell --cluster "$cluster" &>"/tmp/pegasus.app.$app"

    # Columns used from the 'app -d' output: $1 partition index, $4 primary,
    # $5 secondaries as "[addr,addr]".
    # stdout of this loop is the generated command file, so diagnostics are
    # written to stderr; otherwise they would be hidden inside that file.
    while read -r line
    do
        # "$line" is quoted so the bracketed field is never glob-expanded.
        sec=$(printf '%s\n' "$line" | awk '{print $5}' | grep -o '\[.*\]' | grep -o '[0-9.:,]*')

        # Exact match against the comma separated list: a regex/substring
        # match would let 1.2.3.4:34801 hit 11.2.3.4:34801.
        case ",${sec}," in
            *",${node},"*) ;;
            *) continue ;;
        esac

        pid=$(printf '%s\n' "$line" | awk '{print $1}')
        pri=$(printf '%s\n' "$line" | awk '{print $4}')
        if [[ -z "$pri" ]]
        then
            echo "ERROR: can't downgrade ${gid}.${pid} because it is unhealthy" >&2
            exit -1
        fi
        if [[ "$pri" = "$node" ]]
        then
            echo "ERROR: can't downgrade ${gid}.${pid} because $node is primary" >&2
            exit -1
        fi
        # No comma means <node> is the only secondary of this partition.
        if [[ "$sec" != *,* ]]
        then
            echo "ERROR: can't downgrade ${gid}.${pid} because it is unhealthy" >&2
            exit -1
        fi
        echo "propose --gpid ${gid}.${pid} --type DOWNGRADE_TO_INACTIVE -t $pri -n $node"
    done <"/tmp/pegasus.app.$app" >"/tmp/pegasus.cmd.$app"

    if [[ "$type" = "run" ]]
    then
        ./run.sh shell --cluster "$cluster" <"/tmp/pegasus.cmd.$app" 2>/dev/null
        echo
        echo
    else
        cat "/tmp/pegasus.cmd.$app"
    fi
done </tmp/pegasus.ls
......@@ -20,7 +20,7 @@ type=$4
if [ "$type" != "run" -a "$type" != "test" ]
then
echo "ERROR: invalid type: $type"
echo "USAGE: $0 <cluster-meta-list> <migrate-node> <run|test>"
echo "USAGE: $0 <cluster-meta-list> <migrate-node> <app-name> <run|test>"
exit -1
fi
......
......@@ -112,6 +112,7 @@ cp -v ./DSN_ROOT/bin/pegasus_server/pegasus_server ${pack}/bin
cp -v ./DSN_ROOT/bin/pegasus_rproxy/pegasus_rproxy ${pack}/bin
cp -v ./DSN_ROOT/lib/libdsn_meta_server.so ${pack}/bin
cp -v ./DSN_ROOT/lib/libdsn_layer2_stateful_type1.so ${pack}/bin
cp -v ./rdsn/thirdparty/output/lib/libPoco*.so.48 ${pack}/bin
cp -v ./rdsn/scripts/linux/learn_stat.py ${pack}/bin
cp -v ./scripts/sendmail.sh ${pack}/bin
......
......@@ -111,6 +111,7 @@ do
echo "Migrating primary replicas out of node..."
./run.sh migrate_node -c $meta_list -n $node -t run &>/tmp/pegasus.rolling_update.migrate_node
echo "Wait [$node] to migrate done..."
echo "Refer to /tmp/pegasus.rolling_update.migrate_node for details"
while true
do
pri_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $4}'`
......@@ -118,10 +119,30 @@ do
echo "Migrate done."
break
else
echo "Still $pri_count primary replicas left on $node"
sleep 1
fi
done
done
echo
sleep 3
echo "Downgrading replicas on node..."
./run.sh downgrade_node -c $meta_list -n $node -t run &>/tmp/pegasus.rolling_update.downgrade_node
echo "Wait [$node] to downgrade done..."
echo "Refer to /tmp/pegasus.rolling_update.downgrade_node for details"
while true
do
rep_count=`echo 'nodes -d' | ./run.sh shell --cluster $meta_list | grep $node | awk '{print $3}'`
if [ $rep_count -eq 0 ]; then
echo "Downgrade done."
break
else
echo "Still $rep_count replicas left on $node"
sleep 1
fi
done
echo
sleep 3
echo "Rolling update by minos..."
cd $minos_client_dir
......@@ -145,7 +166,7 @@ do
else
sleep 1
fi
done
done
echo
echo "Wait cluster to become healthy..."
......@@ -159,7 +180,7 @@ do
else
sleep 1
fi
done
done
echo "Sleep done."
echo
......
......@@ -30,7 +30,7 @@ fi
result_file="pegasus.stat_available.scan_result"
tmp_file="/tmp/pegasus.stat_available.scan.$UID"
echo -e "use $detect_table\nscan detect_available_day -o $result_file" | ./run.sh shell -n $cluster &>$tmp_file
echo -e "use $detect_table\nhash_scan detect_available_day '' '' -s prefix -y \"$filter\" -o $result_file" | ./run.sh shell -n $cluster &>$tmp_file
scan_ok=`grep 'key-value pairs got' $tmp_file | wc -l`
if [ $scan_ok -ne 1 ]; then
echo "ERROR: scan detect table failed, refer error to $tmp_file"
......@@ -38,13 +38,13 @@ if [ $scan_ok -ne 1 ]; then
exit -1
fi
days=`grep $filter $result_file | wc -l`
days=`cat $result_file | wc -l`
if [ $days -eq 0 ]; then
echo "ERROR: no detect data found for filter \"$filter\", refer to $tmp_file"
rm -f $result_file
exit -1
fi
available=`grep $filter $result_file | grep -o '[0-9]*,[0-9]*,[0-9]*' | awk -F, '{a+=$1;b+=$2}END{printf("%f\n",(double)b/a);}'`
available=`cat $result_file | grep -o '[0-9]*,[0-9]*,[0-9]*' | awk -F, '{a+=$1;b+=$2}END{printf("%f\n",(double)b/a);}'`
rm -f $result_file
echo "$cluster $filter $days $available"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册