
A megacli helper script for routine ceph storage management

2017-07-12

Notes

Only applies to RAID controllers supported by megacli
Only applies to manually managed ceph clusters; clusters created with ceph-deploy are not supported
The disk device naming in the script must be adjusted to match your environment
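
Before using the script, it is worth confirming that megacli can see the controller at all. A minimal sanity check, assuming megacli is already linked to /sbin/megacli (the script's rpmcheck function creates this symlink when the rpm is installed):

/sbin/megacli -adpCount                           # number of raid adapters detected
/sbin/megacli -PDList -aALL | grep "Slot Number"  # physical slots visible to the controller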


Script

#!/bin/bash
#
#  Usage:
#     The script stops an osd, umounts it, and marks the matching raid device as offline.
#     ./raidrepair.sh [number]     [number] is the slot number on the raid controller
#
#  1. Get help  (./raidrepair.sh   ||   ./raidrepair.sh  -h )
#
#  2. Confirm the failed disk
#     Check all disks at once          (./raidrepair.sh -a)
#     Check one specific disk          (./raidrepair.sh -c 8)   assuming slot number 8 in the raid is the faulty one  ( confirms slot number 8 maps to /dev/sdf1 and /var/lib/ceph/osd/ceph-20 )
#     Check disk mount points          (./raidrepair.sh -m)    confirms slot number 8 maps to /dev/sdf1  /var/lib/ceph/osd/ceph-20
#     Check the raid virtual drives    (./raidrepair.sh -v)    confirms slot number 8 maps to ( Virtual Drive: 5 )
#     ##########   It is recommended to manually comment out all ceph disk entries in /etc/fstab   ####################
#
#  3. Handle the failure
#     Remove the failed disk  (./raidrepair.sh -d 8)
#
#  4. Power off and replace the disk  ( /etc/init.d/ceph -a stop osd ; init 0 )
#      ##########   After power-off, the raid cache may need to be cleared (done manually through idrac) #####################
#      ##########   If the ceph entries in /etc/fstab were not commented out, the system may fail to boot #####################
#
#  5. Repair the disk
#      Check disk status        ( ./raidrepair.sh -c 8 )  expect  ( Firmware state: Unconfigured(good), Spun Up )
#      Bring the disk online    ( ./raidrepair.sh -o 8 )
#      Initialize the disk      ( ./raidrepair.sh -i 8 )  careful: a wrong slot number means that disk's data is lost
#      Reboot to verify         ( init 6 )
#
#  6. Initialize ceph
#      List the mount commands for all ceph disks ( ./raidrepair.sh -m )
#      Manually start ceph mon and ceph osd       ( /etc/init.d/ceph start mon )
#      Format the replaced ceph disk              ( ./raidrepair.sh -f 8 )
#      Initialize ceph on it                      ( ./raidrepair.sh -p 8 )

if [ `whoami` != 'root' ]
then
echo "This program must be run by root."
exit 1
fi

function rpmcheck()
{
# Locate the MegaCli binary from the installed rpm and symlink it to /sbin/megacli.
if [ ! -f "/sbin/megacli" ]
then

toolsrpm=`rpm -qa | grep -i megacli | tail -1`
if [ -z "$toolsrpm" ]
then
echo "Error:  Megacli tool not installed."
exit 1
fi

# The path of the MegaCli binary inside the rpm differs between versions,
# so pick the matching rpm -ql entry.
echo "$toolsrpm" | grep 8.07 > /dev/null 2>&1
if [ $? -eq 0 ]
then
file=`rpm -ql $toolsrpm | grep -i "MegaCli$"`
else
file=`rpm -ql $toolsrpm | grep -i "MegaCli$" | grep "bin/"`
fi
ln -s $file /sbin/megacli
fi
}

function parametercheck()
{
# Show help and exit when the script is called with no option at all.
if [ -z "$1" ]
then
help
exit
fi
}

function verifyceph()
{
# List every osd directory together with its ceph number and backing partition.
dirs=`ls -d /var/lib/ceph/osd/*`
if [ -z "$dirs" ]
then
echo "there is nothing in the /var/lib/ceph/osd directory"
exit
else
for dir in $dirs
do
if [ -f "$dir/whoami" ]
then
num=`cat $dir/whoami`
partition=`df | grep "$dir" | awk '{print $1}'`
else
num="NULL"
partition="NULL"
fi
echo -e "$dir\t\tCeph Number is: $num\t\tPartition is: $partition"
done
fi
}

function help()
{
cat <<'EOF'
-a  used to check the status of all raid slots
-h  used to display this help
-y  used to check the ceph osd directories
-m  used to show which partition mounts to which ceph osd
-c  [num]  || ex: ( raid card slot number )  used to check one raid slot
-d  [num]  || ex: ( raid card slot number )  used to offline a raid disk
-f  [num]  || ex: ( raid card slot number )  used to partition and format the disk
-i  [num]  || ex: ( raid card slot number )  used to initialize the raid slot's disk
-o  [num]  || ex: ( raid card slot number )  used to online a raid disk
-p  [num]  || ex: ( raid card slot number )  used to initialize ceph and start it up
-v  used to show the virtual drive to slot mapping
EOF
}
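
# All of the slot-handling functions below need the same translation from a
# virtual drive number to a /dev/sdX device, so it is factored out here. The
# mapping assumes virtual drive 0 is sda, 1 is sdb, and so on up to 13 is sdn,
# which is exactly what the original per-function lookup tables encoded;
# adjust it to match your own device naming (see the notes at the top).
function mapvirtual()
{
local letters=(a b c d e f g h i j k l m n)
if [ -z "$1" ] || [ "$1" -gt 13 ]
then
return
fi
disk=sd${letters[$1]}
partition=${disk}1
}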

parametercheck $1
rpmcheck

function raidcheckall()
{
# Dump slot numbers and error counters for every physical disk on the controller.
/sbin/megacli -PDList  -aALL | egrep "Slot Number|Error"
exit
}

function virtualcheck()
{
# Show the virtual drive to physical slot mapping.
/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot"
}

function automountceph()
{
# Probe every non-sda GPT disk: mount it temporarily, read its whoami file,
# and print the mount command for its osd directory. The commands are only
# printed, not executed, so they can be reviewed before running them.
disks=`fdisk -l  | grep GPT | awk '{print $1}' | grep -v sda`
for disk in $disks
do
mount $disk /mnt
if [ -f "/mnt/whoami" ]
then
cephnum=`cat /mnt/whoami`
echo "mount  $disk /var/lib/ceph/osd/ceph-$cephnum"
fi
umount /mnt
done
}

# Options that take a slot number only record it here; the matching function
# runs later through the dispatch calls at the bottom of the script.
while getopts ":hHaAvVyYmMc:d:o:p:f:i:" OPT;
do
case $OPT in
h|H)
help
;;
a|A)
raidcheckall
;;
v|V)
virtualcheck
;;
y|Y)
verifyceph
;;
m|M)
automountceph
;;
c)
slotnum=$OPTARG
;;
d)
down=$OPTARG
;;
o)
up=$OPTARG
;;
i)
initial=$OPTARG
;;
f)
prepare=$OPTARG
;;
p)
ready=$OPTARG
;;
?)
help
exit
;;
*)
help
exit
;;
esac

done

function raidcheck()
{
# Print detail for one slot and resolve it to its ceph osd and partition.
if [ ! -z "$slotnum" ]
then
/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $slotnum\$"/,/Media\ Type/p | grep -E "Slot Number|Device Id|Error Count|Failure Count|Raw Size|Firmware state|Inquiry Data"

virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w -B 1 "Slot Number: $slotnum" | grep Virtual  | awk '{print $3}'`
if [ -z "$virtualnum" ]
then
exit
fi
mapvirtual $virtualnum

cephnum=`df -h | grep /dev/$partition | awk '{print $NF}' | awk -F- '{print $NF}'`
echo "Ceph number: $cephnum"
echo "Partition: $partition"

fi
}

function raidoffline()
{
# Stop the osd on the failed slot, unmount it, and mark the raid device offline.
if [ ! -z "$down" ]
then
nums=`/sbin/megacli -PDList  -aALL  |  grep "Slot Number" | awk -F: '{print $2}'`
echo $nums | grep -w "$down" > /dev/null
if [ $? -ne 0 ]
then
echo "Slot Number: $down does not exist"
exit
else

/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $down\$"/,/Media\ Type/p | grep "Firmware state" | grep Online | grep Up > /dev/null
if [ $? -ne 0 ]
then
echo "Slot Number: $down status error, please try to use $0 -c $down"
fi

virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w -B 1 "Slot Number: $down" | grep Virtual | awk '{print $3}'`
mapvirtual $virtualnum
raidnum=`/sbin/megacli -PDList  -aALL | grep 'Enclosure Device ID' | uniq | awk -F[:\ ] '{print $NF}'`
cephnum=`df -h | grep /dev/$partition | awk '{print $NF}' | awk -F- '{print $NF}'`

# ceph maintenance: stop recovery, scrubbing and backfill so the cluster
# stays quiet while the osd is out.
ceph osd set norecover
ceph osd set noscrub
ceph osd set nobackfill
ceph osd set nodeep-scrub

if [ ! -z "$cephnum" ]
then
/etc/init.d/ceph stop osd.$cephnum
umount /dev/$partition
sed -i /ceph-$cephnum/d /etc/fstab
fi

# megacli maintenance: offline the drive, mark it missing, and prepare it
# for removal.
/sbin/megacli -PDOffline -PhysDrv [$raidnum:$down] -a0
/sbin/megacli -PDMarkMissing -PhysDrv [$raidnum:$down] -a0
/sbin/megacli -PDPrpRmv -PhysDrv [$raidnum:$down] -a0

## Reminder
echo "stop raid slot complete, please use this command to shut down the machine"
echo "#####################################################"
echo "#       /etc/init.d/ceph -a stop osd ; init 0       #"
echo "#####################################################"
fi
fi
}


function raidonline()
{
# Bring a replaced disk back: mark it good, rebuild the virtual drive on it,
# and start background initialization.
if [ ! -z "$up" ]
then
/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $up\$"/,/Media\ Type/p | grep "Firmware state" | grep Online | grep Up > /dev/null
if [ $? -eq 0 ]
then
echo "Slot Number: $up  status is already up, use $0 -c $up"
exit
fi

raidnum=`/sbin/megacli -PDList  -aALL | grep 'Enclosure Device ID' | uniq | awk -F[:\ ] '{print $NF}'`

# Get the virtual drive number for the -L argument.
virtualnum=`/sbin/megacli -cfgdsply -aALL  | grep  -E "Target\ Id:|Slot Number:" | grep -w -B 1 "Slot Number: $up\$" | grep Virtual | awk '{print $3}'`

# megacli maintenance: mark the drive good, recreate a raid0 virtual drive
# on it, then kick off initialization.
/sbin/megacli -PDMakeGood -PhysDrv [$raidnum:$up] -force -a0
/sbin/megacli -CfgLdAdd -r0 [$raidnum:$up] WT RA DIRECT -a0
if [ ! -z "$virtualnum" ]
then
/sbin/megacli  -LDInit -start -L$virtualnum -a0
fi
fi
}

function initialraid()
{
# Start initialization of the virtual drive behind an already-online slot.
if [ ! -z "$initial" ]
then
/sbin/megacli -PDList  -aALL  | sed -n /"Slot\ Number:\ $initial\$"/,/Media\ Type/p | grep "Firmware state" | grep Online | grep Up > /dev/null
if [ $? -ne 0 ]
then
echo "Slot Number: $initial  status is not up, use $0 -c $initial"
exit
fi
virtualnum=`/sbin/megacli -cfgdsply -aALL  | grep  -E "Target\ Id:|Slot Number:" | grep -w -B 1 "Slot Number: $initial\$" | grep Virtual | awk '{print $3}'`
if [ ! -z "$virtualnum" ]
then
/sbin/megacli  -LDInit -start -L$virtualnum -a0
fi
fi
}

# Partition and format the new disk.
function startupceph()
{
# Partition the replaced disk (gpt above 2000 GB, msdos below) and format it xfs.
if [ ! -z "$prepare" ]
then

nums=`/sbin/megacli -PDList  -aALL  |  grep "Slot Number" | awk -F: '{print $2}'`
echo $nums | grep -w "$prepare" > /dev/null
if [ $? -ne 0 ]
then
echo "Slot Number: $prepare does not exist"
exit
else
virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w  -B 1 "Slot Number: $prepare" | grep Virtual | awk '{print $3}'`
mapvirtual $virtualnum
fi

if [[ ! -b "/dev/$partition" ]] && [[ -b "/dev/$disk" ]]
then
disksize=`parted /dev/$disk print|  grep -v Flags | grep ^Disk  | awk -F[:\ ] '{print $4}' | sed s/GB//`
if [ $disksize -gt 2000 ]
then
parted /dev/"$disk"  mklabel gpt
else
parted /dev/"$disk" mklabel msdos
fi
parted /dev/"$disk"  mkpart primary xfs 1 100%
partprobe
mkfs -t xfs -i size=512 /dev/"$partition"
else
echo "/dev/$partition already exists, are you sure you want to initialize it? use dd if=/dev/zero of=/dev/$disk bs=1M count=10"
fi
fi
}

function initialceph()
{
# Mount the new partition, recreate the osd data with ceph-osd --mkfs, install
# its keyring, start the osd, and lift the maintenance flags.
if [ ! -z "$ready" ]
then
nums=`/sbin/megacli -PDList  -aALL  |  grep "Slot Number" | awk -F: '{print $2}'`
echo $nums | grep -w "$ready" > /dev/null
if [ $? -ne 0 ]
then
echo "Slot Number: $ready does not exist"
exit
else
virtualnum=`/sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" | grep -w  -B 1 "Slot Number: $ready" | grep Virtual | awk '{print $3}'`
mapvirtual $virtualnum
fi
# Find the osd number whose directory is not mounted (it has no whoami file).
for dir in `ls -d /var/lib/ceph/osd/ceph-*`
do
if [ ! -f  "$dir/whoami" ]
then
cephnum=`echo $dir | awk -F"-"  '{print $NF}'`
fi
done

mount "/dev/$partition" "/var/lib/ceph/osd/ceph-$cephnum"

# Re-register the partition in /etc/fstab by UUID.
uuid=`blkid /dev/$partition | awk -F\" '{print $2}'`
echo  "UUID=$uuid  /var/lib/ceph/osd/ceph-$cephnum  xfs defaults 0 0" >> /etc/fstab
cephuuid=`cat /etc/ceph/ceph.conf | grep fsid | awk -F[=\ ] '{print $NF}'`

# Recreate the osd data directory and key for the existing osd id.
/usr/bin/ceph-osd -i $cephnum --mkfs --mkkey --osd-uuid $cephuuid

# Reuse the key already registered in the cluster for this osd.
authkey=`/usr/bin/ceph auth list 2> /dev/null |  grep -w -A 1 osd."$cephnum"  | grep key | awk -F[:\ ] '{print $NF}'`
echo -e "[osd.$cephnum]\n\tkey = $authkey"  > /var/lib/ceph/osd/ceph-$cephnum/keyring
touch /var/lib/ceph/osd/ceph-$cephnum/{sysvinit,done}

# Start the osd and let the cluster recover, scrub and backfill again.
/etc/init.d/ceph start osd.$cephnum
ceph osd unset norecover
ceph osd unset noscrub
ceph osd unset nobackfill
ceph osd unset nodeep-scrub
fi
}

# Dispatch: only the function whose slot variable was set above does anything.
raidcheck
raidoffline
raidonline
startupceph
initialceph
initialraid
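
For reference, a typical disk-replacement session assembled from the usage notes above might look like this (slot 8, /dev/sdf1 and osd.20 are the illustrative values used in the header comments; substitute your own):

./raidrepair.sh -c 8                      # confirm slot 8 maps to /dev/sdf1 and ceph-20
./raidrepair.sh -d 8                      # stop the osd, umount it, offline the raid disk
/etc/init.d/ceph -a stop osd ; init 0     # power off and physically swap the disk
./raidrepair.sh -c 8                      # after boot, expect: Unconfigured(good), Spun Up
./raidrepair.sh -o 8                      # make the disk good and rebuild its virtual drive
./raidrepair.sh -i 8                      # initialize it (data on that slot is lost)
init 6                                    # reboot to verify
./raidrepair.sh -m                        # list the mount commands for the ceph disks
./raidrepair.sh -f 8                      # partition and format the new disk
./raidrepair.sh -p 8                      # recreate and start the osd, lift maintenance flags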