megacli 管理 ceph 存储常用脚本
2017-07-12 11:12
429 查看
说明
只适用于可以使用 megacli 管理的 raid controller；只适用于手动进行 ceph 管理的集群，不支持 ceph-deploy 创建的集群；需按实际环境修改脚本中对应的磁盘设备命名。
脚本
#!/bin/bash
#
# raidrepair.sh - replace a failed disk in a megacli-managed RAID that
# backs a manually deployed ceph cluster (stops the osd, unmounts it,
# marks the RAID slot offline, then rebuilds everything afterwards).
#
#   ./raidrepair.sh [option] [slot-number]
#
# Workflow:
#   1. Help:                          ./raidrepair.sh   (or -h)
#   2. Locate the failed disk:
#        check every slot             ./raidrepair.sh -a
#        check one slot (e.g. 8)      ./raidrepair.sh -c 8
#        show ceph mount points       ./raidrepair.sh -m
#        show virtual drives          ./raidrepair.sh -v
#      ##### comment out all ceph disks in /etc/fstab before going on #####
#   3. Take the broken disk offline:  ./raidrepair.sh -d 8
#   4. Power off and swap the disk:   /etc/init.d/ceph -a stop osd ; init 0
#      ##### the RAID cache may need a manual clear through iDRAC      #####
#      ##### an unedited /etc/fstab can prevent the system from booting ####
#   5. Bring the new disk back:
#        verify state                 ./raidrepair.sh -c 8
#          (expect: Firmware state: Unconfigured(good), Spun Up)
#        set it online                ./raidrepair.sh -o 8
#        initialise the drive         ./raidrepair.sh -i 8   (data is erased -
#                                      double-check the slot number!)
#        reboot test                  init 6
#   6. Re-create the OSD:
#        mount all ceph disks         ./raidrepair.sh -m
#        start mon/osd manually       /etc/init.d/ceph start mon
#        format the new disk          ./raidrepair.sh -f 8
#        initialise ceph              ./raidrepair.sh -p 8
#
# Only usable with RAID controllers supported by megacli and clusters
# managed by hand (not created with ceph-deploy).  Adjust the device
# naming in virtual2dev() for your own hardware.

if [ "$(whoami)" != 'root' ]
then
    echo "This program must be run by root."
    exit
fi

# Ensure /sbin/megacli exists; if missing, symlink it to the binary
# shipped by whichever MegaCli rpm is installed.
function rpmcheck() {
    if [ ! -f "/sbin/megacli" ]
    then
        toolsrpm=$(rpm -qa | grep -i megacli | tail -1)
        if [ -z "$toolsrpm" ]
        then
            echo "Error: Megacli tool not install."
            exit
        fi
        # FIX: the version check must inspect the package *name*, not a
        # file named after the package (which never exists).  8.07 rpms
        # install the binary directly; older ones keep it under bin/.
        if echo "$toolsrpm" | grep -q 8.07
        then
            file=$(rpm -ql "$toolsrpm" | grep -i "MegaCli$")
        else
            file=$(rpm -ql "$toolsrpm" | grep -i "MegaCli$" | grep "bin/")
        fi
        ln -s "$file" /sbin/megacli
    fi
}

# Print help and quit when the script is started without any argument.
function parametercheck() {
    if [ -z "$1" ]
    then
        help
        exit
    fi
}

# List every osd directory with its osd number and backing partition.
function verifyceph() {
    dirs=$(ls -d /var/lib/ceph/osd/*)
    if [ -z "$dirs" ]
    then
        echo "there is nothing is /var/lib/ceph/osd directory"
        exit
    else
        for dir in $dirs
        do
            if [ -f "$dir/whoami" ]
            then
                num=$(cat "$dir/whoami")
                partition=$(df | grep "$dir" | awk '{print $1}')
            else
                # directory exists but nothing is mounted on it
                num="NULL"
                partition="NULL"
            fi
            echo -e "$dir\t\tCeph Number is: $num\t\tPartition is: $partition"
        done
    fi
}

function help() {
    cat <<'EOF'
-a use to check all raid slot status
-h use to display help
-y use to check ceph mount
-m use to show partition mount to ceph.
-c [num] || ex: ( raid card slot number ) use to check raid slot number.
-d [num] || ex: ( raid card slot number ) use to offline raid disk.
-f [num] || ex: ( raid card slot number ) use to format partition.
-i [num] || ex: ( raid card slot number ) use to initial raid slot number disk.
-o [num] || ex: ( raid card slot number ) use to online raid disk.
-p [num] || ex: ( raid card slot number ) use to initial ceph and start up ceph.
-v [num] || ex: ( raid card slot number ) use to show virtual partition.
EOF
}

# Dump slot number / error counters for every physical drive.
function raidcheckall() {
    /sbin/megacli -PDList -aALL | egrep "Slot Number|Error"
    exit
}

# Show the virtual-drive <-> slot mapping of the controller.
function virtualcheck() {
    /sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot"
}

# Probe every GPT data disk and *print* the mount command that would
# attach it to its osd directory (informational only, nothing is left
# mounted - each disk is probed on /mnt and unmounted again).
function automountceph() {
    disks=$(fdisk -l | grep GPT | awk '{print $1}' | grep -v sda)
    for disk in $disks
    do
        mount "$disk" /mnt
        if [ -f "/mnt/whoami" ]
        then
            cephnum=$(cat /mnt/whoami)
            echo "mount $disk /var/lib/ceph/osd/ceph-$cephnum"
        fi
        umount /mnt
    done
}

# ---------------------------------------------------------------------
# shared helpers (replace four copy-pasted case-blocks / grep pipelines)
# ---------------------------------------------------------------------

# True when the controller reports a physical drive in slot $1.
# -w prevents slot "1" from matching slots 10-13.
function slotexists() {
    /sbin/megacli -PDList -aALL | grep "Slot Number" | awk -F: '{print $2}' \
        | grep -qw -- "$1"
}

# Print the virtual-drive number that contains physical slot $1
# (empty output when the slot is not part of any virtual drive).
function slot2virtual() {
    /sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot" \
        | grep -w -B 1 "Slot Number: $1" | grep Virtual | awk '{print $3}'
}

# Map virtual-drive number 0-13 to the OS device name.  Sets the
# globals "disk" (e.g. sdf) and "partition" (e.g. sdf1); both are
# emptied for an out-of-range or missing argument.  Site-specific:
# assumes virtual drive N enumerates as the (N+1)-th sd device.
function virtual2dev() {
    local letters=(a b c d e f g h i j k l m n)
    disk=""
    partition=""
    if [ -n "$1" ] && [ "$1" -ge 0 ] 2> /dev/null && [ "$1" -le 13 ] 2> /dev/null
    then
        disk="sd${letters[$1]}"
        partition="${disk}1"
    fi
}

# Print the (single) enclosure device id of the controller.
function enclosureid() {
    /sbin/megacli -PDList -aALL | grep 'Enclosure Device ID' | uniq \
        | awk -F'[: ]' '{print $NF}'
}

# -c: report error counters / firmware state of one slot plus the ceph
# osd and partition that live on it.
function raidcheck() {
    if [ ! -z "$slotnum" ]
    then
        /sbin/megacli -PDList -aALL \
            | sed -n /"Slot\ Number:\ $slotnum\$"/,/Media\ Type/p \
            | grep -E "Slot Number|Device Id|Error Count|Failure Count|Raw Size|Firmware state|Inquiry Data"
        virtualnum=$(slot2virtual "$slotnum")
        if [ -z "$virtualnum" ]
        then
            exit
        fi
        virtual2dev "$virtualnum"
        cephnum=$(df -h | grep "/dev/$partition" | awk '{print $NF}' | awk -F- '{print $NF}')
        echo "Ceph number: $cephnum"
        echo "Partition: $partition"
    fi
}

# -d: stop the osd on the slot, unmount it, drop it from fstab and mark
# the drive offline/missing/ready-to-remove on the controller.
function raidoffline() {
    if [ ! -z "$down" ]
    then
        if ! slotexists "$down"
        then
            echo "Slot Number: $down is not exists"
            exit
        else
            /sbin/megacli -PDList -aALL \
                | sed -n /"Slot\ Number:\ $down\$"/,/Media\ Type/p \
                | grep "Firmware state" | grep Online | grep Up > /dev/null
            if [ $? -ne 0 ]
            then
                echo "Slot Number: $down status error, please try to use $0 -c $down"
            fi
            virtualnum=$(slot2virtual "$down")
            virtual2dev "$virtualnum"
            raidnum=$(enclosureid)
            cephnum=$(df -h | grep "/dev/$partition" | awk '{print $NF}' | awk -F- '{print $NF}')
            # pause recovery/scrubbing so the cluster does not start
            # rebalancing while the disk is being swapped
            ceph osd set norecover
            ceph osd set noscrub
            ceph osd set nobackfill
            ceph osd set nodeep-scrub
            if [ ! -z "$cephnum" ]
            then
                /etc/init.d/ceph stop osd.$cephnum
                umount "/dev/$partition"
                sed -i /ceph-$cephnum/d /etc/fstab
            fi
            # tell the controller the drive is gone
            /sbin/megacli -PDOffline -PhysDrv [$raidnum:$down] -a0
            /sbin/megacli -PDMarkMissing -PhysDrv [$raidnum:$down] -a0
            /sbin/megacli -PDPrpRmv -PhysDrv [$raidnum:$down] -a0
            echo "stop raid slot complete, please use this command to shutdown compute"
            echo "#####################################################"
            echo "# /etc/init.d/ceph -a stop osd ; init 0 #"
            echo "#####################################################"
        fi
    fi
}

# -o: mark the replacement drive good, rebuild a RAID-0 virtual drive
# on it and kick off initialisation.
function raidonline() {
    if [ ! -z "$up" ]
    then
        /sbin/megacli -PDList -aALL \
            | sed -n /"Slot\ Number:\ $up\$"/,/Media\ Type/p \
            | grep "Firmware state" | grep Online | grep Up > /dev/null
        if [ $? -eq 0 ]
        then
            echo "Slot Number: $up status is up, use $0 -c $up"
            exit
        fi
        raidnum=$(enclosureid)
        /sbin/megacli -PDMakeGood -PhysDrv [$raidnum:$up] -force -a0
        /sbin/megacli -CfgLdAdd -r0 [$raidnum:$up] WT RA DIRECT -a0
        virtualnum=$(slot2virtual "$up")
        if [ ! -z "$virtualnum" ]
        then
            /sbin/megacli -LDInit -start -L$virtualnum -a0
        fi
    fi
}

# -i: start initialisation of the virtual drive behind an online slot
# (destroys all data on the drive).
function initialraid() {
    if [ ! -z "$initial" ]
    then
        /sbin/megacli -PDList -aALL \
            | sed -n /"Slot\ Number:\ $initial\$"/,/Media\ Type/p \
            | grep "Firmware state" | grep Online | grep Up > /dev/null
        if [ $? -ne 0 ]
        then
            echo "Slot Number: $initial status is not up, use $0 -c $initial"
            exit
        fi
        virtualnum=$(slot2virtual "$initial")
        if [ ! -z "$virtualnum" ]
        then
            /sbin/megacli -LDInit -start -L$virtualnum -a0
        fi
    fi
}

# -f: partition the new disk (GPT above 2000 GB, msdos below) and
# create the xfs filesystem ceph expects.
function startupceph() {
    if [ ! -z "$prepare" ]
    then
        if ! slotexists "$prepare"
        then
            echo "Slot Number: $prepare is not exists"
            exit
        else
            virtualnum=$(slot2virtual "$prepare")
            virtual2dev "$virtualnum"
        fi
        if [[ ! -b "/dev/$partition" ]] && [[ -b "/dev/$disk" ]]
        then
            disksize=$(parted "/dev/$disk" print | grep -v Flags | grep ^Disk \
                | awk -F'[: ]' '{print $4}' | sed s/GB//)
            # disks larger than 2 TB require a GPT label
            if [ "$disksize" -gt 2000 ]
            then
                parted "/dev/$disk" mklabel gpt
            else
                parted "/dev/$disk" mklabel msdos
            fi
            parted "/dev/$disk" mkpart primary xfs 1 100%
            partprobe
            # -i size=512: inode size recommended for ceph xattrs
            mkfs -t xfs -i size=512 "/dev/$partition"
        else
            echo "/dev/$partition is exists, are you sure wanna initial? use dd if=/dev/zero of=/dev/$disk bs=1M count=10"
        fi
    fi
}

# -p: mount the fresh partition on the orphaned osd directory, restore
# fstab, re-create the osd (mkfs/mkkey/keyring), start it and lift the
# recovery flags set by raidoffline.
function initialceph() {
    if [ ! -z "$ready" ]
    then
        if ! slotexists "$ready"
        then
            echo "Slot Number: $ready is not exists"
            exit
        else
            virtualnum=$(slot2virtual "$ready")
            virtual2dev "$virtualnum"
        fi
        # the osd directory missing a whoami file belongs to the
        # replaced disk - reuse its osd number
        for dir in $(ls -d /var/lib/ceph/osd/ceph-*)
        do
            if [ ! -f "$dir/whoami" ]
            then
                cephnum=$(echo "$dir" | awk -F"-" '{print $NF}')
            fi
        done
        mount "/dev/$partition" "/var/lib/ceph/osd/ceph-$cephnum"
        # mount by UUID so device renumbering cannot break fstab
        uuid=$(blkid "/dev/$partition" | awk -F\" '{print $2}')
        echo "UUID=$uuid /var/lib/ceph/osd/ceph-$cephnum xfs defaults 0 0" >> /etc/fstab
        cephuuid=$(grep fsid /etc/ceph/ceph.conf | awk -F'[= ]' '{print $NF}')
        /usr/bin/ceph-osd -i $cephnum --mkfs --mkkey --osd-uuid $cephuuid
        authkey=$(/usr/bin/ceph auth list 2> /dev/null | grep -w -A 1 osd."$cephnum" \
            | grep key | awk -F'[: ]' '{print $NF}')
        echo -e "[osd.$cephnum]\n\tkey = $authkey" > /var/lib/ceph/osd/ceph-$cephnum/keyring
        # sysvinit/done markers let the init script manage this osd
        touch /var/lib/ceph/osd/ceph-$cephnum/{sysvinit,done}
        /etc/init.d/ceph start osd.$cephnum
        ceph osd unset norecover
        ceph osd unset noscrub
        ceph osd unset nobackfill
        ceph osd unset nodeep-scrub
    fi
}

# ---------------------------------------------------------------------
# main
# ---------------------------------------------------------------------

parametercheck "$1"
rpmcheck

while getopts ":hHaAvVyYmMc:d:o:p:f:i:" OPT; do
    case $OPT in
        h|H) help ;;
        a|A) raidcheckall ;;
        v|V) virtualcheck ;;
        y|Y) verifyceph ;;
        m|M) automountceph ;;
        c) slotnum=$OPTARG ;;
        d) down=$OPTARG ;;
        o) up=$OPTARG ;;
        i) initial=$OPTARG ;;
        f) prepare=$OPTARG ;;
        p) ready=$OPTARG ;;
        ?) help
           exit ;;
        *) help
           exit ;;
    esac
done

# each handler is a no-op unless its option variable was set above
raidcheck
raidoffline
raidonline
startupceph
initialceph
initialraid
相关文章推荐
- Oracle管理及常用基础脚本
- ORACLE 常用管理脚本
- MS SQL 日常维护管理常用脚本(一)
- Oracle管理及常用基础脚本
- dba 常用管理脚本收集(测)
- 活动目录管理中常用的脚本(一)
- ceph存储 centos中Git常用命令
- Oracle数据库管理常用的监控脚本------极大的简化运维工作
- 系统管理中 bash shell 脚本常用方法总结(转)
- ceph存储 Source Insight 常用设置和快捷键大全
- Oracle管理及常用基础脚本
- windows服务器管理几种常用脚本
- 操作系统实验 之 存储管理---------常用页面置换算法模拟实验
- oracle 常用DBA管理脚本--数据库构架体系
- Oracle管理及常用基础脚本
- Oracle管理及常用基础脚本
- Oracle管理及常用基础脚本
- Oracle管理及常用基础脚本
- MS SQL 日常维护管理常用脚本(二)
- ceph存储 centos下Git常用命令查询