Linux下进行业务进程监控并发送告警
2018-03-08 13:19
232 查看
# -*- coding:utf-8 -*-
# !/usr/bin/env python
# name : Alenx
"""Monitor business processes on a Linux host and raise alarms.

Every few seconds the script:

* compares the PIDs of the monitored processes with the previous snapshot
  and records any change (restart / crash) in a "collapse" log;
* compares each process' virtual memory, resident memory and CPU usage
  with the thresholds configured in the alarm configuration file;
* compares the system-wide CPU and memory usage with their thresholds.

Every alarm is appended to a local log file which is then copied to a
central log host with ``scp``.
"""
import os
import sys
import time
import stat

try:
    import ConfigParser  # Python 2 module name
except ImportError:
    import configparser as ConfigParser  # Python 3 module name

try:
    import pexpect
except ImportError:
    # Log shipping needs pexpect; local logging keeps working without it.
    pexpect = None

from socket import *  # kept from the original script (currently unused)

# Names of the business processes to monitor (matched with `ps aux | grep`).
Process_Name = ["/tomcat"]

# Alarm threshold configuration file.
CONF_PATH = '/usr/local/admin/admin.conf'

# Central log host that receives copies of the alarm logs.
REMOTE_USER = "root"
REMOTE_HOST = "10.10.0.11"
# SECURITY: a root password stored in source code is a liability.  It can be
# overridden through the ALARM_SCP_PASSWORD environment variable; the literal
# default is kept only so existing deployments keep working.  Prefer SSH keys.
REMOTE_PASSWORD = os.environ.get("ALARM_SCP_PASSWORD", "kedatest1")
# Maximum number of seconds to wait for scp to prompt / finish.
SCP_TIMEOUT = 30
# Seconds between two monitoring cycles.
POLL_INTERVAL = 3


def _read_command(command):
    """Run a shell command and return everything it wrote to stdout."""
    pipe = os.popen(command)
    try:
        return pipe.read()
    finally:
        pipe.close()


def _first_number(text, cast, default):
    """Convert the first whitespace-separated token of ``text`` with ``cast``.

    Returns ``default`` when ``text`` is empty or the token is not numeric,
    so a missing tool or an empty command output cannot crash the monitor.
    """
    tokens = text.split()
    if not tokens:
        return default
    try:
        return cast(tokens[0])
    except ValueError:
        return default


def ReadConf(alarmtype, sername, value, conf_path=CONF_PATH):
    """Return the alarm threshold for one configuration entry.

    alarmtype -- configuration section ("cpu", "vszmem", "rssmem",
                 "Physical_Machine_Threshold").
    sername   -- option name inside that section (process name or metric).
    value     -- total amount the percentage applies to; only used for
                 sections other than "cpu" / "Physical_Machine_Threshold".
    conf_path -- configuration file to read (defaults to CONF_PATH).

    For "cpu" and "Physical_Machine_Threshold" the configured integer is
    returned unchanged (a percentage).  For every other section the
    configured percentage is converted to an absolute amount of ``value``.
    Raises the ConfigParser errors if the section or option is missing.
    """
    config = ConfigParser.ConfigParser()
    config.read(conf_path)
    threshold = config.getint(alarmtype, sername)
    if alarmtype not in ("cpu", "Physical_Machine_Threshold"):
        threshold = threshold * 0.01 * value
    return threshold


def _ship_log(local_path, remote_dir):
    """Copy ``local_path`` to ``remote_dir`` on the central log host via scp.

    Failures are reported on stderr instead of being raised: losing one copy
    must not stop the monitoring loop.
    """
    if pexpect is None:
        sys.stderr.write("pexpect is not installed; %s was not copied\n" % local_path)
        return
    prompt = "%s@%s's password" % (REMOTE_USER, REMOTE_HOST)
    command = "scp -P 22 %s %s@%s:%s" % (local_path, REMOTE_USER, REMOTE_HOST, remote_dir)
    try:
        child = pexpect.spawn(command)
        try:
            # expect() already waits for the prompt, so no fixed sleep is
            # needed.  EOF first means scp finished without asking for a
            # password (e.g. key-based authentication).
            if child.expect([prompt, pexpect.EOF], timeout=SCP_TIMEOUT) == 0:
                child.sendline(REMOTE_PASSWORD)
                child.expect(pexpect.EOF, timeout=SCP_TIMEOUT)
        finally:
            child.close()
    except pexpect.ExceptionPexpect as exc:
        sys.stderr.write("copying %s failed: %s\n" % (local_path, exc))


def WriteCopyLog(name, num, typename, threshold):
    """Append one alarm line to the local alarm log and ship the log.

    name      -- process name, or "System" for host-wide alarms.
    num       -- current value of the metric.
    typename  -- metric label; "Rss_Memory" / "Vsz_Memory" are logged in Mb,
                 everything else as a percentage.
    threshold -- threshold that was reached.
    """
    timenow = time.strftime('%Y-%m-%d %H:%M:%S')
    if typename in ("Rss_Memory", "Vsz_Memory"):
        template = '%s\t%s\t%s\t\tAt present the use value:%.2fMb\t\tThreshold:%sMb\n'
    else:
        template = '%s\t%s\t%s\t\tAt present the use value:%.2f%%\t\tThreshold:%s%%\n'
    log_path = '/var/log/admin/%s_alarm.log' % ip
    with open(log_path, 'a') as log_file:
        log_file.write(template % (timenow, name, typename, num, threshold))
    _ship_log(log_path, "/var/log/admin/")


def _detect_ip():
    """Return the first IPv4 address printed by ifconfig ('' if none found)."""
    output = _read_command("/sbin/ifconfig | grep 'inet addr' | awk '{print $2}'")
    # Lines look like "addr:10.0.0.5"; keep what follows the first colon on
    # the first line.
    return output[output.find(':') + 1:output.find('\n')]


def _count_cpus():
    """Return the number of logical CPUs listed in /proc/cpuinfo (0 if unreadable)."""
    try:
        with open('/proc/cpuinfo') as cpuinfo:
            return sum(1 for line in cpuinfo if line.startswith('processor'))
    except (IOError, OSError):
        return 0


# IP address of this host, used in the log file names.
ip = _detect_ip()

Process_Number = len(Process_Name)

# Total swap of the server, in Mb.
swptotal = _first_number(_read_command("free | grep Swap | awk '{print $2}'"), int, 0) // 1024

# Total physical memory of the server, in Mb.
memtotal = _first_number(_read_command("free | grep Mem | awk '{print $2}'"), int, 0) // 1024

# Number of CPU cores.
cpunumber = _count_cpus()


def System_Cpu():
    """Return column 13 of the vmstat summary line as an integer (0 if unavailable)."""
    output = _read_command("vmstat|grep -v procs|grep -v swpd|awk '{print $13}'")
    return _first_number(output, int, 0)


def System_Memory():
    """Return the used physical memory reported by ``free``, in Mb (0 if unavailable)."""
    output = _read_command("free | grep Mem | awk '{print $3}'")
    return _first_number(output, int, 0) // 1024


def _ps_column(process_name, column):
    """Return one ``ps aux`` column for every process matching ``process_name``.

    The result is a list of strings, one per matching process; it is ``['']``
    when nothing matches.  For "/guard" the wrapping /bin/bash process is
    filtered out as well.
    """
    command = "ps aux | grep %s |grep -v 'grep'" % process_name
    if process_name == "/guard":
        command += "|grep -v '/bin/bash'"
    command += "|awk '{print $%d}'" % column
    return _read_command(command).strip().split('\n')


def _process_metric(process_name, column):
    """Return a numeric ``ps aux`` column of the first matching process.

    Returns None when no process matches (or the value is not numeric), so
    callers can skip the check instead of crashing.
    """
    try:
        return float(_ps_column(process_name, column)[0])
    except ValueError:
        return None


# PID snapshots (previous / current) and the latest per-process readings,
# all keyed by the index of the process in Process_Name.
First_Process_Number = {}
Second_Process_Number = {}
Vsz_Memory = {}
Rss_Memory = {}
Cpu_Use_Value = {}


def _check_process_changes():
    """Log every monitored process whose PID list changed since the last cycle."""
    for index, name in enumerate(Process_Name):
        Second_Process_Number[index] = _ps_column(name, 2)

    for index, name in enumerate(Process_Name):
        if First_Process_Number[index] != Second_Process_Number[index]:
            log_path = '/var/log/collapselog/%s_collapse.log' % ip
            with open(log_path, 'a') as log_file:
                log_file.write('%s\t%s\tCollapse\tThe new process:%s\tThe old process:%s\n' % (
                    time.strftime('%Y-%m-%d %H:%M:%S'), name,
                    Second_Process_Number[index], First_Process_Number[index]))
            _ship_log(log_path, "/var/log/collapselog")
        # The snapshot that was just compared becomes the new baseline, so a
        # change happening right after the comparison is caught next cycle.
        First_Process_Number[index] = Second_Process_Number[index]


def _check_process_resources():
    """Raise an alarm for every process metric that reached its threshold."""
    # (config section, log label, ps column, divisor, base for percentage
    #  thresholds, dict receiving the latest reading)
    checks = (
        ("vszmem", "Vsz_Memory", 5, 1024.0, swptotal, Vsz_Memory),   # Kb -> Mb
        ("rssmem", "Rss_Memory", 6, 1024.0, memtotal, Rss_Memory),   # Kb -> Mb
        ("cpu", "Cpu Use ", 3, 1.0, 0, Cpu_Use_Value),               # percent
    )
    for alarmtype, typename, column, divisor, base, readings in checks:
        for index, name in enumerate(Process_Name):
            raw = _process_metric(name, column)
            if raw is None:
                # Process not running: the PID comparison reports that.
                continue
            readings[index] = raw / divisor
            threshold = ReadConf(alarmtype, name, base)
            if readings[index] >= threshold:
                WriteCopyLog(name, readings[index], typename, threshold)


def _check_system_resources():
    """Raise an alarm when host-wide CPU or memory usage reached its threshold."""
    cpu_threshold = ReadConf("Physical_Machine_Threshold", "cpu_use", 0)
    # Sample once so the value tested is the value that gets logged.
    cpu_use = System_Cpu()
    if cpu_use >= cpu_threshold:
        WriteCopyLog("System", cpu_use, "System Cpu Use", cpu_threshold)

    mem_threshold = ReadConf("Physical_Machine_Threshold", "memory_use", 0)
    if memtotal:
        # The threshold is a percentage, so compare a percentage (and avoid
        # integer division, which would always yield 0 or 1).
        mem_use = System_Memory() * 100.0 / memtotal
        if mem_use >= mem_threshold:
            WriteCopyLog("System", mem_use, "System Mem Use", mem_threshold)


def main():
    """Take the initial PID snapshot, then run the monitoring loop forever."""
    for index, name in enumerate(Process_Name):
        First_Process_Number[index] = _ps_column(name, 2)
    print(First_Process_Number)

    while True:
        time.sleep(POLL_INTERVAL)
        _check_process_changes()
        _check_process_resources()
        _check_system_resources()


if __name__ == "__main__":
    main()
相关文章推荐
- 使用Shell脚本对Linux系统和进程资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控
- 使用Shell脚本对Linux系统和进程资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控
- 使用shell脚本对Linux系统和进程资源进行监控
- 使用shell脚本对Linux系统和进程资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控
- 使用shell 脚本对 Linux 系统进程的CPU使用率与内存资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控
- shell 脚本对 Linux 系统和进程资源进行监控
- 使用shell 脚本对 Linux 系统进程的CPU使用率与内存资源进行监控
- 使用Shell脚本对Linux系统和进程资源进行监控
- 使用shell脚本对Linux系统和进程资源进行监控
- Linux如何安装使用pidstat命令以对进程数据进行监控
- tomcat监控脚本(监控进程,测试接口,告警动作为发送邮件)
- 【转】使用Shell脚本对Linux系统和进程资源进行监控
- 使用 shell 脚本对 Linux 系统和进程资源进行监控