[CentOS] Automated LSI card management.
John R Pierce
pierce at hogranch.com
Wed Dec 4 04:52:56 UTC 2013
On 12/3/2013 6:18 PM, John R Pierce wrote:
> i found and modified a python script that parses the output of megacli
> to create a useful status page in a format that could easily be then
> grepped for errors for an alert. I'll try and dig it up after dinner.
OK, here it is. I call this lsi-raidinfo and keep it in /root/bin, as
it's only meant for root's use. It assumes you've installed megacli.
(Note: there are 2 more scripts following.)
#!/usr/bin/python
# megaclisas-status 0.6
# renamed lsi-raidinfo
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Pulse 2; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# Copyright (C) 2007-2009 Adam Cecile (Le_Vert)
## modified by johnpuskar at gmail.com 08/14/11
# fixed for LSI 9285-8e on Openfiler
## modified by pierce at hogranch.com 2012-01-05
# fixed for newer version of megacli output on RHEL6/CentOS6
# output format extended to show raid span-unit and rebuild % complete
import os
import re
import sys
# Command line: either no arguments at all, or the single flag -d.
# Anything else (an unknown flag, or more than one argument) is a usage error.
cli_args = sys.argv[1:]
if cli_args and cli_args != ['-d']:
    print('Usage: lsi-raidinfo [-d]')
    sys.exit(1)

# if argument -d, only print disk info (controller and volume tables are skipped)
printarray = printcontroller = not cli_args
# Get command output
def getOutput(cmd):
    """Run *cmd* through the shell and return its non-blank output lines.

    Every line is stripped of leading/trailing whitespace; lines that are
    empty after stripping are dropped.  Only stdout is captured.

    cmd     -- command line string handed to os.popen()
    returns -- list of stripped, non-empty output lines (possibly empty)
    """
    pipe = os.popen(cmd)
    try:
        stripped = [line.strip() for line in pipe]
    finally:
        # Close explicitly so the pipe's file descriptor is released and the
        # child process is waited for, instead of relying on garbage collection.
        pipe.close()
    return [line for line in stripped if line]
def returnControllerNumber(output):
    """Return the controller count reported in *output*.

    Looks for the first line starting with 'Controller Count' and parses the
    integer after the colon (a trailing '.' is tolerated).

    output  -- list of output lines
    returns -- the count as an int, or 0 when no such line is present
    """
    for line in output:
        if re.match(r'^Controller Count.*$',line.strip()):
            return int(line.split(':')[1].strip().strip('.'))
    # No count line at all: report zero controllers explicitly rather than
    # falling through to an implicit None that callers would compare with ints.
    return 0
def returnControllerModel(output):
    """Return the controller model named on the 'Product Name' line.

    output  -- list of output lines
    returns -- text after the first colon of the first 'Product Name' line,
               stripped; 'Unknown' when no such line is present
    """
    for line in output:
        if re.match(r'^Product Name.*$',line.strip()):
            # Split only on the first colon so a model name that itself
            # contains ':' is not truncated.
            return line.split(':', 1)[1].strip()
    # Callers concatenate the result into a string, so never return None.
    return 'Unknown'
def returnArrayNumber(output):
    """Count the lines of *output* that start with 'Virtual Drive' or
    'Virtual Disk' (after stripping surrounding whitespace)."""
    header = re.compile(r'^Virtual (Drive|Disk).*$')
    return sum(1 for entry in output if header.match(entry.strip()))
def returnArrayInfo(output,controllerid,arrayid):
    """Summarise one logical volume from its info listing.

    output       -- list of output lines describing a single volume
    controllerid -- controller index, used only to build the volume id
    arrayid      -- volume index, used only to build the volume id
    returns      -- [id, type, size, state, inprogress], all strings:
        id         'c<controllerid>u<arrayid>'
        type       'RAID<level>' ('JBOD' if no RAID Level line), with '0'
                   appended when the span depth is greater than 1, followed
                   by ' <span>x<units>'
        size       size rounded to whole gigabytes plus 'G', or 'Unknown'
        state      text of the State line, or 'Unknown' if there is none
        inprogress the line following 'Ongoing Progresses', or 'None'
    """
    volume_id = 'c'+str(controllerid)+'u'+str(arrayid)
    raid_type = 'JBOD'
    units = 1
    span = 0
    size = 'Unknown'
    state = 'Unknown'
    progress_index = None   # None (not False) so that index 0 is a valid hit

    for index, raw in enumerate(output):
        line = raw.strip()
        if line.startswith('Ongoing Progresses'):
            progress_index = index
            continue
        if ':' not in line:
            # Every field of interest is 'Name : value'; nothing to parse here.
            continue
        value = line.split(':')[1].strip()
        if line.startswith('RAID Level'):
            # e.g. 'Primary-1, Secondary-0, ...' -> 'RAID1'
            raid_type = 'RAID' + value.split(',')[0].split('-')[1].strip()
        elif line.startswith('Number'):
            units = value
        elif line.startswith('Span Depth'):
            span = value
        elif line.startswith('Size'):
            if value.endswith('MB'):
                # Size reported in MB
                gigabytes = float(value.strip('MB').strip()) / 1000
            elif value.endswith('TB'):
                # Size reported in TB
                gigabytes = float(value.strip('TB').strip()) * 1000
            else:
                # Size reported in GB (default)
                gigabytes = float(value.strip('GB').strip())
            size = str(int(round(gigabytes)))+'G'
        elif line.startswith('State'):
            state = value

    # The progress detail is the line right after the marker, if there is one.
    if progress_index is not None and progress_index + 1 < len(output):
        inprogress = output[progress_index+1]
    else:
        inprogress = 'None'

    # The span depth is parsed as text; compare it numerically.  Comparing the
    # string itself with 1 is always true on Python 2 (and a TypeError on
    # Python 3), which tagged every volume as a spanned RAIDx0.
    try:
        span_depth = int(span)
    except ValueError:
        span_depth = 0
    if span_depth > 1:
        raid_type = raid_type+'0'
    raid_type = raid_type + ' ' + str(span) + 'x' + str(units)
    return [volume_id,raid_type,size,state,inprogress]
def returnDiskInfo(output,controllerid,currentarrayid):
    """Build one table row per physical disk found in a disk listing.

    A new disk starts at each 'Enclosure Device ID: <number>' line; the
    fields collected since the previous such line form the previous disk's
    row.  Fields are not reset between disks (except the span, which falls
    back to 'x-x-x'), so a disk lacking a field shows the last value seen.

    output         -- list of output lines of the disk listing
    controllerid   -- controller index; used only when a disk is rebuilding,
                      to query its rebuild progress via getOutput()
    currentarrayid -- unused; kept so existing callers keep working
    returns        -- list of [encl:slot, vol-span-unit, model, state] rows;
                      empty when the listing contains no disk fields at all
    """
    table = []
    state = 'Offline'
    model = 'Unknown'
    enclnum = 'Unknown'
    slotnum = 'Unknown'
    # Disks that belong to no volume have no position line; give them the
    # placeholder from the start so the first disk cannot hit an unbound name.
    span = 'x-x-x'
    firstDisk = True
    sawDiskField = False

    for raw in output:
        line = raw.strip()
        if re.match(r'Firmware state: .*$',line):
            sawDiskField = True
            state = raw.split(':')[1].strip()
            if re.match(r'Rebuild',state):
                cmd2 = ('/opt/MegaRAID/MegaCli/MegaCli64 pdrbld showprog '
                        'physdrv['+str(enclnum)+':'+str(slotnum)+'] a'
                        +str(controllerid)+' nolog')
                ll = getOutput(cmd2)
                # The progress query may print nothing; keep the bare state then.
                if ll:
                    state += ' completed ' + re.sub(
                        r'Rebuild Progress.*Completed', '', ll[0]).strip()
        if re.match(r'Slot Number: .*$',line):
            sawDiskField = True
            slotnum = raw.split(':')[1].strip()
        if re.match(r'Inquiry Data: .*$',line):
            sawDiskField = True
            model = raw.split(':')[1].strip()
            model = re.sub(' +', ' ', model)
            #remove bogus output from firmware 12.12
            model = re.sub('Hotspare Information', '', model).strip()
        # 'postion' (sic): the pattern has to match the tool's output verbatim,
        # presumably the tool's own spelling -- do not "correct" it.
        if re.match(r"(Drive|Disk)'s postion: .*$",line):
            sawDiskField = True
            spans = raw.split(',')
            span = re.sub(r"(Drive|Disk).*DiskGroup:", '', spans[0]).strip()+'-'
            span += spans[1].split(':')[1].strip()+'-'
            span += spans[2].split(':')[1].strip()
        if re.match(r'Enclosure Device ID: [0-9]+$',line):
            sawDiskField = True
            if firstDisk:
                firstDisk = False
            else:
                # A new disk begins: flush the one collected so far.
                table.append([str(enclnum)+':'+str(slotnum), span, model, state])
                span = 'x-x-x'
            enclnum = raw.split(':')[1].strip()

    if not sawDiskField:
        # Nothing disk-related in the listing: do not invent a placeholder row.
        return table
    # Last disk of last array
    table.append([str(enclnum)+':'+str(slotnum), span, model, state])
    return table
# ---- Report generation -------------------------------------------------
# Prints up to three '|'-separated tables (controllers, volumes, disks) and
# exits with status 1 when any volume is not in the 'Optimal' state.
MEGACLI = '/opt/MegaRAID/MegaCli/MegaCli64'

cmd = MEGACLI + ' adpcount nolog'
output = getOutput(cmd)
# A missing count line is treated as "no controllers".
controllernumber = returnControllerNumber(output) or 0
bad = False

# List available controller
if printcontroller:
    print('-- Controllers --')
    print('-- ID | Model')
    for controllerid in range(controllernumber):
        cmd = MEGACLI + ' adpallinfo a'+str(controllerid)+' nolog'
        output = getOutput(cmd)
        # Fall back to a printable placeholder if no model line was found.
        controllermodel = returnControllerModel(output) or 'Unknown'
        print('c'+str(controllerid)+' | '+controllermodel)
    print('')

if printarray:
    print('-- Volumes --')
    print('-- ID | Type | Size | Status | InProgress')
    for controllerid in range(controllernumber):
        cmd = MEGACLI + ' ldinfo lall a'+str(controllerid)+' nolog'
        output = getOutput(cmd)
        arraynumber = returnArrayNumber(output)
        for arrayid in range(arraynumber):
            cmd = MEGACLI + ' ldinfo l'+str(arrayid)+' a'+str(controllerid)+' nolog'
            output = getOutput(cmd)
            arrayinfo = returnArrayInfo(output,controllerid,arrayid)
            # str() each field so a non-string default cannot break the join.
            print('volume '+' | '.join([str(field) for field in arrayinfo]))
            if not arrayinfo[3] == 'Optimal':
                bad = True
    print('')

print('-- Disks --')
print('-- Encl:Slot | vol-span-unit | Model | Status')
for controllerid in range(controllernumber):
    # The disk listing covers the whole controller and returnDiskInfo() does
    # not filter by volume, so query it once per controller.  Looping over the
    # volumes here printed every disk once per volume, and printed no disks at
    # all on a controller without volumes.
    cmd = MEGACLI + ' pdlist a'+str(controllerid)+' nolog'
    output = getOutput(cmd)
    arraydisk = returnDiskInfo(output,controllerid,0)
    for array in arraydisk:
        print('disk '+array[0]+' | '+array[1]+' | '+array[2]+' | '+array[3])

if bad:
    print('\nThere is at least one disk/array in a NOT OPTIMAL state.')
    sys.exit(1)
======EOF======
And this script is /root/bin/lsi-checkraid, which is meant to run at the
end of a pipe: lsi-raidinfo | lsi-checkraid. I run this from a monitor script.
#!/usr/bin/python
# created by johnpuskar at gmail.com on 08/14/11
# rev 01
import os
import re
import sys
# This filter takes no arguments; its only input is stdin.
if len(sys.argv) > 1:
    print('Usage: accepts stdin from lsi-raidinfo')
    sys.exit(1)

# Report lines are classified by their leading keyword.
DISK_LINE = re.compile(r'disk .*$')
VOLUME_LINE = re.compile(r'volume ')
# Each pattern below matches only a line that contains NONE of the listed
# healthy states, i.e. a match means "this line is not healthy".
DISK_NOT_HEALTHY = re.compile(r'^((?!Online, Spun Up|Online, Spun down|Hotspare, Spun Up|Hotspare, Spun down|Unconfigured\(good\), Spun Up).)*$')
VOLUME_NOT_OPTIMAL = re.compile(r'^((?!Optimal).)*$')

problemFound = False
for rawLine in sys.stdin:
    entry = rawLine.strip()
    if DISK_LINE.match(entry) and DISK_NOT_HEALTHY.match(entry):
        problemFound = True
    if VOLUME_LINE.match(entry) and VOLUME_NOT_OPTIMAL.match(entry):
        problemFound = True

# A single verdict line, meant to be grepped by the caller.
print('RAID ERROR' if problemFound else 'RAID CLEAN')
======EOF=======
And finally, this is bin/lsi-emailalerts, which uses the two scripts above
to send an email alert...
#!/bin/sh
# lsi-emailalerts: mail a warning when the RAID status check reports an
# error, or when mpt-status reports a state other than OPTIMAL/ONLINE.
MAILTOADDR=root
# Upper-cased short host name for the mail subject.  The tr sets are quoted
# so the shell cannot glob-expand [a-z] against file names in the current
# directory.
HOST=$(hostname -s | tr 'a-z' 'A-Z')

#get megaraid status info
# The full report is kept in /tmp/lsi-raidinfo.txt; the one-line verdict
# (RAID ERROR / RAID CLEAN) goes to /tmp/lsi-checkraid.txt.
/root/bin/lsi-raidinfo | tee /tmp/lsi-raidinfo.txt |
    /root/bin/lsi-checkraid > /tmp/lsi-checkraid.txt

#check megaraid status info
if grep -qE "RAID ERROR" /tmp/lsi-checkraid.txt ; then
    mailx -s "$HOST Warning: failed disk or degraded array" "$MAILTOADDR" < /tmp/lsi-raidinfo.txt
fi

#check mpt status
# NOTE(review): bin/mpt-status is a relative path, so this depends on the
# working directory the script is started from -- confirm, or make it absolute.
# grep -v succeeds when at least one status field is neither OPTIMAL nor ONLINE.
if bin/mpt-status -s | awk '{print $3}' | grep -Eqv "(OPTIMAL|ONLINE)" ; then
    bin/mpt-status | mailx -s "$HOST Warning: internal disk failure" "$MAILTOADDR"
fi

#rm -f /tmp/lsi-raidinfo.txt
#rm -f /tmp/lsi-checkraid.txt
exit 0
========EOF==========
--
john r pierce 37N 122W
somewhere on the middle of the left coast
More information about the CentOS
mailing list