[CentOS] Very slow disk I/O

Mon Feb 2 03:31:22 UTC 2015
Jatin Davey <jashokda at cisco.com>

On 1/30/2015 9:44 AM, John R Pierce wrote:
> On 1/29/2015 7:21 PM, Jatin Davey wrote:
>> [root@localhost ~]# lspci | grep RAID
>> 05:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS-3 3108 [Invader] (rev 02)
>
> to get info out of those, you need to install MegaCli64 from LSI 
> Logic, which has the ugliest command lines and output you've ever seen.
>
> I use the python script below, which I put in /root/bin/lsi-raidinfo 
> and chmod +x it, then run it like this..
>
> # lsi-raidinfo
> -- Controllers --
> -- ID | Model
> c0 | LSI MegaRAID SAS 9261-8i
>
> -- Volumes --
> -- ID | Type | Size | Status | InProgress
> volume c0u0 | RAID1 1x2 | 2727G | Optimal | None
> volume c0u1 | RAID6 1x8 | 16370G | Optimal | None
> volume c0u2 | RAID6 1x8 | 16370G | Optimal | None
>
> -- Disks --
> -- Encl:Slot | vol-span-unit | Model | Status
> disk 8:0 | 0-0-0 | ST33000650NS 0003 | Online, Spun Up
> disk 8:1 | 0-0-1 | ST33000650NS 0003 | Online, Spun Up
> disk 8:2 | 1-0-0 | ST33000650NS 0003 | Online, Spun Up
> disk 8:3 | 1-0-1 | ST33000650NS 0003 | Online, Spun Up
> disk 8:4 | 1-0-2 | ST33000650NS 0003 | Online, Spun Up
> disk 8:5 | 1-0-3 | ST33000650NS 0003 | Online, Spun Up
> disk 8:6 | 1-0-4 | ST33000650NS 0003 | Online, Spun Up
> disk 8:7 | 1-0-5 | ST33000650NS 0003 | Online, Spun Up
> disk 8:8 | 1-0-6 | ST33000650NS 0003 | Online, Spun Up
> disk 8:9 | 1-0-7 | ST33000650NS 0003 | Online, Spun Up
> disk 8:10 | 2-0-0 | ST33000650NS 0003 | Online, Spun Up
> disk 8:11 | 2-0-1 | ST33000650NS 0003 | Online, Spun Up
> disk 8:12 | 2-0-2 | ST33000650NS 0003 | Online, Spun Up
> disk 8:13 | 2-0-3 | ST33000650NS 0003 | Online, Spun Up
> disk 8:14 | 2-0-4 | ST33000650NS 0003 | Online, Spun Up
> disk 8:15 | 2-0-5 | ST33000650NS 0003 | Online, Spun Up
> disk 8:16 | 2-0-6 | ST33000650NS 0003 | Online, Spun Up
> disk 8:17 | 2-0-7 | ST33000650NS 0003 | Online, Spun Up
> disk 8:18 | x-x-x | ST33000650NS 0003 | Hotspare, Spun down
> disk 8:19 | x-x-x | ST33000650NS 0003 | Hotspare, Spun down
>
> (yes, that system has 20 disks, in 3 raids + 2 hot spares)
>
> *****************start of my script***********
> #!/usr/bin/python
>
> # megaclisas-status 0.6
> # renamed lsi-raidinfo
> #
> # This program is free software; you can redistribute it and/or modify
> # it under the terms of the GNU General Public License as published by
> # the Free Software Foundation; either version 2 of the License, or
> # (at your option) any later version.
> #
> # This program is distributed in the hope that it will be useful,
> # but WITHOUT ANY WARRANTY; without even the implied warranty of
> # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> # GNU General Public License for more details.
> #
> # You should have received a copy of the GNU General Public License
> # along with Pulse 2; if not, write to the Free Software
> # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
> # MA 02110-1301, USA.
> #
> # Copyright (C) 2007-2009 Adam Cecile (Le_Vert)
>
> ## modified by johnpuskar at gmail.com 08/14/11
> # fixed for LSI 9285-8e on Openfiler
>
> ## modified by pierce at hogranch.com 2012-01-05
> # fixed for newer version of megacli output on RHEL6/CentOS6
> # output format extended to show raid span-unit and rebuild % complete
>
> import os
> import re
> import sys
>
> if len(sys.argv) > 2:
>     print 'Usage: lsi-raidinfo [-d]'
>     sys.exit(1)
>
> # if argument -d, only print disk info
> printarray = True
> printcontroller = True
> if len(sys.argv) > 1:
>     if sys.argv[1] == '-d':
>         printarray = False
>         printcontroller = False
>     else:
>         print 'Usage: lsi-raidinfo [-d]'
>         sys.exit(1)
>
> # Get command output
> def getOutput(cmd):
>     output = os.popen(cmd)
>     lines = []
>     for line in output:
>         if not re.match(r'^$',line.strip()):
>             lines.append(line.strip())
>     return lines
>
> def returnControllerNumber(output):
>     for line in output:
>         if re.match(r'^Controller Count.*$',line.strip()):
>             return int(line.split(':')[1].strip().strip('.'))
>
> def returnControllerModel(output):
>     for line in output:
>         if re.match(r'^Product Name.*$',line.strip()):
>             return line.split(':')[1].strip()
>
> def returnArrayNumber(output):
>     i = 0
>     for line in output:
>         if re.match(r'^Virtual (Drive|Disk).*$',line.strip()):
>             i += 1
>     return i
>
> def returnArrayInfo(output,controllerid,arrayid):
>     id = 'c'+str(controllerid)+'u'+str(arrayid)
>     # print 'DEBUG: id = '+str(id)
>     operationlinennumber = False
>     linenumber = 0
>     units = 1
>     type = 'JBOD'
>     span = 0
>     size = 0
>     for line in output:
>         if re.match(r'^RAID Level.*$',line.strip()):
>             type = line.strip().split(':')[1].strip()
>             type = 'RAID' + type.split(',')[0].split('-')[1].strip()
>             # print 'debug: type = '+str(type)
>         if re.match(r'^Number.*$',line.strip()):
>             units = line.strip().split(':')[1].strip()
>         if re.match(r'^Span Depth.*$',line.strip()):
>             span = line.strip().split(':')[1].strip()
>         if re.match(r'^Size.*$',line.strip()):
>             # Size reported in MB
>             if re.match(r'^.*MB$',line.strip().split(':')[1]):
>                 size = line.strip().split(':')[1].strip('MB').strip()
>                 size = str(int(round((float(size) / 1000))))+'G'
>             # Size reported in TB
>             elif re.match(r'^.*TB$',line.strip().split(':')[1]):
>                 size = line.strip().split(':')[1].strip('TB').strip()
>                 size = str(int(round((float(size) * 1000))))+'G'
>             # Size reported in GB (default)
>             else:
>                 size = line.strip().split(':')[1].strip('GB').strip()
>                 size = str(int(round((float(size)))))+'G'
>         if re.match(r'^State.*$',line.strip()):
>             state = line.strip().split(':')[1].strip()
>         if re.match(r'^Ongoing Progresses.*$',line.strip()):
>             operationlinennumber = linenumber
>         linenumber += 1
>         if operationlinennumber:
>             inprogress = output[operationlinennumber+1]
>         else:
>             inprogress = 'None'
>     if span > 1:
>         type = type+'0'
>     type = type + ' ' + str(span) + 'x' + str(units)
>     return [id,type,size,state,inprogress]
>
> def returnDiskInfo(output,controllerid,currentarrayid):
>     arrayid = False
>     oldarrayid = False
>     olddiskid = False
>     table = []
>     state = 'Offline'
>     model = 'Unknown'
>     enclnum = 'Unknown'
>     slotnum = 'Unknown'
>     enclsl = 'Unknown'
>
>     firstDisk = True
>     for line in output:
>         if re.match(r'Firmware state: .*$',line.strip()):
>             state = line.split(':')[1].strip()
>             if re.match(r'Rebuild',state):
>                 cmd2 = '/opt/MegaRAID/MegaCli/MegaCli64 pdrbld 
> showprog physdrv['+str(enclnum)+':'+str(slotnum)+'] 
> a'+str(controllerid)+' nolog'
>                 ll = getOutput(cmd2)
>                 state += ' completed ' + re.sub(r'Rebuild 
> Progress.*Completed', '', ll[0]).strip();
>         if re.match(r'Slot Number: .*$',line.strip()):
>             slotnum = line.split(':')[1].strip()
>         if re.match(r'Inquiry Data: .*$',line.strip()):
>             model = line.split(':')[1].strip()
>             model = re.sub(' +', ' ', model)
>             model = re.sub('Hotspare Information', '', 
> model).strip()     #remove bogus output from firmware 12.12
>         if re.match(r"(Drive|Disk)'s postion: .*$",line.strip()):
>             spans = line.split(',')
>             span = re.sub(r"(Drive|Disk).*DiskGroup:", '', 
> spans[0]).strip()+'-'
>             span += spans[1].split(':')[1].strip()+'-'
>             span += spans[2].split(':')[1].strip()
>         if re.match(r'Enclosure Device ID: [0-9]+$',line.strip()):
>             if firstDisk == True:
>                 firstDisk = False
>             else:
>                 enclsl = str(enclnum)+':'+str(slotnum)
>                 table.append([str(enclsl), span, model, state])
>             span = 'x-x-x'
>             enclnum = line.split(':')[1].strip()
>     # Last disk of last array
>     enclsl = str(enclnum)+':'+str(slotnum)
>     table.append([str(enclsl), span, model, state])
>     arraytable = []
>     for disk in table:
>         arraytable.append(disk)
>     return arraytable
>
> cmd = '/opt/MegaRAID/MegaCli/MegaCli64 adpcount nolog'
> output = getOutput(cmd)
> controllernumber = returnControllerNumber(output)
>
> bad = False
>
> # List available controller
> if printcontroller:
>     print '-- Controllers --'
>     print '-- ID | Model'
>     controllerid = 0
>     while controllerid < controllernumber:
>         cmd = '/opt/MegaRAID/MegaCli/MegaCli64 adpallinfo 
> a'+str(controllerid)+' nolog'
>         output = getOutput(cmd)
>         controllermodel = returnControllerModel(output)
>         print 'c'+str(controllerid)+' | '+controllermodel
>         controllerid += 1
>     print ''
>
> if printarray:
>     controllerid = 0
>     print '-- Volumes --'
>     print '-- ID | Type | Size | Status | InProgress'
>     # print 'controller number'+str(controllernumber)
>     while controllerid < controllernumber:
>         arrayid = 0
>         cmd = '/opt/MegaRAID/MegaCli/MegaCli64 ldinfo lall 
> a'+str(controllerid)+' nolog'
>         output = getOutput(cmd)
>         arraynumber = returnArrayNumber(output)
> #       print 'array number'+str(arraynumber)
>         while arrayid < arraynumber:
>             cmd = '/opt/MegaRAID/MegaCli/MegaCli64 ldinfo 
> l'+str(arrayid)+' a'+str(controllerid)+' nolog'
> #           print 'DEBUG: running '+str(cmd)
>             output = getOutput(cmd)
> #           print 'DEBUG: output '+str(output)
>             arrayinfo = returnArrayInfo(output,controllerid,arrayid)
>             print 'volume '+arrayinfo[0]+' | '+arrayinfo[1]+' | 
> '+arrayinfo[2]+' | '+arrayinfo[3]+' | '+arrayinfo[4]
>             if not arrayinfo[3] == 'Optimal':
>                 bad = True
>             arrayid += 1
>         controllerid += 1
>     print ''
>
> print '-- Disks --'
> print '-- Encl:Slot | vol-span-unit | Model | Status'
>
> controllerid = 0
> while controllerid < controllernumber:
>     arrayid = 0
>     cmd = '/opt/MegaRAID/MegaCli/MegaCli64 ldinfo lall 
> a'+str(controllerid)+' nolog'
>     output = getOutput(cmd)
>     arraynumber = returnArrayNumber(output)
>     while arrayid<arraynumber:         #grab disk arrayId info
>         cmd = '/opt/MegaRAID/MegaCli/MegaCli64 pdlist 
> a'+str(controllerid)+' nolog'
>         #print 'debug: running '+str(cmd)
>         output = getOutput(cmd)
>         arraydisk = returnDiskInfo(output,controllerid,arrayid)
>
>         for array in arraydisk:
>             print 'disk '+array[0]+' | '+array[1]+' | '+array[2]+' | 
> '+array[3]
>             arrayid += 1
>     controllerid += 1
>
> if bad:
>     print '\nThere is at least one disk/array in a NOT OPTIMAL state.'
>     sys.exit(1)
> *****************end of the script**************
>
>
Thanks John,

I ran your script and here is the output for it:

Start of the Output***************************
[root@localhost bin]# lsi-raidinfo
sh: /opt/MegaRAID/MegaCli/MegaCli64: No such file or directory
-- Controllers --
-- ID | Model

-- Volumes --
-- ID | Type | Size | Status | InProgress

-- Disks --
-- Encl:Slot | vol-span-unit | Model | Status
End of Output ***********************************

Thanks
Jatin