#!/bin/bash
#
# Calomel.org
# https://calomel.org/megacli_lsi_commands.html
# LSI MegaRaid CLI
# lsi.sh @ Version 0.05
#
# description: MegaCLI script to configure and monitor LSI raid cards.

# Name of the MegaRaid CLI binary. A bare name such as "megacli" is looked up
# in PATH; use a full path here if the binary is installed elsewhere.
MegaCli="megacli"

# The identifying number of the enclosure. Default for our systems is "8". Use
# 'megacli -PDlist -a0 | grep "Enclosure Device"' to see what your number
# is and set this variable.
ENCLOSURE="8"

# With no arguments, print the list of supported sub-commands and exit.
# The "$arg1 $arg2" in the banner are literal placeholders for the sub-command
# and its optional argument, so they are escaped rather than expanded (the
# script never sets variables with those names).
if [ $# -eq 0 ]
then
  echo ""
  echo " OBPG .:. lsi.sh \$arg1 \$arg2"
  echo "-----------------------------------------------------"
  echo "status = Status of Virtual drives (volumes)"
  echo "drives = Status of hard drives"
  echo "ident \$slot = Blink light on drive (need slot number)"
  echo "good \$slot = Simply makes the slot \"Unconfigured(good)\" (need slot number)"
  echo "replace \$slot = Replace \"Unconfigured(bad)\" drive (need slot number)"
  echo "progress = Status of drive rebuild"
  echo "errors = Show drive errors which are non-zero"
  echo "bat = Battery health and capacity"
  echo "batrelearn = Force BBU re-learn cycle"
  echo "logs = Print card logs"
  echo "checkNemail \$email = Check volume(s) and send email on raid errors"
  echo "allinfo = Print out all settings and information about the card"
  echo "settime = Set the raid card's time to the current system time"
  echo "setdefaults = Set preferred default settings for new raid setup"
  echo ""
  exit
fi

# General status of all RAID virtual disks or volumes and if PATROL disk check
# is running. Also shows the progress of any running consistency check.
# "$1" is quoted so an empty or multi-word first argument does not make the
# test itself fail.
if [ "$1" = "status" ]
then
  "$MegaCli" -LDInfo -Lall -aALL -NoLog
  echo "###############################################"
  "$MegaCli" -AdpPR -Info -aALL -NoLog
  echo "###############################################"
  "$MegaCli" -LDCC -ShowProg -LALL -aALL -NoLog
  exit
fi

# Shows the state of all drives and if they are online, unconfigured or missing.
# The awk stage joins each "Slot" line with the "state" line(s) that follow it
# so every drive is reported on a single line; sed then strips the
# "Firmware state:" label.
if [ "$1" = "drives" ]
then
  "$MegaCli" -PDlist -aALL -NoLog \
    | grep -E 'Slot|state' \
    | awk '/Slot/{if (x)print x;x="";}{x=(!x)?$0:x" -"$0;}END{print x;}' \
    | sed 's/Firmware state://g'
  exit
fi

# Use to blink the light on the slot in question. Hit enter again to turn the
# blinking light off.
# Arguments: $2 - slot number of the drive (required).
# The "[enclosure:slot]" selector is quoted because an unquoted [...] is a
# shell glob and could be replaced by a matching file name in the current
# directory.
if [ "$1" = "ident" ]
then
  if [ -z "$2" ]
  then
    echo "ident: need slot number" >&2
    exit 1
  fi
  "$MegaCli" -PdLocate -start -physdrv"[$ENCLOSURE:$2]" -a0 -NoLog
  logger "$(hostname) - identifying enclosure $ENCLOSURE, drive $2 "
  read -r -p "Press [Enter] key to turn off light..."
  "$MegaCli" -PdLocate -stop -physdrv"[$ENCLOSURE:$2]" -a0 -NoLog
  exit
fi

# When a new drive is inserted it might have old RAID headers on it. This
# method simply removes old RAID configs from the drive in the slot and makes
# the drive "good." Basically, Unconfigured(bad) to Unconfigured(good). We use
# this method on our FreeBSD ZFS machines before the drive is added back into
# the zfs pool.
# Arguments: $2 - slot number of the drive (required).
if [ "$1" = "good" ]
then
  if [ -z "$2" ]
  then
    echo "good: need slot number" >&2
    exit 1
  fi
  # set Unconfigured(bad) to Unconfigured(good); the selector is quoted so the
  # shell does not treat [..] as a glob pattern
  "$MegaCli" -PDMakeGood -PhysDrv"[$ENCLOSURE:$2]" -a0 -NoLog
  # clear 'Foreign' flag or invalid raid header on replacement drive
  "$MegaCli" -CfgForeign -Clear -aALL -NoLog
  exit
fi

# Use to diagnose bad drives. When no errors are shown only the slot numbers
# will print out. If a drive(s) has an error you will see the number of errors
# under the slot number. At this point you can decide to replace the flaky
# drive. Bad drives might not fail right away and will slow down your raid with
# read/write retries or corrupt data.
#
# Only lines whose value is exactly 0 are hidden. Filtering on any "0"
# character would also hide slots such as 10 or 20 and counts such as 10 or
# 105. The "Slot Number: 0" header is printed by hand because the real slot 0
# line has the value 0 and is therefore removed by the filter.
if [ "$1" = "errors" ]
then
  echo "Slot Number: 0"
  "$MegaCli" -PDlist -aALL -NoLog \
    | grep -Ei 'error|fail|slot' \
    | grep -Ev ':[[:space:]]*0[[:space:]]*$'
  exit
fi

# Status of the battery and the amount of charge. Without a working Battery
# Backup Unit (BBU) most of the LSI read/write caching will be disabled
# automatically. You want caching for speed so make sure the battery is ok.
if [ "$1" = "bat" ]
then
  "$MegaCli" -AdpBbuCmd -aAll -NoLog
  exit
fi

# Force a Battery Backup Unit (BBU) re-learn cycle. This will discharge the
# lithium BBU unit and recharge it. This check might take a few hours and you
# will want to always run this in off hours. LSI suggests a battery relearn
# monthly or so. We actually run it every three(3) months by way of a cron job.
# Understand if your "Current Cache Policy" is set to "No Write Cache if Bad
# BBU" then write-cache will be disabled during this check. This means writes
# to the raid will be VERY slow at about 1/10th normal speed. NOTE: if the
# battery is new (new bats should charge for a few hours before they register)
# or if the BBU comes up and says it has no charge try powering off the machine
# and restart it. This will force the LSI card to re-evaluate the BBU. Silly
# but it works.
if [ "$1" = "batrelearn" ]
then
  "$MegaCli" -AdpBbuCmd -BbuLearn -aALL -NoLog
  exit
fi

# Use to replace a drive. You need the slot number and may want to use the
# "drives" method to show which drive in a slot is "Unconfigured(bad)". Once
# the new drive is in the slot and spun up this method will bring the drive
# online, clear any foreign raid headers from the replacement drive and set the
# drive as a hot spare. We will also tell the card to start rebuilding if it
# does not start automatically. The raid should start rebuilding right away
# either way. NOTE: if you pass a slot number which is already part of the raid
# by mistake the LSI raid card is smart enough to just error out and _NOT_
# destroy the raid drive, thankfully.
# Arguments: $2 - slot number of the replacement drive (required).
# Every "[enclosure:slot]" selector is quoted: as a bare word it is a shell
# glob and would be replaced by a file named e.g. "8" or "2" if one exists in
# the current directory.
if [ "$1" = "replace" ]
then
  if [ -z "$2" ]
  then
    echo "replace: need slot number" >&2
    exit 1
  fi
  logger "$(hostname) - REPLACE enclosure $ENCLOSURE, drive $2 "
  # set Unconfigured(bad) to Unconfigured(good)
  "$MegaCli" -PDMakeGood -PhysDrv"[$ENCLOSURE:$2]" -a0 -NoLog
  # clear 'Foreign' flag or invalid raid header on replacement drive
  "$MegaCli" -CfgForeign -Clear -aALL -NoLog
  # set drive as hot spare
  "$MegaCli" -PDHSP -Set -PhysDrv "[$ENCLOSURE:$2]" -a0 -NoLog
  # show rebuild progress on replacement drive just to make sure it starts
  "$MegaCli" -PDRbld -ShowProg -PhysDrv "[$ENCLOSURE:$2]" -a0 -NoLog
  exit
fi

# Print all the logs from the LSI raid card. You can grep on the output.
if [ "$1" = "logs" ]
then
  "$MegaCli" -FwTermLog -Dsply -aALL -NoLog
  exit
fi

# Use to query the RAID card and find the drive which is rebuilding. The script
# will then query the rebuilding drive to see what percentage it is rebuilt and
# how much time it has taken so far. You can then guess-ti-mate the
# completion time.
#
# The drive list is reduced to one line per slot (same pipeline as "drives"),
# lines mentioning "build" are kept, and the third field (the slot number) is
# extracted. Each rebuilding slot is queried separately; when nothing is
# rebuilding a short notice is printed instead of sending an empty slot
# number to the card.
if [ "$1" = "progress" ]
then
  DRIVES=$("$MegaCli" -PDlist -aALL -NoLog \
    | grep -E 'Slot|state' \
    | awk '/Slot/{if (x)print x;x="";}{x=(!x)?$0:x" -"$0;}END{print x;}' \
    | sed 's/Firmware state://g' \
    | grep build \
    | awk '{print $3}')

  if [ -z "$DRIVES" ]
  then
    echo "No drive is currently rebuilding."
    exit
  fi

  # Read the slot list on fd 3 so the CLI's own stdin is left untouched.
  while IFS= read -r DRIVE <&3
  do
    "$MegaCli" -PDRbld -ShowProg -PhysDrv "[$ENCLOSURE:$DRIVE]" -a0 -NoLog
  done 3<<< "$DRIVES"
  exit
fi

# Use to check the status of the raid. If the raid is degraded or faulty the
# script will send email to the address given as the second argument. We
# normally add this method to a cron job to be run every few hours so we are
# notified of any issues.
# Arguments: $2 - email address to notify (required).
# Exits 0 whether or not a mail was sent; exits 1 only when the address is
# missing.
if [ "$1" = "checkNemail" ]
then
  EMAIL="$2"
  if [ -z "$EMAIL" ]
  then
    echo "checkNemail: need email address" >&2
    exit 1
  fi

  # Check if raid is in good condition
  STATUS=$("$MegaCli" -LDInfo -Lall -aALL -NoLog | grep -Ei 'fail|degrad|error')

  # On bad raid status send email with basic drive information
  if [ -n "$STATUS" ]
  then
    "$MegaCli" -PDlist -aALL -NoLog \
      | grep -E 'Slot|state' \
      | awk '/Slot/{if (x)print x;x="";}{x=(!x)?$0:x" -"$0;}END{print x;}' \
      | sed 's/Firmware state://g' \
      | mail -s "$(hostname) - RAID Notification" "$EMAIL"
  fi
  exit 0
fi

# Use to print all information about the LSI raid card. Check default options,
# firmware version (FW Package Build), battery back-up unit presence, installed
# cache memory and the capabilities of the adapter. Pipe to grep to find the
# term you need.
if [ "$1" = "allinfo" ]
then
  "$MegaCli" -AdpAllInfo -aAll -NoLog
  exit
fi

# Update the LSI card's time with the current operating system time. You may
# want to setup a cron job to call this method once a day or whenever you
# think the raid card's time might drift too much.
# The card's time is printed before and after the update.
if [ "$1" = "settime" ]
then
  "$MegaCli" -AdpGetTime -aALL -NoLog
  # Take date and time from a single clock reading; two separate date calls
  # could straddle midnight and pair the old date with the new time.
  NOW=$(date '+%Y%m%d %H:%M:%S')
  "$MegaCli" -AdpSetTime "${NOW% *}" "${NOW#* }" -aALL -NoLog
  "$MegaCli" -AdpGetTime -aALL -NoLog
  exit
fi

# These are the defaults we like to use on the hundreds of raids we manage. You
# will want to go through each option here and make sure you want to use them
# too. These options are for speed optimization, build rate tweaks and PATROL
# options. When setting up a new machine we simply execute the "setdefaults"
# method and the raid is configured. You can use this on live raids too.
if [ "$1" = "setdefaults" ]
then
  # Read Cache enabled specifies that all reads are buffered in cache memory.
  "$MegaCli" -LDSetProp -Cached -LAll -aAll -NoLog
  # Adaptive Read-Ahead if the controller receives several requests to sequential sectors
  "$MegaCli" -LDSetProp ADRA -LALL -aALL -NoLog
  # Hard Disk cache policy enabled allowing the drive to use internal caching too
  "$MegaCli" -LDSetProp EnDskCache -LAll -aAll -NoLog
  # Write-Back cache enabled
  "$MegaCli" -LDSetProp WB -LALL -aALL -NoLog
  # Continue booting with data stuck in cache. Set Boot with Pinned Cache Enabled.
  "$MegaCli" -AdpSetProp -BootWithPinnedCache -1 -aALL -NoLog
  # PATROL run every 672 hours or monthly (RAID6 77TB @60% rebuild takes 21 hours)
  "$MegaCli" -AdpPR -SetDelay 672 -aALL -NoLog
  # Check Consistency every 672 hours or monthly
  "$MegaCli" -AdpCcSched -SetDelay 672 -aALL -NoLog
  # Enable autobuild when a new Unconfigured(good) drive is inserted or set to hot spare
  "$MegaCli" -AdpAutoRbld -Enbl -a0 -NoLog
  # RAID rebuild rate to 60% (build quick before another failure)
  "$MegaCli" -AdpSetProp \{RebuildRate -60\} -aALL -NoLog
  # RAID check consistency rate to 60% (fast parity checks)
  "$MegaCli" -AdpSetProp \{CCRate -60\} -aALL -NoLog
  # Enable Native Command Queue (NCQ) on all drives
  "$MegaCli" -AdpSetProp NCQEnbl -aAll -NoLog
  # Sound alarm disabled (server room is too loud anyways)
  "$MegaCli" -AdpSetProp AlarmDsbl -aALL -NoLog
  # Use write-back cache mode even if BBU is bad. Make sure your machine is on UPS too.
  "$MegaCli" -LDSetProp CachedBadBBU -LAll -aAll -NoLog
  # Disable auto learn BBU check which can severely affect raid speeds.
  # The property is handed to the CLI through a temporary file; stop if the
  # file cannot be created rather than redirecting to an empty name.
  OUTBBU=$(mktemp /tmp/output.XXXXXXXXXX) || exit 1
  echo "autoLearnMode=1" > "$OUTBBU"
  "$MegaCli" -AdpBbuCmd -SetBbuProperties -f "$OUTBBU" -a0 -NoLog
  rm -f -- "$OUTBBU"
  exit
fi

### EOF ###