#!/bin/bash
#
# Calomel.org
#     https://calomel.org/megacli_lsi_commands.html
#     LSI MegaRaid CLI
#     lsi.sh @ Version 0.05
#
# description: MegaCLI script to configure and monitor LSI raid cards.

# MegaRaid CLI binary to invoke. Put the full path here when the binary is
# not reachable through PATH.
MegaCli=megacli

# Enclosure device ID, used whenever a physical drive is addressed as
# [enclosure:slot]. The default for our systems is "8". Run
#   MegaCli64 -PDlist -a0 | grep "Enclosure Device"
# to see what your number is and set this variable accordingly.
ENCLOSURE=8

# With no arguments, print the list of supported sub-commands and exit.
if [ $# -eq 0 ]; then
  # Single quotes are deliberate: $arg1, $arg2, $slot and $email are literal
  # placeholders in the help text, not shell variables to expand.
  # shellcheck disable=SC2016
  printf '%s\n' \
    '' \
    '            OBPG  .:.  lsi.sh $arg1 $arg2' \
    '-----------------------------------------------------' \
    'status        = Status of Virtual drives (volumes)' \
    'drives        = Status of hard drives' \
    'ident $slot   = Blink light on drive (need slot number)' \
    'good $slot    = Simply makes the slot "Unconfigured(good)" (need slot number)' \
    'replace $slot = Replace "Unconfigured(bad)" drive (need slot number)' \
    'progress      = Status of drive rebuild' \
    'errors        = Show drive errors which are non-zero' \
    'bat           = Battery health and capacity' \
    'batrelearn    = Force BBU re-learn cycle' \
    'logs          = Print card logs' \
    'checkNemail $email  = Check volume(s) and send email on raid errors' \
    'allinfo       = Print out all settings and information about the card' \
    "settime       = Set the raid card's time to the current system time" \
    'setdefaults   = Set preferred default settings for new raid setup' \
    ''
  exit
fi

# General status of all RAID virtual disks or volumes and if PATROL disk check
# is running. Prints, separated by rulers: the logical drive info, the patrol
# read info and the check-consistency progress of every adapter.
# "$1" is quoted so an empty or multi-word argument cannot break the test.
if [ "$1" = "status" ]; then
  "$MegaCli" -LDInfo -Lall -aALL -NoLog
  echo "###############################################"
  "$MegaCli" -AdpPR -Info -aALL -NoLog
  echo "###############################################"
  "$MegaCli" -LDCC -ShowProg -LALL -aALL -NoLog
  exit
fi

# Condense `MegaCli -PDlist` output into one line per drive.
# Input:  PDlist text on stdin.
# Output: "Slot Number: N - <firmware state>" lines on stdout.
# The awk program starts a new record at every "Slot" line and appends the
# following matching lines to it, joined with " -".
lsi_drive_states() {
  grep -E 'Slot|state' \
    | awk '/Slot/{if (x)print x;x="";}{x=(!x)?$0:x" -"$0;}END{print x;}' \
    | sed 's/Firmware state://g'
}

# Shows the state of all drives and if they are online, unconfigured or missing.
if [ "$1" = "drives" ]; then
  "$MegaCli" -PDlist -aALL -NoLog | lsi_drive_states
  exit
fi

# Use to blink the light on the slot in question ($2 = slot number). Hit enter
# again to turn the blinking light off. The action is recorded via logger.
if [ "$1" = "ident" ]; then
  if [ -z "$2" ]; then
    echo "ident: need slot number" >&2
    exit 1
  fi
  # The drive selector is quoted: unquoted, "[enclosure:slot]" is a glob
  # bracket expression and could be replaced by a matching file name.
  "$MegaCli" -PdLocate -start "-physdrv[$ENCLOSURE:$2]" -a0 -NoLog
  logger "$(hostname) - identifying enclosure $ENCLOSURE, drive $2 "
  read -r -p "Press [Enter] key to turn off light..."
  "$MegaCli" -PdLocate -stop "-physdrv[$ENCLOSURE:$2]" -a0 -NoLog
  exit
fi

# When a new drive is inserted it might have old RAID headers on it. This
# method simply removes old RAID configs from the drive in the slot ($2) and
# makes the drive "good." Basically, Unconfigured(bad) to Unconfigured(good).
# We use this method on our FreeBSD ZFS machines before the drive is added
# back into the zfs pool.
if [ "$1" = "good" ]; then
  if [ -z "$2" ]; then
    echo "good: need slot number" >&2
    exit 1
  fi
  # set Unconfigured(bad) to Unconfigured(good); the selector is quoted so
  # the shell cannot treat "[enclosure:slot]" as a glob pattern
  "$MegaCli" -PDMakeGood "-PhysDrv[$ENCLOSURE:$2]" -a0 -NoLog
  # clear 'Foreign' flag or invalid raid header on replacement drive
  "$MegaCli" -CfgForeign -Clear -aALL -NoLog
  exit
fi

# Filter `MegaCli -PDlist` output down to slot, error and failure lines whose
# value is not zero.
# Input:  PDlist text on stdin.
# Output: matching lines on stdout.
# Only lines ending in ": 0" are dropped. Filtering on any "0" character would
# also hide "Slot Number: 10" or an error count such as 105.
lsi_nonzero_errors() {
  grep -Ei 'error|fail|slot' | grep -Ev ':[[:space:]]*0[[:space:]]*$'
}

# Use to diagnose bad drives. When no errors are shown only the slot numbers
# will print out. If a drive(s) has an error you will see the number of errors
# under the slot number. At this point you can decide to replace the flaky
# drive. Bad drives might not fail right away and will slow down your raid with
# read/write retries or corrupt data.
if [ "$1" = "errors" ]; then
  # The filter removes the "Slot Number: 0" line itself, so print it by hand.
  echo "Slot Number: 0"
  "$MegaCli" -PDlist -aALL -NoLog | lsi_nonzero_errors
  exit
fi

# Status of the battery and the amount of charge. Without a working Battery
# Backup Unit (BBU) most of the LSI read/write caching will be disabled
# automatically. You want caching for speed so make sure the battery is ok.
# "$1" is quoted so an empty or multi-word argument cannot break the test.
if [ "$1" = "bat" ]; then
  "$MegaCli" -AdpBbuCmd -aAll -NoLog
  exit
fi

# Force a Battery Backup Unit (BBU) re-learn cycle. This will discharge the
# lithium BBU unit and recharge it. This check might take a few hours and you
# will want to always run this in off hours. LSI suggests a battery relearn
# monthly or so. We actually run it every three(3) months by way of a cron job.
# Understand if your "Current Cache Policy" is set to "No Write Cache if Bad
# BBU" then write-cache will be disabled during this check. This means writes
# to the raid will be VERY slow at about 1/10th normal speed. NOTE: if the
# battery is new (new bats should charge for a few hours before they register)
# or if the BBU comes up and says it has no charge try powering off the machine
# and restart it. This will force the LSI card to re-evaluate the BBU. Silly
# but it works.
# "$1" is quoted so an empty or multi-word argument cannot break the test.
if [ "$1" = "batrelearn" ]; then
  "$MegaCli" -AdpBbuCmd -BbuLearn -aALL -NoLog
  exit
fi

# Use to replace a drive. You need the slot number ($2) and may want to use the
# "drives" method to show which drive in a slot is "Unconfigured(bad)". Once
# the new drive is in the slot and spun up this method will bring the drive
# online, clear any foreign raid headers from the replacement drive and set the
# drive as a hot spare. We will also tell the card to start rebuilding if it
# does not start automatically. The raid should start rebuilding right away
# either way. NOTE: if you pass a slot number which is already part of the raid
# by mistake the LSI raid card is smart enough to just error out and _NOT_
# destroy the raid drive, thankfully.
if [ "$1" = "replace" ]; then
  if [ -z "$2" ]; then
    echo "replace: need slot number" >&2
    exit 1
  fi
  logger "$(hostname) - REPLACE enclosure $ENCLOSURE, drive $2 "
  # Every "[enclosure:slot]" selector below is quoted. Unquoted it is a glob
  # bracket expression, and a file named e.g. "8" or "2" in the current
  # directory would silently replace it.
  # set Unconfigured(bad) to Unconfigured(good)
  "$MegaCli" -PDMakeGood "-PhysDrv[$ENCLOSURE:$2]" -a0 -NoLog
  # clear 'Foreign' flag or invalid raid header on replacement drive
  "$MegaCli" -CfgForeign -Clear -aALL -NoLog
  # set drive as hot spare
  "$MegaCli" -PDHSP -Set -PhysDrv "[$ENCLOSURE:$2]" -a0 -NoLog
  # show rebuild progress on replacement drive just to make sure it starts
  "$MegaCli" -PDRbld -ShowProg -PhysDrv "[$ENCLOSURE:$2]" -a0 -NoLog
  exit
fi

# Print all the logs from the LSI raid card. You can grep on the output.
# "$1" is quoted so an empty or multi-word argument cannot break the test.
if [ "$1" = "logs" ]; then
  "$MegaCli" -FwTermLog -Dsply -aALL -NoLog
  exit
fi

# Extract the slot numbers of all drives whose firmware state mentions a
# rebuild.
# Input:  `MegaCli -PDlist` text on stdin.
# Output: one slot number per line on stdout; nothing when no drive rebuilds.
lsi_rebuilding_slots() {
  grep -E 'Slot|state' \
    | awk '/Slot/{if (x)print x;x="";}{x=(!x)?$0:x" -"$0;}END{print x;}' \
    | sed 's/Firmware state://g' \
    | grep -E build \
    | awk '{print $3}'
}

# Use to query the RAID card and find the drive which is rebuilding. The script
# will then query the rebuilding drive to see what percentage it is rebuilt and
# how much time it has taken so far. You can then guess-ti-mate the
# completion time.
if [ "$1" = "progress" ]; then
  slots=$("$MegaCli" -PDlist -aALL -NoLog | lsi_rebuilding_slots)
  # Without this guard an empty result would query the malformed drive "[8:]".
  if [ -z "$slots" ]; then
    echo "No drive is currently rebuilding."
    exit
  fi
  # $slots is intentionally unquoted: one word (slot number) per rebuilding
  # drive, each queried on its own.
  for DRIVE in $slots; do
    "$MegaCli" -PDRbld -ShowProg -PhysDrv "[$ENCLOSURE:$DRIVE]" -a0 -NoLog
  done
  exit
fi

# Use to check the status of the raid. If the raid is degraded or faulty the
# script will send email to the address given as $2 (kept in $EMAIL). We
# normally add this method to a cron job to be run every few hours so we are
# notified of any issues.
if [ "$1" = "checkNemail" ]; then
  EMAIL="$2"
  if [ -z "$EMAIL" ]; then
    echo "checkNemail: need email address" >&2
    exit 1
  fi

  # Check if raid is in good condition: any logical-drive info line mentioning
  # fail/degrad/error counts as a problem.
  STATUS=$("$MegaCli" -LDInfo -Lall -aALL -NoLog | grep -Ei 'fail|degrad|error')

  # On bad raid status send email with basic drive information
  if [ -n "$STATUS" ]; then
    # $EMAIL is left unquoted on purpose so that several space-separated
    # addresses passed in $2 still become separate recipients.
    # shellcheck disable=SC2086
    "$MegaCli" -PDlist -aALL -NoLog \
      | grep -E 'Slot|state' \
      | awk '/Slot/{if (x)print x;x="";}{x=(!x)?$0:x" -"$0;}END{print x;}' \
      | sed 's/Firmware state://g' \
      | mail -s "$(hostname) - RAID Notification" $EMAIL
  fi
  # Stop here like every other sub-command instead of falling through the
  # remaining dispatch tests.
  exit
fi

# Use to print all information about the LSI raid card. Check default options,
# firmware version (FW Package Build), battery back-up unit presence, installed
# cache memory and the capabilities of the adapter. Pipe to grep to find the
# term you need.
# "$1" is quoted so an empty or multi-word argument cannot break the test.
if [ "$1" = "allinfo" ]; then
  "$MegaCli" -AdpAllInfo -aAll -NoLog
  exit
fi

# Update the LSI card's time with the current operating system time. You may
# want to setup a cron job to call this method once a day or whenever you
# think the raid card's time might drift too much. The card's time is printed
# before and after the update.
if [ "$1" = "settime" ]; then
  "$MegaCli" -AdpGetTime -aALL -NoLog
  # Take a single timestamp and split it, so the date and the time always
  # belong to the same instant (two separate `date` calls can straddle
  # midnight).
  now=$(date '+%Y%m%d %H:%M:%S')
  "$MegaCli" -AdpSetTime "${now% *}" "${now#* }" -aALL -NoLog
  "$MegaCli" -AdpGetTime -aALL -NoLog
  exit
fi

# These are the defaults we like to use on the hundreds of raids we manage. You
# will want to go through each option here and make sure you want to use them
# too. These options are for speed optimization, build rate tweaks and PATROL
# options. When setting up a new machine we simply execute the "setdefaults"
# method and the raid is configured. You can use this on live raids too.
if [ "$1" = "setdefaults" ]; then
  # Read Cache enabled specifies that all reads are buffered in cache memory.
  "$MegaCli" -LDSetProp -Cached -LAll -aAll -NoLog
  # Adaptive Read-Ahead if the controller receives several requests to sequential sectors
  "$MegaCli" -LDSetProp ADRA -LALL -aALL -NoLog
  # Hard Disk cache policy enabled allowing the drive to use internal caching too
  "$MegaCli" -LDSetProp EnDskCache -LAll -aAll -NoLog
  # Write-Back cache enabled
  "$MegaCli" -LDSetProp WB -LALL -aALL -NoLog
  # Continue booting with data stuck in cache. Set Boot with Pinned Cache Enabled.
  "$MegaCli" -AdpSetProp -BootWithPinnedCache -1 -aALL -NoLog
  # PATROL run every 672 hours or monthly (RAID6 77TB @60% rebuild takes 21 hours)
  "$MegaCli" -AdpPR -SetDelay 672 -aALL -NoLog
  # Check Consistency every 672 hours or monthly
  "$MegaCli" -AdpCcSched -SetDelay 672 -aALL -NoLog
  # Enable autobuild when a new Unconfigured(good) drive is inserted or set to hot spare
  "$MegaCli" -AdpAutoRbld -Enbl -a0 -NoLog
  # RAID rebuild rate to 60% (build quick before another failure).
  # The braces are passed to MegaCli literally, as two separate arguments.
  "$MegaCli" -AdpSetProp \{RebuildRate -60\} -aALL -NoLog
  # RAID check consistency rate to 60% (fast parity checks)
  "$MegaCli" -AdpSetProp \{CCRate -60\} -aALL -NoLog
  # Enable Native Command Queue (NCQ) on all drives
  "$MegaCli" -AdpSetProp NCQEnbl -aAll -NoLog
  # Sound alarm disabled (server room is too loud anyways)
  "$MegaCli" -AdpSetProp AlarmDsbl -aALL -NoLog
  # Use write-back cache mode even if BBU is bad. Make sure your machine is on UPS too.
  "$MegaCli" -LDSetProp CachedBadBBU -LAll -aAll -NoLog
  # Disable auto learn BBU check which can severely affect raid speeds.
  # The property is handed to MegaCli through a temporary file; stop if that
  # file cannot be created instead of writing to an empty path.
  OUTBBU=$(mktemp /tmp/output.XXXXXXXXXX) || {
    echo "setdefaults: could not create temporary file" >&2
    exit 1
  }
  echo "autoLearnMode=1" > "$OUTBBU"
  "$MegaCli" -AdpBbuCmd -SetBbuProperties -f "$OUTBBU" -a0 -NoLog
  rm -f -- "$OUTBBU"
  exit
fi

### EOF ###