Commit fdd9c743674d9c06a68cef0ffbe7f4b0d5b599ab
1 parent
75ead3b5
Adding wear leveling changes to check_smart. Adding check_systemd and check_zfs
Showing
6 changed files
with
222 additions
and
15 deletions
check_smart/auxiliar.cpp
100644 → 100755
check_smart/auxiliar.h
100644 → 100755
check_smart/check_smart.cpp
1 | 1 | #include "check_smart.h" |
2 | +#include <regex> | |
2 | 3 | |
3 | 4 | const char *servicename = (const char*)"SMART"; |
4 | 5 | |
5 | -int getSmartAttrValue(string line) { | |
6 | - return stoi(line.substr(line.find_last_of(" ")+1)); | |
/**
 * @brief Parses the integer stored in one whitespace-separated column of a line.
 *
 * Columns are separated by runs of whitespace; leading whitespace on the line
 * is ignored, so column 0 is always the first visible token.
 *
 * @param line Text line to parse.
 * @param col  Zero-based index of the column to read.
 * @return The column's content converted with std::stoi (base 10).
 * @throws std::runtime_error    if the line has no column with index @p col.
 * @throws std::invalid_argument if the column does not start with a number.
 * @throws std::out_of_range     if the number does not fit in an int.
 */
int getSmartAttrValue(std::string line, unsigned int col) {
    // Same character set the "\\s" class matches in the classic locale.
    static const char* const kBlank = " \t\n\v\f\r";
    const std::string::size_type npos = std::string::npos;

    // Walk token by token instead of normalising the line with regexes:
    // constructing std::regex objects on every call is needlessly expensive.
    std::string::size_type field = line.find_first_not_of(kBlank);
    for (unsigned int skipped = 0; skipped < col && field != npos; ++skipped) {
        const std::string::size_type gap = line.find_first_of(kBlank, field);
        field = (gap == npos) ? npos : line.find_first_not_of(kBlank, gap);
    }
    if (field == npos) {
        throw std::runtime_error("Column out of range");
    }

    // substr() takes a length, not an end position.
    const std::string::size_type stop = line.find_first_of(kBlank, field);
    const std::string::size_type length = (stop == npos) ? npos : stop - field;
    return std::stoi(line.substr(field, length));
}
8 | 31 | |
9 | 32 | int getSmartAttrID(string line) { |
... | ... | @@ -88,7 +111,7 @@ int evalStatus(const char* disk, int driveType, string *status) { |
88 | 111 | continue; |
89 | 112 | } |
90 | 113 | if(getSmartAttrID(line) == id) { |
91 | - attrMap[id].value = getSmartAttrValue(line); | |
114 | + attrMap[id].value = getSmartAttrValue(line, attrMap[id].col); | |
92 | 115 | } |
93 | 116 | } |
94 | 117 | } |
... | ... | @@ -107,11 +130,21 @@ int evalStatus(const char* disk, int driveType, string *status) { |
107 | 130 | } |
108 | 131 | |
109 | 132 | int veredict = 0; |
110 | - if(attr.value > attr.threshold_warn) { | |
111 | - veredict = WARN; | |
112 | - } | |
113 | - if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) { | |
114 | - veredict = CRIT; | |
133 | + | |
134 | + if(attr.lower_than) { | |
135 | + if(attr.value < attr.threshold_warn) { | |
136 | + veredict = WARN; | |
137 | + } | |
138 | + if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) { | |
139 | + veredict = CRIT; | |
140 | + } | |
141 | + } else { | |
142 | + if(attr.value > attr.threshold_warn) { | |
143 | + veredict = WARN; | |
144 | + } | |
145 | + if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) { | |
146 | + veredict = CRIT; | |
147 | + } | |
115 | 148 | } |
116 | 149 | |
117 | 150 | switch(veredict) { |
... | ... | @@ -183,7 +216,7 @@ int main(int argc, char **argv) { |
183 | 216 | switch(c) { |
184 | 217 | case 'h': |
185 | 218 | printHelp(true); |
186 | - return OK; | |
219 | + return OK; | |
187 | 220 | case 'V': |
188 | 221 | printVersion(); |
189 | 222 | return OK; | ... | ... |
check_smart/check_smart.h
100644 → 100755
... | ... | @@ -48,6 +48,8 @@ struct SMARTAttr |
48 | 48 | int threshold_warn; |
49 | 49 | int threshold_crit; |
50 | 50 | bool optional; |
51 | + unsigned int col; | |
52 | + bool lower_than; | |
51 | 53 | }; typedef struct SMARTAttr SMARTAttr; |
52 | 54 | |
53 | 55 | |
... | ... | @@ -59,6 +61,8 @@ SMARTAttr reallocated = { |
59 | 61 | .threshold_warn = 0, |
60 | 62 | .threshold_crit = -1, |
61 | 63 | .optional = false, |
64 | + .col = 9, | |
65 | + .lower_than = false, | |
62 | 66 | }; |
63 | 67 | |
64 | 68 | SMARTAttr pending = { |
... | ... | @@ -66,8 +70,10 @@ SMARTAttr pending = { |
66 | 70 | .name = "Current_Pending_Sector", |
67 | 71 | .value = -1, |
68 | 72 | .threshold_warn = 0, |
69 | - .threshold_crit = -1, | |
73 | + .threshold_crit = -1, | |
70 | 74 | .optional = false, |
75 | + .col = 9, | |
76 | + .lower_than = false, | |
71 | 77 | }; |
72 | 78 | |
73 | 79 | SMARTAttr off_uncorrect = { |
... | ... | @@ -77,24 +83,30 @@ SMARTAttr off_uncorrect = { |
77 | 83 | .threshold_warn = 0, |
78 | 84 | .threshold_crit = 0, |
79 | 85 | .optional = false, |
86 | + .col = 9, | |
87 | + .lower_than = false, | |
80 | 88 | }; |
81 | 89 | |
82 | 90 | SMARTAttr wear = { |
83 | 91 | .id = WEAR_COUNT_ID, |
84 | 92 | .name = "Wear_Leveling_Count", |
85 | 93 | .value = -1, |
86 | - .threshold_warn = 80, | |
87 | - .threshold_crit = 90, | |
94 | + .threshold_warn = 20, | |
95 | + .threshold_crit = 10, | |
88 | 96 | .optional = true, |
97 | + .col = 3, | |
98 | + .lower_than = true, | |
89 | 99 | }; |
90 | 100 | |
91 | 101 | SMARTAttr wearout = { |
92 | 102 | .id = MEDIA_WEAROUT_ID, |
93 | 103 | .name = "Media_Wearout_Indicator", |
94 | 104 | .value = -1, |
95 | - .threshold_warn = 80, | |
96 | - .threshold_crit = 90, | |
105 | + .threshold_warn = 20, | |
106 | + .threshold_crit = 10, | |
97 | 107 | .optional = true, |
108 | + .col = 3, | |
109 | + .lower_than = true, | |
98 | 110 | }; |
99 | 111 | |
100 | 112 | SMARTAttr badblocks = { |
... | ... | @@ -104,6 +116,8 @@ SMARTAttr badblocks = { |
104 | 116 | .threshold_warn = 0, |
105 | 117 | .threshold_crit = 0, |
106 | 118 | .optional = false, |
119 | + .col = 9, | |
120 | + .lower_than = false, | |
107 | 121 | }; |
108 | 122 | |
109 | 123 | SMARTAttr rep_uncorrect = { |
... | ... | @@ -113,6 +127,8 @@ SMARTAttr rep_uncorrect = { |
113 | 127 | .threshold_warn = 0, |
114 | 128 | .threshold_crit = -1, |
115 | 129 | .optional = false, |
130 | + .col = 9, | |
131 | + .lower_than = false, | |
116 | 132 | }; |
117 | 133 | |
118 | 134 | map<int,SMARTAttr> prepareAttrMap(int driveType); | ... | ... |
check_systemd/check_systemd.sh
0 → 100755
check_zfs/check_zfs.sh
0 → 100755
1 | +#!/bin/bash | |
2 | + | |
# Runs check_smart against every device of a ZFS pool and reports the worst result.
#
# Arguments:
#   $1 - pool name
# Outputs:
#   One status line on stdout: "ZFS POOL SMART <pool> <STATE>[ - <details>...]"
# Returns:
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN
function smart_disks() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$1
    local pool_status
    if ! pool_status=$(zpool status -P "$pool"); then
        echo "ZFS POOL SMART $pool UNKNOWN - unable to query pool status"
        return 3
    fi

    # Device rows are the ones whose first field is an absolute /dev path.
    local disks
    disks=$(sed -E 's/[[:space:]]+/ /g; s/^ //' <<< "$pool_status" | grep "^/dev" | cut -d' ' -f 1)
    if [[ -z $disks ]]; then
        # Nothing was checked, so the pool must not be reported as healthy.
        echo "ZFS POOL SMART $pool UNKNOWN - no devices found"
        return 3
    fi

    local err_output="" highest_res=0
    local disk output res
    # Word splitting is intentional here: one device path per word.
    for disk in $disks; do
        output=$(/usr/lib64/nagios/plugins/check_smart "$disk")
        res=$?
        if (( res > 3 )); then
            # Statuses above 3 (e.g. 127: plugin not found) are not valid plugin
            # results; report them as UNKNOWN instead of letting them pass as OK.
            output="$disk: check_smart exited with status $res"
            res=3
        fi
        if (( res != 0 )); then
            err_output="$err_output - $output"
        fi
        if (( res > highest_res )); then
            highest_res=$res
        fi
    done

    local label
    case $highest_res in
        0)
            echo "ZFS POOL SMART $pool OK"
            return 0
            ;;
        1) label="WARNING" ;;
        2) label="CRITICAL" ;;
        *) label="UNKNOWN" ;;
    esac

    echo "ZFS POOL SMART $pool ${label}${err_output}"
    return "$highest_res"
}
39 | + | |
# Reports the state and the data-error summary of a ZFS pool.
#
# Arguments:
#   $1 - pool name
# Outputs:
#   One status line on stdout: "ZFS POOL ERRORS <pool> - <STATE>[: <details>]"
# Returns:
#   0 OK, 1 WARNING (data errors reported), 2 CRITICAL (state is not ONLINE),
#   3 UNKNOWN (pool status could not be read or parsed)
function pool_errors() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local output
    if ! output="$(zpool status "$1")"; then
        echo "ZFS POOL ERRORS $1 - UNKNOWN: unable to query pool status"
        return 3
    fi

    # Test the extracted value itself: the exit status of a pipeline is that of
    # its last command, which says nothing about whether a state line was found.
    local status
    status=$(grep -E '^[[:space:]]*state:' <<< "$output" | cut -d ':' -f 2 | sed 's/ //g')
    if [[ -z $status ]]; then
        echo "ZFS POOL ERRORS $1 - UNKNOWN: no state line in pool status"
        return 3
    fi

    local errors
    if ! errors=$(grep -E '^errors:' <<< "$output"); then
        echo "ZFS POOL ERRORS $1 - UNKNOWN: no errors line in pool status"
        return 3
    fi

    if [[ $status != "ONLINE" ]]; then
        echo "ZFS POOL ERRORS $1 - CRITICAL: status: $status"
        return 2
    fi

    if [[ $errors != "errors: No known data errors" ]]; then
        echo "ZFS POOL ERRORS $1 - WARNING: $errors"
        return 1
    fi

    echo "ZFS POOL ERRORS $1 - OK"
    return 0
}
74 | + | |
# Prints the raw value of one property of a dataset as reported by
# "zfs list -po <property> <dataset>".
#
# Arguments:
#   $1 - property name (e.g. available, used)
#   $2 - dataset / pool name
# Returns:
#   0 and the value on stdout; 1 when zfs fails or the value is not an integer.
function _zfs_raw_property() {
    local listing value
    listing=$(zfs list -po "$1" "$2") || return 1
    # The last line carries the value; the line above it is the column header.
    value=$(tail -n 1 <<< "$listing" | sed 's/[[:space:]]//g')
    [[ $value =~ ^[0-9]+$ ]] || return 1
    echo "$value"
}

# Compares the percentage of free space of a ZFS pool against two thresholds.
#
# Arguments (exactly 7):
#   $3           - pool name
#   -c <percent> - free-space percentage below which the result is CRITICAL
#   -w <percent> - free-space percentage below which the result is WARNING
#   The remaining positional arguments are ignored.
# Outputs:
#   One status line on stdout: "ZFS POOL FREE <pool> - <STATE>: <pct>% <free>/<total> GB"
# Returns:
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN
function pool_free() {
    if [[ $# != 7 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi
    local pool=$3

    local critical="0" warning="0" parsed
    parsed=$(getopt -o c:w: -- "$@")
    eval set -- "$parsed"
    # Consume the normalised option list front to back; "--" ends the options.
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -c) critical=$2; shift 2 ;;
            -w) warning=$2; shift 2 ;;
            --) break ;;
            *) shift ;;
        esac
    done

    if [[ $critical == "0" || $warning == "0" ]]; then
        echo "Missing -c or -w arguments"
        return 3
    fi

    # The thresholds are interpolated into bc expressions below, so only plain
    # decimal numbers are accepted.
    local number='^[0-9]+([.][0-9]+)?$'
    if [[ ! $critical =~ $number || ! $warning =~ $number ]]; then
        echo "Invalid -c or -w arguments"
        return 3
    fi

    local free_raw used_raw
    if ! free_raw=$(_zfs_raw_property available "$pool") ||
       ! used_raw=$(_zfs_raw_property used "$pool"); then
        echo "ZFS POOL FREE $pool - UNKNOWN: unable to read pool usage"
        return 3
    fi

    local total_raw=$((used_raw + free_raw))
    if (( total_raw == 0 )); then
        # Guard the percentage computation against a division by zero.
        echo "ZFS POOL FREE $pool - UNKNOWN: pool reports a total size of 0"
        return 3
    fi

    local total free perc_free
    total=$(echo "scale=2; $total_raw / (1024^3)" | bc)
    free=$(echo "scale=2; $free_raw / (1024^3)" | bc)
    perc_free=$(echo "scale=2; 100*$free_raw/$total_raw" | bc)

    if [[ $(echo "$perc_free < $critical" | bc) == 1 ]]; then
        echo "ZFS POOL FREE $pool - CRITICAL: ${perc_free}% ${free}/${total} GB"
        return 2
    elif [[ $(echo "$perc_free < $warning" | bc) == 1 ]]; then
        echo "ZFS POOL FREE $pool - WARNING: ${perc_free}% ${free}/${total} GB"
        # WARNING is exit status 1; 2 is reserved for CRITICAL.
        return 1
    fi

    echo "ZFS POOL FREE $pool - OK: ${perc_free}% ${free}/${total} GB"
    return 0
}
124 | + | |
# Entry point: check_zfs.sh <smart|errors|free> <pool> [-c <percent> -w <percent>]
# Dispatches to the selected check and exits with that check's return status.
if [[ $# -lt 2 ]]; then
    echo "Wrong number of arguments"
    exit 3
fi

ACTION=$1
POOL=$2

# Arguments are quoted so that they reach the checks exactly as they were given,
# without word splitting or pathname expansion.
case "$ACTION" in
    smart)
        smart_disks "$POOL"
        exit $?
        ;;
    errors)
        pool_errors "$POOL"
        exit $?
        ;;
    free)
        # pool_free receives the pool followed by the complete original argument list.
        pool_free "$POOL" "$@"
        exit $?
        ;;
esac

echo "Unknown command: $ACTION"
exit 3