Commit fdd9c743674d9c06a68cef0ffbe7f4b0d5b599ab
1 parent
75ead3b5
Adding wear leveling changes to check_smart. Adding check_systemd and check_zfs
Showing
6 changed files
with
222 additions
and
15 deletions
check_smart/auxiliar.cpp
100644 → 100755
check_smart/auxiliar.h
100644 → 100755
check_smart/check_smart.cpp
1 | #include "check_smart.h" | 1 | #include "check_smart.h" |
2 | +#include <regex> | ||
2 | 3 | ||
3 | const char *servicename = (const char*)"SMART"; | 4 | const char *servicename = (const char*)"SMART"; |
4 | 5 | ||
/**
 * @brief Parse the integer stored in one whitespace-separated column of a line.
 *
 * Columns are zero-based. Leading blanks are ignored and any run of blanks
 * (space, tab, newline, vertical tab, form feed, carriage return) counts as a
 * single separator.
 *
 * @param line Text line to inspect.
 * @param col  Zero-based index of the column to read.
 * @return The column's content converted with std::stoi (base 10).
 *
 * @throws std::runtime_error    if no separator is left while skipping to
 *                               column @p col ("Column out of range").
 * @throws std::invalid_argument if the selected column does not start with a
 *                               number (propagated from std::stoi).
 * @throws std::out_of_range     if the number does not fit in an int
 *                               (propagated from std::stoi).
 */
int getSmartAttrValue(std::string line, unsigned int col) {
    // Same character set the previous "\\s+" regex matched; scanning by hand
    // avoids compiling two std::regex objects on every call.
    const char *const blanks = " \t\n\v\f\r";

    // Start of column 0 (end of string when the line is empty or all blanks).
    std::size_t start = line.find_first_not_of(blanks);
    if (start == std::string::npos) {
        start = line.size();
    }

    // Hop over `col` tokens, each followed by one run of blanks.
    for (unsigned int skipped = 0; skipped < col; ++skipped) {
        const std::size_t gap = line.find_first_of(blanks, start);
        if (gap == std::string::npos) {
            throw std::runtime_error("Column out of range");
        }
        start = line.find_first_not_of(blanks, gap);
        if (start == std::string::npos) {
            // Trailing blanks only: the column exists but is empty, so let
            // std::stoi report it just like it did before.
            start = line.size();
        }
    }

    // substr() takes a length, not an end position.
    const std::size_t stop = line.find_first_of(blanks, start);
    const std::size_t length =
        (stop == std::string::npos) ? std::string::npos : stop - start;
    return std::stoi(line.substr(start, length));
}
8 | 31 | ||
9 | int getSmartAttrID(string line) { | 32 | int getSmartAttrID(string line) { |
@@ -88,7 +111,7 @@ int evalStatus(const char* disk, int driveType, string *status) { | @@ -88,7 +111,7 @@ int evalStatus(const char* disk, int driveType, string *status) { | ||
88 | continue; | 111 | continue; |
89 | } | 112 | } |
90 | if(getSmartAttrID(line) == id) { | 113 | if(getSmartAttrID(line) == id) { |
91 | - attrMap[id].value = getSmartAttrValue(line); | 114 | + attrMap[id].value = getSmartAttrValue(line, attrMap[id].col); |
92 | } | 115 | } |
93 | } | 116 | } |
94 | } | 117 | } |
@@ -107,11 +130,21 @@ int evalStatus(const char* disk, int driveType, string *status) { | @@ -107,11 +130,21 @@ int evalStatus(const char* disk, int driveType, string *status) { | ||
107 | } | 130 | } |
108 | 131 | ||
109 | int veredict = 0; | 132 | int veredict = 0; |
110 | - if(attr.value > attr.threshold_warn) { | ||
111 | - veredict = WARN; | ||
112 | - } | ||
113 | - if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) { | ||
114 | - veredict = CRIT; | 133 | + |
134 | + if(attr.lower_than) { | ||
135 | + if(attr.value < attr.threshold_warn) { | ||
136 | + veredict = WARN; | ||
137 | + } | ||
138 | + if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) { | ||
139 | + veredict = CRIT; | ||
140 | + } | ||
141 | + } else { | ||
142 | + if(attr.value > attr.threshold_warn) { | ||
143 | + veredict = WARN; | ||
144 | + } | ||
145 | + if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) { | ||
146 | + veredict = CRIT; | ||
147 | + } | ||
115 | } | 148 | } |
116 | 149 | ||
117 | switch(veredict) { | 150 | switch(veredict) { |
@@ -183,7 +216,7 @@ int main(int argc, char **argv) { | @@ -183,7 +216,7 @@ int main(int argc, char **argv) { | ||
183 | switch(c) { | 216 | switch(c) { |
184 | case 'h': | 217 | case 'h': |
185 | printHelp(true); | 218 | printHelp(true); |
186 | - return OK; | 219 | + return OK; |
187 | case 'V': | 220 | case 'V': |
188 | printVersion(); | 221 | printVersion(); |
189 | return OK; | 222 | return OK; |
check_smart/check_smart.h
100644 → 100755
@@ -48,6 +48,8 @@ struct SMARTAttr | @@ -48,6 +48,8 @@ struct SMARTAttr | ||
48 | int threshold_warn; | 48 | int threshold_warn; |
49 | int threshold_crit; | 49 | int threshold_crit; |
50 | bool optional; | 50 | bool optional; |
51 | + unsigned int col; | ||
52 | + bool lower_than; | ||
51 | }; typedef struct SMARTAttr SMARTAttr; | 53 | }; typedef struct SMARTAttr SMARTAttr; |
52 | 54 | ||
53 | 55 | ||
@@ -59,6 +61,8 @@ SMARTAttr reallocated = { | @@ -59,6 +61,8 @@ SMARTAttr reallocated = { | ||
59 | .threshold_warn = 0, | 61 | .threshold_warn = 0, |
60 | .threshold_crit = -1, | 62 | .threshold_crit = -1, |
61 | .optional = false, | 63 | .optional = false, |
64 | + .col = 9, | ||
65 | + .lower_than = false, | ||
62 | }; | 66 | }; |
63 | 67 | ||
64 | SMARTAttr pending = { | 68 | SMARTAttr pending = { |
@@ -66,8 +70,10 @@ SMARTAttr pending = { | @@ -66,8 +70,10 @@ SMARTAttr pending = { | ||
66 | .name = "Current_Pending_Sector", | 70 | .name = "Current_Pending_Sector", |
67 | .value = -1, | 71 | .value = -1, |
68 | .threshold_warn = 0, | 72 | .threshold_warn = 0, |
69 | - .threshold_crit = -1, | 73 | + .threshold_crit = -1, |
70 | .optional = false, | 74 | .optional = false, |
75 | + .col = 9, | ||
76 | + .lower_than = false, | ||
71 | }; | 77 | }; |
72 | 78 | ||
73 | SMARTAttr off_uncorrect = { | 79 | SMARTAttr off_uncorrect = { |
@@ -77,24 +83,30 @@ SMARTAttr off_uncorrect = { | @@ -77,24 +83,30 @@ SMARTAttr off_uncorrect = { | ||
77 | .threshold_warn = 0, | 83 | .threshold_warn = 0, |
78 | .threshold_crit = 0, | 84 | .threshold_crit = 0, |
79 | .optional = false, | 85 | .optional = false, |
86 | + .col = 9, | ||
87 | + .lower_than = false, | ||
80 | }; | 88 | }; |
81 | 89 | ||
82 | SMARTAttr wear = { | 90 | SMARTAttr wear = { |
83 | .id = WEAR_COUNT_ID, | 91 | .id = WEAR_COUNT_ID, |
84 | .name = "Wear_Leveling_Count", | 92 | .name = "Wear_Leveling_Count", |
85 | .value = -1, | 93 | .value = -1, |
86 | - .threshold_warn = 80, | ||
87 | - .threshold_crit = 90, | 94 | + .threshold_warn = 20, |
95 | + .threshold_crit = 10, | ||
88 | .optional = true, | 96 | .optional = true, |
97 | + .col = 3, | ||
98 | + .lower_than = true, | ||
89 | }; | 99 | }; |
90 | 100 | ||
91 | SMARTAttr wearout = { | 101 | SMARTAttr wearout = { |
92 | .id = MEDIA_WEAROUT_ID, | 102 | .id = MEDIA_WEAROUT_ID, |
93 | .name = "Media_Wearout_Indicator", | 103 | .name = "Media_Wearout_Indicator", |
94 | .value = -1, | 104 | .value = -1, |
95 | - .threshold_warn = 80, | ||
96 | - .threshold_crit = 90, | 105 | + .threshold_warn = 20, |
106 | + .threshold_crit = 10, | ||
97 | .optional = true, | 107 | .optional = true, |
108 | + .col = 3, | ||
109 | + .lower_than = true, | ||
98 | }; | 110 | }; |
99 | 111 | ||
100 | SMARTAttr badblocks = { | 112 | SMARTAttr badblocks = { |
@@ -104,6 +116,8 @@ SMARTAttr badblocks = { | @@ -104,6 +116,8 @@ SMARTAttr badblocks = { | ||
104 | .threshold_warn = 0, | 116 | .threshold_warn = 0, |
105 | .threshold_crit = 0, | 117 | .threshold_crit = 0, |
106 | .optional = false, | 118 | .optional = false, |
119 | + .col = 9, | ||
120 | + .lower_than = false, | ||
107 | }; | 121 | }; |
108 | 122 | ||
109 | SMARTAttr rep_uncorrect = { | 123 | SMARTAttr rep_uncorrect = { |
@@ -113,6 +127,8 @@ SMARTAttr rep_uncorrect = { | @@ -113,6 +127,8 @@ SMARTAttr rep_uncorrect = { | ||
113 | .threshold_warn = 0, | 127 | .threshold_warn = 0, |
114 | .threshold_crit = -1, | 128 | .threshold_crit = -1, |
115 | .optional = false, | 129 | .optional = false, |
130 | + .col = 9, | ||
131 | + .lower_than = false, | ||
116 | }; | 132 | }; |
117 | 133 | ||
118 | map<int,SMARTAttr> prepareAttrMap(int driveType); | 134 | map<int,SMARTAttr> prepareAttrMap(int driveType); |
check_systemd/check_systemd.sh
0 → 100755
check_zfs/check_zfs.sh
0 → 100755
1 | +#!/bin/bash | ||
2 | + | ||
# Run check_smart on every /dev/* member of a ZFS pool and report the worst
# result.
#
# Arguments:
#   $1  pool name
# Environment:
#   CHECK_SMART_BIN  optional override for the check_smart executable
#                    (default: /usr/lib64/nagios/plugins/check_smart)
# Output:
#   One line "ZFS POOL SMART <pool> <STATE>[ - <plugin output>...]"; the output
#   of every disk whose check did not return 0 is appended.
# Returns:
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (also when the pool cannot be
#   queried or a disk check exits with a code above 3).
function smart_disks() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$1
    local check_smart=${CHECK_SMART_BIN:-/usr/lib64/nagios/plugins/check_smart}
    local zpool_out disk_list disk output res
    local err_output=""
    local highest_res=0

    # Capture the status on its own so a failing zpool is not masked by the
    # exit code of the text-processing pipeline.
    if ! zpool_out=$(zpool status -P "$pool"); then
        echo "ZFS POOL SMART $pool UNKNOWN - unable to query pool status"
        return 3
    fi

    # First field of every line whose first field starts with /dev.
    disk_list=$(sed -nE 's|^[[:blank:]]*(/dev[^[:blank:]]*).*$|\1|p' <<< "$zpool_out")

    # Intentionally unquoted: one device path per word.
    for disk in $disk_list; do
        output=$("$check_smart" "$disk")
        res=$?
        # Anything outside 0..3 (e.g. 127 "command not found") is UNKNOWN;
        # previously such codes fell through and were reported as OK.
        if (( res > 3 )); then
            res=3
        fi
        if (( res != 0 )); then
            err_output="$err_output - $output"
        fi
        if (( res > highest_res )); then
            highest_res=$res
        fi
    done

    echo -n "ZFS POOL SMART $pool "
    case $highest_res in
        1) echo "WARNING${err_output}" ;;
        2) echo "CRITICAL${err_output}" ;;
        3) echo "UNKNOWN${err_output}" ;;
        *) echo "OK" ;;
    esac
    return "$highest_res"
}
39 | + | ||
# Report the state and data-error summary of a ZFS pool.
#
# Arguments:
#   $1  pool name
# Output:
#   One line starting with "ZFS POOL ERRORS <pool> - ".
# Returns:
#   0 OK        state is ONLINE and "errors: No known data errors"
#   1 WARNING   state is ONLINE but the errors line reports something else
#   2 CRITICAL  state is anything other than ONLINE
#   3 UNKNOWN   zpool failed, or its output has no "state:" / "errors:" line
function pool_errors() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$1
    local output status errors

    if ! output=$(zpool status "$pool"); then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: unable to query pool status"
        return 3
    fi

    # Value of the first "state:" line. An empty result is checked explicitly
    # because a pipeline's exit code only reflects its last command.
    status=$(sed -nE 's/^[[:space:]]*state:[[:space:]]*([^[:space:]]*).*$/\1/p' <<< "$output" | head -n 1)
    if [[ -z $status ]]; then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: no state reported"
        return 3
    fi

    if ! errors=$(grep '^errors:' <<< "$output"); then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: no errors line reported"
        return 3
    fi

    if [[ $status != "ONLINE" ]]; then
        echo "ZFS POOL ERRORS $pool - CRITICAL: status: $status"
        return 2
    fi

    if [[ $errors != "errors: No known data errors" ]]; then
        echo "ZFS POOL ERRORS $pool - WARNING: $errors"
        return 1
    fi

    echo "ZFS POOL ERRORS $pool - OK"
    return 0
}
74 | + | ||
75 | +function pool_free() { | ||
76 | + if [[ $# != 7 ]]; then | ||
77 | + echo "Wrong number of arguments" | ||
78 | + return 3 | ||
79 | + fi | ||
80 | + POOL=$3 | ||
81 | + | ||
82 | + FREE_RAW=$(zfs list -po available $POOL | tail -n 1) | ||
83 | + if [[ $? != 0 ]]; then | ||
84 | + return 3 | ||
85 | + fi | ||
86 | + | ||
87 | + USED_RAW=$(zfs list -po used $POOL | tail -n 1) | ||
88 | + if [[ $? != 0 ]]; then | ||
89 | + return 3 | ||
90 | + fi | ||
91 | + TOTAL_RAW=$((USED_RAW + FREE_RAW)) | ||
92 | + TOTAL=$(echo "scale=2; $TOTAL_RAW / (1024^3)" | bc) | ||
93 | + FREE=$(echo "scale=2; $FREE_RAW / (1024^3)" | bc) | ||
94 | + PERC_FREE=$(echo "scale=2; 100*$FREE_RAW/$TOTAL_RAW" | bc) | ||
95 | + | ||
96 | + CRITICAL="0" | ||
97 | + WARNING="0" | ||
98 | + TEMP=$(getopt -o c:w: -- "$@") | ||
99 | + eval set -- "$TEMP" | ||
100 | + | ||
101 | + for opt; do | ||
102 | + case "$opt" in | ||
103 | + -c) CRITICAL=$2; shift 2 ;; | ||
104 | + -w) WARNING=$2; shift 2 ;; | ||
105 | + esac | ||
106 | + done | ||
107 | + | ||
108 | + if [[ $CRITICAL == "0" || $WARNING == "0" ]]; then | ||
109 | + echo "Missing -c or -w arguments" | ||
110 | + return 3 | ||
111 | + fi | ||
112 | + | ||
113 | + if [[ $(echo "$PERC_FREE < $CRITICAL" | bc) == 1 ]]; then | ||
114 | + echo "ZFS POOL FREE $POOL - CRITICAL: ${PERC_FREE}% ${FREE}/${TOTAL} GB" | ||
115 | + return 2 | ||
116 | + elif [[ $(echo "$PERC_FREE < $WARNING" | bc) == 1 ]]; then | ||
117 | + echo "ZFS POOL FREE $POOL - WARNING: ${PERC_FREE}% ${FREE}/${TOTAL} GB" | ||
118 | + return 2 | ||
119 | + fi | ||
120 | + | ||
121 | + echo "ZFS POOL FREE $POOL - OK: ${PERC_FREE}% ${FREE}/${TOTAL} GB" | ||
122 | + return 0 | ||
123 | +} | ||
124 | + | ||
# Entry point: <script> <smart|errors|free> <pool> [options for the action]
# Exits with the status of the selected check, or 3 (UNKNOWN) on bad usage.
if [[ $# -lt 2 ]]; then
    echo "Wrong number of arguments"
    exit 3
fi

ACTION=$1
POOL=$2

# Arguments are quoted so pool names and option values reach the handlers
# unchanged (no word splitting or glob expansion).
case "$ACTION" in
    smart)
        smart_disks "$POOL"
        exit $?
        ;;
    errors)
        pool_errors "$POOL"
        exit $?
        ;;
    free)
        # pool_free expects the pool followed by the complete argument list.
        pool_free "$POOL" "$@"
        exit $?
        ;;
esac

echo "Unknown command: $ACTION"
exit 3