Commit fdd9c743674d9c06a68cef0ffbe7f4b0d5b599ab

Authored by Imanol-Mikel Barba Sabariego
1 parent 75ead3b5

Adding wear leveling changes to check_smart. Adding check_systemd and check_zfs

check_smart/auxiliar.cpp 100644 → 100755
@@ -52,4 +52,4 @@ int exec(string cmd, string *output) @@ -52,4 +52,4 @@ int exec(string cmd, string *output)
52 } 52 }
53 } 53 }
54 return pclose(pipe)/256; 54 return pclose(pipe)/256;
55 -} 55 +}
56 \ No newline at end of file 56 \ No newline at end of file
check_smart/auxiliar.h 100644 → 100755
check_smart/check_smart.cpp
1 #include "check_smart.h" 1 #include "check_smart.h"
  2 +#include <regex>
2 3
3 const char *servicename = (const char*)"SMART"; 4 const char *servicename = (const char*)"SMART";
4 5
/**
 * Parse the integer stored in one whitespace-separated column of a line.
 *
 * Columns are numbered from 0. Any run of blanks (space, tab, newline,
 * vertical tab, form feed, carriage return) counts as a single separator,
 * and blanks at the start of the line are ignored.
 *
 * @param line  Text to split into columns.
 * @param col   Zero-based index of the column to read.
 * @return      The column's content converted with std::stoi (base 10).
 * @throws std::runtime_error     if the line has fewer than col + 1 columns.
 * @throws std::invalid_argument  (from std::stoi) if the column does not
 *                                start with a number.
 * @throws std::out_of_range      (from std::stoi) if the number does not fit
 *                                in an int.
 */
int getSmartAttrValue(std::string line, unsigned int col) {
    // Characters treated as column separators.
    static const char *const kBlank = " \t\n\v\f\r";

    // Column 0 starts at the first non-blank character.
    std::string::size_type begin = line.find_first_not_of(kBlank);

    // Hop over `col` columns: jump to the end of the current token, then to
    // the start of the next one. `begin` becomes npos once the line runs out.
    for (unsigned int skipped = 0;
         skipped < col && begin != std::string::npos;
         ++skipped) {
        const std::string::size_type gap = line.find_first_of(kBlank, begin);
        begin = (gap == std::string::npos)
                    ? gap
                    : line.find_first_not_of(kBlank, gap);
    }

    if (begin == std::string::npos) {
        throw std::runtime_error("Column out of range");
    }

    // substr() takes a length, not an end index; npos means "to the end".
    const std::string::size_type end = line.find_first_of(kBlank, begin);
    const std::string::size_type len =
        (end == std::string::npos) ? end : end - begin;

    return std::stoi(line.substr(begin, len));
}
8 31
9 int getSmartAttrID(string line) { 32 int getSmartAttrID(string line) {
@@ -88,7 +111,7 @@ int evalStatus(const char* disk, int driveType, string *status) { @@ -88,7 +111,7 @@ int evalStatus(const char* disk, int driveType, string *status) {
88 continue; 111 continue;
89 } 112 }
90 if(getSmartAttrID(line) == id) { 113 if(getSmartAttrID(line) == id) {
91 - attrMap[id].value = getSmartAttrValue(line); 114 + attrMap[id].value = getSmartAttrValue(line, attrMap[id].col);
92 } 115 }
93 } 116 }
94 } 117 }
@@ -107,11 +130,21 @@ int evalStatus(const char* disk, int driveType, string *status) { @@ -107,11 +130,21 @@ int evalStatus(const char* disk, int driveType, string *status) {
107 } 130 }
108 131
109 int veredict = 0; 132 int veredict = 0;
110 - if(attr.value > attr.threshold_warn) {  
111 - veredict = WARN;  
112 - }  
113 - if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) {  
114 - veredict = CRIT; 133 +
  134 + if(attr.lower_than) {
  135 + if(attr.value < attr.threshold_warn) {
  136 + veredict = WARN;
  137 + }
  138 + if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) {
  139 + veredict = CRIT;
  140 + }
  141 + } else {
  142 + if(attr.value > attr.threshold_warn) {
  143 + veredict = WARN;
  144 + }
  145 + if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) {
  146 + veredict = CRIT;
  147 + }
115 } 148 }
116 149
117 switch(veredict) { 150 switch(veredict) {
@@ -183,7 +216,7 @@ int main(int argc, char **argv) { @@ -183,7 +216,7 @@ int main(int argc, char **argv) {
183 switch(c) { 216 switch(c) {
184 case 'h': 217 case 'h':
185 printHelp(true); 218 printHelp(true);
186 - return OK; 219 + return OK;
187 case 'V': 220 case 'V':
188 printVersion(); 221 printVersion();
189 return OK; 222 return OK;
check_smart/check_smart.h 100644 → 100755
@@ -48,6 +48,8 @@ struct SMARTAttr @@ -48,6 +48,8 @@ struct SMARTAttr
48 int threshold_warn; 48 int threshold_warn;
49 int threshold_crit; 49 int threshold_crit;
50 bool optional; 50 bool optional;
  51 + unsigned int col;
  52 + bool lower_than;
51 }; typedef struct SMARTAttr SMARTAttr; 53 }; typedef struct SMARTAttr SMARTAttr;
52 54
53 55
@@ -59,6 +61,8 @@ SMARTAttr reallocated = { @@ -59,6 +61,8 @@ SMARTAttr reallocated = {
59 .threshold_warn = 0, 61 .threshold_warn = 0,
60 .threshold_crit = -1, 62 .threshold_crit = -1,
61 .optional = false, 63 .optional = false,
  64 + .col = 9,
  65 + .lower_than = false,
62 }; 66 };
63 67
64 SMARTAttr pending = { 68 SMARTAttr pending = {
@@ -66,8 +70,10 @@ SMARTAttr pending = { @@ -66,8 +70,10 @@ SMARTAttr pending = {
66 .name = "Current_Pending_Sector", 70 .name = "Current_Pending_Sector",
67 .value = -1, 71 .value = -1,
68 .threshold_warn = 0, 72 .threshold_warn = 0,
69 - .threshold_crit = -1, 73 + .threshold_crit = -1,
70 .optional = false, 74 .optional = false,
  75 + .col = 9,
  76 + .lower_than = false,
71 }; 77 };
72 78
73 SMARTAttr off_uncorrect = { 79 SMARTAttr off_uncorrect = {
@@ -77,24 +83,30 @@ SMARTAttr off_uncorrect = { @@ -77,24 +83,30 @@ SMARTAttr off_uncorrect = {
77 .threshold_warn = 0, 83 .threshold_warn = 0,
78 .threshold_crit = 0, 84 .threshold_crit = 0,
79 .optional = false, 85 .optional = false,
  86 + .col = 9,
  87 + .lower_than = false,
80 }; 88 };
81 89
82 SMARTAttr wear = { 90 SMARTAttr wear = {
83 .id = WEAR_COUNT_ID, 91 .id = WEAR_COUNT_ID,
84 .name = "Wear_Leveling_Count", 92 .name = "Wear_Leveling_Count",
85 .value = -1, 93 .value = -1,
86 - .threshold_warn = 80,  
87 - .threshold_crit = 90, 94 + .threshold_warn = 20,
  95 + .threshold_crit = 10,
88 .optional = true, 96 .optional = true,
  97 + .col = 3,
  98 + .lower_than = true,
89 }; 99 };
90 100
91 SMARTAttr wearout = { 101 SMARTAttr wearout = {
92 .id = MEDIA_WEAROUT_ID, 102 .id = MEDIA_WEAROUT_ID,
93 .name = "Media_Wearout_Indicator", 103 .name = "Media_Wearout_Indicator",
94 .value = -1, 104 .value = -1,
95 - .threshold_warn = 80,  
96 - .threshold_crit = 90, 105 + .threshold_warn = 20,
  106 + .threshold_crit = 10,
97 .optional = true, 107 .optional = true,
  108 + .col = 3,
  109 + .lower_than = true,
98 }; 110 };
99 111
100 SMARTAttr badblocks = { 112 SMARTAttr badblocks = {
@@ -104,6 +116,8 @@ SMARTAttr badblocks = { @@ -104,6 +116,8 @@ SMARTAttr badblocks = {
104 .threshold_warn = 0, 116 .threshold_warn = 0,
105 .threshold_crit = 0, 117 .threshold_crit = 0,
106 .optional = false, 118 .optional = false,
  119 + .col = 9,
  120 + .lower_than = false,
107 }; 121 };
108 122
109 SMARTAttr rep_uncorrect = { 123 SMARTAttr rep_uncorrect = {
@@ -113,6 +127,8 @@ SMARTAttr rep_uncorrect = { @@ -113,6 +127,8 @@ SMARTAttr rep_uncorrect = {
113 .threshold_warn = 0, 127 .threshold_warn = 0,
114 .threshold_crit = -1, 128 .threshold_crit = -1,
115 .optional = false, 129 .optional = false,
  130 + .col = 9,
  131 + .lower_than = false,
116 }; 132 };
117 133
118 map<int,SMARTAttr> prepareAttrMap(int driveType); 134 map<int,SMARTAttr> prepareAttrMap(int driveType);
check_systemd/check_systemd.sh 0 → 100755
  1 +#!/bin/bash
  2 +
# Nagios-style check for a single systemd unit.
#
# Usage: check_systemd.sh <unit>
# Exit codes: 0 = OK, 2 = CRITICAL (systemctl status reported non-zero),
#             3 = UNKNOWN (no unit name given).

# Without a unit name, `systemctl status` reports on the whole system instead
# of a unit, which could be mistaken for a healthy service.
if [[ -z ${1:-} ]]; then
    echo "SERVICE - UNKNOWN: No unit name given"
    exit 3
fi

UNIT=$1

# Only the exit status matters; the human-readable report is discarded.
systemctl status "$UNIT" > /dev/null
RET=$?

if [[ $RET -ne 0 ]]; then
    echo "SERVICE $UNIT - CRITICAL: Not running"
    exit 2
fi

echo "SERVICE $UNIT - OK"
exit 0
check_zfs/check_zfs.sh 0 → 100755
  1 +#!/bin/bash
  2 +
# Run check_smart against every /dev/* vdev of a pool and report the worst
# result.
#
# Arguments: $1 - pool name.
# Output:    "ZFS POOL SMART <pool> <STATE>" followed by " - <plugin output>"
#            for every disk whose check did not return 0.
# Returns:   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (numerically highest
#            per-disk result wins).
function smart_disks() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$1
    local status_output disks disk output res
    local err_output=""
    local highest_res=0

    # A pool that cannot be queried must not be reported as healthy.
    if ! status_output=$(zpool status -P "$pool"); then
        echo "ZFS POOL SMART $pool UNKNOWN - unable to read pool status"
        return 3
    fi

    # With -P the vdev column holds full paths; keep the ones under /dev.
    disks=$(awk '$1 ~ /^\/dev/ { print $1 }' <<< "$status_output")

    # Unquoted on purpose: one device path per word.
    for disk in $disks; do
        output=$(/usr/lib64/nagios/plugins/check_smart "$disk")
        res=$?

        # Anything outside the plugin range 0-3 (e.g. 126/127 when the plugin
        # is missing or not executable) is treated as UNKNOWN.
        if (( res > 3 )); then
            res=3
        fi

        if (( res != 0 )); then
            err_output="$err_output - $output"
        fi
        if (( res > highest_res )); then
            highest_res=$res
        fi
    done

    echo -n "ZFS POOL SMART $pool "
    case $highest_res in
        1)
            echo "WARNING${err_output}"
            return 1
            ;;
        2)
            echo "CRITICAL${err_output}"
            return 2
            ;;
        3)
            echo "UNKNOWN${err_output}"
            return 3
            ;;
    esac

    echo "OK"
    return 0
}
  39 +
# Report the health of a pool based on the "state:" and "errors:" lines of
# `zpool status`.
#
# Arguments: $1 - pool name.
# Returns:   0 OK, 1 WARNING (data errors reported), 2 CRITICAL (state is not
#            ONLINE), 3 UNKNOWN (status unavailable or not parseable).
function pool_errors() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$1
    local output state errors

    if ! output=$(zpool status "$pool"); then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: unable to read pool status"
        return 3
    fi

    # Second field of the "state:" line, e.g. ONLINE or DEGRADED. An empty
    # result is checked directly because a pipeline's exit status would only
    # reflect its last command.
    state=$(awk '$1 == "state:" { print $2; exit }' <<< "$output")
    if [[ -z $state ]]; then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: no state line in pool status"
        return 3
    fi

    errors=$(grep '^errors:' <<< "$output")
    if [[ -z $errors ]]; then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: no errors line in pool status"
        return 3
    fi

    if [[ $state != "ONLINE" ]]; then
        echo "ZFS POOL ERRORS $pool - CRITICAL: status: $state"
        return 2
    fi

    if [[ $errors != "errors: No known data errors" ]]; then
        echo "ZFS POOL ERRORS $pool - WARNING: $errors"
        return 1
    fi

    echo "ZFS POOL ERRORS $pool - OK"
    return 0
}
  74 +
  75 +function pool_free() {
  76 + if [[ $# != 7 ]]; then
  77 + echo "Wrong number of arguments"
  78 + return 3
  79 + fi
  80 + POOL=$3
  81 +
  82 + FREE_RAW=$(zfs list -po available $POOL | tail -n 1)
  83 + if [[ $? != 0 ]]; then
  84 + return 3
  85 + fi
  86 +
  87 + USED_RAW=$(zfs list -po used $POOL | tail -n 1)
  88 + if [[ $? != 0 ]]; then
  89 + return 3
  90 + fi
  91 + TOTAL_RAW=$((USED_RAW + FREE_RAW))
  92 + TOTAL=$(echo "scale=2; $TOTAL_RAW / (1024^3)" | bc)
  93 + FREE=$(echo "scale=2; $FREE_RAW / (1024^3)" | bc)
  94 + PERC_FREE=$(echo "scale=2; 100*$FREE_RAW/$TOTAL_RAW" | bc)
  95 +
  96 + CRITICAL="0"
  97 + WARNING="0"
  98 + TEMP=$(getopt -o c:w: -- "$@")
  99 + eval set -- "$TEMP"
  100 +
  101 + for opt; do
  102 + case "$opt" in
  103 + -c) CRITICAL=$2; shift 2 ;;
  104 + -w) WARNING=$2; shift 2 ;;
  105 + esac
  106 + done
  107 +
  108 + if [[ $CRITICAL == "0" || $WARNING == "0" ]]; then
  109 + echo "Missing -c or -w arguments"
  110 + return 3
  111 + fi
  112 +
  113 + if [[ $(echo "$PERC_FREE < $CRITICAL" | bc) == 1 ]]; then
  114 + echo "ZFS POOL FREE $POOL - CRITICAL: ${PERC_FREE}% ${FREE}/${TOTAL} GB"
  115 + return 2
  116 + elif [[ $(echo "$PERC_FREE < $WARNING" | bc) == 1 ]]; then
  117 + echo "ZFS POOL FREE $POOL - WARNING: ${PERC_FREE}% ${FREE}/${TOTAL} GB"
  118 + return 2
  119 + fi
  120 +
  121 + echo "ZFS POOL FREE $POOL - OK: ${PERC_FREE}% ${FREE}/${TOTAL} GB"
  122 + return 0
  123 +}
  124 +
# Entry point: check_zfs.sh <smart|errors|free> <pool> [-c <crit%> -w <warn%>]
# The exit status of the selected check becomes the exit status of the script.
if [[ $# -lt 2 ]]; then
    echo "Wrong number of arguments"
    exit 3
fi

ACTION=$1
POOL=$2

# All expansions are quoted so that arguments reach the handlers unsplit.
case "$ACTION" in
    smart)
        smart_disks "$POOL"
        exit $?
        ;;
    errors)
        pool_errors "$POOL"
        exit $?
        ;;
    free)
        # pool_free receives the pool followed by the full original argument
        # list (action, pool, options).
        pool_free "$POOL" "$@"
        exit $?
        ;;
esac

echo "Unknown command: $ACTION"
exit 3