Commit fdd9c743674d9c06a68cef0ffbe7f4b0d5b599ab

Authored by Imanol-Mikel Barba Sabariego
1 parent 75ead3b5

Adding wear leveling changes to check_smart. Adding check_systemd and check_zfs

check_smart/auxiliar.cpp 100644 → 100755
... ... @@ -52,4 +52,4 @@ int exec(string cmd, string *output)
52 52 }
53 53 }
54 54 return pclose(pipe)/256;
55   -}
  55 +}
56 56 \ No newline at end of file
... ...
check_smart/auxiliar.h 100644 → 100755
check_smart/check_smart.cpp
1 1 #include "check_smart.h"
  2 +#include <regex>
2 3  
3 4 const char *servicename = (const char*)"SMART";
4 5  
5   -int getSmartAttrValue(string line) {
6   - return stoi(line.substr(line.find_last_of(" ")+1));
/**
 * Parses the integer held in one whitespace-separated column of a line.
 *
 * Columns are separated by runs of whitespace (space, tab, newline, ...);
 * leading whitespace is ignored, so column 0 is the first token of the line.
 *
 * @param line Text line to split into columns.
 * @param col  Zero-based index of the column to read.
 * @return The column's leading integer as parsed by std::stoi.
 * @throws std::runtime_error if the line has fewer than col + 1 columns.
 * @throws std::invalid_argument or std::out_of_range (from std::stoi) if the
 *         selected column does not start with a representable integer.
 */
int getSmartAttrValue(std::string line, unsigned int col) {
  // Same character set that the regex class \s matches for plain chars.
  static const char* const kBlanks = " \t\n\v\f\r";

  // Start of column 0: first non-blank character (npos for a blank line).
  std::string::size_type begin = line.find_first_not_of(kBlanks);

  // Hop over `col` columns; an unsigned counter avoids a signed/unsigned compare.
  for (unsigned int skipped = 0; skipped < col && begin != std::string::npos; ++skipped) {
    const std::string::size_type gap = line.find_first_of(kBlanks, begin);
    begin = (gap == std::string::npos) ? std::string::npos
                                       : line.find_first_not_of(kBlanks, gap);
  }
  if (begin == std::string::npos) {
    throw std::runtime_error("Column out of range");
  }

  // substr() takes a length, not an end position, so convert explicitly.
  const std::string::size_type end = line.find_first_of(kBlanks, begin);
  const std::string::size_type len =
      (end == std::string::npos) ? std::string::npos : end - begin;
  return std::stoi(line.substr(begin, len));
}
8 31  
9 32 int getSmartAttrID(string line) {
... ... @@ -88,7 +111,7 @@ int evalStatus(const char* disk, int driveType, string *status) {
88 111 continue;
89 112 }
90 113 if(getSmartAttrID(line) == id) {
91   - attrMap[id].value = getSmartAttrValue(line);
  114 + attrMap[id].value = getSmartAttrValue(line, attrMap[id].col);
92 115 }
93 116 }
94 117 }
... ... @@ -107,11 +130,21 @@ int evalStatus(const char* disk, int driveType, string *status) {
107 130 }
108 131  
109 132 int veredict = 0;
110   - if(attr.value > attr.threshold_warn) {
111   - veredict = WARN;
112   - }
113   - if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) {
114   - veredict = CRIT;
  133 +
  134 + if(attr.lower_than) {
  135 + if(attr.value < attr.threshold_warn) {
  136 + veredict = WARN;
  137 + }
  138 + if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) {
  139 + veredict = CRIT;
  140 + }
  141 + } else {
  142 + if(attr.value > attr.threshold_warn) {
  143 + veredict = WARN;
  144 + }
  145 + if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) {
  146 + veredict = CRIT;
  147 + }
115 148 }
116 149  
117 150 switch(veredict) {
... ... @@ -183,7 +216,7 @@ int main(int argc, char **argv) {
183 216 switch(c) {
184 217 case 'h':
185 218 printHelp(true);
186   - return OK;
  219 + return OK;
187 220 case 'V':
188 221 printVersion();
189 222 return OK;
... ...
check_smart/check_smart.h 100644 → 100755
... ... @@ -48,6 +48,8 @@ struct SMARTAttr
48 48 int threshold_warn;
49 49 int threshold_crit;
50 50 bool optional;
  51 + unsigned int col;
  52 + bool lower_than;
51 53 }; typedef struct SMARTAttr SMARTAttr;
52 54  
53 55  
... ... @@ -59,6 +61,8 @@ SMARTAttr reallocated = {
59 61 .threshold_warn = 0,
60 62 .threshold_crit = -1,
61 63 .optional = false,
  64 + .col = 9,
  65 + .lower_than = false,
62 66 };
63 67  
64 68 SMARTAttr pending = {
... ... @@ -66,8 +70,10 @@ SMARTAttr pending = {
66 70 .name = "Current_Pending_Sector",
67 71 .value = -1,
68 72 .threshold_warn = 0,
69   - .threshold_crit = -1,
  73 + .threshold_crit = -1,
70 74 .optional = false,
  75 + .col = 9,
  76 + .lower_than = false,
71 77 };
72 78  
73 79 SMARTAttr off_uncorrect = {
... ... @@ -77,24 +83,30 @@ SMARTAttr off_uncorrect = {
77 83 .threshold_warn = 0,
78 84 .threshold_crit = 0,
79 85 .optional = false,
  86 + .col = 9,
  87 + .lower_than = false,
80 88 };
81 89  
82 90 SMARTAttr wear = {
83 91 .id = WEAR_COUNT_ID,
84 92 .name = "Wear_Leveling_Count",
85 93 .value = -1,
86   - .threshold_warn = 80,
87   - .threshold_crit = 90,
  94 + .threshold_warn = 20,
  95 + .threshold_crit = 10,
88 96 .optional = true,
  97 + .col = 3,
  98 + .lower_than = true,
89 99 };
90 100  
91 101 SMARTAttr wearout = {
92 102 .id = MEDIA_WEAROUT_ID,
93 103 .name = "Media_Wearout_Indicator",
94 104 .value = -1,
95   - .threshold_warn = 80,
96   - .threshold_crit = 90,
  105 + .threshold_warn = 20,
  106 + .threshold_crit = 10,
97 107 .optional = true,
  108 + .col = 3,
  109 + .lower_than = true,
98 110 };
99 111  
100 112 SMARTAttr badblocks = {
... ... @@ -104,6 +116,8 @@ SMARTAttr badblocks = {
104 116 .threshold_warn = 0,
105 117 .threshold_crit = 0,
106 118 .optional = false,
  119 + .col = 9,
  120 + .lower_than = false,
107 121 };
108 122  
109 123 SMARTAttr rep_uncorrect = {
... ... @@ -113,6 +127,8 @@ SMARTAttr rep_uncorrect = {
113 127 .threshold_warn = 0,
114 128 .threshold_crit = -1,
115 129 .optional = false,
  130 + .col = 9,
  131 + .lower_than = false,
116 132 };
117 133  
118 134 map<int,SMARTAttr> prepareAttrMap(int driveType);
... ...
check_systemd/check_systemd.sh 0 → 100755
#!/bin/bash
#
# Nagios-style check reporting whether a systemd unit is running.
#
# Usage: check_systemd.sh UNIT
# Exit codes: 0 = OK, 2 = CRITICAL (systemctl status returned non-zero),
#             3 = UNKNOWN (no unit name supplied).

UNIT=$1

# Without a unit name `systemctl status` reports on the system as a whole,
# which could make this check print a bogus OK, so refuse to run.
if [[ -z $UNIT ]]; then
    echo "SERVICE - UNKNOWN: No unit specified"
    exit 3
fi

# Only the exit status is of interest; the human-readable report is discarded.
if ! systemctl status "$UNIT" > /dev/null; then
    echo "SERVICE $UNIT - CRITICAL: Not running"
    exit 2
fi

echo "SERVICE $UNIT - OK"
exit 0
... ...
check_zfs/check_zfs.sh 0 → 100755
  1 +#!/bin/bash
  2 +
# Runs check_smart on every /dev-backed device of a pool and reports the
# worst result.
#
# Arguments: $1 - pool name
# Output:    one line "ZFS POOL SMART <pool> <STATE>[ - <plugin output>...]"
# Returns:   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (also used when the pool
#            cannot be queried, has no /dev devices, or check_smart exits
#            with a code outside 0-2).
function smart_disks() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$1
    local pool_status disks disk output res
    local err_output=""
    local highest_res=0

    # Query the pool on its own so a failing zpool is not hidden behind the
    # exit status of a text-processing pipeline.
    if ! pool_status=$(zpool status -P "$pool"); then
        echo "ZFS POOL SMART $pool UNKNOWN - zpool status failed"
        return 3
    fi

    # First whitespace-separated field of every line that starts with /dev.
    disks=$(echo "$pool_status" | awk '$1 ~ /^\/dev/ { print $1 }')
    if [[ -z $disks ]]; then
        # Nothing was checked, so claiming OK would be misleading.
        echo "ZFS POOL SMART $pool UNKNOWN - no /dev devices found"
        return 3
    fi

    # Intentionally unquoted: one device path per word.
    for disk in $disks; do
        output=$(/usr/lib64/nagios/plugins/check_smart "$disk")
        res=$?
        if [[ $res != 0 ]]; then
            err_output="$err_output - $output"
        fi
        if [[ $res -gt $highest_res ]]; then
            highest_res=$res
        fi
    done

    case $highest_res in
        0)
            echo "ZFS POOL SMART $pool OK"
            return 0
            ;;
        1)
            echo "ZFS POOL SMART $pool WARNING${err_output}"
            return 1
            ;;
        2)
            echo "ZFS POOL SMART $pool CRITICAL${err_output}"
            return 2
            ;;
        *)
            # Covers 3 and any unexpected code (e.g. 127 when the plugin is
            # missing), which must never be reported as OK.
            echo "ZFS POOL SMART $pool UNKNOWN${err_output}"
            return 3
            ;;
    esac
}
  39 +
# Reports the health of a pool based on the "state:" and "errors:" lines of
# `zpool status`.
#
# Arguments: $1 - pool name
# Output:    one line "ZFS POOL ERRORS <pool> - <STATE>[: details]"
# Returns:   0 OK, 1 WARNING (errors line is not "No known data errors"),
#            2 CRITICAL (state is not ONLINE), 3 UNKNOWN (status could not be
#            obtained or parsed).
function pool_errors() {
    if [[ $# != 1 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$1
    local output status errors

    if ! output=$(zpool status "$pool"); then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: zpool status failed"
        return 3
    fi

    # Second field of the first "state:" line; test the extracted text rather
    # than $?, which would only reflect the last command of a pipeline.
    status=$(echo "$output" | awk '/^[[:space:]]*state:/ { print $2; exit }')
    if [[ -z $status ]]; then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: could not determine pool state"
        return 3
    fi

    # A degraded or faulted pool is critical whether or not an errors line
    # is present, so evaluate the state first.
    if [[ $status != "ONLINE" ]]; then
        echo "ZFS POOL ERRORS $pool - CRITICAL: status: $status"
        return 2
    fi

    errors=$(echo "$output" | grep -m 1 '^errors:')
    if [[ -z $errors ]]; then
        echo "ZFS POOL ERRORS $pool - UNKNOWN: no errors line in zpool status"
        return 3
    fi

    if [[ $errors != "errors: No known data errors" ]]; then
        echo "ZFS POOL ERRORS $pool - WARNING: $errors"
        return 1
    fi

    echo "ZFS POOL ERRORS $pool - OK"
    return 0
}
  74 +
# Checks the percentage of free space in a pool against two thresholds.
#
# Arguments: exactly seven, as passed by the dispatcher at the bottom of this
#            script: POOL ACTION POOL -c CRIT -w WARN. The pool name is read
#            from $3; -c and -w are percentages of free space and may appear
#            in either order.
# Output:    one line "ZFS POOL FREE <pool> - <STATE>: <pct>% <free>/<total> GB"
# Returns:   0 OK, 1 WARNING (free % below -w), 2 CRITICAL (free % below -c),
#            3 UNKNOWN (bad arguments or pool usage could not be read).
function pool_free() {
    if [[ $# != 7 ]]; then
        echo "Wrong number of arguments"
        return 3
    fi

    local pool=$3
    local critical="" warning=""
    local parsed usage free_raw used_raw total_raw total free perc_free
    local number_re='^[0-9]+([.][0-9]+)?$'

    if ! parsed=$(getopt -o c:w: -- "$@"); then
        echo "ZFS POOL FREE $pool - UNKNOWN: invalid options"
        return 3
    fi
    eval set -- "$parsed"

    # Walk the normalized option list; getopt moves non-options after "--".
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -c) critical=$2; shift 2 ;;
            -w) warning=$2; shift 2 ;;
            --) shift; break ;;
            *)  shift ;;
        esac
    done

    if [[ -z $critical || -z $warning ]]; then
        echo "Missing -c or -w arguments"
        return 3
    fi
    # The thresholds are interpolated into bc expressions, so only accept
    # plain decimal numbers.
    if [[ ! $critical =~ $number_re || ! $warning =~ $number_re ]]; then
        echo "ZFS POOL FREE $pool - UNKNOWN: thresholds must be numeric"
        return 3
    fi

    # -H drops the header and -p prints exact byte counts; one call returns
    # "available<TAB>used".
    if ! usage=$(zfs list -Hp -o available,used "$pool"); then
        echo "ZFS POOL FREE $pool - UNKNOWN: zfs list failed"
        return 3
    fi
    free_raw=${usage%%[[:space:]]*}
    used_raw=${usage##*[[:space:]]}
    if [[ ! $free_raw =~ ^[0-9]+$ || ! $used_raw =~ ^[0-9]+$ ]]; then
        echo "ZFS POOL FREE $pool - UNKNOWN: unexpected zfs list output"
        return 3
    fi

    total_raw=$((used_raw + free_raw))
    if [[ $total_raw -eq 0 ]]; then
        # Guards the percentage computation against a division by zero.
        echo "ZFS POOL FREE $pool - UNKNOWN: pool reports zero size"
        return 3
    fi

    total=$(echo "scale=2; $total_raw / (1024^3)" | bc)
    free=$(echo "scale=2; $free_raw / (1024^3)" | bc)
    perc_free=$(echo "scale=2; 100*$free_raw/$total_raw" | bc)

    if [[ $(echo "$perc_free < $critical" | bc) == 1 ]]; then
        echo "ZFS POOL FREE $pool - CRITICAL: ${perc_free}% ${free}/${total} GB"
        return 2
    elif [[ $(echo "$perc_free < $warning" | bc) == 1 ]]; then
        echo "ZFS POOL FREE $pool - WARNING: ${perc_free}% ${free}/${total} GB"
        # WARNING is exit code 1; 2 would be reported as CRITICAL.
        return 1
    fi

    echo "ZFS POOL FREE $pool - OK: ${perc_free}% ${free}/${total} GB"
    return 0
}
  124 +
# Entry point: check_zfs.sh ACTION POOL [-c CRIT -w WARN]
#   smart  - SMART status of the pool's disks
#   errors - pool state and data errors
#   free   - free-space percentage against the -c / -w thresholds
# The exit code is the Nagios status returned by the selected check.
if [[ $# -lt 2 ]]; then
    echo "Wrong number of arguments"
    exit 3
fi

ACTION=$1
POOL=$2

# Arguments are quoted so they reach the checks without word splitting or
# glob expansion.
case "$ACTION" in
    smart)
        smart_disks "$POOL"
        ;;
    errors)
        pool_errors "$POOL"
        ;;
    free)
        # pool_free receives the pool followed by the full original
        # argument list.
        pool_free "$POOL" "$@"
        ;;
    *)
        echo "Unknown command: $ACTION"
        exit 3
        ;;
esac
exit $?
... ...