Commit 55296ee690a271802e647a19e7e699b9bf6eea30

Authored by Imanol-Mikel Barba Sabariego
1 parent 5f26e3fa

Adding check_nvme

CMakeLists.txt
... ... @@ -18,6 +18,9 @@ add_executable(check_memfree_freebsd ${SOURCE_FILES_MEMFREE_FREEBSD})
18 18 set(SOURCE_FILES_SMART check_smart/check_smart.cpp check_smart/auxiliar.cpp)
19 19 add_executable(check_smart ${SOURCE_FILES_SMART})
20 20  
# check_nvme: executable built from the sources under check_nvme/
set(SOURCE_FILES_NVME check_nvme/check_nvme.cpp check_nvme/auxiliar.cpp)
add_executable(check_nvme ${SOURCE_FILES_NVME})
  23 +
21 24 find_library(libsensors libsensors.so)
22 25 set(SOURCE_FILES_SENSORS check_sensors/check_sensors.cpp check_sensors/auxiliar.cpp)
23 26 add_executable(check_sensors ${SOURCE_FILES_SENSORS})
... ...
check_nvme/README.md 0 → 100644
  1 +```
  2 +check_nvme v1.0
  3 +
  4 +Checks available spare, media and data integrity errors and percentage used in NVMe disks using SMART
  5 +WARNING: Requires the setuid bit to be set to run as root
  6 +
  7 +Usage:
  8 +check_nvme [-hV] DISKS...
  9 +
  10 +Options:
  11 + -h
  12 + Print detailed help screen
  13 + -V
  14 + Print version information
  15 + DISKS
  16 + Disks for which to retrieve S.M.A.R.T data
  17 +```
... ...
check_nvme/auxiliar.cpp 0 → 100755
  1 +//
  2 +// Created by Imanol on 28-may-16.
  3 +//
  4 +
  5 +#include "auxiliar.h"
  6 +
  7 +void timer_handler (int signum)
  8 +{
  9 + if(signum == SIGVTALRM)
  10 + {
  11 + cout << servicename << " CRITICAL - timeout occurred" << endl;
  12 + exit(2);
  13 + }
  14 +}
  15 +
  16 +int str2int(string str)
  17 +{
  18 + int num;
  19 + stringstream sstream;
  20 + sstream << str;
  21 + if(!(sstream >> num))
  22 + {
  23 + throw integerConversionException("Integer conversion error");
  24 + }
  25 + return num;
  26 +}
  27 +
/**
 * Formats an integer as its decimal text representation.
 *
 * @param x value to format
 * @return the decimal representation of x
 */
std::string int2str(int x)
{
    std::ostringstream formatter;
    formatter << x;
    return formatter.str();
}
  36 +
/**
 * Runs a command through popen() and captures its standard output.
 *
 * If the child process cannot be started, an error is printed and the
 * program exits with status 3.
 *
 * @param cmd    command line handed to the shell by popen()
 * @param output receives everything the command wrote to stdout; any
 *               previous content is discarded
 * @return the command's exit status when it terminated normally, or -1 when
 *         pclose() failed or the command was terminated abnormally (e.g. by
 *         a signal), so that a failed run is never reported as 0
 */
int exec(std::string cmd, std::string *output)
{
    output->clear();
    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe)
    {
        std::cout << "Error opening child process" << std::endl;
        exit(3);
    }

    // Loop on the fgets() result itself: it returns NULL on both EOF and
    // read errors, whereas feof() alone never becomes true after an error.
    char buffer[128];
    while(fgets(buffer, sizeof(buffer), pipe) != NULL)
    {
        *output += buffer;
    }

    const int status = pclose(pipe);
    if(status == -1 || !WIFEXITED(status))
    {
        return -1;
    }
    return WEXITSTATUS(status);
}
0 56 \ No newline at end of file
... ...
check_nvme/auxiliar.h 0 → 100755
  1 +//
  2 +// Created by Imanol on 28-may-16.
  3 +//
  4 +
  5 +#ifndef NAGIOS_PLUGINS_AUXILIAR_H
  6 +#define NAGIOS_PLUGINS_AUXILIAR_H
  7 +
#include <exception>
#include <iostream>
#include <sstream>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
  15 +
  16 +using namespace std;
  17 +
// Name printed in front of the plugin status line; each plugin defines it in
// its own translation unit.
extern const char *servicename;

// Converts a string to an int; throws integerConversionException on failure.
int str2int(string str);
// Converts an int to its textual representation.
string int2str(int x);
// Runs cmd via popen(), stores its standard output in *output and returns a
// status code derived from pclose().
int exec(string cmd, string *output);
// Signal handler: on SIGVTALRM reports a CRITICAL timeout and ends the process
// with exit status 2.
void timer_handler (int signum);
  24 +
/**
 * Exception thrown when a string cannot be converted to an integer.
 *
 * The message given at construction is returned verbatim by what().
 */
class integerConversionException : public std::exception
{
private:
    std::string s; // message returned by what()
public:
    integerConversionException(std::string ss) : s(ss) {}
    // noexcept replaces the deprecated throw() specification (removed in C++20).
    ~integerConversionException() noexcept override {}
    const char* what() const noexcept override { return s.c_str(); }
};
  34 +
  35 +#endif //NAGIOS_PLUGINS_AUXILIAR_H
... ...
check_nvme/check_nvme.cpp 0 → 100644
  1 +#include "check_nvme.h"
  2 +#include <regex>
  3 +
// Service label printed at the start of every status line of this plugin.
const char *servicename = "NVME";
  5 +
// True for the ASCII decimal digits '0'..'9'.
static bool isAsciiDigit(char c) {
    return c >= '0' && c <= '9';
}

/**
 * Extracts the integer value from a "Name:   value" attribute line.
 *
 * The value is whatever follows the first ':'; leading whitespace is skipped
 * and trailing text such as "%" is ignored. Thousands separators between
 * digits ("1,234") are removed before parsing so large counters are not
 * truncated at the first comma.
 *
 * @param line one line of attribute output
 * @return the parsed value, or -1 when the line has no ':' or no parsable
 *         integer follows it (-1 is the "value not available" marker used by
 *         the attribute table)
 */
int getNVMeAttrValue(std::string line) {
    const std::string::size_type colon = line.find(':');
    if(colon == std::string::npos) {
        return -1;
    }

    std::string value;
    for(std::string::size_type i = colon + 1; i < line.size(); ++i) {
        const char c = line[i];
        const bool groupSeparator = c == ','
            && !value.empty() && isAsciiDigit(value[value.size() - 1])
            && i + 1 < line.size() && isAsciiDigit(line[i + 1]);
        if(!groupSeparator) {
            value += c;
        }
    }

    try {
        return std::stoi(value); // skips leading whitespace, stops at first non-digit
    } catch(const std::exception&) {
        return -1; // not a number, or out of int range
    }
}
  12 +
/**
 * Extracts the attribute name from a "Name:   value" line.
 *
 * The name starts at the first non-space character and ends just before the
 * first ':' that follows it. When the line contains no ':', everything from
 * the first non-space character onwards is returned.
 *
 * @param line one line of attribute output
 * @return the attribute name, or an empty string when the line is empty or
 *         consists only of spaces
 */
std::string getNVMeAttrName(std::string line) {
    const std::string::size_type first = line.find_first_not_of(' ');
    if(first == std::string::npos) {
        // substr(npos) would throw std::out_of_range on a blank line.
        return std::string();
    }

    const std::string::size_type colon = line.find(':', first);
    if(colon == std::string::npos) {
        return line.substr(first);
    }
    return line.substr(first, colon - first);
}
  18 +
  19 +int evalStatus(const char* disk, string *status) {
  20 + string output = "";
  21 + string line = "";
  22 +
  23 + int rc = run_smartctl_cmd(string(SMARTCTL_CMD_ATTRS), disk,&output);
  24 + if(rc) {
  25 + cout << "Error reading SMART data from disk " << disk << endl;
  26 + exit(UNKN);
  27 + }
  28 +
  29 + map<std::string,NVMeAttr> attrMap;
  30 + attrMap[AVAILABLE_SPARE] = spare;
  31 + attrMap[MEDIA_ERRORS] = errors;
  32 + attrMap[PERCENTAGE_USED] = used;
  33 +
  34 + stringstream sstream;
  35 + sstream.str(output);
  36 + while(getline(sstream,line)) {
  37 + if(line == "") {
  38 + continue;
  39 + }
  40 + for(map<std::string,NVMeAttr>::iterator it = attrMap.begin(); it != attrMap.end(); ++it) {
  41 + std::string name = it->first;
  42 + if(getNVMeAttrName(line) == name) {
  43 + attrMap[name].value = getNVMeAttrValue(line);
  44 + }
  45 + }
  46 + }
  47 +
  48 + int ret = OK;
  49 + *status = string(disk);
  50 + for(map<std::string, NVMeAttr>::iterator it = attrMap.begin(); it != attrMap.end(); ++it) {
  51 + NVMeAttr attr = it->second;
  52 +
  53 + if(attr.value == -1) {
  54 + if(attr.optional) {
  55 + continue;
  56 + }
  57 + *status = string(disk) + " status UNKNOWN";
  58 + return UNKN;
  59 + }
  60 +
  61 + int veredict = 0;
  62 +
  63 + if(attr.lower_than) {
  64 + if(attr.value < attr.threshold_warn) {
  65 + veredict = WARN;
  66 + }
  67 + if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) {
  68 + veredict = CRIT;
  69 + }
  70 + } else {
  71 + if(attr.value > attr.threshold_warn) {
  72 + veredict = WARN;
  73 + }
  74 + if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) {
  75 + veredict = CRIT;
  76 + }
  77 + }
  78 +
  79 + switch(veredict) {
  80 + case OK:
  81 + break;
  82 + case WARN:
  83 + if(ret == OK) {
  84 + ret = WARN;
  85 + }
  86 + break;
  87 + case CRIT:
  88 + ret = CRIT;
  89 + break;
  90 + }
  91 + *status += " " + attr.name + ":" + to_string(attr.value);
  92 + }
  93 + return ret;
  94 +}
  95 +
  96 +int run_smartctl_cmd(const string command, const char* disk, string* output) {
  97 + int uid = getuid();
  98 + setreuid(0,0);
  99 +
  100 + int rc = exec(command + disk,output);
  101 +
  102 + setreuid(uid,0);
  103 + return rc;
  104 +}
  105 +
  106 +void printVersion() {
  107 + cout << "check_nvme v" << VERSION << endl << endl;
  108 +}
  109 +
  110 +void printHelp(bool longVersion) {
  111 + if(longVersion) {
  112 + printVersion();
  113 + cout << "Checks for pending, reallocated or uncorrectable sectors in NVMe disks" << endl << "WARNING: Requires the setuid bit to be set to run as root" << endl << endl;
  114 + printHelp(false);
  115 + cout << "Options:" << endl;
  116 + cout << " -h" << endl;
  117 + cout << " Print detailed help screen" << endl;
  118 + cout << " -V" << endl;
  119 + cout << " Print version information" << endl;
  120 + cout << " DISKS" << endl;
  121 + cout << " Disks for which to retrieve S.M.A.R.T data" << endl << endl;
  122 + return;
  123 + }
  124 + cout << "Usage: " << endl << "check_smart [-hV] DISKS..." << endl << endl;
  125 +}
  126 +
  127 +void set_timeout(unsigned int sec) {
  128 + struct itimerval timer;
  129 + timer.it_value.tv_sec = sec;
  130 + timer.it_value.tv_usec = 0;
  131 + timer.it_interval.tv_sec = 0;
  132 + timer.it_interval.tv_usec = 0;
  133 + setitimer (ITIMER_VIRTUAL, &timer, 0);
  134 +
  135 + struct sigaction sa;
  136 + memset (&sa, 0, sizeof (sa));
  137 + sa.sa_handler = &timer_handler;
  138 + sigaction (SIGVTALRM, &sa, 0);
  139 +}
  140 +
  141 +int main(int argc, char **argv) {
  142 + set_timeout(10);
  143 + int c;
  144 + while ((c = getopt (argc, argv, "Vh")) != -1) {
  145 + switch(c) {
  146 + case 'h':
  147 + printHelp(true);
  148 + return OK;
  149 + case 'V':
  150 + printVersion();
  151 + return OK;
  152 + case '?':
  153 + printHelp(false);
  154 + return UNKN;
  155 + }
  156 + }
  157 +
  158 + if(argc == 1) {
  159 + cout << "No disks checked" << endl;
  160 + return 3;
  161 + }
  162 +
  163 + string *results = new string[argc-1];
  164 + int returnCode = OK;
  165 +
  166 + for(int i = 1; i < argc; i++) {
  167 + switch(evalStatus(argv[i],&(results[i-1]))) {
  168 + case OK:
  169 + break;
  170 + case WARN:
  171 + if(returnCode == OK) {returnCode = WARN;}
  172 + break;
  173 + case CRIT:
  174 + returnCode = CRIT;
  175 + break;
  176 + case UNKN:
  177 + returnCode = UNKN;
  178 + break;
  179 + }
  180 + }
  181 +
  182 + cout << servicename;
  183 + if(returnCode == OK) {cout << " OK - disk status: ";}
  184 + else if(returnCode == WARN) {cout << " WARNING - disk status: ";}
  185 + else if(returnCode == CRIT) {cout << " CRITICAL - disk status: ";}
  186 + else if(returnCode == UNKN) {cout << " UNKNOWN - disk status: ";}
  187 + for(int i = 0; i < (argc-1); i++) {
  188 + cout << results[i];
  189 + if(i != (argc-2)) {cout << ", ";}
  190 + }
  191 + cout << endl;
  192 + return returnCode;
  193 +}
... ...
check_nvme/check_nvme.h 0 → 100755
  1 +#ifndef CHECK_NVME_H
  2 +#define CHECK_NVME_H
  3 +
  4 +#include <iostream>
  5 +#include <sstream>
  6 +#include <map>
  7 +
  8 +#include <stdio.h>
  9 +#include <string.h>
  10 +#include <stdlib.h>
  11 +#include <unistd.h>
  12 +
  13 +#include <sys/time.h>
  14 +
  15 +#include "auxiliar.h"
  16 +
// Plugin states; main() returns one of these as the process exit status.
#define OK 0
#define WARN 1
#define CRIT 2
#define UNKN 3

// Version string printed by printVersion().
#define VERSION "1.0"

// smartctl command prefixes; the disk path is appended directly, so the
// trailing space is significant.
#define SMARTCTL_CMD_ATTRS "/usr/sbin/smartctl -A "
// NOTE(review): SMARTCTL_CMD_INFO is not referenced in check_nvme.cpp.
#define SMARTCTL_CMD_INFO "/usr/sbin/smartctl -i "

// NOTE(review): the two strings below are not referenced in check_nvme.cpp.
#define ROTATION_INFO_STR "Rotation Rate:"
#define SSD_DEV_STR "Solid State Device"

// Attribute names, compared against the text before ':' on each line of the
// smartctl output.
#define AVAILABLE_SPARE "Available Spare"
#define MEDIA_ERRORS "Media and Data Integrity Errors"
#define PERCENTAGE_USED "Percentage Used"
  33 +
  34 +using namespace std;
  35 +
// One monitored attribute together with its alert thresholds.
struct NVMeAttr
{
    std::string name;   // attribute name as printed before ':' in the smartctl output
    int value;          // parsed value; -1 while the attribute has not been found
    int threshold_warn; // crossing this yields WARN
    int threshold_crit; // crossing this yields CRIT; -1 disables the critical check
    bool optional;      // true: a missing attribute is skipped instead of reported as UNKNOWN
    bool lower_than;    // true: alert when value < threshold; false: when value > threshold
};
  45 +
  46 +
  47 +// Attribute definitions
  48 +NVMeAttr used = {
  49 + .name = PERCENTAGE_USED,
  50 + .value = -1,
  51 + .threshold_warn = 80,
  52 + .threshold_crit = 90,
  53 + .optional = false,
  54 + .lower_than = false,
  55 +};
  56 +
  57 +NVMeAttr spare = {
  58 + .name = AVAILABLE_SPARE,
  59 + .value = -1,
  60 + .threshold_warn = 20,
  61 + .threshold_crit = 10,
  62 + .optional = false,
  63 + .lower_than = true,
  64 +};
  65 +
  66 +NVMeAttr errors = {
  67 + .name = MEDIA_ERRORS,
  68 + .value = -1,
  69 + .threshold_warn = 0,
  70 + .threshold_crit = 0,
  71 + .optional = false,
  72 + .lower_than = false,
  73 +};
  74 +
  75 +map<std::string,NVMeAttr> prepareAttrMap(int driveType);
  76 +int getNVMeAttrValue(string line);
  77 +std::string getNVMeAttrName(string line);
  78 +int run_smartctl_cmd(const string command, const char* disk, string* output);
  79 +int evalStatus(const char* disk, int driveType, string* status);
  80 +void printVersion();
  81 +void printHelp(bool longVersion);
  82 +void set_timeout(unsigned int sec);
  83 +
  84 +#endif
... ...
check_zfs/check_zfs.sh
... ... @@ -11,8 +11,15 @@ function smart_disks() {
11 11 ERR_OUTPUT=""
12 12 HIGHEST_RES=0
13 13 for disk in $DISKS; do
14   - OUTPUT=$(/usr/lib64/nagios/plugins/check_smart $disk)
15   - RES=$?
  14 + OUTPUT=""
  15 + RES=0
  16 + if echo $disk | grep "nvme" > /dev/null; then
  17 + OUTPUT=$(/usr/lib64/nagios/plugins/check_nvme $disk)
  18 + RES=$?
  19 + else
  20 + OUTPUT=$(/usr/lib64/nagios/plugins/check_smart $disk)
  21 + RES=$?
  22 + fi
16 23 if [[ $RES != 0 ]]; then
17 24 ERR_OUTPUT="$ERR_OUTPUT - $OUTPUT"
18 25 fi
... ...