From 55296ee690a271802e647a19e7e699b9bf6eea30 Mon Sep 17 00:00:00 2001
From: Imanol-Mikel Barba Sabariego
Date: Mon, 19 Feb 2024 06:08:07 +0100
Subject: [PATCH] Adding check_nvme

---
 CMakeLists.txt            |   3 +
 check_nvme/README.md      |  17 ++
 check_nvme/auxiliar.cpp   |  67 +++++++
 check_nvme/auxiliar.h     |  40 ++++
 check_nvme/check_nvme.cpp | 262 ++++++++++++++++++++++++++
 check_nvme/check_nvme.h   |  84 +++++++++
 check_zfs/check_zfs.sh    |  11 +-
 7 files changed, 482 insertions(+), 2 deletions(-)
 create mode 100644 check_nvme/README.md
 create mode 100755 check_nvme/auxiliar.cpp
 create mode 100755 check_nvme/auxiliar.h
 create mode 100644 check_nvme/check_nvme.cpp
 create mode 100755 check_nvme/check_nvme.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7d2decd..8bd7218 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,9 @@ add_executable(check_memfree_freebsd ${SOURCE_FILES_MEMFREE_FREEBSD})
 set(SOURCE_FILES_SMART check_smart/check_smart.cpp check_smart/auxiliar.cpp)
 add_executable(check_smart ${SOURCE_FILES_SMART})
 
+set(SOURCE_FILES_NVME check_nvme/check_nvme.cpp check_nvme/auxiliar.cpp)
+add_executable(check_nvme ${SOURCE_FILES_NVME})
+
 find_library(libsensors libsensors.so)
 set(SOURCE_FILES_SENSORS check_sensors/check_sensors.cpp check_sensors/auxiliar.cpp)
 add_executable(check_sensors ${SOURCE_FILES_SENSORS})
diff --git a/check_nvme/README.md b/check_nvme/README.md
new file mode 100644
--- /dev/null
+++ b/check_nvme/README.md
@@ -0,0 +1,17 @@
+```
+check_nvme v1.0
+
+Checks available spare, media errors and percentage used of NVMe disks
+WARNING: Requires the setuid bit to be set to run as root
+
+Usage:
+check_nvme [-hV] DISKS...
+
+Options:
+ -h
+    Print detailed help screen
+ -V
+    Print version information
+ DISKS
+    Disks for which to retrieve S.M.A.R.T data
+```
diff --git a/check_nvme/auxiliar.cpp b/check_nvme/auxiliar.cpp
new file mode 100755
--- /dev/null
+++ b/check_nvme/auxiliar.cpp
@@ -0,0 +1,67 @@
+//
+// Created by Imanol on 28-may-16.
+//
+
+#include "auxiliar.h"
+
+// SIGALRM handler: prints the Nagios timeout line and terminates with status 2.
+// Only async-signal-safe calls (strlen, write, _exit) are made here.
+void timer_handler (int signum)
+{
+  if(signum == SIGALRM)
+  {
+    static const char msg[] = " CRITICAL - timeout occurred\n";
+    ssize_t written = write(STDOUT_FILENO, servicename, strlen(servicename));
+    written = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
+    (void)written;
+    _exit(2);
+  }
+}
+
+// Parses the leading integer of str; throws integerConversionException on failure.
+int str2int(string str)
+{
+  int num;
+  stringstream sstream;
+  sstream << str;
+  if(!(sstream >> num))
+  {
+    throw integerConversionException("Integer conversion error");
+  }
+  return num;
+}
+
+// Formats x as a decimal string.
+string int2str(int x)
+{
+  string str;
+  stringstream sstream;
+  sstream << x;
+  sstream >> str;
+  return str;
+}
+
+// Runs cmd through popen(), stores everything it writes to stdout in *output and
+// returns its exit status, or -1 when it did not terminate normally.
+int exec(string cmd, string *output)
+{
+  *output = "";
+  FILE* pipe = popen(cmd.c_str(), "r");
+  if (!pipe)
+  {
+    cout << "Error opening child process" << endl;
+    exit(3);
+  }
+  char buffer[128];
+  // Loop on fgets() itself: polling feof() first never ends after a read error.
+  while(fgets(buffer, sizeof(buffer), pipe) != NULL)
+  {
+    *output += buffer;
+  }
+  const int status = pclose(pipe);
+  if(status == -1 || !WIFEXITED(status))
+  {
+    return -1;
+  }
+  return WEXITSTATUS(status);
+}
diff --git a/check_nvme/auxiliar.h b/check_nvme/auxiliar.h
new file mode 100755
--- /dev/null
+++ b/check_nvme/auxiliar.h
@@ -0,0 +1,40 @@
+//
+// Created by Imanol on 28-may-16.
+//
+
+#ifndef NAGIOS_PLUGINS_AUXILIAR_H
+#define NAGIOS_PLUGINS_AUXILIAR_H
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <exception>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+extern const char *servicename;
+
+int str2int(string str);
+string int2str(int x);
+int exec(string cmd, string *output);
+void timer_handler (int signum);
+
+class integerConversionException : public exception
+{
+private:
+  string s;
+public:
+  integerConversionException(std::string ss) : s(ss) {}
+  ~integerConversionException() throw () {}
+  const char* what() const throw() { return s.c_str(); }
+};
+
+#endif //NAGIOS_PLUGINS_AUXILIAR_H
diff --git a/check_nvme/check_nvme.cpp b/check_nvme/check_nvme.cpp
new file mode 100644
--- /dev/null
+++ b/check_nvme/check_nvme.cpp
@@ -0,0 +1,262 @@
+#include "check_nvme.h"
+#include <cctype>
+#include <climits>
+#include <stdexcept>
+#include <vector>
+
+const char *servicename = (const char*)"NVME";
+
+// Parses the number that follows the first ':' of a smartctl line, e.g.
+// "Available Spare:   100%" -> 100. Commas inside the number are skipped so
+// "1,234" reads as 1234 (NOTE: assumes ',' is the digit-group separator that
+// smartctl prints; confirm for non-default locales). Returns -1 when there is
+// no ':' or no number after it, and INT_MAX when the number overflows an int.
+int getNVMeAttrValue(string line) {
+  size_t pos = line.find(':');
+  if(pos == string::npos) {
+    return -1;
+  }
+  pos = line.find_first_not_of(" \t", pos + 1);
+
+  string digits;
+  for(; pos != string::npos && pos < line.size(); ++pos) {
+    const char ch = line[pos];
+    if(isdigit((unsigned char)ch)) {
+      digits += ch;
+    } else if(ch != ',' || digits.empty()) {
+      break;
+    }
+  }
+  if(digits.empty()) {
+    return -1;
+  }
+
+  try {
+    return stoi(digits);
+  } catch(const std::out_of_range&) {
+    return INT_MAX;
+  }
+}
+
+// Returns the text before the first ':' with leading spaces removed, the whole
+// trimmed line when there is no ':', and "" for an empty or all-space line.
+std::string getNVMeAttrName(string line) {
+  size_t first = line.find_first_not_of(' ');
+  if(first == string::npos) {
+    // Without this guard substr(npos, ...) throws std::out_of_range.
+    return "";
+  }
+  size_t last = line.find_first_of(':',first);
+  return line.substr(first,last-first);
+}
+
+// Reads the health attributes of `disk` with smartctl and grades them against
+// the thresholds defined in check_nvme.h. *status receives the disk name followed
+// by " <name>:<value>" per attribute. Returns the worst of OK/WARN/CRIT, or UNKN
+// when smartctl fails or a non-optional attribute is missing from its output.
+int evalStatus(const char* disk, string *status) {
+  string output = "";
+
+  int rc = run_smartctl_cmd(string(SMARTCTL_CMD_ATTRS), disk,&output);
+  if(rc) {
+    // Report through *status instead of exit() so main() still prints the
+    // results (possibly CRITICAL) of the other disks.
+    *status = string(disk) + " status UNKNOWN (error reading SMART data)";
+    return UNKN;
+  }
+
+  map<string, NVMeAttr> attrMap;
+  attrMap[AVAILABLE_SPARE] = spare;
+  attrMap[MEDIA_ERRORS] = errors;
+  attrMap[PERCENTAGE_USED] = used;
+
+  stringstream sstream;
+  sstream.str(output);
+  string line;
+  while(getline(sstream,line)) {
+    // Exact name match: "Available Spare Threshold" is not "Available Spare".
+    map<string, NVMeAttr>::iterator found = attrMap.find(getNVMeAttrName(line));
+    if(found != attrMap.end()) {
+      found->second.value = getNVMeAttrValue(line);
+    }
+  }
+
+  int ret = OK;
+  *status = string(disk);
+  for(map<string, NVMeAttr>::iterator it = attrMap.begin(); it != attrMap.end(); ++it) {
+    const NVMeAttr &attr = it->second;
+
+    if(attr.value == -1) {
+      if(attr.optional) {
+        continue;
+      }
+      *status = string(disk) + " status UNKNOWN";
+      return UNKN;
+    }
+
+    int verdict = OK;
+
+    if(attr.lower_than) {
+      if(attr.value < attr.threshold_warn) {
+        verdict = WARN;
+      }
+      if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) {
+        verdict = CRIT;
+      }
+    } else {
+      if(attr.value > attr.threshold_warn) {
+        verdict = WARN;
+      }
+      if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) {
+        verdict = CRIT;
+      }
+    }
+
+    // CRIT always wins; WARN only upgrades an OK result.
+    if(verdict == CRIT) {
+      ret = CRIT;
+    } else if(verdict == WARN && ret == OK) {
+      ret = WARN;
+    }
+    *status += " " + attr.name + ":" + to_string(attr.value);
+  }
+  return ret;
+}
+
+// True when `disk` can be appended to a shell command line unquoted: non-empty,
+// not starting with '-' (would be read as an option) and made only of
+// alphanumerics and "/_.-:+" (no shell metacharacters or whitespace).
+static bool isSafeDiskArg(const char* disk) {
+  if(disk == NULL || *disk == '\0' || *disk == '-') {
+    return false;
+  }
+  for(const char* p = disk; *p != '\0'; ++p) {
+    if(!isalnum((unsigned char)*p) && strchr("/_.-:+", *p) == NULL) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Runs `command` + `disk` as root and stores its stdout in *output. Returns the
+// command's exit status, or -1 when `disk` is rejected by isSafeDiskArg() or the
+// original real uid cannot be restored afterwards.
+int run_smartctl_cmd(const string command, const char* disk, string* output) {
+  // The command line goes through popen()/sh while this binary is setuid root,
+  // so an unchecked argument such as "x; id" would run extra commands as root.
+  if(!isSafeDiskArg(disk)) {
+    *output = "";
+    return -1;
+  }
+
+  const uid_t uid = getuid();
+  const bool elevated = (setreuid(0,0) == 0);
+
+  int rc = exec(command + disk,output);
+
+  if(elevated && setreuid(uid,0) != 0) {
+    return -1;
+  }
+  return rc;
+}
+
+void printVersion() {
+  cout << "check_nvme v" << VERSION << endl << endl;
+}
+
+// Prints the usage line; with longVersion also the version, description and options.
+void printHelp(bool longVersion) {
+  if(longVersion) {
+    printVersion();
+    cout << "Checks available spare, media errors and percentage used of NVMe disks" << endl << "WARNING: Requires the setuid bit to be set to run as root" << endl << endl;
+    printHelp(false);
+    cout << "Options:" << endl;
+    cout << " -h" << endl;
+    cout << "    Print detailed help screen" << endl;
+    cout << " -V" << endl;
+    cout << "    Print version information" << endl;
+    cout << " DISKS" << endl;
+    cout << "    Disks for which to retrieve S.M.A.R.T data" << endl << endl;
+    return;
+  }
+  cout << "Usage: " << endl << "check_nvme [-hV] DISKS..." << endl << endl;
+}
+
+// Arms a one-shot timeout: after `sec` seconds of wall-clock time SIGALRM is
+// delivered to timer_handler().
+void set_timeout(unsigned int sec) {
+  // Install the handler first so the signal can never arrive unhandled.
+  struct sigaction sa;
+  memset (&sa, 0, sizeof (sa));
+  sa.sa_handler = &timer_handler;
+  sigaction (SIGALRM, &sa, 0);
+
+  struct itimerval timer;
+  timer.it_value.tv_sec = sec;
+  timer.it_value.tv_usec = 0;
+  timer.it_interval.tv_sec = 0;
+  timer.it_interval.tv_usec = 0;
+  // ITIMER_REAL, not ITIMER_VIRTUAL: the latter only counts this process's user
+  // CPU time, which does not advance while we block waiting for smartctl.
+  setitimer (ITIMER_REAL, &timer, 0);
+}
+
+// Orders the plugin return codes by how loudly they must be reported:
+// OK < WARN < UNKN < CRIT.
+static int severityRank(int code) {
+  switch(code) {
+    case CRIT: return 3;
+    case UNKN: return 2;
+    case WARN: return 1;
+    default:   return 0;
+  }
+}
+
+int main(int argc, char **argv) {
+  set_timeout(10);
+  int c;
+  while ((c = getopt (argc, argv, "Vh")) != -1) {
+    switch(c) {
+      case 'h':
+        printHelp(true);
+        return OK;
+      case 'V':
+        printVersion();
+        return OK;
+      case '?':
+        printHelp(false);
+        return UNKN;
+    }
+  }
+
+  // getopt() leaves optind at the first non-option argument, i.e. the first disk.
+  if(optind >= argc) {
+    cout << "No disks checked" << endl;
+    return UNKN;
+  }
+
+  vector<string> results;
+  int returnCode = OK;
+
+  for(int i = optind; i < argc; i++) {
+    string status;
+    const int rc = evalStatus(argv[i],&status);
+    results.push_back(status);
+    // Keep the most severe code seen, independent of the order of the disks.
+    if(severityRank(rc) > severityRank(returnCode)) {
+      returnCode = rc;
+    }
+  }
+
+  cout << servicename;
+  if(returnCode == OK) {cout << " OK - disk status: ";}
+  else if(returnCode == WARN) {cout << " WARNING - disk status: ";}
+  else if(returnCode == CRIT) {cout << " CRITICAL - disk status: ";}
+  else if(returnCode == UNKN) {cout << " UNKNOWN - disk status: ";}
+  for(size_t i = 0; i < results.size(); i++) {
+    cout << results[i];
+    if(i + 1 != results.size()) {cout << ", ";}
+  }
+  cout << endl;
+  return returnCode;
+}
diff --git a/check_nvme/check_nvme.h b/check_nvme/check_nvme.h
new file mode 100755
--- /dev/null
+++ b/check_nvme/check_nvme.h
@@ -0,0 +1,84 @@
+#ifndef CHECK_NVME_H
+#define CHECK_NVME_H
+
+#include <signal.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <cstring>
+#include <iostream>
+#include <map>
+#include <string>
+
+#include <sstream>
+
+#include "auxiliar.h"
+
+#define OK 0
+#define WARN 1
+#define CRIT 2
+#define UNKN 3
+
+#define VERSION "1.0"
+
+#define SMARTCTL_CMD_ATTRS "/usr/sbin/smartctl -A "
+#define SMARTCTL_CMD_INFO "/usr/sbin/smartctl -i "
+
+#define ROTATION_INFO_STR "Rotation Rate:"
+#define SSD_DEV_STR "Solid State Device"
+
+#define AVAILABLE_SPARE "Available Spare"
+#define MEDIA_ERRORS "Media and Data Integrity Errors"
+#define PERCENTAGE_USED "Percentage Used"
+
+using namespace std;
+
+// One monitored field of the smartctl NVMe attribute output.
+struct NVMeAttr
+{
+  string name;
+  int value;           // -1 until a value has been parsed
+  int threshold_warn;
+  int threshold_crit;  // -1 disables the critical check
+  bool optional;       // true: a missing value is skipped instead of UNKNOWN
+  bool lower_than;     // true: values BELOW the thresholds are the bad ones
+}; typedef struct NVMeAttr NVMeAttr;
+
+
+// Attribute definitions
+NVMeAttr used = {
+  .name = PERCENTAGE_USED,
+  .value = -1,
+  .threshold_warn = 80,
+  .threshold_crit = 90,
+  .optional = false,
+  .lower_than = false,
+};
+
+NVMeAttr spare = {
+  .name = AVAILABLE_SPARE,
+  .value = -1,
+  .threshold_warn = 20,
+  .threshold_crit = 10,
+  .optional = false,
+  .lower_than = true,
+};
+
+NVMeAttr errors = {
+  .name = MEDIA_ERRORS,
+  .value = -1,
+  .threshold_warn = 0,
+  .threshold_crit = 0,
+  .optional = false,
+  .lower_than = false,
+};
+
+int getNVMeAttrValue(string line);
+std::string getNVMeAttrName(string line);
+int run_smartctl_cmd(const string command, const char* disk, string* output);
+int evalStatus(const char* disk, string* status);
+void printVersion();
+void printHelp(bool longVersion);
+void set_timeout(unsigned int sec);
+
+#endif
diff --git a/check_zfs/check_zfs.sh b/check_zfs/check_zfs.sh
--- a/check_zfs/check_zfs.sh
+++ b/check_zfs/check_zfs.sh
@@ -11,8 +11,15 @@ function smart_disks() {
   ERR_OUTPUT=""
   HIGHEST_RES=0
   for disk in $DISKS; do
-    OUTPUT=$(/usr/lib64/nagios/plugins/check_smart $disk)
-    RES=$?
+    OUTPUT=""
+    RES=0
+    if [[ "$disk" == *nvme* ]]; then
+      OUTPUT=$(/usr/lib64/nagios/plugins/check_nvme "$disk")
+      RES=$?
+    else
+      OUTPUT=$(/usr/lib64/nagios/plugins/check_smart "$disk")
+      RES=$?
+    fi
     if [[ $RES != 0 ]]; then
       ERR_OUTPUT="$ERR_OUTPUT - $OUTPUT"
     fi
-- 
libgit2 0.22.2