Commit 55296ee690a271802e647a19e7e699b9bf6eea30
1 parent
5f26e3fa
Adding check_nvme
Showing
7 changed files
with
396 additions
and
2 deletions
CMakeLists.txt
@@ -18,6 +18,9 @@ add_executable(check_memfree_freebsd ${SOURCE_FILES_MEMFREE_FREEBSD}) | @@ -18,6 +18,9 @@ add_executable(check_memfree_freebsd ${SOURCE_FILES_MEMFREE_FREEBSD}) | ||
18 | set(SOURCE_FILES_SMART check_smart/check_smart.cpp check_smart/auxiliar.cpp) | 18 | set(SOURCE_FILES_SMART check_smart/check_smart.cpp check_smart/auxiliar.cpp) |
19 | add_executable(check_smart ${SOURCE_FILES_SMART}) | 19 | add_executable(check_smart ${SOURCE_FILES_SMART}) |
20 | 20 | ||
21 | +set(SOURCE_FILES_NVME check_nvme/check_nvme.cpp check_nvme/auxiliar.cpp) | ||
22 | +add_executable(check_nvme ${SOURCE_FILES_NVME}) | ||
23 | + | ||
21 | find_library(libsensors libsensors.so) | 24 | find_library(libsensors libsensors.so) |
22 | set(SOURCE_FILES_SENSORS check_sensors/check_sensors.cpp check_sensors/auxiliar.cpp) | 25 | set(SOURCE_FILES_SENSORS check_sensors/check_sensors.cpp check_sensors/auxiliar.cpp) |
23 | add_executable(check_sensors ${SOURCE_FILES_SENSORS}) | 26 | add_executable(check_sensors ${SOURCE_FILES_SENSORS}) |
check_nvme/README.md
0 → 100644
1 | +``` | ||
2 | +check_nvme v1.0 | ||
3 | + | ||
4 | +Checks the available spare, media and data integrity errors and percentage used of NVMe disks using SMART | ||
5 | +WARNING: Requires the setuid bit to be set to run as root | ||
6 | + | ||
7 | +Usage: | ||
8 | +check_nvme [-hV] DISKS... | ||
9 | + | ||
10 | +Options: | ||
11 | + -h | ||
12 | + Print detailed help screen | ||
13 | + -V | ||
14 | + Print version information | ||
15 | + DISKS | ||
16 | + Disks for which to retrieve S.M.A.R.T data | ||
17 | +``` |
check_nvme/auxiliar.cpp
0 → 100755
1 | +// | ||
2 | +// Created by Imanol on 28-may-16. | ||
3 | +// | ||
4 | + | ||
5 | +#include "auxiliar.h" | ||
6 | + | ||
7 | +void timer_handler (int signum) | ||
8 | +{ | ||
9 | + if(signum == SIGVTALRM) | ||
10 | + { | ||
11 | + cout << servicename << " CRITICAL - timeout occurred" << endl; | ||
12 | + exit(2); | ||
13 | + } | ||
14 | +} | ||
15 | + | ||
16 | +int str2int(string str) | ||
17 | +{ | ||
18 | + int num; | ||
19 | + stringstream sstream; | ||
20 | + sstream << str; | ||
21 | + if(!(sstream >> num)) | ||
22 | + { | ||
23 | + throw integerConversionException("Integer conversion error"); | ||
24 | + } | ||
25 | + return num; | ||
26 | +} | ||
27 | + | ||
// Formats the integer `x` as its decimal text representation.
string int2str(int x)
{
    ostringstream formatted;
    formatted << x;
    return formatted.str();
}
36 | + | ||
// Runs `cmd` through the shell with popen(), stores everything the command
// writes to its standard output in *output, and returns the command's exit
// code.
//
// Returns -1 when pclose() fails or the child did not exit normally (for
// example it was killed by a signal), so those failures are never reported as
// a successful (0) run. If the pipe cannot be opened, an error is printed and
// the process exits with status 3.
int exec(string cmd, string *output)
{
    output->clear();
    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe)
    {
        cout << "Error opening child process" << endl;
        exit(3);
    }
    char buffer[128];
    // fgets() returns NULL on end-of-file and on read errors alike, so looping
    // on its result cannot spin forever the way a feof()-only check can.
    while(fgets(buffer, sizeof(buffer), pipe) != NULL)
    {
        *output += buffer;
    }
    // Decode the wait status with the POSIX macros (available through
    // <stdlib.h>) instead of dividing by 256.
    const int status = pclose(pipe);
    if(status == -1 || !WIFEXITED(status))
    {
        return -1;
    }
    return WEXITSTATUS(status);
}
0 | \ No newline at end of file | 56 | \ No newline at end of file |
check_nvme/auxiliar.h
0 → 100755
1 | +// | ||
2 | +// Created by Imanol on 28-may-16. | ||
3 | +// | ||
4 | + | ||
5 | +#ifndef NAGIOS_PLUGINS_AUXILIAR_H | ||
6 | +#define NAGIOS_PLUGINS_AUXILIAR_H | ||
7 | + | ||
8 | +#include <sstream> | ||
9 | +#include <iostream> | ||
10 | +#include <exception> | ||
11 | + | ||
12 | +#include <stdlib.h> | ||
13 | +#include <stdio.h> | ||
14 | +#include <signal.h> | ||
15 | + | ||
16 | +using namespace std; | ||
17 | + | ||
18 | +extern const char *servicename; | ||
19 | + | ||
20 | +int str2int(string str); | ||
21 | +string int2str(int x); | ||
22 | +int exec(string cmd, string *output); | ||
23 | +void timer_handler (int signum); | ||
24 | + | ||
// Exception thrown by str2int() when a string cannot be converted to an
// integer. The message passed to the constructor is returned by what().
class integerConversionException : public exception
{
private:
    string s; // message handed back by what()
public:
    integerConversionException(std::string ss) : s(ss) {}
    // noexcept replaces the dynamic exception specification throw(), which is
    // deprecated since C++11 and removed from the language in C++20.
    ~integerConversionException() noexcept {}
    const char* what() const noexcept override { return s.c_str(); }
};
34 | + | ||
35 | +#endif //NAGIOS_PLUGINS_AUXILIAR_H |
check_nvme/check_nvme.cpp
0 → 100644
1 | +#include "check_nvme.h" | ||
2 | +#include <regex> | ||
3 | + | ||
// Service label printed at the start of every status line of this plugin.
const char *servicename = "NVME";
5 | + | ||
// Extracts the integer value that follows the first ':' of a smartctl
// attribute line such as "Available Spare:   100%".
//
// Leading whitespace is skipped, an optional sign is accepted, and ','
// thousands separators between digits are ignored (so "1,234" yields 1234
// instead of being cut off at 1). Parsing stops at the first other character,
// e.g. the trailing '%'.
//
// Returns -1 (the value the attribute table uses for "not read") when no
// number follows the colon or the number does not fit in an int, instead of
// letting std::stoi's exception escape.
int getNVMeAttrValue(string line) {
    // When there is no ':' the whole line is scanned, as before.
    const std::string::size_type colon = line.find(':');
    std::string::size_type pos = (colon == std::string::npos) ? 0 : colon + 1;

    pos = line.find_first_not_of(" \t\r\n\v\f", pos);
    if(pos == std::string::npos) {
        return -1;
    }

    std::string number;
    if(line[pos] == '-' || line[pos] == '+') {
        number += line[pos++];
    }

    bool hasDigit = false;
    for(; pos < line.size(); ++pos) {
        const char ch = line[pos];
        if(ch >= '0' && ch <= '9') {
            number += ch;
            hasDigit = true;
        } else if(ch == ',' && hasDigit && pos + 1 < line.size()
                  && line[pos + 1] >= '0' && line[pos + 1] <= '9') {
            // Thousands separator inside a number: skip it.
            continue;
        } else {
            break;
        }
    }
    if(!hasDigit) {
        return -1;
    }

    try {
        return stoi(number);
    } catch(const std::exception&) {
        // Out-of-range value: report it as unreadable.
        return -1;
    }
}
12 | + | ||
// Returns the attribute name of a smartctl output line: the text from the
// first non-space character up to (not including) the first ':' after it.
// If the line has no ':' the rest of the line is returned. A line made up
// only of spaces yields an empty name instead of throwing std::out_of_range
// from substr().
std::string getNVMeAttrName(string line) {
    const size_t first = line.find_first_not_of(' ');
    if(first == std::string::npos) {
        return std::string();
    }
    // When no ':' is found, last is npos and substr() clamps the count to the
    // end of the string.
    const size_t last = line.find_first_of(':', first);
    return line.substr(first, last - first);
}
18 | + | ||
19 | +int evalStatus(const char* disk, string *status) { | ||
20 | + string output = ""; | ||
21 | + string line = ""; | ||
22 | + | ||
23 | + int rc = run_smartctl_cmd(string(SMARTCTL_CMD_ATTRS), disk,&output); | ||
24 | + if(rc) { | ||
25 | + cout << "Error reading SMART data from disk " << disk << endl; | ||
26 | + exit(UNKN); | ||
27 | + } | ||
28 | + | ||
29 | + map<std::string,NVMeAttr> attrMap; | ||
30 | + attrMap[AVAILABLE_SPARE] = spare; | ||
31 | + attrMap[MEDIA_ERRORS] = errors; | ||
32 | + attrMap[PERCENTAGE_USED] = used; | ||
33 | + | ||
34 | + stringstream sstream; | ||
35 | + sstream.str(output); | ||
36 | + while(getline(sstream,line)) { | ||
37 | + if(line == "") { | ||
38 | + continue; | ||
39 | + } | ||
40 | + for(map<std::string,NVMeAttr>::iterator it = attrMap.begin(); it != attrMap.end(); ++it) { | ||
41 | + std::string name = it->first; | ||
42 | + if(getNVMeAttrName(line) == name) { | ||
43 | + attrMap[name].value = getNVMeAttrValue(line); | ||
44 | + } | ||
45 | + } | ||
46 | + } | ||
47 | + | ||
48 | + int ret = OK; | ||
49 | + *status = string(disk); | ||
50 | + for(map<std::string, NVMeAttr>::iterator it = attrMap.begin(); it != attrMap.end(); ++it) { | ||
51 | + NVMeAttr attr = it->second; | ||
52 | + | ||
53 | + if(attr.value == -1) { | ||
54 | + if(attr.optional) { | ||
55 | + continue; | ||
56 | + } | ||
57 | + *status = string(disk) + " status UNKNOWN"; | ||
58 | + return UNKN; | ||
59 | + } | ||
60 | + | ||
61 | + int veredict = 0; | ||
62 | + | ||
63 | + if(attr.lower_than) { | ||
64 | + if(attr.value < attr.threshold_warn) { | ||
65 | + veredict = WARN; | ||
66 | + } | ||
67 | + if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) { | ||
68 | + veredict = CRIT; | ||
69 | + } | ||
70 | + } else { | ||
71 | + if(attr.value > attr.threshold_warn) { | ||
72 | + veredict = WARN; | ||
73 | + } | ||
74 | + if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) { | ||
75 | + veredict = CRIT; | ||
76 | + } | ||
77 | + } | ||
78 | + | ||
79 | + switch(veredict) { | ||
80 | + case OK: | ||
81 | + break; | ||
82 | + case WARN: | ||
83 | + if(ret == OK) { | ||
84 | + ret = WARN; | ||
85 | + } | ||
86 | + break; | ||
87 | + case CRIT: | ||
88 | + ret = CRIT; | ||
89 | + break; | ||
90 | + } | ||
91 | + *status += " " + attr.name + ":" + to_string(attr.value); | ||
92 | + } | ||
93 | + return ret; | ||
94 | +} | ||
95 | + | ||
96 | +int run_smartctl_cmd(const string command, const char* disk, string* output) { | ||
97 | + int uid = getuid(); | ||
98 | + setreuid(0,0); | ||
99 | + | ||
100 | + int rc = exec(command + disk,output); | ||
101 | + | ||
102 | + setreuid(uid,0); | ||
103 | + return rc; | ||
104 | +} | ||
105 | + | ||
106 | +void printVersion() { | ||
107 | + cout << "check_nvme v" << VERSION << endl << endl; | ||
108 | +} | ||
109 | + | ||
110 | +void printHelp(bool longVersion) { | ||
111 | + if(longVersion) { | ||
112 | + printVersion(); | ||
113 | + cout << "Checks for pending, reallocated or uncorrectable sectors in NVMe disks" << endl << "WARNING: Requires the setuid bit to be set to run as root" << endl << endl; | ||
114 | + printHelp(false); | ||
115 | + cout << "Options:" << endl; | ||
116 | + cout << " -h" << endl; | ||
117 | + cout << " Print detailed help screen" << endl; | ||
118 | + cout << " -V" << endl; | ||
119 | + cout << " Print version information" << endl; | ||
120 | + cout << " DISKS" << endl; | ||
121 | + cout << " Disks for which to retrieve S.M.A.R.T data" << endl << endl; | ||
122 | + return; | ||
123 | + } | ||
124 | + cout << "Usage: " << endl << "check_smart [-hV] DISKS..." << endl << endl; | ||
125 | +} | ||
126 | + | ||
127 | +void set_timeout(unsigned int sec) { | ||
128 | + struct itimerval timer; | ||
129 | + timer.it_value.tv_sec = sec; | ||
130 | + timer.it_value.tv_usec = 0; | ||
131 | + timer.it_interval.tv_sec = 0; | ||
132 | + timer.it_interval.tv_usec = 0; | ||
133 | + setitimer (ITIMER_VIRTUAL, &timer, 0); | ||
134 | + | ||
135 | + struct sigaction sa; | ||
136 | + memset (&sa, 0, sizeof (sa)); | ||
137 | + sa.sa_handler = &timer_handler; | ||
138 | + sigaction (SIGVTALRM, &sa, 0); | ||
139 | +} | ||
140 | + | ||
141 | +int main(int argc, char **argv) { | ||
142 | + set_timeout(10); | ||
143 | + int c; | ||
144 | + while ((c = getopt (argc, argv, "Vh")) != -1) { | ||
145 | + switch(c) { | ||
146 | + case 'h': | ||
147 | + printHelp(true); | ||
148 | + return OK; | ||
149 | + case 'V': | ||
150 | + printVersion(); | ||
151 | + return OK; | ||
152 | + case '?': | ||
153 | + printHelp(false); | ||
154 | + return UNKN; | ||
155 | + } | ||
156 | + } | ||
157 | + | ||
158 | + if(argc == 1) { | ||
159 | + cout << "No disks checked" << endl; | ||
160 | + return 3; | ||
161 | + } | ||
162 | + | ||
163 | + string *results = new string[argc-1]; | ||
164 | + int returnCode = OK; | ||
165 | + | ||
166 | + for(int i = 1; i < argc; i++) { | ||
167 | + switch(evalStatus(argv[i],&(results[i-1]))) { | ||
168 | + case OK: | ||
169 | + break; | ||
170 | + case WARN: | ||
171 | + if(returnCode == OK) {returnCode = WARN;} | ||
172 | + break; | ||
173 | + case CRIT: | ||
174 | + returnCode = CRIT; | ||
175 | + break; | ||
176 | + case UNKN: | ||
177 | + returnCode = UNKN; | ||
178 | + break; | ||
179 | + } | ||
180 | + } | ||
181 | + | ||
182 | + cout << servicename; | ||
183 | + if(returnCode == OK) {cout << " OK - disk status: ";} | ||
184 | + else if(returnCode == WARN) {cout << " WARNING - disk status: ";} | ||
185 | + else if(returnCode == CRIT) {cout << " CRITICAL - disk status: ";} | ||
186 | + else if(returnCode == UNKN) {cout << " UNKNOWN - disk status: ";} | ||
187 | + for(int i = 0; i < (argc-1); i++) { | ||
188 | + cout << results[i]; | ||
189 | + if(i != (argc-2)) {cout << ", ";} | ||
190 | + } | ||
191 | + cout << endl; | ||
192 | + return returnCode; | ||
193 | +} |
check_nvme/check_nvme.h
0 → 100755
1 | +#ifndef CHECK_NVME_H | ||
2 | +#define CHECK_NVME_H | ||
3 | + | ||
4 | +#include <iostream> | ||
5 | +#include <sstream> | ||
6 | +#include <map> | ||
7 | + | ||
8 | +#include <stdio.h> | ||
9 | +#include <string.h> | ||
10 | +#include <stdlib.h> | ||
11 | +#include <unistd.h> | ||
12 | + | ||
13 | +#include <sys/time.h> | ||
14 | + | ||
15 | +#include "auxiliar.h" | ||
16 | + | ||
17 | +#define OK 0 | ||
18 | +#define WARN 1 | ||
19 | +#define CRIT 2 | ||
20 | +#define UNKN 3 | ||
21 | + | ||
22 | +#define VERSION "1.0" | ||
23 | + | ||
24 | +#define SMARTCTL_CMD_ATTRS "/usr/sbin/smartctl -A " | ||
25 | +#define SMARTCTL_CMD_INFO "/usr/sbin/smartctl -i " | ||
26 | + | ||
27 | +#define ROTATION_INFO_STR "Rotation Rate:" | ||
28 | +#define SSD_DEV_STR "Solid State Device" | ||
29 | + | ||
30 | +#define AVAILABLE_SPARE "Available Spare" | ||
31 | +#define MEDIA_ERRORS "Media and Data Integrity Errors" | ||
32 | +#define PERCENTAGE_USED "Percentage Used" | ||
33 | + | ||
34 | +using namespace std; | ||
35 | + | ||
// One monitored NVMe health attribute together with its alert thresholds.
struct NVMeAttr
{
    std::string name;   // attribute label as matched in the smartctl output
    int value;          // value read from the disk; -1 means "not read"
    int threshold_warn; // crossing this threshold yields a warning
    int threshold_crit; // crossing this threshold yields critical; -1 disables it
    bool optional;      // true: a missing attribute is skipped instead of UNKNOWN
    bool lower_than;    // true: alert when value is below the threshold,
                        // false: alert when value is above it
};
45 | + | ||
46 | + | ||
47 | +// Attribute definitions | ||
48 | +NVMeAttr used = { | ||
49 | + .name = PERCENTAGE_USED, | ||
50 | + .value = -1, | ||
51 | + .threshold_warn = 80, | ||
52 | + .threshold_crit = 90, | ||
53 | + .optional = false, | ||
54 | + .lower_than = false, | ||
55 | +}; | ||
56 | + | ||
57 | +NVMeAttr spare = { | ||
58 | + .name = AVAILABLE_SPARE, | ||
59 | + .value = -1, | ||
60 | + .threshold_warn = 20, | ||
61 | + .threshold_crit = 10, | ||
62 | + .optional = false, | ||
63 | + .lower_than = true, | ||
64 | +}; | ||
65 | + | ||
66 | +NVMeAttr errors = { | ||
67 | + .name = MEDIA_ERRORS, | ||
68 | + .value = -1, | ||
69 | + .threshold_warn = 0, | ||
70 | + .threshold_crit = 0, | ||
71 | + .optional = false, | ||
72 | + .lower_than = false, | ||
73 | +}; | ||
74 | + | ||
75 | +map<std::string,NVMeAttr> prepareAttrMap(int driveType); | ||
76 | +int getNVMeAttrValue(string line); | ||
77 | +std::string getNVMeAttrName(string line); | ||
78 | +int run_smartctl_cmd(const string command, const char* disk, string* output); | ||
79 | +int evalStatus(const char* disk, int driveType, string* status); | ||
80 | +void printVersion(); | ||
81 | +void printHelp(bool longVersion); | ||
82 | +void set_timeout(unsigned int sec); | ||
83 | + | ||
84 | +#endif |
check_zfs/check_zfs.sh
@@ -11,8 +11,15 @@ function smart_disks() { | @@ -11,8 +11,15 @@ function smart_disks() { | ||
11 | ERR_OUTPUT="" | 11 | ERR_OUTPUT="" |
12 | HIGHEST_RES=0 | 12 | HIGHEST_RES=0 |
13 | for disk in $DISKS; do | 13 | for disk in $DISKS; do |
14 | - OUTPUT=$(/usr/lib64/nagios/plugins/check_smart $disk) | ||
15 | - RES=$? | 14 | + OUTPUT="" |
15 | + RES=0 | ||
16 | + if echo $disk | grep "nvme" > /dev/null; then | ||
17 | + OUTPUT=$(/usr/lib64/nagios/plugins/check_nvme $disk) | ||
18 | + RES=$? | ||
19 | + else | ||
20 | + OUTPUT=$(/usr/lib64/nagios/plugins/check_smart $disk) | ||
21 | + RES=$? | ||
22 | + fi | ||
16 | if [[ $RES != 0 ]]; then | 23 | if [[ $RES != 0 ]]; then |
17 | ERR_OUTPUT="$ERR_OUTPUT - $OUTPUT" | 24 | ERR_OUTPUT="$ERR_OUTPUT - $OUTPUT" |
18 | fi | 25 | fi |