Commit 55296ee690a271802e647a19e7e699b9bf6eea30
1 parent
5f26e3fa
Adding check_nvme
Showing
7 changed files
with
396 additions
and
2 deletions
CMakeLists.txt
... | ... | @@ -18,6 +18,9 @@ add_executable(check_memfree_freebsd ${SOURCE_FILES_MEMFREE_FREEBSD}) |
18 | 18 | set(SOURCE_FILES_SMART check_smart/check_smart.cpp check_smart/auxiliar.cpp) |
19 | 19 | add_executable(check_smart ${SOURCE_FILES_SMART}) |
20 | 20 | |
21 | +set(SOURCE_FILES_NVME check_nvme/check_nvme.cpp check_nvme/auxiliar.cpp) | |
22 | +add_executable(check_nvme ${SOURCE_FILES_NVME}) | |
23 | + | |
21 | 24 | find_library(libsensors libsensors.so) |
22 | 25 | set(SOURCE_FILES_SENSORS check_sensors/check_sensors.cpp check_sensors/auxiliar.cpp) |
23 | 26 | add_executable(check_sensors ${SOURCE_FILES_SENSORS}) | ... | ... |
check_nvme/README.md
0 → 100644
1 | +``` | |
2 | +check_nvme v1.0 | |
3 | + | |
4 | +Checks available spare, media and data integrity errors and percentage used in NVMe disks using SMART | |
5 | +WARNING: Requires the setuid bit to be set to run as root | |
6 | + | |
7 | +Usage: | |
8 | +check_nvme [-hV] DISKS... | |
9 | + | |
10 | +Options: | |
11 | + -h | |
12 | + Print detailed help screen | |
13 | + -V | |
14 | + Print version information | |
15 | + DISKS | |
16 | + Disks for which to retrieve S.M.A.R.T data | |
17 | +``` | ... | ... |
check_nvme/auxiliar.cpp
0 → 100755
1 | +// | |
2 | +// Created by Imanol on 28-may-16. | |
3 | +// | |
4 | + | |
5 | +#include "auxiliar.h" | |
6 | + | |
7 | +void timer_handler (int signum) | |
8 | +{ | |
9 | + if(signum == SIGVTALRM) | |
10 | + { | |
11 | + cout << servicename << " CRITICAL - timeout occurred" << endl; | |
12 | + exit(2); | |
13 | + } | |
14 | +} | |
15 | + | |
16 | +int str2int(string str) | |
17 | +{ | |
18 | + int num; | |
19 | + stringstream sstream; | |
20 | + sstream << str; | |
21 | + if(!(sstream >> num)) | |
22 | + { | |
23 | + throw integerConversionException("Integer conversion error"); | |
24 | + } | |
25 | + return num; | |
26 | +} | |
27 | + | |
// Returns the decimal text representation of `x` (with a leading '-' for
// negative values), produced by formatting it through a string stream.
std::string int2str(int x)
{
    std::ostringstream formatter;
    formatter << x;
    return formatter.str();
}
36 | + | |
// Runs `cmd` through popen() and captures everything the command writes to
// its standard output.
//
// Parameters:
//   cmd    - command line handed to popen().
//   output - receives the captured text; any previous content is discarded.
//
// Returns the command's exit code (0-255) when it terminated normally, or -1
// when pclose() failed or the command did not exit normally (for example it
// was killed by a signal), so callers that test for "non-zero" see a failure.
//
// If the pipe cannot be opened the function prints an error and terminates
// the process with exit status 3.
int exec(std::string cmd, std::string *output)
{
    output->clear();

    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe)
    {
        std::cout << "Error opening child process" << std::endl;
        exit(3);
    }

    // Loop on fgets() itself: it returns NULL on end-of-file AND on a read
    // error, whereas looping on feof() alone would spin forever after an
    // error because the EOF flag is never set in that case.
    char buffer[128];
    while (fgets(buffer, sizeof(buffer), pipe) != NULL)
    {
        *output += buffer;
    }

    // pclose() returns a wait status, not an exit code. The original code
    // assumed the traditional layout (exit code in bits 8-15); that layout is
    // kept, but a failed pclose() or a non-zero low 7 bits (termination by
    // signal) is no longer silently reported as success.
    const int status = pclose(pipe);
    if (status == -1 || (status & 0x7f) != 0)
    {
        return -1;
    }
    return (status >> 8) & 0xff;
}
0 | 56 | \ No newline at end of file | ... | ... |
check_nvme/auxiliar.h
0 → 100755
1 | +// | |
2 | +// Created by Imanol on 28-may-16. | |
3 | +// | |
4 | + | |
5 | +#ifndef NAGIOS_PLUGINS_AUXILIAR_H | |
6 | +#define NAGIOS_PLUGINS_AUXILIAR_H | |
7 | + | |
8 | +#include <sstream> | |
9 | +#include <iostream> | |
10 | +#include <exception> | |
11 | + | |
12 | +#include <stdlib.h> | |
13 | +#include <stdio.h> | |
14 | +#include <signal.h> | |
15 | + | |
16 | +using namespace std; | |
17 | + | |
18 | +extern const char *servicename; | |
19 | + | |
20 | +int str2int(string str); | |
21 | +string int2str(int x); | |
22 | +int exec(string cmd, string *output); | |
23 | +void timer_handler (int signum); | |
24 | + | |
// Exception raised by str2int() when a string cannot be converted to an int.
// The message given at construction is stored and returned by what(); the
// returned pointer stays valid for as long as the exception object lives.
class integerConversionException : public std::exception
{
public:
    integerConversionException(std::string ss) : s(ss) {}
    ~integerConversionException() noexcept {}

    // Returns the stored message as a C string.
    const char* what() const noexcept override { return s.c_str(); }

private:
    std::string s; // message supplied by the thrower
};
34 | + | |
35 | +#endif //NAGIOS_PLUGINS_AUXILIAR_H | ... | ... |
check_nvme/check_nvme.cpp
0 → 100644
1 | +#include "check_nvme.h" | |
2 | +#include <regex> | |
3 | + | |
// Service label printed at the start of the status line and of the timeout
// message (declared `extern` in auxiliar.h).
const char *servicename = "NVME";
5 | + | |
// Extracts the integer value from one "Name:   value" line of smartctl
// output.
//
// The number is read from the text after the first ':' in `line`. Leading
// whitespace and an optional sign are accepted, ',' between digits is treated
// as a thousands separator ("1,234" -> 1234) and anything after the number
// (such as '%' or a unit) is ignored.
//
// Returns the parsed value, or -1 when the line has no ':', when no number
// follows it, or when the number does not fit in an int. -1 is the same
// sentinel evalStatus() uses for "attribute not read", so a malformed line is
// reported as UNKNOWN instead of terminating the plugin with an uncaught
// exception.
int getNVMeAttrValue(std::string line) {
    const std::string::size_type colon = line.find(':');
    if(colon == std::string::npos) {
        return -1;
    }

    std::string::size_type pos = line.find_first_not_of(" \t\r\n\v\f", colon + 1);
    if(pos == std::string::npos) {
        return -1;
    }

    std::string number;
    if(line[pos] == '-' || line[pos] == '+') {
        number += line[pos++];
    }
    const std::string::size_type digitsStart = number.size();

    for(; pos < line.size(); ++pos) {
        const char c = line[pos];
        if(c >= '0' && c <= '9') {
            number += c;
            continue;
        }
        // Skip a ',' only when it sits between two digits.
        const bool separator = c == ','
            && number.size() > digitsStart
            && pos + 1 < line.size()
            && line[pos + 1] >= '0' && line[pos + 1] <= '9';
        if(!separator) {
            break;
        }
    }

    if(number.size() == digitsStart) {
        return -1; // no digits found
    }

    try {
        return std::stoi(number);
    } catch(const std::exception&) {
        return -1; // out of int range
    }
}
12 | + | |
// Returns the attribute name of one smartctl output line: the text from the
// first non-space character up to (not including) the first ':' after it.
//
// A line without ':' yields everything from the first non-space character to
// the end of the line. An empty line, or one made only of spaces, yields an
// empty string (previously this case threw std::out_of_range from substr()).
std::string getNVMeAttrName(std::string line) {
    const std::string::size_type first = line.find_first_not_of(' ');
    if(first == std::string::npos) {
        return std::string();
    }

    const std::string::size_type colon = line.find(':', first);
    if(colon == std::string::npos) {
        return line.substr(first);
    }
    return line.substr(first, colon - first);
}
18 | + | |
19 | +int evalStatus(const char* disk, string *status) { | |
20 | + string output = ""; | |
21 | + string line = ""; | |
22 | + | |
23 | + int rc = run_smartctl_cmd(string(SMARTCTL_CMD_ATTRS), disk,&output); | |
24 | + if(rc) { | |
25 | + cout << "Error reading SMART data from disk " << disk << endl; | |
26 | + exit(UNKN); | |
27 | + } | |
28 | + | |
29 | + map<std::string,NVMeAttr> attrMap; | |
30 | + attrMap[AVAILABLE_SPARE] = spare; | |
31 | + attrMap[MEDIA_ERRORS] = errors; | |
32 | + attrMap[PERCENTAGE_USED] = used; | |
33 | + | |
34 | + stringstream sstream; | |
35 | + sstream.str(output); | |
36 | + while(getline(sstream,line)) { | |
37 | + if(line == "") { | |
38 | + continue; | |
39 | + } | |
40 | + for(map<std::string,NVMeAttr>::iterator it = attrMap.begin(); it != attrMap.end(); ++it) { | |
41 | + std::string name = it->first; | |
42 | + if(getNVMeAttrName(line) == name) { | |
43 | + attrMap[name].value = getNVMeAttrValue(line); | |
44 | + } | |
45 | + } | |
46 | + } | |
47 | + | |
48 | + int ret = OK; | |
49 | + *status = string(disk); | |
50 | + for(map<std::string, NVMeAttr>::iterator it = attrMap.begin(); it != attrMap.end(); ++it) { | |
51 | + NVMeAttr attr = it->second; | |
52 | + | |
53 | + if(attr.value == -1) { | |
54 | + if(attr.optional) { | |
55 | + continue; | |
56 | + } | |
57 | + *status = string(disk) + " status UNKNOWN"; | |
58 | + return UNKN; | |
59 | + } | |
60 | + | |
61 | + int veredict = 0; | |
62 | + | |
63 | + if(attr.lower_than) { | |
64 | + if(attr.value < attr.threshold_warn) { | |
65 | + veredict = WARN; | |
66 | + } | |
67 | + if(attr.threshold_crit != -1 && attr.value < attr.threshold_crit) { | |
68 | + veredict = CRIT; | |
69 | + } | |
70 | + } else { | |
71 | + if(attr.value > attr.threshold_warn) { | |
72 | + veredict = WARN; | |
73 | + } | |
74 | + if(attr.threshold_crit != -1 && attr.value > attr.threshold_crit) { | |
75 | + veredict = CRIT; | |
76 | + } | |
77 | + } | |
78 | + | |
79 | + switch(veredict) { | |
80 | + case OK: | |
81 | + break; | |
82 | + case WARN: | |
83 | + if(ret == OK) { | |
84 | + ret = WARN; | |
85 | + } | |
86 | + break; | |
87 | + case CRIT: | |
88 | + ret = CRIT; | |
89 | + break; | |
90 | + } | |
91 | + *status += " " + attr.name + ":" + to_string(attr.value); | |
92 | + } | |
93 | + return ret; | |
94 | +} | |
95 | + | |
96 | +int run_smartctl_cmd(const string command, const char* disk, string* output) { | |
97 | + int uid = getuid(); | |
98 | + setreuid(0,0); | |
99 | + | |
100 | + int rc = exec(command + disk,output); | |
101 | + | |
102 | + setreuid(uid,0); | |
103 | + return rc; | |
104 | +} | |
105 | + | |
106 | +void printVersion() { | |
107 | + cout << "check_nvme v" << VERSION << endl << endl; | |
108 | +} | |
109 | + | |
110 | +void printHelp(bool longVersion) { | |
111 | + if(longVersion) { | |
112 | + printVersion(); | |
113 | + cout << "Checks for pending, reallocated or uncorrectable sectors in NVMe disks" << endl << "WARNING: Requires the setuid bit to be set to run as root" << endl << endl; | |
114 | + printHelp(false); | |
115 | + cout << "Options:" << endl; | |
116 | + cout << " -h" << endl; | |
117 | + cout << " Print detailed help screen" << endl; | |
118 | + cout << " -V" << endl; | |
119 | + cout << " Print version information" << endl; | |
120 | + cout << " DISKS" << endl; | |
121 | + cout << " Disks for which to retrieve S.M.A.R.T data" << endl << endl; | |
122 | + return; | |
123 | + } | |
124 | + cout << "Usage: " << endl << "check_smart [-hV] DISKS..." << endl << endl; | |
125 | +} | |
126 | + | |
127 | +void set_timeout(unsigned int sec) { | |
128 | + struct itimerval timer; | |
129 | + timer.it_value.tv_sec = sec; | |
130 | + timer.it_value.tv_usec = 0; | |
131 | + timer.it_interval.tv_sec = 0; | |
132 | + timer.it_interval.tv_usec = 0; | |
133 | + setitimer (ITIMER_VIRTUAL, &timer, 0); | |
134 | + | |
135 | + struct sigaction sa; | |
136 | + memset (&sa, 0, sizeof (sa)); | |
137 | + sa.sa_handler = &timer_handler; | |
138 | + sigaction (SIGVTALRM, &sa, 0); | |
139 | +} | |
140 | + | |
141 | +int main(int argc, char **argv) { | |
142 | + set_timeout(10); | |
143 | + int c; | |
144 | + while ((c = getopt (argc, argv, "Vh")) != -1) { | |
145 | + switch(c) { | |
146 | + case 'h': | |
147 | + printHelp(true); | |
148 | + return OK; | |
149 | + case 'V': | |
150 | + printVersion(); | |
151 | + return OK; | |
152 | + case '?': | |
153 | + printHelp(false); | |
154 | + return UNKN; | |
155 | + } | |
156 | + } | |
157 | + | |
158 | + if(argc == 1) { | |
159 | + cout << "No disks checked" << endl; | |
160 | + return 3; | |
161 | + } | |
162 | + | |
163 | + string *results = new string[argc-1]; | |
164 | + int returnCode = OK; | |
165 | + | |
166 | + for(int i = 1; i < argc; i++) { | |
167 | + switch(evalStatus(argv[i],&(results[i-1]))) { | |
168 | + case OK: | |
169 | + break; | |
170 | + case WARN: | |
171 | + if(returnCode == OK) {returnCode = WARN;} | |
172 | + break; | |
173 | + case CRIT: | |
174 | + returnCode = CRIT; | |
175 | + break; | |
176 | + case UNKN: | |
177 | + returnCode = UNKN; | |
178 | + break; | |
179 | + } | |
180 | + } | |
181 | + | |
182 | + cout << servicename; | |
183 | + if(returnCode == OK) {cout << " OK - disk status: ";} | |
184 | + else if(returnCode == WARN) {cout << " WARNING - disk status: ";} | |
185 | + else if(returnCode == CRIT) {cout << " CRITICAL - disk status: ";} | |
186 | + else if(returnCode == UNKN) {cout << " UNKNOWN - disk status: ";} | |
187 | + for(int i = 0; i < (argc-1); i++) { | |
188 | + cout << results[i]; | |
189 | + if(i != (argc-2)) {cout << ", ";} | |
190 | + } | |
191 | + cout << endl; | |
192 | + return returnCode; | |
193 | +} | ... | ... |
check_nvme/check_nvme.h
0 → 100755
1 | +#ifndef CHECK_NVME_H | |
2 | +#define CHECK_NVME_H | |
3 | + | |
4 | +#include <iostream> | |
5 | +#include <sstream> | |
6 | +#include <map> | |
7 | + | |
8 | +#include <stdio.h> | |
9 | +#include <string.h> | |
10 | +#include <stdlib.h> | |
11 | +#include <unistd.h> | |
12 | + | |
13 | +#include <sys/time.h> | |
14 | + | |
15 | +#include "auxiliar.h" | |
16 | + | |
17 | +#define OK 0 | |
18 | +#define WARN 1 | |
19 | +#define CRIT 2 | |
20 | +#define UNKN 3 | |
21 | + | |
22 | +#define VERSION "1.0" | |
23 | + | |
24 | +#define SMARTCTL_CMD_ATTRS "/usr/sbin/smartctl -A " | |
25 | +#define SMARTCTL_CMD_INFO "/usr/sbin/smartctl -i " | |
26 | + | |
27 | +#define ROTATION_INFO_STR "Rotation Rate:" | |
28 | +#define SSD_DEV_STR "Solid State Device" | |
29 | + | |
30 | +#define AVAILABLE_SPARE "Available Spare" | |
31 | +#define MEDIA_ERRORS "Media and Data Integrity Errors" | |
32 | +#define PERCENTAGE_USED "Percentage Used" | |
33 | + | |
34 | +using namespace std; | |
35 | + | |
// Description and current reading of one NVMe health attribute.
struct NVMeAttr
{
    std::string name;   // label used for this attribute in the status message
    int value;          // parsed reading; -1 means the attribute was not read
    int threshold_warn; // crossing this level yields WARN
    int threshold_crit; // crossing this level yields CRIT; -1 disables the check
    bool optional;      // true: a missing reading is skipped instead of UNKNOWN
    bool lower_than;    // true: alert when value < threshold; false: when value > threshold
};
45 | + | |
46 | + | |
47 | +// Attribute definitions | |
48 | +NVMeAttr used = { | |
49 | + .name = PERCENTAGE_USED, | |
50 | + .value = -1, | |
51 | + .threshold_warn = 80, | |
52 | + .threshold_crit = 90, | |
53 | + .optional = false, | |
54 | + .lower_than = false, | |
55 | +}; | |
56 | + | |
57 | +NVMeAttr spare = { | |
58 | + .name = AVAILABLE_SPARE, | |
59 | + .value = -1, | |
60 | + .threshold_warn = 20, | |
61 | + .threshold_crit = 10, | |
62 | + .optional = false, | |
63 | + .lower_than = true, | |
64 | +}; | |
65 | + | |
66 | +NVMeAttr errors = { | |
67 | + .name = MEDIA_ERRORS, | |
68 | + .value = -1, | |
69 | + .threshold_warn = 0, | |
70 | + .threshold_crit = 0, | |
71 | + .optional = false, | |
72 | + .lower_than = false, | |
73 | +}; | |
74 | + | |
75 | +map<std::string,NVMeAttr> prepareAttrMap(int driveType); | |
76 | +int getNVMeAttrValue(string line); | |
77 | +std::string getNVMeAttrName(string line); | |
78 | +int run_smartctl_cmd(const string command, const char* disk, string* output); | |
79 | +int evalStatus(const char* disk, int driveType, string* status); | |
80 | +void printVersion(); | |
81 | +void printHelp(bool longVersion); | |
82 | +void set_timeout(unsigned int sec); | |
83 | + | |
84 | +#endif | ... | ... |
check_zfs/check_zfs.sh
... | ... | @@ -11,8 +11,15 @@ function smart_disks() { |
11 | 11 | ERR_OUTPUT="" |
12 | 12 | HIGHEST_RES=0 |
13 | 13 | for disk in $DISKS; do |
14 | - OUTPUT=$(/usr/lib64/nagios/plugins/check_smart $disk) | |
15 | - RES=$? | |
14 | + OUTPUT="" | |
15 | + RES=0 | |
16 | + if echo $disk | grep "nvme" > /dev/null; then | |
17 | + OUTPUT=$(/usr/lib64/nagios/plugins/check_nvme $disk) | |
18 | + RES=$? | |
19 | + else | |
20 | + OUTPUT=$(/usr/lib64/nagios/plugins/check_smart $disk) | |
21 | + RES=$? | |
22 | + fi | |
16 | 23 | if [[ $RES != 0 ]]; then |
17 | 24 | ERR_OUTPUT="$ERR_OUTPUT - $OUTPUT" |
18 | 25 | fi | ... | ... |