check_zfs.sh 3.19 KB
#!/bin/bash

#######################################
# Run a SMART / NVMe health plugin against every /dev-backed device listed by
# `zpool status -P` for a pool and fold the results into one status line.
# Globals:
#   NAGIOS_PLUGIN_DIR (read, optional) - directory containing the check_smart
#     and check_nvme plugins; defaults to /usr/lib64/nagios/plugins.
# Arguments:
#   $1 - pool name
# Outputs:
#   "ZFS POOL SMART <pool> <STATE>[ - <plugin output>]..." on stdout.
# Returns:
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN. The numerically highest plugin
#   result wins; plugin exit codes above 3 are reported as UNKNOWN.
#######################################
function smart_disks() {
  if [[ $# != 1 ]]; then
    echo "Wrong number of arguments"
    return 3
  fi

  local pool=$1
  local plugin_dir=${NAGIOS_PLUGIN_DIR:-/usr/lib64/nagios/plugins}
  local status_output line first disk plugin output res state
  local -a disks=()
  local err_output=""
  local highest_res=0

  # Without a readable pool status there is nothing to check; reporting OK
  # here would hide a missing or unreadable pool.
  if ! status_output=$(zpool status -P "$pool"); then
    echo "ZFS POOL SMART $pool UNKNOWN - unable to read pool status"
    return 3
  fi

  # Device rows are indented with tabs/spaces; the first whitespace-separated
  # field of each row is the device path.
  while IFS= read -r line; do
    read -r first _ <<<"$line"
    if [[ $first == /dev* ]]; then
      disks+=("$first")
    fi
  done <<<"$status_output"

  if (( ${#disks[@]} == 0 )); then
    echo "ZFS POOL SMART $pool UNKNOWN - no /dev devices found in pool"
    return 3
  fi

  for disk in "${disks[@]}"; do
    if [[ $disk == *nvme* ]]; then
      plugin=check_nvme
    else
      plugin=check_smart
    fi

    res=0
    output=$("$plugin_dir/$plugin" "$disk") || res=$?

    if (( res != 0 )); then
      # A plugin that could not even start (e.g. exit 126/127) prints nothing
      # on stdout; make sure the failing device is still named.
      if [[ -z $output ]]; then
        output="$disk: $plugin exited with status $res"
      fi
      err_output+=" - $output"
    fi

    # Only 0-3 are mapped to a state below; anything else is UNKNOWN rather
    # than silently falling through to OK.
    if (( res > 3 )); then
      res=3
    fi
    if (( res > highest_res )); then
      highest_res=$res
    fi
  done

  case $highest_res in
    0)
      echo "ZFS POOL SMART $pool OK"
      return 0
      ;;
    1) state=WARNING ;;
    2) state=CRITICAL ;;
    *) state=UNKNOWN ;;
  esac

  echo "ZFS POOL SMART $pool ${state}${err_output}"
  return "$highest_res"
}

#######################################
# Report the health of a ZFS pool from the "state:" and "errors:" lines of
# `zpool status`.
# Arguments:
#   $1 - pool name
# Outputs:
#   "ZFS POOL ERRORS <pool> - <STATE>[: detail]" on stdout.
# Returns:
#   0 when the state is ONLINE and no data errors are reported,
#   1 (WARNING) when the errors line is anything other than
#     "errors: No known data errors",
#   2 (CRITICAL) when the state is not ONLINE,
#   3 (UNKNOWN) on bad arguments, zpool failure or unparsable output.
#######################################
function pool_errors() {
  if [[ $# != 1 ]]; then
    echo "Wrong number of arguments"
    return 3
  fi

  local pool=$1
  local output line
  local state=""
  local errors=""
  local state_re='^[[:space:]]*state:(.*)$'

  if ! output=$(zpool status "$pool"); then
    echo "ZFS POOL ERRORS $pool - UNKNOWN: unable to read pool status"
    return 3
  fi

  # Pick up the first "state:" and "errors:" lines; parsing in-shell means a
  # missing line is detected directly instead of via a pipeline exit status.
  while IFS= read -r line; do
    if [[ -z $state && $line =~ $state_re ]]; then
      state=${BASH_REMATCH[1]//[[:space:]]/}
    elif [[ -z $errors && $line == errors:* ]]; then
      errors=$line
    fi
  done <<<"$output"

  if [[ -z $state ]]; then
    echo "ZFS POOL ERRORS $pool - UNKNOWN: no pool state in zpool status output"
    return 3
  fi

  # The state is evaluated first so that an unhealthy pool is always reported
  # as CRITICAL, even if the errors line is absent.
  if [[ $state != "ONLINE" ]]; then
    echo "ZFS POOL ERRORS $pool - CRITICAL: status: $state"
    return 2
  fi

  if [[ -z $errors ]]; then
    echo "ZFS POOL ERRORS $pool - UNKNOWN: no errors line in zpool status output"
    return 3
  fi

  if [[ $errors != "errors: No known data errors" ]]; then
    echo "ZFS POOL ERRORS $pool - WARNING: $errors"
    return 1
  fi

  echo "ZFS POOL ERRORS $pool - OK"
  return 0
}

#######################################
# Compare the percentage of free space in a ZFS pool against warning and
# critical thresholds.
# Arguments (exactly seven):
#   $1, $2 - not used by this function
#   $3     - pool name
#   $4..$7 - "-c <critical%>" and "-w <warning%>" in either order; each
#            threshold is a non-negative number (decimals allowed)
# Outputs:
#   "ZFS POOL FREE <pool> - <STATE>: <free%>% <free>/<total> GB" on stdout,
#   where sizes are bytes divided by 1024^3.
# Returns:
#   0 OK, 1 WARNING (free% < warning), 2 CRITICAL (free% < critical),
#   3 UNKNOWN (bad arguments, zfs failure or unusable zfs output).
#######################################
function pool_free() {
  if [[ $# != 7 ]]; then
    echo "Wrong number of arguments"
    return 3
  fi

  local pool=$3
  local critical=""
  local warning=""
  local number_re='^[0-9]+([.][0-9]+)?$'
  local opt OPTARG
  local OPTIND=1

  # The options follow the three leading positional arguments.
  while getopts ':c:w:' opt "${@:4}"; do
    case "$opt" in
      c) critical=$OPTARG ;;
      w) warning=$OPTARG ;;
      *) ;; # unknown or incomplete options surface as "Missing" below
    esac
  done

  if [[ -z $critical || -z $warning ]]; then
    echo "Missing -c or -w arguments"
    return 3
  fi

  # Thresholds are handed to awk for comparison, so accept plain numbers only.
  if [[ ! $critical =~ $number_re || ! $warning =~ $number_re ]]; then
    echo "ZFS POOL FREE $pool - UNKNOWN: -c and -w must be numeric"
    return 3
  fi

  # -H drops the header and -p prints exact byte counts, so one call yields
  # both values and its exit status is the status of zfs itself.
  local raw free_raw used_raw
  if ! raw=$(zfs list -Hpo available,used "$pool"); then
    echo "ZFS POOL FREE $pool - UNKNOWN: unable to query pool usage"
    return 3
  fi
  read -r free_raw used_raw <<<"$raw"

  if [[ ! $free_raw =~ ^[0-9]+$ || ! $used_raw =~ ^[0-9]+$ ]]; then
    echo "ZFS POOL FREE $pool - UNKNOWN: unexpected zfs list output"
    return 3
  fi

  # Floating-point work is done in awk; LC_ALL=C keeps "." as the decimal
  # separator. awk exits non-zero when the total size is zero.
  local stats perc_free free total
  if ! stats=$(LC_ALL=C awk -v f="$free_raw" -v u="$used_raw" 'BEGIN {
      t = f + u
      if (t <= 0) exit 1
      gib = 1073741824
      printf "%.2f %.2f %.2f\n", 100 * f / t, f / gib, t / gib
    }'); then
    echo "ZFS POOL FREE $pool - UNKNOWN: pool reports a total size of zero"
    return 3
  fi
  read -r perc_free free total <<<"$stats"

  if LC_ALL=C awk -v p="$perc_free" -v t="$critical" 'BEGIN { exit !(p + 0 < t + 0) }'; then
    echo "ZFS POOL FREE $pool - CRITICAL: ${perc_free}% ${free}/${total} GB"
    return 2
  elif LC_ALL=C awk -v p="$perc_free" -v t="$warning" 'BEGIN { exit !(p + 0 < t + 0) }'; then
    echo "ZFS POOL FREE $pool - WARNING: ${perc_free}% ${free}/${total} GB"
    return 1
  fi

  echo "ZFS POOL FREE $pool - OK: ${perc_free}% ${free}/${total} GB"
  return 0
}

# Command-line entry point: <action> <pool> [options].
# Dispatches to the check selected by the first argument and exits with that
# check's return code; anything else exits 3 (UNKNOWN).
if [[ $# -lt 2 ]]; then
  echo "Wrong number of arguments"
  exit 3
fi

ACTION=$1
POOL=$2

# Arguments are quoted so they reach the checks exactly as given, without
# word splitting or glob expansion.
case "$ACTION" in
  smart)
    smart_disks "$POOL"
    exit $?
    ;;
  errors)
    pool_errors "$POOL"
    exit $?
    ;;
  free)
    # pool_free receives the pool name followed by the complete original
    # argument list; it takes the pool from its third argument and the
    # -c/-w thresholds from the remaining ones.
    pool_free "$POOL" "$@"
    exit $?
    ;;
esac

echo "Unknown command: $ACTION"
exit 3