diff --git a/opnsense_checkmk_agent.py b/opnsense_checkmk_agent.py index 3713b98..2ab8bb5 100644 --- a/opnsense_checkmk_agent.py +++ b/opnsense_checkmk_agent.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # vim: set fileencoding=utf-8:noet -## Copyright 2023 Bashclub https://github.com/bashclub +## Copyright 2024 Bashclub https://github.com/bashclub ## BSD-2-Clause ## ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ ## -__VERSION__ = "1.2.3" +__VERSION__ = "1.2.8" import sys import os @@ -86,7 +86,7 @@ SPOOLDIR = os.path.join(VARDIR,"spool") TASKDIR = os.path.join(BASEDIR,"tasks") TASKFILE_KEYS = "service|type|interval|interface|disabled|ipaddress|hostname|domain|port|piggyback|sshoptions|options|tenant" TASKFILE_REGEX = re.compile(f"^({TASKFILE_KEYS}):\s*(.*?)(?:\s+#|$)",re.M) -MAX_SIMULATAN_THREADS = 2 +MAX_SIMULATAN_THREADS = 4 for _dir in (BASEDIR, VARDIR, LOCALDIR, PLUGINSDIR, SPOOLDIR, TASKDIR): if not os.path.exists(_dir): @@ -184,6 +184,7 @@ class checkmk_handler(StreamRequestHandler): class checkmk_checker(object): _available_sysctl_list = [] _available_sysctl_temperature_list = [] + _ipaccess_log = {} _certificate_timestamp = 0 _check_cache = {} _datastore_mutex = threading.RLock() @@ -210,8 +211,8 @@ class checkmk_checker(object): modes.CBC(_iv), backend = _backend ).encryptor() - message = pad_pkcs7(message) message = message.encode("utf-8") + message = pad_pkcs7(message) _encrypted_message = _encryptor.update(message) + _encryptor.finalize() return pad_pkcs7(b"03",10) + SALT + _encrypted_message @@ -243,6 +244,18 @@ class checkmk_checker(object): except UnicodeDecodeError: return ("invalid key") + def _expired_lastaccesed(self,remote_ip): + _now = time.time() + _lastaccess = self._ipaccess_log.get(remote_ip,0) + _ret = True + if _lastaccess + self.expire_inventory > _now: + _ret = False + for _ip, _time in self._ipaccess_log.items(): + if _time + self.expire_inventory < _now: + del self._ipaccess_log[_ip] + self._ipaccess_log[remote_ip] = _now + return _ret + def do_checks(self,debug=False,remote_ip=None,**kwargs): self._getosinfo() _errors = [] @@ -294,6 +307,13 @@ class checkmk_checker(object): except: _errors.append(traceback.format_exc()) + if self._expired_lastaccesed(remote_ip): + try: + _lines += self.do_inventory() + except: + _errors.append(traceback.format_exc()) + + _lines.append("<<>>") for _check in dir(self): if _check.startswith("checklocal_"): @@ -1278,10 +1298,11 @@ class checkmk_checker(object): def check_ipmi(self): if not os.path.exists("/usr/local/bin/ipmitool"): return [] - _ret = ["<<>>"] - _out = self._run_prog("/usr/local/bin/ipmitool sensor list") - _ret += re.findall("^(?!.*\sna\s.*$).*",_out,re.M) - return _ret + _out = self._run_prog("ipmitool sensor list") + _ipmisensor = re.findall("^(?!.*\sna\s.*$).*",_out,re.M) + if _ipmisensor: + return ["<<>>"] + _ipmisensor + return [] def check_apcupsd(self): if self._config_reader().get("OPNsense",{}).get("apcupsd",{}).get("general",{}).get("Enabled") != "1": @@ -1300,10 +1321,7 @@ class checkmk_checker(object): if self._config_reader().get("system",{}).get("ssh",{}).get("enabled") != "enabled": return [] _ret = ["<<>>"] - with open("/usr/local/etc/ssh/sshd_config","r") as _f: - for _line in _f.readlines(): - if re.search("^[a-zA-Z]",_line): - _ret.append(_line.replace("\n","")) + _ret += self._run_cache_prog("sshd -T").splitlines() return _ret def check_kernel(self): @@ -1440,6 +1458,26 @@ class checkmk_checker(object): _ret.append(f"{_uptime_sec} {_idle_sec}") return _ret + def do_inventory(self): + _ret = [] + _persist = int(time.time()) + self.expire_inventory + 600 + if os.path.exists("/sbin/dmidecode") or os.path.exists("/usr/local/sbin/dmidecode") : + _ret += [f"<<>>"] + _ret += self._run_cache_prog("dmidecode -q",7200).replace("\t",":").splitlines() + _ret += [f"<<>>"] + if os.path.exists("/etc/os-release"): + _ret.append("[[[/etc/os-release]]]") + _ret.append(open("/etc/os-release","rt").read().replace("\n","|")) + else: + try: + _ret.append("[[[/etc/os-release]]]") + _ret += list(map(lambda x: 'Name={0}|VERSION="{1}"|VERSION_ID="{2}"|ID=freebsd|PRETTY_NAME="{0} {1}"'.format(x[0],x[1],x[1].split("-")[0]),re.findall("(\w+)\s([\w.-]+)\s(\d+)",self._run_cache_prog("uname -rsK",1200)))) + except: + raise + _ret += [f"<<>>"] + _ret += list(map(lambda x: "{0}|{1}|amd64|freebsd|{2}|install ok installed".format(*x),re.findall("(\S+)-([0-9][0-9a-z._,-]+)\s*(.*)",self._run_cache_prog("pkg info",1200),re.M))) + return _ret + def _run_prog(self,cmdline="",*args,shell=False,timeout=60,ignore_error=False): if type(cmdline) == str: _process = shlex.split(cmdline,posix=True) @@ -1517,7 +1555,7 @@ class checkmk_cached_process(object): return _data class checkmk_server(TCPServer,checkmk_checker): - def __init__(self,port,pidfile,onlyfrom=None,encrypt=None,skipcheck=None,tenants=None,**kwargs): + def __init__(self,port,pidfile,onlyfrom=None,encrypt=None,skipcheck=None,expire_inventory=0,tenants=None,**kwargs): self.tcp_port = port self.pidfile = pidfile self.onlyfrom = onlyfrom.split(",") if onlyfrom else None @@ -1526,6 +1564,7 @@ class checkmk_server(TCPServer,checkmk_checker): self._available_sysctl_list = self._run_prog("sysctl -aN").split() self._available_sysctl_temperature_list = list(filter(lambda x: x.lower().find("temperature") > -1 and x.lower().find("cpu") == -1,self._available_sysctl_list)) self.encrypt = encrypt + self.expire_inventory = expire_inventory self._mutex = threading.Lock() self.user = pwd.getpwnam("root") self.allow_reuse_address = True @@ -1574,23 +1613,30 @@ class checkmk_server(TCPServer,checkmk_checker): sys.stdout.write("\n") pass - def cmkclient(self,host="127.0.0.1",port=None,enryptionkey=None): - if port == None: - port = self.tcp_port - if host == "127.0.0.1" and enryptionkey == None: - enryptionkey = self.encrypt + def cmkclient(self,checkoutput="127.0.0.1",port=None,encrypt=None,**kwargs): _sock = socket.socket(socket.AF_INET,socket.SOCK_STREAM) - _sock.connect((host,port)) - _msg = b"" - while True: - _data = _sock.recv(2048) - if not _data: - break - _msg += _data + _sock.settimeout(3) + try: + _sock.connect((checkoutput,port)) + _sock.settimeout(None) + _msg = b"" + while True: + _data = _sock.recv(2048) + if not _data: + break + _msg += _data + except TimeoutError: + sys.stderr.write("timeout\n") + sys.stderr.flush() + sys.exit(1) - if enryptionkey: - return self.decrypt_msg(_msg,enryptionkey) - return _msg + if _msg[:2] == b"03": + if encrypt: + return self.decrypt_msg(_msg,encrypt) + else: + pprint(repr(_msg[:2])) + return "missing key" + return _msg.decode("utf-8") def _signal_handler(self,signum,*args): if signum in (signal.SIGTERM,signal.SIGINT): @@ -1754,21 +1800,21 @@ class checkmk_task(object): self.tenant = _tenant.split(",") if type(_tenant) == str else [] self.interval = int(config.get("interval","3600")) self.nextrun = time.time() + self.error = None self._data = "" self._thread = None @property def get_piggyback(self): ## namen mit host prefixen with self._mutex: - sys.stderr.write(f"getPiggyback-{self.id}\n") - sys.stderr.flush() return self.piggyback def update(self,config): with self._mutex: self.interval = int(config.get("interval","3600")) self.piggyback = config.get("piggyback") - self.tenant = config.get("tenant") + _tenant = config.get("tenant") + self.tenant = _tenant.split(",") if type(_tenant) == str else [] self.config = config _now = time.time() self.lastmodified = _now @@ -1779,6 +1825,7 @@ class checkmk_task(object): _t = None with self._mutex: if self._thread == None: + self.error = None _t = threading.Thread(target=self._run,name=self.id) _t.daemon = True self._thread = _t @@ -1810,9 +1857,8 @@ class checkmk_task(object): try: _data = subprocess.check_output(_proc_args,shell=False,encoding="utf-8",stderr=subprocess.DEVNULL,timeout=300) except subprocess.CalledProcessError as e: - if self._ignore_error: - _data = e.stdout - else: + with self._mutex: + self.error = e.stdout _data = "" except subprocess.TimeoutExpired: _data = "" @@ -1929,6 +1975,8 @@ class checkmk_task(object): def __str__(self): with self._mutex: + sys.stderr.write(f"getdata-{self.id}\n") + sys.stderr.flush() return self._data def __repr__(self): @@ -2051,7 +2099,6 @@ class checkmk_taskrunner(object): sys.stderr.write("GetDATA\n") sys.stderr.flush() with self._mutex: - sys.stderr.write("GetDATA-Mutex\n") sys.stderr.flush() if self.err: for _line in str(self.err).split(): @@ -2063,6 +2110,8 @@ class checkmk_taskrunner(object): _piggyback = "" _data = [] for _task in sorted(self._queue,key=lambda x: x.get_piggyback): + if _task.error: + _fails += 1 #if tenant in (None,_task.tenant): if len(_task.tenant) == 0 or tenant in _task.tenant: if _task.get_piggyback != _piggyback: @@ -2086,9 +2135,10 @@ class checkmk_taskrunner(object): _id = os.path.basename(_file) _task = list(filter(lambda x: x.id == _id,self._queue)) if _task: - if _task[0].lastmodified >= os.stat(_file).st_mtime: + if _task[0].lastmodified < os.stat(_file).st_mtime: sys.stderr.write(f"{_id} not modified\n") sys.stderr.flush() + _ids.append(_id) continue with open(_file,"r",encoding="utf-8") as _f: _options = dict(TASKFILE_REGEX.findall(_f.read())) @@ -2134,6 +2184,8 @@ class checkmk_taskrunner(object): next_task = self._queue[0] if next_task: + sys.stderr.write(f"next: {next_task.id} {next_task!r}\n") + sys.stderr.flush() wait_time = max(0, next_task.nextrun - _now - PREEXEC) if wait_time > 0: self._event.wait(min(30, wait_time)) @@ -2152,12 +2204,20 @@ class checkmk_taskrunner(object): running_tasks = self._get_running_task_threads() if len(running_tasks) < MAX_SIMULATAN_THREADS: task.run() + else: + sys.stderr.write("Max Threads running wait\n") + sys.stderr.flush() + self._event.wait(3) + self._event.clear() + REGEX_SMART_VENDOR = re.compile(r"^\s*(?P\d+)\s(?P[-\w]+).*\s{2,}(?P[\w\/() ]+)$",re.M) -REGEX_SMART_DICT = re.compile(r"^(.*?):\s*(.*?)$",re.M) +REGEX_SMART_DICT = re.compile(r"^(.*?)[:=]\s*(.*?)$",re.M) class smart_disc(object): - def __init__(self,device): + def __init__(self,device,description=""): self.device = device + if description: + self.description = description MAPPING = { "Model Family" : ("model_family" ,lambda x: x), "Model Number" : ("model_family" ,lambda x: x), @@ -2200,8 +2260,10 @@ class smart_disc(object): "Critical Comp. Temp. Threshold" : ("temperature_crit" ,lambda x: x.split(" ")[0]), "Media and Data Integrity Errors" : ("media_errors" ,lambda x: x), "Airflow_Temperature_Cel" : ("temperature" ,lambda x: x), - "number of hours powered up" : ("poweronhours" ,lambda x: x.split(".")[0]), + "number of hours powered up" : ("poweronhours" ,lambda x: x.split(".")[0]), + "Accumulated power on time, hours" : ("poweronhours" ,lambda x: x.split(":")[0].replace("minutes ","")), "Accumulated start-stop cycles" : ("powercycles" ,lambda x: x), + "Available Spare" : ("wearoutspare" ,lambda x: x.replace("%","")), "SMART overall-health self-assessment test result" : ("smart_status" ,lambda x: int(x.lower().strip() == "passed")), "SMART Health Status" : ("smart_status" ,lambda x: int(x.lower() == "ok")), } @@ -2247,6 +2309,8 @@ class smart_disc(object): return "" if not getattr(self,"model_type",None): self.model_type = getattr(self,"model_family","unknown") + if not getattr(self,"model_family",None): + self.model_type = getattr(self,"model_type","unknown") for _k,_v in self.__dict__.items(): if _k.startswith("_") or _k in ("device"): continue @@ -2293,6 +2357,8 @@ if __name__ == "__main__": help=_("path to pid file")) _parser.add_argument("--onlyfrom",type=str, help=_("comma seperated ip addresses to allow")) + _parser.add_argument("--expire_inventory",type=int,default=3600*4, + help=_("number of seconds for inventory expire (default 4h)")) _parser.add_argument("--skipcheck",type=str, help=_("R|comma seperated checks that will be skipped \n{0}".format("\n".join([", ".join(_checks_available[i:i+10]) for i in range(0,len(_checks_available),10)])))) _parser.add_argument("--zabbix",action="store_true", @@ -2311,7 +2377,6 @@ if __name__ == "__main__": sys.exit(1) _parser.error = _args_error args = _parser.parse_args() - if args.configfile and os.path.exists(args.configfile): for _k,_v in re.findall(f"^(\w+):\s*(.*?)(?:\s+#|$)",open(args.configfile,"rt").read(),re.M): if _k == "port": @@ -2320,6 +2385,8 @@ if __name__ == "__main__": args.encrypt = _v if _k == "onlyfrom": args.onlyfrom = _v + if _k == "expire_inventory": + args.expire_inventory = _v if _k == "skipcheck": args.skipcheck = _v if _k == "tenants": @@ -2387,7 +2454,7 @@ if __name__ == "__main__": _server.daemonize() elif args.checkoutput: - sys.stdout.write(_server.cmkclient(args.checkoutput)) + sys.stdout.write(_server.cmkclient(**args.__dict__)) sys.stdout.write("\n") sys.stdout.flush() @@ -2408,7 +2475,7 @@ if __name__ == "__main__": from pkg_resources import parse_version _github_req = requests.get(f"https://api.github.com/repos/bashclub/check-opnsense/contents/opnsense_checkmk_agent.py?ref={args.update}") if _github_req.status_code != 200: - raise Exception("Github Error") + raise Exception(f"Github Error {_github_req.status_code}") _github_version = _github_req.json() _github_last_modified = datetime.strptime(_github_req.headers.get("last-modified"),"%a, %d %b %Y %X %Z") _new_script = base64.b64decode(_github_version.get("content")).decode("utf-8") @@ -2505,8 +2572,8 @@ if __name__ == "__main__": try: _taskrunner = checkmk_taskrunner(None) _taskrunner.check_taskdir() - for _task in _taskrunner._queue: - print(" * [{type}]{id} ({interval} sec)".format(**_task.__dict__)) + for _task in sorted(_taskrunner._queue,key=lambda x: (x.type,x.id)): + print(" * [{type}]{id} ({interval} sec) piggyback:{piggyback} tenant:{tenant}".format(**_task.__dict__)) except: raise