"""
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Copyright © 2019 Cloud Linux Software Inc.
This software is also available under ImunifyAV commercial license,
see <https://www.imunify360.com/legal/eula>
"""
import asyncio
import json
import logging
import os
import signal as signal_module
import subprocess
import tempfile
import time
from collections import defaultdict
from contextlib import suppress
from itertools import islice
from typing import Callable, Dict, Generator, List, Optional, Set, Tuple, Union
from defence360agent.contracts.config import (
Malware,
MalwareSignatures,
MyImunifyConfig,
)
from defence360agent.contracts.messages import MessageType
from defence360agent.contracts.permissions import (
ms_clean_requires_myimunify_protection,
)
from defence360agent.utils import (
RecurringCheckStop,
Singleton,
base64_encode_filename,
recurring_check,
)
from imav.contracts.config import MalwareTune
from imav.malwarelib.model import MalwareHit
from imav.malwarelib.utils.revisium import (
DeletionType,
ErrorType,
RescanResultType,
RevisiumCSVFile,
RevisiumJsonFile,
RevisiumTempFile,
)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def cleaner_result_instance(tempdir=None, mode=None):
    """Create the temporary file object that will hold the cleanup report.

    The report class follows ``MalwareTune.USE_JSON_REPORT``: a
    ``RevisiumJsonFile`` when the flag is truthy, a ``RevisiumCSVFile``
    otherwise. ``tempdir`` and ``mode`` are forwarded positionally to the
    selected class.
    """
    report_cls = (
        RevisiumJsonFile if MalwareTune.USE_JSON_REPORT else RevisiumCSVFile
    )
    return report_cls(tempdir, mode)
class MalwareCleanerLog(RevisiumTempFile):
    """Temporary file whose path is handed to the cleaner via ``--log``."""
class MalwareCleanerProgress(RevisiumJsonFile):
    """
    Progress file that is filled in by an external source and polled here
    """

    # Last value of "current" that was read from the progress file.
    _progress = 0

    @recurring_check(2)
    async def watch(self, callback):
        """Read the progress file once and report the change to ``callback``.

        ``callback`` receives the difference between the "current" value
        just read and the one seen on the previous call.

        Raises ``RecurringCheckStop`` when the progress file does not exist.
        Content that is not valid JSON is skipped without calling
        ``callback``.
        """
        try:
            snapshot = self.read()
        except json.JSONDecodeError:
            # Nothing parseable yet; try again on the next round.
            return
        except FileNotFoundError:
            raise RecurringCheckStop()

        current = snapshot["current"]
        delta = current - self._progress
        self._progress = current
        callback(delta)
class MalwareCleanupFileList(RevisiumTempFile):
    """Temporary file holding a list of file names, one encoded per line."""

    def write(self, filelist):
        """Write each name of ``filelist`` on its own line.

        Every name goes through ``base64_encode_filename`` and is followed
        by a newline byte; the file is opened in binary write mode.
        """
        with self._path.open("wb") as out:
            for name in filelist:
                out.write(base64_encode_filename(name) + b"\n")
def _normalize_path(path: str) -> str:
"""Round-trip through fsencode/fsdecode to match FilenameField behavior.
Ensures CleanupResult keys are consistent with MalwareHit.orig_file
which goes through FilenameField's os.fsencode/os.fsdecode cycle.
"""
return os.fsdecode(os.fsencode(path))
def _parse_int(value: Union[str, int]) -> int:
"""Convert str|int to int, in case errors return -2
-1 used as default value when storing CH
"""
try:
return int(value)
except ValueError:
return -2
class CleanupResultEntry(dict):
    """One record of the cleanup report, stored as a plain ``dict``.

    Keys available after construction:
        d - cleanup result
        e - error description
        s - signature that was triggered for the file during scan
        f - file path
        r - the result of aibolit rescan after cleanup
        mtime_before, mtime_after - taken from report fields "mb" / "ma"
        hash_before, hash_after - taken from report fields "hb" / "ha"
    """

    def __init__(self, data: Dict[str, Union[str, int]]):
        # A single odd record should not stop processing of the whole
        # report, so the optional fields fall back to defaults (-1 for the
        # numeric ones, "" for the hashes). "s" and "f" are read by plain
        # indexing and raise KeyError when missing (it's unexpected that
        # "f" is absent).
        fields = {
            "d": _parse_int(data.get("d", -1)),
            "e": _parse_int(data.get("e", -1)),
            "s": data["s"],
            "f": data["f"],
            "r": _parse_int(data.get("r", -1)),
            "mtime_before": _parse_int(data.get("mb", -1)),
            "mtime_after": _parse_int(data.get("ma", -1)),
            "hash_before": data.get("hb", ""),
            "hash_after": data.get("ha", ""),
        }
        super().__init__(fields)

    def is_cleaned(self):
        """Tell whether the entry counts as a successfully cleaned file.

        Never true for failed entries or entries that require MyImunify
        protection. An entry with error ``NOT_CLEANEDUP`` is treated as
        cleaned (with a warning); otherwise the entry must have no error
        and deletion type ``INJECTION_REMOVED``.
        """
        if self.is_failed():
            return False
        if self.requires_myimunify_protection():
            return False

        error = self["e"]
        if error == ErrorType.NOT_CLEANEDUP:
            logger.warning(
                "File has changed, assuming that it was cleaned: %s", self["f"]
            )
            return True

        no_error = error == ErrorType.NO_ERROR
        return no_error and self["d"] == DeletionType.INJECTION_REMOVED

    def is_removed(self):
        """Tell whether the entry is not failed, has no error and a deletion
        type greater than ``INJECTION_REMOVED``."""
        if self.is_failed():
            return False
        no_error = self["e"] == ErrorType.NO_ERROR
        return no_error and self["d"] > DeletionType.INJECTION_REMOVED

    def is_failed(self):
        """Tell whether the rescan result is ``DETECTED``."""
        rescan_result = self["r"]
        return rescan_result == RescanResultType.DETECTED

    def requires_myimunify_protection(self):
        """Tell whether the rescan result is ``REQUIRED_ADVANCED_SIGNATURES``."""
        rescan_result = self["r"]
        return rescan_result == RescanResultType.REQUIRED_ADVANCED_SIGNATURES

    def not_exist(self):
        """Tell whether the entry is not failed and its error is
        ``FILE_NOT_EXISTS``."""
        if self.is_failed():
            return False
        return self["e"] == ErrorType.FILE_NOT_EXISTS
class CleanupResult(Dict[str, CleanupResultEntry]):
    """
    Mapping of normalized file path to its CleanupResultEntry

    Membership tests and item access accept either a path string or an
    object exposing ``orig_file`` (such as MalwareHit).
    """

    def __init__(self, report=None):
        # A missing or empty report leaves the mapping empty.
        if not report:
            return
        entries = {}
        for record in report:
            path = _normalize_path(record["f"])
            entries[path] = CleanupResultEntry(record)
        super().__init__(entries)

    @staticmethod
    def __key(hit: Union[str, MalwareHit]) -> str:
        # Use the object's orig_file when it has one, else the value itself.
        try:
            return hit.orig_file
        except AttributeError:
            return hit

    def __contains__(self, hit: Union[str, MalwareHit]):
        lookup_key = self.__key(hit)
        return super().__contains__(lookup_key)

    def __getitem__(self, hit: Union[str, MalwareHit]):
        lookup_key = self.__key(hit)
        return super().__getitem__(lookup_key)
class MalwareCleaner:
    """Run the procu cleanup tool on a list of files and report the outcome.

    The tool is started through ``/opt/ai-bolit/wrapper``. Failures are
    logged and, when a ``sink`` was supplied, also sent to it as
    ``CleanupFailed`` messages.
    """

    PROCU_PATH = "/opt/ai-bolit/procu2.php"
    PROCU_DB = MalwareSignatures.PROCU_DB

    CLEANUP_ERROR_PREFIX = "Cleanup failed."
    # Procu exit codes (must match constants in procu2_src.php)
    _EXIT_ERROR_GENERAL = 1
    _EXIT_ERROR_INPUT_NOT_FOUND = 2
    _EXIT_ERROR_INVALID_USERNAME = 255
    # Exit codes above this offset are read as "offset + signal number".
    _SIGNAL_EXIT_CODE_OFFSET = 128

    def __init__(self, loop=None, sink=None, watch_progress=True):
        """
        :param loop: event loop used to schedule the progress watcher;
            ``asyncio.get_event_loop()`` when not given
        :param sink: optional object whose ``process_message`` coroutine
            receives ``CleanupFailed`` messages
        :param watch_progress: whether ``start`` schedules the progress
            watcher task
        """
        self._loop = loop if loop else asyncio.get_event_loop()
        self._proxy = MalwareCleanupProxy()
        self._sink = sink
        self._watch_progress = watch_progress

    def _cmd(
        self,
        filename,
        progress_path,
        result_path,
        log_path,
        soft,
        *,
        username,
        blacklist=None,
        use_csv=True,
        standard_only=True,
    ):
        """Build the argument list used to start the cleanup process.

        :param filename: path passed as ``--list``
        :param progress_path: path passed as ``--progress``
        :param result_path: path passed as ``--csv_result`` when ``use_csv``
            is truthy, as ``--result`` otherwise
        :param log_path: path passed as ``--log``
        :param soft: append ``--soft`` when truthy
        :param username: value passed as ``--username``
        :param blacklist: path passed as ``--black-list`` when truthy
        :param standard_only: append ``--standard-only`` when truthy
        :return: list of command line arguments
        """
        cmd = [
            "/opt/ai-bolit/wrapper",
            self.PROCU_PATH,
            "--deobfuscate",
            "--nobackup",
            "--forcibly_cleanup",
            "--rescan",
            "--list=%s" % filename,
            "--input-fn-b64-encoded",
            "--username=%s" % username,
            "--report-hashes",
        ]
        if blacklist:
            cmd.append("--black-list=%s" % blacklist)
        cmd.extend(
            [
                "--log=%s" % log_path,
                "--progress=%s" % progress_path,
            ]
        )
        if Malware.CLEANUP_DISABLE_CLOUDAV:
            cmd.append("--disable-cloudav")
        if use_csv:
            cmd.extend(["--csv_result=%s" % result_path])
        else:
            cmd.extend(["--result=%s" % result_path])
        if standard_only:
            cmd.extend(["--standard-only"])
        # The signature database is only passed when the file is present.
        if os.path.exists(self.PROCU_DB):
            cmd.append("--avdb")
            cmd.append(self.PROCU_DB)
        if soft:
            cmd.append("--soft")
        return cmd

    @staticmethod
    def _get_cleaner_error_info(
        exc: Exception,
        cmd: List[str],
        returncode: int,
        stdout: Optional[bytes],
        stderr: Optional[bytes],
    ):
        """Collect failure details into a dict.

        The dict has the keys ``exception`` (class name of ``exc``),
        ``return_code``, ``command``, ``out`` and ``err``. The output
        streams are decoded with undecodable bytes replaced; ``None``
        becomes an empty string.
        """
        return dict(
            exception=exc.__class__.__name__,
            return_code=returncode,
            command=cmd,
            out=stdout.decode(errors="replace") if stdout is not None else "",
            err=stderr.decode(errors="replace") if stderr is not None else "",
        )

    @classmethod
    def _signal_error(cls, sig_num: int) -> Optional[str]:
        """Describe termination by signal ``sig_num``.

        Returns None when ``sig_num`` is not a known signal number.
        """
        try:
            sig_name = signal_module.Signals(sig_num).name
        except ValueError:
            return None
        prefix = cls.CLEANUP_ERROR_PREFIX
        if sig_num == signal_module.SIGSEGV:
            return f"{prefix} Segmentation fault (signal {sig_name})"
        if sig_num == signal_module.SIGKILL:
            return f"{prefix} Process killed (signal {sig_name}, likely OOM)"
        if sig_num == signal_module.SIGTERM:
            return f"{prefix} Process terminated (signal {sig_name})"
        return f"{prefix} Process killed by signal {sig_name}"

    @classmethod
    def _categorize_process_error(
        cls,
        returncode: int,
        stderr: bytes,
    ) -> Optional[str]:
        """Categorize procu process failures into explicit error types.

        Returns a human-readable error string if the process failed,
        or None on success (exit code 0).

        Checks are made in this order: termination by signal, known
        patterns in ``stderr``, known procu exit codes, and finally a
        generic message carrying the exit code.
        """
        if returncode == 0:
            return None
        prefix = cls.CLEANUP_ERROR_PREFIX
        stderr_text = stderr.decode(errors="replace") if stderr else ""

        # A child killed by signal N is reported by asyncio as -N, while a
        # shell-style parent reports it as 128 + N. Accept both forms.
        sig_num = None
        if returncode < 0:
            sig_num = -returncode
        elif returncode > cls._SIGNAL_EXIT_CODE_OFFSET:
            sig_num = returncode - cls._SIGNAL_EXIT_CODE_OFFSET
        if sig_num is not None:
            signal_error = cls._signal_error(sig_num)
            if signal_error is not None:
                return signal_error
            # not a known signal, fall through

        if (
            "Allowed memory size of" in stderr_text
            and "bytes exhausted" in stderr_text
        ):
            return f"{prefix} Out of memory"
        if "Fatal error" in stderr_text:
            return f"{prefix} PHP fatal error (exit code {returncode})"
        if "Parse error" in stderr_text:
            return f"{prefix} PHP parse error (exit code {returncode})"
        if "error while loading shared libraries" in stderr_text:
            return f"{prefix} Shared library error (exit code {returncode})"
        if returncode == cls._EXIT_ERROR_GENERAL:
            return f"{prefix} General error (exit code 1)"
        if returncode == cls._EXIT_ERROR_INPUT_NOT_FOUND:
            return f"{prefix} Input file not found (exit code 2)"
        if (
            returncode == cls._EXIT_ERROR_INVALID_USERNAME
            and "Invalid username" in stderr_text
        ):
            return f"{prefix} Invalid username (exit code 255)"
        return f"{prefix} Process exited with code {returncode}"

    async def _send_cleanup_failed_message(self, info: dict):
        """Send ``info`` plus a ``timestamp`` to the sink as CleanupFailed.

        Does nothing without a sink. Cancellation is propagated; any other
        exception raised while sending is logged and swallowed.
        """
        if self._sink:
            try:
                msg = MessageType.CleanupFailed(
                    {**info, **{"timestamp": int(time.time())}}
                )
                await self._sink.process_message(msg)
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.exception(
                    "Exception while sending CleanupFailed message"
                )

    async def start(
        self,
        user,
        filelist,
        soft=True,
        blacklist=None,
        standard_only=None,
    ) -> Tuple[CleanupResult, Optional[str], List[str]]:
        """Run the cleanup process for ``filelist`` and collect its report.

        Temporary files (file list, blacklist, progress, result, log) are
        created in ``tempfile.gettempdir()`` for the duration of the run.

        :param user: value passed to the tool as ``--username``
        :param filelist: files to clean up
        :param soft: forwarded to ``_cmd`` (adds ``--soft``)
        :param blacklist: optional list written to a file and passed as
            ``--black-list``
        :param standard_only: resolved through ``is_standard_only``
        :return: ``(result, error, cmd)`` where ``error`` is None on
            success. When starting the process, waiting for it or reading
            the report raises, ``result`` is empty and ``error`` is the
            ``repr`` of the exception.
        :raises asyncio.CancelledError: re-raised after asking the started
            process to terminate
        """
        tempdir = tempfile.gettempdir()
        result_file = cleaner_result_instance(tempdir=tempdir)
        use_csv = isinstance(result_file, RevisiumCSVFile)
        standard_only = self.is_standard_only(user, standard_only)
        with MalwareCleanupFileList(
            tempdir=tempdir, mode=0o644
        ) as flist, MalwareCleanupFileList(
            tempdir=tempdir, mode=0o644
        ) as blk, MalwareCleanerProgress(
            tempdir=tempdir
        ) as progress, result_file as result, MalwareCleanerLog(
            tempdir=tempdir
        ) as log:
            flist.write(filelist)
            if blacklist:
                blk.write(blacklist)
            if self._watch_progress:
                self._loop.create_task(progress.watch(self._proxy.progress_cb))
            cmd = self._cmd(
                flist.filename,
                progress.filename,
                result.filename,
                log.filename,
                soft,
                username=user,
                # The blacklist file is only passed when there is content.
                blacklist=blk.filename if blacklist else None,
                use_csv=use_csv,
                standard_only=standard_only,
            )
            logger.debug("Executing %s", " ".join(cmd))
            out, err = b"", b""
            proc = None
            try:
                proc = await asyncio.subprocess.create_subprocess_exec(
                    *cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                out, err = await proc.communicate()
                report = result.read()
            except asyncio.CancelledError:
                if proc:
                    # The process may already be gone.
                    with suppress(ProcessLookupError):
                        proc.terminate()
                raise
            except Exception as exc:
                info = self._get_cleaner_error_info(
                    exc,
                    cmd,
                    proc.returncode if proc else 126,  # 126 - permission error
                    stdout=out,
                    stderr=err,
                )
                # Group errors by exit code on Sentry
                logger.error(
                    f"Cleanup failed exit_code={info.get('return_code')}: %s",
                    f"{info.get('out')} {info.get('err')}",
                    extra={**info, "exception": exc},
                )
                await self._send_cleanup_failed_message(
                    {**info, **dict(message=str(exc))}
                )
                return CleanupResult(), repr(exc), cmd

            error = self._categorize_process_error(proc.returncode, err)
            if error:
                info = self._get_cleaner_error_info(
                    RuntimeError(error),
                    cmd,
                    proc.returncode,
                    stdout=out,
                    stderr=err,
                )
                logger.error(
                    "Cleanup process failed: %s (exit_code=%d, stderr=%s)",
                    error,
                    proc.returncode,
                    err,
                )
                await self._send_cleanup_failed_message(
                    {**info, **dict(message=error)}
                )
            elif not report:
                error = f"{self.CLEANUP_ERROR_PREFIX} Report is empty"
                info = self._get_cleaner_error_info(
                    RuntimeError(error),
                    cmd,
                    proc.returncode,
                    stdout=out,
                    stderr=err,
                )
                logger.error(
                    "Cleanup report is empty despite successful exit"
                    " (exit_code=%d, input_files=%d)",
                    proc.returncode,
                    len(filelist),
                )
                await self._send_cleanup_failed_message(
                    {**info, **dict(message=error)}
                )
            elif len(report) < len(filelist):
                # Not treated as an error; only logged.
                logger.warning(
                    "Partial cleanup report: %d entries for %d input files",
                    len(report),
                    len(filelist),
                )
            return CleanupResult(report), error, cmd

    @staticmethod
    def is_standard_only(user: str, standard_only: Optional[bool]) -> bool:
        """Check if only standard signatures should be applied for the user"""
        # FIXME: DEF-20763 Remove this line to enable standard signatures
        return False
        # Everything below is unreachable until the line above is removed.
        if not MyImunifyConfig.ENABLED:
            # Ignore standard_only value if MyImunify is disabled
            return False
        elif standard_only is None:
            # When cleaned by default action
            return not ms_clean_requires_myimunify_protection(user)
        return standard_only
class MalwareCleanupProxy(metaclass=Singleton):
    """
    Class to interconnect Cleanup status endpoint and Cleanup plugin
    """

    # Maximum number of hits handed out by flush() in a single chunk.
    _CHUNK_SIZE = 10000

    def __init__(self):
        self.current = self.total = 0
        # Pending hits, grouped by
        # (cause, initiator, post_action, scan_id, standard_only).
        self.hits = defaultdict(set)

    def add(self, cause, initiator, post_action, scan_id, standard_only, hits):
        """Queue ``hits`` under the group identified by the other arguments."""
        self.hits[
            (cause, initiator, post_action, scan_id, standard_only)
        ].update(hits)

    def flush(
        self,
    ) -> Generator[Tuple[str, str, Callable, str, Set], None, None]:
        """Yield queued hits in chunks of at most ``_CHUNK_SIZE``.

        Each yielded tuple is the group key unpacked, followed by the set
        of hits of the chunk. ``total`` grows by the chunk size before each
        yield. Hits beyond the chunk are put back under the same key.

        NOTE(review): the return annotation lists five elements, but six
        are yielded (the five key parts stored by ``add`` plus the hits).
        """
        # Private marker, so that a hit equal to None is not mistaken for
        # "no more hits".
        exhausted = object()
        while self.hits:
            scan_info, hits = self.hits.popitem()
            all_hits = iter(hits)
            hits = set(islice(all_hits, self._CHUNK_SIZE))
            remaining_hit = next(all_hits, exhausted)
            if remaining_hit is not exhausted:
                self.hits[scan_info].add(remaining_hit)
                self.hits[scan_info].update(all_hits)
            self.total += len(hits)
            yield *scan_info, hits

    def progress_cb(self, increment=1):
        """Advance the ``current`` counter by ``increment``."""
        self.current += increment

    def reset(self):
        """Set both counters back to zero; queued hits are left untouched."""
        self.current = self.total = 0

    def get_progress(self):
        """Return the progress as an integer percentage.

        The denominator is ``total`` plus the number of groups still
        queued in ``hits``; None is returned when it is zero.
        """
        try:
            return int(self.current / (self.total + len(self.hits)) * 100)
        except ZeroDivisionError:
            return None