Skip to content

Commit

Permalink
Save NVMeOF log in files under /var/log/ceph.
Browse files Browse the repository at this point in the history
Fixes #317

Signed-off-by: Gil Bregman <[email protected]>
  • Loading branch information
gbregman committed Jan 10, 2024
1 parent 4300218 commit 6c120cf
Show file tree
Hide file tree
Showing 12 changed files with 396 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
strategy:
fail-fast: false
matrix:
test: ["cli", "state", "multi_gateway", "server", "grpc", "omap_lock", "old_omap"]
test: ["cli", "state", "multi_gateway", "server", "grpc", "omap_lock", "old_omap", "log_files"]
runs-on: ubuntu-latest
env:
HUGEPAGES: 512 # for multi gateway test, approx 256 per gateway instance
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ setup: ## Configure huge-pages (requires sudo/root password)

@echo Setup core dump pattern as /tmp/coredump/core.*
mkdir -p /tmp/coredump
sudo mkdir -p /var/log/ceph
sudo bash -c 'echo "|/usr/bin/env tee /tmp/coredump/core.%e.%p.%h.%t" > /proc/sys/kernel/core_pattern'
sudo bash -c 'echo $(HUGEPAGES) > $(HUGEPAGES_DIR)'
@echo Actual Hugepages allocation: $$(cat $(HUGEPAGES_DIR))
Expand Down
10 changes: 9 additions & 1 deletion ceph-nvmeof.conf
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,19 @@ enable_spdk_discovery_controller = False
#omap_file_lock_retries = 15
#omap_file_lock_retry_sleep_interval = 5
#omap_file_update_reloads = 10
log_level=debug
#log_files_enabled = True
#log_files_rotation_enabled = True
#max_log_file_size_in_mb=10
#max_log_files_count=20
#
# Note that if you change the log directory, the log files will only be visible inside the container
#
#log_directory = /var/log/ceph/

[discovery]
addr = 0.0.0.0
port = 8009
debug = 20

[ceph]
pool = rbd
Expand Down
20 changes: 14 additions & 6 deletions control/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@

import logging
import argparse
import signal
from .server import GatewayServer
from .config import GatewayConfig
from .config import GatewayLogger

if __name__ == '__main__':
# Set up root logger
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
gw_logger = None
gw_name = None

def sigterm_handler(signum, frame):
    """Compress the gateway's current log file when SIGTERM is delivered.

    Nothing is done unless both module-level ``gw_logger`` and ``gw_name``
    have been set.

    Args:
        signum: number of the received signal (unused).
        frame: stack frame that was interrupted (unused).
    """
    # NOTE(review): this handler returns normally, so once it is installed
    # SIGTERM does not terminate the process by itself - confirm intended.
    if not (gw_logger and gw_name):
        return
    gw_logger.compress_final_log_file(gw_name)

if __name__ == '__main__':
parser = argparse.ArgumentParser(prog="python3 -m control",
description="Manage NVMe gateways",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
Expand All @@ -30,8 +34,12 @@
)
args = parser.parse_args()

signal.signal(signal.SIGTERM, sigterm_handler)

config = GatewayConfig(args.config)
config.dump_config_file(logger)
gw_logger = GatewayLogger(config)
config.dump_config_file(gw_logger.logger)
with GatewayServer(config) as gateway:
gw_name = gateway.name
gateway.serve()
gateway.keep_alive()
209 changes: 209 additions & 0 deletions control/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
#

import configparser
import os
import socket
import logging
import logging.handlers
import gzip
import shutil

class GatewayEnumUtils:
def get_value_from_key(e_type, keyval, ignore_case = False):
Expand Down Expand Up @@ -46,6 +52,7 @@ class GatewayConfig:
"""

DISCOVERY_NQN = "nqn.2014-08.org.nvmexpress.discovery"
CEPH_RUN_DIRECTORY = "/var/run/ceph/"

def __init__(self, conffile):
self.filepath = conffile
Expand Down Expand Up @@ -102,3 +109,205 @@ def escape_address_if_ipv6(addr) -> str:
if ":" in addr and not addr.strip().startswith("["):
ret_addr = f"[{addr}]"
return ret_addr

class GatewayLogger:
    """Gateway-wide logger with optional rotating, gzip-compressed log files.

    The logger and its file handler are stored in class attributes, so every
    instance created in the process shares the same ``logging`` objects.
    Log files are written to
    ``<log_directory>/nvmeof-<gateway name>/nvmeof-log``.
    """

    CEPH_LOG_DIRECTORY = "/var/log/ceph/"
    MAX_LOG_FILE_SIZE_DEFAULT = 10       # used for "max_log_file_size_in_mb"
    MAX_LOG_FILES_COUNT_DEFAULT = 20
    NVME_LOG_DIR_PREFIX = "nvmeof-"
    NVME_LOG_FILE_NAME = "nvmeof-log"
    # Accepted textual log levels (matched case-insensitively).
    _LOG_LEVELS = {
        "DEBUG": logging.DEBUG,
        "INFO": logging.INFO,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
        "NOTSET": logging.NOTSET,
    }
    # Shared by all instances; reset by compress_final_log_file().
    logger = None
    handler = None

    def __init__(self, config=None):
        """Set up the shared logger, or reuse it if it already has a file handler.

        Args:
            config: gateway configuration object providing ``get``,
                ``get_with_default``, ``getboolean_with_default`` and
                ``getint_with_default``. When omitted, no log files are
                written and the log level is "info".
        """
        if config:
            self.log_directory = config.get_with_default("gateway", "log_directory",
                                                         GatewayLogger.CEPH_LOG_DIRECTORY)
            gateway_name = config.get("gateway", "name")
        else:
            self.log_directory = GatewayLogger.CEPH_LOG_DIRECTORY
            gateway_name = None

        if not self.log_directory.endswith("/"):
            self.log_directory += "/"

        if not gateway_name:
            gateway_name = socket.gethostname()
        self.log_directory = self.log_directory + GatewayLogger.NVME_LOG_DIR_PREFIX + gateway_name

        # Until assigned below, self.logger / self.handler resolve to the class
        # attributes, so a fully set up logger is simply reused.
        if GatewayLogger.logger and GatewayLogger.handler:
            return

        frmtr = logging.Formatter(fmt='[%(asctime)s] %(levelname)s %(filename)s:%(lineno)d: %(message)s')
        frmtr.default_msec_format = None

        if config:
            log_files_enabled = config.getboolean_with_default("gateway", "log_files_enabled", True)
            log_files_rotation_enabled = config.getboolean_with_default("gateway",
                                                                        "log_files_rotation_enabled", True)
            max_log_file_size = config.getint_with_default("gateway", "max_log_file_size_in_mb",
                                                           GatewayLogger.MAX_LOG_FILE_SIZE_DEFAULT)
            max_log_files_count = config.getint_with_default("gateway", "max_log_files_count",
                                                             GatewayLogger.MAX_LOG_FILES_COUNT_DEFAULT)
            log_level = config.get_with_default("gateway", "log_level", "info")
        else:
            log_files_enabled = False
            log_files_rotation_enabled = False
            max_log_file_size = GatewayLogger.MAX_LOG_FILE_SIZE_DEFAULT
            max_log_files_count = GatewayLogger.MAX_LOG_FILES_COUNT_DEFAULT
            # Fixed: a misspelled variable name used to leave log_level unbound here.
            log_level = "info"

        self.handler = None
        handler_error = None
        if log_files_enabled:
            GatewayLogger.rotate_backup_directories(self.log_directory, 5)
            if not log_files_rotation_enabled:
                # A zero maxBytes makes RotatingFileHandler never roll over.
                max_log_file_size = 0
                max_log_files_count = 0
            try:
                os.makedirs(self.log_directory, mode=0o777, exist_ok=True)
                self.handler = logging.handlers.RotatingFileHandler(
                    self.log_directory + "/" + GatewayLogger.NVME_LOG_FILE_NAME,
                    maxBytes=max_log_file_size * 1024 * 1024,
                    backupCount=max_log_files_count)
                self.handler.setFormatter(frmtr)
                if log_files_rotation_enabled:
                    self.handler.rotator = GatewayLogger.log_file_rotate
            except Exception as ex:
                # File logging is best effort, but report the failure once the
                # logger exists instead of hiding it.
                handler_error = ex

        logging.basicConfig(level=GatewayLogger.get_log_level(log_level))
        self.logger = logging.getLogger("nvmeof")
        if self.handler:
            self.logger.addHandler(self.handler)
        GatewayLogger.logger = self.logger
        GatewayLogger.handler = self.handler

        if handler_error is not None:
            self.logger.warning(f"Failure setting up log files under {self.log_directory}, "
                                f"will not write log files:\n{handler_error}")

    @staticmethod
    def rotate_backup_directories(dirname, count):
        """Shift ``dirname`` and its backups one generation back.

        ``dirname`` becomes ``dirname.bak``, ``.bak`` becomes ``.bak2``,
        ``.bakN`` becomes ``.bak(N+1)``, and ``.bak<count>`` is deleted.
        Every step is best effort: failures are ignored.

        Args:
            dirname: path of the live log directory, without a trailing slash.
            count: number of the oldest backup generation to keep.
        """
        try:
            shutil.rmtree(dirname + f".bak{count}", ignore_errors=True)
        except Exception:
            pass

        # Oldest first, so a rename never lands on a name still in use.
        renames = [(dirname + f".bak{i - 1}", dirname + f".bak{i}") for i in range(count, 2, -1)]
        renames.append((dirname + ".bak", dirname + ".bak2"))
        renames.append((dirname, dirname + ".bak"))
        for old_name, new_name in renames:
            try:
                os.rename(old_name, new_name)
            except Exception:
                pass

        # Just to be on the safe side, in case the rename failed
        try:
            shutil.rmtree(dirname, ignore_errors=True)
        except Exception:
            pass

    @staticmethod
    def get_log_level(log_level):
        """Translate a log level given as an int or a name into a logging level.

        Args:
            log_level: an integer level, returned unchanged, or one of the
                names "debug", "info", "warning", "error", "critical" or
                "notset" in any letter case.

        Returns:
            The matching ``logging`` level value.

        Raises:
            AssertionError: if the value is neither an int nor a known name.
        """
        if isinstance(log_level, int):
            return log_level
        # Raised explicitly rather than via "assert" so the check survives
        # "python -O" while callers still see the same exception type.
        if not isinstance(log_level, str):
            raise AssertionError(f"Invalid log level type {type(log_level).__name__}")
        try:
            return GatewayLogger._LOG_LEVELS[log_level.upper()]
        except KeyError:
            raise AssertionError(f"Unknown log level {log_level}") from None

    def set_log_level(self, log_level):
        """Change the level of the logger; accepts the same values as get_log_level()."""
        log_level = GatewayLogger.get_log_level(log_level)
        self.logger.setLevel(log_level)

    @staticmethod
    def log_file_rotate(src, dest):
        """Rotator installed on the file handler: compress on first rotation, else rename.

        Args:
            src: path of the file being rotated.
            dest: path it is rotated to.
        """
        # Files with an extension bigger than 1 are already compressed
        if not dest.endswith(".1"):
            os.rename(src, dest)
            return

        msgs, errs = GatewayLogger.compress_file(src, dest)
        if GatewayLogger.logger:
            for m in msgs:
                GatewayLogger.logger.info(m)
            for e in errs:
                GatewayLogger.logger.error(e)

    @staticmethod
    def compress_file(src, dest):
        """Gzip ``src`` into ``dest`` and delete ``src`` on success.

        Nothing is logged here; the texts are returned so the caller decides
        where to report them.

        Args:
            src: path of the file to compress.
            dest: path of the compressed file; replaced if it already exists.

        Returns:
            A ``(msgs, errs)`` tuple of informational and error message lists.
        """
        msgs = [f"Will compress log file {src} to {dest}"]
        errs = []
        if src == dest:
            errs.append(f"Can't compress log file {src} into the same file name")
            return msgs, errs

        # Get rid of a leftover destination file, if any.
        try:
            os.remove(dest)
        except Exception:
            pass

        try:
            with open(src, 'rb') as f_in, gzip.open(dest, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        except FileNotFoundError:
            errs.append(f"Failure compressing file {src}: file not found")
            return msgs, errs
        except Exception as ex:
            errs.append(f"Failure compressing file {src}:\n{ex}")
            # We ran into a problem trying to compress so need to remove
            # destination file in case one was created
            try:
                os.remove(dest)
            except Exception as rm_ex:
                errs.append(f"Failure deleting file {dest}, ignore:\n{rm_ex}")
            return msgs, errs

        # If we got here the compression was successful so we can delete the source file
        try:
            os.remove(src)
        except Exception as ex:
            errs.append(f"Failure deleting file {src}, ignore:\n{ex}")

        return msgs, errs

    def compress_final_log_file(self, gw_name):
        """Detach the file handler and compress the active log file.

        Does nothing when there is no file handler or no logger. Afterwards the
        shared logger and handler are cleared on both the instance and the class.

        Args:
            gw_name: name of the gateway; the log directory must end with it.
        """
        if not self.handler:
            return

        if not self.logger:
            return

        if not gw_name:
            self.logger.error("No gateway name, can't compress the log file")
            return

        if not self.log_directory.endswith(gw_name):
            self.logger.error(f"Log directory {self.log_directory} doesn't belong to gateway {gw_name}, "
                              f"do not compress log file")
            return

        final_handler = self.handler
        self.logger.removeHandler(final_handler)
        self.handler = None
        GatewayLogger.handler = None
        # removeHandler() doesn't close the handler; release the log file
        # before it gets compressed and deleted.
        try:
            final_handler.close()
        except Exception as ex:
            self.logger.error(f"Failure closing log file handler, ignore:\n{ex}")

        log_file = self.log_directory + "/" + GatewayLogger.NVME_LOG_FILE_NAME
        dest_name = log_file + ".gz"
        # Use the ".0" name when rotated files exist and ".0" is still free.
        if os.access(log_file + ".1", os.F_OK) and not os.access(log_file + ".0", os.F_OK):
            dest_name = log_file + ".0"

        msgs, errs = GatewayLogger.compress_file(log_file, dest_name)
        for m in msgs:
            self.logger.info(m)
        for e in errs:
            self.logger.error(e)
        self.logger = None
        GatewayLogger.logger = None
10 changes: 2 additions & 8 deletions control/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .config import GatewayConfig
from .state import GatewayState, LocalGatewayState, OmapGatewayState, GatewayStateHandler
from .config import GatewayEnumUtils
from .config import GatewayLogger
from .proto import gateway_pb2 as pb2

import rados
Expand Down Expand Up @@ -297,9 +298,7 @@ def __init__(self, config):
self.lock = threading.Lock()
self.omap_state = OmapGatewayState(self.config)

self.logger = logging.getLogger(__name__)
log_level = self.config.getint_with_default("discovery", "debug", 20)
self.logger.setLevel(level=log_level)
self.logger = GatewayLogger(config).logger

gateway_group = self.config.get_with_default("gateway", "group", "")
self.omap_name = f"nvmeof.{gateway_group}.state" \
Expand Down Expand Up @@ -1032,11 +1031,6 @@ def start_service(self):
self.logger.debug("received a ctrl+C interrupt. exiting...")

def main(args=None):
# Set up root logger
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

parser = argparse.ArgumentParser(prog="python3 -m control",
description="Discover NVMe gateways")
parser.add_argument(
Expand Down
5 changes: 3 additions & 2 deletions control/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .proto import gateway_pb2_grpc as pb2_grpc
from .config import GatewayConfig
from .config import GatewayEnumUtils
from .config import GatewayLogger
from .state import GatewayState

MAX_ANA_GROUPS = 4
Expand All @@ -47,7 +48,7 @@ class GatewayService(pb2_grpc.GatewayServicer):

def __init__(self, config, gateway_state, omap_lock, spdk_rpc_client) -> None:
"""Constructor"""
self.logger = logging.getLogger(__name__)
self.logger = GatewayLogger(config).logger
ver = os.getenv("NVMEOF_VERSION")
if ver:
self.logger.info(f"Using NVMeoF gateway version {ver}")
Expand Down Expand Up @@ -818,7 +819,7 @@ def list_namespaces(self, request, context=None):
nsid_msg = f"namespace with NSID {request.nsid} and UUID {request.uuid}"
else:
nsid_msg = f"namespace with NSID {request.nsid}"
self.logger.info(f"Received request to list {nsid_msg}for {request.subsystem}, context: {context}")
self.logger.info(f"Received request to list {nsid_msg} for {request.subsystem}, context: {context}")

with self.rpc_lock:
try:
Expand Down
Loading

0 comments on commit 6c120cf

Please sign in to comment.