Settings

Users can provide service settings via the /etc/a3m/a3m.cfg configuration file, e.g.:

[a3m]
debug = False

Environment variables are also supported and they are evaluated last, e.g.:

env A3M_DEBUG=yes a3m ...

Configuration settings are not properly described yet, but here’s the list:

  • debug (boolean)

  • batch_size (int)

  • concurrent_packages (int)

  • rpc_threads (int)

  • worker_threads (int)

  • shared_directory (string)

  • temp_directory (string)

  • processing_directory (string)

  • rejected_directory (string)

  • capture_client_script_output (boolean)

  • removable_files (string)

  • secret_key (string)

  • prometheus_bind_address (string)

  • prometheus_bind_port (string)

  • time_zone (string)

  • db_engine (string)

  • db_name (string)

  • db_user (string)

  • db_password (string)

  • db_host (string)

  • db_port (string)

  • rpc_bind_address (string)

  • s3_enabled (boolean)

  • s3_endpoint_url (string)

  • s3_region_name (string)

  • s3_access_key_id (string)

  • s3_secret_access_key (string)

  • s3_use_ssl (boolean)

  • s3_addressing_style (string)

  • s3_signature_version (string)

  • s3_bucket (string)

  • org_id (string)

  • org_name (string)

For greater flexibility, it is also possible to alter the application settings module manually. This is what our a3m.settings.common module looks like:

import json
import logging.config
import math
import multiprocessing
import os
from io import StringIO
from pathlib import Path
from typing import Any

import django_stubs_ext
from platformdirs import user_data_dir

from a3m.appconfig import Config


# Apply the django-stubs-ext runtime patches at import time, before the rest
# of the settings are evaluated (presumably so that generic Django type
# annotations work at runtime — see the django-stubs-ext documentation).
django_stubs_ext.monkeypatch()


# Describes every supported setting: the attribute name used with
# ``config.get`` maps to the INI section/option it is read from and the type
# it is parsed as. All settings live in the "a3m" section, and the option name
# matches the attribute name except for ``temp_directory``, which is stored
# under the ``temp_dir`` option.
CONFIG_MAPPING = {
    attr: {
        "section": "a3m",
        "option": "temp_dir" if attr == "temp_directory" else attr,
        "type": kind,
    }
    for attr, kind in (
        ("debug", "boolean"),
        ("batch_size", "int"),
        ("concurrent_packages", "int"),
        ("rpc_threads", "int"),
        ("worker_threads", "int"),
        ("shared_directory", "string"),
        ("temp_directory", "string"),
        ("processing_directory", "string"),
        ("rejected_directory", "string"),
        ("capture_client_script_output", "boolean"),
        ("removable_files", "string"),
        ("secret_key", "string"),
        ("prometheus_bind_address", "string"),
        ("prometheus_bind_port", "string"),
        ("time_zone", "string"),
        ("db_engine", "string"),
        ("db_name", "string"),
        ("db_user", "string"),
        ("db_password", "string"),
        ("db_host", "string"),
        ("db_port", "string"),
        ("rpc_bind_address", "string"),
        ("s3_enabled", "boolean"),
        ("s3_endpoint_url", "string"),
        ("s3_region_name", "string"),
        ("s3_access_key_id", "string"),
        ("s3_secret_access_key", "string"),
        ("s3_use_ssl", "boolean"),
        ("s3_addressing_style", "string"),
        ("s3_signature_version", "string"),
        ("s3_bucket", "string"),
        ("org_id", "string"),
        ("org_name", "string"),
    )
}


# Built-in defaults in INI format, loaded before any configuration file.
# Options left empty here are either filled in later (the four directories and
# ``db_name`` fall back to locations under the data directory) or leave a
# feature off (an empty ``prometheus_bind_port`` disables Prometheus).
# ``concurrent_packages`` and ``worker_threads`` have no entry because their
# defaults are computed at import time.
# NOTE: this is not a raw string, so ``Icon\r`` in ``removable_files`` contains
# an actual carriage-return character.
# NOTE(review): ``secret_key`` ships with a placeholder value; deployments
# presumably should override it.
CONFIG_DEFAULTS = """[a3m]

debug = False
batch_size = 128
rpc_threads = 4
prometheus_bind_address =
prometheus_bind_port =
time_zone = UTC
capture_client_script_output = True
removable_files = Thumbs.db, Icon, Icon\r, .DS_Store
secret_key = 12345
rpc_bind_address = 0.0.0.0:7000

db_engine = django.db.backends.sqlite3
db_name =
db_user =
db_password =
db_host =
db_port =

s3_enabled = False
s3_endpoint_url =
s3_region_name =
s3_access_key_id =
s3_secret_access_key =
s3_use_ssl = False
s3_addressing_style = path
s3_signature_version = s3v4
s3_bucket =

shared_directory =
temp_dir =
processing_directory =
rejected_directory =

org_id =
org_name =
"""


def get_data_dir():
    """Return the directory where a3m keeps its data.

    Normally this is the per-user data directory reported by ``platformdirs``.
    When the current user's home directory resolves to ``/``, a fixed path
    under ``/home/a3m`` is returned instead.
    """
    if str(Path.home()) != "/":
        return Path(user_data_dir("a3m", "Artefactual"))

    # A3M-TODO: a home of "/" is presumably what we get when a command runs
    # with an unknown uid (Compose); revisit this hard-coded fallback.
    return Path("/home/a3m/.local/share/a3m")


def _get_data_dir_defaults(config):
    """Build fallback settings that live under the a3m data directory.

    Args:
        config: Object exposing ``get(name)`` for the settings read so far.

    Returns:
        A ``{"a3m": {...}}`` dict. The inner dict holds the four directory
        options when ``shared_directory`` is unset, and ``db_name`` when that
        is unset; it is empty when both are already configured.

    Side effects:
        Creates the data directory (and parents) when any fallback is used.
    """
    base_dir = get_data_dir()
    share_dir = base_dir / "share"
    fallbacks = {}

    def with_trailing_sep(path):
        # Joining with an empty component appends a trailing path separator.
        return os.path.join(str(path), "")

    if not config.get("shared_directory"):
        fallbacks["shared_directory"] = with_trailing_sep(share_dir)
        fallbacks["temp_dir"] = with_trailing_sep(share_dir / "tmp")
        fallbacks["processing_directory"] = with_trailing_sep(
            share_dir / "currentlyProcessing"
        )
        fallbacks["rejected_directory"] = with_trailing_sep(share_dir / "rejected")

    if not config.get("db_name"):
        fallbacks["db_name"] = base_dir / "db.sqlite"

    # Only touch the filesystem when something will actually live there.
    if fallbacks:
        base_dir.mkdir(parents=True, exist_ok=True)

    return {"a3m": fallbacks}


# Build the configuration in layers, in this order: the built-in defaults, the
# system-wide configuration file, and finally data-directory fallbacks for the
# directories and database name that are still empty at that point (which is
# why the fallbacks must be computed after the file has been read).
# ``env_prefix="A3M"`` is what lets settings be supplied as ``A3M_*``
# environment variables, e.g. ``A3M_DEBUG``.
config = Config(env_prefix="A3M", attrs=CONFIG_MAPPING)
config.read_defaults(StringIO(CONFIG_DEFAULTS))
config.read_files(["/etc/a3m/a3m.cfg"])
config.read_dict(_get_data_dir_defaults(config))


# Django

# Single "default" database, driven entirely by the ``db_*`` settings.
# NOTE(review): ``OPTIONS`` is handed to the database backend; "timeout" is a
# sqlite3 connection option — confirm it is accepted by any other engine
# selected through ``db_engine``.
DATABASES = {
    "default": {
        "ENGINE": config.get("db_engine"),
        "NAME": config.get("db_name"),
        "USER": config.get("db_user"),
        "PASSWORD": config.get("db_password"),
        "HOST": config.get("db_host"),
        "PORT": config.get("db_port"),
        # Lifetime of a database connection, in seconds.
        "CONN_MAX_AGE": 3600,
        "OPTIONS": {"timeout": 5},
    }
}

# No middleware is installed.
MIDDLEWARE_CLASSES = ()

TEMPLATES = [{"BACKEND": "django.template.backends.django.DjangoTemplates"}]

# The only Django application is a3m's own.
INSTALLED_APPS = ("a3m.main",)

SECRET_KEY = config.get("secret_key")

# Timezone-aware datetimes, using the configured zone (UTC by default).
USE_TZ = True
TIME_ZONE = config.get("time_zone")

# Configure logging manually: setting LOGGING_CONFIG to None stops Django from
# applying LOGGING itself, so it is applied explicitly at the end of this
# section.
LOGGING_CONFIG = None

# Location of an optional JSON logging configuration file. When it exists, its
# parsed content is passed to `logging.config.dictConfig` in place of the
# built-in LOGGING dictionary below.
LOGGING_CONFIG_FILE = "/etc/a3m/logging.json"

# Built-in logging configuration: everything goes to a single console handler
# at INFO level, with the "bagit" logger limited to WARNING.
LOGGING: dict[str, Any] = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "detailed": {
            "format": "%(levelname)-8s <%(asctime)s>: %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S",
        }
    },
    "handlers": {
        "console": {
            "level": "INFO",
            "class": "logging.StreamHandler",
            "formatter": "detailed",
        }
    },
    "loggers": {"a3m": {"level": "INFO"}, "bagit": {"level": "WARNING"}},
    "root": {"handlers": ["console"], "level": "INFO"},
}

DEBUG = config.get("debug")

# In debug mode, switch to a more verbose format (process, thread, module,
# function and line number) and lower every level to DEBUG.
if DEBUG:
    LOGGING["formatters"]["detailed"]["format"] = (
        "%(levelname)-8s <%(process)d:%(threadName)s> <%(asctime)s> %(module)s:%(funcName)s:%(lineno)d: %(message)s"
    )
    LOGGING["handlers"]["console"]["level"] = "DEBUG"
    LOGGING["root"]["level"] = "DEBUG"
    # NOTE: this replaces the whole "loggers" mapping, so the "bagit" WARNING
    # entry defined above is not carried over in debug mode.
    LOGGING["loggers"] = {
        "a3m": {"level": "DEBUG"},
        # Use "DEBUG" to log database queries.
        "django.db.backends": {"level": "WARNING"},
    }


# A configuration file, when present, takes over completely: the LOGGING
# dictionary (including the DEBUG adjustments above) is then ignored.
if os.path.isfile(LOGGING_CONFIG_FILE):
    with open(LOGGING_CONFIG_FILE) as f:
        logging.config.dictConfig(json.load(f))
else:
    logging.config.dictConfig(LOGGING)


def concurrent_packages_default():
    """Return the default number of packages processed concurrently.

    SQLite engines are limited to one package at a time; any other engine
    defaults to half of the CPU count, rounded up.
    """
    engine = DATABASES["default"]["ENGINE"]
    if "sqlite" not in engine:
        return math.ceil(multiprocessing.cpu_count() / 2)

    # A3M-TODO: this needs to be investigated further, but having multiple
    # writers in SQLite seems counterproductive.
    return 1


# Processing settings.
# ``concurrent_packages`` and ``worker_threads`` have no entry in
# CONFIG_DEFAULTS; their fallbacks are computed here at import time and passed
# to ``config.get`` as ``default``.
BATCH_SIZE = config.get("batch_size")
CONCURRENT_PACKAGES = config.get(
    "concurrent_packages", default=concurrent_packages_default()
)
RPC_THREADS = config.get("rpc_threads")
WORKER_THREADS = config.get("worker_threads", default=multiprocessing.cpu_count() + 1)
# Read as a single string (comma-separated in the built-in defaults).
REMOVABLE_FILES = config.get("removable_files")
CAPTURE_CLIENT_SCRIPT_OUTPUT = config.get("capture_client_script_output")
# Hard-coded; not exposed through the configuration file.
DEFAULT_CHECKSUM_ALGORITHM = "sha256"
RPC_BIND_ADDRESS = config.get("rpc_bind_address")


# Shared directories
# When ``shared_directory`` is not configured, all four default to locations
# under the data directory. Note that ``temp_directory`` is read from the
# ``temp_dir`` option.

SHARED_DIRECTORY = config.get("shared_directory")
TEMP_DIRECTORY = config.get("temp_directory")
REJECTED_DIRECTORY = config.get("rejected_directory")
PROCESSING_DIRECTORY = config.get("processing_directory")


# Prometheus
#
# Prometheus is enabled only when ``prometheus_bind_port`` parses as an
# integer. The built-in default is empty, which leaves it disabled.

PROMETHEUS_BIND_ADDRESS = config.get("prometheus_bind_address")
# Quoted so the annotation is never evaluated at runtime.
PROMETHEUS_BIND_PORT: "int | None"
try:
    PROMETHEUS_BIND_PORT = int(config.get("prometheus_bind_port"))
except (TypeError, ValueError):
    # ValueError: empty or non-numeric string. TypeError: a non-string value
    # such as None, should ``config.get`` ever return one.
    # Always define the setting so that reading it while Prometheus is
    # disabled yields None instead of raising AttributeError.
    PROMETHEUS_BIND_PORT = None
    PROMETHEUS_ENABLED = False
else:
    PROMETHEUS_ENABLED = True


# S3
# All values come straight from the ``s3_*`` settings; S3 is disabled in the
# built-in defaults.

S3_ENABLED = config.get("s3_enabled")
S3_ENDPOINT_URL = config.get("s3_endpoint_url")
S3_REGION_NAME = config.get("s3_region_name")
S3_ACCESS_KEY_ID = config.get("s3_access_key_id")
S3_SECRET_ACCESS_KEY = config.get("s3_secret_access_key")
S3_USE_SSL = config.get("s3_use_ssl")
S3_ADDRESSING_STYLE = config.get("s3_addressing_style")
S3_SIGNATURE_VERSION = config.get("s3_signature_version")
S3_BUCKET = config.get("s3_bucket")

# ~ S3 config example ~
# S3_ENDPOINT_URL = "https://play.min.io"
# S3_USE_SSL = True
# S3_ACCESS_KEY_ID = "Q3AM3UQ867SPQQA43P2F"
# S3_SECRET_ACCESS_KEY = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
# S3_BUCKET = "a3m"

# A3M-TODO: fix this
# The instance identifier is hard-coded, so it is the same for every
# installation and cannot be changed through the configuration.
INSTANCE_ID = "fec7bcf7-45db-4a22-8ceb-e94377db3476"

# Organization details; both are empty in the built-in defaults.
ORG_ID = config.get("org_id")
ORG_NAME = config.get("org_name")