]> git.sesse.net Git - bcachefs-tools-debian/commitdiff
fsck: add systemd service definitions for automatic online service
authorDarrick J. Wong <djwong@kernel.org>
Tue, 5 Dec 2023 02:43:31 +0000 (18:43 -0800)
committerKent Overstreet <kent.overstreet@linux.dev>
Thu, 7 Dec 2023 16:57:36 +0000 (11:57 -0500)
Add some systemd service files so that bcachefs can automatically fsck
mounted filesystems in the background.  Hopefully with minimal
disruption to frontend operations.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Makefile
debian/bcachefs-tools.postinst
debian/bcachefs-tools.postrm
fsck/bcachefsck@.service.in [new file with mode: 0644]
fsck/bcachefsck_fail [new file with mode: 0755]
fsck/bcachefsck_fail@.service.in [new file with mode: 0644]
fsck/system-bcachefsck.slice [new file with mode: 0644]

index aa7341e02fc19d08a3bf4c8b0a7b794cda96d5dc..fa9f2ff2cf68b08a9cb571b91b9a79a405265361 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -91,9 +91,36 @@ else
        ROOT_SBINDIR?=$(PREFIX)/sbin
        INITRAMFS_DIR=/etc/initramfs-tools
 endif
+LIBDIR=$(PREFIX)/lib
+
+PKGCONFIG_SERVICEDIR:=$(shell $(PKG_CONFIG) --variable=systemdsystemunitdir systemd)
+ifeq (,$(PKGCONFIG_SERVICEDIR))
+       $(warning skipping systemd integration)
+else
+BCACHEFSCK_ARGS=-f -n
+systemd_libfiles=\
+       fsck/bcachefsck_fail
+
+systemd_services=\
+       fsck/bcachefsck_fail@.service \
+       fsck/bcachefsck@.service \
+       fsck/system-bcachefsck.slice
+
+built_scripts+=\
+       fsck/bcachefsck_fail@.service \
+       fsck/bcachefsck@.service
+# Generate unit files from their .in templates; remove partial output if sed fails
+%.service: %.service.in
+       @echo "    [SED]    $@"
+       $(Q)sed -e "s|@libdir@|$(LIBDIR)|g" \
+               -e "s|@bcachefsck_args@|$(BCACHEFSCK_ARGS)|g" < $< > $@ || { $(RM) $@; exit 1; }
+
+optional_build+=$(systemd_libfiles) $(systemd_services)
+optional_install+=install_systemd
+endif  # PKGCONFIG_SERVICEDIR
 
 .PHONY: all
-all: bcachefs
+all: bcachefs $(optional_build)
 
 .PHONY: debug
 debug: CFLAGS+=-Werror -DCONFIG_BCACHEFS_DEBUG=y -DCONFIG_VALGRIND=y
@@ -157,7 +184,7 @@ cmd_version.o : .version
 .PHONY: install
 install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
 install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
-install: bcachefs
+install: bcachefs $(optional_install)
        $(INSTALL) -m0755 -D bcachefs      -t $(DESTDIR)$(ROOT_SBINDIR)
        $(INSTALL) -m0644 -D bcachefs.8    -t $(DESTDIR)$(PREFIX)/share/man/man8/
        $(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
@@ -173,11 +200,17 @@ install: bcachefs
        sed -i '/^# Note: make install replaces/,$$d' $(DESTDIR)$(INITRAMFS_HOOK)
        echo "copy_exec $(ROOT_SBINDIR)/bcachefs /sbin/bcachefs" >> $(DESTDIR)$(INITRAMFS_HOOK)
 
+.PHONY: install_systemd
+install_systemd: $(systemd_services) $(systemd_libfiles)
+       $(INSTALL) -m0755 -D $(systemd_libfiles) -t $(DESTDIR)$(LIBDIR)
+       $(INSTALL) -m0644 -D $(systemd_services) -t $(DESTDIR)$(PKGCONFIG_SERVICEDIR)
+
 .PHONY: clean
 clean:
        @echo "Cleaning all"
        $(Q)$(RM) bcachefs libbcachefs.a tests/test_helper .version *.tar.xz $(OBJS) $(DEPS) $(DOCGENERATED)
        $(Q)$(RM) -rf rust-src/*/target
+       $(Q)$(RM) -f $(built_scripts)
 
 .PHONY: deb
 deb: all
index 483b96190d4b63ab502f31aae72dc2566855e01c..56dd8905da46b7685248a1a2725b32c259ce12a5 100644 (file)
@@ -2,6 +2,8 @@
 
 set -e
 
+#DEBHELPER#
+
 case "$1" in
     configure)
        if which update-initramfs >/dev/null; then
index 6b6fe8acd31a9c1c2894398aff08554ad6264cba..2d913367eb818d9c92a44cead68dc23eb6e9a765 100644 (file)
@@ -2,6 +2,8 @@
 
 set -e
 
+#DEBHELPER#
+
 case "$1" in
     remove)
        if which update-initramfs >/dev/null; then
diff --git a/fsck/bcachefsck@.service.in b/fsck/bcachefsck@.service.in
new file mode 100644 (file)
index 0000000..86c1824
--- /dev/null
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle.  All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Online bcachefsck for %f
+OnFailure=bcachefsck_fail@%i.service
+Documentation=man:bcachefs(8)
+
+# Explicitly require the capabilities that this program needs
+ConditionCapability=CAP_SYS_ADMIN
+ConditionCapability=CAP_FOWNER
+ConditionCapability=CAP_DAC_OVERRIDE
+ConditionCapability=CAP_DAC_READ_SEARCH
+ConditionCapability=CAP_SYS_RAWIO
+
+# Must be a mountpoint
+ConditionPathIsMountPoint=%f
+RequiresMountsFor=%f
+
+[Service]
+Type=oneshot
+Environment=SERVICE_MODE=1
+ExecStart=bcachefs fsck --real-mountpoint /tmp/scrub/ @bcachefsck_args@ %f
+SyslogIdentifier=%N
+
+# Run fsck with minimal CPU and IO priority so that nothing else will starve.
+IOSchedulingClass=idle
+CPUSchedulingPolicy=idle
+CPUAccounting=true
+Nice=19
+
+# Create the service underneath the background service slice so that we can
+# control resource usage.
+Slice=system-bcachefsck.slice
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Dynamically create a user that isn't root
+DynamicUser=true
+
+# Make the entire filesystem readonly and /home inaccessible, then bind mount
+# the filesystem we're supposed to be checking into our private /tmp dir.
+# 'norbind' means that we don't bind anything under that original mount.
+# This enables checking filesystems mounted under /tmp in the global mount
+# namespace.
+ProtectSystem=strict
+ProtectHome=yes
+PrivateTmp=true
+BindPaths=%f:/tmp/scrub:norbind
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No memory pages that are both writable and executable
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# bcachefsck needs these privileges to run, and no others
+CapabilityBoundingSet=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+NoNewPrivileges=true
+
+# bcachefsck doesn't create files
+UMask=7777
+
+# No access to hardware /dev files except for block devices
+ProtectClock=true
+DevicePolicy=closed
+DeviceAllow=block-*
diff --git a/fsck/bcachefsck_fail b/fsck/bcachefsck_fail
new file mode 100755 (executable)
index 0000000..283cee7
--- /dev/null
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle.  All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+# Email logs of failed bcachefsck and bcachefsck_all unit runs
+
+recipient="$1"
+test -z "${recipient}" && exit 0
+service="$2"
+test -z "${service}" && exit 0
+mntpoint="$3"
+
+hostname="$(hostname -f 2>/dev/null)"
+test -z "${hostname}" && hostname="${HOSTNAME}"
+
+mailer="$(command -v sendmail)"
+if [ ! -x "${mailer}" ]; then
+       echo "${mailer}: Mailer program not found."
+       exit 1
+fi
+
+fail_mail_mntpoint() {
+       local scrub_svc
+
+       # Turn the mountpoint into a properly escaped systemd instance name
+       scrub_svc="$(systemd-escape --template "${service}@.service" --path "${mntpoint}")"
+       cat << ENDL
+To: ${recipient}
+From: <${service}@${hostname}>
+Subject: ${service} failure on ${mntpoint}
+Content-Transfer-Encoding: 8bit
+Content-Type: text/plain; charset=UTF-8
+
+So sorry, the automatic ${service} of ${mntpoint} on ${hostname} failed.
+Please do not reply to this mesage.
+
+A log of what happened follows:
+ENDL
+       systemctl status --full --lines 4294967295 "${scrub_svc}"
+}
+
+fail_mail() {
+       cat << ENDL
+To: ${recipient}
+From: <${service}@${hostname}>
+Subject: ${service} failure
+
+So sorry, the automatic ${service} on ${hostname} failed.
+
+A log of what happened follows:
+ENDL
+       systemctl status --full --lines 4294967295 "${service}"
+}
+
+if [ -n "${mntpoint}" ]; then
+       fail_mail_mntpoint | "${mailer}" -t -i
+else
+       fail_mail | "${mailer}" -t -i
+fi
+exit "${PIPESTATUS[1]}"
diff --git a/fsck/bcachefsck_fail@.service.in b/fsck/bcachefsck_fail@.service.in
new file mode 100644 (file)
index 0000000..369a809
--- /dev/null
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle.  All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Online bcachefsck Failure Reporting for %f
+Documentation=man:bcachefs(8)
+
+[Service]
+Type=oneshot
+Environment=EMAIL_ADDR=root
+ExecStart=@libdir@/bcachefsck_fail "${EMAIL_ADDR}" bcachefsck %f
+User=mail
+Group=mail
+SupplementaryGroups=systemd-journal
+
+# Create the service underneath the background service slice so that we can
+# control resource usage.
+Slice=system-bcachefsck.slice
+
+# No realtime scheduling
+RestrictRealtime=true
+
+# Make /usr, /boot, /efi and /etc read-only and /home inaccessible.
+ProtectSystem=full
+ProtectHome=yes
+PrivateTmp=true
+RestrictSUIDSGID=true
+
+# Emailing reports requires network access, but not the ability to change the
+# hostname.
+ProtectHostname=true
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Can't hide /proc because journalctl needs it to find various pieces of log
+# information
+#ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No memory pages that are both writable and executable
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# The failure reporter needs no capabilities at all
+CapabilityBoundingSet=
+NoNewPrivileges=true
+
+# Failure reporting shouldn't create world-readable files
+UMask=0077
+
+# Clean up any IPC objects when this unit stops
+RemoveIPC=true
+
+# No access to hardware device files
+PrivateDevices=true
+ProtectClock=true
diff --git a/fsck/system-bcachefsck.slice b/fsck/system-bcachefsck.slice
new file mode 100644 (file)
index 0000000..ea36803
--- /dev/null
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle.  All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=bcachefsck background service slice
+Before=slices.target
+
+[Slice]
+
+# If the CPU usage cgroup controller is available, don't use more than 60% of a
+# single core for all background processes.
+CPUQuota=60%
+CPUAccounting=true
+
+[Install]
+# As of systemd 249, the systemd cgroupv2 configuration code will drop resource
+# controllers from the root and system.slice cgroups at startup if it doesn't
+# find any direct dependencies that require a given controller.  Newly
+# activated units with resource control directives are created under the system
+# slice but do not cause a reconfiguration of the slice's resource controllers.
+# Hence we cannot put CPUQuota= into the bcachefsck service units directly.
+#
+# For the CPUQuota directive to have any effect, we must therefore create an
+# explicit definition file for the slice that systemd creates to contain the
+# bcachefsck instance units (e.g. bcachefsck@.service) and we must configure this
+# slice as a dependency of the system slice to establish the direct dependency
+# relation.
+WantedBy=system.slice