# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
# Author: Darrick J. Wong
#
# systemd template unit that runs an online "bcachefs fsck" against one
# mounted filesystem inside a tightly sandboxed, low-priority service.
# The instance name is the escaped mountpoint path; %f expands to that path.
# @bcachefsck_args@ is a placeholder that is presumably substituted when the
# unit is generated from this template at build time.

[Unit]
Description=Online bcachefsck for %f
# Hand failures to a separate reporting unit. Listing this same template here
# would simply re-queue the failing check instead of reporting it.
# NOTE(review): assumes this unit is installed as bcachefsck@.service and that
# the failure reporter is installed as bcachefsck_fail@.service -- confirm.
OnFailure=bcachefsck_fail@%i.service
Documentation=man:bcachefs(8)

# Explicitly require the capabilities that this program needs
ConditionCapability=CAP_SYS_ADMIN
ConditionCapability=CAP_FOWNER
ConditionCapability=CAP_DAC_OVERRIDE
ConditionCapability=CAP_DAC_READ_SEARCH
ConditionCapability=CAP_SYS_RAWIO

# Must be a mountpoint
ConditionPathIsMountPoint=%f
RequiresMountsFor=%f

[Service]
Type=oneshot
Environment=SERVICE_MODE=1
# The filesystem is checked through the private bind mount at /tmp/scrub
# (see BindPaths= below); %f is passed as well so that the program knows the
# real mountpoint.
ExecStart=bcachefs fsck --real-mountpoint /tmp/scrub/ @bcachefsck_args@ %f
SyslogIdentifier=%N

# Run scrub with minimal CPU and IO priority so that nothing else will starve.
IOSchedulingClass=idle
CPUSchedulingPolicy=idle
CPUAccounting=true
Nice=19

# Create the service underneath the background service slice so that we can
# control resource usage.
Slice=system-bcachefsck.slice

# No realtime CPU scheduling
RestrictRealtime=true

# Dynamically create a user that isn't root
DynamicUser=true

# Make the entire filesystem readonly and /home inaccessible, then bind mount
# the filesystem we're supposed to be checking into our private /tmp dir.
# 'norbind' means that we don't bind anything under that original mount.
# This enables checking filesystems mounted under /tmp in the global mount
# namespace.
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=true
BindPaths=%f:/tmp/scrub:norbind

# No network access
PrivateNetwork=true
ProtectHostname=true
RestrictAddressFamilies=none
IPAddressDeny=any

# Don't let the program mess with the kernel configuration at all
ProtectKernelLogs=true
ProtectKernelModules=true
ProtectKernelTunables=true
ProtectControlGroups=true
ProtectProc=invisible
RestrictNamespaces=true

# Hide everything in /proc, even /proc/mounts
ProcSubset=pid

# Only allow the default Linux personality
LockPersonality=true

# No writable memory pages
MemoryDenyWriteExecute=true

# Don't let our mounts leak out to the host
PrivateMounts=true

# Restrict system calls to the native arch and only enough to get things going.
# The filter lines accumulate in order: start from the @system-service
# allow-list, then remove the privileged, resource-control and mount groups.
SystemCallArchitectures=native
SystemCallFilter=@system-service
SystemCallFilter=~@privileged
SystemCallFilter=~@resources
SystemCallFilter=~@mount

# bcachefsck needs these privileges to run, and no others
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
NoNewPrivileges=true

# bcachefsck doesn't create files
UMask=7777

# No access to hardware /dev files except for block devices
ProtectClock=true
DevicePolicy=closed
DeviceAllow=block-*