git.sesse.net Git - bcachefs-tools-debian/commitdiff
Add upstream files
author    Jonathan Carter <jcc@debian.org>
          Sun, 18 Oct 2020 16:27:02 +0000 (18:27 +0200)
committer Jonathan Carter <jcc@debian.org>
          Sun, 18 Oct 2020 16:27:02 +0000 (18:27 +0200)
365 files changed:
COPYING [new file with mode: 0644]
INSTALL [new file with mode: 0644]
Makefile [new file with mode: 0644]
README [new file with mode: 0644]
bcachefs.8 [new file with mode: 0644]
bcachefs.c [new file with mode: 0644]
ccan/array_size/LICENSE [new file with mode: 0644]
ccan/array_size/_info [new file with mode: 0644]
ccan/array_size/array_size.h [new file with mode: 0644]
ccan/build_assert/LICENSE [new file with mode: 0644]
ccan/build_assert/_info [new file with mode: 0644]
ccan/build_assert/build_assert.h [new file with mode: 0644]
ccan/compiler/LICENSE [new file with mode: 0644]
ccan/compiler/_info [new file with mode: 0644]
ccan/compiler/compiler.h [new file with mode: 0644]
ccan/darray/LICENSE [new file with mode: 0644]
ccan/darray/_info [new file with mode: 0644]
ccan/darray/darray.h [new file with mode: 0644]
cmd_assemble.c [new file with mode: 0644]
cmd_attr.c [new file with mode: 0644]
cmd_data.c [new file with mode: 0644]
cmd_debug.c [new file with mode: 0644]
cmd_device.c [new file with mode: 0644]
cmd_format.c [new file with mode: 0644]
cmd_fs.c [new file with mode: 0644]
cmd_fsck.c [new file with mode: 0644]
cmd_fusemount.c [new file with mode: 0644]
cmd_key.c [new file with mode: 0644]
cmd_migrate.c [new file with mode: 0644]
cmd_run.c [new file with mode: 0644]
cmd_version.c [new file with mode: 0644]
cmds.h [new file with mode: 0644]
config.h [new file with mode: 0644]
crypto.c [new file with mode: 0644]
crypto.h [new file with mode: 0644]
debian/files [new file with mode: 0644]
default.nix [new file with mode: 0644]
doc/bcachefs.5.txt [new file with mode: 0644]
fsck.bcachefs [new file with mode: 0755]
include/asm/page.h [new file with mode: 0644]
include/asm/unaligned.h [new file with mode: 0644]
include/crypto/algapi.h [new file with mode: 0644]
include/crypto/chacha.h [new file with mode: 0644]
include/crypto/hash.h [new file with mode: 0644]
include/crypto/poly1305.h [new file with mode: 0644]
include/crypto/sha.h [new file with mode: 0644]
include/crypto/skcipher.h [new file with mode: 0644]
include/keys/user-type.h [new file with mode: 0644]
include/linux/atomic.h [new file with mode: 0644]
include/linux/backing-dev.h [new file with mode: 0644]
include/linux/bio.h [new file with mode: 0644]
include/linux/bit_spinlock.h [new file with mode: 0644]
include/linux/bitmap.h [new file with mode: 0644]
include/linux/bitops.h [new file with mode: 0644]
include/linux/blk_types.h [new file with mode: 0644]
include/linux/blkdev.h [new file with mode: 0644]
include/linux/bug.h [new file with mode: 0644]
include/linux/bvec.h [new file with mode: 0644]
include/linux/byteorder.h [new file with mode: 0644]
include/linux/cache.h [new file with mode: 0644]
include/linux/closure.h [new file with mode: 0644]
include/linux/compiler.h [new file with mode: 0644]
include/linux/completion.h [new file with mode: 0644]
include/linux/console.h [new file with mode: 0644]
include/linux/cpumask.h [new file with mode: 0644]
include/linux/crc32c.h [new file with mode: 0644]
include/linux/crc64.h [new file with mode: 0644]
include/linux/crypto.h [new file with mode: 0644]
include/linux/ctype.h [new file with mode: 0644]
include/linux/dcache.h [new file with mode: 0644]
include/linux/debugfs.h [new file with mode: 0644]
include/linux/device.h [new file with mode: 0644]
include/linux/dynamic_fault.h [new file with mode: 0644]
include/linux/err.h [new file with mode: 0644]
include/linux/export.h [new file with mode: 0644]
include/linux/freezer.h [new file with mode: 0644]
include/linux/generic-radix-tree.h [new file with mode: 0644]
include/linux/genhd.h [new file with mode: 0644]
include/linux/gfp.h [new file with mode: 0644]
include/linux/hash.h [new file with mode: 0644]
include/linux/idr.h [new file with mode: 0644]
include/linux/ioprio.h [new file with mode: 0644]
include/linux/jhash.h [new file with mode: 0644]
include/linux/jiffies.h [new file with mode: 0644]
include/linux/kernel.h [new file with mode: 0644]
include/linux/key.h [new file with mode: 0644]
include/linux/kobject.h [new file with mode: 0644]
include/linux/kthread.h [new file with mode: 0644]
include/linux/list.h [new file with mode: 0644]
include/linux/llist.h [new file with mode: 0644]
include/linux/lockdep.h [new file with mode: 0644]
include/linux/log2.h [new file with mode: 0644]
include/linux/lz4.h [new file with mode: 0644]
include/linux/math64.h [new file with mode: 0644]
include/linux/mempool.h [new file with mode: 0644]
include/linux/module.h [new file with mode: 0644]
include/linux/mutex.h [new file with mode: 0644]
include/linux/osq_lock.h [new file with mode: 0644]
include/linux/page.h [new file with mode: 0644]
include/linux/percpu-refcount.h [new file with mode: 0644]
include/linux/percpu-rwsem.h [new file with mode: 0644]
include/linux/percpu.h [new file with mode: 0644]
include/linux/posix_acl.h [new file with mode: 0644]
include/linux/posix_acl_xattr.h [new file with mode: 0644]
include/linux/preempt.h [new file with mode: 0644]
include/linux/prefetch.h [new file with mode: 0644]
include/linux/printk.h [new file with mode: 0644]
include/linux/random.h [new file with mode: 0644]
include/linux/ratelimit.h [new file with mode: 0644]
include/linux/rculist.h [new file with mode: 0644]
include/linux/rcupdate.h [new file with mode: 0644]
include/linux/rhashtable.h [new file with mode: 0644]
include/linux/rwsem.h [new file with mode: 0644]
include/linux/scatterlist.h [new file with mode: 0644]
include/linux/sched.h [new file with mode: 0644]
include/linux/sched/clock.h [new file with mode: 0644]
include/linux/sched/cputime.h [new file with mode: 0644]
include/linux/sched/mm.h [new file with mode: 0644]
include/linux/sched/rt.h [new file with mode: 0644]
include/linux/sched/signal.h [new file with mode: 0644]
include/linux/sched/task.h [new file with mode: 0644]
include/linux/semaphore.h [new file with mode: 0644]
include/linux/seq_file.h [new file with mode: 0644]
include/linux/seqlock.h [new file with mode: 0644]
include/linux/shrinker.h [new file with mode: 0644]
include/linux/six.h [new file with mode: 0644]
include/linux/slab.h [new file with mode: 0644]
include/linux/sort.h [new file with mode: 0644]
include/linux/spinlock.h [new file with mode: 0644]
include/linux/stat.h [new file with mode: 0644]
include/linux/string.h [new file with mode: 0644]
include/linux/sysfs.h [new file with mode: 0644]
include/linux/time64.h [new file with mode: 0644]
include/linux/timer.h [new file with mode: 0644]
include/linux/tracepoint.h [new file with mode: 0644]
include/linux/typecheck.h [new file with mode: 0644]
include/linux/types.h [new file with mode: 0644]
include/linux/unaligned/be_byteshift.h [new file with mode: 0644]
include/linux/unaligned/be_struct.h [new file with mode: 0644]
include/linux/unaligned/generic.h [new file with mode: 0644]
include/linux/unaligned/le_byteshift.h [new file with mode: 0644]
include/linux/unaligned/le_struct.h [new file with mode: 0644]
include/linux/unaligned/packed_struct.h [new file with mode: 0644]
include/linux/uuid.h [new file with mode: 0644]
include/linux/vmalloc.h [new file with mode: 0644]
include/linux/wait.h [new file with mode: 0644]
include/linux/workqueue.h [new file with mode: 0644]
include/linux/xattr.h [new file with mode: 0644]
include/linux/zlib.h [new file with mode: 0644]
include/linux/zstd.h [new file with mode: 0644]
include/trace/define_trace.h [new file with mode: 0644]
include/trace/events/bcachefs.h [new file with mode: 0644]
include/uapi/linux/xattr.h [new file with mode: 0644]
initramfs/hook [new file with mode: 0755]
initramfs/script [new file with mode: 0755]
libbcachefs.c [new file with mode: 0644]
libbcachefs.h [new file with mode: 0644]
libbcachefs/acl.c [new file with mode: 0644]
libbcachefs/acl.h [new file with mode: 0644]
libbcachefs/alloc_background.c [new file with mode: 0644]
libbcachefs/alloc_background.h [new file with mode: 0644]
libbcachefs/alloc_foreground.c [new file with mode: 0644]
libbcachefs/alloc_foreground.h [new file with mode: 0644]
libbcachefs/alloc_types.h [new file with mode: 0644]
libbcachefs/bcachefs.h [new file with mode: 0644]
libbcachefs/bcachefs_format.h [new file with mode: 0644]
libbcachefs/bcachefs_ioctl.h [new file with mode: 0644]
libbcachefs/bkey.c [new file with mode: 0644]
libbcachefs/bkey.h [new file with mode: 0644]
libbcachefs/bkey_methods.c [new file with mode: 0644]
libbcachefs/bkey_methods.h [new file with mode: 0644]
libbcachefs/bkey_on_stack.h [new file with mode: 0644]
libbcachefs/bkey_sort.c [new file with mode: 0644]
libbcachefs/bkey_sort.h [new file with mode: 0644]
libbcachefs/bset.c [new file with mode: 0644]
libbcachefs/bset.h [new file with mode: 0644]
libbcachefs/btree_cache.c [new file with mode: 0644]
libbcachefs/btree_cache.h [new file with mode: 0644]
libbcachefs/btree_gc.c [new file with mode: 0644]
libbcachefs/btree_gc.h [new file with mode: 0644]
libbcachefs/btree_io.c [new file with mode: 0644]
libbcachefs/btree_io.h [new file with mode: 0644]
libbcachefs/btree_iter.c [new file with mode: 0644]
libbcachefs/btree_iter.h [new file with mode: 0644]
libbcachefs/btree_key_cache.c [new file with mode: 0644]
libbcachefs/btree_key_cache.h [new file with mode: 0644]
libbcachefs/btree_locking.h [new file with mode: 0644]
libbcachefs/btree_types.h [new file with mode: 0644]
libbcachefs/btree_update.h [new file with mode: 0644]
libbcachefs/btree_update_interior.c [new file with mode: 0644]
libbcachefs/btree_update_interior.h [new file with mode: 0644]
libbcachefs/btree_update_leaf.c [new file with mode: 0644]
libbcachefs/buckets.c [new file with mode: 0644]
libbcachefs/buckets.h [new file with mode: 0644]
libbcachefs/buckets_types.h [new file with mode: 0644]
libbcachefs/chardev.c [new file with mode: 0644]
libbcachefs/chardev.h [new file with mode: 0644]
libbcachefs/checksum.c [new file with mode: 0644]
libbcachefs/checksum.h [new file with mode: 0644]
libbcachefs/clock.c [new file with mode: 0644]
libbcachefs/clock.h [new file with mode: 0644]
libbcachefs/clock_types.h [new file with mode: 0644]
libbcachefs/compress.c [new file with mode: 0644]
libbcachefs/compress.h [new file with mode: 0644]
libbcachefs/debug.c [new file with mode: 0644]
libbcachefs/debug.h [new file with mode: 0644]
libbcachefs/dirent.c [new file with mode: 0644]
libbcachefs/dirent.h [new file with mode: 0644]
libbcachefs/disk_groups.c [new file with mode: 0644]
libbcachefs/disk_groups.h [new file with mode: 0644]
libbcachefs/ec.c [new file with mode: 0644]
libbcachefs/ec.h [new file with mode: 0644]
libbcachefs/ec_types.h [new file with mode: 0644]
libbcachefs/error.c [new file with mode: 0644]
libbcachefs/error.h [new file with mode: 0644]
libbcachefs/extent_update.c [new file with mode: 0644]
libbcachefs/extent_update.h [new file with mode: 0644]
libbcachefs/extents.c [new file with mode: 0644]
libbcachefs/extents.h [new file with mode: 0644]
libbcachefs/extents_types.h [new file with mode: 0644]
libbcachefs/eytzinger.h [new file with mode: 0644]
libbcachefs/fifo.h [new file with mode: 0644]
libbcachefs/fs-common.c [new file with mode: 0644]
libbcachefs/fs-common.h [new file with mode: 0644]
libbcachefs/fs-io.c [new file with mode: 0644]
libbcachefs/fs-io.h [new file with mode: 0644]
libbcachefs/fs-ioctl.c [new file with mode: 0644]
libbcachefs/fs-ioctl.h [new file with mode: 0644]
libbcachefs/fs.c [new file with mode: 0644]
libbcachefs/fs.h [new file with mode: 0644]
libbcachefs/fsck.c [new file with mode: 0644]
libbcachefs/fsck.h [new file with mode: 0644]
libbcachefs/inode.c [new file with mode: 0644]
libbcachefs/inode.h [new file with mode: 0644]
libbcachefs/io.c [new file with mode: 0644]
libbcachefs/io.h [new file with mode: 0644]
libbcachefs/io_types.h [new file with mode: 0644]
libbcachefs/journal.c [new file with mode: 0644]
libbcachefs/journal.h [new file with mode: 0644]
libbcachefs/journal_io.c [new file with mode: 0644]
libbcachefs/journal_io.h [new file with mode: 0644]
libbcachefs/journal_reclaim.c [new file with mode: 0644]
libbcachefs/journal_reclaim.h [new file with mode: 0644]
libbcachefs/journal_seq_blacklist.c [new file with mode: 0644]
libbcachefs/journal_seq_blacklist.h [new file with mode: 0644]
libbcachefs/journal_types.h [new file with mode: 0644]
libbcachefs/keylist.c [new file with mode: 0644]
libbcachefs/keylist.h [new file with mode: 0644]
libbcachefs/keylist_types.h [new file with mode: 0644]
libbcachefs/migrate.c [new file with mode: 0644]
libbcachefs/migrate.h [new file with mode: 0644]
libbcachefs/move.c [new file with mode: 0644]
libbcachefs/move.h [new file with mode: 0644]
libbcachefs/move_types.h [new file with mode: 0644]
libbcachefs/movinggc.c [new file with mode: 0644]
libbcachefs/movinggc.h [new file with mode: 0644]
libbcachefs/opts.c [new file with mode: 0644]
libbcachefs/opts.h [new file with mode: 0644]
libbcachefs/quota.c [new file with mode: 0644]
libbcachefs/quota.h [new file with mode: 0644]
libbcachefs/quota_types.h [new file with mode: 0644]
libbcachefs/rebalance.c [new file with mode: 0644]
libbcachefs/rebalance.h [new file with mode: 0644]
libbcachefs/rebalance_types.h [new file with mode: 0644]
libbcachefs/recovery.c [new file with mode: 0644]
libbcachefs/recovery.h [new file with mode: 0644]
libbcachefs/reflink.c [new file with mode: 0644]
libbcachefs/reflink.h [new file with mode: 0644]
libbcachefs/replicas.c [new file with mode: 0644]
libbcachefs/replicas.h [new file with mode: 0644]
libbcachefs/replicas_types.h [new file with mode: 0644]
libbcachefs/siphash.c [new file with mode: 0644]
libbcachefs/siphash.h [new file with mode: 0644]
libbcachefs/str_hash.h [new file with mode: 0644]
libbcachefs/super-io.c [new file with mode: 0644]
libbcachefs/super-io.h [new file with mode: 0644]
libbcachefs/super.c [new file with mode: 0644]
libbcachefs/super.h [new file with mode: 0644]
libbcachefs/super_types.h [new file with mode: 0644]
libbcachefs/sysfs.c [new file with mode: 0644]
libbcachefs/sysfs.h [new file with mode: 0644]
libbcachefs/tests.c [new file with mode: 0644]
libbcachefs/tests.h [new file with mode: 0644]
libbcachefs/trace.c [new file with mode: 0644]
libbcachefs/util.c [new file with mode: 0644]
libbcachefs/util.h [new file with mode: 0644]
libbcachefs/vstructs.h [new file with mode: 0644]
libbcachefs/xattr.c [new file with mode: 0644]
libbcachefs/xattr.h [new file with mode: 0644]
linux/atomic64.c [new file with mode: 0644]
linux/bio.c [new file with mode: 0644]
linux/blkdev.c [new file with mode: 0644]
linux/closure.c [new file with mode: 0644]
linux/crc64.c [new file with mode: 0644]
linux/crc64table.h [new file with mode: 0644]
linux/crypto/api.c [new file with mode: 0644]
linux/crypto/chacha20_generic.c [new file with mode: 0644]
linux/crypto/poly1305_generic.c [new file with mode: 0644]
linux/crypto/sha256_generic.c [new file with mode: 0644]
linux/fs.c [new file with mode: 0644]
linux/generic-radix-tree.c [new file with mode: 0644]
linux/kstrtox.c [new file with mode: 0644]
linux/kstrtox.h [new file with mode: 0644]
linux/kthread.c [new file with mode: 0644]
linux/llist.c [new file with mode: 0644]
linux/preempt.c [new file with mode: 0644]
linux/rhashtable.c [new file with mode: 0644]
linux/sched.c [new file with mode: 0644]
linux/semaphore.c [new file with mode: 0644]
linux/shrinker.c [new file with mode: 0644]
linux/six.c [new file with mode: 0644]
linux/string.c [new file with mode: 0644]
linux/timer.c [new file with mode: 0644]
linux/wait.c [new file with mode: 0644]
linux/workqueue.c [new file with mode: 0644]
mkfs.bcachefs [new file with mode: 0755]
mount.bcachefs.sh [new file with mode: 0755]
mount/Cargo.lock [new file with mode: 0644]
mount/Cargo.toml [new file with mode: 0644]
mount/build.rs [new file with mode: 0644]
mount/src/filesystem.rs [new file with mode: 0644]
mount/src/key.rs [new file with mode: 0644]
mount/src/keyutils_wrapper.h [new file with mode: 0644]
mount/src/lib.rs [new file with mode: 0644]
mount/src/libbcachefs_wrapper.h [new file with mode: 0644]
nix/fetchnix.nix [new file with mode: 0644]
nix/nixpkgs.json [new file with mode: 0644]
nix/nixpkgs.nix [new file with mode: 0644]
nix/update-nixpkgs.sh [new file with mode: 0755]
packaging/README [new file with mode: 0644]
packaging/bcachefs-tools.spec [new file with mode: 0644]
packaging/userspace-rcu.spec [new file with mode: 0644]
qcow2.c [new file with mode: 0644]
qcow2.h [new file with mode: 0644]
raid/COPYING [new file with mode: 0644]
raid/check.c [new file with mode: 0644]
raid/combo.h [new file with mode: 0644]
raid/cpu.h [new file with mode: 0644]
raid/gf.h [new file with mode: 0644]
raid/helper.c [new file with mode: 0644]
raid/helper.h [new file with mode: 0644]
raid/int.c [new file with mode: 0644]
raid/internal.h [new file with mode: 0644]
raid/intz.c [new file with mode: 0644]
raid/memory.c [new file with mode: 0644]
raid/memory.h [new file with mode: 0644]
raid/module.c [new file with mode: 0644]
raid/raid.c [new file with mode: 0644]
raid/raid.h [new file with mode: 0644]
raid/tables.c [new file with mode: 0644]
raid/tag.c [new file with mode: 0644]
raid/test.c [new file with mode: 0644]
raid/test.h [new file with mode: 0644]
raid/x86.c [new file with mode: 0644]
raid/x86z.c [new file with mode: 0644]
smoke_test [new file with mode: 0755]
tests/conftest.py [new file with mode: 0644]
tests/test_basic.py [new file with mode: 0644]
tests/test_fixture.py [new file with mode: 0644]
tests/test_fuse.py [new file with mode: 0644]
tests/test_helper.c [new file with mode: 0644]
tests/test_helper_trick.c [new file with mode: 0644]
tests/util.py [new file with mode: 0644]
tools-util.c [new file with mode: 0644]
tools-util.h [new file with mode: 0644]

diff --git a/COPYING b/COPYING
new file mode 100644 (file)
index 0000000..3912109
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+                   GNU GENERAL PUBLIC LICENSE
+                      Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+                   GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                           NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+\f
+           How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/INSTALL b/INSTALL
new file mode 100644 (file)
index 0000000..85c09a2
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,75 @@
+-- Getting started --
+
+Dependencies:
+
+ * libaio
+ * libblkid
+ * libkeyutils
+ * liblz4
+ * libscrypt
+ * libsodium
+ * liburcu
+ * libuuid
+ * libzstd
+ * pkg-config
+ * zlib1g
+ * valgrind
+
+Debian (Bullseye or later) and Ubuntu (20.04 or later): you can install these with
+    apt install -y pkg-config libaio-dev libblkid-dev libkeyutils-dev \
+        liblz4-dev libscrypt-dev libsodium-dev liburcu-dev libzstd-dev \
+        uuid-dev zlib1g-dev valgrind libudev-dev
+
+Fedora: install the "Development tools" group along with:
+    dnf install -y libaio-devel libsodium-devel \
+        libblkid-devel libzstd-devel zlib-devel userspace-rcu-devel \
+        lz4-devel libuuid-devel valgrind-devel keyutils-libs-devel \
+        libscrypt-devel findutils
+
+Arch: install bcachefs-tools-git from the AUR.
+Or, to build from source, install libscrypt from the AUR along with:
+    pacman -S base-devel libaio keyutils libsodium liburcu zstd valgrind
+
+Then, just make && make install
+
+
+-- Experimental features --
+
+Experimental fuse support is currently disabled by default. Fuse support is at
+an early stage and may corrupt your filesystem, so it should only be used for
+testing. To enable, you'll also need to add:
+
+* libfuse3 >= 3.7
+
+On Debian/Ubuntu (Bullseye/20.04 or later needed for libfuse >= 3.7):
+    apt install -y libfuse3-dev
+
+On Fedora (32 or later needed for libfuse >= 3.7):
+    dnf install -y fuse3-devel
+
+Arch:
+    pacman -S fuse3
+
+Then, make using the BCACHEFS_FUSE environment variable (make clean first if
+previously built without fuse support):
+
+BCACHEFS_FUSE=1 make && make install
+
+
+-- Tests --
+
+Some tests are available to validate the "bcachefs" binary.  The tests depend
+on python3 and pytest.
+
+On Debian:
+    apt install -y python3-pytest
+
+Then, you can run the tests via:
+
+    make check
+    # or if pytest has a different name
+    make check PYTEST=pytest
+
+Optionally, you may wish to run tests in parallel using python3-pytest-xdist:
+
+    cd tests; pytest-3 -n4
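
Tying the above together, a minimal build-and-test session might look like the
following sketch (pytest-3 and tests/test_basic.py are the names shipped in
this tree; everything else follows the instructions above):

    # Plain build (see the dependency lists above)
    make

    # Fuse-enabled rebuild; clean first when toggling fuse support
    make clean
    BCACHEFS_FUSE=1 make

    # Run the full suite, or a single file from tests/
    make check PYTEST=pytest-3
    (cd tests; pytest-3 test_basic.py)
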
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..cc00ac6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,164 @@
+
+PREFIX?=/usr/local
+PKG_CONFIG?=pkg-config
+INSTALL=install
+PYTEST=pytest-3
+CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall                           \
+       -Wno-pointer-sign                                       \
+       -Wno-zero-length-bounds                                 \
+       -fno-strict-aliasing                                    \
+       -fno-delete-null-pointer-checks                         \
+       -I. -Iinclude -Iraid                                    \
+       -D_FILE_OFFSET_BITS=64                                  \
+       -D_GNU_SOURCE                                           \
+       -D_LGPL_SOURCE                                          \
+       -DRCU_MEMBARRIER                                        \
+       -DZSTD_STATIC_LINKING_ONLY                              \
+       -DFUSE_USE_VERSION=32                                   \
+       -DNO_BCACHEFS_CHARDEV                                   \
+       -DNO_BCACHEFS_FS                                        \
+       -DNO_BCACHEFS_SYSFS                                     \
+       -DVERSION_STRING='"$(VERSION)"'                         \
+       $(EXTRA_CFLAGS)
+LDFLAGS+=$(CFLAGS) $(EXTRA_LDFLAGS)
+
+VERSION?=$(shell git describe --dirty=+ 2>/dev/null || echo v0.1-nogit)
+
+CC_VERSION=$(shell $(CC) -v 2>&1|grep -E '(gcc|clang) version')
+
+ifneq (,$(findstring gcc,$(CC_VERSION)))
+       CFLAGS+=-Wno-unused-but-set-variable
+endif
+
+ifneq (,$(findstring clang,$(CC_VERSION)))
+       CFLAGS+=-Wno-missing-braces
+endif
+
+ifdef D
+       CFLAGS+=-Werror
+       CFLAGS+=-DCONFIG_BCACHEFS_DEBUG=y
+endif
+       CFLAGS+=-DCONFIG_VALGRIND=y
+
+PKGCONFIG_LIBS="blkid uuid liburcu libsodium zlib liblz4 libzstd libudev"
+ifdef BCACHEFS_FUSE
+       PKGCONFIG_LIBS+="fuse3 >= 3.7"
+       CFLAGS+=-DBCACHEFS_FUSE
+endif
+
+PKGCONFIG_CFLAGS:=$(shell $(PKG_CONFIG) --cflags $(PKGCONFIG_LIBS))
+ifeq (,$(PKGCONFIG_CFLAGS))
+    $(error pkg-config error, command: $(PKG_CONFIG) --cflags $(PKGCONFIG_LIBS))
+endif
+PKGCONFIG_LDLIBS:=$(shell $(PKG_CONFIG) --libs   $(PKGCONFIG_LIBS))
+ifeq (,$(PKGCONFIG_LDLIBS))
+    $(error pkg-config error, command: $(PKG_CONFIG) --libs $(PKGCONFIG_LIBS))
+endif
+
+CFLAGS+=$(PKGCONFIG_CFLAGS)
+LDLIBS+=$(PKGCONFIG_LDLIBS)
+
+LDLIBS+=-lm -lpthread -lrt -lscrypt -lkeyutils -laio -ldl
+LDLIBS+=$(EXTRA_LDLIBS)
+
+ifeq ($(PREFIX),/usr)
+       ROOT_SBINDIR=/sbin
+       INITRAMFS_DIR=$(PREFIX)/share/initramfs-tools
+else
+       ROOT_SBINDIR=$(PREFIX)/sbin
+       INITRAMFS_DIR=/etc/initramfs-tools
+endif
+
+.PHONY: all
+all: bcachefs
+
+.PHONY: tests
+tests: tests/test_helper
+
+.PHONY: check
+check: tests bcachefs
+       cd tests; $(PYTEST)
+
+.PHONY: TAGS tags
+TAGS:
+       ctags -e -R .
+
+tags:
+       ctags -R .
+
+SRCS=$(shell find . -type f -iname '*.c')
+DEPS=$(SRCS:.c=.d)
+-include $(DEPS)
+
+OBJS=$(SRCS:.c=.o)
+bcachefs: $(filter-out ./tests/%.o, $(OBJS))
+
+MOUNT_SRCS=$(shell find mount/src -type f -iname '*.rs') \
+    mount/Cargo.toml mount/Cargo.lock mount/build.rs
+libbcachefs_mount.a: $(MOUNT_SRCS)
+       LIBBCACHEFS_INCLUDE=$(CURDIR) cargo build --manifest-path mount/Cargo.toml --release
+       cp mount/target/release/libbcachefs_mount.a $@
+
+MOUNT_OBJ=$(filter-out ./bcachefs.o ./tests/%.o ./cmd_%.o , $(OBJS))
+mount.bcachefs: libbcachefs_mount.a $(MOUNT_OBJ)
+       $(CC) -Wl,--gc-sections libbcachefs_mount.a $(MOUNT_OBJ) -o $@ $(LDLIBS)
+
+tests/test_helper: $(filter ./tests/%.o, $(OBJS))
+
+# If the version string differs from the last build, update the last version
+ifneq ($(VERSION),$(shell cat .version 2>/dev/null))
+.PHONY: .version
+endif
+.version:
+       echo '$(VERSION)' > $@
+
+# Rebuild the 'version' command any time the version string changes
+cmd_version.o : .version
+
+doc/bcachefs.5: doc/bcachefs.5.txt
+       a2x -f manpage doc/bcachefs.5.txt
+
+.PHONY: install
+install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
+install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
+install: bcachefs
+       $(INSTALL) -m0755 -D bcachefs      -t $(DESTDIR)$(ROOT_SBINDIR)
+       $(INSTALL) -m0755    fsck.bcachefs    $(DESTDIR)$(ROOT_SBINDIR)
+       $(INSTALL) -m0755    mkfs.bcachefs    $(DESTDIR)$(ROOT_SBINDIR)
+       $(INSTALL) -m0644 -D bcachefs.8    -t $(DESTDIR)$(PREFIX)/share/man/man8/
+       $(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
+       $(INSTALL) -m0755 -D initramfs/hook   $(DESTDIR)$(INITRAMFS_HOOK)
+       $(INSTALL) -m0755 -D mount.bcachefs.sh $(DESTDIR)$(ROOT_SBINDIR)
+       sed -i '/^# Note: make install replaces/,$$d' $(DESTDIR)$(INITRAMFS_HOOK)
+       echo "copy_exec $(ROOT_SBINDIR)/bcachefs /sbin/bcachefs" >> $(DESTDIR)$(INITRAMFS_HOOK)
+
+.PHONY: clean
+clean:
+       $(RM) bcachefs mount.bcachefs libbcachefs_mount.a tests/test_helper .version $(OBJS) $(DEPS)
+       $(RM) -rf mount/target
+
+.PHONY: deb
+deb: all
+# --unsigned-source --unsigned-changes --no-pre-clean --build=binary
+# --diff-ignore --tar-ignore
+       debuild -us -uc -nc -b -i -I
+
+.PHONY: update-bcachefs-sources
+update-bcachefs-sources:
+       git rm -rf --ignore-unmatch libbcachefs
+       test -d libbcachefs || mkdir libbcachefs
+       cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/
+       git add libbcachefs/*.[ch]
+       cp $(LINUX_DIR)/include/trace/events/bcachefs.h include/trace/events/
+       git add include/trace/events/bcachefs.h
+       cp $(LINUX_DIR)/kernel/locking/six.c linux/
+       git add linux/six.c
+       cp $(LINUX_DIR)/include/linux/six.h include/linux/
+       git add include/linux/six.h
+       $(RM) libbcachefs/*.mod.c
+       git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision
+       git add .bcachefs_revision
+
+.PHONY: update-commit-bcachefs-sources
+update-commit-bcachefs-sources: update-bcachefs-sources
+       git commit -m "Update bcachefs sources to $(shell git -C $(LINUX_DIR) show --oneline --no-patch)"
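
A couple of the Makefile's knobs in action, as a hedged sketch (D, PREFIX,
DESTDIR and LINUX_DIR are all variables defined above; the paths are
placeholders):

    # Debug build: turns on -Werror and CONFIG_BCACHEFS_DEBUG (the ifdef D block)
    make D=1

    # PREFIX=/usr flips ROOT_SBINDIR to /sbin and targets the system
    # initramfs-tools directory; DESTDIR stages the install tree
    make install PREFIX=/usr DESTDIR=/tmp/stage

    # Re-import libbcachefs/ from a kernel checkout
    make update-bcachefs-sources LINUX_DIR=$HOME/src/linux

Also worth noting as a design choice: VERSION is recomputed from git describe
on every make invocation, and .version is declared .PHONY only when its stored
contents differ, so cmd_version.o (and nothing else) is rebuilt exactly when
the version string changes.
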
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..3d2641e
--- /dev/null
+++ b/README
@@ -0,0 +1,12 @@
+Userspace tools for bcachefs
+
+This builds the bcachefs tool, which has a number of subcommands for formatting
+and managing bcachefs filesystems:
+
+bcachefs format
+bcachefs unlock
+bcachefs assemble
+bcachefs incremental
+etc.
+
+Run bcachefs --help for a full list of commands.
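
To make the subcommand structure concrete, an illustrative session (device
names are placeholders; the --group form is the example given in the man page
below):

    # Format two devices into one filesystem, then inspect and check it
    bcachefs format --group=ssd /dev/sda --group=hdd /dev/sdb
    bcachefs show-super /dev/sda
    bcachefs fsck -n /dev/sda       # -n: check for errors, repair nothing
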
diff --git a/bcachefs.8 b/bcachefs.8
new file mode 100644 (file)
index 0000000..61af7f4
--- /dev/null
@@ -0,0 +1,316 @@
+.Dd May 26, 2018
+.Dt BCACHEFS 8 SMM
+.Os
+.Sh NAME
+.Nm bcachefs
+.Nd manage bcachefs filesystems/devices
+.Sh SYNOPSIS
+.Nm
+.Ar command
+.Op Ar options
+.Op Ar arguments
+.Sh DESCRIPTION
+The
+.Nm
+utility supports the following subcommands,
+which are documented in detail below:
+.Ss Superblock commands
+.Bl -tag -width 18n -compact
+.It Ic format
+Format one or a list of devices with bcachefs data structures.
+.It Ic show-super
+Dump superblock information to stdout.
+.El
+.Ss Repair commands
+.Bl -tag -width 18n -compact
+.It Ic fsck
+Check an existing filesystem for errors.
+.El
+.Ss Startup/shutdown, assembly of multi device filesystems
+.Bl -tag -width 18n -compact
+.It Ic assemble
+Assemble an existing multi device filesystem
+.It Ic incremental
+Incrementally assemble an existing multi device filesystem
+.It Ic run
+Start a partially assembled filesystem.
+.It Ic stop
+Stop a running filesystem.
+.El
+.Ss Commands for managing a running filesystem
+.Bl -tag -width 18n -compact
+.It Ic fs usage
+Show disk usage
+.El
+.Ss Commands for managing devices within a running filesystem
+.Bl -tag -width 18n -compact
+.It Ic device add
+Add a new device to an existing filesystem
+.It Ic device remove
+Remove a device from an existing filesystem
+.It Ic device online
+Re-add an existing member to a filesystem
+.It Ic device offline
+Take a device offline, without removing it
+.It Ic device evacuate
+Migrate data off of a specific device
+.It Ic device set-state
+Mark a device as failed
+.It Ic device resize
+Resize filesystem on a device
+.El
+.Ss Commands for managing filesystem data
+.Bl -tag -width 18n -compact
+.It Ic data rereplicate
+Rereplicate degraded data
+.El
+.Ss Commands for encryption
+.Bl -tag -width 18n -compact
+.It Ic unlock
+Unlock an encrypted filesystem prior to running/mounting
+.It Ic set-passphrase
+Change passphrase on an existing (unmounted) filesystem
+.It Ic remove-passphrase
+Remove passphrase on an existing (unmounted) filesystem
+.El
+.Ss Commands for migration
+.Bl -tag -width 18n -compact
+.It Ic migrate
+Migrate an existing filesystem to bcachefs, in place
+.It Ic migrate-superblock
+Add default superblock, after bcachefs migrate
+.El
+.Ss Commands for debugging
+.Bl -tag -width 18n -compact
+.It Ic dump
+Dump filesystem metadata to a qcow2 image
+.It Ic list
+List filesystem metadata in textual form
+.El
+.Ss Miscellaneous commands
+.Bl -tag -width 18n -compact
+.It Ic version
+Display the version of the invoked bcachefs tool
+.El
+.Sh Superblock commands
+.Bl -tag -width Ds
+.It Nm Ic format Oo Ar options Oc Ar devices\ ...
+Format one or a list of devices with bcachefs data structures.
+You need to do this before you create a volume.
+.Pp
+Device specific options must come before corresponding devices, e.g.
+.Dl bcachefs format --group=ssd /dev/sda --group=hdd /dev/sdb
+.Bl -tag -width Ds
+.It Fl b , Fl -block Ns = Ns Ar size
+block size, in bytes (e.g. 4k)
+.It Fl -btree_node Ns = Ns Ar size
+Btree node size, default 256k
+.It Fl -metadata_checksum_type Ns = Ns ( Cm none | crc32c | crc64 )
+Set metadata checksum type (default:
+.Cm crc32c ) .
+.It Fl -data_checksum_type Ns = Ns ( Cm none | crc32c | crc64 )
+Set data checksum type (default:
+.Cm crc32c ) .
+.It Fl -compression_type Ns = Ns ( Cm none | lz4 | gzip )
+Set compression type (default:
+.Cm none ) .
+.It Fl -data_replicas Ns = Ns Ar number
+Number of data replicas
+.It Fl -metadata_replicas Ns = Ns Ar number
+Number of metadata replicas
+.It Fl -replicas Ns = Ns Ar number
+Sets both data and metadata replicas
+.It Fl -encrypted
+Enable whole filesystem encryption (chacha20/poly1305);
+passphrase will be prompted for.
+.It Fl -no_passphrase
+Don't encrypt master encryption key
+.It Fl -error_action Ns = Ns ( Cm continue | remount-ro | panic )
+Action to take on filesystem error (default:
+.Cm remount-ro )
+.It Fl L , Fl -label Ns = Ns Ar label
+Create the filesystem with the specified
+.Ar label
+.It Fl U , Fl -uuid Ns = Ns Ar uuid
+Create the filesystem with the specified
+.Ar uuid
+.It Fl f , Fl -force
+Force the filesystem to be created,
+even if the device already contains a filesystem.
+.El
+.Pp
+Device specific options:
+.Bl -tag -width Ds
+.It Fl -fs_size Ns = Ns Ar size
+Create the filesystem using
+.Ar size
+bytes on the subsequent device.
+.It Fl -bucket Ns = Ns Ar size
+Specifies the bucket size;
+must be greater than the btree node size
+.It Fl -discard
+Enable discards on subsequent devices
+.It Fl q , Fl -quiet
+Only print errors
+.El
+.It Nm Ic show-super Oo Ar options Oc Ar device
+Dump superblock information to stdout.
+.Bl -tag -width Ds
+.It Fl f , Fl -fields Ns = Ns Ar fields
+List of sections to print
+.It Fl l , Fl -layout
+Print superblock layout
+.El
+.El
+.Sh Repair commands
+.Bl -tag -width Ds
+.It Nm Ic fsck Oo Ar options Oc Ar devices\ ...
+Check an existing filesystem for errors.
+.Bl -tag -width Ds
+.It Fl p
+Automatic repair (no questions)
+.It Fl n
+Don't repair, only check for errors
+.It Fl y
+Assume "yes" to all questions
+.It Fl f
+Force checking even if filesystem is marked clean
+.It Fl v
+Be verbose
+.El
+.El
+.Sh Startup/shutdown, assembly of multi device filesystems
+.Bl -tag -width Ds
+.It Nm Ic assemble Ar devices\ ...
+Assemble an existing multi device filesystem.
+.It Nm Ic incremental Ar device
+Incrementally assemble an existing multi device filesystem.
+.It Nm Ic run
+Start a partially assembled filesystem.
+.It Nm Ic stop Ar filesystem
+Stop a running filesystem.
+.El
+.Sh Commands for managing a running filesystem
+.Bl -tag -width Ds
+.It Nm Ic fs Ic usage Oo Ar options Oc Op Ar filesystem
+Show disk usage.
+.Bl -tag -width Ds
+.It Fl h
+Print human readable sizes.
+.El
+.El
+.Sh Commands for managing devices within a running filesystem
+.Bl -tag -width Ds
+.It Nm Ic device Ic add Oo Ar options Oc Ar device
+Add a device to an existing filesystem.
+.Bl -tag -width Ds
+.It Fl -fs_size Ns = Ns Ar size
+Size of filesystem on device
+.It Fl -bucket Ns = Ns Ar size
+Set bucket size
+.It Fl -discard
+Enable discards
+.It Fl f , Fl -force
+Use device even if it appears to already be formatted
+.El
+.It Nm Ic device Ic remove Oo Ar options Oc Ar device
+Remove a device from a filesystem
+.Bl -tag -width Ds
+.It Fl f , Fl -force
+Force removal, even if some data couldn't be migrated
+.It Fl -force-metadata
+Force removal, even if some metadata couldn't be migrated
+.El
+.It Nm Ic device Ic online Ar device
+Re-add a device to a running filesystem
+.It Nm Ic device Ic offline Ar device
+Take a device offline, without removing it
+.Bl -tag -width Ds
+.It Fl f , Fl -force
+Force, if data redundancy will be degraded
+.El
+.It Nm Ic device Ic evacuate Ar device
+Move data off of a given device
+.It Nm Ic device Ic set-state Oo Ar options Oc Ar device Ar new-state
+.Bl -tag -width Ds
+.It Fl f , Fl -force
+Force, if data redundancy will be degraded
+.El
+.It Nm Ic device Ic resize Ar device Op Ar size
+Resize filesystem on a device
+.El
+.Sh Commands for managing filesystem data
+.Bl -tag -width Ds
+.It Nm Ic data Ic rereplicate Ar filesystem
+Walks existing data in a filesystem,
+writing additional copies of any degraded data.
+.El
+.Sh Commands for encryption
+.Bl -tag -width Ds
+.It Nm Ic unlock Ar device
+Unlock an encrypted filesystem prior to running/mounting.
+.It Nm Ic set-passphrase Ar devices\ ...
+Change passphrase on an existing (unmounted) filesystem.
+.It Nm Ic remove-passphrase Ar devices\ ...
+Remove passphrase on an existing (unmounted) filesystem.
+.El
+.Sh Commands for migration
+.Bl -tag -width Ds
+.It Nm Ic migrate Oo Ar options Oc Ar device
+Migrate an existing filesystem to bcachefs
+.Bl -tag -width Ds
+.It Fl f Ar fs
+Root of filesystem to migrate
+.It Fl -encrypted
+Enable whole filesystem encryption (chacha20/poly1305)
+.It Fl -no_passphrase
+Don't encrypt master encryption key
+.It Fl F
+Force, even if metadata file already exists
+.El
+.It Nm Ic migrate-superblock Oo Ar options Oc Ar device
+Create default superblock after migrating
+.Bl -tag -width Ds
+.It Fl d Ar device
+Device to create superblock for
+.It Fl o Ar offset
+Offset of existing superblock
+.El
+.El
+.Sh Commands for debugging
+These commands work on offline, unmounted filesystems.
+.Bl -tag -width Ds
+.It Nm Ic dump Oo Ar options Oc Ar device
+Dump filesystem metadata
+.Bl -tag -width Ds
+.It Fl o Ar output
+Required flag: Output qcow2 image(s)
+.It Fl f
+Force; overwrite when needed
+.El
+.It Nm Ic list Oo Ar options Oc Ar devices\ ...
+List filesystem metadata to stdout
+.Bl -tag -width Ds
+.It Fl b ( Cm extents | inodes | dirents | xattrs )
+Btree to list from
+.It Fl s Ar inode Ns Cm \&: Ns Ar offset
+Start position to list from
+.It Fl e Ar inode Ns Cm \&: Ns Ar offset
+End position
+.It Fl i Ar inode
+List keys for a given inode number
+.It Fl m ( Cm keys | formats )
+List mode
+.It Fl f
+Force fsck
+.It Fl v
+Verbose mode
+.El
+.El
+.Sh Miscellaneous commands
+.Bl -tag -width Ds
+.It Nm Ic version
+Display the version of the invoked bcachefs tool
+.El
+.Sh EXIT STATUS
+.Ex -std
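
As a worked example of the encryption commands documented above (the device
name is a placeholder; format --encrypted prompts for a passphrase):

    bcachefs format --encrypted /dev/sda   # chacha20/poly1305 whole-fs encryption
    bcachefs unlock /dev/sda               # load the key before running/mounting
    bcachefs set-passphrase /dev/sda       # change it later, while unmounted
    bcachefs remove-passphrase /dev/sda    # drop passphrase protection of the key
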
diff --git a/bcachefs.c b/bcachefs.c
new file mode 100644 (file)
index 0000000..b4958f1
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ *         Gabriel de Perthuis <g2p.code@gmail.com>
+ *         Jacob Malevich <jam@datera.io>
+ *
+ * GPLv2
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <raid/raid.h>
+
+#include "cmds.h"
+
+static void usage(void)
+{
+       puts("bcachefs - tool for managing bcachefs filesystems\n"
+            "usage: bcachefs <command> [<args>]\n"
+            "\n"
+            "Superblock commands:\n"
+            "  format               Format a new filesystem\n"
+            "  show-super           Dump superblock information to stdout\n"
+            "\n"
+            "Repair:\n"
+            "  fsck                 Check an existing filesystem for errors\n"
+            "\n"
+            "Startup/shutdown, assembly of multi device filesystems:\n"
+#if 0
+            "  assemble             Assemble an existing multi device filesystem\n"
+            "  incremental          Incrementally assemble an existing multi device filesystem\n"
+            "  run                  Start a partially assembled filesystem\n"
+            "  stop                   Stop a running filesystem\n"
+#endif
+            "\n"
+            "Commands for managing a running filesystem:\n"
+            "  fs usage             Show disk usage\n"
+            "\n"
+            "Commands for managing devices within a running filesystem:\n"
+            "  device add           Add a new device to an existing filesystem\n"
+            "  device remove        Remove a device from an existing filesystem\n"
+            "  device online        Re-add an existing member to a filesystem\n"
+            "  device offline       Take a device offline, without removing it\n"
+            "  device evacuate      Migrate data off of a specific device\n"
+            "  device set-state     Mark a device as failed\n"
+            "  device resize        Resize filesystem on a device\n"
+            "\n"
+            "Commands for managing filesystem data:\n"
+            "  data rereplicate     Rereplicate degraded data\n"
+            "\n"
+            "Encryption:\n"
+            "  unlock               Unlock an encrypted filesystem prior to running/mounting\n"
+            "  set-passphrase       Change passphrase on an existing (unmounted) filesystem\n"
+            "  remove-passphrase    Remove passphrase on an existing (unmounted) filesystem\n"
+            "\n"
+            "Migrate:\n"
+            "  migrate              Migrate an existing filesystem to bcachefs, in place\n"
+            "  migrate-superblock   Add default superblock, after bcachefs migrate\n"
+            "\n"
+            "Commands for operating on files in a bcachefs filesystem:\n"
+            "  setattr              Set various per file attributes\n"
+            "\n"
+            "Debug:\n"
+            "These commands work on offline, unmounted filesystems\n"
+            "  dump                 Dump filesystem metadata to a qcow2 image\n"
+            "  list                 List filesystem metadata in textual form\n"
+            "  list_journal         List contents of journal\n"
+            "\n"
+            "Miscellaneous:\n"
+            "  version              Display the version of the invoked bcachefs tool\n");
+}
+
+static char *full_cmd;
+
+static char *pop_cmd(int *argc, char *argv[])
+{
+       if (*argc < 2) {
+               printf("%s: missing command\n", argv[0]);
+               usage();
+               exit(EXIT_FAILURE);
+       }
+
+       char *cmd = argv[1];
+       /* shift the remaining arguments (and the NULL terminator) down one */
+       memmove(&argv[1], &argv[2], (*argc - 1) * sizeof(argv[0]));
+       (*argc)--;
+
+       full_cmd = mprintf("%s %s", full_cmd, cmd);
+       return cmd;
+}
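+
+/*
+ * Example (editor's note): for "bcachefs device add /dev/sda", main() pops
+ * "device" and device_cmds() then pops "add"; argc goes 4 -> 3 -> 2, argv
+ * becomes {"bcachefs", "/dev/sda", NULL} and full_cmd reads
+ * "bcachefs device add".
+ */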
+
+static int fs_cmds(int argc, char *argv[])
+{
+       char *cmd = pop_cmd(&argc, argv);
+
+       if (!strcmp(cmd, "usage"))
+               return cmd_fs_usage(argc, argv);
+
+       usage();
+       return 0;
+}
+
+static int device_cmds(int argc, char *argv[])
+{
+       char *cmd = pop_cmd(&argc, argv);
+
+       if (!strcmp(cmd, "add"))
+               return cmd_device_add(argc, argv);
+       if (!strcmp(cmd, "remove"))
+               return cmd_device_remove(argc, argv);
+       if (!strcmp(cmd, "online"))
+               return cmd_device_online(argc, argv);
+       if (!strcmp(cmd, "offline"))
+               return cmd_device_offline(argc, argv);
+       if (!strcmp(cmd, "evacuate"))
+               return cmd_device_evacuate(argc, argv);
+       if (!strcmp(cmd, "set-state"))
+               return cmd_device_set_state(argc, argv);
+       if (!strcmp(cmd, "resize"))
+               return cmd_device_resize(argc, argv);
+
+       usage();
+       return 0;
+}
+
+static int data_cmds(int argc, char *argv[])
+{
+       char *cmd = pop_cmd(&argc, argv);
+
+       if (!strcmp(cmd, "rereplicate"))
+               return cmd_data_rereplicate(argc, argv);
+
+       usage();
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       raid_init();
+
+       full_cmd = argv[0];
+
+       setvbuf(stdout, NULL, _IOLBF, 0);
+
+       char *cmd = pop_cmd(&argc, argv);
+
+       if (!strcmp(cmd, "version"))
+               return cmd_version(argc, argv);
+       if (!strcmp(cmd, "format"))
+               return cmd_format(argc, argv);
+       if (!strcmp(cmd, "show-super"))
+               return cmd_show_super(argc, argv);
+
+       if (!strcmp(cmd, "fsck"))
+               return cmd_fsck(argc, argv);
+
+#if 0
+       if (!strcmp(cmd, "assemble"))
+               return cmd_assemble(argc, argv);
+       if (!strcmp(cmd, "incremental"))
+               return cmd_incremental(argc, argv);
+       if (!strcmp(cmd, "run"))
+               return cmd_run(argc, argv);
+       if (!strcmp(cmd, "stop"))
+               return cmd_stop(argc, argv);
+#endif
+
+       if (!strcmp(cmd, "fs"))
+               return fs_cmds(argc, argv);
+
+       if (!strcmp(cmd, "device"))
+               return device_cmds(argc, argv);
+
+       if (!strcmp(cmd, "data"))
+               return data_cmds(argc, argv);
+
+       if (!strcmp(cmd, "unlock"))
+               return cmd_unlock(argc, argv);
+       if (!strcmp(cmd, "set-passphrase"))
+               return cmd_set_passphrase(argc, argv);
+       if (!strcmp(cmd, "remove-passphrase"))
+               return cmd_remove_passphrase(argc, argv);
+
+       if (!strcmp(cmd, "migrate"))
+               return cmd_migrate(argc, argv);
+       if (!strcmp(cmd, "migrate-superblock"))
+               return cmd_migrate_superblock(argc, argv);
+
+       if (!strcmp(cmd, "dump"))
+               return cmd_dump(argc, argv);
+       if (!strcmp(cmd, "list"))
+               return cmd_list(argc, argv);
+       if (!strcmp(cmd, "list_journal"))
+               return cmd_list_journal(argc, argv);
+
+       if (!strcmp(cmd, "setattr"))
+               return cmd_setattr(argc, argv);
+
+#ifdef BCACHEFS_FUSE
+       if (!strcmp(cmd, "fusemount"))
+               return cmd_fusemount(argc, argv);
+#endif
+
+       if (!strcmp(cmd, "--help")) {
+               usage();
+               return 0;
+       }
+
+       printf("Unknown command %s\n", cmd);
+       usage();
+       exit(EXIT_FAILURE);
+}
diff --git a/ccan/array_size/LICENSE b/ccan/array_size/LICENSE
new file mode 100644 (file)
index 0000000..feb9b11
--- /dev/null
@@ -0,0 +1,28 @@
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+    the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+    moral rights retained by the original author(s) and/or performer(s);
+    publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+    rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+    rights protecting the extraction, dissemination, use and reuse of data in a Work;
+    database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+    other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+    No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+    Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+    Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+    Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/ccan/array_size/_info b/ccan/array_size/_info
new file mode 100644 (file)
index 0000000..69570f3
--- /dev/null
@@ -0,0 +1,46 @@
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * array_size - routine for safely deriving the size of a visible array.
+ *
+ * This provides a simple ARRAY_SIZE() macro, which (given a good compiler)
+ * will also break compile if you try to use it on a pointer.
+ *
+ * This can ensure your code is robust to changes, without needing a gratuitous
+ * macro or constant.
+ *
+ * Example:
+ *     // Outputs "Initialized 32 values\n"
+ *     #include <ccan/array_size/array_size.h>
+ *     #include <stdlib.h>
+ *     #include <stdio.h>
+ *
+ *     // We currently use 32 random values.
+ *     static unsigned int vals[32];
+ *
+ *     int main(void)
+ *     {
+ *             unsigned int i;
+ *             for (i = 0; i < ARRAY_SIZE(vals); i++)
+ *                     vals[i] = random();
+ *             printf("Initialized %u values\n", i);
+ *             return 0;
+ *     }
+ *
+ * License: CC0 (Public domain)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+       if (argc != 2)
+               return 1;
+
+       if (strcmp(argv[1], "depends") == 0) {
+               printf("ccan/build_assert\n");
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/ccan/array_size/array_size.h b/ccan/array_size/array_size.h
new file mode 100644 (file)
index 0000000..0ca422a
--- /dev/null
@@ -0,0 +1,26 @@
+/* CC0 (Public domain) - see LICENSE file for details */
+#ifndef CCAN_ARRAY_SIZE_H
+#define CCAN_ARRAY_SIZE_H
+#include "config.h"
+#include <ccan/build_assert/build_assert.h>
+
+/**
+ * ARRAY_SIZE - get the number of elements in a visible array
+ * @arr: the array whose size you want.
+ *
+ * This does not work on pointers, or arrays declared as [], or
+ * function parameters.  With correct compiler support, such usage
+ * will cause a build error (see build_assert).
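+ *
+ * Example (editor's sketch):
+ *     int primes[] = { 2, 3, 5, 7 };
+ *     size_t n = ARRAY_SIZE(primes);  // n == 4
+ *     int *p = primes;
+ *     // with typeof support, ARRAY_SIZE(p) fails to compile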
+ */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + _array_size_chk(arr))
+
+#if HAVE_BUILTIN_TYPES_COMPATIBLE_P && HAVE_TYPEOF
+/* Two gcc extensions.
+ * &a[0] degrades to a pointer: a different type from an array */
+#define _array_size_chk(arr)                                           \
+       BUILD_ASSERT_OR_ZERO(!__builtin_types_compatible_p(typeof(arr), \
+                                                       typeof(&(arr)[0])))
+#else
+#define _array_size_chk(arr) 0
+#endif
+#endif /* CCAN_ARRAY_SIZE_H */
diff --git a/ccan/build_assert/LICENSE b/ccan/build_assert/LICENSE
new file mode 100644 (file)
index 0000000..feb9b11
--- /dev/null
@@ -0,0 +1,28 @@
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+    the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+    moral rights retained by the original author(s) and/or performer(s);
+    publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+    rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+    rights protecting the extraction, dissemination, use and reuse of data in a Work;
+    database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+    other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+    No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+    Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+    Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+    Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/ccan/build_assert/_info b/ccan/build_assert/_info
new file mode 100644 (file)
index 0000000..97ebe6c
--- /dev/null
@@ -0,0 +1,49 @@
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * build_assert - routines for build-time assertions
+ *
+ * This code provides routines which will cause compilation to fail should some
+ * assertion be untrue: such failures are preferable to run-time assertions,
+ * but much more limited since they can only depend on compile-time constants.
+ *
+ * These assertions are most useful when two parts of the code must be kept in
+ * sync: it is better to avoid such cases if possible, but second best is to
+ * detect invalid changes at build time.
+ *
+ * For example, a tricky piece of code might rely on a certain element being at
+ * the start of the structure.  To ensure that future changes don't break it,
+ * you would catch such changes in your code like so:
+ *
+ * Example:
+ *     #include <stddef.h>
+ *     #include <ccan/build_assert/build_assert.h>
+ *
+ *     struct foo {
+ *             char string[5];
+ *             int x;
+ *     };
+ *
+ *     static char *foo_string(struct foo *foo)
+ *     {
+ *             // This trick requires that the string be first in the structure
+ *             BUILD_ASSERT(offsetof(struct foo, string) == 0);
+ *             return (char *)foo;
+ *     }
+ *
+ * License: CC0 (Public domain)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+       if (argc != 2)
+               return 1;
+
+       if (strcmp(argv[1], "depends") == 0)
+               /* Nothing. */
+               return 0;
+
+       return 1;
+}
diff --git a/ccan/build_assert/build_assert.h b/ccan/build_assert/build_assert.h
new file mode 100644 (file)
index 0000000..b9ecd84
--- /dev/null
@@ -0,0 +1,40 @@
+/* CC0 (Public domain) - see LICENSE file for details */
+#ifndef CCAN_BUILD_ASSERT_H
+#define CCAN_BUILD_ASSERT_H
+
+/**
+ * BUILD_ASSERT - assert a build-time dependency.
+ * @cond: the compile-time condition which must be true.
+ *
+ * Your compile will fail if the condition isn't true, or can't be evaluated
+ * by the compiler.  This can only be used within a function.
+ *
+ * Example:
+ *     #include <stddef.h>
+ *     ...
+ *     static char *foo_to_char(struct foo *foo)
+ *     {
+ *             // This code needs string to be at start of foo.
+ *             BUILD_ASSERT(offsetof(struct foo, string) == 0);
+ *             return (char *)foo;
+ *     }
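+ *
+ * (Editor's note: the trick is sizeof(char [1 - 2*!(cond)]): the array has
+ * size 1 when cond holds and size -1 otherwise, and a negative array size
+ * is rejected at compile time.)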
+ */
+#define BUILD_ASSERT(cond) \
+       do { (void) sizeof(char [1 - 2*!(cond)]); } while(0)
+
+/**
+ * BUILD_ASSERT_OR_ZERO - assert a build-time dependency, as an expression.
+ * @cond: the compile-time condition which must be true.
+ *
+ * Your compile will fail if the condition isn't true, or can't be evaluated
+ * by the compiler.  This can be used in an expression: its value is "0".
+ *
+ * Example:
+ *     #define foo_to_char(foo)                                        \
+ *              ((char *)(foo)                                         \
+ *               + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0))
+ */
+#define BUILD_ASSERT_OR_ZERO(cond) \
+       (sizeof(char [1 - 2*!(cond)]) - 1)
+
+#endif /* CCAN_BUILD_ASSERT_H */
diff --git a/ccan/compiler/LICENSE b/ccan/compiler/LICENSE
new file mode 100644 (file)
index 0000000..feb9b11
--- /dev/null
@@ -0,0 +1,28 @@
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+    the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+    moral rights retained by the original author(s) and/or performer(s);
+    publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+    rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+    rights protecting the extraction, dissemination, use and reuse of data in a Work;
+    database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+    other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+    No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+    Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+    Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+    Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/ccan/compiler/_info b/ccan/compiler/_info
new file mode 100644 (file)
index 0000000..d60dff4
--- /dev/null
@@ -0,0 +1,64 @@
+#include "config.h"
+#include <string.h>
+#include <stdio.h>
+
+/**
+ * compiler - macros for common compiler extensions
+ *
+ * Abstracts away some compiler hints.  Currently these include:
+ * - COLD
+ *     For functions not called in fast paths (aka. cold functions)
+ * - PRINTF_FMT
+ *     For functions which take printf-style parameters.
+ * - CONST_FUNCTION
+ *     For functions which return the same value for same parameters.
+ * - NEEDED
+ *     For functions and variables which must be emitted even if unused.
+ * - UNNEEDED
+ *     For functions and variables which need not be emitted if unused.
+ * - UNUSED
+ *     For parameters which are not used.
+ * - IS_COMPILE_CONSTANT()
+ *     For using different tradeoffs for compiletime vs runtime evaluation.
+ *
+ * License: CC0 (Public domain)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ *
+ * Example:
+ *     #include <ccan/compiler/compiler.h>
+ *     #include <stdio.h>
+ *     #include <stdarg.h>
+ *
+ *     // Example of a (slow-path) logging function.
+ *     static int log_threshold = 2;
+ *     static void COLD PRINTF_FMT(2,3)
+ *             logger(int level, const char *fmt, ...)
+ *     {
+ *             va_list ap;
+ *             va_start(ap, fmt);
+ *             if (level >= log_threshold)
+ *                     vfprintf(stderr, fmt, ap);
+ *             va_end(ap);
+ *     }
+ *
+ *     int main(int argc, char *argv[])
+ *     {
+ *             if (argc != 1) {
+ *                     logger(3, "Don't want %i arguments!\n", argc-1);
+ *                     return 1;
+ *             }
+ *             return 0;
+ *     }
+ */
+int main(int argc, char *argv[])
+{
+       /* Expect exactly one argument */
+       if (argc != 2)
+               return 1;
+
+       if (strcmp(argv[1], "depends") == 0) {
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/ccan/compiler/compiler.h b/ccan/compiler/compiler.h
new file mode 100644 (file)
index 0000000..bce4f25
--- /dev/null
@@ -0,0 +1,231 @@
+/* CC0 (Public domain) - see LICENSE file for details */
+#ifndef CCAN_COMPILER_H
+#define CCAN_COMPILER_H
+#include "config.h"
+
+#ifndef COLD
+#if HAVE_ATTRIBUTE_COLD
+/**
+ * COLD - a function is unlikely to be called.
+ *
+ * Used to mark an unlikely code path and optimize appropriately.
+ * It is usually used on logging or error routines.
+ *
+ * Example:
+ * static void COLD moan(const char *reason)
+ * {
+ *     fprintf(stderr, "Error: %s (%s)\n", reason, strerror(errno));
+ * }
+ */
+#define COLD __attribute__((__cold__))
+#else
+#define COLD
+#endif
+#endif
+
+#ifndef NORETURN
+#if HAVE_ATTRIBUTE_NORETURN
+/**
+ * NORETURN - a function does not return
+ *
+ * Used to mark a function which exits; useful for suppressing warnings.
+ *
+ * Example:
+ * static void NORETURN fail(const char *reason)
+ * {
+ *     fprintf(stderr, "Error: %s (%s)\n", reason, strerror(errno));
+ *     exit(1);
+ * }
+ */
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#endif
+#endif
+
+#ifndef PRINTF_FMT
+#if HAVE_ATTRIBUTE_PRINTF
+/**
+ * PRINTF_FMT - a function takes printf-style arguments
+ * @nfmt: the 1-based number of the function's format argument.
+ * @narg: the 1-based number of the function's first variable argument.
+ *
+ * This allows the compiler to check your parameters as it does for printf().
+ *
+ * Example:
+ * void PRINTF_FMT(2,3) my_printf(const char *prefix, const char *fmt, ...);
+ */
+#define PRINTF_FMT(nfmt, narg) \
+       __attribute__((format(__printf__, nfmt, narg)))
+#else
+#define PRINTF_FMT(nfmt, narg)
+#endif
+#endif
+
+#ifndef CONST_FUNCTION
+#if HAVE_ATTRIBUTE_CONST
+/**
+ * CONST_FUNCTION - a function's return depends only on its argument
+ *
+ * This allows the compiler to assume that the function will return the exact
+ * same value for the exact same arguments.  This implies that the function
+ * must not use global variables, or dereference pointer arguments.
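+ *
+ * Example (editor's sketch; 'mul3' is a hypothetical function):
+ * static int CONST_FUNCTION mul3(int x)
+ * {
+ *     return x * 3;
+ * }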
+ */
+#define CONST_FUNCTION __attribute__((__const__))
+#else
+#define CONST_FUNCTION
+#endif
+#endif
+
+#ifndef PURE_FUNCTION
+#if HAVE_ATTRIBUTE_PURE
+/**
+ * PURE_FUNCTION - a function is pure
+ *
+ * A pure function is one that has no side effects other than its return value
+ * and uses no inputs other than its arguments and global variables.
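+ *
+ * Example (editor's sketch; 'count_zeros' is hypothetical):
+ * static int PURE_FUNCTION count_zeros(const char *buf, int len)
+ * {
+ *     int i, n = 0;
+ *     for (i = 0; i < len; i++)
+ *             n += !buf[i];
+ *     return n;
+ * }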
+ */
+#define PURE_FUNCTION __attribute__((__pure__))
+#else
+#define PURE_FUNCTION
+#endif
+#endif
+
+#if HAVE_ATTRIBUTE_UNUSED
+#ifndef UNNEEDED
+/**
+ * UNNEEDED - a variable/function may not be needed
+ *
+ * This suppresses warnings about unused variables or functions, but tells
+ * the compiler that if it is unused it need not emit it into the object code.
+ *
+ * Example:
+ * // With some preprocessor options, this is unnecessary.
+ * static UNNEEDED int counter;
+ *
+ * // With some preprocessor options, this is unnecessary.
+ * static UNNEEDED void add_to_counter(int add)
+ * {
+ *     counter += add;
+ * }
+ */
+#define UNNEEDED __attribute__((__unused__))
+#endif
+
+#ifndef NEEDED
+#if HAVE_ATTRIBUTE_USED
+/**
+ * NEEDED - a variable/function is needed
+ *
+ * This suppresses warnings about unused variables or functions, but tells
+ * the compiler that it must exist even if it (seems) unused.
+ *
+ * Example:
+ *     // Even if this is unused, these are vital for debugging.
+ *     static NEEDED int counter;
+ *     static NEEDED void dump_counter(void)
+ *     {
+ *             printf("Counter is %i\n", counter);
+ *     }
+ */
+#define NEEDED __attribute__((__used__))
+#else
+/* Before the 'used' attribute existed, unused functions and vars were always
+ * emitted anyway, so 'unused' is enough to suppress the warnings. */
+#define NEEDED __attribute__((__unused__))
+#endif
+#endif
+
+#ifndef UNUSED
+/**
+ * UNUSED - a parameter is unused
+ *
+ * Some compilers (eg. gcc with -W or -Wunused) warn about unused
+ * function parameters.  This suppresses such warnings and indicates
+ * to the reader that it's deliberate.
+ *
+ * Example:
+ *     // This is used as a callback, so needs to have this prototype.
+ *     static int some_callback(void *unused UNUSED)
+ *     {
+ *             return 0;
+ *     }
+ */
+#define UNUSED __attribute__((__unused__))
+#endif
+#else
+#ifndef UNNEEDED
+#define UNNEEDED
+#endif
+#ifndef NEEDED
+#define NEEDED
+#endif
+#ifndef UNUSED
+#define UNUSED
+#endif
+#endif
+
+#ifndef IS_COMPILE_CONSTANT
+#if HAVE_BUILTIN_CONSTANT_P
+/**
+ * IS_COMPILE_CONSTANT - does the compiler know the value of this expression?
+ * @expr: the expression to evaluate
+ *
+ * When an expression manipulation is complicated, it is usually better to
+ * implement it in a function.  However, if the expression being manipulated is
+ * known at compile time, it is better to have the compiler see the entire
+ * expression so it can simply substitute the result.
+ *
+ * This can be done using the IS_COMPILE_CONSTANT() macro.
+ *
+ * Example:
+ *     enum greek { ALPHA, BETA, GAMMA, DELTA, EPSILON };
+ *
+ *     // Out-of-line version.
+ *     const char *greek_name(enum greek greek);
+ *
+ *     // Inline version.
+ *     static inline const char *_greek_name(enum greek greek)
+ *     {
+ *             switch (greek) {
+ *             case ALPHA: return "alpha";
+ *             case BETA: return "beta";
+ *             case GAMMA: return "gamma";
+ *             case DELTA: return "delta";
+ *             case EPSILON: return "epsilon";
+ *             default: return "**INVALID**";
+ *             }
+ *     }
+ *
+ *     // Use inline if compiler knows answer.  Otherwise call function
+ *     // to avoid copies of the same code everywhere.
+ *     #define greek_name(g)                                           \
+ *              (IS_COMPILE_CONSTANT(g) ? _greek_name(g) : greek_name(g))
+ */
+#define IS_COMPILE_CONSTANT(expr) __builtin_constant_p(expr)
+#else
+/* If we don't know, assume it's not. */
+#define IS_COMPILE_CONSTANT(expr) 0
+#endif
+#endif
+
+#ifndef WARN_UNUSED_RESULT
+#if HAVE_WARN_UNUSED_RESULT
+/**
+ * WARN_UNUSED_RESULT - warn if a function return value is unused.
+ *
+ * Used to mark a function where it is extremely unlikely that the caller
+ * can ignore the result, eg realloc().
+ *
+ * Example:
+ * // buf param may be freed by this; need return value!
+ * static char *WARN_UNUSED_RESULT enlarge(char *buf, unsigned *size)
+ * {
+ *     return realloc(buf, (*size) *= 2);
+ * }
+ */
+#define WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+#endif
+#endif /* CCAN_COMPILER_H */
diff --git a/ccan/darray/LICENSE b/ccan/darray/LICENSE
new file mode 100644 (file)
index 0000000..89de354
--- /dev/null
@@ -0,0 +1,17 @@
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/ccan/darray/_info b/ccan/darray/_info
new file mode 100644 (file)
index 0000000..b6d5e4b
--- /dev/null
@@ -0,0 +1,57 @@
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+#include "ccan/darray/darray.h"
+
+/**
+ * darray - Generic resizable arrays
+ *
+ * darray is a set of macros for managing dynamically-allocated arrays.
+ * It removes the tedium of managing realloc'd arrays with pointer, size, and
+ * allocated size.
+ *
+ * Example:
+ * #include <ccan/darray/darray.h>
+ * #include <stdio.h>
+ * 
+ * int main(void) {
+ *     darray(int) numbers = darray_new();
+ *     char buffer[32];
+ *     
+ *     for (;;) {
+ *             int *i;
+ *             darray_foreach(i, numbers)
+ *                     printf("%d ", *i);
+ *             if (darray_size(numbers) > 0)
+ *                     puts("");
+ *             
+ *             printf("darray> ");
+ *             fgets(buffer, sizeof(buffer), stdin);
+ *             if (*buffer == '\0' || *buffer == '\n')
+ *                     break;
+ *             
+ *             darray_append(numbers, atoi(buffer));
+ *     }
+ *     
+ *     darray_free(numbers);
+ *     
+ *     return 0;
+ * }
+ *
+ * Author: Joey Adams <joeyadams3.14159@gmail.com>
+ * License: MIT
+ * Version: 0.2
+ */
+int main(int argc, char *argv[])
+{
+       if (argc != 2)
+               return 1;
+
+       if (strcmp(argv[1], "depends") == 0) {
+               /* Nothing. */
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/ccan/darray/darray.h b/ccan/darray/darray.h
new file mode 100644 (file)
index 0000000..7511241
--- /dev/null
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2011 Joseph Adams <joeyadams3.14159@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef CCAN_DARRAY_H
+#define CCAN_DARRAY_H
+
+#include <stdlib.h>
+#include <string.h>
+#include "config.h"
+
+/*
+ * SYNOPSIS
+ *
+ * Life cycle of a darray (dynamically-allocated array):
+ *
+ *     darray(int) a = darray_new();
+ *     darray_free(a);
+ *
+ *     struct {darray(int) a;} foo;
+ *     darray_init(foo.a);
+ *     darray_free(foo.a);
+ *
+ * Typedefs for darrays of common types:
+ *
+ *     darray_char, darray_schar, darray_uchar
+ *     darray_short, darray_int, darray_long
+ *     darray_ushort, darray_uint, darray_ulong
+ *
+ * Access:
+ *
+ *     T      darray_item(darray(T) arr, size_t index);
+ *     size_t darray_size(darray(T) arr);
+ *     size_t darray_alloc(darray(T) arr);
+ *     bool   darray_empty(darray(T) arr);
+ *
+ * Insertion (single item):
+ *
+ *     void   darray_append(darray(T) arr, T item);
+ *     void   darray_prepend(darray(T) arr, T item);
+ *     void   darray_push(darray(T) arr, T item); // same as darray_append
+ *
+ * Insertion (multiple items):
+ *
+ *     void   darray_append_items(darray(T) arr, T *items, size_t count);
+ *     void   darray_prepend_items(darray(T) arr, T *items, size_t count);
+ *
+ *     void   darray_appends(darray(T) arr, [T item, [...]]);
+ *     void   darray_prepends(darray(T) arr, [T item, [...]]);
+ *
+ *     // Same functionality as above, but does not require typeof.
+ *     void   darray_appends_t(darray(T) arr, #T, [T item, [...]]);
+ *     void   darray_prepends_t(darray(T) arr, #T, [T item, [...]]);
+ *
+ * Removal:
+ *
+ *     T      darray_pop(darray(T) arr | darray_size(arr) != 0);
+ *     T*     darray_pop_check(darray(T*) arr);
+ *     void   darray_remove(darray(T) arr, size_t index);
+ *
+ * Replacement:
+ *
+ *     void   darray_from_items(darray(T) arr, T *items, size_t count);
+ *     void   darray_from_c(darray(T) arr, T c_array[N]);
+ *
+ * String buffer:
+ *
+ *     void   darray_append_string(darray(char) arr, const char *str);
+ *     void   darray_append_lit(darray(char) arr, char stringLiteral[N+1]);
+ *
+ *     void   darray_prepend_string(darray(char) arr, const char *str);
+ *     void   darray_prepend_lit(darray(char) arr, char stringLiteral[N+1]);
+ *
+ *     void   darray_from_string(darray(T) arr, const char *str);
+ *     void   darray_from_lit(darray(char) arr, char stringLiteral[N+1]);
+ *
+ * Size management:
+ *
+ *     void   darray_resize(darray(T) arr, size_t newSize);
+ *     void   darray_resize0(darray(T) arr, size_t newSize);
+ *
+ *     void   darray_realloc(darray(T) arr, size_t newAlloc);
+ *     void   darray_growalloc(darray(T) arr, size_t newAlloc);
+ *
+ *     void   darray_make_room(darray(T) arr, size_t room);
+ *
+ * Traversal:
+ *
+ *     darray_foreach(T *&i, darray(T) arr) {...}
+ *     darray_foreach_reverse(T *&i, darray(T) arr) {...}
+ *
+ * Except for darray_foreach, darray_foreach_reverse, and darray_remove,
+ * all macros evaluate their non-darray arguments only once.
+ */
+
+/*** Life cycle ***/
+
+#define darray(type) struct {type *item; size_t size; size_t alloc;}
+
+#define darray_new() {0,0,0}
+#define darray_init(arr) do {(arr).item=0; (arr).size=0; (arr).alloc=0;} while(0)
+#define darray_free(arr) do {free((arr).item);} while(0)
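+/* Editor's note: darray_free() only frees the buffer; item/size/alloc are
+ * left stale, so call darray_init() again before reusing the struct. */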
+
+
+/*
+ * Typedefs for darrays of common types.  These are useful
+ * when you want to pass a pointer to an darray(T) around.
+ *
+ * The following will produce an incompatible pointer warning:
+ *
+ *     void foo(darray(int) *arr);
+ *     darray(int) arr = darray_new();
+ *     foo(&arr);
+ *
+ * The workaround:
+ *
+ *     void foo(darray_int *arr);
+ *     darray_int arr = darray_new();
+ *     foo(&arr);
+ */
+
+typedef darray(char)           darray_char;
+typedef darray(signed char)    darray_schar;
+typedef darray(unsigned char)  darray_uchar;
+
+typedef darray(short)          darray_short;
+typedef darray(int)            darray_int;
+typedef darray(long)           darray_long;
+
+typedef darray(unsigned short) darray_ushort;
+typedef darray(unsigned int)   darray_uint;
+typedef darray(unsigned long)  darray_ulong;
+
+
+/*** Access ***/
+
+#define darray_item(arr, i) ((arr).item[i])
+#define darray_size(arr)    ((arr).size)
+#define darray_alloc(arr)   ((arr).alloc)
+#define darray_empty(arr)   ((arr).size == 0)
+
+
+/*** Insertion (single item) ***/
+
+#define darray_append(arr, ...) do { \
+               darray_resize(arr, (arr).size+1); \
+               (arr).item[(arr).size-1] = (__VA_ARGS__); \
+       } while(0)
+#define darray_prepend(arr, ...) do { \
+               darray_resize(arr, (arr).size+1); \
+               memmove((arr).item+1, (arr).item, ((arr).size-1)*sizeof(*(arr).item)); \
+               (arr).item[0] = (__VA_ARGS__); \
+       } while(0)
+#define darray_push(arr, ...) darray_append(arr, __VA_ARGS__)
+
+
+/*** Insertion (multiple items) ***/
+
+#define darray_append_items(arr, items, count) do { \
+               size_t __count = (count), __oldSize = (arr).size; \
+               darray_resize(arr, __oldSize + __count); \
+               memcpy((arr).item + __oldSize, items, __count * sizeof(*(arr).item)); \
+       } while(0)
+
+#define darray_prepend_items(arr, items, count) do { \
+               size_t __count = (count), __oldSize = (arr).size; \
+               darray_resize(arr, __count + __oldSize); \
+               memmove((arr).item + __count, (arr).item, __oldSize * sizeof(*(arr).item)); \
+               memcpy((arr).item, items, __count * sizeof(*(arr).item)); \
+       } while(0)
+
+#define darray_append_items_nullterminate(arr, items, count) do { \
+               size_t __count = (count), __oldSize = (arr).size; \
+               darray_resize(arr, __oldSize + __count + 1); \
+               memcpy((arr).item + __oldSize, items, __count * sizeof(*(arr).item)); \
+               (arr).item[--(arr).size] = 0; \
+       } while(0)
+
+#define darray_prepend_items_nullterminate(arr, items, count) do { \
+               size_t __count = (count), __oldSize = (arr).size; \
+               darray_resize(arr, __count + __oldSize + 1); \
+               memmove((arr).item + __count, (arr).item, __oldSize * sizeof(*(arr).item)); \
+               memcpy((arr).item, items, __count * sizeof(*(arr).item)); \
+               (arr).item[--(arr).size] = 0; \
+       } while(0)
+
+#if HAVE_TYPEOF
+#define darray_appends(arr, ...) darray_appends_t(arr, typeof((*(arr).item)), __VA_ARGS__)
+#define darray_prepends(arr, ...) darray_prepends_t(arr, typeof((*(arr).item)), __VA_ARGS__)
+#endif
+
+#define darray_appends_t(arr, type, ...) do { \
+               type __src[] = {__VA_ARGS__}; \
+               darray_append_items(arr, __src, sizeof(__src)/sizeof(*__src)); \
+       } while(0)
+#define darray_prepends_t(arr, type, ...) do { \
+               type __src[] = {__VA_ARGS__}; \
+               darray_prepend_items(arr, __src, sizeof(__src)/sizeof(*__src)); \
+       } while(0)
+
+
+/*** Removal ***/
+
+/* Warning: Do not call darray_pop on an empty darray. */
+#define darray_pop(arr) ((arr).item[--(arr).size])
+#define darray_pop_check(arr) ((arr).size ? darray_pop(arr) : NULL)
+/* Warning, slow: Requires copying all elements after removed item. */
+#define darray_remove(arr, index) do { \
+       if ((index) < (arr).size - 1) \
+               memmove(&(arr).item[index], &(arr).item[(index)+1], \
+                       ((arr).size - 1 - (index)) * sizeof(*(arr).item)); \
+       (arr).size--; \
+       } while(0)
+
+
+/*** Replacement ***/
+
+#define darray_from_items(arr, items, count) do {size_t __count = (count); darray_resize(arr, __count); memcpy((arr).item, items, __count*sizeof(*(arr).item));} while(0)
+#define darray_from_c(arr, c_array) darray_from_items(arr, c_array, sizeof(c_array)/sizeof(*(c_array)))
+
+
+/*** String buffer ***/
+
+#define darray_append_string(arr, str) do {const char *__str = (str); darray_append_items(arr, __str, strlen(__str)+1); (arr).size--;} while(0)
+#define darray_append_lit(arr, stringLiteral) do {darray_append_items(arr, stringLiteral, sizeof(stringLiteral)); (arr).size--;} while(0)
+
+#define darray_prepend_string(arr, str) do { \
+               const char *__str = (str); \
+               darray_prepend_items_nullterminate(arr, __str, strlen(__str)); \
+       } while(0)
+#define darray_prepend_lit(arr, stringLiteral) \
+       darray_prepend_items_nullterminate(arr, stringLiteral, sizeof(stringLiteral) - 1)
+
+#define darray_from_string(arr, str) do {const char *__str = (str); darray_from_items(arr, __str, strlen(__str)+1); (arr).size--;} while(0)
+#define darray_from_lit(arr, stringLiteral) do {darray_from_items(arr, stringLiteral, sizeof(stringLiteral)); (arr).size--;} while(0)
+
+
+/*** Size management ***/
+
+#define darray_resize(arr, newSize) darray_growalloc(arr, (arr).size = (newSize))
+#define darray_resize0(arr, newSize) do { \
+               size_t __oldSize = (arr).size, __newSize = (newSize); \
+               (arr).size = __newSize; \
+               if (__newSize > __oldSize) { \
+                       darray_growalloc(arr, __newSize); \
+                       memset(&(arr).item[__oldSize], 0, (__newSize - __oldSize) * sizeof(*(arr).item)); \
+               } \
+       } while(0)
+
+#define darray_realloc(arr, newAlloc) do { \
+               (arr).item = realloc((arr).item, ((arr).alloc = (newAlloc)) * sizeof(*(arr).item)); \
+       } while(0)
+#define darray_growalloc(arr, need) do { \
+               size_t __need = (need); \
+               if (__need > (arr).alloc) \
+                       darray_realloc(arr, darray_next_alloc((arr).alloc, __need)); \
+       } while(0)
+
+#if HAVE_STATEMENT_EXPR==1
+#define darray_make_room(arr, room) ({size_t newAlloc = (arr).size+(room); if ((arr).alloc<newAlloc) darray_realloc(arr, newAlloc); (arr).item+(arr).size; })
+#endif
+
+static inline size_t darray_next_alloc(size_t alloc, size_t need)
+{
+       if (alloc == 0)
+               alloc = 1;
+       while (alloc < need)
+               alloc *= 2;
+       return alloc;
+}
+
+
+/*** Traversal ***/
+
+/*
+ * darray_foreach(T *&i, darray(T) arr) {...}
+ *
+ * Traverse a darray.  `i` must be declared in advance as a pointer to an item.
+ */
+#define darray_foreach(i, arr) \
+       for ((i) = &(arr).item[0]; (i) < &(arr).item[(arr).size]; (i)++)
+
+/*
+ * darray_foreach_reverse(T *&i, darray(T) arr) {...}
+ *
+ * Like darray_foreach, but traverse in reverse order.
+ */
+#define darray_foreach_reverse(i, arr) \
+       for ((i) = &(arr).item[(arr).size]; (i)-- > &(arr).item[0]; )
+
+
+#endif /* CCAN_DARRAY_H */
+
+/*
+
+darray_growalloc(arr, newAlloc) sees if the darray can currently hold newAlloc items;
+       if not, it increases the alloc to satisfy this requirement, allocating slack
+       space to avoid having to reallocate for every size increment.
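+
+Editor's sketch: with the doubling policy in darray_next_alloc() above,
+       growing from alloc=4 to need=9 goes 4 -> 8 -> 16, so n consecutive
+       darray_append calls trigger only O(log n) reallocs, amortized O(1) each.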
+
+darray_from_string(arr, str) copies a string to an darray_char.
+
+darray_push(arr, item) pushes an item to the end of the darray.
+darray_pop(arr) pops it back out.  Be sure there is at least one item in the darray before calling.
+darray_pop_check(arr) does the same as darray_pop, but returns NULL if there are no more items left in the darray.
+
+darray_make_room(arr, room) ensures there's 'room' elements of space after the end of the darray, and it returns a pointer to this space.
+Currently requires HAVE_STATEMENT_EXPR, but I plan to remove this dependency by creating an inline function.
+
+The following require HAVE_TYPEOF==1 :
+
+darray_appends(arr, item0, item1...) appends a collection of comma-delimited items to the darray.
+darray_prepends(arr, item0, item1...) prepends a collection of comma-delimited items to the darray.
+
+
+Examples:
+
+       darray(int)  arr = darray_new();
+       int        *i;
+       
+       darray_appends(arr, 0,1,2,3,4);
+       darray_appends(arr, -5,-4,-3,-2,-1);
+       darray_foreach(i, arr)
+               printf("%d ", *i);
+       printf("\n");
+       
+       darray_free(arr);
+       
+
+       typedef struct {int n,d;} Fraction;
+       darray(Fraction) fractions = darray_new();
+       Fraction        *i;
+       
+       darray_appends(fractions, {3,4}, {3,5}, {2,1});
+       darray_foreach(i, fractions)
+               printf("%d/%d\n", i->n, i->d);
+       
+       darray_free(fractions);
+*/
diff --git a/cmd_assemble.c b/cmd_assemble.c
new file mode 100644 (file)
index 0000000..a997e1e
--- /dev/null
@@ -0,0 +1,48 @@
+#include <alloca.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+
+#if 0
+int cmd_assemble(int argc, char *argv[])
+{
+       unsigned nr_devs = argc - 1;
+
+       if (argc <= 1)
+               die("Please supply at least one device");
+
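+       /*
+        * BCH_IOCTL_ASSEMBLE takes a header plus one __u64 per device, each
+        * holding the userspace address of a device-name string; hence the
+        * variable-size alloca and the casts through unsigned long below.
+        */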
+       struct bch_ioctl_assemble *assemble =
+               alloca(sizeof(*assemble) + sizeof(__u64) * nr_devs);
+
+       memset(assemble, 0, sizeof(*assemble));
+       assemble->nr_devs = nr_devs;
+
+       unsigned i;
+       for (i = 0; i < nr_devs; i++)
+               assemble->devs[i] = (unsigned long) argv[i + 1];
+
+       xioctl(bcachectl_open(), BCH_IOCTL_ASSEMBLE, assemble);
+       return 0;
+}
+
+int cmd_incremental(int argc, char *argv[])
+{
+       if (argc != 2)
+               die("Please supply exactly one device");
+
+       struct bch_ioctl_incremental incremental = {
+               .dev = (unsigned long) argv[1],
+       };
+
+       xioctl(bcachectl_open(), BCH_IOCTL_INCREMENTAL, &incremental);
+       return 0;
+}
+#endif
diff --git a/cmd_attr.c b/cmd_attr.c
new file mode 100644 (file)
index 0000000..aef42f4
--- /dev/null
@@ -0,0 +1,113 @@
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
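+/*
+ * Ask the kernel to reinherit bcachefs inode options from the parent
+ * directory (BCHFS_IOC_REINHERIT_ATTRS) for everything under dirfd,
+ * recursing into each subdirectory where the ioctl actually did work.
+ */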
+static void propagate_recurse(int dirfd)
+{
+       DIR *dir = fdopendir(dirfd);
+       struct dirent *d;
+
+       if (!dir)
+               die("fdopendir error: %m");
+
+       while ((errno = 0), (d = readdir(dir))) {
+               if (!strcmp(d->d_name, ".") ||
+                   !strcmp(d->d_name, ".."))
+                       continue;
+
+               int ret = ioctl(dirfd, BCHFS_IOC_REINHERIT_ATTRS,
+                           d->d_name);
+               if (ret < 0) {
+                       fprintf(stderr, "error propagating attributes to %s: %m\n",
+                               d->d_name);
+                       continue;
+               }
+
+               if (!ret) /* did no work */
+                       continue;
+
+               struct stat st = xfstatat(dirfd, d->d_name,
+                                         AT_SYMLINK_NOFOLLOW);
+               if (!S_ISDIR(st.st_mode))
+                       continue;
+
+               int fd = openat(dirfd, d->d_name, O_RDONLY);
+               if (fd < 0) {
+                       fprintf(stderr, "error opening %s: %m\n", d->d_name);
+                       continue;
+               }
+               propagate_recurse(fd);
+               close(fd);
+       }
+
+       if (errno)
+               die("readdir error: %m");
+}
+
+static void do_setattr(char *path, struct bch_opt_strs opts)
+{
+       unsigned i;
+
+       for (i = 0; i < bch2_opts_nr; i++) {
+               if (!opts.by_id[i])
+                       continue;
+
+               char *n = mprintf("bcachefs.%s", bch2_opt_table[i].attr.name);
+
+               if (setxattr(path, n, opts.by_id[i], strlen(opts.by_id[i]), 0))
+                       die("setxattr error: %m");
+
+               free(n);
+       }
+
+       struct stat st = xstat(path);
+       if (!S_ISDIR(st.st_mode))
+               return;
+
+       int dirfd = open(path, O_RDONLY);
+       if (dirfd < 0)
+               die("error opening %s: %m", path);
+
+       propagate_recurse(dirfd);
+       close(dirfd);
+}
+
+static void setattr_usage(void)
+{
+       puts("bcachefs setattr - set attributes on files in a bcachefs filesystem\n"
+            "Usage: bcachefs setattr [OPTIONS]... <files>\n"
+            "\n"
+            "Options:");
+
+       bch2_opts_usage(OPT_INODE);
+       puts("  -h            Display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_setattr(int argc, char *argv[])
+{
+       struct bch_opt_strs opts =
+               bch2_cmdline_opts_get(&argc, argv, OPT_INODE);
+       unsigned i;
+
+       for (i = 1; i < argc; i++)
+               if (argv[i][0] == '-') {
+                       printf("invalid option %s\n", argv[i]);
+                       setattr_usage();
+                       exit(EXIT_FAILURE);
+               }
+
+       if (argc <= 1)
+               die("Please supply one or more files");
+
+       for (i = 1; i < argc; i++)
+               do_setattr(argv[i], opts);
+
+       return 0;
+}
diff --git a/cmd_data.c b/cmd_data.c
new file mode 100644 (file)
index 0000000..f495b6c
--- /dev/null
+++ b/cmd_data.c
@@ -0,0 +1,52 @@
+
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+static void data_rereplicate_usage(void)
+{
+       puts("bcachefs data rereplicate\n"
+            "Usage: bcachefs data rereplicate filesystem\n"
+            "\n"
+            "Walks existing data in a filesystem, writing additional copies\n"
+            "of any degraded data\n"
+            "\n"
+            "Options:\n"
+            "  -h, --help                  display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+       exit(EXIT_SUCCESS);
+}
+
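+/*
+ * Submits BCH_IOCTL_DATA with op REREPLICATE over the whole key range;
+ * bchu_data() then tracks the resulting data job until it completes.
+ */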
+int cmd_data_rereplicate(int argc, char *argv[])
+{
+       int opt;
+
+       while ((opt = getopt(argc, argv, "h")) != -1)
+               switch (opt) {
+               case 'h':
+                       data_rereplicate_usage();
+               }
+       args_shift(optind);
+
+       char *fs_path = arg_pop();
+       if (!fs_path)
+               die("Please supply a filesystem");
+
+       if (argc)
+               die("too many arguments");
+
+       return bchu_data(bcache_fs_open(fs_path), (struct bch_ioctl_data) {
+               .op     = BCH_DATA_OP_REREPLICATE,
+               .start  = POS_MIN,
+               .end    = POS_MAX,
+       });
+}
diff --git a/cmd_debug.c b/cmd_debug.c
new file mode 100644 (file)
index 0000000..4616447
--- /dev/null
+++ b/cmd_debug.c
@@ -0,0 +1,607 @@
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "qcow2.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/bset.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/btree_io.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/checksum.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/journal.h"
+#include "libbcachefs/journal_io.h"
+#include "libbcachefs/super.h"
+
+static void dump_usage(void)
+{
+       puts("bcachefs dump - dump filesystem metadata\n"
+            "Usage: bcachefs dump [OPTION]... <devices>\n"
+            "\n"
+            "Options:\n"
+            "  -o output     Output qcow2 image(s)\n"
+            "  -f            Force; overwrite when needed\n"
+            "  -v            Verbose mode\n"
+            "  -h            Display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
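+/*
+ * Collect the ranges on this device that hold metadata - superblocks,
+ * journal buckets and btree nodes - then write just those ranges out
+ * as a sparse qcow2 image.
+ */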
+static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
+{
+       struct bch_sb *sb = ca->disk_sb.sb;
+       ranges data;
+       unsigned i;
+
+       darray_init(data);
+
+       /* Superblock: */
+       range_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
+                 sizeof(struct bch_sb_layout));
+
+       for (i = 0; i < sb->layout.nr_superblocks; i++)
+               range_add(&data,
+                         le64_to_cpu(sb->layout.sb_offset[i]) << 9,
+                         vstruct_bytes(sb));
+
+       /* Journal: */
+       for (i = 0; i < ca->journal.nr; i++)
+               if (ca->journal.bucket_seq[i] >= c->journal.last_seq_ondisk) {
+                       u64 bucket = ca->journal.buckets[i];
+
+                       range_add(&data,
+                                 bucket_bytes(ca) * bucket,
+                                 bucket_bytes(ca));
+               }
+
+       /* Btree: */
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               const struct bch_extent_ptr *ptr;
+               struct bkey_ptrs_c ptrs;
+               struct btree_trans trans;
+               struct btree_iter *iter;
+               struct btree *b;
+
+               bch2_trans_init(&trans, c, 0, 0);
+
+               __for_each_btree_node(&trans, iter, i, POS_MIN, 0, 1, 0, b) {
+                       struct btree_node_iter iter;
+                       struct bkey u;
+                       struct bkey_s_c k;
+
+                       for_each_btree_node_key_unpack(b, k, &iter, &u) {
+                               ptrs = bch2_bkey_ptrs_c(k);
+
+                               bkey_for_each_ptr(ptrs, ptr)
+                                       if (ptr->dev == ca->dev_idx)
+                                               range_add(&data,
+                                                         ptr->offset << 9,
+                                                         btree_bytes(c));
+                       }
+               }
+
+               b = c->btree_roots[i].b;
+               if (!btree_node_fake(b)) {
+                       ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
+
+                       bkey_for_each_ptr(ptrs, ptr)
+                               if (ptr->dev == ca->dev_idx)
+                                       range_add(&data,
+                                                 ptr->offset << 9,
+                                                 btree_bytes(c));
+               }
+               bch2_trans_exit(&trans);
+       }
+
+       qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data,
+                         max_t(unsigned, btree_bytes(c) / 8, block_bytes(c)));
+}
+
+int cmd_dump(int argc, char *argv[])
+{
+       struct bch_opts opts = bch2_opts_empty();
+       struct bch_dev *ca;
+       char *out = NULL;
+       unsigned i, nr_devices = 0;
+       bool force = false;
+       int fd, opt;
+
+       opt_set(opts, nochanges,        true);
+       opt_set(opts, norecovery,       true);
+       opt_set(opts, degraded,         true);
+       opt_set(opts, errors,           BCH_ON_ERROR_CONTINUE);
+       opt_set(opts, fix_errors,       FSCK_OPT_YES);
+
+       while ((opt = getopt(argc, argv, "o:fvh")) != -1)
+               switch (opt) {
+               case 'o':
+                       out = optarg;
+                       break;
+               case 'f':
+                       force = true;
+                       break;
+               case 'v':
+                       opt_set(opts, verbose, true);
+                       break;
+               case 'h':
+                       dump_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       if (!out)
+               die("Please supply output filename");
+
+       if (!argc)
+               die("Please supply device(s) to check");
+
+       struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+       if (IS_ERR(c))
+               die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
+
+       down_read(&c->gc_lock);
+
+       for_each_online_member(ca, c, i)
+               nr_devices++;
+
+       BUG_ON(!nr_devices);
+
+       for_each_online_member(ca, c, i) {
+               int flags = O_WRONLY|O_CREAT|O_TRUNC;
+
+               if (!force)
+                       flags |= O_EXCL;
+
+               if (!c->devs[i])
+                       continue;
+
+               char *path = nr_devices > 1
+                       ? mprintf("%s.%u", out, i)
+                       : strdup(out);
+               fd = xopen(path, flags, 0600);
+               free(path);
+
+               dump_one_device(c, ca, fd);
+               close(fd);
+       }
+
+       up_read(&c->gc_lock);
+
+       bch2_fs_stop(c);
+       return 0;
+}
+
+static void list_keys(struct bch_fs *c, enum btree_id btree_id,
+                     struct bpos start, struct bpos end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       char buf[512];
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, btree_id, start,
+                          BTREE_ITER_PREFETCH, k, ret) {
+               if (bkey_cmp(k.k->p, end) > 0)
+                       break;
+
+               bch2_bkey_val_to_text(&PBUF(buf), c, k);
+               puts(buf);
+       }
+       bch2_trans_exit(&trans);
+}
+
+static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id,
+                              struct bpos start, struct bpos end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       char buf[4096];
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
+               if (bkey_cmp(b->key.k.p, end) > 0)
+                       break;
+
+               bch2_btree_node_to_text(&PBUF(buf), c, b);
+               puts(buf);
+       }
+       bch2_trans_exit(&trans);
+}
+
+static void list_nodes(struct bch_fs *c, enum btree_id btree_id,
+                           struct bpos start, struct bpos end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       char buf[4096];
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
+               if (bkey_cmp(b->key.k.p, end) > 0)
+                       break;
+
+               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key));
+               fputs(buf, stdout);
+               putchar('\n');
+       }
+       bch2_trans_exit(&trans);
+}
+
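+/*
+ * Read a btree node directly from disk, bypassing the in-memory btree
+ * node cache, verifying each bset's checksum before printing its keys.
+ */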
+static void print_node_ondisk(struct bch_fs *c, struct btree *b)
+{
+       struct btree_node *n_ondisk;
+       struct extent_ptr_decoded pick;
+       struct bch_dev *ca;
+       struct bio *bio;
+       unsigned offset = 0;
+
+       if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
+               printf("error getting device to read from\n");
+               return;
+       }
+
+       ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+       if (!bch2_dev_get_ioref(ca, READ)) {
+               printf("error getting device to read from\n");
+               return;
+       }
+
+       n_ondisk = malloc(btree_bytes(c));
+       if (!n_ondisk)
+               die("memory allocation failure");
+
+       bio = bio_alloc_bioset(GFP_NOIO,
+                       buf_pages(n_ondisk, btree_bytes(c)),
+                       &c->btree_bio);
+       bio_set_dev(bio, ca->disk_sb.bdev);
+       bio->bi_opf             = REQ_OP_READ|REQ_META;
+       bio->bi_iter.bi_sector  = pick.ptr.offset;
+       bch2_bio_map(bio, n_ondisk, btree_bytes(c));
+
+       submit_bio_wait(bio);
+
+       bio_put(bio);
+       percpu_ref_put(&ca->io_ref);
+
+       while (offset < c->opts.btree_node_size) {
+               struct bset *i;
+               struct nonce nonce;
+               struct bch_csum csum;
+               struct bkey_packed *k;
+               unsigned sectors;
+
+               if (!offset) {
+                       i = &n_ondisk->keys;
+
+                       if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
+                               die("unknown checksum type");
+
+                       nonce = btree_nonce(i, offset << 9);
+                       csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk);
+
+                       if (bch2_crc_cmp(csum, n_ondisk->csum))
+                               die("invalid checksum\n");
+
+                       bset_encrypt(c, i, offset << 9);
+
+                       sectors = vstruct_sectors(n_ondisk, c->block_bits);
+               } else {
+                       struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9);
+
+                       i = &bne->keys;
+
+                       if (i->seq != n_ondisk->keys.seq)
+                               break;
+
+                       if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
+                               die("unknown checksum type");
+
+                       nonce = btree_nonce(i, offset << 9);
+                       csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
+                       if (bch2_crc_cmp(csum, bne->csum))
+                               die("invalid checksum");
+
+                       bset_encrypt(c, i, offset << 9);
+
+                       sectors = vstruct_sectors(bne, c->block_bits);
+               }
+
+               fprintf(stdout, "  offset %u journal seq %llu\n",
+                       offset, le64_to_cpu(i->journal_seq));
+               offset += sectors;
+
+               for (k = i->start;
+                    k != vstruct_last(i);
+                    k = bkey_next_skip_noops(k, vstruct_last(i))) {
+                       struct bkey u;
+                       char buf[4096];
+
+                       bch2_bkey_val_to_text(&PBUF(buf), c, bkey_disassemble(b, k, &u));
+                       fprintf(stdout, "    %s\n", buf);
+               }
+       }
+
+       free(n_ondisk);
+}
+
+static void list_nodes_ondisk(struct bch_fs *c, enum btree_id btree_id,
+                             struct bpos start, struct bpos end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       char buf[4096];
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
+               if (bkey_cmp(b->key.k.p, end) > 0)
+                       break;
+
+               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key));
+               fputs(buf, stdout);
+               putchar('\n');
+
+               print_node_ondisk(c, b);
+       }
+       bch2_trans_exit(&trans);
+}
+
+static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id,
+                           struct bpos start, struct bpos end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree_node_iter node_iter;
+       struct bkey unpacked;
+       struct bkey_s_c k;
+       struct btree *b;
+       char buf[4096];
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_node(&trans, iter, btree_id, start, 0, b) {
+               if (bkey_cmp(b->key.k.p, end) > 0)
+                       break;
+
+               bch2_btree_node_to_text(&PBUF(buf), c, b);
+               fputs(buf, stdout);
+
+               for_each_btree_node_key_unpack(b, k, &node_iter, &unpacked) {
+                       bch2_bkey_val_to_text(&PBUF(buf), c, k);
+                       putchar('\t');
+                       puts(buf);
+               }
+       }
+       bch2_trans_exit(&trans);
+}
+
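+/* Parse a btree position as "inode:offset", e.g. "4096:0"; the offset may be omitted. */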
+static struct bpos parse_pos(char *buf)
+{
+       char *s = buf, *field;
+       u64 inode_v = 0, offset_v = 0;
+
+       if (!(field = strsep(&s, ":")) ||
+           kstrtoull(field, 10, &inode_v))
+               die("invalid bpos %s", buf);
+
+       if ((field = strsep(&s, ":")) &&
+           kstrtoull(field, 10, &offset_v))
+               die("invalid bpos %s", buf);
+
+       if (s)
+               die("invalid bpos %s", buf);
+
+       return (struct bpos) { .inode = inode_v, .offset = offset_v };
+}
+
+static void list_keys_usage(void)
+{
+       puts("bcachefs list - list filesystem metadata to stdout\n"
+            "Usage: bcachefs list [OPTION]... <devices>\n"
+            "\n"
+            "Options:\n"
+            "  -b (extents|inodes|dirents|xattrs)    Btree to list from\n"
+            "  -s inode:offset                       Start position to list from\n"
+            "  -e inode:offset                       End position\n"
+            "  -i inode                              List keys for a given inode number\n"
+            "  -m (keys|formats)                     List mode\n"
+            "  -f                                    Check (fsck) the filesystem first\n"
+            "  -v                                    Verbose mode\n"
+            "  -h                                    Display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+static const char * const list_modes[] = {
+       "keys",
+       "formats",
+       "nodes",
+       "nodes_ondisk",
+       "nodes_keys",
+       NULL
+};
+
+int cmd_list(int argc, char *argv[])
+{
+       struct bch_opts opts = bch2_opts_empty();
+       enum btree_id btree_id_start    = 0;
+       enum btree_id btree_id_end      = BTREE_ID_NR;
+       enum btree_id btree_id;
+       struct bpos start = POS_MIN, end = POS_MAX;
+       u64 inum;
+       int mode = 0, opt;
+
+       opt_set(opts, nochanges,        true);
+       opt_set(opts, norecovery,       true);
+       opt_set(opts, degraded,         true);
+       opt_set(opts, errors,           BCH_ON_ERROR_CONTINUE);
+
+       while ((opt = getopt(argc, argv, "b:s:e:i:m:fvh")) != -1)
+               switch (opt) {
+               case 'b':
+                       btree_id_start = read_string_list_or_die(optarg,
+                                               bch2_btree_ids, "btree id");
+                       btree_id_end = btree_id_start + 1;
+                       break;
+               case 's':
+                       start   = parse_pos(optarg);
+                       break;
+               case 'e':
+                       end     = parse_pos(optarg);
+                       break;
+               case 'i':
+                       if (kstrtoull(optarg, 10, &inum))
+                               die("invalid inode %s", optarg);
+                       start   = POS(inum, 0);
+                       end     = POS(inum + 1, 0);
+                       break;
+               case 'm':
+                       mode = read_string_list_or_die(optarg,
+                                               list_modes, "list mode");
+                       break;
+               case 'f':
+                       opt_set(opts, fix_errors, FSCK_OPT_YES);
+                       opt_set(opts, norecovery, false);
+                       break;
+               case 'v':
+                       opt_set(opts, verbose, true);
+                       break;
+               case 'h':
+                       list_keys_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       if (!argc)
+               die("Please supply device(s)");
+
+       struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+       if (IS_ERR(c))
+               die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
+
+       for (btree_id = btree_id_start;
+            btree_id < btree_id_end;
+            btree_id++) {
+               switch (mode) {
+               case 0:
+                       list_keys(c, btree_id, start, end);
+                       break;
+               case 1:
+                       list_btree_formats(c, btree_id, start, end);
+                       break;
+               case 2:
+                       list_nodes(c, btree_id, start, end);
+                       break;
+               case 3:
+                       list_nodes_ondisk(c, btree_id, start, end);
+                       break;
+               case 4:
+                       list_nodes_keys(c, btree_id, start, end);
+                       break;
+               default:
+                       die("Invalid mode");
+               }
+       }
+
+       bch2_fs_stop(c);
+       return 0;
+}
+
+static void list_journal_usage(void)
+{
+       puts("bcachefs list_journal - print contents of journal\n"
+            "Usage: bcachefs list_journal [OPTION]... <devices>\n"
+            "\n"
+            "Options:\n"
+            "  -a            Read entire journal, not just dirty entries\n"
+            "  -h            Display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_list_journal(int argc, char *argv[])
+{
+       struct bch_opts opts = bch2_opts_empty();
+       int opt;
+
+       opt_set(opts, nochanges,        true);
+       opt_set(opts, norecovery,       true);
+       opt_set(opts, degraded,         true);
+       opt_set(opts, errors,           BCH_ON_ERROR_CONTINUE);
+       opt_set(opts, fix_errors,       FSCK_OPT_YES);
+       opt_set(opts, keep_journal,     true);
+
+       while ((opt = getopt(argc, argv, "ah")) != -1)
+               switch (opt) {
+               case 'a':
+                       opt_set(opts, read_entire_journal, true);
+                       break;
+               case 'h':
+                       list_journal_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       if (!argc)
+               die("Please supply device(s) to open");
+
+       struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+       if (IS_ERR(c))
+               die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
+
+       struct journal_replay *p;
+       struct jset_entry *entry;
+       struct bkey_i *k, *_n;
+
+       /* This could be greatly expanded: */
+
+       list_for_each_entry(p, &c->journal_entries, list) {
+               printf("journal entry   %8llu\n"
+                      "    version     %8u\n"
+                      "    last seq    %8llu\n"
+                      "    read clock  %8u\n"
+                      "    write clock %8u\n"
+                      ,
+                      le64_to_cpu(p->j.seq),
+                      le32_to_cpu(p->j.version),
+                      le64_to_cpu(p->j.last_seq),
+                      le16_to_cpu(p->j.read_clock),
+                      le16_to_cpu(p->j.write_clock));
+
+               for_each_jset_key(k, _n, entry, &p->j) {
+                       char buf[200];
+
+                       bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k));
+                       printf("btree %s l %u: %s\n",
+                              bch2_btree_ids[entry->btree_id],
+                              entry->level,
+                              buf);
+               }
+       }
+
+       bch2_fs_stop(c);
+       return 0;
+}
diff --git a/cmd_device.c b/cmd_device.c
new file mode 100644 (file)
index 0000000..c311324
--- /dev/null
+++ b/cmd_device.c
@@ -0,0 +1,512 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/super-io.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/opts.h"
+#include "tools-util.h"
+
+static void device_add_usage(void)
+{
+       puts("bcachefs device add - add a device to an existing filesystem\n"
+            "Usage: bcachefs device add [OPTION]... filesystem device\n"
+            "\n"
+            "Options:\n"
+            "  -S, --fs_size=size          Size of filesystem on device\n"
+            "  -B, --bucket=size           Bucket size\n"
+            "  -D, --discard               Enable discards\n"
+            "  -g, --group=group           Disk group\n"
+            "  -f, --force                 Use device even if it appears to already be formatted\n"
+            "  -h, --help                  Display this help and exit\n"
+            "\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_device_add(int argc, char *argv[])
+{
+       static const struct option longopts[] = {
+               { "fs_size",            required_argument,      NULL, 'S' },
+               { "bucket",             required_argument,      NULL, 'B' },
+               { "discard",            no_argument,            NULL, 'D' },
+               { "group",              required_argument,      NULL, 'g' },
+               { "force",              no_argument,            NULL, 'f' },
+               { "help",               no_argument,            NULL, 'h' },
+               { NULL }
+       };
+       struct format_opts format_opts  = format_opts_default();
+       struct dev_opts dev_opts        = dev_opts_default();
+       bool force = false;
+       int opt;
+
+       while ((opt = getopt_long(argc, argv, "S:B:Dg:fh",
+                                 longopts, NULL)) != -1)
+               switch (opt) {
+               case 'S':
+                       if (bch2_strtoull_h(optarg, &dev_opts.size))
+                               die("invalid filesystem size");
+
+                       dev_opts.size >>= 9;
+                       break;
+               case 'B':
+                       dev_opts.bucket_size =
+                               hatoi_validate(optarg, "bucket size");
+                       break;
+               case 'D':
+                       dev_opts.discard = true;
+                       break;
+               case 'g':
+                       dev_opts.group = strdup(optarg);
+                       break;
+               case 'f':
+                       force = true;
+                       break;
+               case 'h':
+                       device_add_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       char *fs_path = arg_pop();
+       if (!fs_path)
+               die("Please supply a filesystem");
+
+       char *dev_path = arg_pop();
+       if (!dev_path)
+               die("Please supply a device");
+
+       if (argc)
+               die("too many arguments");
+
+       struct bchfs_handle fs = bcache_fs_open(fs_path);
+
+       dev_opts.path = dev_path;
+       dev_opts.fd = open_for_format(dev_opts.path, force);
+
+       struct bch_opt_strs fs_opt_strs;
+       memset(&fs_opt_strs, 0, sizeof(fs_opt_strs));
+
+       struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+       opt_set(fs_opts, block_size,
+               read_file_u64(fs.sysfs_fd, "block_size") >> 9);
+       opt_set(fs_opts, btree_node_size,
+               read_file_u64(fs.sysfs_fd, "btree_node_size") >> 9);
+
+       struct bch_sb *sb = bch2_format(fs_opt_strs,
+                                       fs_opts,
+                                       format_opts,
+                                       &dev_opts, 1);
+       free(sb);
+       fsync(dev_opts.fd);
+       close(dev_opts.fd);
+
+       bchu_disk_add(fs, dev_opts.path);
+       return 0;
+}
+
+static void device_remove_usage(void)
+{
+       puts("bcachefs device_remove - remove a device from a filesystem\n"
+            "Usage:\n"
+            "  bcachefs device remove device\n"
+            "  bcachefs device remove --by-id path devid\n"
+            "\n"
+            "Options:\n"
+            "  -i, --by-id                 Remove device by device id\n"
+            "  -f, --force                 Force removal, even if some data\n"
+            "                              couldn't be migrated\n"
+            "  -F, --force-metadata        Force removal, even if some metadata\n"
+            "                              couldn't be migrated\n"
+            "  -h, --help                  display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+       exit(EXIT_SUCCESS);
+}
+
+int cmd_device_remove(int argc, char *argv[])
+{
+       static const struct option longopts[] = {
+               { "by-id",              0, NULL, 'i' },
+               { "force",              0, NULL, 'f' },
+               { "force-metadata",     0, NULL, 'F' },
+               { "help",               0, NULL, 'h' },
+               { NULL }
+       };
+       struct bchfs_handle fs;
+       bool by_id = false;
+       int opt, flags = BCH_FORCE_IF_DEGRADED;
+       unsigned dev_idx;
+
+       while ((opt = getopt_long(argc, argv, "ifFh", longopts, NULL)) != -1)
+               switch (opt) {
+               case 'i':
+                       by_id = true;
+                       break;
+               case 'f':
+                       flags |= BCH_FORCE_IF_DATA_LOST;
+                       break;
+               case 'F':
+                       flags |= BCH_FORCE_IF_METADATA_LOST;
+                       break;
+               case 'h':
+                       device_remove_usage();
+               }
+       args_shift(optind);
+
+       if (by_id) {
+               char *path = arg_pop();
+               if (!path)
+                       die("Please supply filesystem to remove device from");
+
+               char *dev_idx_str = arg_pop();
+               if (!dev_idx_str)
+                       die("Please supply device id");
+
+               if (kstrtouint(dev_idx_str, 10, &dev_idx))
+                       die("invalid device id");
+
+               fs = bcache_fs_open(path);
+       } else {
+               char *dev = arg_pop();
+               if (!dev)
+                       die("Please supply a device to remove");
+
+               fs = bchu_fs_open_by_dev(dev, &dev_idx);
+       }
+
+       if (argc)
+               die("too many arguments");
+
+       bchu_disk_remove(fs, dev_idx, flags);
+       return 0;
+}
+
+static void device_online_usage(void)
+{
+       puts("bcachefs device online - readd a device to a running filesystem\n"
+            "Usage: bcachefs device online [OPTION]... device\n"
+            "\n"
+            "Options:\n"
+            "  -h, --help                  Display this help and exit\n"
+            "\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_device_online(int argc, char *argv[])
+{
+       int opt;
+
+       while ((opt = getopt(argc, argv, "h")) != -1)
+               switch (opt) {
+               case 'h':
+                       device_online_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       char *dev = arg_pop();
+       if (!dev)
+               die("Please supply a device");
+
+       if (argc)
+               die("too many arguments");
+
+       unsigned dev_idx;
+       struct bchfs_handle fs = bchu_fs_open_by_dev(dev, &dev_idx);
+       bchu_disk_online(fs, dev);
+       return 0;
+}
+
+static void device_offline_usage(void)
+{
+       puts("bcachefs device offline - take a device offline, without removing it\n"
+            "Usage: bcachefs device offline [OPTION]... device\n"
+            "\n"
+            "Options:\n"
+            "  -f, --force                 Force, if data redundancy will be degraded\n"
+            "  -h, --help                  Display this help and exit\n"
+            "\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_device_offline(int argc, char *argv[])
+{
+       static const struct option longopts[] = {
+               { "force",              0, NULL, 'f' },
+               { NULL }
+       };
+       int opt, flags = 0;
+
+       while ((opt = getopt_long(argc, argv, "fh",
+                                 longopts, NULL)) != -1)
+               switch (opt) {
+               case 'f':
+                       flags |= BCH_FORCE_IF_DEGRADED;
+                       break;
+               case 'h':
+                       device_offline_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       char *dev = arg_pop();
+       if (!dev)
+               die("Please supply a device");
+
+       if (argc)
+               die("too many arguments");
+
+       unsigned dev_idx;
+       struct bchfs_handle fs = bchu_fs_open_by_dev(dev, &dev_idx);
+       bchu_disk_offline(fs, dev_idx, flags);
+       return 0;
+}
+
+static void device_evacuate_usage(void)
+{
+       puts("bcachefs device evacuate - move data off of a given device\n"
+            "Usage: bcachefs device evacuate [OPTION]... device\n"
+            "\n"
+            "Options:\n"
+            "  -h, --help                  Display this help and exit\n"
+            "\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
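+/*
+ * Evacuate: set the device read-only if it is still RW, then migrate
+ * all data off of it with a BCH_DATA_OP_MIGRATE data job.
+ */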
+int cmd_device_evacuate(int argc, char *argv[])
+{
+       int opt;
+
+       while ((opt = getopt(argc, argv, "h")) != -1)
+               switch (opt) {
+               case 'h':
+                       device_evacuate_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       char *dev_path = arg_pop();
+       if (!dev_path)
+               die("Please supply a device");
+
+       if (argc)
+               die("too many arguments");
+
+       unsigned dev_idx;
+       struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);
+
+       struct bch_ioctl_dev_usage u = bchu_dev_usage(fs, dev_idx);
+
+       if (u.state == BCH_MEMBER_STATE_RW) {
+               printf("Setting %s readonly\n", dev_path);
+               bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_RO, 0);
+       }
+
+       return bchu_data(fs, (struct bch_ioctl_data) {
+               .op             = BCH_DATA_OP_MIGRATE,
+               .start          = POS_MIN,
+               .end            = POS_MAX,
+               .migrate.dev    = dev_idx,
+       });
+}
+
+static void device_set_state_usage(void)
+{
+       puts("bcachefs device set-state\n"
+            "Usage: bcachefs device set-state device new-state\n"
+            "\n"
+            "Options:\n"
+            "  -f, --force                 Force, if data redundancy will be degraded\n"
+            "  -o, --offline               Set state of an offline device\n"
+            "  -h, --help                  display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+       exit(EXIT_SUCCESS);
+}
+
+int cmd_device_set_state(int argc, char *argv[])
+{
+       static const struct option longopts[] = {
+               { "force",                      0, NULL, 'f' },
+               { "offline",                    0, NULL, 'o' },
+               { "help",                       0, NULL, 'h' },
+               { NULL }
+       };
+       int opt, flags = 0;
+       bool offline = false;
+
+       while ((opt = getopt_long(argc, argv, "foh", longopts, NULL)) != -1)
+               switch (opt) {
+               case 'f':
+                       flags |= BCH_FORCE_IF_DEGRADED;
+                       break;
+               case 'o':
+                       offline = true;
+                       break;
+               case 'h':
+                       device_set_state_usage();
+               }
+       args_shift(optind);
+
+       char *dev_path = arg_pop();
+       if (!dev_path)
+               die("Please supply a device");
+
+       char *new_state_str = arg_pop();
+       if (!new_state_str)
+               die("Please supply a device state");
+
+       unsigned new_state = read_string_list_or_die(new_state_str,
+                                       bch2_dev_state, "device state");
+
+       if (!offline) {
+               unsigned dev_idx;
+               struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);
+
+               bchu_disk_set_state(fs, dev_idx, new_state, flags);
+
+               bcache_fs_close(fs);
+       } else {
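+               /*
+                * Offline: update the member state directly in the superblock
+                * and bump its sequence number so the change is seen as newest.
+                */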
+               struct bch_opts opts = bch2_opts_empty();
+               struct bch_sb_handle sb = { NULL };
+
+               int ret = bch2_read_super(dev_path, &opts, &sb);
+               if (ret)
+                       die("error opening %s: %s", dev_path, strerror(-ret));
+
+               struct bch_member *m = bch2_sb_get_members(sb.sb)->members + sb.sb->dev_idx;
+
+               SET_BCH_MEMBER_STATE(m, new_state);
+
+               le64_add_cpu(&sb.sb->seq, 1);
+
+               bch2_super_write(sb.bdev->bd_fd, sb.sb);
+               bch2_free_super(&sb);
+       }
+
+       return 0;
+}
+
+static void device_resize_usage(void)
+{
+       puts("bcachefs device resize\n"
+            "Usage: bcachefs device resize device [ size ]\n"
+            "\n"
+            "Options:\n"
+            "  -h, --help                  display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+       exit(EXIT_SUCCESS);
+}
+
+int cmd_device_resize(int argc, char *argv[])
+{
+       static const struct option longopts[] = {
+               { "help",                       0, NULL, 'h' },
+               { NULL }
+       };
+       u64 size;
+       int opt;
+
+       while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+               switch (opt) {
+               case 'h':
+                       device_resize_usage();
+               }
+       args_shift(optind);
+
+       char *dev = arg_pop();
+       if (!dev)
+               die("Please supply a device to resize");
+
+       int dev_fd = xopen(dev, O_RDONLY);
+
+       char *size_arg = arg_pop();
+       if (!size_arg)
+               size = get_size(dev, dev_fd);
+       else if (bch2_strtoull_h(size_arg, &size))
+               die("invalid size");
+
+       size >>= 9;
+
+       if (argc)
+               die("Too many arguments");
+
+       struct stat dev_stat = xfstat(dev_fd);
+
+       struct mntent *mount = dev_to_mount(dev);
+       if (mount) {
+               if (!S_ISBLK(dev_stat.st_mode))
+                       die("%s is mounted but isn't a block device?!", dev);
+
+               printf("Doing online resize of %s\n", dev);
+
+               struct bchfs_handle fs = bcache_fs_open(mount->mnt_dir);
+
+               unsigned idx = bchu_disk_get_idx(fs, dev_stat.st_rdev);
+
+               struct bch_sb *sb = bchu_read_super(fs, -1);
+               if (idx >= sb->nr_devices)
+                       die("error reading superblock: dev idx >= sb->nr_devices");
+
+               struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+               if (!mi)
+                       die("error reading superblock: no member info");
+
+               /* could also just read this out of sysfs... meh */
+               struct bch_member *m = mi->members + idx;
+
+               u64 nbuckets = size / le16_to_cpu(m->bucket_size);
+
+               printf("resizing %s to %llu buckets\n", dev, nbuckets);
+               bchu_disk_resize(fs, idx, nbuckets);
+       } else {
+               printf("Doing offline resize of %s\n", dev);
+
+               struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+               if (IS_ERR(c))
+                       die("error opening %s: %s", dev, strerror(-PTR_ERR(c)));
+
+               struct bch_dev *ca, *resize = NULL;
+               unsigned i;
+
+               for_each_online_member(ca, c, i) {
+                       if (resize)
+                               die("confused: more than one online device?");
+                       resize = ca;
+                       percpu_ref_get(&resize->io_ref);
+               }
+
+               if (!resize)
+                       die("no online device found");
+
+               u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
+
+               printf("resizing %s to %llu buckets\n", dev, nbuckets);
+               int ret = bch2_dev_resize(c, resize, nbuckets);
+               if (ret)
+                       fprintf(stderr, "resize error: %s\n", strerror(-ret));
+
+               percpu_ref_put(&resize->io_ref);
+               bch2_fs_stop(c);
+       }
+       return 0;
+}
diff --git a/cmd_format.c b/cmd_format.c
new file mode 100644 (file)
index 0000000..673c63a
--- /dev/null
+++ b/cmd_format.c
@@ -0,0 +1,324 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ *         Gabriel de Perthuis <g2p.code@gmail.com>
+ *         Jacob Malevich <jam@datera.io>
+ *
+ * GPLv2
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "ccan/darray/darray.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+#include "libbcachefs/util.h"
+
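+/*
+ * Command line options as an x-macro: each x(shortopt, longopt, arg)
+ * entry expands both into an O_<longopt> enum constant and into a
+ * struct option entry for getopt_long() below.
+ */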
+#define OPTS                                           \
+x(0,   replicas,               required_argument)      \
+x(0,   encrypted,              no_argument)            \
+x(0,   no_passphrase,          no_argument)            \
+x('L', label,                  required_argument)      \
+x('U', uuid,                   required_argument)      \
+x(0,   fs_size,                required_argument)      \
+x(0,   bucket_size,            required_argument)      \
+x('g', group,                  required_argument)      \
+x(0,   discard,                no_argument)            \
+x(0,   data_allowed,           required_argument)      \
+x(0,   durability,             required_argument)      \
+x('f', force,                  no_argument)            \
+x('q', quiet,                  no_argument)            \
+x('h', help,                   no_argument)
+
+static void usage(void)
+{
+       puts("bcachefs format - create a new bcachefs filesystem on one or more devices\n"
+            "Usage: bcachefs format [OPTION]... <devices>\n"
+            "\n"
+            "Options:");
+
+       bch2_opts_usage(OPT_FORMAT);
+
+       puts(
+            "      --replicas=#            Sets both data and metadata replicas\n"
+            "      --encrypted             Enable whole filesystem encryption (chacha20/poly1305)\n"
+            "      --no_passphrase         Don't encrypt master encryption key\n"
+            "  -L, --label=label\n"
+            "  -U, --uuid=uuid\n"
+            "\n"
+            "Device specific options:");
+
+       bch2_opts_usage(OPT_DEVICE);
+
+       puts("  -g, --group=label           Disk group\n"
+            "\n"
+            "  -f, --force\n"
+            "  -q, --quiet                 Only print errors\n"
+            "  -h, --help                  Display this help and exit\n"
+            "\n"
+            "Device specific options must come before corresponding devices, e.g.\n"
+            "  bcachefs format --group cache /dev/sdb /dev/sdc\n"
+            "\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+enum {
+       O_no_opt = 1,
+#define x(shortopt, longopt, arg)      O_##longopt,
+       OPTS
+#undef x
+};
+
+#define x(shortopt, longopt, arg) {                    \
+       .name           = #longopt,                     \
+       .has_arg        = arg,                          \
+       .flag           = NULL,                         \
+       .val            = O_##longopt,                  \
+},
+static const struct option format_opts[] = {
+       OPTS
+       { NULL }
+};
+#undef x
+
+u64 read_flag_list_or_die(char *opt, const char * const list[],
+                         const char *msg)
+{
+       u64 v = bch2_read_flag_list(opt, list);
+       if (v == (u64) -1)
+               die("Bad %s %s", msg, opt);
+
+       return v;
+}
+
+int cmd_format(int argc, char *argv[])
+{
+       darray(struct dev_opts) devices;
+       darray(char *) device_paths;
+       struct format_opts opts = format_opts_default();
+       struct dev_opts dev_opts = dev_opts_default(), *dev;
+       bool force = false, no_passphrase = false, quiet = false;
+       unsigned v;
+       int opt;
+
+       darray_init(devices);
+       darray_init(device_paths);
+
+       struct bch_opt_strs fs_opt_strs =
+               bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
+       struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
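+       /*
+        * The leading '-' in the option string makes getopt_long() return
+        * each non-option argument as option 1, i.e. O_no_opt - this is
+        * how device paths are collected, with device-specific options
+        * applying to the devices that follow them.
+        */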
+       while ((opt = getopt_long(argc, argv,
+                                 "-L:U:g:fqh",
+                                 format_opts,
+                                 NULL)) != -1)
+               switch (opt) {
+               case O_replicas:
+                       if (kstrtouint(optarg, 10, &v) ||
+                           !v ||
+                           v > BCH_REPLICAS_MAX)
+                               die("invalid replicas");
+
+                       opt_set(fs_opts, metadata_replicas, v);
+                       opt_set(fs_opts, data_replicas, v);
+                       break;
+               case O_encrypted:
+                       opts.encrypted = true;
+                       break;
+               case O_no_passphrase:
+                       no_passphrase = true;
+                       break;
+               case O_label:
+               case 'L':
+                       opts.label = optarg;
+                       break;
+               case O_uuid:
+               case 'U':
+                       if (uuid_parse(optarg, opts.uuid.b))
+                               die("Bad uuid");
+                       break;
+               case O_force:
+               case 'f':
+                       force = true;
+                       break;
+               case O_fs_size:
+                       if (bch2_strtoull_h(optarg, &dev_opts.size))
+                               die("invalid filesystem size");
+
+                       dev_opts.size >>= 9;
+                       break;
+               case O_bucket_size:
+                       dev_opts.bucket_size =
+                               hatoi_validate(optarg, "bucket size");
+                       break;
+               case O_group:
+               case 'g':
+                       dev_opts.group = optarg;
+                       break;
+               case O_discard:
+                       dev_opts.discard = true;
+                       break;
+               case O_data_allowed:
+                       dev_opts.data_allowed =
+                               read_flag_list_or_die(optarg,
+                                       bch2_data_types, "data type");
+                       break;
+               case O_durability:
+                       if (kstrtouint(optarg, 10, &dev_opts.durability) ||
+                           dev_opts.durability > BCH_REPLICAS_MAX)
+                               die("invalid durability");
+                       break;
+               case O_no_opt:
+                       darray_append(device_paths, optarg);
+                       dev_opts.path = optarg;
+                       darray_append(devices, dev_opts);
+                       dev_opts.size = 0;
+                       break;
+               case O_quiet:
+               case 'q':
+                       quiet = true;
+                       break;
+               case O_help:
+               case 'h':
+                       usage();
+                       exit(EXIT_SUCCESS);
+                       break;
+               case '?':
+                       exit(EXIT_FAILURE);
+                       break;
+               }
+
+       if (darray_empty(devices))
+               die("Please supply a device");
+
+       if (opts.encrypted && !no_passphrase)
+               opts.passphrase = read_passphrase_twice("Enter passphrase: ");
+
+       darray_foreach(dev, devices)
+               dev->fd = open_for_format(dev->path, force);
+
+       struct bch_sb *sb =
+               bch2_format(fs_opt_strs,
+                           fs_opts,
+                           opts,
+                           devices.item, darray_size(devices));
+
+       if (!quiet)
+               bch2_sb_print(sb, false, 1 << BCH_SB_FIELD_members, HUMAN_READABLE);
+       free(sb);
+
+       if (opts.passphrase) {
+               memzero_explicit(opts.passphrase, strlen(opts.passphrase));
+               free(opts.passphrase);
+       }
+
+       darray_free(devices);
+
+       if (!opts.passphrase) {
+               /*
+                * Start the filesystem once, to allocate the journal and create
+                * the root directory:
+                */
+               struct bch_fs *c = bch2_fs_open(device_paths.item,
+                                               darray_size(device_paths),
+                                               bch2_opts_empty());
+               if (IS_ERR(c))
+                       die("error opening %s: %s", device_paths.item,
+                           strerror(-PTR_ERR(c)));
+
+               bch2_fs_stop(c);
+       }
+
+       darray_free(device_paths);
+
+       return 0;
+}
+
+static void show_super_usage(void)
+{
+       puts("bcachefs show-super\n"
+            "Usage: bcachefs show-super [OPTION].. device\n"
+            "\n"
+            "Options:\n"
+            "  -f, --fields=(fields)       list of sections to print\n"
+            "  -l, --layout                print superblock layout\n"
+            "  -h, --help                  display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+       exit(EXIT_SUCCESS);
+}
+
+int cmd_show_super(int argc, char *argv[])
+{
+       static const struct option longopts[] = {
+               { "fields",                     1, NULL, 'f' },
+               { "layout",                     0, NULL, 'l' },
+               { "help",                       0, NULL, 'h' },
+               { NULL }
+       };
+       unsigned fields = 1 << BCH_SB_FIELD_members;
+       bool print_layout = false;
+       int opt;
+
+       while ((opt = getopt_long(argc, argv, "f:lh", longopts, NULL)) != -1)
+               switch (opt) {
+               case 'f':
+                       fields = !strcmp(optarg, "all")
+                               ? ~0
+                               : read_flag_list_or_die(optarg,
+                                       bch2_sb_fields, "superblock field");
+                       break;
+               case 'l':
+                       print_layout = true;
+                       break;
+               case 'h':
+                       show_super_usage();
+                       break;
+               }
+       args_shift(optind);
+
+       char *dev = arg_pop();
+       if (!dev)
+               die("please supply a device");
+       if (argc)
+               die("too many arguments");
+
+       struct bch_opts opts = bch2_opts_empty();
+
+       opt_set(opts, noexcl,   true);
+       opt_set(opts, nochanges, true);
+
+       struct bch_sb_handle sb;
+       int ret = bch2_read_super(dev, &opts, &sb);
+       if (ret)
+               die("Error opening %s: %s", dev, strerror(-ret));
+
+       bch2_sb_print(sb.sb, print_layout, fields, HUMAN_READABLE);
+       bch2_free_super(&sb);
+       return 0;
+}
diff --git a/cmd_fs.c b/cmd_fs.c
new file mode 100644 (file)
index 0000000..f0b67b6
--- /dev/null
+++ b/cmd_fs.c
@@ -0,0 +1,226 @@
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include <uuid/uuid.h>
+
+#include "ccan/darray/darray.h"
+
+#include "linux/sort.h"
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/opts.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+static void print_dev_usage_type(const char *type,
+                                unsigned bucket_size,
+                                u64 buckets, u64 sectors,
+                                enum units units)
+{
+       u64 frag = max((s64) buckets * bucket_size - (s64) sectors, 0LL);
+
+       printf_pad(20, "  %s:", type);
+       printf("%12s%12llu%12s\n",
+              pr_units(sectors, units),
+              buckets,
+              pr_units(frag, units));
+}
+
+static void print_dev_usage(struct bchfs_handle fs,
+                           struct dev_name *d,
+                           enum units units)
+{
+       struct bch_ioctl_dev_usage u = bchu_dev_usage(fs, d->idx);
+       unsigned i;
+
+       printf("\n");
+       printf_pad(20, "%s (device %u):", d->label ?: "(no label)", d->idx);
+       printf("%24s%12s\n", d->dev ?: "(device not found)", bch2_dev_state[u.state]);
+
+       printf("%-20s%12s%12s%12s\n",
+              "", "data", "buckets", "fragmented");
+
+       for (i = BCH_DATA_sb; i < BCH_DATA_NR; i++) {
+               print_dev_usage_type(bch2_data_types[i],
+                                    u.bucket_size,
+                                    u.buckets[i],
+                                    u.sectors[i],
+                                    units);
+       }
+
+       print_dev_usage_type("erasure coded",
+                            u.bucket_size,
+                            u.ec_buckets,
+                            u.ec_sectors,
+                            units);
+
+       printf_pad(20, "  available:");
+       printf("%12s%12llu\n",
+              pr_units(u.available_buckets * u.bucket_size, units),
+              u.available_buckets);
+
+       printf_pad(20, "  capacity:");
+       printf("%12s%12llu\n",
+              pr_units(u.nr_buckets * u.bucket_size, units),
+              u.nr_buckets);
+}
+
+static int dev_by_label_cmp(const void *_l, const void *_r)
+{
+       const struct dev_name *l = _l, *r = _r;
+
+       return  (l->label && r->label
+                ? strcmp(l->label, r->label) : 0) ?:
+               (l->dev && r->dev
+                ? strcmp(l->dev, r->dev) : 0) ?:
+               cmp_int(l->idx, r->idx);
+}
+
+static struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
+{
+       struct dev_name *dev;
+
+       darray_foreach(dev, *dev_names)
+               if (dev->idx == idx)
+                       return dev;
+
+       return NULL;
+}
+
+static void print_replicas_usage(const struct bch_replicas_usage *r,
+                                dev_names *dev_names, enum units units)
+{
+       unsigned i;
+
+       if (!r->sectors)
+               return;
+
+       char devs[4096], *d = devs;
+       *d++ = '[';
+
+       for (i = 0; i < r->r.nr_devs; i++) {
+               unsigned dev_idx = r->r.devs[i];
+               struct dev_name *dev = dev_idx_to_name(dev_names, dev_idx);
+
+               if (i)
+                       *d++ = ' ';
+
+               d += dev && dev->dev
+                       ? sprintf(d, "%s", dev->dev)
+                       : sprintf(d, "%u", dev_idx);
+       }
+       *d++ = ']';
+       *d++ = '\0';
+
+       printf_pad(16, "%s: ", bch2_data_types[r->r.data_type]);
+       printf_pad(16, "%u/%u ", r->r.nr_required, r->r.nr_devs);
+       printf_pad(32, "%s ", devs);
+       printf(" %s\n", pr_units(r->sectors, units));
+}
+
+#define for_each_usage_replica(_u, _r)                                 \
+       for (_r = (_u)->replicas;                                       \
+            _r != (void *) (_u)->replicas + (_u)->replica_entries_bytes;\
+            _r = replicas_usage_next(_r),                              \
+            BUG_ON((void *) _r > (void *) (_u)->replicas + (_u)->replica_entries_bytes))
+
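+/*
+ * Print replicas usage in a stable order: non-user data first, then
+ * unreplicated user data, replicated user data, and everything else.
+ */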
+static void print_fs_usage(const char *path, enum units units)
+{
+       unsigned i;
+       char uuid[40];
+
+       struct bchfs_handle fs = bcache_fs_open(path);
+
+       struct dev_name *dev;
+       dev_names dev_names = bchu_fs_get_devices(fs);
+
+       struct bch_ioctl_fs_usage *u = bchu_fs_usage(fs);
+
+       uuid_unparse(fs.uuid.b, uuid);
+       printf("Filesystem %s:\n", uuid);
+
+       printf("%-20s%12s\n", "Size:", pr_units(u->capacity, units));
+       printf("%-20s%12s\n", "Used:", pr_units(u->used, units));
+
+       printf("%-20s%12s\n", "Online reserved:", pr_units(u->online_reserved, units));
+
+       printf("\n");
+       printf("%-16s%-16s%s\n", "Data type", "Required/total", "Devices");
+
+       for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+               if (!u->persistent_reserved[i])
+                       continue;
+
+               printf_pad(16, "%s: ", "reserved");
+               printf_pad(16, "%u/%u ", 1, i);
+               printf_pad(32, "[] ");
+               printf("%s\n", pr_units(u->persistent_reserved[i], units));
+       }
+
+       struct bch_replicas_usage *r;
+
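+       /*
+        * Group the output: metadata first, then user data with a single
+        * required replica, then replicated user data, then everything else:
+        */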
+       for_each_usage_replica(u, r)
+               if (r->r.data_type < BCH_DATA_user)
+                       print_replicas_usage(r, &dev_names, units);
+
+       for_each_usage_replica(u, r)
+               if (r->r.data_type == BCH_DATA_user &&
+                   r->r.nr_required <= 1)
+                       print_replicas_usage(r, &dev_names, units);
+
+       for_each_usage_replica(u, r)
+               if (r->r.data_type == BCH_DATA_user &&
+                   r->r.nr_required > 1)
+                       print_replicas_usage(r, &dev_names, units);
+
+       for_each_usage_replica(u, r)
+               if (r->r.data_type > BCH_DATA_user)
+                       print_replicas_usage(r, &dev_names, units);
+
+       free(u);
+
+       sort(&darray_item(dev_names, 0), darray_size(dev_names),
+            sizeof(darray_item(dev_names, 0)), dev_by_label_cmp, NULL);
+
+       darray_foreach(dev, dev_names)
+               print_dev_usage(fs, dev, units);
+
+       darray_foreach(dev, dev_names) {
+               free(dev->dev);
+               free(dev->label);
+       }
+       darray_free(dev_names);
+
+       bcache_fs_close(fs);
+}
+
+int cmd_fs_usage(int argc, char *argv[])
+{
+       enum units units = BYTES;
+       char *fs;
+       int opt;
+
+       while ((opt = getopt(argc, argv, "h")) != -1)
+               switch (opt) {
+               case 'h':
+                       units = HUMAN_READABLE;
+                       break;
+               }
+       args_shift(optind);
+
+       if (!argc) {
+               print_fs_usage(".", units);
+       } else {
+               while ((fs = arg_pop()))
+                       print_fs_usage(fs, units);
+       }
+
+       return 0;
+}
diff --git a/cmd_fsck.c b/cmd_fsck.c
new file mode 100644
index 0000000..5756ee7
--- /dev/null
+++ b/cmd_fsck.c
@@ -0,0 +1,103 @@
+
+#include <getopt.h>
+#include "cmds.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs.h"
+#include "libbcachefs/super.h"
+#include "tools-util.h"
+
+static void usage(void)
+{
+       puts("bcachefs fsck - filesystem check and repair\n"
+            "Usage: bcachefs fsck [OPTION]... <devices>\n"
+            "\n"
+            "Options:\n"
+            "  -p                     Automatic repair (no questions)\n"
+            "  -n                     Don't repair, only check for errors\n"
+            "  -y                     Assume \"yes\" to all questions\n"
+            "  -f                     Force checking even if filesystem is marked clean\n"
+            "  -o <opts>              Pass extra bcachefs mount options\n"
+            "  --reconstruct_alloc    Reconstruct the alloc btree\n"
+            "  -v                     Be verbose\n"
+            "  -h                     Display this help and exit\n"
+            "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_fsck(int argc, char *argv[])
+{
+       static const struct option longopts[] = {
+               { "reconstruct_alloc",  no_argument,            NULL, 'R' },
+               { NULL }
+       };
+       struct bch_opts opts = bch2_opts_empty();
+       unsigned i;
+       int opt, ret = 0;
+
+       opt_set(opts, degraded, true);
+       opt_set(opts, fsck, true);
+       opt_set(opts, fix_errors, FSCK_OPT_ASK);
+
+       while ((opt = getopt_long(argc, argv,
+                                 "apynfo:vh",
+                                 longopts, NULL)) != -1)
+               switch (opt) {
+               case 'a': /* outdated alias for -p */
+               case 'p':
+               case 'y':
+                       opt_set(opts, fix_errors, FSCK_OPT_YES);
+                       break;
+               case 'n':
+                       opt_set(opts, nochanges, true);
+                       opt_set(opts, fix_errors, FSCK_OPT_NO);
+                       break;
+               case 'f':
+                       /* force check, even if filesystem marked clean: */
+                       break;
+               case 'o':
+                       ret = bch2_parse_mount_opts(&opts, optarg);
+                       if (ret)
+                               return ret;
+                       break;
+               case 'R':
+                       opt_set(opts, reconstruct_alloc, true);
+                       break;
+               case 'v':
+                       opt_set(opts, verbose, true);
+                       break;
+               case 'h':
+                       usage();
+                       exit(16);
+               }
+       args_shift(optind);
+
+       if (!argc) {
+               fprintf(stderr, "Please supply device(s) to check\n");
+               exit(8);
+       }
+
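+       /*
+        * The exit status follows the fsck(8) convention: 1 = errors
+        * corrected, 2 = reboot suggested (a mounted device was checked),
+        * 4 = errors left uncorrected, 8 = operational error, 16 = usage error.
+        */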
+       for (i = 0; i < argc; i++) {
+               switch (dev_mounted(argv[i])) {
+               case 1:
+                       ret |= 2;
+                       break;
+               case 2:
+                       fprintf(stderr, "%s is mounted read-write - aborting\n", argv[i]);
+                       exit(8);
+               }
+       }
+
+       struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+       if (IS_ERR(c)) {
+               fprintf(stderr, "error opening %s: %s\n", argv[0], strerror(-PTR_ERR(c)));
+               exit(8);
+       }
+
+       if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags))
+               ret |= 1;
+       if (test_bit(BCH_FS_ERROR, &c->flags))
+               ret |= 4;
+
+       bch2_fs_stop(c);
+       return ret;
+}
diff --git a/cmd_fusemount.c b/cmd_fusemount.c
new file mode 100644
index 0000000..54bc76c
--- /dev/null
+++ b/cmd_fusemount.c
@@ -0,0 +1,1267 @@
+#ifdef BCACHEFS_FUSE
+
+#include <errno.h>
+#include <float.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/statvfs.h>
+
+#include <fuse_lowlevel.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/dirent.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/io.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super.h"
+
+/* mode_to_type(): */
+#include "libbcachefs/fs.h"
+
+#include <linux/dcache.h>
+
+/* XXX cut and pasted from fsck.c */
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
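+/*
+ * FUSE reserves inode 1 for the filesystem root, but the bcachefs root inode
+ * is 4096 (BCACHEFS_ROOT_INO) -- remap in both directions at the FUSE
+ * boundary:
+ */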
+static inline u64 map_root_ino(u64 ino)
+{
+       return ino == 1 ? 4096 : ino;
+}
+
+static inline u64 unmap_root_ino(u64 ino)
+{
+       return ino == 4096 ? 1 : ino;
+}
+
+static struct stat inode_to_stat(struct bch_fs *c,
+                                struct bch_inode_unpacked *bi)
+{
+       return (struct stat) {
+               .st_ino         = unmap_root_ino(bi->bi_inum),
+               .st_size        = bi->bi_size,
+               .st_mode        = bi->bi_mode,
+               .st_uid         = bi->bi_uid,
+               .st_gid         = bi->bi_gid,
+               .st_nlink       = bch2_inode_nlink_get(bi),
+               .st_rdev        = bi->bi_dev,
+               .st_blksize     = block_bytes(c),
+               .st_blocks      = bi->bi_sectors,
+               .st_atim        = bch2_time_to_timespec(c, bi->bi_atime),
+               .st_mtim        = bch2_time_to_timespec(c, bi->bi_mtime),
+               .st_ctim        = bch2_time_to_timespec(c, bi->bi_ctime),
+       };
+}
+
+static struct fuse_entry_param inode_to_entry(struct bch_fs *c,
+                                             struct bch_inode_unpacked *bi)
+{
+       return (struct fuse_entry_param) {
+               .ino            = unmap_root_ino(bi->bi_inum),
+               .generation     = bi->bi_generation,
+               .attr           = inode_to_stat(c, bi),
+               .attr_timeout   = DBL_MAX,
+               .entry_timeout  = DBL_MAX,
+       };
+}
+
+static void bcachefs_fuse_init(void *arg, struct fuse_conn_info *conn)
+{
+       if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
+               fuse_log(FUSE_LOG_DEBUG, "fuse_init: activating writeback\n");
+               conn->want |= FUSE_CAP_WRITEBACK_CACHE;
+       } else
+               fuse_log(FUSE_LOG_DEBUG, "fuse_init: writeback not capable\n");
+
+       //conn->want |= FUSE_CAP_POSIX_ACL;
+}
+
+static void bcachefs_fuse_destroy(void *arg)
+{
+       struct bch_fs *c = arg;
+
+       bch2_fs_stop(c);
+}
+
+static void bcachefs_fuse_lookup(fuse_req_t req, fuse_ino_t dir,
+                                const char *name)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked bi;
+       struct qstr qstr = QSTR(name);
+       u64 inum;
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "fuse_lookup(dir=%llu name=%s)\n",
+                dir, name);
+
+       dir = map_root_ino(dir);
+
+       ret = bch2_inode_find_by_inum(c, dir, &bi);
+       if (ret) {
+               fuse_reply_err(req, -ret);
+               return;
+       }
+
+       struct bch_hash_info hash_info = bch2_hash_info_init(c, &bi);
+
+       inum = bch2_dirent_lookup(c, dir, &hash_info, &qstr);
+       if (!inum) {
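+               /* A reply with ino 0 lets FUSE cache the negative lookup: */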
+               struct fuse_entry_param e = {
+                       .attr_timeout   = DBL_MAX,
+                       .entry_timeout  = DBL_MAX,
+               };
+               fuse_reply_entry(req, &e);
+               return;
+       }
+
+       ret = bch2_inode_find_by_inum(c, inum, &bi);
+       if (ret)
+               goto err;
+
+       fuse_log(FUSE_LOG_DEBUG, "fuse_lookup ret(inum=%llu)\n",
+                bi.bi_inum);
+
+       struct fuse_entry_param e = inode_to_entry(c, &bi);
+       fuse_reply_entry(req, &e);
+       return;
+err:
+       fuse_log(FUSE_LOG_DEBUG, "fuse_lookup error %i\n", ret);
+       fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_getattr(fuse_req_t req, fuse_ino_t inum,
+                                 struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked bi;
+       struct stat attr;
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "fuse_getattr(inum=%llu)\n",
+                inum);
+
+       inum = map_root_ino(inum);
+
+       ret = bch2_inode_find_by_inum(c, inum, &bi);
+       if (ret) {
+               fuse_log(FUSE_LOG_DEBUG, "fuse_getattr error %i\n", ret);
+               fuse_reply_err(req, -ret);
+               return;
+       }
+
+       fuse_log(FUSE_LOG_DEBUG, "fuse_getattr success\n");
+
+       attr = inode_to_stat(c, &bi);
+       fuse_reply_attr(req, &attr, DBL_MAX);
+}
+
+static void bcachefs_fuse_setattr(fuse_req_t req, fuse_ino_t inum,
+                                 struct stat *attr, int to_set,
+                                 struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked inode_u;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       u64 now;
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_setattr(%llu, %x)\n",
+                inum, to_set);
+
+       inum = map_root_ino(inum);
+
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+       now = bch2_current_time(c);
+
+       iter = bch2_inode_peek(&trans, &inode_u, inum, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(iter);
+       if (ret)
+               goto err;
+
+       if (to_set & FUSE_SET_ATTR_MODE)
+               inode_u.bi_mode = attr->st_mode;
+       if (to_set & FUSE_SET_ATTR_UID)
+               inode_u.bi_uid  = attr->st_uid;
+       if (to_set & FUSE_SET_ATTR_GID)
+               inode_u.bi_gid  = attr->st_gid;
+       if (to_set & FUSE_SET_ATTR_SIZE)
+               inode_u.bi_size = attr->st_size;
+       if (to_set & FUSE_SET_ATTR_ATIME)
+               inode_u.bi_atime = timespec_to_bch2_time(c, attr->st_atim);
+       if (to_set & FUSE_SET_ATTR_MTIME)
+               inode_u.bi_mtime = timespec_to_bch2_time(c, attr->st_mtim);
+       if (to_set & FUSE_SET_ATTR_ATIME_NOW)
+               inode_u.bi_atime = now;
+       if (to_set & FUSE_SET_ATTR_MTIME_NOW)
+               inode_u.bi_mtime = now;
+       /* TODO: CTIME? */
+
+       ret   = bch2_inode_write(&trans, iter, &inode_u) ?:
+               bch2_trans_commit(&trans, NULL, NULL,
+                                 BTREE_INSERT_NOFAIL);
+err:
+       if (ret == -EINTR)
+               goto retry;
+
+       bch2_trans_exit(&trans);
+
+       if (!ret) {
+               *attr = inode_to_stat(c, &inode_u);
+               fuse_reply_attr(req, attr, DBL_MAX);
+       } else {
+               fuse_reply_err(req, -ret);
+       }
+}
+
+static int do_create(struct bch_fs *c, u64 dir,
+                    const char *name, mode_t mode, dev_t rdev,
+                    struct bch_inode_unpacked *new_inode)
+{
+       struct qstr qstr = QSTR(name);
+       struct bch_inode_unpacked dir_u;
+
+       dir = map_root_ino(dir);
+
+       bch2_inode_init_early(c, new_inode);
+
+       return bch2_trans_do(c, NULL, NULL, 0,
+                       bch2_create_trans(&trans,
+                               dir, &dir_u,
+                               new_inode, &qstr,
+                               0, 0, mode, rdev, NULL, NULL));
+}
+
+static void bcachefs_fuse_mknod(fuse_req_t req, fuse_ino_t dir,
+                               const char *name, mode_t mode,
+                               dev_t rdev)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked new_inode;
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mknod(%llu, %s, %x, %x)\n",
+                dir, name, mode, rdev);
+       ret = do_create(c, dir, name, mode, rdev, &new_inode);
+       if (ret)
+               goto err;
+
+       struct fuse_entry_param e = inode_to_entry(c, &new_inode);
+       fuse_reply_entry(req, &e);
+       return;
+err:
+       fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_mkdir(fuse_req_t req, fuse_ino_t dir,
+                               const char *name, mode_t mode)
+{
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mkdir(%llu, %s, %x)\n",
+                dir, name, mode);
+
+       BUG_ON(mode & S_IFMT);
+
+       mode |= S_IFDIR;
+       bcachefs_fuse_mknod(req, dir, name, mode, 0);
+}
+
+static void bcachefs_fuse_unlink(fuse_req_t req, fuse_ino_t dir,
+                                const char *name)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked dir_u, inode_u;
+       struct qstr qstr = QSTR(name);
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_unlink(%llu, %s)\n", dir, name);
+
+       dir = map_root_ino(dir);
+
+       ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
+                           bch2_unlink_trans(&trans, dir, &dir_u,
+                                             &inode_u, &qstr));
+
+       fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_rmdir(fuse_req_t req, fuse_ino_t dir,
+                               const char *name)
+{
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_rmdir(%llu, %s)\n", dir, name);
+
+       dir = map_root_ino(dir);
+
+       bcachefs_fuse_unlink(req, dir, name);
+}
+
+static void bcachefs_fuse_rename(fuse_req_t req,
+                                fuse_ino_t src_dir, const char *srcname,
+                                fuse_ino_t dst_dir, const char *dstname,
+                                unsigned flags)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked dst_dir_u, src_dir_u;
+       struct bch_inode_unpacked src_inode_u, dst_inode_u;
+       struct qstr src_name = QSTR(srcname);
+       struct qstr dst_name = QSTR(dstname);
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG,
+                "bcachefs_fuse_rename(%llu, %s, %llu, %s, %x)\n",
+                src_dir, srcname, dst_dir, dstname, flags);
+
+       src_dir = map_root_ino(src_dir);
+       dst_dir = map_root_ino(dst_dir);
+
+       /* XXX handle overwrites */
+       ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_rename_trans(&trans,
+                                 src_dir, &src_dir_u,
+                                 dst_dir, &dst_dir_u,
+                                 &src_inode_u, &dst_inode_u,
+                                 &src_name, &dst_name,
+                                 BCH_RENAME));
+
+       fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t inum,
+                              fuse_ino_t newparent, const char *newname)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked dir_u, inode_u;
+       struct qstr qstr = QSTR(newname);
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_link(%llu, %llu, %s)\n",
+                inum, newparent, newname);
+
+       newparent = map_root_ino(newparent);
+
+       ret = bch2_trans_do(c, NULL, NULL, 0,
+                           bch2_link_trans(&trans, newparent,
+                                           inum, &dir_u, &inode_u, &qstr));
+
+       if (!ret) {
+               struct fuse_entry_param e = inode_to_entry(c, &inode_u);
+               fuse_reply_entry(req, &e);
+       } else {
+               fuse_reply_err(req, -ret);
+       }
+}
+
+static void bcachefs_fuse_open(fuse_req_t req, fuse_ino_t inum,
+                              struct fuse_file_info *fi)
+{
+       fi->direct_io           = false;
+       fi->keep_cache          = true;
+       fi->cache_readdir       = true;
+
+       fuse_reply_open(req, fi);
+}
+
+static void userbio_init(struct bio *bio, struct bio_vec *bv,
+                        void *buf, size_t size)
+{
+       bio_init(bio, bv, 1);
+       bio->bi_iter.bi_size    = size;
+       bv->bv_page             = buf;
+       bv->bv_len              = size;
+       bv->bv_offset           = 0;
+}
+
+static int get_inode_io_opts(struct bch_fs *c, u64 inum,
+                            struct bch_io_opts *opts)
+{
+       struct bch_inode_unpacked inode;
+       if (bch2_inode_find_by_inum(c, inum, &inode))
+               return -EINVAL;
+
+       *opts = bch2_opts_to_inode_opts(c->opts);
+       bch2_io_opts_apply(opts, bch2_inode_opts_get(&inode));
+       return 0;
+}
+
+static void bcachefs_fuse_read_endio(struct bio *bio)
+{
+       closure_put(bio->bi_private);
+}
+
+struct fuse_align_io {
+       off_t           start;
+       size_t          pad_start;
+       off_t           end;
+       size_t          pad_end;
+       size_t          size;
+};
+
+/* Handle unaligned start and end */
+/* TODO: align to block_bytes, sector size, or page size? */
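+/*
+ * e.g. with 4096-byte blocks, a 100-byte I/O at offset 4000 becomes:
+ * start=0, pad_start=4000, end=8192, pad_end=4092, size=8192
+ */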
+static struct fuse_align_io align_io(const struct bch_fs *c, size_t size,
+                                    off_t offset)
+{
+       struct fuse_align_io align;
+
+       BUG_ON(offset < 0);
+
+       align.start = round_down(offset, block_bytes(c));
+       align.pad_start = offset - align.start;
+
+       off_t end = offset + size;
+       align.end = round_up(end, block_bytes(c));
+       align.pad_end = align.end - end;
+
+       align.size = align.end - align.start;
+
+       return align;
+}
+
+/*
+ * Given an aligned number of bytes transferred, figure out how many unaligned
+ * bytes were transferred.
+ */
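+/*
+ * For the example above: 8192 aligned bytes transferred -> 8192 - 4000
+ * (pad_start) - 4092 (pad_end) = 100 unaligned bytes.
+ */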
+static size_t align_fix_up_bytes(const struct fuse_align_io *align,
+                                size_t align_bytes)
+{
+       size_t bytes = 0;
+
+       if (align_bytes > align->pad_start) {
+               bytes = align_bytes - align->pad_start;
+               bytes = bytes > align->pad_end ? bytes - align->pad_end : 0;
+       }
+
+       return bytes;
+}
+
+/*
+ * Read aligned data.
+ */
+static int read_aligned(struct bch_fs *c, fuse_ino_t inum, size_t aligned_size,
+                       off_t aligned_offset, void *buf)
+{
+       BUG_ON(aligned_size & (block_bytes(c) - 1));
+       BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+       struct bch_io_opts io_opts;
+       if (get_inode_io_opts(c, inum, &io_opts))
+               return -ENOENT;
+
+       struct bch_read_bio rbio;
+       struct bio_vec bv;
+       userbio_init(&rbio.bio, &bv, buf, aligned_size);
+       bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC);
+       rbio.bio.bi_iter.bi_sector      = aligned_offset >> 9;
+
+       struct closure cl;
+       closure_init_stack(&cl);
+
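+       /* Hold a ref across the read; bcachefs_fuse_read_endio() drops it: */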
+       closure_get(&cl);
+       rbio.bio.bi_end_io              = bcachefs_fuse_read_endio;
+       rbio.bio.bi_private             = &cl;
+
+       bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
+
+       closure_sync(&cl);
+
+       return -blk_status_to_errno(rbio.bio.bi_status);
+}
+
+static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t inum,
+                              size_t size, off_t offset,
+                              struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_read(%llu, %zd, %lld)\n",
+                inum, size, offset);
+
+       /* Check inode size. */
+       struct bch_inode_unpacked bi;
+       int ret = bch2_inode_find_by_inum(c, inum, &bi);
+       if (ret) {
+               fuse_reply_err(req, -ret);
+               return;
+       }
+
+       off_t end = min_t(u64, bi.bi_size, offset + size);
+       if (end <= offset) {
+               fuse_reply_buf(req, NULL, 0);
+               return;
+       }
+       size = end - offset;
+
+       struct fuse_align_io align = align_io(c, size, offset);
+
+       void *buf = aligned_alloc(PAGE_SIZE, align.size);
+       if (!buf) {
+               fuse_reply_err(req, ENOMEM);
+               return;
+       }
+
+       ret = read_aligned(c, inum, align.size, align.start, buf);
+
+       if (likely(!ret))
+               fuse_reply_buf(req, buf + align.pad_start, size);
+       else
+               fuse_reply_err(req, -ret);
+
+       free(buf);
+}
+
+static int inode_update_times(struct bch_fs *c, fuse_ino_t inum)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bch_inode_unpacked inode_u;
+       int ret = 0;
+       u64 now;
+
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+       now = bch2_current_time(c);
+
+       iter = bch2_inode_peek(&trans, &inode_u, inum, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(iter);
+       if (ret)
+               goto err;
+
+       inode_u.bi_mtime = now;
+       inode_u.bi_ctime = now;
+
+       ret = bch2_inode_write(&trans, iter, &inode_u);
+       if (ret)
+               goto err;
+
+       ret = bch2_trans_commit(&trans, NULL, NULL,
+                               BTREE_INSERT_NOFAIL);
+
+err:
+       if (ret == -EINTR)
+               goto retry;
+
+       bch2_trans_exit(&trans);
+       return ret;
+}
+
+static int write_aligned(struct bch_fs *c, fuse_ino_t inum,
+                        struct bch_io_opts io_opts, void *buf,
+                        size_t aligned_size, off_t aligned_offset,
+                        off_t new_i_size, size_t *written_out)
+{
+       struct bch_write_op     op = { 0 };
+       struct bio_vec          bv;
+       struct closure          cl;
+
+       BUG_ON(aligned_size & (block_bytes(c) - 1));
+       BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+       *written_out = 0;
+
+       closure_init_stack(&cl);
+
+       bch2_write_op_init(&op, c, io_opts); /* XXX reads from op?! */
+       op.write_point  = writepoint_hashed(0);
+       op.nr_replicas  = io_opts.data_replicas;
+       op.target       = io_opts.foreground_target;
+       op.pos          = POS(inum, aligned_offset >> 9);
+       op.new_i_size   = new_i_size;
+
+       userbio_init(&op.wbio.bio, &bv, buf, aligned_size);
+       bio_set_op_attrs(&op.wbio.bio, REQ_OP_WRITE, REQ_SYNC);
+
+       if (bch2_disk_reservation_get(c, &op.res, aligned_size >> 9,
+                                     op.nr_replicas, 0)) {
+               /* XXX: use check_range_allocated like dio write path */
+               return -ENOSPC;
+       }
+
+       closure_call(&op.cl, bch2_write, NULL, &cl);
+       closure_sync(&cl);
+
+       if (!op.error)
+               *written_out = op.written << 9;
+
+       return op.error;
+}
+
+static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t inum,
+                               const char *buf, size_t size,
+                               off_t offset,
+                               struct fuse_file_info *fi)
+{
+       struct bch_fs *c        = fuse_req_userdata(req);
+       struct bch_io_opts      io_opts;
+       size_t                  aligned_written;
+       int                     ret = 0;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write(%llu, %zd, %lld)\n",
+                inum, size, offset);
+
+       struct fuse_align_io align = align_io(c, size, offset);
+       void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+       BUG_ON(!aligned_buf);
+
+       if (get_inode_io_opts(c, inum, &io_opts)) {
+               ret = -ENOENT;
+               goto err;
+       }
+
+       /* Realign the data and read in start and end, if needed */
+
+       /* Read partial start data. */
+       if (align.pad_start) {
+               memset(aligned_buf, 0, block_bytes(c));
+
+               ret = read_aligned(c, inum, block_bytes(c), align.start,
+                                  aligned_buf);
+               if (ret)
+                       goto err;
+       }
+
+       /*
+        * Read partial end data. If the whole write fits in one block, the
+        * start data and the end data are the same so this isn't needed.
+        */
+       if (align.pad_end &&
+           !(align.pad_start && align.size == block_bytes(c))) {
+               off_t partial_end_start = align.end - block_bytes(c);
+               size_t buf_offset = align.size - block_bytes(c);
+
+               memset(aligned_buf + buf_offset, 0, block_bytes(c));
+
+               ret = read_aligned(c, inum, block_bytes(c), partial_end_start,
+                                  aligned_buf + buf_offset);
+               if (ret)
+                       goto err;
+       }
+
+       /* Overlay what we want to write. */
+       memcpy(aligned_buf + align.pad_start, buf, size);
+
+       /* Actually write. */
+       ret = write_aligned(c, inum, io_opts, aligned_buf,
+                           align.size, align.start,
+                           offset + size, &aligned_written);
+
+       /* Figure out how many unaligned bytes were written. */
+       size_t written = align_fix_up_bytes(&align, aligned_written);
+       BUG_ON(written > size);
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write: wrote %zd bytes\n",
+                written);
+
+       if (written > 0)
+               ret = 0;
+
+       /*
+        * Update inode times.
+        * TODO: Integrate with bch2_extent_update()
+        */
+       if (!ret)
+               ret = inode_update_times(c, inum);
+
+       if (!ret) {
+               BUG_ON(written == 0);
+               fuse_reply_write(req, written);
+               free(aligned_buf);
+               return;
+       }
+
+err:
+       fuse_reply_err(req, -ret);
+       free(aligned_buf);
+}
+
+static void bcachefs_fuse_symlink(fuse_req_t req, const char *link,
+                                 fuse_ino_t dir, const char *name)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked new_inode;
+       size_t link_len = strlen(link);
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_symlink(%s, %llu, %s)\n",
+                link, dir, name);
+
+       dir = map_root_ino(dir);
+
+       ret = do_create(c, dir, name, S_IFLNK|S_IRWXUGO, 0, &new_inode);
+       if (ret)
+               goto err;
+
+       struct bch_io_opts io_opts;
+       ret = get_inode_io_opts(c, new_inode.bi_inum, &io_opts);
+       if (ret)
+               goto err;
+
+       struct fuse_align_io align = align_io(c, link_len + 1, 0);
+
+       void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+       BUG_ON(!aligned_buf);
+
+       memset(aligned_buf, 0, align.size);
+       memcpy(aligned_buf, link, link_len); /* already terminated */
+
+       size_t aligned_written;
+       ret = write_aligned(c, new_inode.bi_inum, io_opts, aligned_buf,
+                           align.size, align.start, link_len + 1,
+                           &aligned_written);
+       free(aligned_buf);
+
+       if (ret)
+               goto err;
+
+       size_t written = align_fix_up_bytes(&align, aligned_written);
+       BUG_ON(written != link_len + 1); // TODO: handle short
+
+       ret = inode_update_times(c, new_inode.bi_inum);
+       if (ret)
+               goto err;
+
+       new_inode.bi_size = written;
+
+       struct fuse_entry_param e = inode_to_entry(c, &new_inode);
+       fuse_reply_entry(req, &e);
+       return;
+
+err:
+       fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t inum)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       char *buf = NULL;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readlink(%llu)\n", inum);
+
+       struct bch_inode_unpacked bi;
+       int ret = bch2_inode_find_by_inum(c, inum, &bi);
+       if (ret)
+               goto err;
+
+       struct fuse_align_io align = align_io(c, bi.bi_size, 0);
+
+       ret = -ENOMEM;
+       buf = aligned_alloc(PAGE_SIZE, align.size);
+       if (!buf)
+               goto err;
+
+       ret = read_aligned(c, inum, align.size, align.start, buf);
+       if (ret)
+               goto err;
+
+       BUG_ON(buf[align.size - 1] != 0);
+
+       fuse_reply_readlink(req, buf);
+
+err:
+       if (ret)
+               fuse_reply_err(req, -ret);
+
+       free(buf);
+}
+
+#if 0
+/*
+ * FUSE flush is essentially the close() call, however it is not guaranteed
+ * that one flush happens per open/create.
+ *
+ * It doesn't have to do anything, and is mostly relevant for NFS-style
+ * filesystems where close has some relationship to caching.
+ */
+static void bcachefs_fuse_flush(fuse_req_t req, fuse_ino_t inum,
+                               struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_release(fuse_req_t req, fuse_ino_t inum,
+                                 struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fsync(fuse_req_t req, fuse_ino_t inum, int datasync,
+                               struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_opendir(fuse_req_t req, fuse_ino_t inum,
+                                 struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+struct fuse_dir_context {
+       struct dir_context      ctx;
+       fuse_req_t              req;
+       char                    *buf;
+       size_t                  bufsize;
+};
+
+struct fuse_dirent {
+       uint64_t        ino;
+       uint64_t        off;
+       uint32_t        namelen;
+       uint32_t        type;
+       char name[];
+};
+
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) \
+       (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+
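+/*
+ * Local version of libfuse's dirent packing: each entry is padded to an
+ * 8-byte boundary, matching the kernel FUSE wire format.
+ */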
+static size_t fuse_add_direntry2(char *buf, size_t bufsize,
+                                const char *name, int namelen,
+                                const struct stat *stbuf, off_t off)
+{
+       size_t entlen           = FUSE_NAME_OFFSET + namelen;
+       size_t entlen_padded    = FUSE_DIRENT_ALIGN(entlen);
+       struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+
+       if ((buf == NULL) || (entlen_padded > bufsize))
+               return entlen_padded;
+
+       dirent->ino = stbuf->st_ino;
+       dirent->off = off;
+       dirent->namelen = namelen;
+       dirent->type = (stbuf->st_mode & S_IFMT) >> 12;
+       memcpy(dirent->name, name, namelen);
+       memset(dirent->name + namelen, 0, entlen_padded - entlen);
+
+       return entlen_padded;
+}
+
+static int fuse_filldir(struct dir_context *_ctx,
+                       const char *name, int namelen,
+                       loff_t pos, u64 ino, unsigned type)
+{
+       struct fuse_dir_context *ctx =
+               container_of(_ctx, struct fuse_dir_context, ctx);
+
+       struct stat statbuf = {
+               .st_ino         = unmap_root_ino(ino),
+               .st_mode        = type << 12,
+       };
+
+       fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(name=%s inum=%llu pos=%llu)\n",
+                name, statbuf.st_ino, pos);
+
+       size_t len = fuse_add_direntry2(ctx->buf,
+                                       ctx->bufsize,
+                                       name,
+                                       namelen,
+                                       &statbuf,
+                                       pos + 1);
+
+       if (len > ctx->bufsize)
+               return -1;
+
+       ctx->buf        += len;
+       ctx->bufsize    -= len;
+
+       return 0;
+}
+
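+/*
+ * bch2_readdir() emits only real directory entries, so synthesize "." and
+ * ".." at offsets 0 and 1 here:
+ */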
+static bool handle_dots(struct fuse_dir_context *ctx, fuse_ino_t dir)
+{
+       if (ctx->ctx.pos == 0) {
+               if (fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos,
+                                dir, DT_DIR) < 0)
+                       return false;
+               ctx->ctx.pos = 1;
+       }
+
+       if (ctx->ctx.pos == 1) {
+               if (fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos,
+                                /*TODO: parent*/ 1, DT_DIR) < 0)
+                       return false;
+               ctx->ctx.pos = 2;
+       }
+
+       return true;
+}
+
+static void bcachefs_fuse_readdir(fuse_req_t req, fuse_ino_t dir,
+                                 size_t size, off_t off,
+                                 struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked bi;
+       char *buf = calloc(size, 1);
+       struct fuse_dir_context ctx = {
+               .ctx.actor      = fuse_filldir,
+               .ctx.pos        = off,
+               .req            = req,
+               .buf            = buf,
+               .bufsize        = size,
+       };
+       int ret = 0;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir(dir=%llu, size=%zu, "
+                "off=%lld)\n", dir, size, off);
+
+       dir = map_root_ino(dir);
+
+       ret = bch2_inode_find_by_inum(c, dir, &bi);
+       if (ret)
+               goto reply;
+
+       if (!S_ISDIR(bi.bi_mode)) {
+               ret = -ENOTDIR;
+               goto reply;
+       }
+
+       if (!handle_dots(&ctx, dir))
+               goto reply;
+
+       ret = bch2_readdir(c, dir, &ctx.ctx);
+reply:
+       if (!ret) {
+               fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir reply %zd\n",
+                                       ctx.buf - buf);
+               fuse_reply_buf(req, buf, ctx.buf - buf);
+       } else {
+               fuse_reply_err(req, -ret);
+       }
+
+       free(buf);
+}
+
+#if 0
+static void bcachefs_fuse_readdirplus(fuse_req_t req, fuse_ino_t dir,
+                                     size_t size, off_t off,
+                                     struct fuse_file_info *fi)
+{
+
+}
+
+static void bcachefs_fuse_releasedir(fuse_req_t req, fuse_ino_t inum,
+                                    struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fsyncdir(fuse_req_t req, fuse_ino_t inum, int datasync,
+                                  struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+static void bcachefs_fuse_statfs(fuse_req_t req, fuse_ino_t inum)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
+       unsigned shift = c->block_bits;
+       struct statvfs statbuf = {
+               .f_bsize        = block_bytes(c),
+               .f_frsize       = block_bytes(c),
+               .f_blocks       = usage.capacity >> shift,
+               .f_bfree        = (usage.capacity - usage.used) >> shift,
+               //.f_bavail     = statbuf.f_bfree,
+               .f_files        = usage.nr_inodes,
+               .f_ffree        = U64_MAX,
+               .f_namemax      = BCH_NAME_MAX,
+       };
+
+       fuse_reply_statfs(req, &statbuf);
+}
+
+#if 0
+static void bcachefs_fuse_setxattr(fuse_req_t req, fuse_ino_t inum,
+                                  const char *name, const char *value,
+                                  size_t size, int flags)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_getxattr(fuse_req_t req, fuse_ino_t inum,
+                                  const char *name, size_t size)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+
+       fuse_reply_xattr(req, );
+}
+
+static void bcachefs_fuse_listxattr(fuse_req_t req, fuse_ino_t inum, size_t size)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_removexattr(fuse_req_t req, fuse_ino_t inum,
+                                     const char *name)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+static void bcachefs_fuse_create(fuse_req_t req, fuse_ino_t dir,
+                                const char *name, mode_t mode,
+                                struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+       struct bch_inode_unpacked new_inode;
+       int ret;
+
+       fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_create(%llu, %s, %x)\n",
+                dir, name, mode);
+
+       ret = do_create(c, dir, name, mode, 0, &new_inode);
+       if (ret)
+               goto err;
+
+       struct fuse_entry_param e = inode_to_entry(c, &new_inode);
+       fuse_reply_create(req, &e, fi);
+       return;
+err:
+       fuse_reply_err(req, -ret);
+
+}
+
+#if 0
+static void bcachefs_fuse_write_buf(fuse_req_t req, fuse_ino_t inum,
+                                   struct fuse_bufvec *bufv, off_t off,
+                                   struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fallocate(fuse_req_t req, fuse_ino_t inum, int mode,
+                                   off_t offset, off_t length,
+                                   struct fuse_file_info *fi)
+{
+       struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+static const struct fuse_lowlevel_ops bcachefs_fuse_ops = {
+       .init           = bcachefs_fuse_init,
+       .destroy        = bcachefs_fuse_destroy,
+       .lookup         = bcachefs_fuse_lookup,
+       .getattr        = bcachefs_fuse_getattr,
+       .setattr        = bcachefs_fuse_setattr,
+       .readlink       = bcachefs_fuse_readlink,
+       .mknod          = bcachefs_fuse_mknod,
+       .mkdir          = bcachefs_fuse_mkdir,
+       .unlink         = bcachefs_fuse_unlink,
+       .rmdir          = bcachefs_fuse_rmdir,
+       .symlink        = bcachefs_fuse_symlink,
+       .rename         = bcachefs_fuse_rename,
+       .link           = bcachefs_fuse_link,
+       .open           = bcachefs_fuse_open,
+       .read           = bcachefs_fuse_read,
+       .write          = bcachefs_fuse_write,
+       //.flush        = bcachefs_fuse_flush,
+       //.release      = bcachefs_fuse_release,
+       //.fsync        = bcachefs_fuse_fsync,
+       //.opendir      = bcachefs_fuse_opendir,
+       .readdir        = bcachefs_fuse_readdir,
+       //.readdirplus  = bcachefs_fuse_readdirplus,
+       //.releasedir   = bcachefs_fuse_releasedir,
+       //.fsyncdir     = bcachefs_fuse_fsyncdir,
+       .statfs         = bcachefs_fuse_statfs,
+       //.setxattr     = bcachefs_fuse_setxattr,
+       //.getxattr     = bcachefs_fuse_getxattr,
+       //.listxattr    = bcachefs_fuse_listxattr,
+       //.removexattr  = bcachefs_fuse_removexattr,
+       .create         = bcachefs_fuse_create,
+
+       /* posix locks: */
+#if 0
+       .getlk          = bcachefs_fuse_getlk,
+       .setlk          = bcachefs_fuse_setlk,
+#endif
+       //.write_buf    = bcachefs_fuse_write_buf,
+       //.fallocate    = bcachefs_fuse_fallocate,
+
+};
+
+/*
+ * Setup and command parsing.
+ */
+
+struct bf_context {
+       char            *devices_str;
+       char            **devices;
+       int             nr_devices;
+};
+
+static void bf_context_free(struct bf_context *ctx)
+{
+       int i;
+
+       free(ctx->devices_str);
+       for (i = 0; i < ctx->nr_devices; ++i)
+               free(ctx->devices[i]);
+       free(ctx->devices);
+}
+
+static struct fuse_opt bf_opts[] = {
+       FUSE_OPT_END
+};
+
+/*
+ * Fuse option parsing helper -- returning 0 means we consumed the argument, 1
+ * means we did not.
+ */
+static int bf_opt_proc(void *data, const char *arg, int key,
+    struct fuse_args *outargs)
+{
+       struct bf_context *ctx = data;
+
+       switch (key) {
+       case FUSE_OPT_KEY_NONOPT:
+               /* Just extract the first non-option string. */
+               if (!ctx->devices_str) {
+                       ctx->devices_str = strdup(arg);
+                       return 0;
+               }
+               return 1;
+       }
+
+       return 1;
+}
+
+/*
+ * dev1:dev2 -> [ dev1, dev2 ]
+ * dev      -> [ dev ]
+ */
+static void tokenize_devices(struct bf_context *ctx)
+{
+       char *devices_str = strdup(ctx->devices_str);
+       char *devices_tmp = devices_str;
+       char **devices = NULL;
+       int nr = 0;
+       char *dev = NULL;
+
+       while ((dev = strsep(&devices_tmp, ":"))) {
+               if (strlen(dev) > 0) {
+                       devices = realloc(devices, (nr + 1) * sizeof *devices);
+                       devices[nr] = strdup(dev);
+                       nr++;
+               }
+       }
+
+       if (!devices) {
+               devices = malloc(sizeof *devices);
+               devices[0] = strdup(ctx->devices_str);
+               nr = 1;
+       }
+
+       ctx->devices = devices;
+       ctx->nr_devices = nr;
+
+       free(devices_str);
+}
+
+static void usage(char *argv[])
+{
+       printf("Usage: %s fusemount [options] <dev>[:dev2:...] <mountpoint>\n",
+              argv[0]);
+       printf("\n");
+}
+
+int cmd_fusemount(int argc, char *argv[])
+{
+       struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
+       struct bch_opts bch_opts = bch2_opts_empty();
+       struct bf_context ctx = { 0 };
+       struct bch_fs *c = NULL;
+       int ret = 0, i;
+
+       /* Parse arguments. */
+       if (fuse_opt_parse(&args, &ctx, bf_opts, bf_opt_proc) < 0)
+               die("fuse_opt_parse err: %m");
+
+       struct fuse_cmdline_opts fuse_opts;
+       if (fuse_parse_cmdline(&args, &fuse_opts) < 0)
+               die("fuse_parse_cmdline err: %m");
+
+       if (fuse_opts.show_help) {
+               usage(argv);
+               fuse_cmdline_help();
+               fuse_lowlevel_help();
+               ret = 0;
+               goto out;
+       }
+       if (fuse_opts.show_version) {
+               /* TODO: Show bcachefs version. */
+               printf("FUSE library version %s\n", fuse_pkgversion());
+               fuse_lowlevel_version();
+               ret = 0;
+               goto out;
+       }
+       if (!fuse_opts.mountpoint) {
+               usage(argv);
+               printf("Please supply a mountpoint.\n");
+               ret = 1;
+               goto out;
+       }
+       if (!ctx.devices_str) {
+               usage(argv);
+               printf("Please specify a device or device1:device2:...\n");
+               ret = 1;
+               goto out;
+       }
+       tokenize_devices(&ctx);
+
+       /* Open bch */
+       printf("Opening bcachefs filesystem on:\n");
+       for (i = 0; i < ctx.nr_devices; ++i)
+                printf("\t%s\n", ctx.devices[i]);
+
+       c = bch2_fs_open(ctx.devices, ctx.nr_devices, bch_opts);
+       if (IS_ERR(c))
+               die("error opening %s: %s", ctx.devices_str,
+                   strerror(-PTR_ERR(c)));
+
+       /* Fuse */
+       struct fuse_session *se =
+               fuse_session_new(&args, &bcachefs_fuse_ops,
+                                sizeof(bcachefs_fuse_ops), c);
+       if (!se)
+               die("fuse_session_new err: %m");
+
+       if (fuse_set_signal_handlers(se) < 0)
+               die("fuse_set_signal_handlers err: %m");
+
+       if (fuse_session_mount(se, fuse_opts.mountpoint))
+               die("fuse_session_mount err: %m");
+
+       /* This print statement is a trigger for tests. */
+       printf("Fuse mount initialized.\n");
+
+       fuse_daemonize(fuse_opts.foreground);
+
+       ret = fuse_session_loop(se);
+
+       /* Cleanup */
+       fuse_session_unmount(se);
+       fuse_remove_signal_handlers(se);
+       fuse_session_destroy(se);
+
+out:
+       free(fuse_opts.mountpoint);
+       fuse_opt_free_args(&args);
+       bf_context_free(&ctx);
+
+       return ret ? 1 : 0;
+}
+
+#endif /* BCACHEFS_FUSE */
diff --git a/cmd_key.c b/cmd_key.c
new file mode 100644
index 0000000..6052cb0
--- /dev/null
+++ b/cmd_key.c
@@ -0,0 +1,142 @@
+#include <errno.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "libbcachefs/checksum.h"
+#include "crypto.h"
+#include "libbcachefs.h"
+
+static void unlock_usage(void)
+{
+       puts("bcachefs unlock - unlock an encrypted filesystem so it can be mounted\n"
+            "Usage: bcachefs unlock [OPTION] device\n"
+            "\n"
+            "Options:\n"
+            "  -c                     Check if a device is encrypted\n"
+            "  -h                     Display this help and exit\n"
+            "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_unlock(int argc, char *argv[])
+{
+       bool check = false;
+       int opt;
+
+       while ((opt = getopt(argc, argv, "ch")) != -1)
+               switch (opt) {
+               case 'c':
+                       check = true;
+                       break;
+               case 'h':
+                       unlock_usage();
+                       exit(EXIT_SUCCESS);
+               }
+       args_shift(optind);
+
+       char *dev = arg_pop();
+       if (!dev)
+               die("Please supply a device");
+
+       if (argc)
+               die("Too many arguments");
+
+       struct bch_opts opts = bch2_opts_empty();
+
+       opt_set(opts, noexcl, true);
+       opt_set(opts, nochanges, true);
+
+       struct bch_sb_handle sb;
+       int ret = bch2_read_super(dev, &opts, &sb);
+       if (ret)
+               die("Error opening %s: %s", dev, strerror(-ret));
+
+       if (!bch2_sb_is_encrypted(sb.sb))
+               die("%s is not encrypted", dev);
+
+       if (check)
+               exit(EXIT_SUCCESS);
+
+       char *passphrase = read_passphrase("Enter passphrase: ");
+
+       bch2_add_key(sb.sb, passphrase);
+
+       bch2_free_super(&sb);
+       memzero_explicit(passphrase, strlen(passphrase));
+       free(passphrase);
+       return 0;
+}
+
+int cmd_set_passphrase(int argc, char *argv[])
+{
+       struct bch_opts opts = bch2_opts_empty();
+       struct bch_fs *c;
+
+       if (argc < 2)
+               die("Please supply one or more devices");
+
+       opt_set(opts, nostart, true);
+
+       /*
+        * we use bch2_fs_open() here, instead of just reading the superblock,
+        * to make sure we're opening and updating every component device:
+        */
+
+       c = bch2_fs_open(argv + 1, argc - 1, opts);
+       if (IS_ERR(c))
+               die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
+
+       struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb);
+       if (!crypt)
+               die("Filesystem does not have encryption enabled");
+
+       struct bch_encrypted_key new_key;
+       new_key.magic = BCH_KEY_MAGIC;
+
+       int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
+       if (ret)
+               die("Error getting current key");
+
+       char *new_passphrase = read_passphrase_twice("Enter new passphrase: ");
+       struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
+
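+       /* The filesystem key itself is unchanged; just re-encrypt it with a
+        * key derived from the new passphrase: */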
+       if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb.sb),
+                                   &new_key, sizeof(new_key)))
+               die("error encrypting key");
+       crypt->key = new_key;
+
+       bch2_write_super(c);
+       bch2_fs_stop(c);
+       return 0;
+}
+
+int cmd_remove_passphrase(int argc, char *argv[])
+{
+       struct bch_opts opts = bch2_opts_empty();
+       struct bch_fs *c;
+
+       if (argc < 2)
+               die("Please supply one or more devices");
+
+       opt_set(opts, nostart, true);
+       c = bch2_fs_open(argv + 1, argc - 1, opts);
+       if (IS_ERR(c))
+               die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
+
+       struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb);
+       if (!crypt)
+               die("Filesystem does not have encryption enabled");
+
+       struct bch_encrypted_key new_key;
+       new_key.magic = BCH_KEY_MAGIC;
+
+       int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
+       if (ret)
+               die("Error getting current key");
+
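+       /* Write the key back unencrypted - the superblock no longer needs a
+        * passphrase to unlock: */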
+       crypt->key = new_key;
+
+       bch2_write_super(c);
+       bch2_fs_stop(c);
+       return 0;
+}
diff --git a/cmd_migrate.c b/cmd_migrate.c
new file mode 100644
index 0000000..797c51e
--- /dev/null
+++ b/cmd_migrate.c
@@ -0,0 +1,840 @@
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/xattr.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "crypto.h"
+#include "libbcachefs.h"
+
+#include <linux/dcache.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/xattr.h>
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/alloc_background.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/btree_update.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/dirent.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/io.h"
+#include "libbcachefs/replicas.h"
+#include "libbcachefs/str_hash.h"
+#include "libbcachefs/super.h"
+#include "libbcachefs/xattr.h"
+
+/* XXX cut and pasted from fsck.c */
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static char *dev_t_to_path(dev_t dev)
+{
+       char link[PATH_MAX], *p;
+       int ret;
+
+       char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
+                                 major(dev), minor(dev));
+       ret = readlink(sysfs_dev, link, sizeof(link));
+       free(sysfs_dev);
+
+       if (ret < 0 || ret >= sizeof(link))
+               die("readlink error while looking up block device: %m");
+
+       link[ret] = '\0';
+
+       p = strrchr(link, '/');
+       if (!p)
+               die("error looking up device name");
+       p++;
+
+       return mprintf("/dev/%s", p);
+}
+
+static bool path_is_fs_root(const char *path)
+{
+       char *line = NULL, *p, *mount;
+       size_t n = 0;
+       FILE *f;
+       bool ret = true;
+
+       f = fopen("/proc/self/mountinfo", "r");
+       if (!f)
+               die("Error getting mount information");
+
+       while (getline(&line, &n, f) != -1) {
+               p = line;
+
+               strsep(&p, " "); /* mount id */
+               strsep(&p, " "); /* parent id */
+               strsep(&p, " "); /* dev */
+               strsep(&p, " "); /* root */
+               mount = strsep(&p, " ");
+               strsep(&p, " ");
+
+               if (mount && !strcmp(path, mount))
+                       goto found;
+       }
+
+       ret = false;
+found:
+       fclose(f);
+       free(line);
+       return ret;
+}
+
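+/*
+ * `extents' covers the ranges reserved for the new filesystem; buckets
+ * outside those ranges may still hold the old filesystem's data, so mark
+ * them unusable for allocation:
+ */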
+static void mark_unreserved_space(struct bch_fs *c, ranges extents)
+{
+       struct bch_dev *ca = c->devs[0];
+       struct hole_iter iter;
+       struct range i;
+
+       for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
+               u64 b;
+
+               if (i.start == i.end)
+                       return;
+
+               b = sector_to_bucket(ca, i.start >> 9);
+               do {
+                       set_bit(b, ca->buckets_nouse);
+                       b++;
+               } while (bucket_to_sector(ca, b) << 9 < i.end);
+       }
+}
+
+static void update_inode(struct bch_fs *c,
+                        struct bch_inode_unpacked *inode)
+{
+       struct bkey_inode_buf packed;
+       int ret;
+
+       bch2_inode_pack(&packed, inode);
+       ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
+                               NULL, NULL, 0);
+       if (ret)
+               die("error updating inode: %s", strerror(-ret));
+}
+
+static void create_link(struct bch_fs *c,
+                       struct bch_inode_unpacked *parent,
+                       const char *name, u64 inum, mode_t mode)
+{
+       struct qstr qstr = QSTR(name);
+       struct bch_inode_unpacked parent_u;
+       struct bch_inode_unpacked inode;
+
+       int ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_link_trans(&trans, parent->bi_inum, inum,
+                               &parent_u, &inode, &qstr));
+       if (ret)
+               die("error creating hardlink: %s", strerror(-ret));
+}
+
+static struct bch_inode_unpacked create_file(struct bch_fs *c,
+                                            struct bch_inode_unpacked *parent,
+                                            const char *name,
+                                            uid_t uid, gid_t gid,
+                                            mode_t mode, dev_t rdev)
+{
+       struct qstr qstr = QSTR(name);
+       struct bch_inode_unpacked new_inode;
+
+       int ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_create_trans(&trans,
+                                 parent->bi_inum, parent,
+                                 &new_inode, &qstr,
+                                 uid, gid, mode, rdev, NULL, NULL));
+       if (ret)
+               die("error creating file: %s", strerror(-ret));
+
+       return new_inode;
+}
+
+#define for_each_xattr_handler(handlers, handler)              \
+       if (handlers)                                           \
+               for ((handler) = *(handlers)++;                 \
+                       (handler) != NULL;                      \
+                       (handler) = *(handlers)++)
+
+static const struct xattr_handler *xattr_resolve_name(char **name)
+{
+       const struct xattr_handler **handlers = bch2_xattr_handlers;
+       const struct xattr_handler *handler;
+
+       for_each_xattr_handler(handlers, handler) {
+               char *n;
+
+               n = strcmp_prefix(*name, xattr_prefix(handler));
+               if (n) {
+                       if (!handler->prefix ^ !*n) {
+                               if (*n)
+                                       continue;
+                               return ERR_PTR(-EINVAL);
+                       }
+                       *name = n;
+                       return handler;
+               }
+       }
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
+static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                      struct stat *src)
+{
+       dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
+       dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
+       dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
+}
+
+static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                       char *src)
+{
+       struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
+
+       char attrs[XATTR_LIST_MAX];
+       ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
+       if (attrs_size < 0)
+               die("listxattr error: %m");
+
+       char *next, *attr;
+       for (attr = attrs;
+            attr < attrs + attrs_size;
+            attr = next) {
+               next = attr + strlen(attr) + 1;
+
+               char val[XATTR_SIZE_MAX];
+               ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
+
+               if (val_size < 0)
+                       die("error getting xattr val: %m");
+
+               const struct xattr_handler *h = xattr_resolve_name(&attr);
+
+               int ret = bch2_trans_do(c, NULL, NULL, 0,
+                               bch2_xattr_set(&trans, dst->bi_inum, &hash_info, attr,
+                                              val, val_size, h->flags, 0));
+               if (ret < 0)
+                       die("error creating xattr: %s", strerror(-ret));
+       }
+}
+
+static char buf[1 << 20] __aligned(PAGE_SIZE);
+
+static void write_data(struct bch_fs *c,
+                      struct bch_inode_unpacked *dst_inode,
+                      u64 dst_offset, void *buf, size_t len)
+{
+       struct {
+               struct bch_write_op op;
+               /* sizeof(buf) would be the size of the pointer argument:
+                * size the bvec array for the 1 MB static buffer instead */
+               struct bio_vec bv[(1 << 20) / PAGE_SIZE];
+       } o;
+       struct closure cl;
+
+       BUG_ON(dst_offset       & (block_bytes(c) - 1));
+       BUG_ON(len              & (block_bytes(c) - 1));
+
+       closure_init_stack(&cl);
+
+       bio_init(&o.op.wbio.bio, o.bv, ARRAY_SIZE(o.bv));
+       bch2_bio_map(&o.op.wbio.bio, buf, len);
+
+       bch2_write_op_init(&o.op, c, bch2_opts_to_inode_opts(c->opts));
+       o.op.write_point        = writepoint_hashed(0);
+       o.op.nr_replicas        = 1;
+       o.op.pos                = POS(dst_inode->bi_inum, dst_offset >> 9);
+
+       int ret = bch2_disk_reservation_get(c, &o.op.res, len >> 9,
+                                           c->opts.data_replicas, 0);
+       if (ret)
+               die("error reserving space in new filesystem: %s", strerror(-ret));
+
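+       /* issue the write and wait for it to complete: */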
+       closure_call(&o.op.cl, bch2_write, NULL, &cl);
+       closure_sync(&cl);
+
+       dst_inode->bi_sectors += len >> 9;
+}
+
+static void copy_data(struct bch_fs *c,
+                     struct bch_inode_unpacked *dst_inode,
+                     int src_fd, u64 start, u64 end)
+{
+       while (start < end) {
+               unsigned len = min_t(u64, end - start, sizeof(buf));
+               unsigned pad = round_up(len, block_bytes(c)) - len;
+
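+               /* read a chunk, zeroing the tail out to the block size: */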
+               xpread(src_fd, buf, len, start);
+               memset(buf + len, 0, pad);
+
+               write_data(c, dst_inode, start, buf, len + pad);
+               start += len;
+       }
+}
+
+static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                     u64 logical, u64 physical, u64 length)
+{
+       struct bch_dev *ca = c->devs[0];
+
+       BUG_ON(logical  & (block_bytes(c) - 1));
+       BUG_ON(physical & (block_bytes(c) - 1));
+       BUG_ON(length   & (block_bytes(c) - 1));
+
+       logical         >>= 9;
+       physical        >>= 9;
+       length          >>= 9;
+
+       BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
+
+       while (length) {
+               struct bkey_i_extent *e;
+               BKEY_PADDED(k) k;
+               u64 b = sector_to_bucket(ca, physical);
+               struct disk_reservation res;
+               unsigned sectors;
+               int ret;
+
+               sectors = min(ca->mi.bucket_size -
+                             (physical & (ca->mi.bucket_size - 1)),
+                             length);
+
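+               /*
+                * bcachefs extent keys are indexed by their end position;
+                * the start is p.offset - size:
+                */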
+               e = bkey_extent_init(&k.k);
+               e->k.p.inode    = dst->bi_inum;
+               e->k.p.offset   = logical + sectors;
+               e->k.size       = sectors;
+               bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
+                                       .offset = physical,
+                                       .dev = 0,
+                                       .gen = bucket(ca, b)->mark.gen,
+                                 });
+
+               ret = bch2_disk_reservation_get(c, &res, sectors, 1,
+                                               BCH_DISK_RESERVATION_NOFAIL);
+               if (ret)
+                       die("error reserving space in new filesystem: %s",
+                           strerror(-ret));
+
+               bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c);
+
+               ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
+                                       &res, NULL, 0);
+               if (ret)
+                       die("btree insert error: %s", strerror(-ret));
+
+               bch2_disk_reservation_put(c, &res);
+
+               dst->bi_sectors += sectors;
+               logical         += sectors;
+               physical        += sectors;
+               length          -= sectors;
+       }
+}
+
+static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                     char *src)
+{
+       ssize_t ret = readlink(src, buf, sizeof(buf));
+       if (ret < 0)
+               die("readlink error: %m");
+
+       write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
+}
+
+static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
+                     int src_fd, u64 src_size,
+                     char *src_path, ranges *extents)
+{
+       struct fiemap_iter iter;
+       struct fiemap_extent e;
+
+       fiemap_for_each(src_fd, iter, e)
+               if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
+                       fsync(src_fd);
+                       break;
+               }
+
+       fiemap_for_each(src_fd, iter, e) {
+               if ((e.fe_logical       & (block_bytes(c) - 1)) ||
+                   (e.fe_length        & (block_bytes(c) - 1)))
+                       die("Unaligned extent in %s - can't handle", src_path);
+
+               if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+                                 FIEMAP_EXTENT_ENCODED|
+                                 FIEMAP_EXTENT_NOT_ALIGNED|
+                                 FIEMAP_EXTENT_DATA_INLINE)) {
+                       copy_data(c, dst, src_fd, e.fe_logical,
+                                 min(src_size - e.fe_logical,
+                                     e.fe_length));
+                       continue;
+               }
+
+               /*
+                * if the data is below 1 MB, copy it so it doesn't conflict
+                * with bcachefs's potentially larger superblock:
+                */
+               if (e.fe_physical < 1 << 20) {
+                       copy_data(c, dst, src_fd, e.fe_logical,
+                                 min(src_size - e.fe_logical,
+                                     e.fe_length));
+                       continue;
+               }
+
+               if ((e.fe_physical      & (block_bytes(c) - 1)))
+                       die("Unaligned extent in %s - can't handle", src_path);
+
+               range_add(extents, e.fe_physical, e.fe_length);
+               link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
+       }
+}
+
+struct copy_fs_state {
+       u64                     bcachefs_inum;
+       dev_t                   dev;
+
+       GENRADIX(u64)           hardlinks;
+       ranges                  extents;
+};
+
+static void copy_dir(struct copy_fs_state *s,
+                    struct bch_fs *c,
+                    struct bch_inode_unpacked *dst,
+                    int src_fd, const char *src_path)
+{
+       DIR *dir = fdopendir(src_fd);
+       struct dirent *d;
+
+       while ((errno = 0), (d = readdir(dir))) {
+               struct bch_inode_unpacked inode;
+               int fd;
+
+               if (fchdir(src_fd))
+                       die("chdir error: %m");
+
+               struct stat stat =
+                       xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
+
+               if (!strcmp(d->d_name, ".") ||
+                   !strcmp(d->d_name, "..") ||
+                   stat.st_ino == s->bcachefs_inum)
+                       continue;
+
+               char *child_path = mprintf("%s/%s", src_path, d->d_name);
+
+               if (stat.st_dev != s->dev)
+                       die("%s does not have correct st_dev!", child_path);
+
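+               /*
+                * track regular files by st_ino so that hardlinks all
+                * resolve to the same new inode:
+                */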
+               u64 *dst_inum = S_ISREG(stat.st_mode)
+                       ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
+                       : NULL;
+
+               if (dst_inum && *dst_inum) {
+                       create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
+                       goto next;
+               }
+
+               inode = create_file(c, dst, d->d_name,
+                                   stat.st_uid, stat.st_gid,
+                                   stat.st_mode, stat.st_rdev);
+
+               if (dst_inum)
+                       *dst_inum = inode.bi_inum;
+
+               copy_times(c, &inode, &stat);
+               copy_xattrs(c, &inode, d->d_name);
+
+               switch (mode_to_type(stat.st_mode)) {
+               case DT_DIR:
+                       fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+                       copy_dir(s, c, &inode, fd, child_path);
+                       close(fd);
+                       break;
+               case DT_REG:
+                       inode.bi_size = stat.st_size;
+
+                       fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+                       copy_file(c, &inode, fd, stat.st_size,
+                                 child_path, &s->extents);
+                       close(fd);
+                       break;
+               case DT_LNK:
+                       inode.bi_size = stat.st_size;
+
+                       copy_link(c, &inode, d->d_name);
+                       break;
+               case DT_FIFO:
+               case DT_CHR:
+               case DT_BLK:
+               case DT_SOCK:
+               case DT_WHT:
+                       /* nothing else to copy for these: */
+                       break;
+               default:
+                       BUG();
+               }
+
+               update_inode(c, &inode);
+next:
+               free(child_path);
+       }
+
+       if (errno)
+               die("readdir error: %m");
+}
+
+static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
+                                  u64 size, u64 *bcachefs_inum, dev_t dev,
+                                  bool force)
+{
+       int fd = force
+               ? open(file_path, O_RDWR|O_CREAT, 0600)
+               : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
+       if (fd < 0)
+               die("Error creating %s for bcachefs metadata: %m",
+                   file_path);
+
+       struct stat statbuf = xfstat(fd);
+
+       if (statbuf.st_dev != dev)
+               die("bcachefs file has incorrect device");
+
+       *bcachefs_inum = statbuf.st_ino;
+
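+       /* allocate the space, then find out where it physically landed: */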
+       if (fallocate(fd, 0, 0, size))
+               die("Error reserving space for bcachefs metadata: %m");
+
+       fsync(fd);
+
+       struct fiemap_iter iter;
+       struct fiemap_extent e;
+       ranges extents = { NULL };
+
+       fiemap_for_each(fd, iter, e) {
+               if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+                                 FIEMAP_EXTENT_ENCODED|
+                                 FIEMAP_EXTENT_NOT_ALIGNED|
+                                 FIEMAP_EXTENT_DATA_INLINE))
+                       die("Unable to continue: metadata file not fully mapped");
+
+               if ((e.fe_physical      & (block_size - 1)) ||
+                   (e.fe_length        & (block_size - 1)))
+                       die("Unable to continue: unaligned extents in metadata file");
+
+               range_add(&extents, e.fe_physical, e.fe_length);
+       }
+       close(fd);
+
+       ranges_sort_merge(&extents);
+       return extents;
+}
+
+static void reserve_old_fs_space(struct bch_fs *c,
+                                struct bch_inode_unpacked *root_inode,
+                                ranges *extents)
+{
+       struct bch_dev *ca = c->devs[0];
+       struct bch_inode_unpacked dst;
+       struct hole_iter iter;
+       struct range i;
+
+       dst = create_file(c, root_inode, "old_migrated_filesystem",
+                         0, 0, S_IFREG|0400, 0);
+       dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
+
+       ranges_sort_merge(extents);
+
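+       /*
+        * link every hole - space not used by the new filesystem - into the
+        * old_migrated_filesystem file, so that it stays reserved until the
+        * old filesystem is deleted:
+        */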
+       for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
+               link_data(c, &dst, i.start, i.start, i.end - i.start);
+
+       update_inode(c, &dst);
+}
+
+static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
+                   u64 bcachefs_inum, ranges *extents)
+{
+       syncfs(src_fd);
+
+       struct bch_inode_unpacked root_inode;
+       int ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, &root_inode);
+       if (ret)
+               die("error looking up root directory: %s", strerror(-ret));
+
+       if (fchdir(src_fd))
+               die("chdir error: %m");
+
+       struct stat stat = xfstat(src_fd);
+       copy_times(c, &root_inode, &stat);
+       copy_xattrs(c, &root_inode, ".");
+
+       struct copy_fs_state s = {
+               .bcachefs_inum  = bcachefs_inum,
+               .dev            = stat.st_dev,
+               .extents        = *extents,
+       };
+
+       /* now, copy: */
+       copy_dir(&s, c, &root_inode, src_fd, src_path);
+
+       reserve_old_fs_space(c, &root_inode, &s.extents);
+
+       update_inode(c, &root_inode);
+
+       darray_free(s.extents);
+       genradix_free(&s.hardlinks);
+
+       bch2_alloc_write(c, false);
+}
+
+static void find_superblock_space(ranges extents, struct dev_opts *dev)
+{
+       struct range *i;
+
+       darray_foreach(i, extents) {
+               u64 start = round_up(max(256ULL << 10, i->start),
+                                    dev->bucket_size << 9);
+               u64 end = round_down(i->end,
+                                    dev->bucket_size << 9);
+
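+               /* the superblock needs 128k of bucket-aligned space: */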
+               if (start + (128 << 10) <= end) {
+                       dev->sb_offset  = start >> 9;
+                       dev->sb_end     = dev->sb_offset + 256;
+                       return;
+               }
+       }
+
+       die("Couldn't find a valid location for superblock");
+}
+
+static void migrate_usage(void)
+{
+       puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
+            "Usage: bcachefs migrate [OPTION]...\n"
+            "\n"
+            "Options:\n"
+            "  -f fs                  Root of filesystem to migrate\n"
+            "      --encrypted        Enable whole filesystem encryption (chacha20/poly1305)\n"
+            "      --no_passphrase    Don't encrypt master encryption key\n"
+            "  -F                     Force, even if metadata file already exists\n"
+            "  -h                     Display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+static const struct option migrate_opts[] = {
+       { "encrypted",          no_argument, NULL, 'e' },
+       { "no_passphrase",      no_argument, NULL, 'p' },
+       { NULL }
+};
+
+static int migrate_fs(const char               *fs_path,
+                     struct bch_opt_strs       fs_opt_strs,
+                     struct bch_opts           fs_opts,
+                     struct format_opts        format_opts,
+                     bool force)
+{
+       if (!path_is_fs_root(fs_path))
+               die("%s is not a filesystem root", fs_path);
+
+       int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
+       struct stat stat = xfstat(fs_fd);
+
+       if (!S_ISDIR(stat.st_mode))
+               die("%s is not a directory", fs_path);
+
+       struct dev_opts dev = dev_opts_default();
+
+       dev.path = dev_t_to_path(stat.st_dev);
+       dev.fd = xopen(dev.path, O_RDWR);
+
+       opt_set(fs_opts, block_size, get_blocksize(dev.path, dev.fd));
+
+       char *file_path = mprintf("%s/bcachefs", fs_path);
+       printf("Creating new filesystem on %s in space reserved at %s\n",
+              dev.path, file_path);
+
+       bch2_pick_bucket_size(fs_opts, &dev);
+
+       u64 bcachefs_inum;
+       ranges extents = reserve_new_fs_space(file_path,
+                               fs_opts.block_size << 9,
+                               get_size(dev.path, dev.fd) / 5,
+                               &bcachefs_inum, stat.st_dev, force);
+
+       find_superblock_space(extents, &dev);
+
+       struct bch_sb *sb = bch2_format(fs_opt_strs,
+                                       fs_opts, format_opts, &dev, 1);
+       u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
+
+       if (format_opts.passphrase)
+               bch2_add_key(sb, format_opts.passphrase);
+
+       free(sb);
+
+       struct bch_opts opts = bch2_opts_empty();
+       struct bch_fs *c = NULL;
+       char *path[1] = { dev.path };
+
+       opt_set(opts, sb,       sb_offset);
+       opt_set(opts, nostart,  true);
+       opt_set(opts, noexcl,   true);
+
+       c = bch2_fs_open(path, 1, opts);
+       if (IS_ERR(c))
+               die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
+
+       mark_unreserved_space(c, extents);
+
+       int ret = bch2_fs_start(c);
+       if (ret)
+               die("Error starting new filesystem: %s", strerror(-ret));
+
+       copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
+
+       bch2_fs_stop(c);
+
+       printf("Migrate complete, running fsck:\n");
+       opt_set(opts, nostart,  false);
+       opt_set(opts, nochanges, true);
+
+       c = bch2_fs_open(path, 1, opts);
+       if (IS_ERR(c))
+               die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
+
+       bch2_fs_stop(c);
+       printf("fsck complete\n");
+
+       printf("To mount the new filesystem, run\n"
+              "  mount -t bcachefs -o sb=%llu %s dir\n"
+              "\n"
+              "After verifying that the new filesystem is correct, to create a\n"
+              "superblock at the default offset and finish the migration run\n"
+              "  bcachefs migrate-superblock -d %s -o %llu\n"
+              "\n"
+              "The new filesystem will have a file at /old_migrated_filesystem\n"
+              "referencing all disk space that might be used by the existing\n"
+              "filesystem. That file can be deleted once the old filesystem is\n"
+              "no longer needed (and should be deleted prior to running\n"
+              "bcachefs migrate-superblock)\n",
+              sb_offset, dev.path, dev.path, sb_offset);
+       return 0;
+}
+
+int cmd_migrate(int argc, char *argv[])
+{
+       struct format_opts format_opts = format_opts_default();
+       char *fs_path = NULL;
+       bool no_passphrase = false, force = false;
+       int opt;
+
+       struct bch_opt_strs fs_opt_strs =
+               bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
+       struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+       while ((opt = getopt_long(argc, argv, "f:Fh",
+                                 migrate_opts, NULL)) != -1)
+               switch (opt) {
+               case 'f':
+                       fs_path = optarg;
+                       break;
+               case 'e':
+                       format_opts.encrypted = true;
+                       break;
+               case 'p':
+                       no_passphrase = true;
+                       break;
+               case 'F':
+                       force = true;
+                       break;
+               case 'h':
+                       migrate_usage();
+                       exit(EXIT_SUCCESS);
+               }
+
+       if (!fs_path)
+               die("Please specify a filesystem to migrate");
+
+       if (format_opts.encrypted && !no_passphrase)
+               format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
+
+       return migrate_fs(fs_path,
+                         fs_opt_strs,
+                         fs_opts,
+                         format_opts, force);
+}
+
+static void migrate_superblock_usage(void)
+{
+       puts("bcachefs migrate-superblock - create default superblock after migrating\n"
+            "Usage: bcachefs migrate-superblock [OPTION]...\n"
+            "\n"
+            "Options:\n"
+            "  -d device     Device to create superblock for\n"
+            "  -o offset     Offset of existing superblock\n"
+            "  -h            Display this help and exit\n"
+            "Report bugs to <linux-bcache@vger.kernel.org>");
+}
+
+int cmd_migrate_superblock(int argc, char *argv[])
+{
+       char *dev = NULL;
+       u64 offset = 0;
+       int opt, ret;
+
+       while ((opt = getopt(argc, argv, "d:o:h")) != -1)
+               switch (opt) {
+                       case 'd':
+                               dev = optarg;
+                               break;
+                       case 'o':
+                               ret = kstrtou64(optarg, 10, &offset);
+                               if (ret)
+                                       die("Invalid offset");
+                               break;
+                       case 'h':
+                               migrate_superblock_usage();
+                               exit(EXIT_SUCCESS);
+               }
+
+       if (!dev)
+               die("Please specify a device");
+
+       if (!offset)
+               die("Please specify offset of existing superblock");
+
+       int fd = xopen(dev, O_RDWR);
+       struct bch_sb *sb = __bch2_super_read(fd, offset);
+
+       if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
+               die("Can't add superblock: no space left in superblock layout");
+
+       unsigned i;
+       for (i = 0; i < sb->layout.nr_superblocks; i++)
+               if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
+                       die("Superblock layout already has default superblock");
+
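+       /* shift the existing layout up and put the default sector in slot 0: */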
+       memmove(&sb->layout.sb_offset[1],
+               &sb->layout.sb_offset[0],
+               sb->layout.nr_superblocks * sizeof(u64));
+       sb->layout.nr_superblocks++;
+
+       sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+
+       bch2_super_write(fd, sb);
+       close(fd);
+
+       return 0;
+}
diff --git a/cmd_run.c b/cmd_run.c
new file mode 100644 (file)
index 0000000..1bf84e5
--- /dev/null
+++ b/cmd_run.c
@@ -0,0 +1,33 @@
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+
+#if 0
+int cmd_run(int argc, char *argv[])
+{
+       return 0;
+}
+
+int cmd_stop(int argc, char *argv[])
+{
+       if (argc != 2)
+               die("Please supply a filesystem");
+
+       struct bchfs_handle fs = bcache_fs_open(argv[1]);
+       xioctl(fs.ioctl_fd, BCH_IOCTL_STOP);
+       return 0;
+}
+#endif
diff --git a/cmd_version.c b/cmd_version.c
new file mode 100644 (file)
index 0000000..3fb4b6e
--- /dev/null
+++ b/cmd_version.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+#include "cmds.h"
+
+int cmd_version(int argc, char *argv[])
+{
+       printf("bcachefs tool version %s\n", VERSION_STRING);
+       return 0;
+}
diff --git a/cmds.h b/cmds.h
new file mode 100644 (file)
index 0000000..bcd27ad
--- /dev/null
+++ b/cmds.h
@@ -0,0 +1,53 @@
+/*
+ * Author: Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * GPLv2
+ */
+
+#ifndef _CMDS_H
+#define _CMDS_H
+
+#include "tools-util.h"
+
+int cmd_format(int argc, char *argv[]);
+int cmd_show_super(int argc, char *argv[]);
+
+#if 0
+int cmd_assemble(int argc, char *argv[]);
+int cmd_incremental(int argc, char *argv[]);
+int cmd_run(int argc, char *argv[]);
+int cmd_stop(int argc, char *argv[]);
+#endif
+
+int cmd_fs_usage(int argc, char *argv[]);
+
+int cmd_device_add(int argc, char *argv[]);
+int cmd_device_remove(int argc, char *argv[]);
+int cmd_device_online(int argc, char *argv[]);
+int cmd_device_offline(int argc, char *argv[]);
+int cmd_device_evacuate(int argc, char *argv[]);
+int cmd_device_set_state(int argc, char *argv[]);
+int cmd_device_resize(int argc, char *argv[]);
+
+int cmd_data_rereplicate(int argc, char *argv[]);
+
+int cmd_unlock(int argc, char *argv[]);
+int cmd_set_passphrase(int argc, char *argv[]);
+int cmd_remove_passphrase(int argc, char *argv[]);
+
+int cmd_fsck(int argc, char *argv[]);
+
+int cmd_dump(int argc, char *argv[]);
+int cmd_list(int argc, char *argv[]);
+int cmd_list_journal(int argc, char *argv[]);
+
+int cmd_migrate(int argc, char *argv[]);
+int cmd_migrate_superblock(int argc, char *argv[]);
+
+int cmd_version(int argc, char *argv[]);
+
+int cmd_setattr(int argc, char *argv[]);
+
+int cmd_fusemount(int argc, char *argv[]);
+
+#endif /* _CMDS_H */
diff --git a/config.h b/config.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/crypto.c b/crypto.c
new file mode 100644 (file)
index 0000000..7f7fbd5
--- /dev/null
+++ b/crypto.c
@@ -0,0 +1,189 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <keyutils.h>
+#include <linux/random.h>
+#include <libscrypt.h>
+#include <uuid/uuid.h>
+
+#include "libbcachefs/checksum.h"
+#include "crypto.h"
+
+char *read_passphrase(const char *prompt)
+{
+       char *buf = NULL;
+       size_t buflen = 0;
+       ssize_t len;
+
+       if (isatty(STDIN_FILENO)) {
+               struct termios old, new;
+
+               fprintf(stderr, "%s", prompt);
+               fflush(stderr);
+
+               if (tcgetattr(STDIN_FILENO, &old))
+                       die("error getting terminal attrs");
+
+               new = old;
+               new.c_lflag &= ~ECHO;
+               if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &new))
+                       die("error setting terminal attrs");
+
+               len = getline(&buf, &buflen, stdin);
+
+               tcsetattr(STDIN_FILENO, TCSAFLUSH, &old);
+               fprintf(stderr, "\n");
+       } else {
+               len = getline(&buf, &buflen, stdin);
+       }
+
+       if (len < 0)
+               die("error reading passphrase");
+       if (len && buf[len - 1] == '\n')
+               buf[len - 1] = '\0';
+
+       return buf;
+}
+
+char *read_passphrase_twice(const char *prompt)
+{
+       char *pass = read_passphrase(prompt);
+
+       if (!isatty(STDIN_FILENO))
+               return pass;
+
+       char *pass2 = read_passphrase("Enter same passphrase again: ");
+
+       if (strcmp(pass, pass2)) {
+               memzero_explicit(pass, strlen(pass));
+               memzero_explicit(pass2, strlen(pass2));
+               die("Passphrases do not match");
+       }
+
+       memzero_explicit(pass2, strlen(pass2));
+       free(pass2);
+
+       return pass;
+}
+
+struct bch_key derive_passphrase(struct bch_sb_field_crypt *crypt,
+                                const char *passphrase)
+{
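+       /* note: a fixed salt; the passphrase is the only secret input */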
+       const unsigned char salt[] = "bcache";
+       struct bch_key key;
+       int ret;
+
+       switch (BCH_CRYPT_KDF_TYPE(crypt)) {
+       case BCH_KDF_SCRYPT:
+               ret = libscrypt_scrypt((void *) passphrase, strlen(passphrase),
+                                      salt, sizeof(salt),
+                                      1ULL << BCH_KDF_SCRYPT_N(crypt),
+                                      1ULL << BCH_KDF_SCRYPT_R(crypt),
+                                      1ULL << BCH_KDF_SCRYPT_P(crypt),
+                                      (void *) &key, sizeof(key));
+               if (ret)
+                       die("scrypt error: %i", ret);
+               break;
+       default:
+               die("unknown kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt));
+       }
+
+       return key;
+}
+
+bool bch2_sb_is_encrypted(struct bch_sb *sb)
+{
+       struct bch_sb_field_crypt *crypt;
+
+       return (crypt = bch2_sb_get_crypt(sb)) &&
+               bch2_key_is_encrypted(&crypt->key);
+}
+
+void bch2_passphrase_check(struct bch_sb *sb, const char *passphrase,
+                          struct bch_key *passphrase_key,
+                          struct bch_encrypted_key *sb_key)
+{
+       struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(sb);
+       if (!crypt)
+               die("filesystem is not encrypted");
+
+       *sb_key = crypt->key;
+
+       if (!bch2_key_is_encrypted(sb_key))
+               die("filesystem does not have encryption key");
+
+       *passphrase_key = derive_passphrase(crypt, passphrase);
+
+       /* Check if the user supplied the correct passphrase: */
+       if (bch2_chacha_encrypt_key(passphrase_key, __bch2_sb_key_nonce(sb),
+                                   sb_key, sizeof(*sb_key)))
+               die("error encrypting key");
+
+       if (bch2_key_is_encrypted(sb_key))
+               die("incorrect passphrase");
+}
+
+void bch2_add_key(struct bch_sb *sb, const char *passphrase)
+{
+       struct bch_key passphrase_key;
+       struct bch_encrypted_key sb_key;
+
+       bch2_passphrase_check(sb, passphrase,
+                             &passphrase_key,
+                             &sb_key);
+
+       char uuid[40];
+       uuid_unparse_lower(sb->user_uuid.b, uuid);
+
+       char *description = mprintf("bcachefs:%s", uuid);
+
+       if (add_key("logon", description,
+                   &passphrase_key, sizeof(passphrase_key),
+                   KEY_SPEC_USER_KEYRING) < 0 ||
+           add_key("user", description,
+                   &passphrase_key, sizeof(passphrase_key),
+                   KEY_SPEC_USER_KEYRING) < 0)
+               die("add_key error: %m");
+
+       memzero_explicit(description, strlen(description));
+       free(description);
+       memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+       memzero_explicit(&sb_key, sizeof(sb_key));
+}
+
+void bch_sb_crypt_init(struct bch_sb *sb,
+                      struct bch_sb_field_crypt *crypt,
+                      const char *passphrase)
+{
+       crypt->key.magic = BCH_KEY_MAGIC;
+       get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
+
+       if (passphrase) {
+
+               SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
+               SET_BCH_KDF_SCRYPT_N(crypt, ilog2(SCRYPT_N));
+               SET_BCH_KDF_SCRYPT_R(crypt, ilog2(SCRYPT_r));
+               SET_BCH_KDF_SCRYPT_P(crypt, ilog2(SCRYPT_p));
+
+               struct bch_key passphrase_key = derive_passphrase(crypt, passphrase);
+
+               assert(!bch2_key_is_encrypted(&crypt->key));
+
+               if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(sb),
+                                          &crypt->key, sizeof(crypt->key)))
+                       die("error encrypting key");
+
+               assert(bch2_key_is_encrypted(&crypt->key));
+
+               memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+       }
+}
diff --git a/crypto.h b/crypto.h
new file mode 100644 (file)
index 0000000..7f523c0
--- /dev/null
+++ b/crypto.h
@@ -0,0 +1,22 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include "tools-util.h"
+
+struct bch_sb;
+struct bch_sb_field_crypt;
+struct bch_key;
+struct bch_encrypted_key;
+
+char *read_passphrase(const char *);
+char *read_passphrase_twice(const char *);
+
+struct bch_key derive_passphrase(struct bch_sb_field_crypt *, const char *);
+bool bch2_sb_is_encrypted(struct bch_sb *);
+void bch2_passphrase_check(struct bch_sb *, const char *,
+                          struct bch_key *, struct bch_encrypted_key *);
+void bch2_add_key(struct bch_sb *, const char *);
+void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
+                      const char *);
+
+#endif /* _CRYPTO_H */
diff --git a/debian/files b/debian/files
new file mode 100644 (file)
index 0000000..11087b6
--- /dev/null
+++ b/debian/files
@@ -0,0 +1 @@
+bcachefs-tools_0.1+git20201017.8a4408-1~exp1_source.buildinfo utils optional
diff --git a/default.nix b/default.nix
new file mode 100644 (file)
index 0000000..f19ff10
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,32 @@
+{ nixpkgs ? (import ./nix/nixpkgs.nix)
+}:
+
+with nixpkgs;
+
+stdenv.mkDerivation rec {
+  name = "bcachefs-tools-${version}";
+  version = "git";
+
+  src = lib.cleanSource ./.; # NOTE: ignore .git, otherwise things get weird!
+
+  nativeBuildInputs = [ git pkgconfig ];
+  buildInputs =
+    [ liburcu libuuid libaio zlib attr keyutils
+      libsodium libscrypt
+    ];
+
+  enableParallelBuilding = true;
+  makeFlags =
+    [ "PREFIX=$(out)"
+    ];
+
+  meta = with stdenv.lib; {
+    description = "Userspace tools for bcachefs";
+    homepage    = http://bcachefs.org;
+    license     = licenses.gpl2;
+    platforms   = platforms.linux;
+    maintainers =
+      [ "Kent Overstreet <kent.overstreet@gmail.com>"
+      ];
+  };
+}
diff --git a/doc/bcachefs.5.txt b/doc/bcachefs.5.txt
new file mode 100644 (file)
index 0000000..291e2e3
--- /dev/null
+++ b/doc/bcachefs.5.txt
@@ -0,0 +1,110 @@
+BCACHEFS(5)
+===========
+
+NAME
+----
+bcachefs - bcachefs overview, user's manual and configuration
+
+DESCRIPTION
+-----------
+Bcachefs is a multi-device copy-on-write filesystem that supports
+
+ Checksumming
+ Compression
+ Encryption
+ Reflink
+ Caching
+ Replication
+ Erasure coding (Reed-Solomon)
+
+And more. This document is intended to be an overview of the various features
+and use cases.
+
+Configuration
+-------------
+Most configuration is done via filesystem options that can be set at format
+time, mount time (as mount -o parameters), or changed at runtime through sysfs
+(the /sys/fs/bcachefs/<UUID>/options/ directory).
+
+Many of those options (particularly those that control the IO path) can also be
+set on individual files and directories, via the bcachefs setattr command (which
+internally mostly works via the extended attribute interface, but the setattr
+command takes care to propagate options to children correctly).
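+
+For example, the compression option can be set at each of these stages (device,
+mount point and UUID here are illustrative):
+
+  bcachefs format --compression=lz4 /dev/sda
+  mount -t bcachefs -o compression=lz4 /dev/sda /mnt
+  echo lz4 > /sys/fs/bcachefs/<UUID>/options/compression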
+
+ * TODO: include master list of options from opts.h
+#include "opts.mdwn"
+
+Device management
+-----------------
+Devices can be added, removed, and resized at will, at runtime. There is no
+fixed topology or data layout, as with hardware RAID or ZFS, and devices need
+not be the same size - the allocator will stripe across multiple disks,
+preferring to allocate from disks with more free space so that disks all fill up
+at the same time.
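+
+For example, a device can be added to a mounted filesystem and resized at
+runtime (device and mount point illustrative):
+
+  bcachefs device add /mnt /dev/sdg
+  bcachefs device resize /dev/sdg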
+
+We generally avoid per-device options, preferring instead options that can be
+overridden on files or directories, but there are some:
+
+ *durability* 
+
+Device labels, targets
+----------------------
+
+Configuration options that point to targets (i.e. a disk or group of disks) may
+be passed either a device (e.g. /dev/sda), or a label. Labels are assigned to
+disks (and need not be unique), and these labels form a nested hierarchy: this
+allows disks to be grouped together and referred to either individually or as a
+group.
+
+For example, given disks formatted with these labels:
+
+  bcachefs format -g controller1.hdd.hdd1 /dev/sda     \
+                  -g controller1.hdd.hdd2 /dev/sdb     \
+                  -g controller1.ssd.ssd1 /dev/sdc     \
+                  -g controller1.ssd.ssd2 /dev/sdd     \
+                  -g controller2.hdd1     /dev/sde     \
+                  -g controller2.hdd2     /dev/sdf
+
+Configuration options such as foreground_target may then refer to controller1,
+or controller1.hdd, or controller1.hdd.hdd1 - or to /dev/sda directly.
+
+Data placement, caching
+-----------------------
+
+The following options control which disks data is written to:
+
+ * foreground_target
+ * background_target
+ * promote_target
+
+The foreground_target option is used to direct writes from applications. The
+background_target option, if set, will cause data to be moved to that target in
+the background by the rebalance thread some time after it has been initially
+written - leaving behind the original copy, but marking it cached so that it can
+be discarded by the allocator. The promote_target option will cause reads to
+write a cached copy of the data being read to that target, if one doesn't
+already exist there.
+
+Together, these options can be used for writeback caching, like so:
+
+  foreground_target=ssd
+  background_target=hdd
+  promote_target=ssd
+
+Writethrough caching requires telling bcachefs not to trust the cache device,
+which does require a per-device option and thus can't be done entirely with
+per-file options. This is done by setting the device's durability to 0.
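+
+For example, a writethrough cache could be set up at format time (devices
+illustrative; the durability option applies to the device that follows it):
+
+  bcachefs format /dev/sdb --durability=0 /dev/sdc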
+
+The target options can all be set on individual files or directories. They can
+also be used to pin a specific file or directory to a specific device or
+target:
+
+  foreground_target=ssd
+  background_target=
+  promote_target=
+
+Note that if the target specified is full, the write will spill over to the rest
+of the filesystem.
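+
+Such per-file settings can be applied with the setattr command, e.g. (exact
+flag syntax may vary by version):
+
+  bcachefs setattr --foreground_target=ssd /mnt/pinned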
+
+Data protection
+---------------
+
+foo
diff --git a/fsck.bcachefs b/fsck.bcachefs
new file mode 100755 (executable)
index 0000000..f8de4a8
--- /dev/null
+++ b/fsck.bcachefs
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+SDIR="$(readlink -f "$0")"
+exec "${SDIR%/*}/bcachefs" fsck "$@"
diff --git a/include/asm/page.h b/include/asm/page.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/include/asm/unaligned.h b/include/asm/unaligned.h
new file mode 100644 (file)
index 0000000..e695bed
--- /dev/null
+++ b/include/asm/unaligned.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_UNALIGNED_H
+#define _ASM_UNALIGNED_H
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# include <linux/unaligned/le_struct.h>
+# include <linux/unaligned/be_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned __get_unaligned_le
+# define put_unaligned __put_unaligned_le
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# include <linux/unaligned/be_struct.h>
+# include <linux/unaligned/le_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned __get_unaligned_be
+# define put_unaligned __put_unaligned_be
+#else
+# error need to define endianness
+#endif
+
+#endif /* _ASM_UNALIGNED_H */
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
new file mode 100644 (file)
index 0000000..5fd3524
--- /dev/null
+++ b/include/crypto/algapi.h
@@ -0,0 +1,7 @@
+#ifndef _CRYPTO_ALGAPI_H
+#define _CRYPTO_ALGAPI_H
+
+#include <linux/crypto.h>
+#include <crypto/skcipher.h>
+
+#endif /* _CRYPTO_ALGAPI_H */
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
new file mode 100644 (file)
index 0000000..f004cfb
--- /dev/null
+++ b/include/crypto/chacha.h
@@ -0,0 +1,15 @@
+/*
+ * Common values for the ChaCha20 algorithm
+ */
+
+#ifndef _CRYPTO_CHACHA20_H
+#define _CRYPTO_CHACHA20_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define CHACHA_IV_SIZE 16
+#define CHACHA_KEY_SIZE        32
+#define CHACHA_BLOCK_SIZE      64
+
+#endif
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
new file mode 100644 (file)
index 0000000..a74f361
--- /dev/null
+++ b/include/crypto/hash.h
@@ -0,0 +1,104 @@
+/*
+ * Hash: Hash algorithms under the crypto API
+ * 
+ * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_HASH_H
+#define _CRYPTO_HASH_H
+
+#include <linux/crypto.h>
+
+struct shash_desc;
+
+struct shash_alg {
+       int (*init)(struct shash_desc *desc);
+       int (*update)(struct shash_desc *desc, const u8 *data, unsigned len);
+       int (*final)(struct shash_desc *desc, u8 *out);
+       int (*finup)(struct shash_desc *desc, const u8 *data,
+                    unsigned len, u8 *out);
+       int (*digest)(struct shash_desc *desc, const u8 *data,
+                     unsigned len, u8 *out);
+
+       unsigned                descsize;
+       unsigned                digestsize;
+       struct crypto_alg       base;
+};
+
+int crypto_register_shash(struct shash_alg *alg);
+
+struct crypto_shash {
+       unsigned                descsize;
+       struct crypto_tfm       base;
+};
+
+struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
+                                       u32 mask);
+
+static inline void crypto_free_shash(struct crypto_shash *tfm)
+{
+       kfree(tfm);
+}
+
+static inline struct shash_alg *crypto_shash_alg(struct crypto_shash *tfm)
+{
+       return container_of(tfm->base.alg, struct shash_alg, base);
+}
+
+static inline unsigned crypto_shash_digestsize(struct crypto_shash *tfm)
+{
+       return crypto_shash_alg(tfm)->digestsize;
+}
+
+static inline unsigned crypto_shash_descsize(struct crypto_shash *tfm)
+{
+       return tfm->descsize;
+}
+
+struct shash_desc {
+       struct crypto_shash     *tfm;
+       u32                     flags;
+
+       void                    *ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+#define SHASH_DESC_ON_STACK(shash, tfm)                                  \
+       char __##shash##_desc[sizeof(struct shash_desc) +         \
+               crypto_shash_descsize(tfm)] CRYPTO_MINALIGN_ATTR; \
+       struct shash_desc *shash = (struct shash_desc *)__##shash##_desc
+
+static inline int crypto_shash_init(struct shash_desc *desc)
+{
+       return crypto_shash_alg(desc->tfm)->init(desc);
+}
+
+static inline int crypto_shash_update(struct shash_desc *desc,
+                                     const u8 *data, unsigned len)
+{
+       return crypto_shash_alg(desc->tfm)->update(desc, data, len);
+}
+
+static inline int crypto_shash_final(struct shash_desc *desc, u8 *out)
+{
+       return crypto_shash_alg(desc->tfm)->final(desc, out);
+}
+
+static inline int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
+                                    unsigned len, u8 *out)
+{
+       return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out);
+}
+
+static inline int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
+                                     unsigned len, u8 *out)
+{
+       return crypto_shash_alg(desc->tfm)->digest(desc, data, len, out);
+}
+
+#endif /* _CRYPTO_HASH_H */
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
new file mode 100644 (file)
index 0000000..9fcfbfe
--- /dev/null
+++ b/include/crypto/poly1305.h
@@ -0,0 +1,13 @@
+/*
+ * Common values for the Poly1305 algorithm
+ */
+
+#ifndef _CRYPTO_POLY1305_H
+#define _CRYPTO_POLY1305_H
+
+#include <sodium/crypto_onetimeauth_poly1305.h>
+
+#define POLY1305_KEY_SIZE      crypto_onetimeauth_poly1305_KEYBYTES
+#define POLY1305_DIGEST_SIZE   crypto_onetimeauth_poly1305_BYTES
+
+#endif
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
new file mode 100644 (file)
index 0000000..8a46202
--- /dev/null
+++ b/include/crypto/sha.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for SHA algorithms
+ */
+
+#ifndef _CRYPTO_SHA_H
+#define _CRYPTO_SHA_H
+
+#include <linux/types.h>
+
+#define SHA1_DIGEST_SIZE        20
+#define SHA1_BLOCK_SIZE         64
+
+#define SHA224_DIGEST_SIZE     28
+#define SHA224_BLOCK_SIZE      64
+
+#define SHA256_DIGEST_SIZE      32
+#define SHA256_BLOCK_SIZE       64
+
+#define SHA384_DIGEST_SIZE      48
+#define SHA384_BLOCK_SIZE       128
+
+#define SHA512_DIGEST_SIZE      64
+#define SHA512_BLOCK_SIZE       128
+
+#define SHA1_H0                0x67452301UL
+#define SHA1_H1                0xefcdab89UL
+#define SHA1_H2                0x98badcfeUL
+#define SHA1_H3                0x10325476UL
+#define SHA1_H4                0xc3d2e1f0UL
+
+#define SHA224_H0      0xc1059ed8UL
+#define SHA224_H1      0x367cd507UL
+#define SHA224_H2      0x3070dd17UL
+#define SHA224_H3      0xf70e5939UL
+#define SHA224_H4      0xffc00b31UL
+#define SHA224_H5      0x68581511UL
+#define SHA224_H6      0x64f98fa7UL
+#define SHA224_H7      0xbefa4fa4UL
+
+#define SHA256_H0      0x6a09e667UL
+#define SHA256_H1      0xbb67ae85UL
+#define SHA256_H2      0x3c6ef372UL
+#define SHA256_H3      0xa54ff53aUL
+#define SHA256_H4      0x510e527fUL
+#define SHA256_H5      0x9b05688cUL
+#define SHA256_H6      0x1f83d9abUL
+#define SHA256_H7      0x5be0cd19UL
+
+#define SHA384_H0      0xcbbb9d5dc1059ed8ULL
+#define SHA384_H1      0x629a292a367cd507ULL
+#define SHA384_H2      0x9159015a3070dd17ULL
+#define SHA384_H3      0x152fecd8f70e5939ULL
+#define SHA384_H4      0x67332667ffc00b31ULL
+#define SHA384_H5      0x8eb44a8768581511ULL
+#define SHA384_H6      0xdb0c2e0d64f98fa7ULL
+#define SHA384_H7      0x47b5481dbefa4fa4ULL
+
+#define SHA512_H0      0x6a09e667f3bcc908ULL
+#define SHA512_H1      0xbb67ae8584caa73bULL
+#define SHA512_H2      0x3c6ef372fe94f82bULL
+#define SHA512_H3      0xa54ff53a5f1d36f1ULL
+#define SHA512_H4      0x510e527fade682d1ULL
+#define SHA512_H5      0x9b05688c2b3e6c1fULL
+#define SHA512_H6      0x1f83d9abfb41bd6bULL
+#define SHA512_H7      0x5be0cd19137e2179ULL
+
+extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE];
+
+extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE];
+
+extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE];
+
+extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE];
+
+extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE];
+
+struct sha1_state {
+       u32 state[SHA1_DIGEST_SIZE / 4];
+       u64 count;
+       u8 buffer[SHA1_BLOCK_SIZE];
+};
+
+struct sha256_state {
+       u32 state[SHA256_DIGEST_SIZE / 4];
+       u64 count;
+       u8 buf[SHA256_BLOCK_SIZE];
+};
+
+struct sha512_state {
+       u64 state[SHA512_DIGEST_SIZE / 8];
+       u64 count[2];
+       u8 buf[SHA512_BLOCK_SIZE];
+};
+
+struct shash_desc;
+
+extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
+                             unsigned int len);
+
+extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
+                            unsigned int len, u8 *hash);
+
+extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
+                             unsigned int len);
+
+extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
+                              unsigned int len, u8 *hash);
+
+extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
+                             unsigned int len);
+
+extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
+                              unsigned int len, u8 *hash);
+#endif
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
new file mode 100644 (file)
index 0000000..5989855
--- /dev/null
+++ b/include/crypto/skcipher.h
@@ -0,0 +1,126 @@
+/*
+ * Symmetric key ciphers.
+ * 
+ * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_SKCIPHER_H
+#define _CRYPTO_SKCIPHER_H
+
+#include <linux/crypto.h>
+
+struct crypto_skcipher;
+struct skcipher_request;
+
+struct skcipher_alg {
+       struct crypto_alg base;
+};
+
+int crypto_register_skcipher(struct skcipher_alg *alg);
+
+struct crypto_skcipher {
+       int (*setkey)(struct crypto_skcipher *tfm, const u8 *key,
+                     unsigned int keylen);
+       int (*encrypt)(struct skcipher_request *req);
+       int (*decrypt)(struct skcipher_request *req);
+
+       unsigned                ivsize;
+       unsigned                keysize;
+
+       struct crypto_tfm       base;
+};
+
+struct crypto_sync_skcipher {
+       struct crypto_skcipher base;
+};
+
+struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
+                                             u32 type, u32 mask);
+
+static inline struct crypto_sync_skcipher *
+crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask)
+{
+       return (void *) crypto_alloc_skcipher(alg_name, type, mask);
+}
+
+static inline void crypto_free_skcipher(struct crypto_skcipher *tfm)
+{
+       kfree(tfm);
+}
+
+static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm)
+{
+       crypto_free_skcipher(&tfm->base);
+}
+
+struct skcipher_request {
+       unsigned                cryptlen;
+       u8                      *iv;
+
+       struct scatterlist      *src;
+       struct scatterlist      *dst;
+
+       struct crypto_tfm       *tfm;
+};
+
+#define MAX_SYNC_SKCIPHER_REQSIZE      384
+#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \
+       char __##name##_desc[sizeof(struct skcipher_request) + \
+                            MAX_SYNC_SKCIPHER_REQSIZE + \
+                            (!(sizeof((struct crypto_sync_skcipher *)1 == \
+                                      (typeof(tfm))1))) \
+                           ] CRYPTO_MINALIGN_ATTR; \
+       struct skcipher_request *name = (void *)__##name##_desc
+
+static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
+                                        const u8 *key, unsigned int keylen)
+{
+       return tfm->setkey(tfm, key, keylen);
+}
+
+static inline struct crypto_skcipher *crypto_skcipher_reqtfm(
+       struct skcipher_request *req)
+{
+       return container_of(req->tfm, struct crypto_skcipher, base);
+}
+
+static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
+{
+       return crypto_skcipher_reqtfm(req)->encrypt(req);
+}
+
+static inline int crypto_skcipher_decrypt(struct skcipher_request *req)
+{
+       return crypto_skcipher_reqtfm(req)->decrypt(req);
+}
+
+static inline void skcipher_request_set_tfm(struct skcipher_request *req,
+                                           struct crypto_skcipher *tfm)
+{
+       req->tfm = &tfm->base;
+}
+
+static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req,
+                                           struct crypto_sync_skcipher *tfm)
+{
+       skcipher_request_set_tfm(req, &tfm->base);
+}
+
+static inline void skcipher_request_set_crypt(
+       struct skcipher_request *req,
+       struct scatterlist *src, struct scatterlist *dst,
+       unsigned int cryptlen, void *iv)
+{
+       req->src        = src;
+       req->dst        = dst;
+       req->cryptlen   = cryptlen;
+       req->iv         = iv;
+}
+
+#endif /* _CRYPTO_SKCIPHER_H */
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
new file mode 100644 (file)
index 0000000..a7a2ee4
--- /dev/null
+++ b/include/keys/user-type.h
@@ -0,0 +1,6 @@
+#ifndef _KEYS_USER_TYPE_H
+#define _KEYS_USER_TYPE_H
+
+#include <linux/key.h>
+
+#endif /* _KEYS_USER_TYPE_H */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
new file mode 100644 (file)
index 0000000..38a364c
--- /dev/null
+++ b/include/linux/atomic.h
@@ -0,0 +1,290 @@
+#ifndef __TOOLS_LINUX_ATOMIC_H
+#define __TOOLS_LINUX_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+typedef struct {
+       int             counter;
+} atomic_t;
+
+typedef struct {
+       long            counter;
+} atomic_long_t;
+
+typedef struct {
+       u64             counter;
+} atomic64_t;
+
+#ifndef C11_ATOMICS
+
+#include <urcu/uatomic.h>
+
+#if (CAA_BITS_PER_LONG != 64)
+#define ATOMIC64_SPINLOCK
+#endif
+
+#define __ATOMIC_READ(p)               uatomic_read(p)
+#define __ATOMIC_SET(p, v)             uatomic_set(p, v)
+#define __ATOMIC_ADD_RETURN(v, p)      uatomic_add_return(p, v)
+#define __ATOMIC_SUB_RETURN(v, p)      uatomic_sub_return(p, v)
+#define __ATOMIC_ADD(v, p)             uatomic_add(p, v)
+#define __ATOMIC_SUB(v, p)             uatomic_sub(p, v)
+#define __ATOMIC_INC(p)                        uatomic_inc(p)
+#define __ATOMIC_DEC(p)                        uatomic_dec(p)
+
+#define xchg(p, v)                     uatomic_xchg(p, v)
+#define xchg_acquire(p, v)             uatomic_xchg(p, v)
+#define cmpxchg(p, old, new)           uatomic_cmpxchg(p, old, new)
+#define cmpxchg_acquire(p, old, new)   uatomic_cmpxchg(p, old, new)
+#define cmpxchg_release(p, old, new)   uatomic_cmpxchg(p, old, new)
+
+#define smp_mb__before_atomic()                cmm_smp_mb__before_uatomic_add()
+#define smp_mb__after_atomic()         cmm_smp_mb__after_uatomic_add()
+#define smp_wmb()                      cmm_smp_wmb()
+#define smp_rmb()                      cmm_smp_rmb()
+#define smp_mb()                       cmm_smp_mb()
+#define smp_read_barrier_depends()     cmm_smp_read_barrier_depends()
+
+#else /* C11_ATOMICS */
+
+#define __ATOMIC_READ(p)               __atomic_load_n(p,      __ATOMIC_RELAXED)
+#define __ATOMIC_SET(p, v)             __atomic_store_n(p, v,  __ATOMIC_RELAXED)
+#define __ATOMIC_ADD_RETURN(v, p)      __atomic_add_fetch(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_ADD_RETURN_RELEASE(v, p)                              \
+                                       __atomic_add_fetch(p, v, __ATOMIC_RELEASE)
+#define __ATOMIC_SUB_RETURN(v, p)      __atomic_sub_fetch(p, v, __ATOMIC_RELAXED)
+
+#define xchg(p, v)                     __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST)
+#define xchg_acquire(p, v)             __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE)
+
+#define cmpxchg(p, old, new)                                   \
+({                                                             \
+       typeof(*(p)) __old = (old);                             \
+                                                               \
+       __atomic_compare_exchange_n((p), &__old, new, false,    \
+                                   __ATOMIC_SEQ_CST,           \
+                                   __ATOMIC_SEQ_CST);          \
+       __old;                                                  \
+})
+
+#define cmpxchg_acquire(p, old, new)                           \
+({                                                             \
+       typeof(*(p)) __old = (old);                             \
+                                                               \
+       __atomic_compare_exchange_n((p), &__old, new, false,    \
+                                   __ATOMIC_ACQUIRE,           \
+                                   __ATOMIC_ACQUIRE);          \
+       __old;                                                  \
+})
+
+#define cmpxchg_release(p, old, new)                           \
+({                                                             \
+       typeof(*(p)) __old = (old);                             \
+                                                               \
+       __atomic_compare_exchange_n((p), &__old, new, false,    \
+                                   __ATOMIC_RELEASE,           \
+                                   __ATOMIC_RELEASE);          \
+       __old;                                                  \
+})
+
+#define smp_mb__before_atomic()        __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_mb__after_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_wmb()              __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_rmb()              __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_mb()               __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_read_barrier_depends()
+
+#endif
+
+#define smp_store_mb(var, value)  do { WRITE_ONCE(var, value); smp_mb(); } while (0)
+
+#define smp_load_acquire(p)                                            \
+({                                                                     \
+       typeof(*p) ___p1 = READ_ONCE(*p);                               \
+       smp_mb();                                                       \
+       ___p1;                                                          \
+})
+
+#define smp_store_release(p, v)                                                \
+do {                                                                   \
+       smp_mb();                                                       \
+       WRITE_ONCE(*p, v);                                              \
+} while (0)
+
+/* atomic interface: */
+
+#ifndef __ATOMIC_ADD
+#define __ATOMIC_ADD(i, v) __ATOMIC_ADD_RETURN(i, v)
+#endif
+
+#ifndef __ATOMIC_ADD_RETURN_RELEASE
+#define __ATOMIC_ADD_RETURN_RELEASE(i, v)                              \
+       ({ smp_mb__before_atomic(); __ATOMIC_ADD_RETURN(i, v); })
+#endif
+
+#ifndef __ATOMIC_SUB
+#define __ATOMIC_SUB(i, v) __ATOMIC_SUB_RETURN(i, v)
+#endif
+
+#ifndef __ATOMIC_INC_RETURN
+#define __ATOMIC_INC_RETURN(v) __ATOMIC_ADD_RETURN(1, v)
+#endif
+
+#ifndef __ATOMIC_DEC_RETURN
+#define __ATOMIC_DEC_RETURN(v) __ATOMIC_SUB_RETURN(1, v)
+#endif
+
+#ifndef __ATOMIC_INC
+#define __ATOMIC_INC(v) __ATOMIC_ADD(1, v)
+#endif
+
+#ifndef __ATOMIC_DEC
+#define __ATOMIC_DEC(v) __ATOMIC_SUB(1, v)
+#endif
+
+#define DEF_ATOMIC_OPS(a_type, i_type)                                 \
+static inline i_type a_type##_read(const a_type##_t *v)                        \
+{                                                                      \
+       return __ATOMIC_READ(&v->counter);                              \
+}                                                                      \
+                                                                       \
+static inline void a_type##_set(a_type##_t *v, i_type i)               \
+{                                                                      \
+       __ATOMIC_SET(&v->counter, i);                                   \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_add_return(i_type i, a_type##_t *v)      \
+{                                                                      \
+       return __ATOMIC_ADD_RETURN(i, &v->counter);                     \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_add_return_release(i_type i, a_type##_t *v)\
+{                                                                      \
+       return __ATOMIC_ADD_RETURN_RELEASE(i, &v->counter);             \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_sub_return(i_type i, a_type##_t *v)      \
+{                                                                      \
+       return __ATOMIC_SUB_RETURN(i, &v->counter);                     \
+}                                                                      \
+                                                                       \
+static inline void a_type##_add(i_type i, a_type##_t *v)               \
+{                                                                      \
+       __ATOMIC_ADD(i, &v->counter);                                   \
+}                                                                      \
+                                                                       \
+static inline void a_type##_sub(i_type i, a_type##_t *v)               \
+{                                                                      \
+       __ATOMIC_SUB(i, &v->counter);                                   \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_inc_return(a_type##_t *v)                        \
+{                                                                      \
+       return __ATOMIC_INC_RETURN(&v->counter);                        \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_dec_return(a_type##_t *v)                        \
+{                                                                      \
+       return __ATOMIC_DEC_RETURN(&v->counter);                        \
+}                                                                      \
+                                                                       \
+static inline void a_type##_inc(a_type##_t *v)                         \
+{                                                                      \
+       __ATOMIC_INC(&v->counter);                                      \
+}                                                                      \
+                                                                       \
+static inline void a_type##_dec(a_type##_t *v)                         \
+{                                                                      \
+       __ATOMIC_DEC(&v->counter);                                      \
+}                                                                      \
+                                                                       \
+static inline bool a_type##_add_negative(i_type i, a_type##_t *v)      \
+{                                                                      \
+       return __ATOMIC_ADD_RETURN(i, &v->counter) < 0;                 \
+}                                                                      \
+                                                                       \
+static inline bool a_type##_sub_and_test(i_type i, a_type##_t *v)      \
+{                                                                      \
+       return __ATOMIC_SUB_RETURN(i, &v->counter) == 0;                \
+}                                                                      \
+                                                                       \
+static inline bool a_type##_inc_and_test(a_type##_t *v)                        \
+{                                                                      \
+       return __ATOMIC_INC_RETURN(&v->counter) == 0;                   \
+}                                                                      \
+                                                                       \
+static inline bool a_type##_dec_and_test(a_type##_t *v)                        \
+{                                                                      \
+       return __ATOMIC_DEC_RETURN(&v->counter) == 0;                   \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_add_unless(a_type##_t *v, i_type a, i_type u)\
+{                                                                      \
+       i_type old, c = __ATOMIC_READ(&v->counter);                     \
+       while (c != u && (old = cmpxchg(&v->counter, c, c + a)) != c)   \
+               c = old;                                                \
+       return c;                                                       \
+}                                                                      \
+                                                                       \
+static inline bool a_type##_inc_not_zero(a_type##_t *v)                        \
+{                                                                      \
+       return a_type##_add_unless(v, 1, 0);                            \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_xchg(a_type##_t *v, i_type i)            \
+{                                                                      \
+       return xchg(&v->counter, i);                                    \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_cmpxchg(a_type##_t *v, i_type old, i_type new)\
+{                                                                      \
+       return cmpxchg(&v->counter, old, new);                          \
+}                                                                      \
+                                                                       \
+static inline i_type a_type##_cmpxchg_acquire(a_type##_t *v, i_type old, i_type new)\
+{                                                                      \
+       return cmpxchg_acquire(&v->counter, old, new);                  \
+}
+
+DEF_ATOMIC_OPS(atomic,         int)
+DEF_ATOMIC_OPS(atomic_long,    long)
+
+#ifndef ATOMIC64_SPINLOCK
+DEF_ATOMIC_OPS(atomic64,       s64)
+#else
+s64 atomic64_read(const atomic64_t *v);
+void atomic64_set(atomic64_t *v, s64);
+
+s64 atomic64_add_return(s64, atomic64_t *);
+s64 atomic64_sub_return(s64, atomic64_t *);
+void atomic64_add(s64, atomic64_t *);
+void atomic64_sub(s64, atomic64_t *);
+
+s64 atomic64_xchg(atomic64_t *, s64);
+s64 atomic64_cmpxchg(atomic64_t *, s64, s64);
+
+#define atomic64_add_negative(a, v)    (atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)                        atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)         atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v)       (atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v)    (atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v)                        atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)         atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)       (atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v)       atomic64_add_unless((v), 1LL, 0LL)
+
+static inline s64 atomic64_add_return_release(s64 i, atomic64_t *v)
+{
+       smp_mb__before_atomic();
+       return atomic64_add_return(i, v);
+}
+
+static inline s64 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
+{
+       return atomic64_cmpxchg(v, old, new);
+}
+
+#endif
+
+#endif /* __TOOLS_LINUX_ATOMIC_H */
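Taken together, DEF_ATOMIC_OPS() instantiates the kernel's atomic_t, atomic_long_t and atomic64_t APIs on top of the GCC __atomic builtins. A minimal sketch of the refcounting idiom these helpers exist to support (struct obj and the obj_free callback are hypothetical, not part of this patch):

    struct obj {
            atomic_t        refcount;
    };

    static bool obj_tryget(struct obj *o)
    {
            /* take a reference only if the count has not already hit zero */
            return atomic_inc_not_zero(&o->refcount);
    }

    static void obj_put(struct obj *o, void (*obj_free)(struct obj *))
    {
            /* free exactly once, on the 1 -> 0 transition */
            if (atomic_dec_and_test(&o->refcount))
                    obj_free(o);
    }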
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
new file mode 100644 (file)
index 0000000..d8a86b4
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef _LINUX_BACKING_DEV_H
+#define _LINUX_BACKING_DEV_H
+
+#include <linux/list.h>
+
+typedef int (congested_fn)(void *, int);
+
+enum wb_congested_state {
+       WB_async_congested,     /* The async (write) queue is getting full */
+       WB_sync_congested,      /* The sync queue is getting full */
+};
+
+struct backing_dev_info {
+       struct list_head bdi_list;
+       unsigned        ra_pages;
+       unsigned        capabilities;
+
+       congested_fn    *congested_fn;
+       void            *congested_data;
+};
+
+#define BDI_CAP_NO_ACCT_DIRTY  0x00000001
+#define BDI_CAP_NO_WRITEBACK   0x00000002
+#define BDI_CAP_NO_ACCT_WB     0x00000004
+#define BDI_CAP_STABLE_WRITES  0x00000008
+#define BDI_CAP_STRICTLIMIT    0x00000010
+#define BDI_CAP_CGROUP_WRITEBACK 0x00000020
+
+static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
+{
+       return 0;
+}
+
+static inline int __must_check bdi_setup_and_register(struct backing_dev_info *bdi,
+                                                     char *name)
+{
+       bdi->capabilities = 0;
+       return 0;
+}
+
+static inline void bdi_destroy(struct backing_dev_info *bdi) {}
+
+#define VM_MAX_READAHEAD       128     /* kbytes */
+
+#endif /* _LINUX_BACKING_DEV_H */
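These are deliberate no-op stubs: there is no writeback congestion to track in userspace, so bdi_congested() always reports "not congested" and registration reduces to clearing the capability bits. A sketch of the kernel-style call pattern the stubs keep compiling (the device name and helper are illustrative):

    static int bdi_demo(void)
    {
            struct backing_dev_info bdi;
            int ret = bdi_setup_and_register(&bdi, "mydev");    /* always 0 here */

            if (!ret && bdi_congested(&bdi, 1 << WB_sync_congested))
                    return 1;       /* unreachable: the stub never reports congestion */
            bdi_destroy(&bdi);      /* no-op */
            return ret;
    }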
diff --git a/include/linux/bio.h b/include/linux/bio.h
new file mode 100644 (file)
index 0000000..cdbbcb3
--- /dev/null
@@ -0,0 +1,452 @@
+/*
+ * 2.5 block I/O model
+ *
+ * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef __LINUX_BIO_H
+#define __LINUX_BIO_H
+
+#include <linux/mempool.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+
+#include <linux/blkdev.h>
+#include <linux/blk_types.h>
+#include <linux/workqueue.h>
+
+#define bio_prio(bio)                  (bio)->bi_ioprio
+#define bio_set_prio(bio, prio)                ((bio)->bi_ioprio = prio)
+
+#define bio_iter_iovec(bio, iter)                              \
+       bvec_iter_bvec((bio)->bi_io_vec, (iter))
+
+#define bio_iter_page(bio, iter)                               \
+       bvec_iter_page((bio)->bi_io_vec, (iter))
+#define bio_iter_len(bio, iter)                                        \
+       bvec_iter_len((bio)->bi_io_vec, (iter))
+#define bio_iter_offset(bio, iter)                             \
+       bvec_iter_offset((bio)->bi_io_vec, (iter))
+
+#define bio_page(bio)          bio_iter_page((bio), (bio)->bi_iter)
+#define bio_offset(bio)                bio_iter_offset((bio), (bio)->bi_iter)
+#define bio_iovec(bio)         bio_iter_iovec((bio), (bio)->bi_iter)
+
+#define bio_multiple_segments(bio)                             \
+       ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)
+
+#define bvec_iter_sectors(iter)        ((iter).bi_size >> 9)
+#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter)))
+
+#define bio_sectors(bio)       bvec_iter_sectors((bio)->bi_iter)
+#define bio_end_sector(bio)    bvec_iter_end_sector((bio)->bi_iter)
+
+static inline bool bio_has_data(struct bio *bio)
+{
+       if (bio &&
+           bio->bi_iter.bi_size &&
+           bio_op(bio) != REQ_OP_DISCARD &&
+           bio_op(bio) != REQ_OP_SECURE_ERASE)
+               return true;
+
+       return false;
+}
+
+static inline bool bio_no_advance_iter(struct bio *bio)
+{
+       return bio_op(bio) == REQ_OP_DISCARD ||
+              bio_op(bio) == REQ_OP_SECURE_ERASE ||
+              bio_op(bio) == REQ_OP_WRITE_SAME;
+}
+
+static inline bool bio_is_rw(struct bio *bio)
+{
+       if (!bio_has_data(bio))
+               return false;
+
+       if (bio_no_advance_iter(bio))
+               return false;
+
+       return true;
+}
+
+static inline bool bio_mergeable(struct bio *bio)
+{
+       if (bio->bi_opf & REQ_NOMERGE_FLAGS)
+               return false;
+
+       return true;
+}
+
+static inline unsigned int bio_cur_bytes(struct bio *bio)
+{
+       if (bio_has_data(bio))
+               return bio_iovec(bio).bv_len;
+       else /* dataless requests such as discard */
+               return bio->bi_iter.bi_size;
+}
+
+static inline void *bio_data(struct bio *bio)
+{
+       if (bio_has_data(bio))
+               return page_address(bio_page(bio)) + bio_offset(bio);
+
+       return NULL;
+}
+
+#define __bio_kmap_atomic(bio, iter)                           \
+       (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) +   \
+               bio_iter_iovec((bio), (iter)).bv_offset)
+
+#define __bio_kunmap_atomic(addr)      kunmap_atomic(addr)
+
+static inline struct bio_vec *bio_next_segment(const struct bio *bio,
+                                              struct bvec_iter_all *iter)
+{
+       if (iter->idx >= bio->bi_vcnt)
+               return NULL;
+
+       return &bio->bi_io_vec[iter->idx];
+}
+
+#define bio_for_each_segment_all(bvl, bio, iter) \
+       for ((iter).idx = 0; (bvl = bio_next_segment((bio), &(iter))); (iter).idx++)
+
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+                                   unsigned bytes)
+{
+       iter->bi_sector += bytes >> 9;
+
+       if (bio_no_advance_iter(bio))
+               iter->bi_size -= bytes;
+       else
+               bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+}
+
+#define __bio_for_each_segment(bvl, bio, iter, start)                  \
+       for (iter = (start);                                            \
+            (iter).bi_size &&                                          \
+               ((bvl = bio_iter_iovec((bio), (iter))), 1);             \
+            bio_advance_iter((bio), &(iter), (bvl).bv_len))
+
+#define bio_for_each_segment(bvl, bio, iter)                           \
+       __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
+
+#define __bio_for_each_bvec(bvl, bio, iter, start)                     \
+       __bio_for_each_segment(bvl, bio, iter, start)
+
+#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
+
+static inline unsigned bio_segments(struct bio *bio)
+{
+       unsigned segs = 0;
+       struct bio_vec bv;
+       struct bvec_iter iter;
+
+       /*
+        * We special case discard/write same, because they interpret bi_size
+        * differently:
+        */
+
+       if (bio_op(bio) == REQ_OP_DISCARD)
+               return 1;
+
+       if (bio_op(bio) == REQ_OP_SECURE_ERASE)
+               return 1;
+
+       if (bio_op(bio) == REQ_OP_WRITE_SAME)
+               return 1;
+
+       bio_for_each_segment(bv, bio, iter)
+               segs++;
+
+       return segs;
+}
+
+static inline void bio_get(struct bio *bio)
+{
+       bio->bi_flags |= (1 << BIO_REFFED);
+       smp_mb__before_atomic();
+       atomic_inc(&bio->__bi_cnt);
+}
+
+static inline bool bio_flagged(struct bio *bio, unsigned int bit)
+{
+       return (bio->bi_flags & (1U << bit)) != 0;
+}
+
+static inline void bio_set_flag(struct bio *bio, unsigned int bit)
+{
+       bio->bi_flags |= (1U << bit);
+}
+
+static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
+{
+       bio->bi_flags &= ~(1U << bit);
+}
+
+extern struct bio *bio_split(struct bio *bio, int sectors,
+                            gfp_t gfp, struct bio_set *bs);
+
+static inline struct bio *bio_next_split(struct bio *bio, int sectors,
+                                        gfp_t gfp, struct bio_set *bs)
+{
+       if (sectors >= bio_sectors(bio))
+               return bio;
+
+       return bio_split(bio, sectors, gfp, bs);
+}
+
+struct bio_set {
+       unsigned int front_pad;
+};
+
+static inline void bioset_exit(struct bio_set *bs) {}
+
+static inline void bioset_free(struct bio_set *bs)
+{
+       kfree(bs);
+}
+
+static inline int bioset_init(struct bio_set *bs,
+                             unsigned pool_size,
+                             unsigned front_pad,
+                             int flags)
+{
+       bs->front_pad = front_pad;
+       return 0;
+}
+
+extern struct bio_set *bioset_create(unsigned int, unsigned int);
+extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+enum {
+       BIOSET_NEED_BVECS       = 1 << 0,
+       BIOSET_NEED_RESCUER     = 1 << 1,
+};
+
+extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
+extern void bio_put(struct bio *);
+
+int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
+
+extern void __bio_clone_fast(struct bio *, struct bio *);
+extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
+extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
+
+static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+       return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+}
+
+static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
+{
+       return bio_clone_bioset(bio, gfp_mask, NULL);
+}
+
+extern void bio_endio(struct bio *);
+
+extern void bio_advance(struct bio *, unsigned);
+
+extern void bio_reset(struct bio *);
+void bio_chain(struct bio *, struct bio *);
+
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+                              struct bio *src, struct bvec_iter *src_iter);
+extern void bio_copy_data(struct bio *dst, struct bio *src);
+
+void bio_free_pages(struct bio *bio);
+
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+
+static inline void zero_fill_bio(struct bio *bio)
+{
+       zero_fill_bio_iter(bio, bio->bi_iter);
+}
+
+#define bio_set_dev(bio, bdev)                 \
+do {                                           \
+       (bio)->bi_bdev = (bdev);                \
+} while (0)
+
+#define bio_copy_dev(dst, src)                 \
+do {                                           \
+       (dst)->bi_bdev = (src)->bi_bdev;        \
+} while (0)
+
+static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
+{
+       return page_address(bvec->bv_page) + bvec->bv_offset;
+}
+
+static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
+{
+       *flags = 0;
+}
+
+static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
+                                  unsigned long *flags)
+{
+       return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
+}
+#define __bio_kunmap_irq(buf, flags)   bvec_kunmap_irq(buf, flags)
+
+#define bio_kmap_irq(bio, flags) \
+       __bio_kmap_irq((bio), (bio)->bi_iter, (flags))
+#define bio_kunmap_irq(buf,flags)      __bio_kunmap_irq(buf, flags)
+
+struct bio_list {
+       struct bio *head;
+       struct bio *tail;
+};
+
+static inline int bio_list_empty(const struct bio_list *bl)
+{
+       return bl->head == NULL;
+}
+
+static inline void bio_list_init(struct bio_list *bl)
+{
+       bl->head = bl->tail = NULL;
+}
+
+#define BIO_EMPTY_LIST { NULL, NULL }
+
+#define bio_list_for_each(bio, bl) \
+       for (bio = (bl)->head; bio; bio = bio->bi_next)
+
+static inline unsigned bio_list_size(const struct bio_list *bl)
+{
+       unsigned sz = 0;
+       struct bio *bio;
+
+       bio_list_for_each(bio, bl)
+               sz++;
+
+       return sz;
+}
+
+static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
+{
+       bio->bi_next = NULL;
+
+       if (bl->tail)
+               bl->tail->bi_next = bio;
+       else
+               bl->head = bio;
+
+       bl->tail = bio;
+}
+
+static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
+{
+       bio->bi_next = bl->head;
+
+       bl->head = bio;
+
+       if (!bl->tail)
+               bl->tail = bio;
+}
+
+static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
+{
+       if (!bl2->head)
+               return;
+
+       if (bl->tail)
+               bl->tail->bi_next = bl2->head;
+       else
+               bl->head = bl2->head;
+
+       bl->tail = bl2->tail;
+}
+
+static inline void bio_list_merge_head(struct bio_list *bl,
+                                      struct bio_list *bl2)
+{
+       if (!bl2->head)
+               return;
+
+       if (bl->head)
+               bl2->tail->bi_next = bl->head;
+       else
+               bl->tail = bl2->tail;
+
+       bl->head = bl2->head;
+}
+
+static inline struct bio *bio_list_peek(struct bio_list *bl)
+{
+       return bl->head;
+}
+
+static inline struct bio *bio_list_pop(struct bio_list *bl)
+{
+       struct bio *bio = bl->head;
+
+       if (bio) {
+               bl->head = bl->head->bi_next;
+               if (!bl->head)
+                       bl->tail = NULL;
+
+               bio->bi_next = NULL;
+       }
+
+       return bio;
+}
+
+static inline struct bio *bio_list_get(struct bio_list *bl)
+{
+       struct bio *bio = bl->head;
+
+       bl->head = bl->tail = NULL;
+
+       return bio;
+}
+
+/*
+ * Increment chain count for the bio. Make sure the CHAIN flag update
+ * is visible before the raised count.
+ */
+static inline void bio_inc_remaining(struct bio *bio)
+{
+       bio_set_flag(bio, BIO_CHAIN);
+       smp_mb__before_atomic();
+       atomic_inc(&bio->__bi_remaining);
+}
+
+static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+       return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+}
+
+static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+       return bio_clone_bioset(bio, gfp_mask, NULL);
+}
+
+static inline void bio_init(struct bio *bio, struct bio_vec *table,
+             unsigned short max_vecs)
+{
+       memset(bio, 0, sizeof(*bio));
+       atomic_set(&bio->__bi_remaining, 1);
+       atomic_set(&bio->__bi_cnt, 1);
+
+       bio->bi_io_vec = table;
+       bio->bi_max_vecs = max_vecs;
+}
+
+#endif /* __LINUX_BIO_H */
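struct bio_list above is an intrusive singly linked FIFO threaded through bio->bi_next, with O(1) append via the tail pointer. A usage sketch, assuming bios obtained from bio_alloc() and submitted via submit_bio() from blkdev.h:

    static void queue_and_submit(void)
    {
            struct bio_list pending = BIO_EMPTY_LIST;
            struct bio *bio;

            bio_list_add(&pending, bio_alloc(GFP_KERNEL, 1));   /* O(1) tail append */
            bio_list_add(&pending, bio_alloc(GFP_KERNEL, 1));

            while ((bio = bio_list_pop(&pending)))              /* drain head-first */
                    submit_bio(bio);
    }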
diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
new file mode 100644 (file)
index 0000000..0e88820
--- /dev/null
@@ -0,0 +1,41 @@
+#ifndef __LINUX_BIT_SPINLOCK_H
+#define __LINUX_BIT_SPINLOCK_H
+
+#include <linux/kernel.h>
+#include <linux/preempt.h>
+#include <linux/atomic.h>
+#include <linux/bug.h>
+
+static inline void bit_spin_lock(int bitnum, unsigned long *addr)
+{
+       while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
+               do {
+                       cpu_relax();
+               } while (test_bit(bitnum, addr));
+       }
+}
+
+static inline int bit_spin_trylock(int bitnum, unsigned long *addr)
+{
+       return !test_and_set_bit_lock(bitnum, addr);
+}
+
+static inline void bit_spin_unlock(int bitnum, unsigned long *addr)
+{
+       BUG_ON(!test_bit(bitnum, addr));
+
+       clear_bit_unlock(bitnum, addr);
+}
+
+static inline void __bit_spin_unlock(int bitnum, unsigned long *addr)
+{
+       bit_spin_unlock(bitnum, addr);
+}
+
+static inline int bit_spin_is_locked(int bitnum, unsigned long *addr)
+{
+       return test_bit(bitnum, addr);
+}
+
+#endif /* __LINUX_BIT_SPINLOCK_H */
+
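bit_spin_lock() turns any single bit of a word into a spinlock: test_and_set_bit_lock() takes the lock with acquire semantics, and the inner test_bit() loop spins read-only so the atomic is only retried once the bit looks clear. A sketch using bit 0 of a flags word as a lock (the struct is hypothetical):

    struct entry {
            unsigned long   flags;          /* bit 0 doubles as a lock */
            u64             value;
    };

    static void entry_set_value(struct entry *e, u64 v)
    {
            bit_spin_lock(0, &e->flags);    /* acquire */
            e->value = v;
            bit_spin_unlock(0, &e->flags);  /* release */
    }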
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
new file mode 100644 (file)
index 0000000..80e8ecd
--- /dev/null
@@ -0,0 +1,138 @@
+#ifndef _TOOLS_LINUX_BITMAP_H
+#define _TOOLS_LINUX_BITMAP_H
+
+#include <string.h>
+#include <linux/bitops.h>
+#include <stdlib.h>
+
+#define DECLARE_BITMAP(name,bits) \
+       unsigned long name[BITS_TO_LONGS(bits)]
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+                const unsigned long *bitmap2, int bits);
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+
+#define BITMAP_LAST_WORD_MASK(nbits)                                   \
+(                                                                      \
+       ((nbits) % BITS_PER_LONG) ?                                     \
+               (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL               \
+)
+
+#define small_const_nbits(nbits) \
+       (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
+
+static inline int __bitmap_weight(const unsigned long *bitmap, int bits)
+{
+       int k, w = 0, lim = bits/BITS_PER_LONG;
+
+       for (k = 0; k < lim; k++)
+               w += hweight_long(bitmap[k]);
+
+       if (bits % BITS_PER_LONG)
+               w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+
+       return w;
+}
+
+static inline int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+                const unsigned long *bitmap2, unsigned int bits)
+{
+       unsigned int k;
+       unsigned int lim = bits/BITS_PER_LONG;
+       unsigned long result = 0;
+
+       for (k = 0; k < lim; k++)
+               result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+       if (bits % BITS_PER_LONG)
+               result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+                          BITMAP_LAST_WORD_MASK(bits));
+       return result != 0;
+}
+
+static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
+                                    unsigned int bits)
+{
+       unsigned int k, lim = bits/BITS_PER_LONG;
+       for (k = 0; k < lim; ++k)
+               dst[k] = ~src[k];
+
+       if (bits % BITS_PER_LONG)
+               dst[k] = ~src[k];
+}
+
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+       memset(dst, 0, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
+}
+
+static inline int bitmap_weight(const unsigned long *src, int nbits)
+{
+       if (small_const_nbits(nbits))
+               return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+       return __bitmap_weight(src, nbits);
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+                            const unsigned long *src2, int nbits)
+{
+       if (small_const_nbits(nbits))
+               *dst = *src1 | *src2;
+       else
+               __bitmap_or(dst, src1, src2, nbits);
+}
+
+static inline unsigned long *bitmap_alloc(int nbits)
+{
+       return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
+}
+
+static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
+                            const unsigned long *src2, unsigned int nbits)
+{
+       if (small_const_nbits(nbits))
+               return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+       return __bitmap_and(dst, src1, src2, nbits);
+}
+
+static inline unsigned long _find_next_bit(const unsigned long *addr,
+               unsigned long nbits, unsigned long start, unsigned long invert)
+{
+       unsigned long tmp;
+
+       if (!nbits || start >= nbits)
+               return nbits;
+
+       tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+       /* Handle 1st word. */
+       tmp &= BITMAP_FIRST_WORD_MASK(start);
+       start = round_down(start, BITS_PER_LONG);
+
+       while (!tmp) {
+               start += BITS_PER_LONG;
+               if (start >= nbits)
+                       return nbits;
+
+               tmp = addr[start / BITS_PER_LONG] ^ invert;
+       }
+
+       return min(start + __ffs(tmp), nbits);
+}
+
+static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+                           unsigned long offset)
+{
+       return _find_next_bit(addr, size, offset, 0UL);
+}
+
+static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+                                unsigned long offset)
+{
+       return _find_next_bit(addr, size, offset, ~0UL);
+}
+
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+
+#endif /* _TOOLS_LINUX_BITMAP_H */
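DECLARE_BITMAP() sizes an array of longs to hold the requested bits; find_first_bit()/find_next_bit() then scan it a word at a time via _find_next_bit(), with the invert argument reusing the same loop for the zero-bit variants. A worked sketch (printf and __set_bit() from bitops.h assumed available):

    static void bitmap_demo(void)
    {
            DECLARE_BITMAP(map, 128);       /* two unsigned longs on 64-bit */
            unsigned long bit;

            bitmap_zero(map, 128);
            __set_bit(3, map);
            __set_bit(100, map);

            /* visits 3, then 100; find_next_bit() returns 128 when exhausted */
            for (bit = find_first_bit(map, 128); bit < 128;
                 bit = find_next_bit(map, 128, bit + 1))
                    printf("bit %lu set\n", bit);
    }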
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
new file mode 100644 (file)
index 0000000..f2183d5
--- /dev/null
@@ -0,0 +1,269 @@
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#define _TOOLS_LINUX_BITOPS_H_
+
+#include <asm/types.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/page.h>
+
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
+#endif
+
+#ifndef BITS_PER_LONG
+# define BITS_PER_LONG __WORDSIZE
+#endif
+
+#define BIT_MASK(nr)           (1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)           ((nr) / BITS_PER_LONG)
+#define BITS_PER_BYTE          8
+#define BITS_TO_LONGS(nr)      DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_TO_U64(nr)                DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+#define BITS_TO_U32(nr)                DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+#define BITS_TO_BYTES(nr)      DIV_ROUND_UP(nr, BITS_PER_BYTE)
+
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+       *p  |= mask;
+}
+
+static inline void set_bit(long nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+       __atomic_or_fetch(p, mask, __ATOMIC_RELAXED);
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+       *p &= ~mask;
+}
+
+static inline void clear_bit(long nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+       __atomic_and_fetch(p, ~mask, __ATOMIC_RELAXED);
+}
+
+static inline int test_bit(long nr, const volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+
+       return (*p & mask) != 0;
+}
+
+static inline int __test_and_set_bit(int nr, unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long old;
+
+       old = *p;
+       *p = old | mask;
+
+       return (old & mask) != 0;
+}
+
+static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+       unsigned long old;
+
+       old = __atomic_fetch_or(p, mask, __ATOMIC_RELAXED);
+
+       return (old & mask) != 0;
+}
+
+static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+       __atomic_and_fetch(p, ~mask, __ATOMIC_RELEASE);
+}
+
+static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+       unsigned long old;
+
+       old = __atomic_fetch_or(p, mask, __ATOMIC_ACQUIRE);
+
+       return (old & mask) != 0;
+}
+
+#define for_each_set_bit(bit, addr, size) \
+       for ((bit) = find_first_bit((addr), (size));            \
+            (bit) < (size);                                    \
+            (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+       for ((bit) = find_next_bit((addr), (size), (bit));      \
+            (bit) < (size);                                    \
+            (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+static inline unsigned long hweight_long(unsigned long w)
+{
+       return __builtin_popcountl(w);
+}
+
+static inline unsigned long hweight64(u64 w)
+{
+       return __builtin_popcount((u32) w) +
+              __builtin_popcount(w >> 32);
+}
+
+static inline unsigned long hweight8(unsigned long w)
+{
+       return __builtin_popcountl(w);
+}
+
+/**
+ * rol64 - rotate a 64-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 rol64(__u64 word, unsigned int shift)
+{
+       return (word << (shift & 63)) | (word >> ((-shift) & 63));
+}
+
+/**
+ * ror64 - rotate a 64-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 ror64(__u64 word, unsigned int shift)
+{
+       return (word >> (shift & 63)) | (word << ((-shift) & 63));
+}
+
+/**
+ * rol32 - rotate a 32-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+       return (word << (shift & 31)) | (word >> ((-shift) & 31));
+}
+
+/**
+ * ror32 - rotate a 32-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 ror32(__u32 word, unsigned int shift)
+{
+       return (word >> (shift & 31)) | (word << ((-shift) & 31));
+}
+
+/**
+ * rol16 - rotate a 16-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u16 rol16(__u16 word, unsigned int shift)
+{
+       return (word << shift) | (word >> (16 - shift));
+}
+
+/**
+ * ror16 - rotate a 16-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u16 ror16(__u16 word, unsigned int shift)
+{
+       return (word >> shift) | (word << (16 - shift));
+}
+
+/**
+ * rol8 - rotate an 8-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u8 rol8(__u8 word, unsigned int shift)
+{
+       return (word << shift) | (word >> (8 - shift));
+}
+
+/**
+ * ror8 - rotate an 8-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u8 ror8(__u8 word, unsigned int shift)
+{
+       return (word >> shift) | (word << (8 - shift));
+}
+
+static inline unsigned long __fls(unsigned long word)
+{
+       return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
+}
+
+static inline int fls(int x)
+{
+       return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
+}
+
+static inline int fls64(__u64 x)
+{
+#if BITS_PER_LONG == 32
+       __u32 h = x >> 32;
+       if (h)
+               return fls(h) + 32;
+       return fls(x);
+#elif BITS_PER_LONG == 64
+       if (x == 0)
+               return 0;
+       return __fls(x) + 1;
+#endif
+}
+
+static inline unsigned fls_long(unsigned long l)
+{
+       if (sizeof(l) == 4)
+               return fls(l);
+       return fls64(l);
+}
+
+static inline unsigned long __ffs(unsigned long word)
+{
+       return __builtin_ctzl(word);
+}
+
+static inline unsigned long __ffs64(u64 word)
+{
+#if BITS_PER_LONG == 32
+       if (((u32)word) == 0UL)
+               return __ffs((u32)(word >> 32)) + 32;
+#elif BITS_PER_LONG != 64
+#error BITS_PER_LONG not 32 or 64
+#endif
+       return __ffs((unsigned long)word);
+}
+
+#define ffz(x)  __ffs(~(x))
+
+static inline __attribute__((const))
+unsigned long rounddown_pow_of_two(unsigned long n)
+{
+       return 1UL << (fls_long(n) - 1);
+}
+
+#endif
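A few worked values for the helpers above, written as assertions (illustrative only, using BUG_ON() from bug.h):

    static void bitops_selftest(void)
    {
            BUG_ON(rol32(0x80000001u, 1) != 0x00000003u);   /* high bit wraps to bit 0 */
            BUG_ON(ror32(0x00000003u, 1) != 0x80000001u);
            BUG_ON(__ffs(0x18UL) != 3);                     /* lowest set bit of 0b11000 */
            BUG_ON(fls(0x18) != 5);                         /* highest set bit, 1-based */
            BUG_ON(hweight_long(0xffUL) != 8);              /* population count */
            BUG_ON(rounddown_pow_of_two(100) != 64);        /* 1 << (fls_long(100) - 1) */
    }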
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
new file mode 100644 (file)
index 0000000..42cd003
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * Block data types and constants.  Directly include this file only to
+ * break include dependency loop.
+ */
+#ifndef __LINUX_BLK_TYPES_H
+#define __LINUX_BLK_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/types.h>
+#include <linux/bvec.h>
+
+struct bio_set;
+struct bio;
+struct block_device;
+typedef void (bio_end_io_t) (struct bio *);
+
+/*
+ * Block error status values.  See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define        BLK_STS_OK 0
+#define BLK_STS_NOTSUPP                ((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT                ((__force blk_status_t)2)
+#define BLK_STS_NOSPC          ((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT      ((__force blk_status_t)4)
+#define BLK_STS_TARGET         ((__force blk_status_t)5)
+#define BLK_STS_NEXUS          ((__force blk_status_t)6)
+#define BLK_STS_MEDIUM         ((__force blk_status_t)7)
+#define BLK_STS_PROTECTION     ((__force blk_status_t)8)
+#define BLK_STS_RESOURCE       ((__force blk_status_t)9)
+#define BLK_STS_IOERR          ((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN          ((__force blk_status_t)12)
+
+/*
+ * main unit of I/O for the block layer and lower layers (ie drivers and
+ * stacking drivers)
+ */
+struct bio {
+       struct bio              *bi_next;       /* request queue link */
+       struct block_device     *bi_bdev;
+       blk_status_t            bi_status;
+       unsigned int            bi_opf;         /* bottom bits req flags,
+                                                * top bits REQ_OP. Use
+                                                * accessors.
+                                                */
+       unsigned short          bi_flags;       /* status, command, etc */
+       unsigned short          bi_ioprio;
+
+       struct bvec_iter        bi_iter;
+
+       atomic_t                __bi_remaining;
+
+       bio_end_io_t            *bi_end_io;
+       void                    *bi_private;
+
+       unsigned short          bi_vcnt;        /* how many bio_vec's */
+
+       /*
+        * Everything starting with bi_max_vecs will be preserved by bio_reset()
+        */
+
+       unsigned short          bi_max_vecs;    /* max bvl_vecs we can hold */
+
+       atomic_t                __bi_cnt;       /* pin count */
+
+       struct bio_vec          *bi_io_vec;     /* the actual vec list */
+
+       struct bio_set          *bi_pool;
+
+       /*
+        * We can inline a number of vecs at the end of the bio, to avoid
+        * double allocations for a small number of bio_vecs. This member
+        * MUST obviously be kept at the very end of the bio.
+        */
+       struct bio_vec          bi_inline_vecs[0];
+};
+
+#define BIO_RESET_BYTES                offsetof(struct bio, bi_max_vecs)
+
+/*
+ * bio flags
+ */
+#define BIO_SEG_VALID  1       /* bi_phys_segments valid */
+#define BIO_CLONED     2       /* doesn't own data */
+#define BIO_BOUNCED    3       /* bio is a bounce bio */
+#define BIO_USER_MAPPED 4      /* contains user pages */
+#define BIO_NULL_MAPPED 5      /* contains invalid user pages */
+#define BIO_QUIET      6       /* Make BIO Quiet */
+#define BIO_CHAIN      7       /* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED     8       /* bio has elevated ->bi_cnt */
+
+/*
+ * Flags starting here get preserved by bio_reset() - this includes
+ * BVEC_POOL_IDX()
+ */
+#define BIO_RESET_BITS 10
+
+/*
+ * We support 6 different bvec pools, the last one is magic in that it
+ * is backed by a mempool.
+ */
+#define BVEC_POOL_NR           6
+#define BVEC_POOL_MAX          (BVEC_POOL_NR - 1)
+
+/*
+ * Top 4 bits of bio flags indicate the pool the bvecs came from.  We add
+ * 1 to the actual index so that 0 indicates that there are no bvecs to be
+ * freed.
+ */
+#define BVEC_POOL_BITS         (4)
+#define BVEC_POOL_OFFSET       (16 - BVEC_POOL_BITS)
+#define BVEC_POOL_IDX(bio)     ((bio)->bi_flags >> BVEC_POOL_OFFSET)
+
+/*
+ * Operations and flags common to the bio and request structures.
+ * We use 8 bits for encoding the operation, and the remaining 24 for flags.
+ *
+ * The least significant bit of the operation number indicates the data
+ * transfer direction:
+ *
+ *   - if the least significant bit is set transfers are TO the device
+ *   - if the least significant bit is not set transfers are FROM the device
+ *
+ * If an operation does not transfer data the least significant bit has no
+ * meaning.
+ */
+#define REQ_OP_BITS    8
+#define REQ_OP_MASK    ((1 << REQ_OP_BITS) - 1)
+#define REQ_FLAG_BITS  24
+
+enum req_opf {
+       /* read sectors from the device */
+       REQ_OP_READ             = 0,
+       /* write sectors to the device */
+       REQ_OP_WRITE            = 1,
+       /* flush the volatile write cache */
+       REQ_OP_FLUSH            = 2,
+       /* discard sectors */
+       REQ_OP_DISCARD          = 3,
+       /* get zone information */
+       REQ_OP_ZONE_REPORT      = 4,
+       /* securely erase sectors */
+       REQ_OP_SECURE_ERASE     = 5,
+       /* reset a zone write pointer */
+       REQ_OP_ZONE_RESET       = 6,
+       /* write the same sector many times */
+       REQ_OP_WRITE_SAME       = 7,
+       /* write the zero filled sector many times */
+       REQ_OP_WRITE_ZEROES     = 8,
+
+       /* SCSI passthrough using struct scsi_request */
+       REQ_OP_SCSI_IN          = 32,
+       REQ_OP_SCSI_OUT         = 33,
+       /* Driver private requests */
+       REQ_OP_DRV_IN           = 34,
+       REQ_OP_DRV_OUT          = 35,
+
+       REQ_OP_LAST,
+};
+
+enum req_flag_bits {
+       __REQ_FAILFAST_DEV =    /* no driver retries of device errors */
+               REQ_OP_BITS,
+       __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
+       __REQ_FAILFAST_DRIVER,  /* no driver retries of driver errors */
+       __REQ_SYNC,             /* request is sync (sync write or read) */
+       __REQ_META,             /* metadata io request */
+       __REQ_PRIO,             /* boost priority in cfq */
+       __REQ_NOMERGE,          /* don't touch this for merging */
+       __REQ_IDLE,             /* anticipate more IO after this one */
+       __REQ_INTEGRITY,        /* I/O includes block integrity payload */
+       __REQ_FUA,              /* forced unit access */
+       __REQ_PREFLUSH,         /* request for cache flush */
+       __REQ_RAHEAD,           /* read ahead, can fail anytime */
+       __REQ_BACKGROUND,       /* background IO */
+       __REQ_NR_BITS,          /* stops here */
+};
+
+#define REQ_SYNC               (1ULL << __REQ_SYNC)
+#define REQ_META               (1ULL << __REQ_META)
+#define REQ_PRIO               (1ULL << __REQ_PRIO)
+
+#define REQ_NOMERGE_FLAGS      (REQ_PREFLUSH | REQ_FUA)
+
+#define bio_op(bio) \
+       ((bio)->bi_opf & REQ_OP_MASK)
+
+static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
+               unsigned op_flags)
+{
+       bio->bi_opf = op | op_flags;
+}
+
+#define REQ_RAHEAD             (1ULL << __REQ_RAHEAD)
+/* note: no __REQ_THROTTLED enumerator exists above, so this macro cannot be used as-is */
+#define REQ_THROTTLED          (1ULL << __REQ_THROTTLED)
+
+#define REQ_FUA                        (1ULL << __REQ_FUA)
+#define REQ_PREFLUSH           (1ULL << __REQ_PREFLUSH)
+
+#define RW_MASK                        REQ_OP_WRITE
+
+#define READ                   REQ_OP_READ
+#define WRITE                  REQ_OP_WRITE
+
+#define READ_SYNC              REQ_SYNC
+#define WRITE_SYNC             (REQ_SYNC)
+#define WRITE_ODIRECT          REQ_SYNC
+#define WRITE_FLUSH            (REQ_SYNC | REQ_PREFLUSH)
+#define WRITE_FUA              (REQ_SYNC | REQ_FUA)
+#define WRITE_FLUSH_FUA                (REQ_SYNC | REQ_PREFLUSH | REQ_FUA)
+
+#endif /* __LINUX_BLK_TYPES_H */
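The encoding described above keeps the opcode in the low REQ_OP_BITS of bi_opf and the flags in the bits above it, so bio_op() is a simple mask and the transfer direction is the opcode's least significant bit. A sketch, assuming a bio allocated elsewhere:

    static void mark_sync_fua_write(struct bio *bio)
    {
            bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_FUA);

            BUG_ON(bio_op(bio) != REQ_OP_WRITE);    /* opcode in the low 8 bits */
            BUG_ON(!(bio->bi_opf & REQ_FUA));       /* flags live above REQ_OP_BITS */
            BUG_ON(!(bio_op(bio) & 1));             /* odd opcodes transfer TO the device */
    }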
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
new file mode 100644 (file)
index 0000000..318bcfa
--- /dev/null
@@ -0,0 +1,207 @@
+#ifndef __TOOLS_LINUX_BLKDEV_H
+#define __TOOLS_LINUX_BLKDEV_H
+
+#include <stdio.h>
+
+#include <linux/backing-dev.h>
+#include <linux/blk_types.h>
+#include <linux/kobject.h>
+#include <linux/types.h>
+
+#define BIO_MAX_PAGES  256
+
+typedef unsigned fmode_t;
+
+struct bio;
+struct user_namespace;
+
+#define MINORBITS      20
+#define MINORMASK      ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev)     ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev)     ((unsigned int) ((dev) & MINORMASK))
+#define MKDEV(ma,mi)   (((ma) << MINORBITS) | (mi))
+
+/* file is open for reading */
+#define FMODE_READ             ((__force fmode_t)0x1)
+/* file is open for writing */
+#define FMODE_WRITE            ((__force fmode_t)0x2)
+/* file is seekable */
+#define FMODE_LSEEK            ((__force fmode_t)0x4)
+/* file can be accessed using pread */
+#define FMODE_PREAD            ((__force fmode_t)0x8)
+/* file can be accessed using pwrite */
+#define FMODE_PWRITE           ((__force fmode_t)0x10)
+/* File is opened for execution with sys_execve / sys_uselib */
+#define FMODE_EXEC             ((__force fmode_t)0x20)
+/* File is opened with O_NDELAY (only set for block devices) */
+#define FMODE_NDELAY           ((__force fmode_t)0x40)
+/* File is opened with O_EXCL (only set for block devices) */
+#define FMODE_EXCL             ((__force fmode_t)0x80)
+/* File is opened using open(.., 3, ..) and is writeable only for ioctls
+   (special hack for floppy.c) */
+#define FMODE_WRITE_IOCTL      ((__force fmode_t)0x100)
+/* 32bit hashes as llseek() offset (for directories) */
+#define FMODE_32BITHASH         ((__force fmode_t)0x200)
+/* 64bit hashes as llseek() offset (for directories) */
+#define FMODE_64BITHASH         ((__force fmode_t)0x400)
+
+struct inode {
+       unsigned long           i_ino;
+       loff_t                  i_size;
+       struct super_block      *i_sb;
+};
+
+struct file {
+       struct inode            *f_inode;
+};
+
+static inline struct inode *file_inode(const struct file *f)
+{
+       return f->f_inode;
+}
+
+#define BDEVNAME_SIZE  32
+
+struct request_queue {
+       struct backing_dev_info *backing_dev_info;
+};
+
+struct gendisk {
+};
+
+struct hd_struct {
+       struct kobject          kobj;
+};
+
+#define part_to_dev(part)      (part)
+
+struct block_device {
+       char                    name[BDEVNAME_SIZE];
+       struct inode            *bd_inode;
+       struct request_queue    queue;
+       void                    *bd_holder;
+       struct hd_struct        *bd_part;
+       struct gendisk          *bd_disk;
+       struct gendisk          __bd_disk;
+       int                     bd_fd;
+       int                     bd_sync_fd;
+
+       struct backing_dev_info *bd_bdi;
+       struct backing_dev_info __bd_bdi;
+};
+
+void generic_make_request(struct bio *);
+int submit_bio_wait(struct bio *);
+
+static inline void submit_bio(struct bio *bio)
+{
+       generic_make_request(bio);
+}
+
+int blkdev_issue_discard(struct block_device *, sector_t,
+                        sector_t, gfp_t, unsigned long);
+
+#define bdev_get_queue(bdev)           (&((bdev)->queue))
+
+#define blk_queue_discard(q)           ((void) (q), 0)
+#define blk_queue_nonrot(q)            ((void) (q), 0)
+
+unsigned bdev_logical_block_size(struct block_device *bdev);
+sector_t get_capacity(struct gendisk *disk);
+
+void blkdev_put(struct block_device *bdev, fmode_t mode);
+void bdput(struct block_device *bdev);
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder);
+struct block_device *lookup_bdev(const char *path);
+
+struct super_block {
+       void                    *s_fs_info;
+};
+
+/*
+ * File types
+ *
+ * NOTE! These match bits 12..15 of stat.st_mode
+ * (ie "(i_mode >> 12) & 15").
+ */
+#ifndef DT_UNKNOWN
+#define DT_UNKNOWN     0
+#define DT_FIFO                1
+#define DT_CHR         2
+#define DT_DIR         4
+#define DT_BLK         6
+#define DT_REG         8
+#define DT_LNK         10
+#define DT_SOCK                12
+#define DT_WHT         14
+#endif
+
+/*
+ * This is the "filldir" function type, used by readdir() to let
+ * the kernel specify what kind of dirent layout it wants to have.
+ * This allows the kernel to read directories into kernel space or
+ * to have different dirent layouts depending on the binary type.
+ */
+struct dir_context;
+typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
+                        unsigned);
+
+struct dir_context {
+       const filldir_t actor;
+       u64 pos;
+};
+
+/* /sys/fs */
+extern struct kobject *fs_kobj;
+
+struct file_operations {
+};
+
+static inline int register_chrdev(unsigned int major, const char *name,
+                                 const struct file_operations *fops)
+{
+       return 1;
+}
+
+static inline void unregister_chrdev(unsigned int major, const char *name)
+{
+}
+
+static inline const char *bdevname(struct block_device *bdev, char *buf)
+{
+       snprintf(buf, BDEVNAME_SIZE, "%s", bdev->name);
+       return buf;
+}
+
+static inline bool op_is_write(unsigned int op)
+{
+       return op != REQ_OP_READ;
+}
+
+/*
+ * return data direction, READ or WRITE
+ */
+static inline int bio_data_dir(struct bio *bio)
+{
+       return op_is_write(bio_op(bio)) ? WRITE : READ;
+}
+
+static inline bool dir_emit(struct dir_context *ctx,
+                           const char *name, int namelen,
+                           u64 ino, unsigned type)
+{
+       return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
+}
+
+static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
+{
+       return true;
+}
+
+#define capable(cap)           true
+
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+const char *blk_status_to_str(blk_status_t status);
+
+#endif /* __TOOLS_LINUX_BLKDEV_H */
+
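MKDEV() packs a device number with the minor in the low MINORBITS (20) bits and the major above them; MAJOR() and MINOR() invert the packing. A worked example (the (8, 5) pair is illustrative):

    static void devnum_demo(void)
    {
            unsigned dev = MKDEV(8, 5);             /* sd-style major 8, minor 5 */

            BUG_ON(dev != ((8u << 20) | 5));        /* == 0x800005 */
            BUG_ON(MAJOR(dev) != 8);
            BUG_ON(MINOR(dev) != 5);
    }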
diff --git a/include/linux/bug.h b/include/linux/bug.h
new file mode 100644 (file)
index 0000000..d47f5a4
--- /dev/null
@@ -0,0 +1,61 @@
+#ifndef __TOOLS_LINUX_BUG_H
+#define __TOOLS_LINUX_BUG_H
+
+#include <assert.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+
+#ifdef CONFIG_VALGRIND
+#include <valgrind/memcheck.h>
+
+#define DEBUG_MEMORY_FREED(p, len) VALGRIND_MAKE_MEM_UNDEFINED(p, len)
+#endif
+
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)                 \
+       BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+#define BUILD_BUG_ON_ZERO(e)   (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e)   ((void *)sizeof(struct { int:-!!(e); }))
+
+#define BUILD_BUG_ON(cond)     ((void)sizeof(char[1 - 2*!!(cond)]))
+
+#define BUG()                  do { assert(0); unreachable(); } while (0)
+#define BUG_ON(cond)           assert(!(cond))
+
+#define WARN(cond, fmt, ...)                                           \
+({                                                                     \
+       int __ret_warn_on = unlikely(!!(cond));                         \
+       if (__ret_warn_on)                                              \
+               fprintf(stderr, "WARNING at " __FILE__ ":%d: " fmt "\n",\
+                       __LINE__, ##__VA_ARGS__);                       \
+       __ret_warn_on;                                                  \
+})
+
+#define WARN_ON(cond) ({                                               \
+       int __ret_warn_on = unlikely(!!(cond));                         \
+       if (__ret_warn_on)                                              \
+               fprintf(stderr, "WARNING at " __FILE__ ":%d\n", __LINE__);\
+       __ret_warn_on;                                                  \
+})
+
+#define WARN_ONCE(cond, fmt, ...)                                      \
+({                                                                     \
+       static bool __warned;                                           \
+       int __ret_warn_on = unlikely(!!(cond));                         \
+       if (__ret_warn_on && !__warned) {                               \
+               __warned = true;                                        \
+               fprintf(stderr, "WARNING at " __FILE__ ":%d: " fmt "\n",\
+                       __LINE__, ##__VA_ARGS__);                       \
+       }                                                               \
+       __ret_warn_on;                                                  \
+})
+
+#define WARN_ON_ONCE(cond) ({                                          \
+       static bool __warned;                                           \
+       int __ret_warn_on = unlikely(!!(cond));                         \
+       if (__ret_warn_on && !__warned) {                               \
+               __warned = true;                                        \
+               fprintf(stderr, "WARNING at " __FILE__ ":%d\n", __LINE__);\
+       }                                                               \
+       __ret_warn_on;                                                  \
+})
+
+#endif /* __TOOLS_LINUX_BUG_H */
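The _ONCE variants rely on a function-local static, so each call site warns at most once while still returning the condition every time, which keeps them usable inside if (). A sketch (the error value is illustrative):

    static int check_block_size(unsigned size)
    {
            if (WARN_ONCE(size & (size - 1),
                          "block size %u not a power of two", size))
                    return -1;      /* prints once, rejects every time */
            return 0;
    }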
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
new file mode 100644 (file)
index 0000000..5bc68b4
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * bvec iterator
+ *
+ * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+#ifndef __LINUX_BVEC_ITER_H
+#define __LINUX_BVEC_ITER_H
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+
+/*
+ * was unsigned short, but we might as well be ready for > 64kB I/O pages
+ */
+struct bio_vec {
+       struct page     *bv_page;
+       unsigned int    bv_len;
+       unsigned int    bv_offset;
+};
+
+struct bvec_iter {
+       sector_t                bi_sector;      /* device address in 512 byte
+                                                  sectors */
+       unsigned int            bi_size;        /* residual I/O count */
+
+       unsigned int            bi_idx;         /* current index into bvl_vec */
+
+       unsigned int            bi_bvec_done;   /* number of bytes completed in
+                                                  current bvec */
+};
+
+struct bvec_iter_all {
+       int             idx;
+};
+
+/*
+ * various member access, note that bio_data should of course not be used
+ * on highmem page vectors
+ */
+#define __bvec_iter_bvec(bvec, iter)   (&(bvec)[(iter).bi_idx])
+
+#define bvec_iter_page(bvec, iter)                             \
+       (__bvec_iter_bvec((bvec), (iter))->bv_page)
+
+#define bvec_iter_len(bvec, iter)                              \
+       min((iter).bi_size,                                     \
+           __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
+
+#define bvec_iter_offset(bvec, iter)                           \
+       (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
+
+#define bvec_iter_bvec(bvec, iter)                             \
+((struct bio_vec) {                                            \
+       .bv_page        = bvec_iter_page((bvec), (iter)),       \
+       .bv_len         = bvec_iter_len((bvec), (iter)),        \
+       .bv_offset      = bvec_iter_offset((bvec), (iter)),     \
+})
+
+static inline void bvec_iter_advance(const struct bio_vec *bv,
+                                    struct bvec_iter *iter,
+                                    unsigned bytes)
+{
+       WARN_ONCE(bytes > iter->bi_size,
+                 "Attempted to advance past end of bvec iter\n");
+
+       while (bytes) {
+               unsigned iter_len = bvec_iter_len(bv, *iter);
+               unsigned len = min(bytes, iter_len);
+
+               bytes -= len;
+               iter->bi_size -= len;
+               iter->bi_bvec_done += len;
+
+               if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+                       iter->bi_bvec_done = 0;
+                       iter->bi_idx++;
+               }
+       }
+}
+
+#define for_each_bvec(bvl, bio_vec, iter, start)                       \
+       for (iter = (start);                                            \
+            (iter).bi_size &&                                          \
+               ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
+            bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
+
+#endif /* __LINUX_BVEC_ITER_H */
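
A hedged usage sketch (not part of the commit): driving the iterator over a
plain bio_vec array with for_each_bvec(). handle_segment() is an invented
consumer, and `start` is assumed to have bi_size covering the array.

        static void walk_segments(struct bio_vec *vecs, struct bvec_iter start)
        {
                struct bio_vec bv;
                struct bvec_iter iter;

                /* bv is a synthesized bio_vec for each contiguous segment */
                for_each_bvec(bv, vecs, iter, start)
                        handle_segment(bv.bv_page, bv.bv_offset, bv.bv_len);
        }
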
diff --git a/include/linux/byteorder.h b/include/linux/byteorder.h
new file mode 100644 (file)
index 0000000..7b04f5b
--- /dev/null
@@ -0,0 +1,75 @@
+#ifndef __LINUX_BYTEORDER_H
+#define __LINUX_BYTEORDER_H
+
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+
+#define swab16 __swab16
+#define swab32 __swab32
+#define swab64 __swab64
+#define swahw32 __swahw32
+#define swahb32 __swahb32
+#define swab16p __swab16p
+#define swab32p __swab32p
+#define swab64p __swab64p
+#define swahw32p __swahw32p
+#define swahb32p __swahb32p
+#define swab16s __swab16s
+#define swab32s __swab32s
+#define swab64s __swab64s
+#define swahw32s __swahw32s
+#define swahb32s __swahb32s
+
+#define cpu_to_le64 __cpu_to_le64
+#define le64_to_cpu __le64_to_cpu
+#define cpu_to_le32 __cpu_to_le32
+#define le32_to_cpu __le32_to_cpu
+#define cpu_to_le16 __cpu_to_le16
+#define le16_to_cpu __le16_to_cpu
+#define cpu_to_be64 __cpu_to_be64
+#define be64_to_cpu __be64_to_cpu
+#define cpu_to_be32 __cpu_to_be32
+#define be32_to_cpu __be32_to_cpu
+#define cpu_to_be16 __cpu_to_be16
+#define be16_to_cpu __be16_to_cpu
+#define cpu_to_le64p __cpu_to_le64p
+#define le64_to_cpup __le64_to_cpup
+#define cpu_to_le32p __cpu_to_le32p
+#define le32_to_cpup __le32_to_cpup
+#define cpu_to_le16p __cpu_to_le16p
+#define le16_to_cpup __le16_to_cpup
+#define cpu_to_be64p __cpu_to_be64p
+#define be64_to_cpup __be64_to_cpup
+#define cpu_to_be32p __cpu_to_be32p
+#define be32_to_cpup __be32_to_cpup
+#define cpu_to_be16p __cpu_to_be16p
+#define be16_to_cpup __be16_to_cpup
+#define cpu_to_le64s __cpu_to_le64s
+#define le64_to_cpus __le64_to_cpus
+#define cpu_to_le32s __cpu_to_le32s
+#define le32_to_cpus __le32_to_cpus
+#define cpu_to_le16s __cpu_to_le16s
+#define le16_to_cpus __le16_to_cpus
+#define cpu_to_be64s __cpu_to_be64s
+#define be64_to_cpus __be64_to_cpus
+#define cpu_to_be32s __cpu_to_be32s
+#define be32_to_cpus __be32_to_cpus
+#define cpu_to_be16s __cpu_to_be16s
+#define be16_to_cpus __be16_to_cpus
+
+static inline void le16_add_cpu(__le16 *var, u16 val)
+{
+       *var = cpu_to_le16(le16_to_cpu(*var) + val);
+}
+
+static inline void le32_add_cpu(__le32 *var, u32 val)
+{
+       *var = cpu_to_le32(le32_to_cpu(*var) + val);
+}
+
+static inline void le64_add_cpu(__le64 *var, u64 val)
+{
+       *var = cpu_to_le64(le64_to_cpu(*var) + val);
+}
+
+#endif /* __LINUX_BYTEORDER_H */
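
A short illustration of the helpers above (a sketch; the struct and field are
invented): le64_add_cpu() updates a little-endian on-disk field from
CPU-native code without open-coding both conversions.

        struct sb_counters {
                __le64  nr_writes;      /* stored little-endian on disk */
        };

        static void account_write(struct sb_counters *c)
        {
                /* same as: c->nr_writes = cpu_to_le64(le64_to_cpu(c->nr_writes) + 1); */
                le64_add_cpu(&c->nr_writes, 1);
        }
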
diff --git a/include/linux/cache.h b/include/linux/cache.h
new file mode 100644 (file)
index 0000000..c61167c
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef __TOOLS_LINUX_CACHE_H
+#define __TOOLS_LINUX_CACHE_H
+
+#define L1_CACHE_SHIFT         6
+#define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
+#define SMP_CACHE_BYTES                L1_CACHE_BYTES
+
+#define L1_CACHE_ALIGN(x)      __ALIGN_KERNEL(x, L1_CACHE_BYTES)
+
+#define __read_mostly
+#define __ro_after_init
+
+#define ____cacheline_aligned  __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+
+#endif /* __TOOLS_LINUX_CACHE_H */
+
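
A minimal sketch of how these annotations are typically used (the struct is
invented): giving each frequently-written counter its own cache line so that
writers on different CPUs do not false-share.

        struct hot_counters {
                unsigned long   reads   ____cacheline_aligned_in_smp;
                unsigned long   writes  ____cacheline_aligned_in_smp;
        };
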
diff --git a/include/linux/closure.h b/include/linux/closure.h
new file mode 100644 (file)
index 0000000..a9de6d9
--- /dev/null
@@ -0,0 +1,407 @@
+#ifndef _LINUX_CLOSURE_H
+#define _LINUX_CLOSURE_H
+
+#include <linux/llist.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+
+/*
+ * Closure is perhaps the most overused and abused term in computer science, but
+ * since I've been unable to come up with anything better you're stuck with it
+ * again.
+ *
+ * What are closures?
+ *
+ * They embed a refcount. The basic idea is they count "things that are in
+ * progress" - in flight bios, some other thread that's doing something else -
+ * anything you might want to wait on.
+ *
+ * The refcount may be manipulated with closure_get() and closure_put().
+ * closure_put() is where many of the interesting things happen, when it causes
+ * the refcount to go to 0.
+ *
+ * Closures can be used to wait on things both synchronously and asynchronously,
+ * and synchronous and asynchronous use can be mixed without restriction. To
+ * wait synchronously, use closure_sync() - you will sleep until your closure's
+ * refcount hits 1.
+ *
+ * To wait asynchronously, use
+ *   continue_at(cl, next_function, workqueue);
+ *
+ * passing it, as you might expect, the function to run when nothing is pending
+ * and the workqueue to run that function out of.
+ *
+ * continue_at() also, critically, requires a 'return' immediately following the
+ * location where this macro is referenced, to return to the calling function.
+ * There's good reason for this.
+ *
+ * To use closures asynchronously safely, they must always have a refcount while
+ * they are running, owned by the thread that is running them. Otherwise, suppose
+ * you submit some bios and wish to have a function run when they all complete:
+ *
+ * foo_endio(struct bio *bio)
+ * {
+ *     closure_put(cl);
+ * }
+ *
+ * closure_init(cl);
+ *
+ * do_stuff();
+ * closure_get(cl);
+ * bio1->bi_endio = foo_endio;
+ * bio_submit(bio1);
+ *
+ * do_more_stuff();
+ * closure_get(cl);
+ * bio2->bi_endio = foo_endio;
+ * bio_submit(bio2);
+ *
+ * continue_at(cl, complete_some_read, system_wq);
+ *
+ * If the closure's refcount started at 0, complete_some_read() could run before the
+ * second bio was submitted - which is almost always not what you want! More
+ * importantly, it wouldn't be possible to say whether the original thread or
+ * complete_some_read()'s thread owned the closure - and whatever state it was
+ * associated with!
+ *
+ * So, closure_init() initializes a closure's refcount to 1 - and when a
+ * closure_fn is run, the refcount will be reset to 1 first.
+ *
+ * Then, the rule is - if you got the refcount with closure_get(), release it
+ * with closure_put() (i.e., in a bio->bi_endio function). If you have a refcount
+ * on a closure because you called closure_init() or you were run out of a
+ * closure - _always_ use continue_at(). Doing so consistently will help
+ * eliminate an entire class of particularly pernicious races.
+ *
+ * Lastly, you might have a wait list dedicated to a specific event, and have no
+ * need for specifying the condition - you just want to wait until someone runs
+ * closure_wake_up() on the appropriate wait list. In that case, just use
+ * closure_wait(). It will return either true or false, depending on whether the
+ * closure was already on a wait list or not - a closure can only be on one wait
+ * list at a time.
+ *
+ * Parents:
+ *
+ * closure_init() takes two arguments - it takes the closure to initialize, and
+ * a (possibly null) parent.
+ *
+ * If parent is non null, the new closure will have a refcount for its lifetime;
+ * a closure is considered to be "finished" when its refcount hits 0 and the
+ * function to run is null. Hence
+ *
+ * continue_at(cl, NULL, NULL);
+ *
+ * returns up the (spaghetti) stack of closures, precisely like normal return
+ * returns up the C stack. continue_at() with non null fn is better thought of
+ * as doing a tail call.
+ *
+ * All this implies that a closure should typically be embedded in a particular
+ * struct (which its refcount will normally control the lifetime of), and that
+ * struct can very much be thought of as a stack frame.
+ */
+
+struct closure;
+struct closure_syncer;
+typedef void (closure_fn) (struct closure *);
+
+struct closure_waitlist {
+       struct llist_head       list;
+};
+
+enum closure_state {
+       /*
+        * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
+        * the thread that owns the closure, and cleared by the thread that's
+        * waking up the closure.
+        *
+        * The rest are for debugging and don't affect behaviour:
+        *
+        * CLOSURE_RUNNING: Set when a closure is running (i.e. by
+        * closure_init() and when closure_put() runs the next function), and
+        * must be cleared before remaining hits 0. Primarily to help guard
+        * against incorrect usage and accidentally transferring references.
+        * continue_at() and closure_return() clear it for you, if you're doing
+        * something unusual you can use closure_set_dead() which also helps
+        * annotate where references are being transferred.
+        */
+
+       CLOSURE_BITS_START      = (1U << 27),
+       CLOSURE_DESTRUCTOR      = (1U << 27),
+       CLOSURE_WAITING         = (1U << 29),
+       CLOSURE_RUNNING         = (1U << 31),
+};
+
+#define CLOSURE_GUARD_MASK                                     \
+       ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)
+
+#define CLOSURE_REMAINING_MASK         (CLOSURE_BITS_START - 1)
+#define CLOSURE_REMAINING_INITIALIZER  (1|CLOSURE_RUNNING)
+
+struct closure {
+       union {
+               struct {
+                       struct workqueue_struct *wq;
+                       struct closure_syncer   *s;
+                       struct llist_node       list;
+                       closure_fn              *fn;
+               };
+               struct work_struct      work;
+       };
+
+       struct closure          *parent;
+
+       atomic_t                remaining;
+
+#ifdef CONFIG_DEBUG_CLOSURES
+#define CLOSURE_MAGIC_DEAD     0xc054dead
+#define CLOSURE_MAGIC_ALIVE    0xc054a11e
+
+       unsigned                magic;
+       struct list_head        all;
+       unsigned long           ip;
+       unsigned long           waiting_on;
+#endif
+};
+
+void closure_sub(struct closure *cl, int v);
+void closure_put(struct closure *cl);
+void __closure_wake_up(struct closure_waitlist *list);
+bool closure_wait(struct closure_waitlist *list, struct closure *cl);
+void __closure_sync(struct closure *cl);
+
+/**
+ * closure_sync - sleep until a closure has nothing left to wait on
+ *
+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns
+ * the last refcount.
+ */
+static inline void closure_sync(struct closure *cl)
+{
+       if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1)
+               __closure_sync(cl);
+}
+
+#ifdef CONFIG_DEBUG_CLOSURES
+
+void closure_debug_create(struct closure *cl);
+void closure_debug_destroy(struct closure *cl);
+
+#else
+
+static inline void closure_debug_create(struct closure *cl) {}
+static inline void closure_debug_destroy(struct closure *cl) {}
+
+#endif
+
+static inline void closure_set_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+       cl->ip = _THIS_IP_;
+#endif
+}
+
+static inline void closure_set_ret_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+       cl->ip = _RET_IP_;
+#endif
+}
+
+static inline void closure_set_waiting(struct closure *cl, unsigned long f)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+       cl->waiting_on = f;
+#endif
+}
+
+static inline void closure_set_stopped(struct closure *cl)
+{
+       atomic_sub(CLOSURE_RUNNING, &cl->remaining);
+}
+
+static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
+                                 struct workqueue_struct *wq)
+{
+       closure_set_ip(cl);
+       cl->fn = fn;
+       cl->wq = wq;
+       /* between atomic_dec() in closure_put() */
+       smp_mb__before_atomic();
+}
+
+static inline void closure_queue(struct closure *cl)
+{
+       struct workqueue_struct *wq = cl->wq;
+
+       if (wq) {
+               INIT_WORK(&cl->work, cl->work.func);
+               queue_work(wq, &cl->work);
+       } else
+               cl->fn(cl);
+}
+
+/**
+ * closure_get - increment a closure's refcount
+ */
+static inline void closure_get(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+       BUG_ON((atomic_inc_return(&cl->remaining) &
+               CLOSURE_REMAINING_MASK) <= 1);
+#else
+       atomic_inc(&cl->remaining);
+#endif
+}
+
+/**
+ * closure_init - Initialize a closure, setting the refcount to 1
+ * @cl:                closure to initialize
+ * @parent:    parent of the new closure. cl will take a refcount on it for its
+ *             lifetime; may be NULL.
+ */
+static inline void closure_init(struct closure *cl, struct closure *parent)
+{
+       cl->fn = NULL;
+       cl->parent = parent;
+       if (parent)
+               closure_get(parent);
+
+       atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+
+       closure_debug_create(cl);
+       closure_set_ip(cl);
+}
+
+static inline void closure_init_stack(struct closure *cl)
+{
+       memset(cl, 0, sizeof(struct closure));
+       atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+}
+
+/**
+ * closure_wake_up - wake up all closures on a wait list.
+ */
+static inline void closure_wake_up(struct closure_waitlist *list)
+{
+       smp_mb();
+       __closure_wake_up(list);
+}
+
+#define continue_at_noreturn(_cl, _fn, _wq)                            \
+do {                                                                   \
+       set_closure_fn(_cl, _fn, _wq);                                  \
+       closure_sub(_cl, CLOSURE_RUNNING + 1);                          \
+} while (0)
+
+/**
+ * continue_at - jump to another function with barrier
+ *
+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have
+ * been dropped with closure_put()), it will resume execution at @fn running out
+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
+ *
+ * NOTE: This macro expands to a return in the calling function!
+ *
+ * This is because after calling continue_at() you no longer have a ref on @cl,
+ * and whatever @cl owns may be freed out from under you - a running closure fn
+ * has a ref on its own closure which continue_at() drops.
+ */
+#define continue_at(_cl, _fn, _wq)                                     \
+do {                                                                   \
+       continue_at_noreturn(_cl, _fn, _wq);                            \
+       return;                                                         \
+} while (0)
+
+/**
+ * closure_return - finish execution of a closure
+ *
+ * This is used to indicate that @cl is finished: when all outstanding refs on
+ * @cl have been dropped @cl's ref on its parent closure (as passed to
+ * closure_init()) will be dropped, if one was specified - thus this can be
+ * thought of as returning to the parent closure.
+ */
+#define closure_return(_cl)    continue_at((_cl), NULL, NULL)
+
+/**
+ * continue_at_nobarrier - jump to another function without barrier
+ *
+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if
+ * @wq is NULL).
+ *
+ * NOTE: like continue_at(), this macro expands to a return in the caller!
+ *
+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
+ * thus it's not safe to touch anything protected by @cl after a
+ * continue_at_nobarrier().
+ */
+#define continue_at_nobarrier(_cl, _fn, _wq)                           \
+do {                                                                   \
+       closure_set_ip(_cl);                                            \
+       if (_wq) {                                                      \
+               INIT_WORK(&(_cl)->work, (void *) _fn);                  \
+               queue_work((_wq), &(_cl)->work);                        \
+       } else {                                                        \
+               (_fn)(_cl);                                             \
+       }                                                               \
+       return;                                                         \
+} while (0)
+
+#define closure_return_with_destructor_noreturn(_cl, _destructor)      \
+do {                                                                   \
+       set_closure_fn(_cl, _destructor, NULL);                         \
+       closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1);     \
+} while (0)
+
+/**
+ * closure_return_with_destructor - finish execution of a closure, with destructor
+ *
+ * Works like closure_return(), except @destructor will be called when all
+ * outstanding refs on @cl have been dropped; @destructor may be used to safely
+ * free the memory occupied by @cl, and it is called with the ref on the parent
+ * closure still held - so @destructor could safely return an item to a
+ * freelist protected by @cl's parent.
+ */
+#define closure_return_with_destructor(_cl, _destructor)               \
+do {                                                                   \
+       closure_return_with_destructor_noreturn(_cl, _destructor);      \
+       return;                                                         \
+} while (0)
+
+/**
+ * closure_call - execute @fn out of a new, uninitialized closure
+ *
+ * Typically used when running out of one closure, and we want to run @fn
+ * asynchronously out of a new closure - @parent will then wait for @cl to
+ * finish.
+ */
+static inline void closure_call(struct closure *cl, closure_fn fn,
+                               struct workqueue_struct *wq,
+                               struct closure *parent)
+{
+       closure_init(cl, parent);
+       continue_at_nobarrier(cl, fn, wq);
+}
+
+#define __closure_wait_event(waitlist, _cond)                          \
+do {                                                                   \
+       struct closure cl;                                              \
+                                                                       \
+       closure_init_stack(&cl);                                        \
+                                                                       \
+       while (1) {                                                     \
+               closure_wait(waitlist, &cl);                            \
+               if (_cond)                                              \
+                       break;                                          \
+               closure_sync(&cl);                                      \
+       }                                                               \
+       closure_wake_up(waitlist);                                      \
+       closure_sync(&cl);                                              \
+} while (0)
+
+#define closure_wait_event(waitlist, _cond)                            \
+do {                                                                   \
+       if (!(_cond))                                                   \
+               __closure_wait_event(waitlist, _cond);                  \
+} while (0)
+
+#endif /* _LINUX_CLOSURE_H */
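
To make the continue_at() rules above concrete, a minimal sketch of the
asynchronous pattern the header comment describes (my_work() and
my_work_done() are invented names; system_wq comes from the workqueue shim):

        static void my_work_done(struct closure *cl)
        {
                /* all outstanding refs have been dropped; return to the parent */
                closure_return(cl);
        }

        static void my_work(struct closure *cl)
        {
                /* ... take a closure_get() ref for each async operation started ... */

                /* expands to a return: we no longer own a ref on cl past this point */
                continue_at(cl, my_work_done, system_wq);
        }
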
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
new file mode 100644 (file)
index 0000000..2bfbfad
--- /dev/null
@@ -0,0 +1,174 @@
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#define _TOOLS_LINUX_COMPILER_H_
+
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
+
+#ifndef __always_inline
+# define __always_inline       inline __attribute__((always_inline))
+#endif
+
+#ifdef __ANDROID__
+/*
+ * FIXME: Big hammer to get rid of tons of:
+ *   "warning: always_inline function might not be inlinable"
+ *
+ * At least on android-ndk-r12/platforms/android-24/arch-arm
+ */
+#undef __always_inline
+#define __always_inline        inline
+#endif
+
+#define noinline
+#define noinline_for_stack noinline
+
+#define __user
+#define __kernel
+
+#define __pure                 __attribute__((pure))
+#define __aligned(x)           __attribute__((aligned(x)))
+#define __printf(a, b)         __attribute__((format(printf, a, b)))
+#define __used                 __attribute__((__used__))
+#define __maybe_unused         __attribute__((unused))
+#define __always_unused                __attribute__((unused))
+#define __packed               __attribute__((__packed__))
+#define __flatten              __attribute__((flatten))
+#define __force
+#define __nocast
+#define __iomem
+#define __chk_user_ptr(x) (void)0
+#define __chk_io_ptr(x) (void)0
+#define __builtin_warning(x, y...) (1)
+#define __must_hold(x)
+#define __acquires(x)
+#define __releases(x)
+#define __acquire(x) (void)0
+#define __release(x) (void)0
+#define __cond_lock(x,c) (c)
+#define __percpu
+#define __rcu
+#define __sched
+#define __init
+#define __exit
+#define __private
+#define __must_check
+#define __malloc
+#define __weak                 __attribute__((weak))
+#define likely(x)              __builtin_expect(!!(x), 1)
+#define unlikely(x)            __builtin_expect(!!(x), 0)
+#define unreachable()          __builtin_unreachable()
+#define __same_type(a, b)      __builtin_types_compatible_p(typeof(a), typeof(b))
+
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define __initcall(x)  /* unimplemented */
+#define __exitcall(x)  /* unimplemented */
+
+#include <linux/types.h>
+
+/*
+ * Following functions are taken from kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3 which makes build fail
+ * under gcc 4.4.
+ *
+ * Using extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8  __attribute__((__may_alias__))  __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+       switch (size) {
+       case 1: *(__u8_alias_t  *) res = *(volatile __u8_alias_t  *) p; break;
+       case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+       case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+       case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+       default:
+               barrier();
+               __builtin_memcpy((void *)res, (const void *)p, size);
+               barrier();
+       }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+       switch (size) {
+       case 1: *(volatile  __u8_alias_t *) p = *(__u8_alias_t  *) res; break;
+       case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+       case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+       case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+       default:
+               barrier();
+               __builtin_memcpy((void *)p, (const void *)res, size);
+               barrier();
+       }
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering.  One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+       ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+#define WRITE_ONCE(x, val) \
+       ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+#define lockless_dereference(p) \
+({ \
+       typeof(p) _________p1 = READ_ONCE(p); \
+       typeof(*(p)) *___typecheck_p __maybe_unused; \
+       smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
+       (_________p1); \
+})
+
+#define flush_cache_all()                      do { } while (0)
+#define flush_cache_mm(mm)                     do { } while (0)
+#define flush_cache_dup_mm(mm)                 do { } while (0)
+#define flush_cache_range(vma, start, end)     do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)     do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)                        do { } while (0)
+#define flush_dcache_mmap_lock(mapping)                do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)      do { } while (0)
+#define flush_icache_range(start, end)         do { } while (0)
+#define flush_icache_page(vma,pg)              do { } while (0)
+#define flush_icache_user_range(vma,pg,adr,len)        do { } while (0)
+#define flush_cache_vmap(start, end)           do { } while (0)
+#define flush_cache_vunmap(start, end)         do { } while (0)
+
+#ifdef __x86_64
+#define CONFIG_X86_64  y
+#endif
+
+#endif /* _TOOLS_LINUX_COMPILER_H_ */
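
A hedged example of the READ_ONCE()/WRITE_ONCE() contract documented above
(`flag` is invented): the reader performs a fresh load on every iteration,
which the compiler may not hoist out of the loop.

        static int flag;

        static void producer(void)
        {
                WRITE_ONCE(flag, 1);            /* store is not torn or elided */
        }

        static void consumer(void)
        {
                while (!READ_ONCE(flag))        /* re-loaded each iteration */
                        ;
        }
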
diff --git a/include/linux/completion.h b/include/linux/completion.h
new file mode 100644 (file)
index 0000000..d11a8dd
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef __LINUX_COMPLETION_H
+#define __LINUX_COMPLETION_H
+
+/*
+ * (C) Copyright 2001 Linus Torvalds
+ *
+ * Atomic wait-for-completion handler data structures.
+ * See kernel/sched/completion.c for details.
+ */
+
+#include <linux/wait.h>
+
+struct completion {
+       unsigned int done;
+       wait_queue_head_t wait;
+};
+
+#define DECLARE_COMPLETION(work)                                       \
+       struct completion work = {                                      \
+               .done = 0,                                              \
+               .wait = __WAIT_QUEUE_HEAD_INITIALIZER((work).wait)      \
+       }
+
+#define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
+
+static inline void init_completion(struct completion *x)
+{
+       x->done = 0;
+       init_waitqueue_head(&x->wait);
+}
+
+static inline void reinit_completion(struct completion *x)
+{
+       x->done = 0;
+}
+
+void complete(struct completion *);
+void wait_for_completion(struct completion *);
+
+#define wait_for_completion_interruptible(x) (wait_for_completion(x), 0)
+
+#endif
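
Usage follows the usual kernel idiom (a sketch; start_async_work() is a
hypothetical function whose worker eventually calls complete()):

        static struct completion done;

        static void waiter(void)
        {
                init_completion(&done);
                start_async_work(&done);        /* worker calls complete(&done) */
                wait_for_completion(&done);     /* sleeps until done.done > 0 */
        }
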
diff --git a/include/linux/console.h b/include/linux/console.h
new file mode 100644 (file)
index 0000000..d01aa9a
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _LINUX_CONSOLE_H_
+#define _LINUX_CONSOLE_H_
+
+#define console_lock()
+#define console_unlock()
+
+#endif /* _LINUX_CONSOLE_H_ */
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
new file mode 100644 (file)
index 0000000..024d645
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef __LINUX_CPUMASK_H
+#define __LINUX_CPUMASK_H
+
+#define num_online_cpus()      1U
+#define num_possible_cpus()    1U
+#define num_present_cpus()     1U
+#define num_active_cpus()      1U
+#define cpu_online(cpu)                ((cpu) == 0)
+#define cpu_possible(cpu)      ((cpu) == 0)
+#define cpu_present(cpu)       ((cpu) == 0)
+#define cpu_active(cpu)                ((cpu) == 0)
+
+#define for_each_cpu(cpu, mask)                        \
+       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_not(cpu, mask)            \
+       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_and(cpu, mask, and)       \
+       for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
+
+#define for_each_possible_cpu(cpu) for_each_cpu((cpu), 1)
+#define for_each_online_cpu(cpu)   for_each_cpu((cpu), 1)
+#define for_each_present_cpu(cpu)  for_each_cpu((cpu), 1)
+
+#endif /* __LINUX_CPUMASK_H */
diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h
new file mode 100644 (file)
index 0000000..1ac74f7
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _LINUX_CRC32C_H
+#define _LINUX_CRC32C_H
+
+#include "tools-util.h"
+
+#endif /* _LINUX_CRC32C_H */
diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644 (file)
index 0000000..c756e65
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
new file mode 100644 (file)
index 0000000..866b4c5
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Scatterlist Cryptographic API.
+ *
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no>
+ * and Nettle, by Niels Möller.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ */
+#ifndef _LINUX_CRYPTO_H
+#define _LINUX_CRYPTO_H
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
+#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
+
+struct crypto_type;
+
+struct crypto_alg {
+       struct list_head        cra_list;
+
+       const char              *cra_name;
+       const struct crypto_type *cra_type;
+
+       void *                  (*alloc_tfm)(void);
+} CRYPTO_MINALIGN_ATTR;
+
+int crypto_register_alg(struct crypto_alg *alg);
+
+struct crypto_tfm {
+       struct crypto_alg       *alg;
+};
+
+#endif /* _LINUX_CRYPTO_H */
+
diff --git a/include/linux/ctype.h b/include/linux/ctype.h
new file mode 100644 (file)
index 0000000..26b7de5
--- /dev/null
@@ -0,0 +1,2 @@
+
+#include <ctype.h>
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
new file mode 100644 (file)
index 0000000..7637854
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __LINUX_DCACHE_H
+#define __LINUX_DCACHE_H
+
+struct super_block;
+struct inode;
+
+struct dentry {
+       struct super_block *d_sb;
+       struct inode *d_inode;
+};
+
+#endif /* __LINUX_DCACHE_H */
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
new file mode 100644 (file)
index 0000000..9a78cb1
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ *  debugfs.h - a tiny little debug file system
+ *
+ *  Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
+ *  Copyright (C) 2004 IBM Inc.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License version
+ *     2 as published by the Free Software Foundation.
+ *
+ *  debugfs is for people to use instead of /proc or /sys.
+ *  See Documentation/DocBook/filesystems for more details.
+ */
+
+#ifndef _DEBUGFS_H_
+#define _DEBUGFS_H_
+
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct file_operations;
+
+#include <linux/err.h>
+
+static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
+                                       struct dentry *parent, void *data,
+                                       const struct file_operations *fops)
+{
+       return ERR_PTR(-ENODEV);
+}
+
+static inline struct dentry *debugfs_create_dir(const char *name,
+                                               struct dentry *parent)
+{
+       return ERR_PTR(-ENODEV);
+}
+
+static inline void debugfs_remove(struct dentry *dentry)
+{ }
+
+static inline void debugfs_remove_recursive(struct dentry *dentry)
+{ }
+
+#endif
diff --git a/include/linux/device.h b/include/linux/device.h
new file mode 100644 (file)
index 0000000..2b2b849
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _DEVICE_H_
+#define _DEVICE_H_
+
+#include <linux/slab.h>
+#include <linux/types.h>
+
+struct module;
+
+struct class {
+};
+
+static inline void class_destroy(struct class *class)
+{
+       kfree(class);
+}
+
+static inline struct class * __must_check class_create(struct module *owner,
+                                                      const char *name)
+{
+       return kzalloc(sizeof(struct class), GFP_KERNEL);
+}
+
+struct device {
+};
+
+static inline void device_unregister(struct device *dev)
+{
+       kfree(dev);
+}
+
+static inline void device_destroy(struct class *cls, dev_t devt) {}
+
+static inline struct device *device_create(struct class *cls, struct device *parent,
+                            dev_t devt, void *drvdata,
+                            const char *fmt, ...)
+{
+       return kzalloc(sizeof(struct device), GFP_KERNEL);
+}
+
+#endif /* _DEVICE_H_ */
diff --git a/include/linux/dynamic_fault.h b/include/linux/dynamic_fault.h
new file mode 100644 (file)
index 0000000..dd215dc
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef __TOOLS_LINUX_DYNAMIC_FAULT_H
+#define __TOOLS_LINUX_DYNAMIC_FAULT_H
+
+#define dynamic_fault(_class)                  0
+#define race_fault()                           0
+
+#endif /* __TOOLS_LINUX_DYNAMIC_FAULT_H */
diff --git a/include/linux/err.h b/include/linux/err.h
new file mode 100644 (file)
index 0000000..e94bdff
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef __TOOLS_LINUX_ERR_H
+#define __TOOLS_LINUX_ERR_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include <asm/errno.h>
+
+/*
+ * Original kernel header comment:
+ *
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a normal
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ *
+ * Userspace note:
+ * The same principle works for userspace, because 'error' pointers
+ * fall down to the unused hole far from user space, as described
+ * in Documentation/x86/x86_64/mm.txt for x86_64 arch:
+ *
+ * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
+ * hole caused by [48:63] sign extension
+ * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ *
+ * It should be the same case for other architectures, because
+ * this code is used in generic kernel code.
+ */
+#define MAX_ERRNO      4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error_)
+{
+       return (void *) error_;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+       return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+       return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
+{
+       return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline void * __must_check ERR_CAST(__force const void *ptr)
+{
+       /* cast away the const */
+       return (void *) ptr;
+}
+
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+       else
+               return 0;
+}
+
+#endif /* __TOOLS_LINUX_ERR_H */
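
The ERR_PTR()/IS_ERR() convention above in miniature (struct widget, the
table, and lookup_widget() are invented for the example):

        struct widget { int id; };
        static struct widget widgets[16];

        static struct widget *lookup_widget(int id)
        {
                if (id < 0 || id >= 16)
                        return ERR_PTR(-EINVAL);        /* errno encoded in the pointer */
                return &widgets[id];
        }

        static int use_widget(int id)
        {
                struct widget *w = lookup_widget(id);

                if (IS_ERR(w))
                        return PTR_ERR(w);              /* recover the errno */
                return w->id;
        }
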
diff --git a/include/linux/export.h b/include/linux/export.h
new file mode 100644 (file)
index 0000000..af9da96
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _TOOLS_LINUX_EXPORT_H_
+#define _TOOLS_LINUX_EXPORT_H_
+
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
+#define EXPORT_SYMBOL_GPL_FUTURE(sym)
+#define EXPORT_UNUSED_SYMBOL(sym)
+#define EXPORT_UNUSED_SYMBOL_GPL(sym)
+
+#define THIS_MODULE ((struct module *)0)
+#define KBUILD_MODNAME
+
+#endif
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
new file mode 100644 (file)
index 0000000..1af94d5
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef __TOOLS_LINUX_FREEZER_H
+#define __TOOLS_LINUX_FREEZER_H
+
+#define try_to_freeze()
+#define set_freezable()
+#define freezing(task)         false
+
+#endif /* __TOOLS_LINUX_FREEZER_H */
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
new file mode 100644 (file)
index 0000000..3a91130
--- /dev/null
@@ -0,0 +1,231 @@
+#ifndef _LINUX_GENERIC_RADIX_TREE_H
+#define _LINUX_GENERIC_RADIX_TREE_H
+
+/**
+ * DOC: Generic radix trees/sparse arrays:
+ *
+ * Very simple and minimalistic, supporting arbitrary size entries up to
+ * PAGE_SIZE.
+ *
+ * A genradix is defined with the type it will store, like so:
+ *
+ * static GENRADIX(struct foo) foo_genradix;
+ *
+ * The main operations are:
+ *
+ * - genradix_init(radix) - initialize an empty genradix
+ *
+ * - genradix_free(radix) - free all memory owned by the genradix and
+ *   reinitialize it
+ *
+ * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning
+ *   NULL if that entry does not exist
+ *
+ * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry,
+ *   allocating it if necessary
+ *
+ * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix
+ *
+ * The radix tree allocates one page of entries at a time, so entries may exist
+ * that were never explicitly allocated - they will be initialized to all
+ * zeroes.
+ *
+ * Internally, a genradix is just a radix tree of pages, and indexing works in
+ * terms of byte offsets. The wrappers in this header file use sizeof on the
+ * type the radix contains to calculate a byte offset from the index - see
+ * __idx_to_offset.
+ */
+
+#include <asm/page.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+struct genradix_root;
+
+struct __genradix {
+       struct genradix_root __rcu      *root;
+};
+
+/*
+ * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ */
+
+#define __GENRADIX_INITIALIZER                                 \
+       {                                                       \
+               .tree = {                                       \
+                       .root = NULL,                           \
+               }                                               \
+       }
+
+/*
+ * We use a 0 size array to stash the type we're storing without taking any
+ * space at runtime - then the various accessor macros can use typeof() to get
+ * to it for casts/sizeof - we also force the alignment so that storing a type
+ * with a ridiculous alignment doesn't blow up the alignment or size of the
+ * genradix.
+ */
+
+#define GENRADIX(_type)                                                \
+struct {                                                       \
+       struct __genradix       tree;                           \
+       _type                   type[0] __aligned(1);           \
+}
+
+#define DEFINE_GENRADIX(_name, _type)                          \
+       GENRADIX(_type) _name = __GENRADIX_INITIALIZER
+
+/**
+ * genradix_init - initialize a genradix
+ * @_radix:    genradix to initialize
+ *
+ * Does not fail
+ */
+#define genradix_init(_radix)                                  \
+do {                                                           \
+       *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER;   \
+} while (0)
+
+void __genradix_free(struct __genradix *);
+
+/**
+ * genradix_free: free all memory owned by a genradix
+ * @_radix: the genradix to free
+ *
+ * After freeing, @_radix will be reinitialized and empty
+ */
+#define genradix_free(_radix)  __genradix_free(&(_radix)->tree)
+
+static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
+{
+       if (__builtin_constant_p(obj_size))
+               BUILD_BUG_ON(obj_size > PAGE_SIZE);
+       else
+               BUG_ON(obj_size > PAGE_SIZE);
+
+       if (!is_power_of_2(obj_size)) {
+               size_t objs_per_page = PAGE_SIZE / obj_size;
+
+               return (idx / objs_per_page) * PAGE_SIZE +
+                       (idx % objs_per_page) * obj_size;
+       } else {
+               return idx * obj_size;
+       }
+}
+
+#define __genradix_cast(_radix)                (typeof((_radix)->type[0]) *)
+#define __genradix_obj_size(_radix)    sizeof((_radix)->type[0])
+#define __genradix_idx_to_offset(_radix, _idx)                 \
+       __idx_to_offset(_idx, __genradix_obj_size(_radix))
+
+void *__genradix_ptr(struct __genradix *, size_t);
+
+/**
+ * genradix_ptr - get a pointer to a genradix entry
+ * @_radix:    genradix to access
+ * @_idx:      index to fetch
+ *
+ * Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
+ */
+#define genradix_ptr(_radix, _idx)                             \
+       (__genradix_cast(_radix)                                \
+        __genradix_ptr(&(_radix)->tree,                        \
+                       __genradix_idx_to_offset(_radix, _idx)))
+
+void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
+ *                     if necessary
+ * @_radix:    genradix to access
+ * @_idx:      index to fetch
+ * @_gfp:      gfp mask
+ *
+ * Returns a pointer to entry at @_idx, or NULL on allocation failure
+ */
+#define genradix_ptr_alloc(_radix, _idx, _gfp)                 \
+       (__genradix_cast(_radix)                                \
+        __genradix_ptr_alloc(&(_radix)->tree,                  \
+                       __genradix_idx_to_offset(_radix, _idx), \
+                       _gfp))
+
+struct genradix_iter {
+       size_t                  offset;
+       size_t                  pos;
+};
+
+/**
+ * genradix_iter_init - initialize a genradix_iter
+ * @_radix:    genradix that will be iterated over
+ * @_idx:      index to start iterating from
+ */
+#define genradix_iter_init(_radix, _idx)                       \
+       ((struct genradix_iter) {                               \
+               .pos    = (_idx),                               \
+               .offset = __genradix_idx_to_offset((_radix), (_idx)),\
+       })
+
+void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
+
+/**
+ * genradix_iter_peek - get first entry at or above iterator's current
+ *                     position
+ * @_iter:     a genradix_iter
+ * @_radix:    genradix being iterated over
+ *
+ * If no more entries exist at or above @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek(_iter, _radix)                      \
+       (__genradix_cast(_radix)                                \
+        __genradix_iter_peek(_iter, &(_radix)->tree,           \
+                             PAGE_SIZE / __genradix_obj_size(_radix)))
+
+static inline void __genradix_iter_advance(struct genradix_iter *iter,
+                                          size_t obj_size)
+{
+       iter->offset += obj_size;
+
+       if (!is_power_of_2(obj_size) &&
+           (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
+               iter->offset = round_up(iter->offset, PAGE_SIZE);
+
+       iter->pos++;
+}
+
+#define genradix_iter_advance(_iter, _radix)                   \
+       __genradix_iter_advance(_iter, __genradix_obj_size(_radix))
+
+#define genradix_for_each_from(_radix, _iter, _p, _start)      \
+       for (_iter = genradix_iter_init(_radix, _start);        \
+            (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
+            genradix_iter_advance(&_iter, _radix))
+
+/**
+ * genradix_for_each - iterate over each entry in a genradix
+ * @_radix:    genradix to iterate over
+ * @_iter:     a genradix_iter to track current position
+ * @_p:                pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each(_radix, _iter, _p)                   \
+       genradix_for_each_from(_radix, _iter, _p, 0)
+
+int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_prealloc - preallocate entries in a generic radix tree
+ * @_radix:    genradix to preallocate
+ * @_nr:       number of entries to preallocate
+ * @_gfp:      gfp mask
+ *
+ * Returns 0 on success, -ENOMEM on failure
+ */
+#define genradix_prealloc(_radix, _nr, _gfp)                   \
+        __genradix_prealloc(&(_radix)->tree,                   \
+                       __genradix_idx_to_offset(_radix, _nr + 1),\
+                       _gfp)
+
+
+#endif /* _LINUX_GENERIC_RADIX_TREE_H */
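
Putting the genradix API above together (a sketch; struct foo and the index
values are illustrative):

        struct foo {
                int x;
        };

        static DEFINE_GENRADIX(foos, struct foo);

        static int set_foo(size_t idx, int x)
        {
                /* allocates the backing page on first touch */
                struct foo *f = genradix_ptr_alloc(&foos, idx, GFP_KERNEL);

                if (!f)
                        return -ENOMEM;
                f->x = x;
                return 0;
        }
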
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
new file mode 100644 (file)
index 0000000..3830bc2
--- /dev/null
@@ -0,0 +1 @@
+#include <linux/slab.h>
diff --git a/include/linux/hash.h b/include/linux/hash.h
new file mode 100644 (file)
index 0000000..ad6fa21
--- /dev/null
@@ -0,0 +1,104 @@
+#ifndef _LINUX_HASH_H
+#define _LINUX_HASH_H
+/* Fast hashing routine for ints,  longs and pointers.
+   (C) 2002 Nadia Yvette Chambers, IBM */
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and
+ * fs/inode.c.  It's not actually prime any more (the previous primes
+ * were actively bad for hashing), but the name remains.
+ */
+#if BITS_PER_LONG == 32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
+#define hash_long(val, bits) hash_32(val, bits)
+#elif BITS_PER_LONG == 64
+#define hash_long(val, bits) hash_64(val, bits)
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
+#else
+#error Wordsize not 32 or 64
+#endif
+
+/*
+ * This hash multiplies the input by a large odd number and takes the
+ * high bits.  Since multiplication propagates changes to the most
+ * significant end only, it is essential that the high bits of the
+ * product be used for the hash value.
+ *
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
+ *
+ * Although a random odd number will do, it turns out that the golden
+ * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
+ * properties.  (See Knuth vol 3, section 6.4, exercise 9.)
+ *
+ * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
+ * which is very slightly easier to multiply by and makes no
+ * difference to the hash distribution.
+ */
+#define GOLDEN_RATIO_32 0x61C88647
+#define GOLDEN_RATIO_64 0x61C8864680B583EBull
+
+#ifdef CONFIG_HAVE_ARCH_HASH
+/* This header may use the GOLDEN_RATIO_xx constants */
+#include <asm/hash.h>
+#endif
+
+/*
+ * The _generic versions exist only so lib/test_hash.c can compare
+ * the arch-optimized versions with the generic.
+ *
+ * Note that if you change these, any <asm/hash.h> that aren't updated
+ * to match need to have their HAVE_ARCH_* define values updated so the
+ * self-test will not false-positive.
+ */
+#ifndef HAVE_ARCH__HASH_32
+#define __hash_32 __hash_32_generic
+#endif
+static inline u32 __hash_32_generic(u32 val)
+{
+       return val * GOLDEN_RATIO_32;
+}
+
+#ifndef HAVE_ARCH_HASH_32
+#define hash_32 hash_32_generic
+#endif
+static inline u32 hash_32_generic(u32 val, unsigned int bits)
+{
+       /* High bits are more random, so use them. */
+       return __hash_32(val) >> (32 - bits);
+}
+
+#ifndef HAVE_ARCH_HASH_64
+#define hash_64 hash_64_generic
+#endif
+static __always_inline u32 hash_64_generic(u64 val, unsigned int bits)
+{
+#if BITS_PER_LONG == 64
+       /* 64x64-bit multiply is efficient on all 64-bit processors */
+       return val * GOLDEN_RATIO_64 >> (64 - bits);
+#else
+       /* Hash 64 bits using only 32x32-bit multiply. */
+       return hash_32((u32)val ^ __hash_32(val >> 32), bits);
+#endif
+}
+
+static inline u32 hash_ptr(const void *ptr, unsigned int bits)
+{
+       return hash_long((unsigned long)ptr, bits);
+}
+
+/* This really should be called fold32_ptr; it does no hashing to speak of. */
+static inline u32 hash32_ptr(const void *ptr)
+{
+       unsigned long val = (unsigned long)ptr;
+
+#if BITS_PER_LONG == 64
+       val ^= (val >> 32);
+#endif
+       return (u32)val;
+}
+
+#endif /* _LINUX_HASH_H */
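
For instance, deriving a bucket index for a 2^10-entry hash table from the
multiplicative hash above (a sketch):

        #define TABLE_BITS      10

        static unsigned int bucket_for(u32 key)
        {
                /* multiply by GOLDEN_RATIO_32 and keep the top TABLE_BITS bits */
                return hash_32(key, TABLE_BITS);
        }
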
diff --git a/include/linux/idr.h b/include/linux/idr.h
new file mode 100644 (file)
index 0000000..6f92825
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * include/linux/idr.h
+ * 
+ * 2002-10-18  written by Jim Houston jim.houston@ccur.com
+ *     Copyright (C) 2002 by Concurrent Computer Corporation
+ *     Distributed under the GNU GPL license version 2.
+ *
+ * Small id to pointer translation service avoiding fixed sized
+ * tables.
+ */
+
+#ifndef __IDR_H__
+#define __IDR_H__
+
+#include <linux/types.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+
+/*
+ * We want shallower trees and thus more bits covered at each layer.  8
+ * bits gives us large enough first layer for most use cases and maximum
+ * tree depth of 4.  Each idr_layer is slightly larger than 2k on 64bit and
+ * 1k on 32bit.
+ */
+#define IDR_BITS 8
+#define IDR_SIZE (1 << IDR_BITS)
+#define IDR_MASK ((1 << IDR_BITS)-1)
+
+struct idr_layer {
+       int                     prefix; /* the ID prefix of this idr_layer */
+       int                     layer;  /* distance from leaf */
+       struct idr_layer __rcu  *ary[1<<IDR_BITS];
+       int                     count;  /* When zero, we can release it */
+       union {
+               /* A zero bit means "space here" */
+               DECLARE_BITMAP(bitmap, IDR_SIZE);
+               struct rcu_head         rcu_head;
+       };
+};
+
+struct idr {
+       struct idr_layer __rcu  *hint;  /* the last layer allocated from */
+       struct idr_layer __rcu  *top;
+       int                     layers; /* only valid w/o concurrent changes */
+       int                     cur;    /* current pos for cyclic allocation */
+       spinlock_t              lock;
+       int                     id_free_cnt;
+       struct idr_layer        *id_free;
+};
+
+#define IDR_INIT(name)                                                 \
+{                                                                      \
+       .lock                   = __SPIN_LOCK_UNLOCKED(name.lock),      \
+}
+#define DEFINE_IDR(name)       struct idr name = IDR_INIT(name)
+
+/**
+ * DOC: idr sync
+ * idr synchronization (stolen from radix-tree.h)
+ *
+ * idr_find() can be called locklessly, using RCU. The caller must
+ * ensure calls to this function are made within rcu_read_lock() regions.
+ * Other readers (lock-free or otherwise) and modifications may be running
+ * concurrently.
+ *
+ * It is still required that the caller manage the synchronization and
+ * lifetimes of the items. So if RCU lock-free lookups are used, typically
+ * this would mean that the items have their own locks, or are amenable to
+ * lock-free access; and that the items are freed by RCU (or only freed after
+ * having been deleted from the idr tree *and* a synchronize_rcu() grace
+ * period).
+ */
+
+/*
+ * This is what we export.
+ */
+
+void *idr_find_slowpath(struct idr *idp, int id);
+void idr_preload(gfp_t gfp_mask);
+
+static inline int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask)
+{
+       return 0;
+}
+
+static inline void idr_remove(struct idr *idp, int id) {}
+
+int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask);
+int idr_for_each(struct idr *idp,
+                int (*fn)(int id, void *p, void *data), void *data);
+void *idr_get_next(struct idr *idp, int *nextid);
+void *idr_replace(struct idr *idp, void *ptr, int id);
+void idr_destroy(struct idr *idp);
+void idr_init(struct idr *idp);
+bool idr_is_empty(struct idr *idp);
+
+/**
+ * idr_preload_end - end preload section started with idr_preload()
+ *
+ * Each idr_preload() should be matched with an invocation of this
+ * function.  See idr_preload() for details.
+ */
+static inline void idr_preload_end(void)
+{
+       preempt_enable();
+}
+
+/**
+ * idr_find - return pointer for given id
+ * @idr: idr handle
+ * @id: lookup key
+ *
+ * Return the pointer given the id it has been registered with.  A %NULL
+ * return indicates that @id is not valid or you passed %NULL in
+ * idr_get_new().
+ *
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
+ */
+static inline void *idr_find(struct idr *idr, int id)
+{
+       struct idr_layer *hint = rcu_dereference_raw(idr->hint);
+
+       if (hint && (id & ~IDR_MASK) == hint->prefix)
+               return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
+
+       return idr_find_slowpath(idr, id);
+}
+
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ *
+ * @entry and @id do not need to be initialized before the loop, and
+ * after normal termination @entry is left with the value NULL.  This
+ * is convenient for a "not found" value.
+ */
+#define idr_for_each_entry(idp, entry, id)                     \
+       for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
+
+/**
+ * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define idr_for_each_entry_continue(idp, entry, id)                    \
+       for ((entry) = idr_get_next((idp), &(id));                      \
+            entry;                                                     \
+            ++id, (entry) = idr_get_next((idp), &(id)))
+
+/*
+ * IDA - IDR based id allocator, use when translation from id to
+ * pointer isn't necessary.
+ *
+ * IDA_BITMAP_LONGS is calculated to be one less to accommodate
+ * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes.
+ */
+#define IDA_CHUNK_SIZE         128     /* 128 bytes per chunk */
+#define IDA_BITMAP_LONGS       (IDA_CHUNK_SIZE / sizeof(long) - 1)
+#define IDA_BITMAP_BITS        (IDA_BITMAP_LONGS * sizeof(long) * 8)
+
+struct ida_bitmap {
+       long                    nr_busy;
+       unsigned long           bitmap[IDA_BITMAP_LONGS];
+};
+
+struct ida {
+       struct idr              idr;
+       struct ida_bitmap       *free_bitmap;
+};
+
+#define IDA_INIT(name)         { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
+#define DEFINE_IDA(name)       struct ida name = IDA_INIT(name)
+
+int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
+int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
+void ida_remove(struct ida *ida, int id);
+void ida_destroy(struct ida *ida);
+void ida_init(struct ida *ida);
+
+int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
+                  gfp_t gfp_mask);
+void ida_simple_remove(struct ida *ida, unsigned int id);
+
+/**
+ * ida_get_new - allocate new ID
+ * @ida:       idr handle
+ * @p_id:      pointer to the allocated handle
+ *
+ * Simple wrapper around ida_get_new_above() w/ @starting_id of zero.
+ */
+static inline int ida_get_new(struct ida *ida, int *p_id)
+{
+       return ida_get_new_above(ida, 0, p_id);
+}
+
+void __init idr_init_cache(void);
+
+#endif /* __IDR_H__ */
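
The IDA half of this header is the piece most callers want: bare integer ID
allocation without an associated pointer (a sketch):

        static DEFINE_IDA(ctx_ida);

        static int new_ctx_id(void)
        {
                /* any free id >= 0; end == 0 means no upper bound */
                return ida_simple_get(&ctx_ida, 0, 0, GFP_KERNEL);
        }

        static void free_ctx_id(int id)
        {
                ida_simple_remove(&ctx_ida, id);
        }
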
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
new file mode 100644 (file)
index 0000000..822c64a
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef IOPRIO_H
+#define IOPRIO_H
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_BITS            (16)
+#define IOPRIO_CLASS_SHIFT     (13)
+#define IOPRIO_PRIO_MASK       ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(mask)        ((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
+
+#define ioprio_valid(mask)     (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+
+/*
+ * These are the io priority groups as implemented by CFQ. RT is the realtime
+ * class, it always gets premium service. BE is the best-effort scheduling
+ * class, the default for any process. IDLE is the idle scheduling class, it
+ * is only served when no one else is using the disk.
+ */
+enum {
+       IOPRIO_CLASS_NONE,
+       IOPRIO_CLASS_RT,
+       IOPRIO_CLASS_BE,
+       IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * 8 best effort priority levels are supported
+ */
+#define IOPRIO_BE_NR   (8)
+
+enum {
+       IOPRIO_WHO_PROCESS = 1,
+       IOPRIO_WHO_PGRP,
+       IOPRIO_WHO_USER,
+};
+
+/*
+ * Fallback BE priority
+ */
+#define IOPRIO_NORM    (4)
+
+#endif
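
The class/data split above packs into a single integer; a short sketch of composing and decomposing a best-effort priority:

    #include <assert.h>
    #include <linux/ioprio.h>

    static void ioprio_demo(void)
    {
            int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);

            assert(IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_BE);
            assert(IOPRIO_PRIO_DATA(prio)  == IOPRIO_NORM);
            assert(ioprio_valid(prio));     /* class is not IOPRIO_CLASS_NONE */
    }
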
diff --git a/include/linux/jhash.h b/include/linux/jhash.h
new file mode 100644 (file)
index 0000000..348c6f4
--- /dev/null
@@ -0,0 +1,175 @@
+#ifndef _LINUX_JHASH_H
+#define _LINUX_JHASH_H
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/bitops.h>
+#include <linux/unaligned/packed_struct.h>
+
+/* The best hash table sizes are powers of two */
+#define jhash_size(n)   ((u32)1<<(n))
+/* Mask the hash value, i.e. (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n)   (jhash_size(n)-1)
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c)                   \
+{                                              \
+       a -= c;  a ^= rol32(c, 4);  c += b;     \
+       b -= a;  b ^= rol32(a, 6);  a += c;     \
+       c -= b;  c ^= rol32(b, 8);  b += a;     \
+       a -= c;  a ^= rol32(c, 16); c += b;     \
+       b -= a;  b ^= rol32(a, 19); a += c;     \
+       c -= b;  c ^= rol32(b, 4);  b += a;     \
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c)                 \
+{                                              \
+       c ^= b; c -= rol32(b, 14);              \
+       a ^= c; a -= rol32(c, 11);              \
+       b ^= a; b -= rol32(a, 25);              \
+       c ^= b; c -= rol32(b, 16);              \
+       a ^= c; a -= rol32(c, 4);               \
+       b ^= a; b -= rol32(a, 14);              \
+       c ^= b; c -= rol32(b, 24);              \
+}
+
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL          0xdeadbeef
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+       u32 a, b, c;
+       const u8 *k = key;
+
+       /* Set up the internal state */
+       a = b = c = JHASH_INITVAL + length + initval;
+
+       /* All but the last block: affect some 32 bits of (a,b,c) */
+       while (length > 12) {
+               a += __get_unaligned_cpu32(k);
+               b += __get_unaligned_cpu32(k + 4);
+               c += __get_unaligned_cpu32(k + 8);
+               __jhash_mix(a, b, c);
+               length -= 12;
+               k += 12;
+       }
+       /* Last block: affect all 32 bits of (c) */
+       /* All the case statements fall through */
+       switch (length) {
+       case 12: c += (u32)k[11]<<24;
+       case 11: c += (u32)k[10]<<16;
+       case 10: c += (u32)k[9]<<8;
+       case 9:  c += k[8];
+       case 8:  b += (u32)k[7]<<24;
+       case 7:  b += (u32)k[6]<<16;
+       case 6:  b += (u32)k[5]<<8;
+       case 5:  b += k[4];
+       case 4:  a += (u32)k[3]<<24;
+       case 3:  a += (u32)k[2]<<16;
+       case 2:  a += (u32)k[1]<<8;
+       case 1:  a += k[0];
+                __jhash_final(a, b, c);
+       case 0: /* Nothing left to add */
+               break;
+       }
+
+       return c;
+}
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * Returns the hash value of the key.
+ */
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+       u32 a, b, c;
+
+       /* Set up the internal state */
+       a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+       /* Handle most of the key */
+       while (length > 3) {
+               a += k[0];
+               b += k[1];
+               c += k[2];
+               __jhash_mix(a, b, c);
+               length -= 3;
+               k += 3;
+       }
+
+       /* Handle the last 3 u32's: all the case statements fall through */
+       switch (length) {
+       case 3: c += k[2];
+       case 2: b += k[1];
+       case 1: a += k[0];
+               __jhash_final(a, b, c);
+       case 0: /* Nothing left to add */
+               break;
+       }
+
+       return c;
+}
+
+
+/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+       a += initval;
+       b += initval;
+       c += initval;
+
+       __jhash_final(a, b, c);
+
+       return c;
+}
+
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+       return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2));
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+       return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+       return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2));
+}
+
+#endif /* _LINUX_JHASH_H */
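
A sketch of hashing a small fixed-size key with the word-sized helpers above; the struct and seed are illustrative:

    #include <linux/jhash.h>

    struct flow_key {
            u32 saddr, daddr, ports;
    };

    static u32 flow_hash(const struct flow_key *k, u32 seed)
    {
            /* three u32s fit jhash_3words(); arbitrary buffers go through jhash() */
            return jhash_3words(k->saddr, k->daddr, k->ports, seed);
    }

For variable-length keys, jhash(key, length, seed) makes no alignment assumptions, at the cost of the byte-wise tail handling shown in the switch above.
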
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
new file mode 100644 (file)
index 0000000..9b8dd43
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef _LINUX_JIFFIES_H
+#define _LINUX_JIFFIES_H
+
+#include <time.h>      /* clock_gettime(), struct timespec */
+
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/typecheck.h>
+#include <linux/types.h>
+
+#define time_after(a,b)                \
+       (typecheck(unsigned long, a) && \
+        typecheck(unsigned long, b) && \
+        ((long)((b) - (a)) < 0))
+#define time_before(a,b)       time_after(b,a)
+
+#define time_after_eq(a,b)     \
+       (typecheck(unsigned long, a) && \
+        typecheck(unsigned long, b) && \
+        ((long)((a) - (b)) >= 0))
+#define time_before_eq(a,b)    time_after_eq(b,a)
+
+#define time_in_range(a,b,c) \
+       (time_after_eq(a,b) && \
+        time_before_eq(a,c))
+
+#define time_in_range_open(a,b,c) \
+       (time_after_eq(a,b) && \
+        time_before(a,c))
+
+#define time_after64(a,b)      \
+       (typecheck(__u64, a) && \
+        typecheck(__u64, b) && \
+        ((__s64)((b) - (a)) < 0))
+#define time_before64(a,b)     time_after64(b,a)
+
+#define time_after_eq64(a,b)   \
+       (typecheck(__u64, a) && \
+        typecheck(__u64, b) && \
+        ((__s64)((a) - (b)) >= 0))
+#define time_before_eq64(a,b)  time_after_eq64(b,a)
+
+#define time_in_range64(a, b, c) \
+       (time_after_eq64(a, b) && \
+        time_before_eq64(a, c))
+
+#define HZ             1000
+
+static inline u64 jiffies_to_nsecs(const unsigned long j)
+{
+       return (u64)j * NSEC_PER_MSEC;
+}
+
+static inline unsigned jiffies_to_msecs(const unsigned long j)
+{
+       return j;
+}
+
+static inline unsigned long msecs_to_jiffies(const unsigned int m)
+{
+       return m;
+}
+
+static inline unsigned long nsecs_to_jiffies(u64 n)
+{
+       return n / NSEC_PER_MSEC;
+}
+
+static inline u64 sched_clock(void)
+{
+       struct timespec ts;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts);
+
+       return ((s64) ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
+}
+
+static inline u64 local_clock(void)
+{
+       return sched_clock();
+}
+
+#define jiffies                        nsecs_to_jiffies(sched_clock())
+
+#endif
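
Since HZ is pinned to 1000 here, a jiffy is a millisecond. A small sketch of a wraparound-safe timeout check using the macros above:

    #include <linux/jiffies.h>

    static bool timed_out(unsigned long start, unsigned int timeout_ms)
    {
            /* time_after() survives counter wraparound; a plain '>' would not */
            return time_after(jiffies, start + msecs_to_jiffies(timeout_ms));
    }

Here start would be a jiffies value sampled when the operation began.
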
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
new file mode 100644 (file)
index 0000000..10d94c5
--- /dev/null
@@ -0,0 +1,222 @@
+#ifndef __TOOLS_LINUX_KERNEL_H
+#define __TOOLS_LINUX_KERNEL_H
+
+#include <assert.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <limits.h>
+
+#include <linux/bug.h>
+#include <linux/byteorder.h>
+#include <linux/compiler.h>
+
+#define IS_ENABLED(opt)                0
+#define EXPORT_SYMBOL(sym)
+
+#define U8_MAX         ((u8)~0U)
+#define S8_MAX         ((s8)(U8_MAX>>1))
+#define S8_MIN         ((s8)(-S8_MAX - 1))
+#define U16_MAX                ((u16)~0U)
+#define S16_MAX                ((s16)(U16_MAX>>1))
+#define S16_MIN                ((s16)(-S16_MAX - 1))
+#define U32_MAX                ((u32)~0U)
+#define S32_MAX                ((s32)(U32_MAX>>1))
+#define S32_MIN                ((s32)(-S32_MAX - 1))
+#define U64_MAX                ((u64)~0ULL)
+#define S64_MAX                ((s64)(U64_MAX>>1))
+#define S64_MIN                ((s64)(-S64_MAX - 1))
+
+#define ALIGN(x, a)    __ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __ALIGN_MASK(x, mask)  (((x)+(mask))&~(mask))
+
+#define PTR_ALIGN(p, a)                ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#define IS_ALIGNED(x, a)               (((x) & ((typeof(x))(a) - 1)) == 0)
+
+#define __must_be_array(a)     BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:       the pointer to the member.
+ * @type:      the type of the container struct this is embedded in.
+ * @member:    the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({                     \
+       const typeof(((type *)0)->member) * __mptr = (ptr);     \
+       (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+#define roundup(x, y)                                  \
+({                                                     \
+       const typeof(y) __y = y;                        \
+       (((x) + (__y - 1)) / __y) * __y;                \
+})
+
+#define max(x, y) ({                           \
+       typeof(x) _max1 = (x);                  \
+       typeof(y) _max2 = (y);                  \
+       (void) (&_max1 == &_max2);              \
+       _max1 > _max2 ? _max1 : _max2; })
+
+#define min(x, y) ({                           \
+       typeof(x) _min1 = (x);                  \
+       typeof(y) _min2 = (y);                  \
+       (void) (&_min1 == &_min2);              \
+       _min1 < _min2 ? _min1 : _min2; })
+
+#define min_t(type, x, y) ({                   \
+       type __min1 = (x);                      \
+       type __min2 = (y);                      \
+       __min1 < __min2 ? __min1: __min2; })
+
+#define max_t(type, x, y) ({                   \
+       type __max1 = (x);                      \
+       type __max2 = (y);                      \
+       __max1 > __max2 ? __max1: __max2; })
+
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
+#define swap(a, b) \
+       do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+/* This counts to 12. Any more and it will return the 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define _RET_IP_               (unsigned long)__builtin_return_address(0)
+#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
+
+#define might_sleep()
+
+#define cpu_relax()            do {} while (0)
+#define cpu_relax_lowlatency() do {} while (0)
+
+#define panic(fmt, ...)                                        \
+do {                                                   \
+       printf(fmt, ##__VA_ARGS__);                     \
+       BUG();                                          \
+} while (0)
+
+int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
+int __must_check _kstrtol(const char *s, unsigned int base, long *res);
+
+int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
+int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+       /*
+        * We want to shortcut function call, but
+        * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0.
+        */
+       if (sizeof(unsigned long) == sizeof(unsigned long long) &&
+           __alignof__(unsigned long) == __alignof__(unsigned long long))
+               return kstrtoull(s, base, (unsigned long long *)res);
+       else
+               return _kstrtoul(s, base, res);
+}
+
+/**
+ * kstrtol - convert a string to a long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
+{
+       /*
+        * We want to shortcut function call, but
+        * __builtin_types_compatible_p(long, long long) = 0.
+        */
+       if (sizeof(long) == sizeof(long long) &&
+           __alignof__(long) == __alignof__(long long))
+               return kstrtoll(s, base, (long long *)res);
+       else
+               return _kstrtol(s, base, res);
+}
+
+int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res);
+int __must_check kstrtoint(const char *s, unsigned int base, int *res);
+
+static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res)
+{
+       return kstrtoull(s, base, res);
+}
+
+static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res)
+{
+       return kstrtoll(s, base, res);
+}
+
+static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res)
+{
+       return kstrtouint(s, base, res);
+}
+
+static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res)
+{
+       return kstrtoint(s, base, res);
+}
+
+/* The hash is always the low bits of hash_len */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define HASH_LEN_DECLARE u32 hash; u32 len
+#else
+ #define HASH_LEN_DECLARE u32 len; u32 hash
+#endif
+
+struct qstr {
+       union {
+               struct {
+                       HASH_LEN_DECLARE;
+               };
+               u64 hash_len;
+       };
+       const unsigned char *name;
+};
+
+#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
+
+#define POISON_FREE 0x6b
+
+#endif
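
container_of() is the workhorse here; a minimal sketch of recovering an enclosing struct from a pointer to one of its members (both struct names are illustrative):

    #include <linux/kernel.h>

    struct my_endpoint {
            int id;
    };

    struct my_driver {
            int state;
            struct my_endpoint ep;
    };

    static struct my_driver *to_my_driver(struct my_endpoint *ep)
    {
            /* subtracts offsetof(struct my_driver, ep) from the member pointer */
            return container_of(ep, struct my_driver, ep);
    }
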
diff --git a/include/linux/key.h b/include/linux/key.h
new file mode 100644 (file)
index 0000000..cc6859a
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef _LINUX_KEY_H
+#define _LINUX_KEY_H
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <keyutils.h>
+
+struct user_key_payload {
+       size_t          datalen;        /* length of this data */
+       char            data[0];        /* actual data */
+};
+
+struct key {
+       atomic_t                usage;          /* number of references */
+       key_serial_t            serial;         /* key serial number */
+       struct rw_semaphore     sem;            /* change vs change sem */
+       struct user_key_payload payload;
+};
+
+static inline const struct user_key_payload *user_key_payload(const struct key *key)
+{
+       return &key->payload;
+}
+
+static inline void key_put(struct key *key)
+{
+       if (atomic_dec_and_test(&key->usage))
+               free(key);
+}
+
+static inline struct key *__key_get(struct key *key)
+{
+       atomic_inc(&key->usage);
+       return key;
+}
+
+static inline struct key *key_get(struct key *key)
+{
+       return key ? __key_get(key) : key;
+}
+
+#endif /* _LINUX_KEY_H */
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
new file mode 100644 (file)
index 0000000..d524178
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * kobject.h - generic kernel object infrastructure.
+ *
+ * Copyright (c) 2002-2003 Patrick Mochel
+ * Copyright (c) 2002-2003 Open Source Development Labs
+ * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (c) 2006-2008 Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Please read Documentation/kobject.txt before using the kobject
+ * interface, ESPECIALLY the parts about reference counts and object
+ * destructors.
+ */
+
+#ifndef _KOBJECT_H_
+#define _KOBJECT_H_
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct kset;
+
+struct kobj_type {
+       void (*release)(struct kobject *kobj);
+       const struct sysfs_ops *sysfs_ops;
+       struct attribute **default_attrs;
+       const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+       const void *(*namespace)(struct kobject *kobj);
+};
+
+struct kobj_uevent_env {
+};
+
+struct kobj_attribute {
+       struct attribute attr;
+       ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
+                       char *buf);
+       ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+                        const char *buf, size_t count);
+};
+
+struct kobject {
+       struct kobject          *parent;
+       struct kset             *kset;
+       struct kobj_type        *ktype;
+       struct kernfs_node      *sd; /* sysfs directory entry */
+       atomic_t                ref;
+       unsigned int state_initialized:1;
+       unsigned int state_in_sysfs:1;
+       unsigned int state_add_uevent_sent:1;
+       unsigned int state_remove_uevent_sent:1;
+       unsigned int uevent_suppress:1;
+};
+
+struct kset {
+       struct kobject          kobj;
+};
+
+#define kobject_add(...)       0
+
+static inline void kobject_init(struct kobject *kobj, struct kobj_type *ktype)
+{
+       memset(kobj, 0, sizeof(*kobj));
+
+       atomic_set(&kobj->ref, 1);
+       kobj->ktype = ktype;
+       kobj->state_initialized = 1;
+}
+
+static inline void kobject_del(struct kobject *kobj);
+
+static inline void kobject_cleanup(struct kobject *kobj)
+{
+       struct kobj_type *t = kobj->ktype;
+
+       /* remove from sysfs if the caller did not do it */
+       if (kobj->state_in_sysfs)
+               kobject_del(kobj);
+
+       if (t && t->release)
+               t->release(kobj);
+}
+
+static inline void kobject_put(struct kobject *kobj)
+{
+       BUG_ON(!kobj);
+       BUG_ON(!kobj->state_initialized);
+
+       if (atomic_dec_and_test(&kobj->ref))
+               kobject_cleanup(kobj);
+}
+
+static inline void kobject_del(struct kobject *kobj)
+{
+       struct kernfs_node *sd;
+
+       if (!kobj)
+               return;
+
+       sd = kobj->sd;
+       kobj->state_in_sysfs = 0;
+#if 0
+       kobj_kset_leave(kobj);
+#endif
+       kobject_put(kobj->parent);
+       kobj->parent = NULL;
+}
+
+static inline struct kobject *kobject_get(struct kobject *kobj)
+{
+       BUG_ON(!kobj);
+       BUG_ON(!kobj->state_initialized);
+
+       atomic_inc(&kobj->ref);
+       return kobj;
+}
+
+static inline void kset_unregister(struct kset *kset)
+{
+       kfree(kset);
+}
+
+#define kset_create_and_add(_name, _u, _parent)                                \
+       ((struct kset *) kzalloc(sizeof(struct kset), GFP_KERNEL))
+
+#endif /* _KOBJECT_H_ */
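
A sketch of the init/get/put lifecycle these stubs preserve; kzalloc(), kfree() and GFP_KERNEL are assumed to come from the tree's slab.h shim:

    #include <linux/kobject.h>

    static void my_release(struct kobject *kobj)
    {
            kfree(kobj);                    /* runs when the last reference drops */
    }

    static struct kobj_type my_ktype = { .release = my_release };

    static void kobject_demo(void)
    {
            struct kobject *kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);

            kobject_init(kobj, &my_ktype);  /* refcount starts at 1 */
            kobject_get(kobj);              /* 2 */
            kobject_put(kobj);              /* 1 */
            kobject_put(kobj);              /* 0 -> my_release() */
    }
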
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
new file mode 100644 (file)
index 0000000..3a8cf10
--- /dev/null
@@ -0,0 +1,118 @@
+#ifndef _LINUX_KTHREAD_H
+#define _LINUX_KTHREAD_H
+
+/* Simple interface for creating and stopping kernel threads without mess. */
+#include <linux/err.h>
+#include <linux/lockdep.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+__printf(3, 4)
+struct task_struct *kthread_create(int (*threadfn)(void *data),
+                                  void *data,
+                                  const char namefmt[], ...);
+
+
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+                                         void *data,
+                                         unsigned int cpu,
+                                         const char *namefmt);
+
+/**
+ * kthread_run - create and wake a thread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: Convenient wrapper for kthread_create() followed by
+ * wake_up_process().  Returns the kthread or ERR_PTR(-ENOMEM).
+ */
+#define kthread_run(threadfn, data, namefmt, ...)                         \
+({                                                                        \
+       struct task_struct *__k                                            \
+               = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
+       if (!IS_ERR(__k))                                                  \
+               wake_up_process(__k);                                      \
+       __k;                                                               \
+})
+
+int kthread_stop(struct task_struct *k);
+bool kthread_should_stop(void);
+bool kthread_should_park(void);
+bool kthread_freezable_should_stop(bool *was_frozen);
+void *kthread_data(struct task_struct *k);
+void *probe_kthread_data(struct task_struct *k);
+int kthread_park(struct task_struct *k);
+void kthread_unpark(struct task_struct *k);
+void kthread_parkme(void);
+
+int kthreadd(void *unused);
+extern struct task_struct *kthreadd_task;
+extern int tsk_fork_get_node(struct task_struct *tsk);
+
+/*
+ * Simple work processor based on kthread.
+ *
+ * This provides an easier way to make use of kthreads.  A kthread_work
+ * can be queued and flushed using queue/flush_kthread_work()
+ * respectively.  Queued kthread_works are processed by a kthread
+ * running kthread_worker_fn().
+ */
+struct kthread_work;
+typedef void (*kthread_work_func_t)(struct kthread_work *work);
+
+struct kthread_worker {
+       spinlock_t              lock;
+       struct list_head        work_list;
+       struct task_struct      *task;
+       struct kthread_work     *current_work;
+};
+
+struct kthread_work {
+       struct list_head        node;
+       kthread_work_func_t     func;
+       struct kthread_worker   *worker;
+};
+
+#define KTHREAD_WORKER_INIT(worker)    {                               \
+       .lock = __SPIN_LOCK_UNLOCKED((worker).lock),                    \
+       .work_list = LIST_HEAD_INIT((worker).work_list),                \
+       }
+
+#define KTHREAD_WORK_INIT(work, fn)    {                               \
+       .node = LIST_HEAD_INIT((work).node),                            \
+       .func = (fn),                                                   \
+       }
+
+#define DEFINE_KTHREAD_WORKER(worker)                                  \
+       struct kthread_worker worker = KTHREAD_WORKER_INIT(worker)
+
+#define DEFINE_KTHREAD_WORK(work, fn)                                  \
+       struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
+
+#define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
+
+extern void __init_kthread_worker(struct kthread_worker *worker,
+                       const char *name, struct lock_class_key *key);
+
+#define init_kthread_worker(worker)                                    \
+       do {                                                            \
+               static struct lock_class_key __key;                     \
+               __init_kthread_worker((worker), "("#worker")->lock", &__key); \
+       } while (0)
+
+#define init_kthread_work(work, fn)                                    \
+       do {                                                            \
+               memset((work), 0, sizeof(struct kthread_work));         \
+               INIT_LIST_HEAD(&(work)->node);                          \
+               (work)->func = (fn);                                    \
+       } while (0)
+
+int kthread_worker_fn(void *worker_ptr);
+
+bool queue_kthread_work(struct kthread_worker *worker,
+                       struct kthread_work *work);
+void flush_kthread_work(struct kthread_work *work);
+void flush_kthread_worker(struct kthread_worker *worker);
+
+#endif /* _LINUX_KTHREAD_H */
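
A sketch of the kthread_work flow described in the comment block above, assuming a worker thread has already been started with kthread_run(kthread_worker_fn, &worker, ...) and that the tree provides the corresponding kthread.c implementation:

    #include <linux/kthread.h>

    struct my_work {
            struct kthread_work work;
            int arg;
    };

    static void my_work_fn(struct kthread_work *work)
    {
            struct my_work *w = container_of(work, struct my_work, work);

            printf("processing %d\n", w->arg);
    }

    static void submit(struct kthread_worker *worker)
    {
            struct my_work w;

            init_kthread_work(&w.work, my_work_fn);
            w.arg = 42;
            queue_kthread_work(worker, &w.work);
            flush_kthread_work(&w.work);    /* wait until my_work_fn has run */
    }
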
diff --git a/include/linux/list.h b/include/linux/list.h
new file mode 100644 (file)
index 0000000..4a31709
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+#include <urcu/list.h>
+
+#define list_head                      cds_list_head
+#define LIST_HEAD_INIT(l)              CDS_LIST_HEAD_INIT(l)
+#define LIST_HEAD(l)                   CDS_LIST_HEAD(l)
+#define INIT_LIST_HEAD(l)              CDS_INIT_LIST_HEAD(l)
+#define list_add(n, h)                 cds_list_add(n, h)
+#define list_add_tail(n, h)            cds_list_add_tail(n, h)
+#define __list_del_entry(l)            cds_list_del(l)
+#define list_del(l)                    cds_list_del(l)
+#define list_del_init(l)               cds_list_del_init(l)
+#define list_replace(o, n)             cds_list_replace(o, n)
+#define list_replace_init(o, n)                cds_list_replace_init(o, n)
+#define list_move(l, h)                        cds_list_move(l, h)
+#define list_empty(l)                  cds_list_empty(l)
+#define list_splice(l, h)              cds_list_splice(l, h)
+#define list_entry(p, t, m)            cds_list_entry(p, t, m)
+#define list_first_entry(p, t, m)      cds_list_first_entry(p, t, m)
+#define list_for_each(p, h)            cds_list_for_each(p, h)
+#define list_for_each_prev(p, h)       cds_list_for_each_prev(p, h)
+#define list_for_each_safe(p, n, h)    cds_list_for_each_safe(p, n, h)
+#define list_for_each_prev_safe(p, n, h) cds_list_for_each_prev_safe(p, n, h)
+#define list_for_each_entry(p, h, m)   cds_list_for_each_entry(p, h, m)
+#define list_for_each_entry_reverse(p, h, m) cds_list_for_each_entry_reverse(p, h, m)
+#define list_for_each_entry_safe(p, n, h, m) cds_list_for_each_entry_safe(p, n, h, m)
+#define list_for_each_entry_safe_reverse(p, n, h, m) cds_list_for_each_entry_safe_reverse(p, n, h, m)
+
+static inline int list_empty_careful(const struct list_head *head)
+{
+       struct list_head *next = head->next;
+       return (next == head) && (next == head->prev);
+}
+
+static inline void list_move_tail(struct list_head *list,
+                                 struct list_head *head)
+{
+       list_del(list);
+       list_add_tail(list, head);
+}
+
+static inline void list_splice_init(struct list_head *list,
+                                   struct list_head *head)
+{
+       list_splice(list, head);
+       INIT_LIST_HEAD(list);
+}
+
+#define list_last_entry(ptr, type, member) \
+       list_entry((ptr)->prev, type, member)
+
+#define list_first_entry_or_null(ptr, type, member) \
+       (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+
+/* hlists: */
+
+#include <urcu/hlist.h>
+
+#define hlist_head                     cds_hlist_head
+#define hlist_node                     cds_hlist_node
+
+#endif /* _LINUX_LIST_H */
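
The wrappers map one-to-one onto liburcu's cds_list API, so the usual kernel iteration style carries over; a brief sketch:

    #include <linux/list.h>

    struct item {
            int val;
            struct list_head list;
    };

    static int sum_items(struct list_head *head)
    {
            struct item *i;
            int total = 0;

            list_for_each_entry(i, head, list)
                    total += i->val;
            return total;
    }

list_for_each_entry_safe() is the variant to use when entries are deleted while walking.
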
diff --git a/include/linux/llist.h b/include/linux/llist.h
new file mode 100644 (file)
index 0000000..2e9c721
--- /dev/null
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LLIST_H
+#define LLIST_H
+/*
+ * Lock-less NULL terminated single linked list
+ *
+ * Cases where locking is not needed:
+ * If there are multiple producers and multiple consumers, llist_add can be
+ * used in producers and llist_del_all can be used in consumers simultaneously
+ * without locking. Also a single consumer can use llist_del_first while
+ * multiple producers simultaneously use llist_add, without any locking.
+ *
+ * Cases where locking is needed:
+ * If we have multiple consumers with llist_del_first used in one consumer, and
+ * llist_del_first or llist_del_all used in other consumers, then a lock is
+ * needed.  This is because llist_del_first depends on list->first->next not
+ * changing, but without lock protection, there's no way to be sure about that
+ * if a preemption happens in the middle of the delete operation and on being
+ * preempted back, the list->first is the same as before causing the cmpxchg in
+ * llist_del_first to succeed. For example, while a llist_del_first operation
+ * is in progress in one consumer, then a llist_del_first, llist_add,
+ * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
+ * consumer may cause violations.
+ *
+ * This can be summarized as follows:
+ *
+ *           |   add    | del_first |  del_all
+ * add       |    -     |     -     |     -
+ * del_first |          |     L     |     L
+ * del_all   |          |           |     -
+ *
+ * Where, a particular row's operation can happen concurrently with a column's
+ * operation, with "-" being no lock needed, while "L" being lock is needed.
+ *
+ * The list entries deleted via llist_del_all can be traversed with
+ * a traversal function such as llist_for_each etc.  But the list
+ * entries cannot be traversed safely before being deleted from the list.
+ * The order of deleted entries is from the newest to the oldest added
+ * one.  If you want to traverse from the oldest to the newest, you
+ * must reverse the order by yourself before traversing.
+ *
+ * The basic atomic operation of this list is cmpxchg on long.  On
+ * architectures that don't have NMI-safe cmpxchg implementation, the
+ * list can NOT be used in NMI handlers.  So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+
+struct llist_head {
+       struct llist_node *first;
+};
+
+struct llist_node {
+       struct llist_node *next;
+};
+
+#define LLIST_HEAD_INIT(name)  { NULL }
+#define LLIST_HEAD(name)       struct llist_head name = LLIST_HEAD_INIT(name)
+
+/**
+ * init_llist_head - initialize lock-less list head
+ * @head:      the head for your lock-less list
+ */
+static inline void init_llist_head(struct llist_head *list)
+{
+       list->first = NULL;
+}
+
+/**
+ * llist_entry - get the struct of this entry
+ * @ptr:       the &struct llist_node pointer.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the llist_node within the struct.
+ */
+#define llist_entry(ptr, type, member)         \
+       container_of(ptr, type, member)
+
+/**
+ * member_address_is_nonnull - check whether the member address is not NULL
+ * @ptr:       the object pointer (struct type * that contains the llist_node)
+ * @member:    the name of the llist_node within the struct.
+ *
+ * This macro is conceptually the same as
+ *     &ptr->member != NULL
+ * but it works around the fact that compilers can decide that taking a member
+ * address is never a NULL pointer.
+ *
+ * Real objects that start at a high address and have a member at NULL are
+ * unlikely to exist, but such pointers may be returned e.g. by the
+ * container_of() macro.
+ */
+#define member_address_is_nonnull(ptr, member) \
+       ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0)
+
+/**
+ * llist_for_each - iterate over some deleted entries of a lock-less list
+ * @pos:       the &struct llist_node to use as a loop cursor
+ * @node:      the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each(pos, node)                      \
+       for ((pos) = (node); pos; (pos) = (pos)->next)
+
+/**
+ * llist_for_each_safe - iterate over some deleted entries of a lock-less list
+ *                      safe against removal of list entry
+ * @pos:       the &struct llist_node to use as a loop cursor
+ * @n:         another &struct llist_node to use as temporary storage
+ * @node:      the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_safe(pos, n, node)                      \
+       for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n))
+
+/**
+ * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
+ * @pos:       the type * to use as a loop cursor.
+ * @node:      the first entry of deleted list entries.
+ * @member:    the name of the llist_node within the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry(pos, node, member)                                \
+       for ((pos) = llist_entry((node), typeof(*(pos)), member);       \
+            member_address_is_nonnull(pos, member);                    \
+            (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
+
+/**
+ * llist_for_each_entry_safe - iterate over some deleted entries of lock-less list of given type
+ *                            safe against removal of list entry
+ * @pos:       the type * to use as a loop cursor.
+ * @n:         another type * to use as temporary storage
+ * @node:      the first entry of deleted list entries.
+ * @member:    the name of the llist_node within the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry_safe(pos, n, node, member)                               \
+       for (pos = llist_entry((node), typeof(*pos), member);                  \
+            member_address_is_nonnull(pos, member) &&                         \
+               (n = llist_entry(pos->member.next, typeof(*n), member), true); \
+            pos = n)
+
+/**
+ * llist_empty - tests whether a lock-less list is empty
+ * @head:      the list to test
+ *
+ * Not guaranteed to be accurate or up to date.  Just a quick way to
+ * test whether the list is empty without deleting something from the
+ * list.
+ */
+static inline bool llist_empty(const struct llist_head *head)
+{
+       return READ_ONCE(head->first) == NULL;
+}
+
+static inline struct llist_node *llist_next(struct llist_node *node)
+{
+       return node->next;
+}
+
+extern bool llist_add_batch(struct llist_node *new_first,
+                           struct llist_node *new_last,
+                           struct llist_head *head);
+/**
+ * llist_add - add a new entry
+ * @new:       new entry to be added
+ * @head:      the head for your lock-less list
+ *
+ * Returns true if the list was empty prior to adding this entry.
+ */
+static inline bool llist_add(struct llist_node *new, struct llist_head *head)
+{
+       return llist_add_batch(new, new, head);
+}
+
+/**
+ * llist_del_all - delete all entries from lock-less list
+ * @head:      the head of lock-less list to delete all entries
+ *
+ * If list is empty, return NULL, otherwise, delete all entries and
+ * return the pointer to the first entry.  The order of entries
+ * deleted is from the newest to the oldest added one.
+ */
+static inline struct llist_node *llist_del_all(struct llist_head *head)
+{
+       return xchg(&head->first, NULL);
+}
+
+extern struct llist_node *llist_del_first(struct llist_head *head);
+
+struct llist_node *llist_reverse_order(struct llist_node *head);
+
+#endif /* LLIST_H */
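
A sketch of the lock-free pattern from the table above: any number of producers call llist_add(), and one consumer detaches the whole list with llist_del_all(). The struct and function names are illustrative, and llist_add_batch() is assumed to come from the tree's llist.c:

    #include <stdlib.h>
    #include <linux/llist.h>

    struct event {
            struct llist_node node;
            int data;
    };

    static LLIST_HEAD(event_list);

    static void post_event(struct event *ev)
    {
            llist_add(&ev->node, &event_list);      /* safe from any thread */
    }

    static void drain_events(void)
    {
            struct llist_node *first = llist_del_all(&event_list);
            struct event *ev, *tmp;

            /* entries come back newest-first, per the comment above */
            llist_for_each_entry_safe(ev, tmp, first, node)
                    free(ev);
    }
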
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
new file mode 100644 (file)
index 0000000..1a7f024
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef __TOOLS_LINUX_LOCKDEP_H
+#define __TOOLS_LINUX_LOCKDEP_H
+
+struct lock_class_key {};
+struct task_struct;
+
+# define lock_acquire(l, s, t, r, c, n, i)     do { } while (0)
+# define lock_release(l, i)                    do { } while (0)
+# define lock_set_class(l, n, k, s, i)         do { } while (0)
+# define lock_set_subclass(l, s, i)            do { } while (0)
+# define lockdep_set_current_reclaim_state(g)  do { } while (0)
+# define lockdep_clear_current_reclaim_state() do { } while (0)
+# define lockdep_trace_alloc(g)                        do { } while (0)
+# define lockdep_info()                                do { } while (0)
+# define lockdep_init_map(lock, name, key, sub) \
+               do { (void)(name); (void)(key); } while (0)
+# define lockdep_set_class(lock, key)          do { (void)(key); } while (0)
+# define lockdep_set_class_and_name(lock, key, name) \
+               do { (void)(key); (void)(name); } while (0)
+#define lockdep_set_class_and_subclass(lock, key, sub) \
+               do { (void)(key); } while (0)
+#define lockdep_set_subclass(lock, sub)                do { } while (0)
+
+#define lockdep_set_novalidate_class(lock) do { } while (0)
+
+#define lockdep_assert_held(l)                 do { (void)(l); } while (0)
+#define lockdep_assert_held_once(l)            do { (void)(l); } while (0)
+
+#define lock_acquire_shared(l, s, t, n, i)
+
+#define lockdep_acquire_shared(lock)
+
+#define lock_contended(lockdep_map, ip) do {} while (0)
+#define lock_acquired(lockdep_map, ip) do {} while (0)
+
+static inline void debug_show_all_locks(void)
+{
+}
+
+static inline void debug_show_held_locks(struct task_struct *task)
+{
+}
+
+static inline void
+debug_check_no_locks_freed(const void *from, unsigned long len)
+{
+}
+
+static inline void
+debug_check_no_locks_held(void)
+{
+}
+
+#endif /* __TOOLS_LINUX_LOCKDEP_H */
+
diff --git a/include/linux/log2.h b/include/linux/log2.h
new file mode 100644 (file)
index 0000000..f031ea1
--- /dev/null
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Integer base 2 logarithm calculation
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_LOG2_H
+#define _LINUX_LOG2_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ *   more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+#ifndef CONFIG_ARCH_HAS_ILOG2_U32
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+       return fls(n) - 1;
+}
+#endif
+
+#ifndef CONFIG_ARCH_HAS_ILOG2_U64
+static inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+       return fls64(n) - 1;
+}
+#endif
+
+/**
+ * is_power_of_2() - check if a value is a power of two
+ * @n: the value to check
+ *
+ * Determine whether some value is a power of two, where zero is
+ * *not* considered a power of two.
+ * Return: true if @n is a power of 2, otherwise false.
+ */
+static inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+       return (n != 0 && ((n & (n - 1)) == 0));
+}
+
+/**
+ * __roundup_pow_of_two() - round up to nearest power of two
+ * @n: value to round up
+ */
+static inline __attribute__((const))
+unsigned long __roundup_pow_of_two(unsigned long n)
+{
+       return 1UL << fls_long(n - 1);
+}
+
+/**
+ * __rounddown_pow_of_two() - round down to nearest power of two
+ * @n: value to round down
+ */
+static inline __attribute__((const))
+unsigned long __rounddown_pow_of_two(unsigned long n)
+{
+       return 1UL << (fls_long(n) - 1);
+}
+
+/**
+ * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
+ * @n: parameter
+ *
+ * Use this where sparse expects a true constant expression, e.g. for array
+ * indices.
+ */
+#define const_ilog2(n)                         \
+(                                              \
+       __builtin_constant_p(n) ? (             \
+               (n) < 2 ? 0 :                   \
+               (n) & (1ULL << 63) ? 63 :       \
+               (n) & (1ULL << 62) ? 62 :       \
+               (n) & (1ULL << 61) ? 61 :       \
+               (n) & (1ULL << 60) ? 60 :       \
+               (n) & (1ULL << 59) ? 59 :       \
+               (n) & (1ULL << 58) ? 58 :       \
+               (n) & (1ULL << 57) ? 57 :       \
+               (n) & (1ULL << 56) ? 56 :       \
+               (n) & (1ULL << 55) ? 55 :       \
+               (n) & (1ULL << 54) ? 54 :       \
+               (n) & (1ULL << 53) ? 53 :       \
+               (n) & (1ULL << 52) ? 52 :       \
+               (n) & (1ULL << 51) ? 51 :       \
+               (n) & (1ULL << 50) ? 50 :       \
+               (n) & (1ULL << 49) ? 49 :       \
+               (n) & (1ULL << 48) ? 48 :       \
+               (n) & (1ULL << 47) ? 47 :       \
+               (n) & (1ULL << 46) ? 46 :       \
+               (n) & (1ULL << 45) ? 45 :       \
+               (n) & (1ULL << 44) ? 44 :       \
+               (n) & (1ULL << 43) ? 43 :       \
+               (n) & (1ULL << 42) ? 42 :       \
+               (n) & (1ULL << 41) ? 41 :       \
+               (n) & (1ULL << 40) ? 40 :       \
+               (n) & (1ULL << 39) ? 39 :       \
+               (n) & (1ULL << 38) ? 38 :       \
+               (n) & (1ULL << 37) ? 37 :       \
+               (n) & (1ULL << 36) ? 36 :       \
+               (n) & (1ULL << 35) ? 35 :       \
+               (n) & (1ULL << 34) ? 34 :       \
+               (n) & (1ULL << 33) ? 33 :       \
+               (n) & (1ULL << 32) ? 32 :       \
+               (n) & (1ULL << 31) ? 31 :       \
+               (n) & (1ULL << 30) ? 30 :       \
+               (n) & (1ULL << 29) ? 29 :       \
+               (n) & (1ULL << 28) ? 28 :       \
+               (n) & (1ULL << 27) ? 27 :       \
+               (n) & (1ULL << 26) ? 26 :       \
+               (n) & (1ULL << 25) ? 25 :       \
+               (n) & (1ULL << 24) ? 24 :       \
+               (n) & (1ULL << 23) ? 23 :       \
+               (n) & (1ULL << 22) ? 22 :       \
+               (n) & (1ULL << 21) ? 21 :       \
+               (n) & (1ULL << 20) ? 20 :       \
+               (n) & (1ULL << 19) ? 19 :       \
+               (n) & (1ULL << 18) ? 18 :       \
+               (n) & (1ULL << 17) ? 17 :       \
+               (n) & (1ULL << 16) ? 16 :       \
+               (n) & (1ULL << 15) ? 15 :       \
+               (n) & (1ULL << 14) ? 14 :       \
+               (n) & (1ULL << 13) ? 13 :       \
+               (n) & (1ULL << 12) ? 12 :       \
+               (n) & (1ULL << 11) ? 11 :       \
+               (n) & (1ULL << 10) ? 10 :       \
+               (n) & (1ULL <<  9) ?  9 :       \
+               (n) & (1ULL <<  8) ?  8 :       \
+               (n) & (1ULL <<  7) ?  7 :       \
+               (n) & (1ULL <<  6) ?  6 :       \
+               (n) & (1ULL <<  5) ?  5 :       \
+               (n) & (1ULL <<  4) ?  4 :       \
+               (n) & (1ULL <<  3) ?  3 :       \
+               (n) & (1ULL <<  2) ?  2 :       \
+               1) :                            \
+       -1)
+
+/**
+ * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
+ * @n: parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+       __builtin_constant_p(n) ?       \
+       const_ilog2(n) :                \
+       (sizeof(n) <= 4) ?              \
+       __ilog2_u32(n) :                \
+       __ilog2_u64(n)                  \
+ )
+
+/**
+ * roundup_pow_of_two - round the given value up to nearest power of two
+ * @n: parameter
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two(n)                  \
+(                                              \
+       __builtin_constant_p(n) ? (             \
+               (n == 1) ? 1 :                  \
+               (1UL << (ilog2((n) - 1) + 1))   \
+                                  ) :          \
+       __roundup_pow_of_two(n)                 \
+ )
+
+/**
+ * rounddown_pow_of_two - round the given value down to nearest power of two
+ * @n: parameter
+ *
+ * round the given value down to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define rounddown_pow_of_two(n)                        \
+(                                              \
+       __builtin_constant_p(n) ? (             \
+               (1UL << ilog2(n))) :            \
+       __rounddown_pow_of_two(n)               \
+ )
+
+static inline __attribute_const__
+int __order_base_2(unsigned long n)
+{
+       return n > 1 ? ilog2(n - 1) + 1 : 0;
+}
+
+/**
+ * order_base_2 - calculate the (rounded up) base 2 order of the argument
+ * @n: parameter
+ *
+ * The first few values calculated by this routine:
+ *  ob2(0) = 0
+ *  ob2(1) = 0
+ *  ob2(2) = 1
+ *  ob2(3) = 2
+ *  ob2(4) = 2
+ *  ob2(5) = 3
+ *  ... and so on.
+ */
+#define order_base_2(n)                                \
+(                                              \
+       __builtin_constant_p(n) ? (             \
+               ((n) == 0 || (n) == 1) ? 0 :    \
+               ilog2((n) - 1) + 1) :           \
+       __order_base_2(n)                       \
+)
+
+static inline __attribute__((const))
+int __bits_per(unsigned long n)
+{
+       if (n < 2)
+               return 1;
+       if (is_power_of_2(n))
+               return order_base_2(n) + 1;
+       return order_base_2(n);
+}
+
+/**
+ * bits_per - calculate the number of bits required for the argument
+ * @n: parameter
+ *
+ * This is constant-capable and can be used for compile time
+ * initializations, e.g. bitfields.
+ *
+ * The first few values calculated by this routine:
+ * bf(0) = 1
+ * bf(1) = 1
+ * bf(2) = 2
+ * bf(3) = 2
+ * bf(4) = 3
+ * ... and so on.
+ */
+#define bits_per(n)                            \
+(                                              \
+       __builtin_constant_p(n) ? (             \
+               ((n) == 0 || (n) == 1)          \
+                       ? 1 : ilog2(n) + 1      \
+       ) :                                     \
+       __bits_per(n)                           \
+)
+
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory.  This
+ * is on a logarithmic scale, where:
+ *
+ *     0 -> 2^0 * PAGE_SIZE and below
+ *     1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ *     2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ *     3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ *     4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ *     ...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ */
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+       if (__builtin_constant_p(size)) {
+               if (!size)
+                       return BITS_PER_LONG - PAGE_SHIFT;
+
+               if (size < (1UL << PAGE_SHIFT))
+                       return 0;
+
+               return ilog2((size) - 1) - PAGE_SHIFT + 1;
+       }
+
+       size--;
+       size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+       return fls(size);
+#else
+       return fls64(size);
+#endif
+}
+
+#endif /* _LINUX_LOG2_H */
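
A sketch of the typical sizing use: rounding a requested count up to a power of two and taking its log (undefined for n == 0, as noted above):

    #include <linux/log2.h>

    static unsigned hash_table_bits(unsigned long nr_items)
    {
            unsigned long size = roundup_pow_of_two(nr_items);

            return ilog2(size);     /* e.g. nr_items = 100 -> size 128, bits 7 */
    }

Because both macros are constant-capable, the same expressions can also size static arrays.
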
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
new file mode 100644 (file)
index 0000000..86e1dde
--- /dev/null
@@ -0,0 +1,5 @@
+#include <lz4.h>
+
+#define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace)     \
+       LZ4_compress_destSize(src, dst, srclen, dstlen)
+#define LZ4_MEM_COMPRESS 0
diff --git a/include/linux/math64.h b/include/linux/math64.h
new file mode 100644 (file)
index 0000000..5eb6f06
--- /dev/null
@@ -0,0 +1,85 @@
+#ifndef _LINUX_MATH64_H
+#define _LINUX_MATH64_H
+
+#include <linux/types.h>
+
+#define do_div(n,base) ({                                      \
+       u32 __base = (base);                                    \
+       u32 __rem;                                              \
+       __rem = ((u64)(n)) % __base;                            \
+       (n) = ((u64)(n)) / __base;                              \
+       __rem;                                                  \
+ })
+
+#define div64_long(x, y) div64_s64((x), (y))
+#define div64_ul(x, y)   div64_u64((x), (y))
+
+/**
+ * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
+ *
+ * This is commonly provided by 32bit archs to provide an optimized 64bit
+ * divide.
+ */
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+       *remainder = dividend % divisor;
+       return dividend / divisor;
+}
+
+/**
+ * div_s64_rem - signed 64bit divide with 32bit divisor with remainder
+ */
+static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
+{
+       *remainder = dividend % divisor;
+       return dividend / divisor;
+}
+
+/**
+ * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
+ */
+static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
+{
+       *remainder = dividend % divisor;
+       return dividend / divisor;
+}
+
+/**
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ */
+static inline u64 div64_u64(u64 dividend, u64 divisor)
+{
+       return dividend / divisor;
+}
+
+/**
+ * div64_s64 - signed 64bit divide with 64bit divisor
+ */
+static inline s64 div64_s64(s64 dividend, s64 divisor)
+{
+       return dividend / divisor;
+}
+
+/**
+ * div_u64 - unsigned 64bit divide with 32bit divisor
+ *
+ * This is the most common 64bit divide and should be used if possible,
+ * as many 32bit archs can optimize this variant better than a full 64bit
+ * divide.
+ */
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+       u32 remainder;
+       return div_u64_rem(dividend, divisor, &remainder);
+}
+
+/**
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline s64 div_s64(s64 dividend, s32 divisor)
+{
+       s32 remainder;
+       return div_s64_rem(dividend, divisor, &remainder);
+}
+
+#endif /* _LINUX_MATH64_H */
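
These all collapse to plain C division in userspace. A sketch of the one macro with side effects, do_div(), which divides in place and evaluates to the remainder:

    #include <linux/math64.h>

    static u64 ns_to_ms(u64 ns, u32 *rem_ns)
    {
            *rem_ns = do_div(ns, 1000000);  /* ns now holds the quotient */
            return ns;
    }

div_u64()/div_s64() are the side-effect-free forms to prefer when the remainder is not needed.
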
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
new file mode 100644 (file)
index 0000000..37d8149
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * memory buffer pool support
+ */
+#ifndef _LINUX_MEMPOOL_H
+#define _LINUX_MEMPOOL_H
+
+#include <linux/compiler.h>
+#include <linux/bug.h>
+#include <linux/slab.h>
+
+struct kmem_cache;
+
+typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data);
+typedef void (mempool_free_t)(void *element, void *pool_data);
+
+typedef struct mempool_s {
+       size_t                  elem_size;
+       void                    *pool_data;
+       mempool_alloc_t         *alloc;
+       mempool_free_t          *free;
+} mempool_t;
+
+static inline bool mempool_initialized(mempool_t *pool)
+{
+       return true;
+}
+
+extern int mempool_resize(mempool_t *pool, int new_min_nr);
+
+static inline void mempool_free(void *element, mempool_t *pool)
+{
+       free(element);
+}
+
+static inline void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc
+{
+       BUG_ON(!pool->elem_size);
+       return kmalloc(pool->elem_size, gfp_mask);
+}
+
+static inline void mempool_exit(mempool_t *pool) {}
+
+static inline void mempool_destroy(mempool_t *pool)
+{
+       free(pool);
+}
+
+static inline int
+mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc)
+{
+       pool->elem_size = 0;
+       return 0;
+}
+
+static inline mempool_t *
+mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
+{
+       mempool_t *pool = malloc(sizeof(*pool));
+       pool->elem_size = 0;
+       return pool;
+}
+
+static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+       pool->elem_size = size;
+       return 0;
+}
+
+static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order)
+{
+       pool->elem_size = PAGE_SIZE << order;
+       return 0;
+}
+
+static inline int mempool_init(mempool_t *pool, int min_nr,
+                              mempool_alloc_t *alloc_fn,
+                              mempool_free_t *free_fn,
+                              void *pool_data)
+{
+       pool->elem_size = (size_t) pool_data;
+       pool->pool_data = pool_data;
+       pool->alloc     = alloc_fn;
+       pool->free      = free_fn;
+       return 0;
+}
+
+#endif /* _LINUX_MEMPOOL_H */
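
In this shim a pool is just a recorded element size over malloc/free; a sketch of the kmalloc-pool flavour, with GFP_KERNEL assumed from the slab.h shim included above:

    #include <linux/mempool.h>

    static void mempool_demo(void)
    {
            mempool_t pool;
            void *buf;

            mempool_init_kmalloc_pool(&pool, 16, 128);  /* min_nr is ignored here */

            buf = mempool_alloc(&pool, GFP_KERNEL);     /* kmalloc(128, ...) */
            mempool_free(buf, &pool);                   /* plain free() */
            mempool_exit(&pool);
    }
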
diff --git a/include/linux/module.h b/include/linux/module.h
new file mode 100644 (file)
index 0000000..42d4e18
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef _LINUX_MODULE_H
+#define _LINUX_MODULE_H
+
+#include <linux/stat.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+
+struct module;
+
+#define module_init(initfn)                                    \
+       __attribute__((constructor(120)))                       \
+       static void __call_##initfn(void) { BUG_ON(initfn()); }
+
+#if 0
+#define module_exit(exitfn)                                    \
+       __attribute__((destructor(109)))                        \
+       static void __call_##exitfn(void) { exitfn(); }
+#endif
+
+#define module_exit(exitfn)                                    \
+       __attribute__((unused))                                 \
+       static void __call_##exitfn(void) { exitfn(); }
+
+#define MODULE_INFO(tag, info)
+#define MODULE_ALIAS(_alias)
+#define MODULE_SOFTDEP(_softdep)
+#define MODULE_LICENSE(_license)
+#define MODULE_AUTHOR(_author)
+#define MODULE_DESCRIPTION(_description)
+#define MODULE_VERSION(_version)
+
+static inline void __module_get(struct module *module)
+{
+}
+
+static inline int try_module_get(struct module *module)
+{
+       return 1;
+}
+
+static inline void module_put(struct module *module)
+{
+}
+
+#define module_param_named(name, value, type, perm)
+#define MODULE_PARM_DESC(_parm, desc)
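+
+/*
+ * Usage sketch (hypothetical init function): in userspace, module_init()
+ * runs the function from an ELF constructor at program startup and
+ * BUG_ON()s if it fails; module_exit() is compiled but never called.
+ *
+ *      static int my_init(void)
+ *      {
+ *              return 0;
+ *      }
+ *      module_init(my_init);
+ */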
+
+#endif /* _LINUX_MODULE_H */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
new file mode 100644 (file)
index 0000000..801f06e
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef __TOOLS_LINUX_MUTEX_H
+#define __TOOLS_LINUX_MUTEX_H
+
+#include <pthread.h>
+
+struct mutex {
+       pthread_mutex_t lock;
+};
+
+#define DEFINE_MUTEX(mutexname) \
+       struct mutex mutexname = { .lock = PTHREAD_MUTEX_INITIALIZER }
+
+#define mutex_init(l)          pthread_mutex_init(&(l)->lock, NULL)
+#define mutex_lock(l)          pthread_mutex_lock(&(l)->lock)
+#define mutex_trylock(l)       (!pthread_mutex_trylock(&(l)->lock))
+#define mutex_unlock(l)                pthread_mutex_unlock(&(l)->lock)
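+
+/*
+ * Usage sketch (hypothetical lock): a 1:1 mapping onto pthread mutexes,
+ * so kernel lockdep annotations and nesting classes are simply dropped.
+ *
+ *      static DEFINE_MUTEX(my_lock);
+ *
+ *      mutex_lock(&my_lock);
+ *      ...critical section...
+ *      mutex_unlock(&my_lock);
+ */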
+
+#endif /* __TOOLS_LINUX_MUTEX_H */
diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h
new file mode 100644 (file)
index 0000000..bde9f0d
--- /dev/null
@@ -0,0 +1,44 @@
+#ifndef __LINUX_OSQ_LOCK_H
+#define __LINUX_OSQ_LOCK_H
+
+/*
+ * An MCS-like lock, especially tailored for optimistic spinning in sleeping
+ * lock implementations (mutex, rwsem, etc).
+ */
+struct optimistic_spin_node {
+       struct optimistic_spin_node *next, *prev;
+       int locked; /* 1 if lock acquired */
+       int cpu; /* encoded CPU # + 1 value */
+};
+
+struct optimistic_spin_queue {
+       /*
+        * Stores an encoded value of the CPU # of the tail node in the queue.
+        * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL.
+        */
+       atomic_t tail;
+};
+
+#define OSQ_UNLOCKED_VAL (0)
+
+/* Init macro and function. */
+#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }
+
+static inline void osq_lock_init(struct optimistic_spin_queue *lock)
+{
+       atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
+}
+
+static inline bool osq_lock(struct optimistic_spin_queue *lock)
+{
+       return false;
+}
+
+static inline void osq_unlock(struct optimistic_spin_queue *lock) {}
+
+static inline bool osq_is_locked(struct optimistic_spin_queue *lock)
+{
+       return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL;
+}
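+
+/*
+ * Usage sketch: a sleeping-lock slowpath would normally try to spin via
+ * osq_lock() before sleeping.  In this userspace shim osq_lock() always
+ * returns false, so such callers fall straight through to their sleeping
+ * path:
+ *
+ *      if (osq_lock(&osq)) {
+ *              ...optimistic spin...
+ *              osq_unlock(&osq);
+ *      }
+ */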
+
+#endif
diff --git a/include/linux/page.h b/include/linux/page.h
new file mode 100644 (file)
index 0000000..87be064
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef _LINUX_PAGE_H
+#define _LINUX_PAGE_H
+
+#include <sys/user.h>
+
+struct page;
+
+#ifndef PAGE_SIZE
+
+#define PAGE_SIZE   4096UL
+#define PAGE_MASK   (~(PAGE_SIZE - 1))
+
+#endif
+
+#define virt_to_page(p)                                                        \
+       ((struct page *) (((unsigned long) (p)) & PAGE_MASK))
+#define offset_in_page(p)              ((unsigned long) (p) & ~PAGE_MASK)
+
+#define page_address(p)                        ((void *) (p))
+
+#define kmap_atomic(page)              page_address(page)
+#define kunmap_atomic(addr)            do {} while (0)
+
+static const char zero_page[PAGE_SIZE];
+
+#define ZERO_PAGE(o)                   ((struct page *) &zero_page[0])
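+
+/*
+ * Usage sketch (hypothetical buffer): with struct page modeled as a bare
+ * page-aligned address, page/offset splitting is pure masking.
+ *
+ *      char buf[2 * PAGE_SIZE];
+ *      void *p = buf + 5000;
+ *
+ *      struct page *pg   = virt_to_page(p);
+ *      unsigned long off = offset_in_page(p);
+ *
+ * page_address(pg) + off reconstructs p exactly.
+ */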
+
+#endif /* _LINUX_PAGE_H */
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
new file mode 100644 (file)
index 0000000..0655056
--- /dev/null
@@ -0,0 +1,180 @@
+#ifndef __TOOLS_LINUX_PERCPU_REFCOUNT_H
+#define __TOOLS_LINUX_PERCPU_REFCOUNT_H
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+
+struct percpu_ref;
+typedef void (percpu_ref_func_t)(struct percpu_ref *);
+
+/* flags set in the lower bits of percpu_ref->percpu_count_ptr */
+enum {
+       __PERCPU_REF_ATOMIC     = 1LU << 0,     /* operating in atomic mode */
+       __PERCPU_REF_DEAD       = 1LU << 1,     /* (being) killed */
+       __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD,
+
+       __PERCPU_REF_FLAG_BITS  = 2,
+};
+
+/* @flags for percpu_ref_init() */
+enum {
+       PERCPU_REF_INIT_ATOMIC  = 1 << 0,
+
+       /*
+        * Start dead w/ ref == 0 in atomic mode.  Must be revived with
+        * percpu_ref_reinit() before use.  Implies INIT_ATOMIC.
+        */
+       PERCPU_REF_INIT_DEAD    = 1 << 1,
+};
+
+struct percpu_ref {
+       atomic_long_t           count;
+       percpu_ref_func_t       *release;
+       percpu_ref_func_t       *confirm_switch;
+};
+
+static inline void percpu_ref_exit(struct percpu_ref *ref) {}
+
+static inline int __must_check percpu_ref_init(struct percpu_ref *ref,
+                                percpu_ref_func_t *release, unsigned int flags,
+                                gfp_t gfp)
+{
+       unsigned long start_count = 0;
+
+       if (!(flags & PERCPU_REF_INIT_DEAD))
+               start_count++;
+
+       atomic_long_set(&ref->count, start_count);
+
+       ref->release = release;
+       return 0;
+}
+
+/**
+ * percpu_ref_get_many - increment a percpu refcount
+ * @ref: percpu_ref to get
+ * @nr: number of references to get
+ *
+ * Analogous to atomic_long_add().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
+{
+       atomic_long_add(nr, &ref->count);
+}
+
+/**
+ * percpu_ref_get - increment a percpu refcount
+ * @ref: percpu_ref to get
+ *
+ * Analogous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+       percpu_ref_get_many(ref, 1);
+}
+
+/**
+ * percpu_ref_tryget - try to increment a percpu refcount
+ * @ref: percpu_ref to try-get
+ *
+ * Increment a percpu refcount unless its count already reached zero.
+ * Returns %true on success; %false on failure.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+{
+       return atomic_long_inc_not_zero(&ref->count);
+}
+
+/**
+ * percpu_ref_tryget_live - try to increment a live percpu refcount
+ * @ref: percpu_ref to try-get
+ *
+ * Increment a percpu refcount unless it has already been killed.  Returns
+ * %true on success; %false on failure.
+ *
+ * Completion of percpu_ref_kill() in itself doesn't guarantee that this
+ * function will fail.  For such guarantee, percpu_ref_kill_and_confirm()
+ * should be used.  After the confirm_kill callback is invoked, it's
+ * guaranteed that no new reference will be given out by
+ * percpu_ref_tryget_live().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
+{
+       return atomic_long_inc_not_zero(&ref->count);
+}
+
+/**
+ * percpu_ref_put_many - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ * @nr: number of references to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init()).
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
+{
+       if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
+               ref->release(ref);
+}
+
+/**
+ * percpu_ref_put - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init()).
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put(struct percpu_ref *ref)
+{
+       percpu_ref_put_many(ref, 1);
+}
+
+static inline void percpu_ref_reinit(struct percpu_ref *ref)
+{
+       percpu_ref_get(ref);
+}
+
+/**
+ * percpu_ref_kill - drop the initial ref
+ * @ref: percpu_ref to kill
+ *
+ * Must be used to drop the initial ref on a percpu refcount; must be called
+ * precisely once before shutdown.
+ */
+static inline void percpu_ref_kill(struct percpu_ref *ref)
+{
+       percpu_ref_put(ref);
+}
+
+/**
+ * percpu_ref_is_zero - test whether a percpu refcount reached zero
+ * @ref: percpu_ref to test
+ *
+ * Returns %true if @ref reached zero.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
+{
+       return !atomic_long_read(&ref->count);
+}
+
+static inline bool percpu_ref_is_dying(struct percpu_ref *ref)
+{
+       return percpu_ref_is_zero(ref);
+}
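+
+/*
+ * Lifecycle sketch (hypothetical release callback): the shim is a plain
+ * atomic_long_t underneath, but the API contract is unchanged.
+ *
+ *      static void my_release(struct percpu_ref *ref) { ... }
+ *
+ *      struct percpu_ref ref;
+ *
+ *      percpu_ref_init(&ref, my_release, 0, GFP_KERNEL);
+ *      if (percpu_ref_tryget(&ref)) {
+ *              ...use the protected object...
+ *              percpu_ref_put(&ref);
+ *      }
+ *      percpu_ref_kill(&ref);
+ *
+ * percpu_ref_kill() drops the initial reference; whichever put brings the
+ * count to zero invokes my_release().
+ */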
+
+#endif /* __TOOLS_LINUX_PERCPU_REFCOUNT_H */
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
new file mode 100644 (file)
index 0000000..153251c
--- /dev/null
@@ -0,0 +1,58 @@
+
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PERCPU_RWSEM_H
+#define _LINUX_PERCPU_RWSEM_H
+
+#include <pthread.h>
+#include <linux/preempt.h>
+
+struct percpu_rw_semaphore {
+       pthread_mutex_t         lock;
+};
+
+static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
+{
+       pthread_mutex_lock(&sem->lock);
+}
+
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+       pthread_mutex_lock(&sem->lock);
+}
+
+static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+{
+       return !pthread_mutex_trylock(&sem->lock);
+}
+
+static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
+{
+       pthread_mutex_unlock(&sem->lock);
+}
+
+static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+       pthread_mutex_unlock(&sem->lock);
+}
+
+static inline void percpu_down_write(struct percpu_rw_semaphore *sem)
+{
+       pthread_mutex_lock(&sem->lock);
+}
+
+static inline void percpu_up_write(struct percpu_rw_semaphore *sem)
+{
+       pthread_mutex_unlock(&sem->lock);
+}
+
+static inline void percpu_free_rwsem(struct percpu_rw_semaphore *sem) {}
+
+static inline int percpu_init_rwsem(struct percpu_rw_semaphore *sem)
+{
+       pthread_mutex_init(&sem->lock, NULL);
+       return 0;
+}
+
+#define percpu_rwsem_assert_held(sem)          do {} while (0)
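+
+/*
+ * Usage sketch (hypothetical semaphore): readers and writers all funnel
+ * through a single pthread mutex here, so concurrent readers exclude each
+ * other too -- correct, just less parallel than the kernel primitive.
+ *
+ *      struct percpu_rw_semaphore sem;
+ *
+ *      percpu_init_rwsem(&sem);
+ *      percpu_down_read(&sem);
+ *      ...read-side section...
+ *      percpu_up_read(&sem);
+ */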
+
+#endif
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
new file mode 100644 (file)
index 0000000..740d833
--- /dev/null
@@ -0,0 +1,191 @@
+#ifndef __TOOLS_LINUX_PERCPU_H
+#define __TOOLS_LINUX_PERCPU_H
+
+#include <linux/cpumask.h>
+
+#define __percpu
+
+#define free_percpu(percpu)                            free(percpu)
+
+#define __alloc_percpu_gfp(size, align, gfp)           calloc(1, size)
+#define __alloc_percpu(size, align)                    calloc(1, size)
+
+#define alloc_percpu_gfp(type, gfp)                                    \
+       (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type),       \
+                                               __alignof__(type), gfp)
+#define alloc_percpu(type)                                             \
+       (typeof(type) __percpu *)__alloc_percpu(sizeof(type),           \
+                                               __alignof__(type))
+
+#define __verify_pcpu_ptr(ptr)
+
+#define per_cpu_ptr(ptr, cpu)  (ptr)
+#define raw_cpu_ptr(ptr)       (ptr)
+#define this_cpu_ptr(ptr)      raw_cpu_ptr(ptr)
+
+#define __pcpu_size_call_return(stem, variable)                                \
+({                                                                     \
+       typeof(variable) pscr_ret__;                                    \
+       __verify_pcpu_ptr(&(variable));                                 \
+       switch(sizeof(variable)) {                                      \
+       case 1: pscr_ret__ = stem##1(variable); break;                  \
+       case 2: pscr_ret__ = stem##2(variable); break;                  \
+       case 4: pscr_ret__ = stem##4(variable); break;                  \
+       case 8: pscr_ret__ = stem##8(variable); break;                  \
+       default:                                                        \
+               __bad_size_call_parameter(); break;                     \
+       }                                                               \
+       pscr_ret__;                                                     \
+})
+
+#define __pcpu_size_call_return2(stem, variable, ...)                  \
+({                                                                     \
+       typeof(variable) pscr2_ret__;                                   \
+       __verify_pcpu_ptr(&(variable));                                 \
+       switch(sizeof(variable)) {                                      \
+       case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break;    \
+       case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break;    \
+       case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break;    \
+       case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break;    \
+       default:                                                        \
+               __bad_size_call_parameter(); break;                     \
+       }                                                               \
+       pscr2_ret__;                                                    \
+})
+
+/*
+ * Special handling for cmpxchg_double.  cmpxchg_double is passed two
+ * percpu variables.  The first has to be aligned to a double word
+ * boundary and the second has to follow directly thereafter.
+ * We enforce this on all architectures even if they don't support
+ * a double cmpxchg instruction, since it's a cheap requirement, and it
+ * avoids breaking the requirement for architectures with the instruction.
+ */
+#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...)          \
+({                                                                     \
+       bool pdcrb_ret__;                                               \
+       __verify_pcpu_ptr(&(pcp1));                                     \
+       BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2));                     \
+       VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1)));       \
+       VM_BUG_ON((unsigned long)(&(pcp2)) !=                           \
+                 (unsigned long)(&(pcp1)) + sizeof(pcp1));             \
+       switch(sizeof(pcp1)) {                                          \
+       case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break;  \
+       case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break;  \
+       case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break;  \
+       case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break;  \
+       default:                                                        \
+               __bad_size_call_parameter(); break;                     \
+       }                                                               \
+       pdcrb_ret__;                                                    \
+})
+
+#define __pcpu_size_call(stem, variable, ...)                          \
+do {                                                                   \
+       __verify_pcpu_ptr(&(variable));                                 \
+       switch(sizeof(variable)) {                                      \
+               case 1: stem##1(variable, __VA_ARGS__);break;           \
+               case 2: stem##2(variable, __VA_ARGS__);break;           \
+               case 4: stem##4(variable, __VA_ARGS__);break;           \
+               case 8: stem##8(variable, __VA_ARGS__);break;           \
+               default:                                                \
+                       __bad_size_call_parameter();break;              \
+       }                                                               \
+} while (0)
+
+#define raw_cpu_read(pcp)              __pcpu_size_call_return(raw_cpu_read_, pcp)
+#define raw_cpu_write(pcp, val)                __pcpu_size_call(raw_cpu_write_, pcp, val)
+#define raw_cpu_add(pcp, val)          __pcpu_size_call(raw_cpu_add_, pcp, val)
+#define raw_cpu_and(pcp, val)          __pcpu_size_call(raw_cpu_and_, pcp, val)
+#define raw_cpu_or(pcp, val)           __pcpu_size_call(raw_cpu_or_, pcp, val)
+#define raw_cpu_add_return(pcp, val)   __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)
+#define raw_cpu_xchg(pcp, nval)                __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval)
+#define raw_cpu_cmpxchg(pcp, oval, nval) \
+       __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
+#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+       __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
+
+#define raw_cpu_sub(pcp, val)          raw_cpu_add(pcp, -(val))
+#define raw_cpu_inc(pcp)               raw_cpu_add(pcp, 1)
+#define raw_cpu_dec(pcp)               raw_cpu_sub(pcp, 1)
+#define raw_cpu_sub_return(pcp, val)   raw_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define raw_cpu_inc_return(pcp)                raw_cpu_add_return(pcp, 1)
+#define raw_cpu_dec_return(pcp)                raw_cpu_add_return(pcp, -1)
+
+#define __this_cpu_read(pcp)                                           \
+({                                                                     \
+       raw_cpu_read(pcp);                                              \
+})
+
+#define __this_cpu_write(pcp, val)                                     \
+({                                                                     \
+       raw_cpu_write(pcp, val);                                        \
+})
+
+#define __this_cpu_add(pcp, val)                                       \
+({                                                                     \
+       raw_cpu_add(pcp, val);                                          \
+})
+
+#define __this_cpu_and(pcp, val)                                       \
+({                                                                     \
+       raw_cpu_and(pcp, val);                                          \
+})
+
+#define __this_cpu_or(pcp, val)                                                \
+({                                                                     \
+       raw_cpu_or(pcp, val);                                           \
+})
+
+#define __this_cpu_add_return(pcp, val)                                        \
+({                                                                     \
+       raw_cpu_add_return(pcp, val);                                   \
+})
+
+#define __this_cpu_xchg(pcp, nval)                                     \
+({                                                                     \
+       raw_cpu_xchg(pcp, nval);                                        \
+})
+
+#define __this_cpu_cmpxchg(pcp, oval, nval)                            \
+({                                                                     \
+       raw_cpu_cmpxchg(pcp, oval, nval);                               \
+})
+
+#define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({                                                                     \
+       raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2); \
+})
+
+#define __this_cpu_sub(pcp, val)       __this_cpu_add(pcp, -(typeof(pcp))(val))
+#define __this_cpu_inc(pcp)            __this_cpu_add(pcp, 1)
+#define __this_cpu_dec(pcp)            __this_cpu_sub(pcp, 1)
+#define __this_cpu_sub_return(pcp, val)        __this_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define __this_cpu_inc_return(pcp)     __this_cpu_add_return(pcp, 1)
+#define __this_cpu_dec_return(pcp)     __this_cpu_add_return(pcp, -1)
+
+#define this_cpu_read(pcp)             ((pcp))
+#define this_cpu_write(pcp, val)       ((pcp) = val)
+#define this_cpu_add(pcp, val)         ((pcp) += val)
+#define this_cpu_and(pcp, val)         ((pcp) &= val)
+#define this_cpu_or(pcp, val)          ((pcp) |= val)
+#define this_cpu_add_return(pcp, val)  ((pcp) += val)
+#define this_cpu_xchg(pcp, nval)                                       \
+({                                                                     \
+       typeof(pcp) _r = (pcp);                                         \
+       (pcp) = (nval);                                                 \
+       _r;                                                             \
+})
+
+#define this_cpu_cmpxchg(pcp, oval, nval) \
+       __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval)
+#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+       __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
+
+#define this_cpu_sub(pcp, val)         this_cpu_add(pcp, -(typeof(pcp))(val))
+#define this_cpu_inc(pcp)              this_cpu_add(pcp, 1)
+#define this_cpu_dec(pcp)              this_cpu_sub(pcp, 1)
+#define this_cpu_sub_return(pcp, val)  this_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define this_cpu_inc_return(pcp)       this_cpu_add_return(pcp, 1)
+#define this_cpu_dec_return(pcp)       this_cpu_add_return(pcp, -1)
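+
+/*
+ * Usage sketch (hypothetical counter): with a single "CPU", a per-cpu
+ * allocation is one zeroed object and the this_cpu ops above are plain
+ * reads and writes.
+ *
+ *      u64 __percpu *counter = alloc_percpu(u64);
+ *
+ *      this_cpu_add(*counter, 10);
+ *      u64 v = this_cpu_read(*counter);
+ *      free_percpu(counter);
+ *
+ * v is 10; per_cpu_ptr(counter, cpu) returns the same pointer for every
+ * cpu argument.
+ */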
+
+#endif /* __TOOLS_LINUX_PERCPU_H */
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
new file mode 100644 (file)
index 0000000..1d21bfe
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+  File: linux/posix_acl.h
+
+  (C) 2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+
+
+#ifndef __LINUX_POSIX_ACL_H
+#define __LINUX_POSIX_ACL_H
+
+#include <linux/bug.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+
+#define ACL_UNDEFINED_ID       (-1)
+
+/* a_type field in acl_user_posix_entry_t */
+#define ACL_TYPE_ACCESS                (0x8000)
+#define ACL_TYPE_DEFAULT       (0x4000)
+
+/* e_tag entry in struct posix_acl_entry */
+#define ACL_USER_OBJ           (0x01)
+#define ACL_USER               (0x02)
+#define ACL_GROUP_OBJ          (0x04)
+#define ACL_GROUP              (0x08)
+#define ACL_MASK               (0x10)
+#define ACL_OTHER              (0x20)
+
+/* permissions in the e_perm field */
+#define ACL_READ               (0x04)
+#define ACL_WRITE              (0x02)
+#define ACL_EXECUTE            (0x01)
+
+struct posix_acl_entry {
+       short                   e_tag;
+       unsigned short          e_perm;
+       union {
+               uid_t           e_uid;
+               gid_t           e_gid;
+       };
+};
+
+struct posix_acl {
+       struct rcu_head         a_rcu;
+       unsigned int            a_count;
+       struct posix_acl_entry  a_entries[0];
+};
+
+#endif  /* __LINUX_POSIX_ACL_H */
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
new file mode 100644 (file)
index 0000000..65beeb1
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+  File: linux/posix_acl_xattr.h
+
+  Extended attribute system call representation of Access Control Lists.
+
+  Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org>
+  Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
+ */
+#ifndef _POSIX_ACL_XATTR_H
+#define _POSIX_ACL_XATTR_H
+
+#include <uapi/linux/xattr.h>
+
+/* Supported ACL a_version fields */
+#define POSIX_ACL_XATTR_VERSION        0x0002
+
+/* An undefined entry e_id value */
+#define ACL_UNDEFINED_ID       (-1)
+
+typedef struct {
+       __le16                  e_tag;
+       __le16                  e_perm;
+       __le32                  e_id;
+} posix_acl_xattr_entry;
+
+typedef struct {
+       __le32                  a_version;
+       posix_acl_xattr_entry   a_entries[0];
+} posix_acl_xattr_header;
+
+extern const struct xattr_handler posix_acl_access_xattr_handler;
+extern const struct xattr_handler posix_acl_default_xattr_handler;
+
+#endif /* _POSIX_ACL_XATTR_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
new file mode 100644 (file)
index 0000000..dbc7c24
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef __LINUX_PREEMPT_H
+#define __LINUX_PREEMPT_H
+
+extern void preempt_disable(void);
+extern void preempt_enable(void);
+
+#define sched_preempt_enable_no_resched()      preempt_enable()
+#define preempt_enable_no_resched()            preempt_enable()
+#define preempt_check_resched()                        do { } while (0)
+
+#define preempt_disable_notrace()              preempt_disable()
+#define preempt_enable_no_resched_notrace()    preempt_enable()
+#define preempt_enable_notrace()               preempt_enable()
+#define preemptible()                          0
+
+#endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
new file mode 100644 (file)
index 0000000..13cb826
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H
+
+#define prefetch(p)    \
+       ({ __maybe_unused typeof(p) __var = (p); })
+
+#endif /* _LINUX_PREFETCH_H */
diff --git a/include/linux/printk.h b/include/linux/printk.h
new file mode 100644 (file)
index 0000000..bc1619f
--- /dev/null
@@ -0,0 +1,210 @@
+#ifndef __TOOLS_LINUX_PRINTK_H
+#define __TOOLS_LINUX_PRINTK_H
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#define KERN_EMERG     ""
+#define KERN_ALERT     ""
+#define KERN_CRIT      ""
+#define KERN_ERR       ""
+#define KERN_WARNING   ""
+#define KERN_NOTICE    ""
+#define KERN_INFO      ""
+#define KERN_DEBUG     ""
+#define KERN_DEFAULT   ""
+#define KERN_CONT      ""
+
+static inline int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+       int i;
+
+       i = vsnprintf(buf, size, fmt, args);
+
+       if (likely(i < size))
+               return i;
+       if (size != 0)
+               return size - 1;
+       return 0;
+}
+
+static inline int scnprintf(char *buf, size_t size, const char *fmt, ...)
+{
+       va_list args;
+       int i;
+
+       va_start(args, fmt);
+       i = vscnprintf(buf, size, fmt, args);
+       va_end(args);
+
+       return i;
+}
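+
+/*
+ * Usage sketch: unlike raw snprintf(), scnprintf() returns the number of
+ * characters actually written (excluding the terminating NUL), never the
+ * would-be length, so offsets can be accumulated safely.
+ *
+ *      char buf[8];
+ *      int n = scnprintf(buf, sizeof(buf), "%s", "hello world");
+ *
+ * n is 7 and buf holds "hello w"; snprintf() would have returned 11.
+ */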
+
+#define printk(...)    printf(__VA_ARGS__)
+#define vprintk(...)   vprintf(__VA_ARGS__)
+
+#define no_printk(fmt, ...)                            \
+({                                                     \
+       do {                                            \
+               if (0)                                  \
+                       printk(fmt, ##__VA_ARGS__);     \
+       } while (0);                                    \
+       0;                                              \
+})
+
+#define pr_emerg(fmt, ...) \
+       printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+       printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+       printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+       printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+       printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+       printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+       printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+/*
+ * Like KERN_CONT, pr_cont() should only be used when continuing
+ * a line with no newline ('\n') enclosed. Otherwise it defaults
+ * back to KERN_DEFAULT.
+ */
+#define pr_cont(fmt, ...) \
+       printk(KERN_CONT fmt, ##__VA_ARGS__)
+
+/* pr_devel() should produce zero code unless DEBUG is defined */
+#ifdef DEBUG
+#define pr_devel(fmt, ...) \
+       printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel(fmt, ...) \
+       no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#include <linux/dynamic_debug.h>
+
+/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
+#define pr_debug(fmt, ...) \
+       dynamic_pr_debug(fmt, ##__VA_ARGS__)
+#elif defined(DEBUG)
+#define pr_debug(fmt, ...) \
+       printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug(fmt, ...) \
+       no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+/*
+ * Print a one-time message (analogous to WARN_ONCE() et al):
+ */
+
+#define printk_once(fmt, ...)                                  \
+({                                                             \
+       static bool __print_once __read_mostly;                 \
+       bool __ret_print_once = !__print_once;                  \
+                                                               \
+       if (!__print_once) {                                    \
+               __print_once = true;                            \
+               printk(fmt, ##__VA_ARGS__);                     \
+       }                                                       \
+       unlikely(__ret_print_once);                             \
+})
+#define printk_deferred_once(fmt, ...)                         \
+({                                                             \
+       static bool __print_once __read_mostly;                 \
+       bool __ret_print_once = !__print_once;                  \
+                                                               \
+       if (!__print_once) {                                    \
+               __print_once = true;                            \
+               printk_deferred(fmt, ##__VA_ARGS__);            \
+       }                                                       \
+       unlikely(__ret_print_once);                             \
+})
+
+#define pr_emerg_once(fmt, ...)                                        \
+       printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert_once(fmt, ...)                                        \
+       printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit_once(fmt, ...)                                 \
+       printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err_once(fmt, ...)                                  \
+       printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn_once(fmt, ...)                                 \
+       printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_notice_once(fmt, ...)                               \
+       printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_once(fmt, ...)                                 \
+       printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_cont_once(fmt, ...)                                 \
+       printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+
+#if defined(DEBUG)
+#define pr_devel_once(fmt, ...)                                        \
+       printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel_once(fmt, ...)                                        \
+       no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(DEBUG)
+#define pr_debug_once(fmt, ...)                                        \
+       printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug_once(fmt, ...)                                        \
+       no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+/*
+ * Ratelimited messages use a local ratelimit_state;
+ * no local ratelimit_state is used in the !PRINTK case.
+ */
+#ifdef CONFIG_PRINTK
+#define printk_ratelimited(fmt, ...)                                   \
+({                                                                     \
+       static DEFINE_RATELIMIT_STATE(_rs,                              \
+                                     DEFAULT_RATELIMIT_INTERVAL,       \
+                                     DEFAULT_RATELIMIT_BURST);         \
+                                                                       \
+       if (__ratelimit(&_rs))                                          \
+               printk(fmt, ##__VA_ARGS__);                             \
+})
+#else
+#define printk_ratelimited(fmt, ...)                                   \
+       no_printk(fmt, ##__VA_ARGS__)
+#endif
+
+#define pr_emerg_ratelimited(fmt, ...)                                 \
+       printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert_ratelimited(fmt, ...)                                 \
+       printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit_ratelimited(fmt, ...)                                  \
+       printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err_ratelimited(fmt, ...)                                   \
+       printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn_ratelimited(fmt, ...)                                  \
+       printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_notice_ratelimited(fmt, ...)                                        \
+       printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_ratelimited(fmt, ...)                                  \
+       printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+/* no pr_cont_ratelimited, don't do that... */
+
+#if defined(DEBUG)
+#define pr_devel_ratelimited(fmt, ...)                                 \
+       printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel_ratelimited(fmt, ...)                                 \
+       no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+#endif /* __TOOLS_LINUX_PRINTK_H */
diff --git a/include/linux/random.h b/include/linux/random.h
new file mode 100644 (file)
index 0000000..c38ae46
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * include/linux/random.h
+ *
+ * Include file for the random number generator.
+ */
+#ifndef _LINUX_RANDOM_H
+#define _LINUX_RANDOM_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/bug.h>
+
+#ifdef SYS_getrandom
+static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+        return syscall(SYS_getrandom, buf, buflen, flags);
+}
+#else
+extern int urandom_fd;
+
+static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+       return read(urandom_fd, buf, buflen);
+}
+#endif
+
+static inline void get_random_bytes(void *buf, int nbytes)
+{
+       BUG_ON(getrandom(buf, nbytes, 0) != nbytes);
+}
+
+static inline void prandom_bytes(void *buf, int nbytes)
+{
+       get_random_bytes(buf, nbytes);
+}
+
+#define get_random_type(type)                          \
+static inline type get_random_##type(void)             \
+{                                                      \
+       type v;                                         \
+                                                       \
+       get_random_bytes(&v, sizeof(v));                \
+       return v;                                       \
+}
+
+get_random_type(int);
+get_random_type(long);
+get_random_type(u64);
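+
+/*
+ * Usage sketch: the macro above stamps out typed helpers, so callers can
+ * pull fixed-size random values directly.
+ *
+ *      u64 nonce  = get_random_u64();
+ *      int jitter = get_random_int();
+ */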
+
+#endif /* _LINUX_RANDOM_H */
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
new file mode 100644 (file)
index 0000000..680181d
--- /dev/null
@@ -0,0 +1,109 @@
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL     (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST                10
+
+/* report the number of suppressed messages on release */
+#define RATELIMIT_MSG_ON_RELEASE       1
+
+struct ratelimit_state {
+       raw_spinlock_t  lock;           /* protect the state */
+
+       int             interval;
+       int             burst;
+       int             printed;
+       int             missed;
+       unsigned long   begin;
+       unsigned long   flags;
+};
+
+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) {                \
+               .lock           = __RAW_SPIN_LOCK_UNLOCKED(name.lock),  \
+               .interval       = interval_init,                        \
+               .burst          = burst_init,                           \
+       }
+
+#define RATELIMIT_STATE_INIT_DISABLED                                  \
+       RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
+
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init)                \
+                                                                       \
+       struct ratelimit_state name =                                   \
+               RATELIMIT_STATE_INIT(name, interval_init, burst_init)   \
+
+static inline void ratelimit_state_init(struct ratelimit_state *rs,
+                                       int interval, int burst)
+{
+       memset(rs, 0, sizeof(*rs));
+
+       raw_spin_lock_init(&rs->lock);
+       rs->interval    = interval;
+       rs->burst       = burst;
+}
+
+static inline void ratelimit_default_init(struct ratelimit_state *rs)
+{
+       return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL,
+                                       DEFAULT_RATELIMIT_BURST);
+}
+
+static inline void ratelimit_state_exit(struct ratelimit_state *rs)
+{
+       if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
+               return;
+
+       if (rs->missed) {
+               pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
+                       current->comm, rs->missed);
+               rs->missed = 0;
+       }
+}
+
+static inline void
+ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags)
+{
+       rs->flags = flags;
+}
+
+extern struct ratelimit_state printk_ratelimit_state;
+
+extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
+#define __ratelimit(state) ___ratelimit(state, __func__)
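+
+/*
+ * Usage sketch (hypothetical message): a static ratelimit_state lets a
+ * hot path emit at most 'burst' messages per 'interval':
+ *
+ *      static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ *                                    DEFAULT_RATELIMIT_BURST);
+ *
+ *      if (__ratelimit(&rs))
+ *              pr_warn("slow path hit\n");
+ */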
+
+#ifdef CONFIG_PRINTK
+
+#define WARN_ON_RATELIMIT(condition, state)                    \
+               WARN_ON((condition) && __ratelimit(state))
+
+#define WARN_RATELIMIT(condition, format, ...)                 \
+({                                                             \
+       static DEFINE_RATELIMIT_STATE(_rs,                      \
+                                     DEFAULT_RATELIMIT_INTERVAL,       \
+                                     DEFAULT_RATELIMIT_BURST); \
+       int rtn = !!(condition);                                \
+                                                               \
+       if (unlikely(rtn && __ratelimit(&_rs)))                 \
+               WARN(rtn, format, ##__VA_ARGS__);               \
+                                                               \
+       rtn;                                                    \
+})
+
+#else
+
+#define WARN_ON_RATELIMIT(condition, state)                    \
+       WARN_ON(condition)
+
+#define WARN_RATELIMIT(condition, format, ...)                 \
+({                                                             \
+       int rtn = WARN(condition, format, ##__VA_ARGS__);       \
+       rtn;                                                    \
+})
+
+#endif
+
+#endif /* _LINUX_RATELIMIT_H */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
new file mode 100644 (file)
index 0000000..81df4e1
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef _LINUX_RCULIST_H
+#define _LINUX_RCULIST_H
+
+#include <urcu/rculist.h>
+
+
+#include <urcu/rcuhlist.h>
+
+#define hlist_add_head_rcu             cds_hlist_add_head_rcu
+#define hlist_del_rcu                  cds_hlist_del_rcu
+
+#define hlist_for_each_rcu             cds_hlist_for_each_rcu
+#define hlist_for_each_entry_rcu       cds_hlist_for_each_entry_rcu_2
+
+
+#endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
new file mode 100644 (file)
index 0000000..c99d78a
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef __TOOLS_LINUX_RCUPDATE_H
+#define __TOOLS_LINUX_RCUPDATE_H
+
+#include <urcu.h>
+#include <linux/compiler.h>
+
+#define rcu_dereference_check(p, c)    rcu_dereference(p)
+#define rcu_dereference_raw(p)         rcu_dereference(p)
+#define rcu_dereference_protected(p, c)        rcu_dereference(p)
+#define rcu_access_pointer(p)          READ_ONCE(p)
+
+#define kfree_rcu(ptr, rcu_head)       kfree(ptr) /* XXX */
+
+#define RCU_INIT_POINTER(p, v)         WRITE_ONCE(p, v)
+
+#endif /* __TOOLS_LINUX_RCUPDATE_H */
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
new file mode 100644 (file)
index 0000000..8dbe153
--- /dev/null
@@ -0,0 +1,463 @@
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_RHASHTABLE_H
+#define _LINUX_RHASHTABLE_H
+
+#include <linux/atomic.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/jhash.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+
+#define RHT_BASE_BITS          4
+#define RHT_HASH_BITS          27
+#define RHT_BASE_SHIFT         RHT_HASH_BITS
+#define RHT_HASH_RESERVED_SPACE        (RHT_BASE_BITS + 1)
+
+struct rhash_head {
+       struct rhash_head __rcu         *next;
+};
+
+struct bucket_table {
+       unsigned int            size;
+       unsigned int            rehash;
+       u32                     hash_rnd;
+       unsigned int            locks_mask;
+       spinlock_t              *locks;
+       struct list_head        walkers;
+       struct rcu_head         rcu;
+
+       struct bucket_table __rcu *future_tbl;
+
+       struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
+};
+
+struct rhashtable_compare_arg {
+       struct rhashtable *ht;
+       const void *key;
+};
+
+typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
+                              const void *obj);
+
+struct rhashtable_params {
+       size_t                  nelem_hint;
+       size_t                  key_len;
+       size_t                  key_offset;
+       size_t                  head_offset;
+       unsigned int            insecure_max_entries;
+       unsigned int            max_size;
+       unsigned int            min_size;
+       u32                     nulls_base;
+       bool                    insecure_elasticity;
+       bool                    automatic_shrinking;
+       size_t                  locks_mul;
+       rht_hashfn_t            hashfn;
+       rht_obj_hashfn_t        obj_hashfn;
+       rht_obj_cmpfn_t         obj_cmpfn;
+};
+
+struct rhashtable {
+       struct bucket_table __rcu       *tbl;
+       atomic_t                        nelems;
+       unsigned int                    key_len;
+       unsigned int                    elasticity;
+       struct rhashtable_params        p;
+       struct work_struct              run_work;
+       struct mutex                    mutex;
+       spinlock_t                      lock;
+};
+
+struct rhashtable_walker {
+       struct list_head list;
+       struct bucket_table *tbl;
+};
+
+#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
+
+static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
+{
+       return NULLS_MARKER(ht->p.nulls_base + hash);
+}
+
+#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
+       ((ptr) = (typeof(ptr)) rht_marker(ht, hash))
+
+static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
+{
+       return ((unsigned long) ptr & 1);
+}
+
+static inline void *rht_obj(const struct rhashtable *ht,
+                           const struct rhash_head *he)
+{
+       return (char *)he - ht->p.head_offset;
+}
+
+static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
+                                           unsigned int hash)
+{
+       return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
+}
+
+static inline unsigned int rht_key_hashfn(
+       struct rhashtable *ht, const struct bucket_table *tbl,
+       const void *key, const struct rhashtable_params params)
+{
+       unsigned int hash;
+
+       /* params must be equal to ht->p if it isn't constant. */
+       if (!__builtin_constant_p(params.key_len))
+               hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
+       else if (params.key_len) {
+               unsigned int key_len = params.key_len;
+
+               if (params.hashfn)
+                       hash = params.hashfn(key, key_len, tbl->hash_rnd);
+               else if (key_len & (sizeof(u32) - 1))
+                       hash = jhash(key, key_len, tbl->hash_rnd);
+               else
+                       hash = jhash2(key, key_len / sizeof(u32),
+                                     tbl->hash_rnd);
+       } else {
+               unsigned int key_len = ht->p.key_len;
+
+               if (params.hashfn)
+                       hash = params.hashfn(key, key_len, tbl->hash_rnd);
+               else
+                       hash = jhash(key, key_len, tbl->hash_rnd);
+       }
+
+       return rht_bucket_index(tbl, hash);
+}
+
+static inline unsigned int rht_head_hashfn(
+       struct rhashtable *ht, const struct bucket_table *tbl,
+       const struct rhash_head *he, const struct rhashtable_params params)
+{
+       const char *ptr = rht_obj(ht, he);
+
+       return likely(params.obj_hashfn) ?
+              rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
+                                                           ht->p.key_len,
+                                                      tbl->hash_rnd)) :
+              rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
+}
+
+static inline bool rht_grow_above_75(const struct rhashtable *ht,
+                                    const struct bucket_table *tbl)
+{
+       /* Expand table when exceeding 75% load */
+       return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
+              (!ht->p.max_size || tbl->size < ht->p.max_size);
+}
+
+static inline bool rht_shrink_below_30(const struct rhashtable *ht,
+                                      const struct bucket_table *tbl)
+{
+       /* Shrink table beneath 30% load */
+       return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
+              tbl->size > ht->p.min_size;
+}
+
+static inline bool rht_grow_above_100(const struct rhashtable *ht,
+                                     const struct bucket_table *tbl)
+{
+       return atomic_read(&ht->nelems) > tbl->size &&
+               (!ht->p.max_size || tbl->size < ht->p.max_size);
+}
+
+static inline bool rht_grow_above_max(const struct rhashtable *ht,
+                                     const struct bucket_table *tbl)
+{
+       return ht->p.insecure_max_entries &&
+              atomic_read(&ht->nelems) >= ht->p.insecure_max_entries;
+}
+
+static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
+                                         unsigned int hash)
+{
+       return &tbl->locks[hash & tbl->locks_mask];
+}
+
+int rhashtable_insert_rehash(struct rhashtable *, struct bucket_table *);
+struct bucket_table *rhashtable_insert_slow(struct rhashtable *,
+                                           const void *,
+                                           struct rhash_head *,
+                                           struct bucket_table *);
+
+int rhashtable_init(struct rhashtable *, const struct rhashtable_params *);
+void rhashtable_destroy(struct rhashtable *);
+
+#define rht_dereference(p, ht)                 rcu_dereference(p)
+#define rht_dereference_rcu(p, ht)             rcu_dereference(p)
+#define rht_dereference_bucket(p, tbl, hash)   rcu_dereference(p)
+#define rht_dereference_bucket_rcu(p, tbl, hash) rcu_dereference(p)
+
+#define rht_entry(tpos, pos, member) \
+       ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
+
+#define rht_for_each_continue(pos, head, tbl, hash) \
+       for (pos = rht_dereference_bucket(head, tbl, hash); \
+            !rht_is_a_nulls(pos); \
+            pos = rht_dereference_bucket((pos)->next, tbl, hash))
+
+#define rht_for_each(pos, tbl, hash) \
+       rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
+
+#define rht_for_each_rcu_continue(pos, head, tbl, hash)                        \
+       for (({barrier(); }),                                           \
+            pos = rht_dereference_bucket_rcu(head, tbl, hash);         \
+            !rht_is_a_nulls(pos);                                      \
+            pos = rcu_dereference_raw(pos->next))
+
+#define rht_for_each_rcu(pos, tbl, hash)                               \
+       rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
+
+#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
+       for (({barrier(); }),                                               \
+            pos = rht_dereference_bucket_rcu(head, tbl, hash);             \
+            (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);        \
+            pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
+
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)           \
+       rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
+                                       tbl, hash, member)
+
+static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
+                                    const void *obj)
+{
+       struct rhashtable *ht = arg->ht;
+       const char *ptr = obj;
+
+       return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
+}
+
+static inline void *rhashtable_lookup_fast(
+       struct rhashtable *ht, const void *key,
+       const struct rhashtable_params params)
+{
+       struct rhashtable_compare_arg arg = {
+               .ht = ht,
+               .key = key,
+       };
+       const struct bucket_table *tbl;
+       struct rhash_head *he;
+       unsigned int hash;
+
+       rcu_read_lock();
+
+       tbl = rht_dereference_rcu(ht->tbl, ht);
+restart:
+       hash = rht_key_hashfn(ht, tbl, key, params);
+       rht_for_each_rcu(he, tbl, hash) {
+               if (params.obj_cmpfn ?
+                   params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+                   rhashtable_compare(&arg, rht_obj(ht, he)))
+                       continue;
+               rcu_read_unlock();
+               return rht_obj(ht, he);
+       }
+
+       /* Ensure we see any new tables. */
+       smp_rmb();
+
+       tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+       if (unlikely(tbl))
+               goto restart;
+       rcu_read_unlock();
+
+       return NULL;
+}
+
+static inline int __rhashtable_insert_fast(
+       struct rhashtable *ht, const void *key, struct rhash_head *obj,
+       const struct rhashtable_params params)
+{
+       struct rhashtable_compare_arg arg = {
+               .ht = ht,
+               .key = key,
+       };
+       struct bucket_table *tbl, *new_tbl;
+       struct rhash_head *head;
+       spinlock_t *lock;
+       unsigned int elasticity;
+       unsigned int hash;
+       int err;
+
+restart:
+       rcu_read_lock();
+
+       tbl = rht_dereference_rcu(ht->tbl, ht);
+
+       /* All insertions must grab the oldest table containing
+        * the hashed bucket that is yet to be rehashed.
+        */
+       for (;;) {
+               hash = rht_head_hashfn(ht, tbl, obj, params);
+               lock = rht_bucket_lock(tbl, hash);
+               spin_lock_bh(lock);
+
+               if (tbl->rehash <= hash)
+                       break;
+
+               spin_unlock_bh(lock);
+               tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+       }
+
+       new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+       if (unlikely(new_tbl)) {
+               tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
+               if (!IS_ERR_OR_NULL(tbl))
+                       goto slow_path;
+
+               err = PTR_ERR(tbl);
+               goto out;
+       }
+
+       err = -E2BIG;
+       if (unlikely(rht_grow_above_max(ht, tbl)))
+               goto out;
+
+       if (unlikely(rht_grow_above_100(ht, tbl))) {
+slow_path:
+               spin_unlock_bh(lock);
+               err = rhashtable_insert_rehash(ht, tbl);
+               rcu_read_unlock();
+               if (err)
+                       return err;
+
+               goto restart;
+       }
+
+       err = -EEXIST;
+       elasticity = ht->elasticity;
+       rht_for_each(head, tbl, hash) {
+               if (key &&
+                   unlikely(!(params.obj_cmpfn ?
+                              params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+                              rhashtable_compare(&arg, rht_obj(ht, head)))))
+                       goto out;
+               if (!--elasticity)
+                       goto slow_path;
+       }
+
+       err = 0;
+
+       head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+       RCU_INIT_POINTER(obj->next, head);
+
+       rcu_assign_pointer(tbl->buckets[hash], obj);
+
+       atomic_inc(&ht->nelems);
+       if (rht_grow_above_75(ht, tbl))
+               schedule_work(&ht->run_work);
+
+out:
+       spin_unlock_bh(lock);
+       rcu_read_unlock();
+
+       return err;
+}
+
+static inline int rhashtable_lookup_insert_fast(
+       struct rhashtable *ht, struct rhash_head *obj,
+       const struct rhashtable_params params)
+{
+       const char *key = rht_obj(ht, obj);
+
+       BUG_ON(ht->p.obj_hashfn);
+
+       return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
+                                       params);
+}
+
+static inline int __rhashtable_remove_fast(
+       struct rhashtable *ht, struct bucket_table *tbl,
+       struct rhash_head *obj, const struct rhashtable_params params)
+{
+       struct rhash_head __rcu **pprev;
+       struct rhash_head *he;
+       spinlock_t *lock;
+       unsigned int hash;
+       int err = -ENOENT;
+
+       hash = rht_head_hashfn(ht, tbl, obj, params);
+       lock = rht_bucket_lock(tbl, hash);
+
+       spin_lock_bh(lock);
+
+       pprev = &tbl->buckets[hash];
+       rht_for_each(he, tbl, hash) {
+               if (he != obj) {
+                       pprev = &he->next;
+                       continue;
+               }
+
+               rcu_assign_pointer(*pprev, obj->next);
+               err = 0;
+               break;
+       }
+
+       spin_unlock_bh(lock);
+
+       return err;
+}
+
+static inline int rhashtable_remove_fast(
+       struct rhashtable *ht, struct rhash_head *obj,
+       const struct rhashtable_params params)
+{
+       struct bucket_table *tbl;
+       int err;
+
+       rcu_read_lock();
+
+       tbl = rht_dereference_rcu(ht->tbl, ht);
+
+       /* Because we have already taken (and released) the bucket
+        * lock in old_tbl, if we find that future_tbl is not yet
+        * visible then that guarantees that the entry is still in
+        * the old tbl, if it exists.
+        */
+       while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) &&
+              (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+               ;
+
+       if (err)
+               goto out;
+
+       atomic_dec(&ht->nelems);
+       if (unlikely(ht->p.automatic_shrinking &&
+                    rht_shrink_below_30(ht, tbl)))
+               schedule_work(&ht->run_work);
+
+out:
+       rcu_read_unlock();
+
+       return err;
+}
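+
+/*
+ * Usage sketch (hypothetical object type): a fixed-length-key table only
+ * needs key_len, key_offset and head_offset in its params; hashing then
+ * defaults to jhash()/jhash2().
+ *
+ *      struct my_obj {
+ *              u32                     key;
+ *              struct rhash_head       node;
+ *      };
+ *
+ *      static const struct rhashtable_params my_params = {
+ *              .key_len        = sizeof(u32),
+ *              .key_offset     = offsetof(struct my_obj, key),
+ *              .head_offset    = offsetof(struct my_obj, node),
+ *      };
+ *
+ *      struct rhashtable ht;
+ *      struct my_obj *obj = ...;       (a populated object)
+ *
+ *      rhashtable_init(&ht, &my_params);
+ *      rhashtable_lookup_insert_fast(&ht, &obj->node, my_params);
+ *      struct my_obj *o = rhashtable_lookup_fast(&ht, &obj->key, my_params);
+ */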
+
+#endif /* _LINUX_RHASHTABLE_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
new file mode 100644 (file)
index 0000000..9d70e6e
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef __TOOLS_LINUX_RWSEM_H
+#define __TOOLS_LINUX_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+       pthread_rwlock_t        lock;
+};
+
+#define __RWSEM_INITIALIZER(name)                              \
+       { .lock = PTHREAD_RWLOCK_INITIALIZER }
+
+#define DECLARE_RWSEM(name) \
+       struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+static inline void init_rwsem(struct rw_semaphore *lock)
+{
+       pthread_rwlock_init(&lock->lock, NULL);
+}
+
+#define down_read(l)           pthread_rwlock_rdlock(&(l)->lock)
+#define down_read_trylock(l)   (!pthread_rwlock_tryrdlock(&(l)->lock))
+#define up_read(l)             pthread_rwlock_unlock(&(l)->lock)
+
+#define down_write(l)          pthread_rwlock_wrlock(&(l)->lock)
+#define up_write(l)            pthread_rwlock_unlock(&(l)->lock)
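+
+/*
+ * Usage sketch: a direct mapping onto pthread rwlocks, so the blocking
+ * and fairness behaviour is whatever the host pthread implementation
+ * provides.
+ *
+ *      static DECLARE_RWSEM(my_sem);
+ *
+ *      down_read(&my_sem);
+ *      ...read-side section...
+ *      up_read(&my_sem);
+ */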
+
+#endif /* __TOOLS_LINUX_RWSEM_H */
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
new file mode 100644 (file)
index 0000000..1e4395c
--- /dev/null
@@ -0,0 +1,109 @@
+#ifndef _LINUX_SCATTERLIST_H
+#define _LINUX_SCATTERLIST_H
+
+#include <linux/bug.h>
+#include <linux/slab.h>
+
+struct scatterlist {
+       unsigned long   page_link;
+       unsigned int    offset;
+       unsigned int    length;
+};
+
+#define sg_is_chain(sg)                ((sg)->page_link & 0x01)
+#define sg_is_last(sg)         ((sg)->page_link & 0x02)
+#define sg_chain_ptr(sg)       \
+       ((struct scatterlist *) ((sg)->page_link & ~0x03))
+
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+       unsigned long page_link = sg->page_link & 0x3;
+
+       /*
+        * In order for the low bit stealing approach to work, pages
+        * must be aligned at a 32-bit boundary as a minimum.
+        */
+       BUG_ON((unsigned long) page & 0x03);
+       sg->page_link = page_link | (unsigned long) page;
+}
+
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+                              unsigned int len, unsigned int offset)
+{
+       sg_assign_page(sg, page);
+       sg->offset = offset;
+       sg->length = len;
+}
+
+static inline struct page *sg_page(struct scatterlist *sg)
+{
+       return (struct page *)((sg)->page_link & ~0x3);
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+                             unsigned int buflen)
+{
+       sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+       if (sg_is_last(sg))
+               return NULL;
+
+       sg++;
+       if (unlikely(sg_is_chain(sg)))
+               sg = sg_chain_ptr(sg);
+
+       return sg;
+}
+
+#define for_each_sg(sglist, sg, nr, __i)       \
+       for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+
+static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+                           struct scatterlist *sgl)
+{
+       /*
+        * offset and length are unused for chain entry.  Clear them.
+        */
+       prv[prv_nents - 1].offset = 0;
+       prv[prv_nents - 1].length = 0;
+
+       /*
+        * Set lowest bit to indicate a link pointer, and make sure to clear
+        * the termination bit if it happens to be set.
+        */
+       prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+}
+
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+       sg->page_link |= 0x02;
+       sg->page_link &= ~0x01;
+}
+
+static inline void sg_unmark_end(struct scatterlist *sg)
+{
+       sg->page_link &= ~0x02;
+}
+
+static inline void *sg_virt(struct scatterlist *sg)
+{
+       return page_address(sg_page(sg)) + sg->offset;
+}
+
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+       memset(sgl, 0, sizeof(*sgl) * nents);
+       sg_mark_end(&sgl[nents - 1]);
+}
+
+static inline void sg_init_one(struct scatterlist *sg, const void *buf,
+                              unsigned int buflen)
+{
+       sg_init_table(sg, 1);
+       sg_set_buf(sg, buf, buflen);
+}
+
+#endif /* _LINUX_SCATTERLIST_H */
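
A sketch of the intended calling pattern: describe a single buffer with
sg_init_one(), or build a table and walk it with for_each_sg() (virt_to_page()
and page_address() come from the page shims elsewhere in this tree):

    static void hash_two_buffers(void *a, unsigned a_len, void *b, unsigned b_len)
    {
            struct scatterlist sgl[2], *sg;
            int i;

            sg_init_table(sgl, 2);              /* zeroes entries, marks sgl[1] last */
            sg_set_buf(&sgl[0], a, a_len);
            sg_set_buf(&sgl[1], b, b_len);

            for_each_sg(sgl, sg, 2, i) {
                    void *p = sg_virt(sg);
                    /* feed p, sg->length into a hash or cipher here */
                    (void) p;
            }
    }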
diff --git a/include/linux/sched.h b/include/linux/sched.h
new file mode 100644 (file)
index 0000000..48d20e2
--- /dev/null
@@ -0,0 +1,160 @@
+#ifndef __TOOLS_LINUX_SCHED_H
+#define __TOOLS_LINUX_SCHED_H
+
+#include <pthread.h>
+#include <time.h>
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/completion.h>
+#include <linux/jiffies.h>
+#include <linux/time64.h>
+
+#define TASK_RUNNING           0
+#define TASK_INTERRUPTIBLE     1
+#define TASK_UNINTERRUPTIBLE   2
+#define __TASK_STOPPED         4
+#define __TASK_TRACED          8
+/* in tsk->exit_state */
+#define EXIT_DEAD              16
+#define EXIT_ZOMBIE            32
+#define EXIT_TRACE             (EXIT_ZOMBIE | EXIT_DEAD)
+/* in tsk->state again */
+#define TASK_DEAD              64
+#define TASK_WAKEKILL          128
+#define TASK_WAKING            256
+#define TASK_PARKED            512
+#define TASK_NOLOAD            1024
+#define TASK_NEW               2048
+#define TASK_IDLE_WORKER       4096
+#define TASK_STATE_MAX         8192
+
+/* Convenience macros for the sake of set_task_state */
+#define TASK_KILLABLE          (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+#define TASK_STOPPED           (TASK_WAKEKILL | __TASK_STOPPED)
+#define TASK_TRACED            (TASK_WAKEKILL | __TASK_TRACED)
+
+#define TASK_IDLE              (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
+
+/* Convenience macros for the sake of wake_up */
+#define TASK_NORMAL            (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+#define TASK_ALL               (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+
+#define TASK_COMM_LEN 16
+
+#define PF_EXITING     0x00000004      /* getting shut down */
+#define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
+#define PF_VCPU                0x00000010      /* I'm a virtual CPU */
+#define PF_WQ_WORKER   0x00000020      /* I'm a workqueue worker */
+#define PF_FORKNOEXEC  0x00000040      /* forked but didn't exec */
+#define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
+#define PF_SUPERPRIV   0x00000100      /* used super-user privileges */
+#define PF_DUMPCORE    0x00000200      /* dumped core */
+#define PF_SIGNALED    0x00000400      /* killed by a signal */
+#define PF_MEMALLOC    0x00000800      /* Allocating memory */
+#define PF_NPROC_EXCEEDED 0x00001000   /* set_user noticed that RLIMIT_NPROC was exceeded */
+#define PF_USED_MATH   0x00002000      /* if unset the fpu must be initialized before use */
+#define PF_USED_ASYNC  0x00004000      /* used async_schedule*(), used by module init */
+#define PF_NOFREEZE    0x00008000      /* this thread should not be frozen */
+#define PF_FROZEN      0x00010000      /* frozen for system suspend */
+#define PF_FSTRANS     0x00020000      /* inside a filesystem transaction */
+#define PF_KSWAPD      0x00040000      /* I am kswapd */
+#define PF_MEMALLOC_NOIO 0x00080000    /* Allocating memory without IO involved */
+#define PF_LESS_THROTTLE 0x00100000    /* Throttle me less: I clean memory */
+#define PF_KTHREAD     0x00200000      /* I am a kernel thread */
+#define PF_RANDOMIZE   0x00400000      /* randomize virtual address space */
+#define PF_SWAPWRITE   0x00800000      /* Allowed to write to swap */
+#define PF_NO_SETAFFINITY 0x04000000   /* Userland is not allowed to meddle with cpus_allowed */
+#define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
+#define PF_MUTEX_TESTER        0x20000000      /* Thread belongs to the rt mutex tester */
+#define PF_FREEZER_SKIP        0x40000000      /* Freezer should not count it as freezable */
+
+struct task_struct {
+       pthread_t               thread;
+
+       int                     (*thread_fn)(void *);
+       void                    *thread_data;
+
+       atomic_t                usage;
+       int                     state;
+
+       /* kthread: */
+       unsigned long           kthread_flags;
+       struct completion       exited;
+
+       unsigned                flags;
+
+       bool                    on_cpu;
+       char                    comm[TASK_COMM_LEN];
+       pid_t                   pid;
+
+       struct bio_list         *bio_list;
+};
+
+extern __thread struct task_struct *current;
+
+#define __set_task_state(tsk, state_value)             \
+       do { (tsk)->state = (state_value); } while (0)
+#define set_task_state(tsk, state_value)               \
+       smp_store_mb((tsk)->state, (state_value))
+#define __set_current_state(state_value)               \
+       do { current->state = (state_value); } while (0)
+#define set_current_state(state_value)                 \
+       smp_store_mb(current->state, (state_value))
+
+#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
+
+extern void __put_task_struct(struct task_struct *t);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+       if (atomic_dec_and_test(&t->usage))
+               __put_task_struct(t);
+}
+
+#define cond_resched()
+#define need_resched() 0
+
+void schedule(void);
+
+#define        MAX_SCHEDULE_TIMEOUT    LONG_MAX
+long schedule_timeout(long timeout);
+
+static inline void io_schedule(void)
+{
+       schedule();
+}
+
+static inline long io_schedule_timeout(long timeout)
+{
+       return schedule_timeout(timeout);
+}
+
+int wake_up_process(struct task_struct *);
+
+static inline u64 ktime_get_seconds(void)
+{
+       struct timespec ts;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts);
+
+       return ts.tv_sec;
+}
+
+static inline u64 ktime_get_real_seconds(void)
+{
+       struct timespec ts;
+
+       clock_gettime(CLOCK_REALTIME, &ts);
+
+       return ts.tv_sec;
+}
+
+static inline void ktime_get_coarse_real_ts64(struct timespec64 *ts)
+{
+       clock_gettime(CLOCK_REALTIME_COARSE, ts);
+}
+
+#define current_kernel_time64()        current_kernel_time()
+#define CURRENT_TIME           (current_kernel_time())
+
+#endif /* __TOOLS_LINUX_SCHED_H */
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
new file mode 100644 (file)
index 0000000..a89c626
--- /dev/null
@@ -0,0 +1,6 @@
+
+static inline void task_cputime_adjusted(struct task_struct *p, u64 *utime, u64 *stime)
+{
+       *utime = 0;
+       *stime = 0;
+}
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
new file mode 100644 (file)
index 0000000..347105c
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef _LINUX_SCHED_MM_H
+#define _LINUX_SCHED_MM_H
+
+#define PF_MEMALLOC_NOFS 0
+
+static inline unsigned int memalloc_nofs_save(void)
+{
+       unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
+       current->flags |= PF_MEMALLOC_NOFS;
+       return flags;
+}
+
+static inline void memalloc_nofs_restore(unsigned int flags)
+{
+       current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
+}
+
+#endif /* _LINUX_SCHED_MM_H */
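
The save/restore pair follows the kernel pattern: bracket a region in which
allocations must not recurse into the filesystem, then restore the prior state
(a no-op here, since PF_MEMALLOC_NOFS is 0 in this shim). A sketch:

    static void write_metadata(void)
    {
            unsigned flags = memalloc_nofs_save();

            /* allocations in here must not re-enter the fs */

            memalloc_nofs_restore(flags);
    }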
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
new file mode 100644 (file)
index 0000000..ef3040e
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _SCHED_RT_H
+#define _SCHED_RT_H
+
+static inline int rt_task(struct task_struct *p)
+{
+       return 0;
+}
+
+#endif /* _SCHED_RT_H */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
new file mode 100644 (file)
index 0000000..20bdc05
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_SIGNAL_H
+#define _LINUX_SCHED_SIGNAL_H
+
+static inline int fatal_signal_pending(struct task_struct *p)
+{
+       return 0;
+}
+
+#endif /* _LINUX_SCHED_SIGNAL_H */
+
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
new file mode 100644 (file)
index 0000000..498e717
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Please see kernel/locking/semaphore.c for documentation of these functions
+ */
+#ifndef __LINUX_SEMAPHORE_H
+#define __LINUX_SEMAPHORE_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/* Please don't access any members of this structure directly */
+struct semaphore {
+       raw_spinlock_t          lock;
+       unsigned int            count;
+       struct list_head        wait_list;
+};
+
+#define __SEMAPHORE_INITIALIZER(name, n)                               \
+{                                                                      \
+       .lock           = __RAW_SPIN_LOCK_UNLOCKED((name).lock),        \
+       .count          = n,                                            \
+       .wait_list      = LIST_HEAD_INIT((name).wait_list),             \
+}
+
+#define DEFINE_SEMAPHORE(name) \
+       struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
+
+static inline void sema_init(struct semaphore *sem, int val)
+{
+       *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
+}
+
+extern void down(struct semaphore *sem);
+extern int __must_check down_interruptible(struct semaphore *sem);
+extern int __must_check down_killable(struct semaphore *sem);
+extern int __must_check down_trylock(struct semaphore *sem);
+extern int __must_check down_timeout(struct semaphore *sem, long);
+extern void up(struct semaphore *sem);
+
+#endif /* __LINUX_SEMAPHORE_H */
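
Usage matches the API documented in kernel/locking/semaphore.c; a sketch with a
counting semaphore limiting concurrent users of a resource to four:

    static struct semaphore slots = __SEMAPHORE_INITIALIZER(slots, 4);

    static void use_slot(void)
    {
            down(&slots);           /* blocks while all four slots are taken */
            /* ... use the resource ... */
            up(&slots);
    }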
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
new file mode 100644 (file)
index 0000000..b455ebc
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _LINUX_SEQ_FILE_H
+#define _LINUX_SEQ_FILE_H
+
+#include <linux/types.h>
+#include <linux/fs.h>
+
+struct seq_file {
+       char *buf;
+       size_t size;
+       size_t from;
+       size_t count;
+       size_t pad_until;
+       loff_t index;
+       loff_t read_pos;
+       u64 version;
+       int poll_event;
+       const struct file *file;
+       void *private;
+};
+
+#endif
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
new file mode 100644 (file)
index 0000000..435420f
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef __LINUX_SEQLOCK_H
+#define __LINUX_SEQLOCK_H
+
+#include <linux/compiler.h>
+
+typedef struct seqcount {
+       unsigned sequence;
+} seqcount_t;
+
+static inline void seqcount_init(seqcount_t *s)
+{
+       s->sequence = 0;
+}
+
+static inline unsigned read_seqcount_begin(const seqcount_t *s)
+{
+       unsigned ret;
+
+repeat:
+       ret = READ_ONCE(s->sequence);
+       if (unlikely(ret & 1)) {
+               cpu_relax();
+               goto repeat;
+       }
+       smp_rmb();
+       return ret;
+}
+
+static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
+{
+       smp_rmb();
+       return unlikely(s->sequence != start);
+}
+
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+       s->sequence++;
+       smp_wmb();
+}
+
+static inline void write_seqcount_end(seqcount_t *s)
+{
+       smp_wmb();
+       s->sequence++;
+}
+
+#endif /* __LINUX_SEQLOCK_H */
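
This supports the classic lockless-reader pattern: the writer (externally
serialized) brackets updates with write_seqcount_begin()/end(), and readers
retry whenever the sequence was odd or changed underneath them. A sketch:

    static seqcount_t stats_seq;        /* zero-initialized, same as seqcount_init() */
    static u64 stats_a, stats_b;

    static void stats_update(u64 a, u64 b)
    {
            write_seqcount_begin(&stats_seq);
            stats_a = a;
            stats_b = b;
            write_seqcount_end(&stats_seq);
    }

    static u64 stats_sum(void)
    {
            unsigned seq;
            u64 sum;

            do {
                    seq = read_seqcount_begin(&stats_seq);
                    sum = stats_a + stats_b;
            } while (read_seqcount_retry(&stats_seq, seq));

            return sum;
    }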
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
new file mode 100644 (file)
index 0000000..626b768
--- /dev/null
@@ -0,0 +1,30 @@
+#ifndef __TOOLS_LINUX_SHRINKER_H
+#define __TOOLS_LINUX_SHRINKER_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct shrink_control {
+       gfp_t gfp_mask;
+       unsigned long nr_to_scan;
+};
+
+#define SHRINK_STOP (~0UL)
+
+struct shrinker {
+       unsigned long (*count_objects)(struct shrinker *,
+                                      struct shrink_control *sc);
+       unsigned long (*scan_objects)(struct shrinker *,
+                                     struct shrink_control *sc);
+
+       int seeks;      /* seeks to recreate an obj */
+       long batch;     /* reclaim batch size, 0 = default */
+       struct list_head list;
+};
+
+int register_shrinker(struct shrinker *);
+void unregister_shrinker(struct shrinker *);
+
+void run_shrinkers(void);
+
+#endif /* __TOOLS_LINUX_SHRINKER_H */
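
The slab shims call run_shrinkers() on every allocation, so registering a
shrinker is how a cache volunteers memory back under pressure. A sketch with
hypothetical count/scan callbacks:

    static unsigned long demo_count(struct shrinker *s, struct shrink_control *sc)
    {
            return 128;                     /* freeable objects (hypothetical) */
    }

    static unsigned long demo_scan(struct shrinker *s, struct shrink_control *sc)
    {
            /* free up to sc->nr_to_scan objects; return how many were freed */
            return sc->nr_to_scan;
    }

    static struct shrinker demo_shrinker = {
            .count_objects  = demo_count,
            .scan_objects   = demo_scan,
            .seeks          = 1,
    };

    /* register_shrinker(&demo_shrinker); ... unregister_shrinker(&demo_shrinker); */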
diff --git a/include/linux/six.h b/include/linux/six.h
new file mode 100644 (file)
index 0000000..a16e94f
--- /dev/null
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_SIX_H
+#define _LINUX_SIX_H
+
+/*
+ * Shared/intent/exclusive locks: sleepable read/write locks, much like rw
+ * semaphores, except with a third intermediate state, intent. Basic operations
+ * are:
+ *
+ * six_lock_read(&foo->lock);
+ * six_unlock_read(&foo->lock);
+ *
+ * six_lock_intent(&foo->lock);
+ * six_unlock_intent(&foo->lock);
+ *
+ * six_lock_write(&foo->lock);
+ * six_unlock_write(&foo->lock);
+ *
+ * Intent locks block other intent locks, but do not block read locks, and you
+ * must have an intent lock held before taking a write lock, like so:
+ *
+ * six_lock_intent(&foo->lock);
+ * six_lock_write(&foo->lock);
+ * six_unlock_write(&foo->lock);
+ * six_unlock_intent(&foo->lock);
+ *
+ * Other operations:
+ *
+ *   six_trylock_read()
+ *   six_trylock_intent()
+ *   six_trylock_write()
+ *
+ *   six_lock_downgrade():     convert from intent to read
+ *   six_lock_tryupgrade():    attempt to convert from read to intent
+ *
+ * Locks also embed a sequence number, which is incremented when the lock is
+ * locked or unlocked for write. The current sequence number can be grabbed
+ * while a lock is held from lock->state.seq; then, if you drop the lock you can
+ * use six_relock_(read|intent|write)(lock, seq) to attempt to retake the lock
+ * iff it hasn't been locked for write in the meantime.
+ *
+ * There are also operations that take the lock type as a parameter, where the
+ * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write:
+ *
+ *   six_lock_type(lock, type)
+ *   six_unlock_type(lock, type)
+ *   six_relock(lock, type, seq)
+ *   six_trylock_type(lock, type)
+ *   six_trylock_convert(lock, from, to)
+ *
+ * A lock may be held multiple times by the same thread (for read or intent,
+ * not write). However, the six locks code does _not_ implement the actual
+ * recursive checks itself - rather, if your code (e.g. btree iterator
+ * code) knows that the current thread already has a lock held, and for the
+ * correct type, six_lock_increment() may be used to bump up the counter for
+ * that type - the only effect is that one more call to unlock will be required
+ * before the lock is unlocked.
+ */
+
+#include <linux/lockdep.h>
+#include <linux/osq_lock.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#define SIX_LOCK_SEPARATE_LOCKFNS
+
+union six_lock_state {
+       struct {
+               atomic64_t      counter;
+       };
+
+       struct {
+               u64             v;
+       };
+
+       struct {
+               /* for waitlist_bitnr() */
+               unsigned long   l;
+       };
+
+       struct {
+               unsigned        read_lock:28;
+               unsigned        intent_lock:1;
+               unsigned        waiters:3;
+               /*
+                * seq works much like in seqlocks: it's incremented every time
+                * we lock and unlock for write.
+                *
+                * If it's odd, the write lock is held; if it's even, the lock is unlocked.
+                *
+                * Thus readers can unlock, and then lock again later iff it
+                * hasn't been modified in the meantime.
+                */
+               u32             seq;
+       };
+};
+
+enum six_lock_type {
+       SIX_LOCK_read,
+       SIX_LOCK_intent,
+       SIX_LOCK_write,
+};
+
+struct six_lock {
+       union six_lock_state    state;
+       unsigned                intent_lock_recurse;
+       struct task_struct      *owner;
+       struct optimistic_spin_queue osq;
+
+       raw_spinlock_t          wait_lock;
+       struct list_head        wait_list[2];
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lockdep_map      dep_map;
+#endif
+};
+
+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
+
+static __always_inline void __six_lock_init(struct six_lock *lock,
+                                           const char *name,
+                                           struct lock_class_key *key)
+{
+       atomic64_set(&lock->state.counter, 0);
+       raw_spin_lock_init(&lock->wait_lock);
+       INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_read]);
+       INIT_LIST_HEAD(&lock->wait_list[SIX_LOCK_intent]);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       debug_check_no_locks_freed((void *) lock, sizeof(*lock));
+       lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+}
+
+#define six_lock_init(lock)                                            \
+do {                                                                   \
+       static struct lock_class_key __key;                             \
+                                                                       \
+       __six_lock_init((lock), #lock, &__key);                         \
+} while (0)
+
+#define __SIX_VAL(field, _v)   (((union six_lock_state) { .field = _v }).v)
+
+#define __SIX_LOCK(type)                                               \
+bool six_trylock_##type(struct six_lock *);                            \
+bool six_relock_##type(struct six_lock *, u32);                                \
+int six_lock_##type(struct six_lock *, six_lock_should_sleep_fn, void *);\
+void six_unlock_##type(struct six_lock *);
+
+__SIX_LOCK(read)
+__SIX_LOCK(intent)
+__SIX_LOCK(write)
+#undef __SIX_LOCK
+
+#define SIX_LOCK_DISPATCH(type, fn, ...)                       \
+       switch (type) {                                         \
+       case SIX_LOCK_read:                                     \
+               return fn##_read(__VA_ARGS__);                  \
+       case SIX_LOCK_intent:                                   \
+               return fn##_intent(__VA_ARGS__);                \
+       case SIX_LOCK_write:                                    \
+               return fn##_write(__VA_ARGS__);                 \
+       default:                                                \
+               BUG();                                          \
+       }
+
+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
+{
+       SIX_LOCK_DISPATCH(type, six_trylock, lock);
+}
+
+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
+                                  unsigned seq)
+{
+       SIX_LOCK_DISPATCH(type, six_relock, lock, seq);
+}
+
+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
+                               six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+       SIX_LOCK_DISPATCH(type, six_lock, lock, should_sleep_fn, p);
+}
+
+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+{
+       SIX_LOCK_DISPATCH(type, six_unlock, lock);
+}
+
+void six_lock_downgrade(struct six_lock *);
+bool six_lock_tryupgrade(struct six_lock *);
+bool six_trylock_convert(struct six_lock *, enum six_lock_type,
+                        enum six_lock_type);
+
+void six_lock_increment(struct six_lock *, enum six_lock_type);
+
+void six_lock_wakeup_all(struct six_lock *);
+
+#endif /* _LINUX_SIX_H */
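
A sketch of the sequence-number pattern described in the header comment: record
state.seq while holding the lock, drop it, and later retake the lock with
six_relock_intent() iff nothing was written in between (the names match the
declarations generated by __SIX_LOCK above):

    static bool do_work_unlocked(struct six_lock *l)
    {
            u32 seq;

            six_lock_intent(l, NULL, NULL);
            seq = l->state.seq;             /* version we observed */
            six_unlock_intent(l);

            /* ... work without the lock held ... */

            if (!six_relock_intent(l, seq))
                    return false;           /* written meanwhile; caller restarts */

            six_unlock_intent(l);
            return true;
    }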
diff --git a/include/linux/slab.h b/include/linux/slab.h
new file mode 100644 (file)
index 0000000..32ffa55
--- /dev/null
@@ -0,0 +1,135 @@
+#ifndef __TOOLS_LINUX_SLAB_H
+#define __TOOLS_LINUX_SLAB_H
+
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/page.h>
+#include <linux/shrinker.h>
+#include <linux/types.h>
+
+#define ARCH_KMALLOC_MINALIGN          16
+#define KMALLOC_MAX_SIZE               SIZE_MAX
+
+static inline void *kmalloc(size_t size, gfp_t flags)
+{
+       void *p;
+
+       run_shrinkers();
+
+       if (size) {
+               size_t alignment = min(rounddown_pow_of_two(size), (size_t)PAGE_SIZE);
+               alignment = max(sizeof(void *), alignment);
+               if (posix_memalign(&p, alignment, size))
+                       p = NULL;
+       } else {
+               p = malloc(0);
+       }
+       if (p && (flags & __GFP_ZERO))
+               memset(p, 0, size);
+
+       return p;
+}
+
+static inline void *krealloc(void *old, size_t size, gfp_t flags)
+{
+       void *new;
+
+       run_shrinkers();
+
+       new = malloc(size);
+       if (!new)
+               return NULL;
+
+       if (flags & __GFP_ZERO)
+               memset(new, 0, size);
+
+       memcpy(new, old,
+              min(malloc_usable_size(old),
+                  malloc_usable_size(new)));
+       free(old);
+
+       return new;
+}
+
+#define kzalloc(size, flags)           kmalloc(size, flags|__GFP_ZERO)
+#define kmalloc_array(n, size, flags)                                  \
+       ((size) != 0 && (n) > SIZE_MAX / (size)                         \
+        ? NULL : kmalloc((n) * (size), flags))
+
+#define kcalloc(n, size, flags)                kmalloc_array(n, size, flags|__GFP_ZERO)
+
+#define kfree(p)                       free(p)
+#define kzfree(p)                      free(p)
+
+#define kvmalloc(size, flags)          kmalloc(size, flags)
+#define kvfree(p)                      kfree(p)
+
+static inline struct page *alloc_pages(gfp_t flags, unsigned int order)
+{
+       size_t size = PAGE_SIZE << order;
+       void *p;
+
+       run_shrinkers();
+
+       p = aligned_alloc(PAGE_SIZE, size);
+       if (p && (flags & __GFP_ZERO))
+               memset(p, 0, size);
+
+       return p;
+}
+
+#define alloc_page(gfp)                        alloc_pages(gfp, 0)
+
+#define __get_free_pages(gfp, order)   ((unsigned long) alloc_pages(gfp, order))
+#define __get_free_page(gfp)           __get_free_pages(gfp, 0)
+
+#define __free_pages(page, order)                      \
+do {                                                   \
+       (void) order;                                   \
+       free(page);                                     \
+} while (0)
+
+#define free_pages(addr, order)                                \
+do {                                                   \
+       (void) order;                                   \
+       free((void *) (addr));                          \
+} while (0)
+
+#define __free_page(page) __free_pages((page), 0)
+#define free_page(addr) free_pages((addr), 0)
+
+#define VM_IOREMAP             0x00000001      /* ioremap() and friends */
+#define VM_ALLOC               0x00000002      /* vmalloc() */
+#define VM_MAP                 0x00000004      /* vmap()ed pages */
+#define VM_USERMAP             0x00000008      /* suitable for remap_vmalloc_range */
+#define VM_UNINITIALIZED       0x00000020      /* vm_struct is not fully initialized */
+#define VM_NO_GUARD            0x00000040      /* don't add guard page */
+#define VM_KASAN               0x00000080      /* has allocated kasan shadow memory */
+
+static inline void vunmap(const void *addr) {}
+
+static inline void *vmap(struct page **pages, unsigned int count,
+                        unsigned long flags, unsigned prot)
+{
+       return NULL;
+}
+
+#define is_vmalloc_addr(addr)          0
+
+#define vmalloc_to_page(addr)          ((struct page *) (addr))
+
+static inline void *kmemdup(const void *src, size_t len, gfp_t gfp)
+{
+       void *p;
+
+       p = kmalloc(len, gfp);
+       if (p)
+               memcpy(p, src, len);
+       return p;
+}
+
+#endif /* __TOOLS_LINUX_SLAB_H */
diff --git a/include/linux/sort.h b/include/linux/sort.h
new file mode 100644 (file)
index 0000000..afea044
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _LINUX_SORT_H
+#define _LINUX_SORT_H
+
+#include <stdlib.h>
+
+static inline void sort(void *base, size_t num, size_t size,
+                       int (*cmp_func)(const void *, const void *),
+                       void (*swap_func)(void *, void *, int size))
+{
+       qsort(base, num, size, cmp_func);       /* swap_func is unused: qsort() does its own swapping */
+}
+
+#endif
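
The calling convention is the kernel's; a comparator sketch:

    static int cmp_u64(const void *_a, const void *_b)
    {
            const u64 *a = _a, *b = _b;

            return *a < *b ? -1 : *a > *b ? 1 : 0;
    }

    /* sort(array, nr, sizeof(array[0]), cmp_u64, NULL); */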
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
new file mode 100644 (file)
index 0000000..c9be6b6
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef __TOOLS_LINUX_SPINLOCK_H
+#define __TOOLS_LINUX_SPINLOCK_H
+
+#include <linux/atomic.h>
+
+typedef struct {
+       int             count;
+} raw_spinlock_t;
+
+#define __RAW_SPIN_LOCK_UNLOCKED(name) (raw_spinlock_t) { .count = 0 }
+
+static inline void raw_spin_lock_init(raw_spinlock_t *lock)
+{
+       smp_store_release(&lock->count, 0);
+}
+
+static inline void raw_spin_lock(raw_spinlock_t *lock)
+{
+       while (xchg_acquire(&lock->count, 1))
+               ;
+}
+
+static inline void raw_spin_unlock(raw_spinlock_t *lock)
+{
+       smp_store_release(&lock->count, 0);
+}
+
+#define raw_spin_lock_irq(lock)                raw_spin_lock(lock)
+#define raw_spin_unlock_irq(lock)      raw_spin_unlock(lock)
+
+#define raw_spin_lock_irqsave(lock, flags)             \
+do {                                                   \
+       flags = 0;                                      \
+       raw_spin_lock(lock);                            \
+} while (0)
+
+#define raw_spin_unlock_irqrestore(lock, flags) raw_spin_unlock(lock)
+
+typedef raw_spinlock_t spinlock_t;
+
+#define __SPIN_LOCK_UNLOCKED(name)     __RAW_SPIN_LOCK_UNLOCKED(name)
+
+#define DEFINE_SPINLOCK(x)     spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
+
+#define spin_lock_init(lock)           raw_spin_lock_init(lock)
+#define spin_lock(lock)                        raw_spin_lock(lock)
+#define spin_unlock(lock)              raw_spin_unlock(lock)
+
+#define spin_lock_nested(lock, n)      spin_lock(lock)
+
+#define spin_lock_bh(lock)             raw_spin_lock(lock)
+#define spin_unlock_bh(lock)           raw_spin_unlock(lock)
+
+#define spin_lock_irq(lock)            raw_spin_lock(lock)
+#define spin_unlock_irq(lock)          raw_spin_unlock(lock)
+
+#define spin_lock_irqsave(lock, flags) raw_spin_lock_irqsave(lock, flags)
+#define spin_unlock_irqrestore(lock, flags) raw_spin_unlock_irqrestore(lock, flags)
+
+#endif /* __TOOLS_LINUX_SPINLOCK_H */
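
Interrupts and bottom halves don't exist in userspace, so every variant above
collapses onto the same xchg-based test-and-set loop; a sketch:

    static DEFINE_SPINLOCK(counter_lock);
    static u64 counter;

    static void counter_inc(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&counter_lock, flags);    /* same as spin_lock() here */
            counter++;
            spin_unlock_irqrestore(&counter_lock, flags);
    }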
diff --git a/include/linux/stat.h b/include/linux/stat.h
new file mode 100644 (file)
index 0000000..1a30957
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _LINUX_STAT_H
+#define _LINUX_STAT_H
+
+#include <sys/stat.h>
+
+#define S_IRWXUGO      (S_IRWXU|S_IRWXG|S_IRWXO)
+#define S_IALLUGO      (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#define S_IRUGO                (S_IRUSR|S_IRGRP|S_IROTH)
+#define S_IWUGO                (S_IWUSR|S_IWGRP|S_IWOTH)
+#define S_IXUGO                (S_IXUSR|S_IXGRP|S_IXOTH)
+
+#endif
diff --git a/include/linux/string.h b/include/linux/string.h
new file mode 100644 (file)
index 0000000..4806e2c
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef _TOOLS_LINUX_STRING_H_
+#define _TOOLS_LINUX_STRING_H_
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h>       /* for size_t */
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+extern char *strim(char *);
+extern void memzero_explicit(void *, size_t);
+int match_string(const char * const *, size_t, const char *);
+
+#define kstrndup(s, n, gfp)            strndup(s, n)
+
+#endif /* _TOOLS_LINUX_STRING_H_ */
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
new file mode 100644 (file)
index 0000000..3ba2f48
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef _SYSFS_H_
+#define _SYSFS_H_
+
+#include <linux/compiler.h>
+
+struct kobject;
+
+struct attribute {
+       const char              *name;
+       umode_t                 mode;
+};
+
+struct sysfs_ops {
+       ssize_t (*show)(struct kobject *, struct attribute *, char *);
+       ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+};
+
+static inline int sysfs_create_files(struct kobject *kobj,
+                                   const struct attribute **attr)
+{
+       return 0;
+}
+
+static inline int sysfs_create_link(struct kobject *kobj,
+                                   struct kobject *target, const char *name)
+{
+       return 0;
+}
+
+static inline void sysfs_remove_link(struct kobject *kobj, const char *name)
+{
+}
+
+#endif /* _SYSFS_H_ */
diff --git a/include/linux/time64.h b/include/linux/time64.h
new file mode 100644 (file)
index 0000000..cd6cc1c
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef _LINUX_TIME64_H
+#define _LINUX_TIME64_H
+
+#include <linux/types.h>
+
+#define timespec64  timespec
+
+typedef __s64 time64_t;
+
+/* Parameters used to convert the timespec values: */
+#define MSEC_PER_SEC   1000L
+#define USEC_PER_MSEC  1000L
+#define NSEC_PER_USEC  1000L
+#define NSEC_PER_MSEC  1000000L
+#define USEC_PER_SEC   1000000L
+#define NSEC_PER_SEC   1000000000L
+#define FSEC_PER_SEC   1000000000000000LL
+
+static inline struct timespec ns_to_timespec(const u64 nsec)
+{
+       return (struct timespec) {
+               .tv_sec = nsec / NSEC_PER_SEC,
+               .tv_nsec = nsec % NSEC_PER_SEC,
+       };
+}
+
+static inline s64 timespec_to_ns(const struct timespec *ts)
+{
+       return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static inline struct timespec timespec_trunc(struct timespec t, unsigned gran)
+{
+       /* Avoid division in the common cases 1 ns and 1 s. */
+       if (gran == 1) {
+               /* nothing */
+       } else if (gran == NSEC_PER_SEC) {
+               t.tv_nsec = 0;
+       } else if (gran > 1 && gran < NSEC_PER_SEC) {
+               t.tv_nsec -= t.tv_nsec % gran;
+       } else {
+               WARN(1, "illegal file time granularity: %u", gran);
+       }
+       return t;
+}
+
+#define ns_to_timespec64       ns_to_timespec
+#define timespec64_to_ns       timespec_to_ns
+#define timespec64_trunc       timespec_trunc
+
+#endif /* _LINUX_TIME64_H */
diff --git a/include/linux/timer.h b/include/linux/timer.h
new file mode 100644 (file)
index 0000000..9667acf
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef __TOOLS_LINUX_TIMER_H
+#define __TOOLS_LINUX_TIMER_H
+
+#include <string.h>
+#include <linux/types.h>
+
+struct timer_list {
+       unsigned long           expires;
+       void                    (*function)(struct timer_list *timer);
+       bool                    pending;
+};
+
+static inline void timer_setup(struct timer_list *timer,
+                              void (*func)(struct timer_list *),
+                              unsigned int flags)
+{
+       memset(timer, 0, sizeof(*timer));
+       timer->function = func;
+}
+
+#define timer_setup_on_stack(timer, callback, flags)                   \
+       timer_setup(timer, callback, flags)
+
+#define destroy_timer_on_stack(timer) do {} while (0)
+
+static inline int timer_pending(const struct timer_list *timer)
+{
+       return timer->pending;
+}
+
+int del_timer(struct timer_list *timer);
+int del_timer_sync(struct timer_list *timer);
+
+#define del_singleshot_timer_sync(timer) del_timer_sync(timer)
+
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+static inline void add_timer(struct timer_list *timer)
+{
+       BUG_ON(timer_pending(timer));
+       mod_timer(timer, timer->expires);
+}
+
+void flush_timers(void);
+
+#endif /* __TOOLS_LINUX_TIMER_H */
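
The timer_setup()/mod_timer() flow mirrors the kernel's: expiry is in jiffies
(from linux/jiffies.h elsewhere in this tree) and the callback receives the
timer itself, so container_of() recovers the enclosing object. A sketch:

    struct conn {
            struct timer_list timeout;
    };

    static void conn_timeout(struct timer_list *t)
    {
            struct conn *c = container_of(t, struct conn, timeout);
            /* tear down c */
            (void) c;
    }

    static void conn_arm(struct conn *c)
    {
            timer_setup(&c->timeout, conn_timeout, 0);
            mod_timer(&c->timeout, jiffies + HZ);       /* fire in ~1 second */
    }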
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
new file mode 100644 (file)
index 0000000..1686cb9
--- /dev/null
@@ -0,0 +1,62 @@
+#ifndef __TOOLS_LINUX_TRACEPOINT_H
+#define __TOOLS_LINUX_TRACEPOINT_H
+
+#define PARAMS(args...) args
+
+#define TP_PROTO(args...)      args
+#define TP_ARGS(args...)       args
+#define TP_CONDITION(args...)  args
+
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
+       static inline void trace_##name(proto)                          \
+       { }                                                             \
+       static inline void trace_##name##_rcuidle(proto)                \
+       { }                                                             \
+       static inline int                                               \
+       register_trace_##name(void (*probe)(data_proto),                \
+                             void *data)                               \
+       {                                                               \
+               return -ENOSYS;                                         \
+       }                                                               \
+       static inline int                                               \
+       unregister_trace_##name(void (*probe)(data_proto),              \
+                               void *data)                             \
+       {                                                               \
+               return -ENOSYS;                                         \
+       }                                                               \
+       static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
+       {                                                               \
+       }                                                               \
+       static inline bool                                              \
+       trace_##name##_enabled(void)                                    \
+       {                                                               \
+               return false;                                           \
+       }
+
+#define DEFINE_TRACE_FN(name, reg, unreg)
+#define DEFINE_TRACE(name)
+#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
+#define EXPORT_TRACEPOINT_SYMBOL(name)
+
+#define DECLARE_TRACE_NOARGS(name)                                     \
+       __DECLARE_TRACE(name, void, ,                                   \
+                       cpu_online(raw_smp_processor_id()),             \
+                       void *__data, __data)
+
+#define DECLARE_TRACE(name, proto, args)                               \
+       __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),              \
+                       cpu_online(raw_smp_processor_id()),             \
+                       PARAMS(void *__data, proto),                    \
+                       PARAMS(__data, args))
+
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
+#define DEFINE_EVENT(template, name, proto, args)              \
+       DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
+       DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT(name, proto, args, struct, assign, print)  \
+       DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
+#endif /* __TOOLS_LINUX_TRACEPOINT_H */
diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h
new file mode 100644 (file)
index 0000000..eb5b74a
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef TYPECHECK_H_INCLUDED
+#define TYPECHECK_H_INCLUDED
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({     type __dummy; \
+       typeof(x) __dummy2; \
+       (void)(&__dummy == &__dummy2); \
+       1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({     typeof(type) __tmp = function; \
+       (void)__tmp; \
+})
+
+#endif         /* TYPECHECK_H_INCLUDED */
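
Typical use is inside macros that would otherwise silently accept the wrong
type - e.g. the kernel's time_after() family, sketched here:

    #define time_after_sketch(a, b)                 \
            (typecheck(unsigned long, a) &&         \
             typecheck(unsigned long, b) &&         \
             ((long)((b) - (a)) < 0))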
diff --git a/include/linux/types.h b/include/linux/types.h
new file mode 100644 (file)
index 0000000..387c383
--- /dev/null
@@ -0,0 +1,76 @@
+#ifndef _TOOLS_LINUX_TYPES_H_
+#define _TOOLS_LINUX_TYPES_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <fcntl.h>
+#include <sys/types.h>
+
+#define __SANE_USERSPACE_TYPES__       /* For PPC64, to get LL64 types */
+#include <asm/types.h>
+
+#define BITS_PER_LONG  __BITS_PER_LONG
+
+struct page;
+struct kmem_cache;
+
+typedef unsigned long          pgoff_t;
+
+typedef unsigned short         umode_t;
+
+typedef unsigned gfp_t;
+
+#define GFP_KERNEL     0
+#define GFP_ATOMIC     0
+#define GFP_NOFS       0
+#define GFP_NOIO       0
+#define GFP_NOWAIT     0
+#define __GFP_FS       0
+#define __GFP_IO       0
+#define __GFP_NOWARN   0
+#define __GFP_NORETRY  0
+#define __GFP_ZERO     1
+
+#define PAGE_ALLOC_COSTLY_ORDER        6
+
+typedef __u64 u64;
+typedef __s64 s64;
+typedef __u32 u32;
+typedef __s32 s32;
+typedef __u16 u16;
+typedef __s16 s16;
+typedef __u8  u8;
+typedef __s8  s8;
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#ifdef __CHECK_ENDIAN__
+#define __bitwise __bitwise__
+#else
+#define __bitwise
+#endif
+
+#define __force
+#define __user
+#define __must_check
+#define __cold
+
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
+typedef u64 sector_t;
+
+#endif /* _TOOLS_LINUX_TYPES_H_ */
diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h
new file mode 100644 (file)
index 0000000..9356b24
--- /dev/null
@@ -0,0 +1,70 @@
+#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
+#define _LINUX_UNALIGNED_BE_BYTESHIFT_H
+
+#include <linux/types.h>
+
+static inline u16 __get_unaligned_be16(const u8 *p)
+{
+       return p[0] << 8 | p[1];
+}
+
+static inline u32 __get_unaligned_be32(const u8 *p)
+{
+       return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
+}
+
+static inline u64 __get_unaligned_be64(const u8 *p)
+{
+       return (u64)__get_unaligned_be32(p) << 32 |
+              __get_unaligned_be32(p + 4);
+}
+
+static inline void __put_unaligned_be16(u16 val, u8 *p)
+{
+       *p++ = val >> 8;
+       *p++ = val;
+}
+
+static inline void __put_unaligned_be32(u32 val, u8 *p)
+{
+       __put_unaligned_be16(val >> 16, p);
+       __put_unaligned_be16(val, p + 2);
+}
+
+static inline void __put_unaligned_be64(u64 val, u8 *p)
+{
+       __put_unaligned_be32(val >> 32, p);
+       __put_unaligned_be32(val, p + 4);
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+       return __get_unaligned_be16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+       return __get_unaligned_be32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+       return __get_unaligned_be64((const u8 *)p);
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+       __put_unaligned_be16(val, p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+       __put_unaligned_be32(val, p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+       __put_unaligned_be64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */
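
These helpers are the portable way to parse multi-byte fields out of a byte
buffer regardless of host alignment and endianness; a sketch decoding a
big-endian on-disk header:

    struct hdr { u32 magic; u16 version; };

    static struct hdr parse_hdr(const void *buf)
    {
            const u8 *p = buf;
            struct hdr h;

            h.magic   = get_unaligned_be32(p);          /* bytes 0..3, MSB first */
            h.version = get_unaligned_be16(p + 4);      /* bytes 4..5 */
            return h;
    }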
diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h
new file mode 100644 (file)
index 0000000..1324158
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _LINUX_UNALIGNED_BE_STRUCT_H
+#define _LINUX_UNALIGNED_BE_STRUCT_H
+
+#include <linux/unaligned/packed_struct.h>
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+       return __get_unaligned_cpu16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+       return __get_unaligned_cpu32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+       return __get_unaligned_cpu64((const u8 *)p);
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+       __put_unaligned_cpu16(val, p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+       __put_unaligned_cpu32(val, p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+       __put_unaligned_cpu64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */
diff --git a/include/linux/unaligned/generic.h b/include/linux/unaligned/generic.h
new file mode 100644 (file)
index 0000000..02d97ff
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef _LINUX_UNALIGNED_GENERIC_H
+#define _LINUX_UNALIGNED_GENERIC_H
+
+/*
+ * Cause a link-time error if we try an unaligned access other than
+ * 1,2,4 or 8 bytes long
+ */
+extern void __bad_unaligned_access_size(void);
+
+#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({                    \
+       __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr),                      \
+       __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)),   \
+       __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)),   \
+       __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)),   \
+       __bad_unaligned_access_size()))));                                      \
+       }))
+
+#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({                    \
+       __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr),                      \
+       __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)),   \
+       __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)),   \
+       __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)),   \
+       __bad_unaligned_access_size()))));                                      \
+       }))
+
+#define __put_unaligned_le(val, ptr) ({                                        \
+       void *__gu_p = (ptr);                                           \
+       switch (sizeof(*(ptr))) {                                       \
+       case 1:                                                         \
+               *(u8 *)__gu_p = (__force u8)(val);                      \
+               break;                                                  \
+       case 2:                                                         \
+               put_unaligned_le16((__force u16)(val), __gu_p);         \
+               break;                                                  \
+       case 4:                                                         \
+               put_unaligned_le32((__force u32)(val), __gu_p);         \
+               break;                                                  \
+       case 8:                                                         \
+               put_unaligned_le64((__force u64)(val), __gu_p);         \
+               break;                                                  \
+       default:                                                        \
+               __bad_unaligned_access_size();                          \
+               break;                                                  \
+       }                                                               \
+       (void)0; })
+
+#define __put_unaligned_be(val, ptr) ({                                        \
+       void *__gu_p = (ptr);                                           \
+       switch (sizeof(*(ptr))) {                                       \
+       case 1:                                                         \
+               *(u8 *)__gu_p = (__force u8)(val);                      \
+               break;                                                  \
+       case 2:                                                         \
+               put_unaligned_be16((__force u16)(val), __gu_p);         \
+               break;                                                  \
+       case 4:                                                         \
+               put_unaligned_be32((__force u32)(val), __gu_p);         \
+               break;                                                  \
+       case 8:                                                         \
+               put_unaligned_be64((__force u64)(val), __gu_p);         \
+               break;                                                  \
+       default:                                                        \
+               __bad_unaligned_access_size();                          \
+               break;                                                  \
+       }                                                               \
+       (void)0; })
+
+#endif /* _LINUX_UNALIGNED_GENERIC_H */
diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h
new file mode 100644 (file)
index 0000000..be376fb
--- /dev/null
@@ -0,0 +1,70 @@
+#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
+#define _LINUX_UNALIGNED_LE_BYTESHIFT_H
+
+#include <linux/types.h>
+
+static inline u16 __get_unaligned_le16(const u8 *p)
+{
+       return p[0] | p[1] << 8;
+}
+
+static inline u32 __get_unaligned_le32(const u8 *p)
+{
+       return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+}
+
+static inline u64 __get_unaligned_le64(const u8 *p)
+{
+       return (u64)__get_unaligned_le32(p + 4) << 32 |
+              __get_unaligned_le32(p);
+}
+
+static inline void __put_unaligned_le16(u16 val, u8 *p)
+{
+       *p++ = val;
+       *p++ = val >> 8;
+}
+
+static inline void __put_unaligned_le32(u32 val, u8 *p)
+{
+       __put_unaligned_le16(val >> 16, p + 2);
+       __put_unaligned_le16(val, p);
+}
+
+static inline void __put_unaligned_le64(u64 val, u8 *p)
+{
+       __put_unaligned_le32(val >> 32, p + 4);
+       __put_unaligned_le32(val, p);
+}
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+       return __get_unaligned_le16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+       return __get_unaligned_le32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+       return __get_unaligned_le64((const u8 *)p);
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+       __put_unaligned_le16(val, p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+       __put_unaligned_le32(val, p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+       __put_unaligned_le64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */
diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h
new file mode 100644 (file)
index 0000000..088c457
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _LINUX_UNALIGNED_LE_STRUCT_H
+#define _LINUX_UNALIGNED_LE_STRUCT_H
+
+#include <linux/unaligned/packed_struct.h>
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+       return __get_unaligned_cpu16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+       return __get_unaligned_cpu32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+       return __get_unaligned_cpu64((const u8 *)p);
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+       __put_unaligned_cpu16(val, p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+       __put_unaligned_cpu32(val, p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+       __put_unaligned_cpu64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */
diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h
new file mode 100644 (file)
index 0000000..c0d817d
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
+#define _LINUX_UNALIGNED_PACKED_STRUCT_H
+
+#include <linux/kernel.h>
+
+struct __una_u16 { u16 x; } __packed;
+struct __una_u32 { u32 x; } __packed;
+struct __una_u64 { u64 x; } __packed;
+
+static inline u16 __get_unaligned_cpu16(const void *p)
+{
+       const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+       return ptr->x;
+}
+
+static inline u32 __get_unaligned_cpu32(const void *p)
+{
+       const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+       return ptr->x;
+}
+
+static inline u64 __get_unaligned_cpu64(const void *p)
+{
+       const struct __una_u64 *ptr = (const struct __una_u64 *)p;
+       return ptr->x;
+}
+
+static inline void __put_unaligned_cpu16(u16 val, void *p)
+{
+       struct __una_u16 *ptr = (struct __una_u16 *)p;
+       ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu32(u32 val, void *p)
+{
+       struct __una_u32 *ptr = (struct __una_u32 *)p;
+       ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu64(u64 val, void *p)
+{
+       struct __una_u64 *ptr = (struct __una_u64 *)p;
+       ptr->x = val;
+}
+
+#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
new file mode 100644 (file)
index 0000000..c8eeb70
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * UUID/GUID definition
+ *
+ * Copyright (C) 2010, 2016 Intel Corp.
+ *     Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef _LINUX_UUID_H_
+#define _LINUX_UUID_H_
+
+#include <string.h>
+#include <asm/types.h>
+
+typedef struct {
+       __u8 b[16];
+} uuid_le;
+
+typedef struct {
+       __u8 b[16];
+} uuid_be;
+
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)               \
+((uuid_le)                                                             \
+{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+   (b) & 0xff, ((b) >> 8) & 0xff,                                      \
+   (c) & 0xff, ((c) >> 8) & 0xff,                                      \
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)               \
+((uuid_be)                                                             \
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+   ((b) >> 8) & 0xff, (b) & 0xff,                                      \
+   ((c) >> 8) & 0xff, (c) & 0xff,                                      \
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+{
+       return memcmp(&u1, &u2, sizeof(uuid_le));
+}
+
+#endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
new file mode 100644 (file)
index 0000000..efcc191
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef __TOOLS_LINUX_VMALLOC_H
+#define __TOOLS_LINUX_VMALLOC_H
+
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "linux/slab.h"
+#include "tools-util.h"
+
+#define PAGE_KERNEL            0
+#define PAGE_KERNEL_EXEC       1
+
+#define vfree(p)               free(p)
+
+static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask)
+{
+       void *p;
+
+       run_shrinkers();
+
+       p = aligned_alloc(PAGE_SIZE, size);
+       if (!p)
+               return NULL;
+
+       if (gfp_mask & __GFP_ZERO)
+               memset(p, 0, size);
+
+       return p;
+}
+
+static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask)
+{
+       void *p;
+
+       p = __vmalloc(size, gfp_mask);
+       if (!p)
+               return NULL;
+
+       if (mprotect(p, size, PROT_READ|PROT_WRITE|PROT_EXEC)) {
+               vfree(p);
+               return NULL;
+       }
+
+       return p;
+}
+
+static inline void *vmalloc(unsigned long size)
+{
+       return __vmalloc(size, GFP_KERNEL);
+}
+
+static inline void *vzalloc(unsigned long size)
+{
+       return __vmalloc(size, GFP_KERNEL|__GFP_ZERO);
+}
+
+#endif /* __TOOLS_LINUX_VMALLOC_H */
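
In this shim vmalloc() is just page-aligned heap memory, so the kernel idiom of
using vmalloc for large, virtually-contiguous buffers reduces to a choice of
alignment; a sketch:

    static void *alloc_table(size_t bytes)
    {
            void *p = vzalloc(bytes);   /* page-aligned and zeroed */

            if (!p)
                    return NULL;
            return p;                   /* release with vfree() */
    }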
diff --git a/include/linux/wait.h b/include/linux/wait.h
new file mode 100644 (file)
index 0000000..62d15e5
--- /dev/null
@@ -0,0 +1,133 @@
+#ifndef _LINUX_WAIT_H
+#define _LINUX_WAIT_H
+
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+typedef struct __wait_queue wait_queue_t;
+typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
+
+#define WQ_FLAG_EXCLUSIVE      0x01
+
+struct __wait_queue {
+       unsigned int            flags;
+       void                    *private;
+       wait_queue_func_t       func;
+       struct list_head        task_list;
+};
+
+typedef struct {
+       spinlock_t              lock;
+       struct list_head        task_list;
+} wait_queue_head_t;
+
+void wake_up(wait_queue_head_t *);
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+
+#define DECLARE_WAITQUEUE(name, tsk)                                   \
+       wait_queue_t name = {                                           \
+               .private        = tsk,                                  \
+               .func           = default_wake_function,                \
+               .task_list      = { NULL, NULL }                        \
+       }
+
+#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {                          \
+       .lock           = __SPIN_LOCK_UNLOCKED(name.lock),              \
+       .task_list      = { &(name).task_list, &(name).task_list } }
+
+#define DECLARE_WAIT_QUEUE_HEAD(name) \
+       wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
+
+static inline void init_waitqueue_head(wait_queue_head_t *q)
+{
+       spin_lock_init(&q->lock);
+       INIT_LIST_HEAD(&q->task_list);
+}
+
+#define DEFINE_WAIT(name)                                              \
+       wait_queue_t name = {                                           \
+               .private        = current,                              \
+               .func           = autoremove_wake_function,             \
+               .task_list      = LIST_HEAD_INIT((name).task_list),     \
+       }
+
+#define ___wait_cond_timeout(condition)                                        \
+({                                                                     \
+       bool __cond = (condition);                                      \
+       if (__cond && !__ret)                                           \
+               __ret = 1;                                              \
+       __cond || !__ret;                                               \
+})
+
+#define ___wait_event(wq, condition, state, exclusive, ret, cmd)       \
+({                                                                     \
+       DEFINE_WAIT(__wait);                                            \
+       long __ret = ret;                                               \
+                                                                       \
+       for (;;) {                                                      \
+               prepare_to_wait(&wq, &__wait, state);                   \
+               if (condition)                                          \
+                       break;                                          \
+               cmd;                                                    \
+       }                                                               \
+       finish_wait(&wq, &__wait);                                      \
+       __ret;                                                          \
+})
+
+#define __wait_event(wq, condition)                                    \
+       (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,  \
+                           schedule())
+
+#define wait_event(wq, condition)                                      \
+do {                                                                   \
+       if (condition)                                                  \
+               break;                                                  \
+       __wait_event(wq, condition);                                    \
+} while (0)
+
+#define wait_event_killable(wq, condition)     ({ wait_event(wq, condition); 0; })
+
+#define __wait_event_timeout(wq, condition, timeout)                   \
+       ___wait_event(wq, ___wait_cond_timeout(condition),              \
+                     TASK_UNINTERRUPTIBLE, 0, timeout,                 \
+                     __ret = schedule_timeout(__ret))
+
+#define wait_event_timeout(wq, condition, timeout)                     \
+({                                                                     \
+       long __ret = timeout;                                           \
+       if (!___wait_cond_timeout(condition))                           \
+               __ret = __wait_event_timeout(wq, condition, timeout);   \
+       __ret;                                                          \
+})
+
+void wake_up_bit(void *, int);
+void __wait_on_bit(void *, int, unsigned);
+void __wait_on_bit_lock(void *, int, unsigned);
+
+static inline int
+wait_on_bit(unsigned long *word, int bit, unsigned mode)
+{
+       if (!test_bit(bit, word))
+               return 0;
+       __wait_on_bit(word, bit, mode);
+       return 0;
+}
+
+static inline int
+wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
+{
+       if (!test_and_set_bit(bit, word))
+               return 0;
+       __wait_on_bit_lock(word, bit, mode);
+       return 0;
+}
+
+#define wait_on_bit_io(w, b, m)                        wait_on_bit(w, b, m)
+#define wait_on_bit_lock_io(w, b, m)           wait_on_bit_lock(w, b, m)
+
+#endif /* _LINUX_WAIT_H */
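
These declarations back a pthread-based userspace reimplementation of
the kernel waitqueue protocol. A minimal waiter/waker sketch, assuming
the tree's sched shim supplies current, schedule() and
TASK_UNINTERRUPTIBLE (all names below are illustrative):

    #include "linux/sched.h"        /* assumed shim: current, schedule() */
    #include "linux/wait.h"

    static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
    static int done;

    static void waiter(void)
    {
            /* loops in prepare_to_wait()/schedule() until done is set */
            wait_event(demo_wq, done != 0);
    }

    static void waker(void)
    {
            done = 1;
            wake_up(&demo_wq);      /* runs the queued wake functions */
    }
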
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
new file mode 100644 (file)
index 0000000..1406c95
--- /dev/null
@@ -0,0 +1,185 @@
+#ifndef __TOOLS_LINUX_WORKQUEUE_H
+#define __TOOLS_LINUX_WORKQUEUE_H
+
+#include <linux/list.h>
+#include <linux/timer.h>
+
+struct task_struct;
+struct workqueue_struct;
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(struct timer_list *);
+
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+
+#if 0
+enum {
+       //WORK_STRUCT_PENDING_BIT       = 0,    /* work item is pending execution */
+       //WORK_STRUCT_DELAYED_BIT       = 1,    /* work item is delayed */
+       //
+       //WORK_STRUCT_PENDING   = 1 << WORK_STRUCT_PENDING_BIT,
+       //WORK_STRUCT_DELAYED   = 1 << WORK_STRUCT_DELAYED_BIT,
+};
+#endif
+
+struct work_struct {
+       atomic_long_t data;
+       struct list_head entry;
+       work_func_t func;
+};
+
+#define INIT_WORK(_work, _func)                                        \
+do {                                                           \
+       (_work)->data.counter = 0;                              \
+       INIT_LIST_HEAD(&(_work)->entry);                        \
+       (_work)->func = (_func);                                \
+} while (0)
+
+struct delayed_work {
+       struct work_struct work;
+       struct timer_list timer;
+       struct workqueue_struct *wq;
+};
+
+#define INIT_DELAYED_WORK(_work, _func)                                        \
+       do {                                                            \
+               INIT_WORK(&(_work)->work, (_func));                     \
+               timer_setup(&(_work)->timer, delayed_work_timer_fn, 0); \
+       } while (0)
+
+static inline struct delayed_work *to_delayed_work(struct work_struct *work)
+{
+       return container_of(work, struct delayed_work, work);
+}
+
+enum {
+       WQ_UNBOUND              = 1 << 1, /* not bound to any cpu */
+       WQ_FREEZABLE            = 1 << 2, /* freeze during suspend */
+       WQ_MEM_RECLAIM          = 1 << 3, /* may be used for memory reclaim */
+       WQ_HIGHPRI              = 1 << 4, /* high priority */
+       WQ_CPU_INTENSIVE        = 1 << 5, /* cpu intensive workqueue */
+       WQ_SYSFS                = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
+
+       /*
+        * Per-cpu workqueues are generally preferred because they tend to
+        * show better performance thanks to cache locality.  Per-cpu
+        * workqueues exclude the scheduler from choosing the CPU to
+        * execute the worker threads, which has an unfortunate side effect
+        * of increasing power consumption.
+        *
+        * The scheduler considers a CPU idle if it doesn't have any task
+        * to execute and tries to keep idle cores idle to conserve power;
+        * however, for example, a per-cpu work item scheduled from an
+        * interrupt handler on an idle CPU will force the scheduler to
+        * execute the work item on that CPU, breaking the idleness, which
+        * in turn may lead to further scheduling choices that are
+        * sub-optimal in terms of power consumption.
+        *
+        * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
+        * but become unbound if the workqueue.power_efficient kernel param
+        * is specified.  Per-cpu workqueues identified as contributing
+        * significantly to power consumption are marked with this flag, so
+        * enabling power_efficient mode yields a noticeable power saving
+        * at the cost of a small performance disadvantage.
+        *
+        * http://thread.gmane.org/gmane.linux.kernel/1480396
+        */
+       WQ_POWER_EFFICIENT      = 1 << 7,
+
+       __WQ_DRAINING           = 1 << 16, /* internal: workqueue is draining */
+       __WQ_ORDERED            = 1 << 17, /* internal: workqueue is ordered */
+       __WQ_LEGACY             = 1 << 18, /* internal: create*_workqueue() */
+
+       WQ_MAX_ACTIVE           = 512,    /* I like 512, better ideas? */
+       WQ_MAX_UNBOUND_PER_CPU  = 4,      /* 4 * #cpus for unbound wq */
+       WQ_DFL_ACTIVE           = WQ_MAX_ACTIVE / 2,
+};
+
+/* unbound wq's aren't per-cpu, scale max_active according to #cpus */
+#define WQ_UNBOUND_MAX_ACTIVE  WQ_MAX_ACTIVE
+
+extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_highpri_wq;
+extern struct workqueue_struct *system_long_wq;
+extern struct workqueue_struct *system_unbound_wq;
+extern struct workqueue_struct *system_freezable_wq;
+
+extern struct workqueue_struct *
+alloc_workqueue(const char *fmt, unsigned int flags,
+               int max_active, ...) __printf(1, 4);
+
+#define alloc_ordered_workqueue(fmt, flags, args...)                   \
+       alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
+
+#define create_workqueue(name)                                         \
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
+#define create_freezable_workqueue(name)                               \
+       alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+                       WQ_MEM_RECLAIM, 1, (name))
+#define create_singlethread_workqueue(name)                            \
+       alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
+
+extern void destroy_workqueue(struct workqueue_struct *wq);
+
+struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
+void free_workqueue_attrs(struct workqueue_attrs *attrs);
+int apply_workqueue_attrs(struct workqueue_struct *wq,
+                         const struct workqueue_attrs *attrs);
+
+extern bool queue_work(struct workqueue_struct *wq,
+                      struct work_struct *work);
+extern bool queue_delayed_work(struct workqueue_struct *wq,
+                       struct delayed_work *work, unsigned long delay);
+extern bool mod_delayed_work(struct workqueue_struct *wq,
+                       struct delayed_work *dwork, unsigned long delay);
+
+extern void flush_workqueue(struct workqueue_struct *wq);
+extern void drain_workqueue(struct workqueue_struct *wq);
+
+extern int schedule_on_each_cpu(work_func_t func);
+
+extern bool flush_work(struct work_struct *work);
+extern bool cancel_work_sync(struct work_struct *work);
+
+extern bool flush_delayed_work(struct delayed_work *dwork);
+extern bool cancel_delayed_work(struct delayed_work *dwork);
+extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
+
+extern void workqueue_set_max_active(struct workqueue_struct *wq,
+                                    int max_active);
+extern bool current_is_workqueue_rescuer(void);
+extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
+extern unsigned int work_busy(struct work_struct *work);
+extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
+extern void print_worker_info(const char *log_lvl, struct task_struct *task);
+extern void show_workqueue_state(void);
+
+static inline bool schedule_work_on(int cpu, struct work_struct *work)
+{
+       return queue_work(system_wq, work);
+}
+
+static inline bool schedule_work(struct work_struct *work)
+{
+       return queue_work(system_wq, work);
+}
+
+static inline void flush_scheduled_work(void)
+{
+       flush_workqueue(system_wq);
+}
+
+static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
+                                           unsigned long delay)
+{
+       return queue_delayed_work(system_wq, dwork, delay);
+}
+
+static inline bool schedule_delayed_work(struct delayed_work *dwork,
+                                        unsigned long delay)
+{
+       return queue_delayed_work(system_wq, dwork, delay);
+}
+
+#endif /* __TOOLS_LINUX_WORKQUEUE_H */
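
The work item lifecycle these declarations support is the usual kernel
one: initialize, queue, then flush or cancel. A short sketch against the
convenience wrappers at the bottom of the header (the work function and
item names are made up for illustration):

    #include "linux/workqueue.h"

    static void demo_fn(struct work_struct *w)
    {
            /* executes later on a workqueue thread */
    }

    static struct work_struct demo_work;

    static void kick(void)
    {
            INIT_WORK(&demo_work, demo_fn);
            schedule_work(&demo_work);      /* queues on system_wq */
            flush_work(&demo_work);         /* waits for completion */
    }
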
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
new file mode 100644 (file)
index 0000000..fbc1e1f
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+  File: linux/xattr.h
+
+  Extended attributes handling.
+
+  Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
+  Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+  Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+*/
+#ifndef _LINUX_XATTR_H
+#define _LINUX_XATTR_H
+
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/xattr.h>
+
+#ifndef XATTR_CREATE
+#define XATTR_CREATE   0x1
+#endif
+
+#ifndef XATTR_REPLACE
+#define XATTR_REPLACE  0x2
+#endif
+
+struct inode;
+struct dentry;
+
+/*
+ * struct xattr_handler: When @name is set, match attributes with exactly that
+ * name.  When @prefix is set instead, match attributes with that prefix and
+ * with a non-empty suffix.
+ */
+struct xattr_handler {
+       const char *name;
+       const char *prefix;
+       int flags;      /* fs private flags */
+       bool (*list)(struct dentry *dentry);
+       int (*get)(const struct xattr_handler *, struct dentry *dentry,
+                  struct inode *inode, const char *name, void *buffer,
+                  size_t size);
+       int (*set)(const struct xattr_handler *, struct dentry *dentry,
+                  struct inode *inode, const char *name, const void *buffer,
+                  size_t size, int flags);
+};
+
+const char *xattr_full_name(const struct xattr_handler *, const char *);
+
+struct xattr {
+       const char *name;
+       void *value;
+       size_t value_len;
+};
+
+ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
+ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
+ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
+int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
+int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
+int vfs_removexattr(struct dentry *, const char *);
+
+ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size);
+ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
+int generic_setxattr(struct dentry *dentry, struct inode *inode,
+                    const char *name, const void *value, size_t size, int flags);
+int generic_removexattr(struct dentry *dentry, const char *name);
+ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name,
+                          char **xattr_value, size_t size, gfp_t flags);
+
+static inline const char *xattr_prefix(const struct xattr_handler *handler)
+{
+       return handler->prefix ?: handler->name;
+}
+
+#endif /* _LINUX_XATTR_H */
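
The matching rule documented on struct xattr_handler can be made
concrete with two hypothetical handlers:

    #include "linux/xattr.h"

    static const struct xattr_handler exact_handler = {
            .name   = XATTR_NAME_SELINUX,   /* matches only this name */
    };

    static const struct xattr_handler prefix_handler = {
            .prefix = XATTR_USER_PREFIX,    /* matches "user.<suffix>" */
    };

    /* xattr_prefix() yields "security.selinux" for the first handler
     * and "user." for the second. */
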
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
new file mode 100644 (file)
index 0000000..45cfbd8
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef _ZLIB_H
+#define _ZLIB_H
+
+#include <zlib.h>
+
+#define zlib_inflate_workspacesize()           0
+#define zlib_deflate_workspacesize(windowBits, memLevel)       0
+
+#define zlib_inflateInit2      inflateInit2
+#define zlib_inflate           inflate
+
+#define zlib_deflateInit2      deflateInit2
+#define zlib_deflate           deflate
+#define zlib_deflateEnd                deflateEnd
+
+#define DEF_MEM_LEVEL 8
+
+#endif /* _ZLIB_H */
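
Because the kernel-style names are plain aliases for system zlib and the
workspace sizing is stubbed out to zero, kernel code compiles against
this header unchanged. A hedged setup sketch (raw-deflate window, stream
name illustrative):

    #include <string.h>
    #include "linux/zlib.h"

    static int setup_deflate(z_stream *strm)
    {
            memset(strm, 0, sizeof(*strm));
            /* expands directly to deflateInit2() */
            return zlib_deflateInit2(strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                                     -15, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
    }
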
diff --git a/include/linux/zstd.h b/include/linux/zstd.h
new file mode 100644 (file)
index 0000000..0dd1b02
--- /dev/null
@@ -0,0 +1,10 @@
+#include <zstd.h>
+
+#define ZSTD_initDCtx(w, s)    ZSTD_initStaticDCtx(w, s)
+#define ZSTD_initCCtx(w, s)    ZSTD_initStaticCCtx(w, s)
+
+#define ZSTD_compressCCtx(w, dst, d_len, src, src_len, params) \
+       ZSTD_compressCCtx(w, dst, d_len, src, src_len, 0)
+
+#define ZSTD_CCtxWorkspaceBound(p)     ZSTD_estimateCCtxSize(0)
+#define ZSTD_DCtxWorkspaceBound()      ZSTD_estimateDCtxSize()
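
The zstd shim plays the same aliasing trick: the kernel API's parameter
struct is dropped in favour of compression level 0 (the library
default), and workspace bounds are estimated for level 0 as well. A
sketch, assuming a libzstd that exposes the static-context API the shim
already relies on; buffer names are illustrative:

    #include <stdlib.h>
    #include "linux/zstd.h"

    static size_t compress_buf(void *dst, size_t dst_len,
                               const void *src, size_t src_len)
    {
            size_t wksp_len = ZSTD_CCtxWorkspaceBound(NULL); /* arg unused */
            void *wksp = malloc(wksp_len);
            ZSTD_CCtx *c;
            size_t r;

            if (!wksp)
                    return (size_t)-1;      /* reads as a zstd error code */

            c = ZSTD_initCCtx(wksp, wksp_len);
            r = ZSTD_compressCCtx(c, dst, dst_len, src, src_len, NULL);

            free(wksp);
            return r;                       /* check with ZSTD_isError() */
    }
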
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
new file mode 100644 (file)
index 0000000..9b4e829
--- /dev/null
@@ -0,0 +1,664 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bcachefs
+
+#if !defined(_TRACE_BCACHE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BCACHE_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(bpos,
+       TP_PROTO(struct bpos *p),
+       TP_ARGS(p),
+
+       TP_STRUCT__entry(
+               __field(u64,    inode                           )
+               __field(u64,    offset                          )
+       ),
+
+       TP_fast_assign(
+               __entry->inode  = p->inode;
+               __entry->offset = p->offset;
+       ),
+
+       TP_printk("%llu:%llu", __entry->inode, __entry->offset)
+);
+
+DECLARE_EVENT_CLASS(bkey,
+       TP_PROTO(const struct bkey *k),
+       TP_ARGS(k),
+
+       TP_STRUCT__entry(
+               __field(u64,    inode                           )
+               __field(u64,    offset                          )
+               __field(u32,    size                            )
+       ),
+
+       TP_fast_assign(
+               __entry->inode  = k->p.inode;
+               __entry->offset = k->p.offset;
+               __entry->size   = k->size;
+       ),
+
+       TP_printk("%llu:%llu len %u", __entry->inode,
+                 __entry->offset, __entry->size)
+);
+
+DECLARE_EVENT_CLASS(bch_fs,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c),
+
+       TP_STRUCT__entry(
+               __array(char,           uuid,   16 )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+       ),
+
+       TP_printk("%pU", __entry->uuid)
+);
+
+DECLARE_EVENT_CLASS(bio,
+       TP_PROTO(struct bio *bio),
+       TP_ARGS(bio),
+
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __field(sector_t,       sector                  )
+               __field(unsigned int,   nr_sector               )
+               __array(char,           rwbs,   6               )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_disk ? bio_dev(bio) : 0;
+               __entry->sector         = bio->bi_iter.bi_sector;
+               __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+       ),
+
+       TP_printk("%d,%d  %s %llu + %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector, __entry->nr_sector)
+);
+
+/* io.c: */
+
+DEFINE_EVENT(bio, read_split,
+       TP_PROTO(struct bio *bio),
+       TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, read_bounce,
+       TP_PROTO(struct bio *bio),
+       TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, read_retry,
+       TP_PROTO(struct bio *bio),
+       TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, promote,
+       TP_PROTO(struct bio *bio),
+       TP_ARGS(bio)
+);
+
+/* Journal */
+
+DEFINE_EVENT(bch_fs, journal_full,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, journal_entry_full,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(bio, journal_write,
+       TP_PROTO(struct bio *bio),
+       TP_ARGS(bio)
+);
+
+/* bset.c: */
+
+DEFINE_EVENT(bpos, bkey_pack_pos_fail,
+       TP_PROTO(struct bpos *p),
+       TP_ARGS(p)
+);
+
+/* Btree */
+
+DECLARE_EVENT_CLASS(btree_node,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b),
+
+       TP_STRUCT__entry(
+               __array(char,           uuid,           16      )
+               __field(u8,             level                   )
+               __field(u8,             id                      )
+               __field(u64,            inode                   )
+               __field(u64,            offset                  )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+               __entry->level          = b->c.level;
+               __entry->id             = b->c.btree_id;
+               __entry->inode          = b->key.k.p.inode;
+               __entry->offset         = b->key.k.p.offset;
+       ),
+
+       TP_printk("%pU  %u id %u %llu:%llu",
+                 __entry->uuid, __entry->level, __entry->id,
+                 __entry->inode, __entry->offset)
+);
+
+DEFINE_EVENT(btree_node, btree_read,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+TRACE_EVENT(btree_write,
+       TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors),
+       TP_ARGS(b, bytes, sectors),
+
+       TP_STRUCT__entry(
+               __field(enum btree_node_type,   type)
+               __field(unsigned,       bytes                   )
+               __field(unsigned,       sectors                 )
+       ),
+
+       TP_fast_assign(
+               __entry->type   = btree_node_type(b);
+               __entry->bytes  = bytes;
+               __entry->sectors = sectors;
+       ),
+
+       TP_printk("bkey type %u bytes %u sectors %u",
+                 __entry->type, __entry->bytes, __entry->sectors)
+);
+
+DEFINE_EVENT(btree_node, btree_node_alloc,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(btree_node, btree_node_free,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(btree_node, btree_node_reap,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DECLARE_EVENT_CLASS(btree_node_cannibalize_lock,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c),
+
+       TP_STRUCT__entry(
+               __array(char,                   uuid,   16      )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+       ),
+
+       TP_printk("%pU", __entry->uuid)
+);
+
+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+TRACE_EVENT(btree_reserve_get_fail,
+       TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
+       TP_ARGS(c, required, cl),
+
+       TP_STRUCT__entry(
+               __array(char,                   uuid,   16      )
+               __field(size_t,                 required        )
+               __field(struct closure *,       cl              )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+               __entry->required = required;
+               __entry->cl = cl;
+       ),
+
+       TP_printk("%pU required %zu by %p", __entry->uuid,
+                 __entry->required, __entry->cl)
+);
+
+TRACE_EVENT(btree_insert_key,
+       TP_PROTO(struct bch_fs *c, struct btree *b, struct bkey_i *k),
+       TP_ARGS(c, b, k),
+
+       TP_STRUCT__entry(
+               __field(u8,             id                      )
+               __field(u64,            inode                   )
+               __field(u64,            offset                  )
+               __field(u32,            size                    )
+       ),
+
+       TP_fast_assign(
+               __entry->id             = b->c.btree_id;
+               __entry->inode          = k->k.p.inode;
+               __entry->offset         = k->k.p.offset;
+               __entry->size           = k->k.size;
+       ),
+
+       TP_printk("btree %u: %llu:%llu len %u", __entry->id,
+                 __entry->inode, __entry->offset, __entry->size)
+);
+
+DEFINE_EVENT(btree_node, btree_split,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(btree_node, btree_compact,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(btree_node, btree_merge,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(btree_node, btree_set_root,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+/* Garbage collection */
+
+DEFINE_EVENT(btree_node, btree_gc_coalesce,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+TRACE_EVENT(btree_gc_coalesce_fail,
+       TP_PROTO(struct bch_fs *c, int reason),
+       TP_ARGS(c, reason),
+
+       TP_STRUCT__entry(
+               __field(u8,             reason                  )
+               __array(char,           uuid,   16              )
+       ),
+
+       TP_fast_assign(
+               __entry->reason         = reason;
+               memcpy(__entry->uuid, c->disk_sb.sb->user_uuid.b, 16);
+       ),
+
+       TP_printk("%pU: %u", __entry->uuid, __entry->reason)
+);
+
+DEFINE_EVENT(btree_node, btree_gc_rewrite_node,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(btree_node, btree_gc_rewrite_node_fail,
+       TP_PROTO(struct bch_fs *c, struct btree *b),
+       TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(bch_fs, gc_start,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, gc_end,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, gc_coalesce_start,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, gc_coalesce_end,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, gc_cannot_inc_gens,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+/* Allocator */
+
+TRACE_EVENT(alloc_batch,
+       TP_PROTO(struct bch_dev *ca, size_t free, size_t total),
+       TP_ARGS(ca, free, total),
+
+       TP_STRUCT__entry(
+               __array(char,           uuid,   16      )
+               __field(size_t,         free            )
+               __field(size_t,         total           )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, ca->uuid.b, 16);
+               __entry->free = free;
+               __entry->total = total;
+       ),
+
+       TP_printk("%pU free %zu total %zu",
+               __entry->uuid, __entry->free, __entry->total)
+);
+
+TRACE_EVENT(invalidate,
+       TP_PROTO(struct bch_dev *ca, u64 offset, unsigned sectors),
+       TP_ARGS(ca, offset, sectors),
+
+       TP_STRUCT__entry(
+               __field(unsigned,       sectors                 )
+               __field(dev_t,          dev                     )
+               __field(__u64,          offset                  )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = ca->disk_sb.bdev->bd_dev;
+               __entry->offset         = offset;
+               __entry->sectors        = sectors;
+       ),
+
+       TP_printk("invalidated %u sectors at %d,%d sector=%llu",
+                 __entry->sectors, MAJOR(__entry->dev),
+                 MINOR(__entry->dev), __entry->offset)
+);
+
+DEFINE_EVENT(bch_fs, rescale_prios,
+       TP_PROTO(struct bch_fs *c),
+       TP_ARGS(c)
+);
+
+DECLARE_EVENT_CLASS(bucket_alloc,
+       TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
+       TP_ARGS(ca, reserve),
+
+       TP_STRUCT__entry(
+               __array(char,                   uuid,   16)
+               __field(enum alloc_reserve,     reserve   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, ca->uuid.b, 16);
+               __entry->reserve = reserve;
+       ),
+
+       TP_printk("%pU reserve %d", __entry->uuid, __entry->reserve)
+);
+
+DEFINE_EVENT(bucket_alloc, bucket_alloc,
+       TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
+       TP_ARGS(ca, reserve)
+);
+
+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
+       TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
+       TP_ARGS(ca, reserve)
+);
+
+DEFINE_EVENT(bucket_alloc, open_bucket_alloc_fail,
+       TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
+       TP_ARGS(ca, reserve)
+);
+
+/* Moving IO */
+
+DEFINE_EVENT(bkey, move_extent,
+       TP_PROTO(const struct bkey *k),
+       TP_ARGS(k)
+);
+
+DEFINE_EVENT(bkey, move_alloc_fail,
+       TP_PROTO(const struct bkey *k),
+       TP_ARGS(k)
+);
+
+DEFINE_EVENT(bkey, move_race,
+       TP_PROTO(const struct bkey *k),
+       TP_ARGS(k)
+);
+
+TRACE_EVENT(move_data,
+       TP_PROTO(struct bch_fs *c, u64 sectors_moved,
+                u64 keys_moved),
+       TP_ARGS(c, sectors_moved, keys_moved),
+
+       TP_STRUCT__entry(
+               __array(char,           uuid,   16      )
+               __field(u64,            sectors_moved   )
+               __field(u64,            keys_moved      )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+               __entry->sectors_moved = sectors_moved;
+               __entry->keys_moved = keys_moved;
+       ),
+
+       TP_printk("%pU sectors_moved %llu keys_moved %llu",
+               __entry->uuid, __entry->sectors_moved, __entry->keys_moved)
+);
+
+TRACE_EVENT(copygc,
+       TP_PROTO(struct bch_fs *c,
+                u64 sectors_moved, u64 sectors_not_moved,
+                u64 buckets_moved, u64 buckets_not_moved),
+       TP_ARGS(c,
+               sectors_moved, sectors_not_moved,
+               buckets_moved, buckets_not_moved),
+
+       TP_STRUCT__entry(
+               __array(char,           uuid,   16              )
+               __field(u64,            sectors_moved           )
+               __field(u64,            sectors_not_moved       )
+               __field(u64,            buckets_moved           )
+               __field(u64,            buckets_not_moved       )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
+               __entry->sectors_moved          = sectors_moved;
+               __entry->sectors_not_moved      = sectors_not_moved;
+               __entry->buckets_moved          = buckets_moved;
+               __entry->buckets_not_moved      = buckets_not_moved;
+       ),
+
+       TP_printk("%pU sectors moved %llu remain %llu buckets moved %llu remain %llu",
+               __entry->uuid,
+               __entry->sectors_moved, __entry->sectors_not_moved,
+               __entry->buckets_moved, __entry->buckets_not_moved)
+);
+
+TRACE_EVENT(transaction_restart_ip,
+       TP_PROTO(unsigned long caller, unsigned long ip),
+       TP_ARGS(caller, ip),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          caller  )
+               __field(unsigned long,          ip      )
+       ),
+
+       TP_fast_assign(
+               __entry->caller = caller;
+               __entry->ip     = ip;
+       ),
+
+       TP_printk("%pF %pF", (void *) __entry->caller, (void *) __entry->ip)
+);
+
+DECLARE_EVENT_CLASS(transaction_restart,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          ip      )
+       ),
+
+       TP_fast_assign(
+               __entry->ip = ip;
+       ),
+
+       TP_printk("%pf", (void *) __entry->ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_reused,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_would_deadlock,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+TRACE_EVENT(trans_restart_iters_realloced,
+       TP_PROTO(unsigned long ip, unsigned nr),
+       TP_ARGS(ip, nr),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          ip      )
+               __field(unsigned,               nr      )
+       ),
+
+       TP_fast_assign(
+               __entry->ip     = ip;
+               __entry->nr     = nr;
+       ),
+
+       TP_printk("%pf nr %u", (void *) __entry->ip, __entry->nr)
+);
+
+TRACE_EVENT(trans_restart_mem_realloced,
+       TP_PROTO(unsigned long ip, unsigned long bytes),
+       TP_ARGS(ip, bytes),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          ip      )
+               __field(unsigned long,          bytes   )
+       ),
+
+       TP_fast_assign(
+               __entry->ip     = ip;
+               __entry->bytes  = bytes;
+       ),
+
+       TP_printk("%pf bytes %lu", (void *) __entry->ip, __entry->bytes)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_journal_res_get,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_journal_preres_get,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_mark_replicas,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_fault_inject,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_split,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_mark,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_upgrade,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_iter_upgrade,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_traverse,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_atomic,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
+DECLARE_EVENT_CLASS(node_lock_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq),
+
+       TP_STRUCT__entry(
+               __field(u32,            level)
+               __field(u32,            iter_seq)
+               __field(u32,            node)
+               __field(u32,            node_seq)
+       ),
+
+       TP_fast_assign(
+               __entry->level          = level;
+               __entry->iter_seq       = iter_seq;
+               __entry->node           = node;
+               __entry->node_seq       = node_seq;
+       ),
+
+       TP_printk("level %u iter seq %u node %u node seq %u",
+                 __entry->level, __entry->iter_seq,
+                 __entry->node, __entry->node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_relock_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq)
+);
+
+#endif /* _TRACE_BCACHE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
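
For orientation: each DECLARE_EVENT_CLASS above defines a record layout
once, and every DEFINE_EVENT stamps out a named tracepoint that reuses
it, so adding another event of an existing shape is a three-line block.
A hypothetical example reusing the bpos class:

    /* hypothetical event, not part of this file */
    DEFINE_EVENT(bpos, lookup_pos,
            TP_PROTO(struct bpos *p),
            TP_ARGS(p)
    );
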
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
new file mode 100644 (file)
index 0000000..1590c49
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+  File: linux/xattr.h
+
+  Extended attributes handling.
+
+  Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
+  Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+  Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+*/
+
+#include <linux/libc-compat.h>
+
+#ifndef _UAPI_LINUX_XATTR_H
+#define _UAPI_LINUX_XATTR_H
+
+#if __UAPI_DEF_XATTR
+#define __USE_KERNEL_XATTR_DEFS
+
+#define XATTR_CREATE   0x1     /* set value, fail if attr already exists */
+#define XATTR_REPLACE  0x2     /* set value, fail if attr does not exist */
+#endif
+
+/* Namespaces */
+#define XATTR_OS2_PREFIX "os2."
+#define XATTR_OS2_PREFIX_LEN (sizeof(XATTR_OS2_PREFIX) - 1)
+
+#define XATTR_MAC_OSX_PREFIX "osx."
+#define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1)
+
+#define XATTR_BTRFS_PREFIX "btrfs."
+#define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1)
+
+#define XATTR_SECURITY_PREFIX  "security."
+#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
+
+#define XATTR_SYSTEM_PREFIX "system."
+#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1)
+
+#define XATTR_TRUSTED_PREFIX "trusted."
+#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
+
+#define XATTR_USER_PREFIX "user."
+#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
+
+/* Security namespace */
+#define XATTR_EVM_SUFFIX "evm"
+#define XATTR_NAME_EVM XATTR_SECURITY_PREFIX XATTR_EVM_SUFFIX
+
+#define XATTR_IMA_SUFFIX "ima"
+#define XATTR_NAME_IMA XATTR_SECURITY_PREFIX XATTR_IMA_SUFFIX
+
+#define XATTR_SELINUX_SUFFIX "selinux"
+#define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX
+
+#define XATTR_SMACK_SUFFIX "SMACK64"
+#define XATTR_SMACK_IPIN "SMACK64IPIN"
+#define XATTR_SMACK_IPOUT "SMACK64IPOUT"
+#define XATTR_SMACK_EXEC "SMACK64EXEC"
+#define XATTR_SMACK_TRANSMUTE "SMACK64TRANSMUTE"
+#define XATTR_SMACK_MMAP "SMACK64MMAP"
+#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
+#define XATTR_NAME_SMACKIPIN   XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
+#define XATTR_NAME_SMACKIPOUT  XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
+#define XATTR_NAME_SMACKEXEC   XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC
+#define XATTR_NAME_SMACKTRANSMUTE XATTR_SECURITY_PREFIX XATTR_SMACK_TRANSMUTE
+#define XATTR_NAME_SMACKMMAP XATTR_SECURITY_PREFIX XATTR_SMACK_MMAP
+
+#define XATTR_CAPS_SUFFIX "capability"
+#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
+
+#define XATTR_POSIX_ACL_ACCESS  "posix_acl_access"
+#define XATTR_NAME_POSIX_ACL_ACCESS XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS
+#define XATTR_POSIX_ACL_DEFAULT  "posix_acl_default"
+#define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
+
+
+#endif /* _UAPI_LINUX_XATTR_H */
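
The *_PREFIX/*_SUFFIX pairs above compose full attribute names by C
string-literal concatenation, so every derived XATTR_NAME_* constant is
folded at compile time:

    /* XATTR_NAME_SELINUX expands to "security." "selinux", which the
     * compiler folds into the single literal "security.selinux". */
    _Static_assert(sizeof(XATTR_NAME_SELINUX) == sizeof("security.selinux"),
                   "concatenation resolved at compile time");
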
diff --git a/initramfs/hook b/initramfs/hook
new file mode 100755 (executable)
index 0000000..c172dbd
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+PREREQ=""
+
+prereqs()
+{
+    echo "$PREREQ"
+}
+
+case $1 in
+prereqs)
+    prereqs
+    exit 0
+    ;;
+esac
+
+. /usr/share/initramfs-tools/hook-functions
+
+manual_add_modules 'bcachefs'
+
+# chacha20 and poly1305 are used for encrypted bcachefs filesystems.
+add_loaded_modules 'chacha20[-_]*'
+add_loaded_modules 'poly1305[-_]*'
+
+# Add the bcachefs utility to the initramfs
+# Note: make install replaces this with the install path, so it must be last
+#copy_exec /usr/local/sbin/bcachefs /sbin/bcachefs
diff --git a/initramfs/script b/initramfs/script
new file mode 100755 (executable)
index 0000000..e98a623
--- /dev/null
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+PREREQ=""
+
+prereqs()
+{
+    echo "$PREREQ"
+}
+
+case $1 in
+# get pre-requisites
+prereqs)
+    prereqs
+    exit 0
+    ;;
+esac
+
+# Nothing to do if ROOTFSTYPE is set to something other than bcachefs
+if [ -n "$ROOTFSTYPE" -a "$ROOTFSTYPE" != bcachefs ]; then
+    exit 0
+fi
+
+# source for resolve_device() and panic() functions
+. /scripts/functions
+
+#
+# Helper functions
+#
+message()
+{
+    if [ -x /bin/plymouth ] && plymouth --ping; then
+        plymouth message --text="$*"
+    else
+        echo "$*" >&2
+    fi
+}
+
+panic2()
+{
+    # Send the panic message to plymouth
+    if [ -x /bin/plymouth ] && plymouth --ping; then
+        plymouth message --text="$*"
+    fi
+    panic "$@"
+    exit 1
+}
+
+unlock()
+{
+    local msg=$1
+    shift
+
+    if [ -x /bin/plymouth ] && plymouth --ping; then
+        msg=$(plymouth ask-for-password --prompt="$msg" | \
+              bcachefs unlock "$@" 2>&1)
+        # If the unlock failed, send any printed messages to plymouth
+        if [ $? -ne 0 ]; then
+            plymouth message --text="Bcachefs: $msg"
+            return 1
+        fi
+    else
+        # If unlock() is called multiple times, don't re-print the prompt message
+        # unless it has changed
+        if [ "$LAST_UNLOCK_MSG" != "$msg" ]; then
+            echo "$msg" >&2
+            LAST_UNLOCK_MSG=$msg
+        fi
+        bcachefs unlock "$@"
+    fi
+}
+
+# Resolve the root device (e.g. if root is specified by UUID)
+DEV=$(resolve_device "$ROOT")
+
+# Check if the root device needs unlocking:
+if bcachefs unlock -c "$DEV" >/dev/null 2>&1; then
+    if [ "$DEV" = "$ROOT" ]; then
+        msg="Please unlock $DEV:"
+    else
+        msg="Please unlock $DEV ($ROOT):"
+    fi
+
+    count=0
+    tries=3
+    while [ "$tries" -le 0 ] || [ "$count" -lt "$tries" ]; do
+        if unlock "$msg" "$DEV"; then
+            message "Bcachefs: $DEV successfully unlocked"
+            break
+        fi
+
+        count=$((count + 1))
+    done
+
+    if [ "$tries" -gt 0 ] && [ "$count" -ge "$tries" ]; then
+        panic2 "Bcachefs: maximum number of tries exceeded for $DEV"
+    fi
+fi
+
+exit 0
diff --git a/libbcachefs.c b/libbcachefs.c
new file mode 100644 (file)
index 0000000..7ff02b8
--- /dev/null
@@ -0,0 +1,1166 @@
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/bcachefs_format.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/checksum.h"
+#include "libbcachefs/disk_groups.h"
+#include "libbcachefs/journal_seq_blacklist.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/replicas.h"
+#include "libbcachefs/super-io.h"
+#include "tools-util.h"
+
+#define NSEC_PER_SEC   1000000000L
+
+/* minimum size filesystem we can create, given a bucket size: */
+static u64 min_size(unsigned bucket_size)
+{
+       return BCH_MIN_NR_NBUCKETS * bucket_size;
+}
+
+static void init_layout(struct bch_sb_layout *l, unsigned block_size,
+                       u64 start, u64 end)
+{
+       unsigned sb_size;
+       u64 backup; /* offset of 2nd sb */
+
+       memset(l, 0, sizeof(*l));
+
+       if (start != BCH_SB_SECTOR)
+               start = round_up(start, block_size);
+       end = round_down(end, block_size);
+
+       if (start >= end)
+               die("insufficient space for superblocks");
+
+       /*
+        * Create two superblocks in the allowed range: reserve a maximum of 64k
+        */
+       sb_size = min_t(u64, 128, (end - start) / 2);
+
+       backup = start + sb_size;
+       backup = round_up(backup, block_size);
+
+       backup = min(backup, end);
+
+       sb_size = min(end - backup, backup - start);
+       sb_size = rounddown_pow_of_two(sb_size);
+
+       if (sb_size < 8)
+               die("insufficient space for superblocks");
+
+       l->magic                = BCACHE_MAGIC;
+       l->layout_type          = 0;
+       l->nr_superblocks       = 2;
+       l->sb_max_size_bits     = ilog2(sb_size);
+       l->sb_offset[0]         = cpu_to_le64(start);
+       l->sb_offset[1]         = cpu_to_le64(backup);
+}
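+
+/*
+ * Worked example (a sketch, taking BCH_SB_SECTOR == 8 and the default
+ * 256-sector window set up below, i.e. start = 8, end = 264, with
+ * block_size = 8):
+ *
+ *   sb_size = min(128, (264 - 8) / 2)  = 128 sectors
+ *   backup  = round_up(8 + 128, 8)     = 136
+ *   sb_size = min(264 - 136, 136 - 8)  = 128, i.e. 64k per superblock
+ *
+ * giving sb_offset[] = { 8, 136 } and sb_max_size_bits = 7.
+ */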
+
+void bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+{
+       if (!dev->sb_offset) {
+               dev->sb_offset  = BCH_SB_SECTOR;
+               dev->sb_end     = BCH_SB_SECTOR + 256;
+       }
+
+       if (!dev->size)
+               dev->size = get_size(dev->path, dev->fd) >> 9;
+
+       if (!dev->bucket_size) {
+               if (dev->size < min_size(opts.block_size))
+                       die("cannot format %s, too small (%llu sectors, min %llu)",
+                           dev->path, dev->size, min_size(opts.block_size));
+
+               /* Bucket size must be >= block size: */
+               dev->bucket_size = opts.block_size;
+
+               /* Bucket size must be >= btree node size: */
+               if (opt_defined(opts, btree_node_size))
+                       dev->bucket_size = max_t(unsigned, dev->bucket_size,
+                                                opts.btree_node_size);
+
+               /* Want a bucket size of at least 128k, if possible: */
+               dev->bucket_size = max(dev->bucket_size, 256U);
+
+               if (dev->size >= min_size(dev->bucket_size)) {
+                       unsigned scale = max(1,
+                                            ilog2(dev->size / min_size(dev->bucket_size)) / 4);
+
+                       scale = rounddown_pow_of_two(scale);
+
+                       /* max bucket size 1 mb */
+                       dev->bucket_size = min(dev->bucket_size * scale, 1U << 11);
+               } else {
+                       do {
+                               dev->bucket_size /= 2;
+                       } while (dev->size < min_size(dev->bucket_size));
+               }
+       }
+
+       dev->nbuckets   = dev->size / dev->bucket_size;
+
+       if (dev->bucket_size < opts.block_size)
+               die("Bucket size cannot be smaller than block size");
+
+       if (opt_defined(opts, btree_node_size) &&
+           dev->bucket_size < opts.btree_node_size)
+               die("Bucket size cannot be smaller than btree node size");
+
+       if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
+               die("Not enough buckets: %llu, need %u (bucket size %u)",
+                   dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
+
+}
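+
+/*
+ * Summary of the heuristic above: buckets start at max(block size, btree
+ * node size, 256 sectors = 128k).  If the device can hold min_size()
+ * worth of such buckets, the bucket size is scaled up by a power-of-two
+ * factor derived from ilog2(size / min_size) / 4, capped at 1mb
+ * (1 << 11 sectors); otherwise it is halved until BCH_MIN_NR_NBUCKETS
+ * buckets fit.
+ */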
+
+static unsigned parse_target(struct bch_sb_handle *sb,
+                            struct dev_opts *devs, size_t nr_devs,
+                            const char *s)
+{
+       struct dev_opts *i;
+       int idx;
+
+       if (!s)
+               return 0;
+
+       for (i = devs; i < devs + nr_devs; i++)
+               if (!strcmp(s, i->path))
+                       return dev_to_target(i - devs);
+
+       idx = bch2_disk_path_find(sb, s);
+       if (idx >= 0)
+               return group_to_target(idx);
+
+       die("Invalid target %s", s);
+       return 0;
+}
+
+struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
+                          struct bch_opts      fs_opts,
+                          struct format_opts   opts,
+                          struct dev_opts      *devs,
+                          size_t               nr_devs)
+{
+       struct bch_sb_handle sb = { NULL };
+       struct dev_opts *i;
+       struct bch_sb_field_members *mi;
+       unsigned max_dev_block_size = 0;
+       unsigned opt_id;
+
+       for (i = devs; i < devs + nr_devs; i++)
+               max_dev_block_size = max(max_dev_block_size,
+                                        get_blocksize(i->path, i->fd));
+
+       /* calculate block size: */
+       if (!opt_defined(fs_opts, block_size)) {
+               opt_set(fs_opts, block_size, max_dev_block_size);
+       } else if (fs_opts.block_size < max_dev_block_size)
+               die("blocksize too small: %u, must be greater than device blocksize %u",
+                   fs_opts.block_size, max_dev_block_size);
+
+       /* calculate bucket sizes: */
+       for (i = devs; i < devs + nr_devs; i++)
+               bch2_pick_bucket_size(fs_opts, i);
+
+       /* calculate btree node size: */
+       if (!opt_defined(fs_opts, btree_node_size)) {
+               /* 256k default btree node size */
+               opt_set(fs_opts, btree_node_size, 512);
+
+               for (i = devs; i < devs + nr_devs; i++)
+                       fs_opts.btree_node_size =
+                               min_t(unsigned, fs_opts.btree_node_size,
+                                     i->bucket_size);
+       }
+
+       if (!is_power_of_2(fs_opts.block_size))
+               die("block size must be power of 2");
+
+       if (!is_power_of_2(fs_opts.btree_node_size))
+               die("btree node size must be power of 2");
+
+       if (uuid_is_null(opts.uuid.b))
+               uuid_generate(opts.uuid.b);
+
+       if (bch2_sb_realloc(&sb, 0))
+               die("insufficient memory");
+
+       sb.sb->version          = cpu_to_le16(bcachefs_metadata_version_current);
+       sb.sb->version_min      = cpu_to_le16(bcachefs_metadata_version_current);
+       sb.sb->magic            = BCACHE_MAGIC;
+       sb.sb->block_size       = cpu_to_le16(fs_opts.block_size);
+       sb.sb->user_uuid        = opts.uuid;
+       sb.sb->nr_devices       = nr_devs;
+
+       uuid_generate(sb.sb->uuid.b);
+
+       if (opts.label)
+               memcpy(sb.sb->label,
+                      opts.label,
+                      min(strlen(opts.label), sizeof(sb.sb->label)));
+
+       for (opt_id = 0;
+            opt_id < bch2_opts_nr;
+            opt_id++) {
+               const struct bch_option *opt = &bch2_opt_table[opt_id];
+               u64 v;
+
+               if (opt->set_sb == SET_NO_SB_OPT)
+                       continue;
+
+               v = bch2_opt_defined_by_id(&fs_opts, opt_id)
+                       ? bch2_opt_get_by_id(&fs_opts, opt_id)
+                       : bch2_opt_get_by_id(&bch2_opts_default, opt_id);
+
+               opt->set_sb(sb.sb, v);
+       }
+
+       SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb.sb,
+                               ilog2(opts.encoded_extent_max));
+
+       struct timespec now;
+       if (clock_gettime(CLOCK_REALTIME, &now))
+               die("error getting current time: %m");
+
+       sb.sb->time_base_lo     = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
+       sb.sb->time_precision   = cpu_to_le32(1);
+
+       /* Member info: */
+       mi = bch2_sb_resize_members(&sb,
+                       (sizeof(*mi) + sizeof(struct bch_member) *
+                        nr_devs) / sizeof(u64));
+
+       for (i = devs; i < devs + nr_devs; i++) {
+               struct bch_member *m = mi->members + (i - devs);
+
+               uuid_generate(m->uuid.b);
+               m->nbuckets     = cpu_to_le64(i->nbuckets);
+               m->first_bucket = 0;
+               m->bucket_size  = cpu_to_le16(i->bucket_size);
+
+               SET_BCH_MEMBER_REPLACEMENT(m,   CACHE_REPLACEMENT_LRU);
+               SET_BCH_MEMBER_DISCARD(m,       i->discard);
+               SET_BCH_MEMBER_DATA_ALLOWED(m,  i->data_allowed);
+               SET_BCH_MEMBER_DURABILITY(m,    i->durability + 1);
+       }
+
+       /* Disk groups */
+       for (i = devs; i < devs + nr_devs; i++) {
+               struct bch_member *m = mi->members + (i - devs);
+               int idx;
+
+               if (!i->group)
+                       continue;
+
+               idx = bch2_disk_path_find_or_create(&sb, i->group);
+               if (idx < 0)
+                       die("error creating disk path: %s", idx);
+
+               SET_BCH_MEMBER_GROUP(m, idx + 1);
+       }
+
+       SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
+               parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target));
+       SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
+               parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target));
+       SET_BCH_SB_PROMOTE_TARGET(sb.sb,
+               parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target));
+
+       /* Crypt: */
+       if (opts.encrypted) {
+               struct bch_sb_field_crypt *crypt =
+                       bch2_sb_resize_crypt(&sb, sizeof(*crypt) / sizeof(u64));
+
+               bch_sb_crypt_init(sb.sb, crypt, opts.passphrase);
+               SET_BCH_SB_ENCRYPTION_TYPE(sb.sb, 1);
+       }
+
+       for (i = devs; i < devs + nr_devs; i++) {
+               sb.sb->dev_idx = i - devs;
+
+               init_layout(&sb.sb->layout, fs_opts.block_size,
+                           i->sb_offset, i->sb_end);
+
+               if (i->sb_offset == BCH_SB_SECTOR) {
+                       /* Zero start of disk */
+                       static const char zeroes[BCH_SB_SECTOR << 9];
+
+                       xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
+               }
+
+               bch2_super_write(i->fd, sb.sb);
+               close(i->fd);
+       }
+
+       return sb.sb;
+}
+
+void bch2_super_write(int fd, struct bch_sb *sb)
+{
+       struct nonce nonce = { 0 };
+
+       unsigned i;
+       for (i = 0; i < sb->layout.nr_superblocks; i++) {
+               sb->offset = sb->layout.sb_offset[i];
+
+               if (sb->offset == BCH_SB_SECTOR) {
+                       /* Write backup layout */
+                       xpwrite(fd, &sb->layout, sizeof(sb->layout),
+                               BCH_SB_LAYOUT_SECTOR << 9);
+               }
+
+               sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
+               xpwrite(fd, sb, vstruct_bytes(sb),
+                       le64_to_cpu(sb->offset) << 9);
+       }
+
+       fsync(fd);
+}
+
+struct bch_sb *__bch2_super_read(int fd, u64 sector)
+{
+       struct bch_sb sb, *ret;
+
+       xpread(fd, &sb, sizeof(sb), sector << 9);
+
+       if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
+               die("not a bcachefs superblock");
+
+       size_t bytes = vstruct_bytes(&sb);
+
+       ret = malloc(bytes);
+       if (!ret)
+               die("insufficient memory");
+
+       xpread(fd, ret, bytes, sector << 9);
+
+       return ret;
+}
+
+static unsigned get_dev_has_data(struct bch_sb *sb, unsigned dev)
+{
+       struct bch_sb_field_replicas *replicas;
+       struct bch_replicas_entry *r;
+       unsigned i, data_has = 0;
+
+       replicas = bch2_sb_get_replicas(sb);
+
+       if (replicas)
+               for_each_replicas_entry(replicas, r)
+                       for (i = 0; i < r->nr_devs; i++)
+                               if (r->devs[i] == dev)
+                                       data_has |= 1 << r->data_type;
+
+       return data_has;
+}
+
+static int bch2_sb_get_target(struct bch_sb *sb, char *buf, size_t len, u64 v)
+{
+       struct target t = target_decode(v);
+       int ret;
+
+       switch (t.type) {
+       case TARGET_NULL:
+               return scnprintf(buf, len, "none");
+       case TARGET_DEV: {
+               struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+               struct bch_member *m = mi->members + t.dev;
+
+               if (bch2_dev_exists(sb, mi, t.dev)) {
+                       char uuid_str[40];
+
+                       uuid_unparse(m->uuid.b, uuid_str);
+
+                       ret = scnprintf(buf, len, "Device %u (%s)", t.dev,
+                               uuid_str);
+               } else {
+                       ret = scnprintf(buf, len, "Bad device %u", t.dev);
+               }
+
+               break;
+       }
+       case TARGET_GROUP: {
+               struct bch_sb_field_disk_groups *gi;
+               gi = bch2_sb_get_disk_groups(sb);
+
+               struct bch_disk_group *g = gi->entries + t.group;
+
+               if (t.group < disk_groups_nr(gi) && !BCH_GROUP_DELETED(g)) {
+                       ret = scnprintf(buf, len, "Group %u (%.*s)", t.group,
+                               BCH_SB_LABEL_SIZE, g->label);
+               } else {
+                       ret = scnprintf(buf, len, "Bad group %u", t.group);
+               }
+               break;
+       }
+       default:
+               BUG();
+       }
+
+       return ret;
+}
+
+/* superblock printing: */
+
+static void bch2_sb_print_layout(struct bch_sb *sb, enum units units)
+{
+       struct bch_sb_layout *l = &sb->layout;
+       unsigned i;
+
+       printf("  type:                         %u\n"
+              "  superblock max size:          %s\n"
+              "  nr superblocks:               %u\n"
+              "  Offsets:                      ",
+              l->layout_type,
+              pr_units(1 << l->sb_max_size_bits, units),
+              l->nr_superblocks);
+
+       for (i = 0; i < l->nr_superblocks; i++) {
+               if (i)
+                       printf(", ");
+               printf("%llu", le64_to_cpu(l->sb_offset[i]));
+       }
+       putchar('\n');
+}
+
+static void bch2_sb_print_journal(struct bch_sb *sb, struct bch_sb_field *f,
+                                 enum units units)
+{
+       struct bch_sb_field_journal *journal = field_to_type(f, journal);
+       unsigned i, nr = bch2_nr_journal_buckets(journal);
+
+       printf("  Buckets:                      ");
+       for (i = 0; i < nr; i++) {
+               if (i)
+                       putchar(' ');
+               printf("%llu", le64_to_cpu(journal->buckets[i]));
+       }
+       putchar('\n');
+}
+
+static void bch2_sb_print_members(struct bch_sb *sb, struct bch_sb_field *f,
+                                 enum units units)
+{
+       struct bch_sb_field_members *mi = field_to_type(f, members);
+       struct bch_sb_field_disk_groups *gi = bch2_sb_get_disk_groups(sb);
+       unsigned i;
+
+       for (i = 0; i < sb->nr_devices; i++) {
+               struct bch_member *m = mi->members + i;
+               time_t last_mount = le64_to_cpu(m->last_mount);
+               char member_uuid_str[40];
+               char data_allowed_str[100];
+               char data_has_str[100];
+               char group[BCH_SB_LABEL_SIZE+10];
+               char time_str[64];
+
+               if (!bch2_member_exists(m))
+                       continue;
+
+               uuid_unparse(m->uuid.b, member_uuid_str);
+
+               if (BCH_MEMBER_GROUP(m)) {
+                       unsigned idx = BCH_MEMBER_GROUP(m) - 1;
+
+                       if (idx < disk_groups_nr(gi)) {
+                               snprintf(group, sizeof(group), "%.*s (%u)",
+                                       BCH_SB_LABEL_SIZE,
+                                       gi->entries[idx].label, idx);
+                       } else {
+                               strcpy(group, "(bad disk groups section)");
+                       }
+               } else {
+                       strcpy(group, "(none)");
+               }
+
+               bch2_flags_to_text(&PBUF(data_allowed_str),
+                                  bch2_data_types,
+                                  BCH_MEMBER_DATA_ALLOWED(m));
+               if (!data_allowed_str[0])
+                       strcpy(data_allowed_str, "(none)");
+
+               bch2_flags_to_text(&PBUF(data_has_str),
+                                  bch2_data_types,
+                                  get_dev_has_data(sb, i));
+               if (!data_has_str[0])
+                       strcpy(data_has_str, "(none)");
+
+               if (last_mount) {
+                       struct tm *tm = localtime(&last_mount);
+                       size_t err = strftime(time_str, sizeof(time_str), "%c", tm);
+                       if (!err)
+                               strcpy(time_str, "(formatting error)");
+               } else {
+                       strcpy(time_str, "(never)");
+               }
+
+               printf("  Device %u:\n"
+                      "    UUID:                       %s\n"
+                      "    Size:                       %s\n"
+                      "    Bucket size:                %s\n"
+                      "    First bucket:               %u\n"
+                      "    Buckets:                    %llu\n"
+                      "    Last mount:                 %s\n"
+                      "    State:                      %s\n"
+                      "    Group:                      %s\n"
+                      "    Data allowed:               %s\n"
+
+                      "    Has data:                   %s\n"
+
+                      "    Replacement policy:         %s\n"
+                      "    Discard:                    %llu\n",
+                      i, member_uuid_str,
+                      pr_units(le16_to_cpu(m->bucket_size) *
+                               le64_to_cpu(m->nbuckets), units),
+                      pr_units(le16_to_cpu(m->bucket_size), units),
+                      le16_to_cpu(m->first_bucket),
+                      le64_to_cpu(m->nbuckets),
+                      time_str,
+
+                      BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
+                      ? bch2_dev_state[BCH_MEMBER_STATE(m)]
+                      : "unknown",
+
+                      group,
+                      data_allowed_str,
+                      data_has_str,
+
+                      BCH_MEMBER_REPLACEMENT(m) < CACHE_REPLACEMENT_NR
+                      ? bch2_cache_replacement_policies[BCH_MEMBER_REPLACEMENT(m)]
+                      : "unknown",
+
+                      BCH_MEMBER_DISCARD(m));
+       }
+}
+
+static void bch2_sb_print_crypt(struct bch_sb *sb, struct bch_sb_field *f,
+                               enum units units)
+{
+       struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
+
+       printf("  KDF:                  %llu\n"
+              "  scrypt n:             %llu\n"
+              "  scrypt r:             %llu\n"
+              "  scrypt p:             %llu\n",
+              BCH_CRYPT_KDF_TYPE(crypt),
+              BCH_KDF_SCRYPT_N(crypt),
+              BCH_KDF_SCRYPT_R(crypt),
+              BCH_KDF_SCRYPT_P(crypt));
+}
+
+static void bch2_sb_print_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f,
+                                  enum units units)
+{
+       struct bch_sb_field_replicas_v0 *replicas = field_to_type(f, replicas_v0);
+       struct bch_replicas_entry_v0 *e;
+       unsigned i;
+
+       for_each_replicas_entry(replicas, e) {
+               printf_pad(32, "  %s:", bch2_data_types[e->data_type]);
+
+               putchar('[');
+               for (i = 0; i < e->nr_devs; i++) {
+                       if (i)
+                               putchar(' ');
+                       printf("%u", e->devs[i]);
+               }
+               printf("]\n");
+       }
+}
+
+static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
+                                  enum units units)
+{
+       struct bch_sb_field_replicas *replicas = field_to_type(f, replicas);
+       struct bch_replicas_entry *e;
+       unsigned i;
+
+       for_each_replicas_entry(replicas, e) {
+               printf_pad(32, "  %s: %u/%u",
+                          bch2_data_types[e->data_type],
+                          e->nr_required,
+                          e->nr_devs);
+
+               putchar('[');
+               for (i = 0; i < e->nr_devs; i++) {
+                       if (i)
+                               putchar(' ');
+                       printf("%u", e->devs[i]);
+               }
+               printf("]\n");
+       }
+}
+
+static void bch2_sb_print_quota(struct bch_sb *sb, struct bch_sb_field *f,
+                               enum units units)
+{
+}
+
+static void bch2_sb_print_disk_groups(struct bch_sb *sb, struct bch_sb_field *f,
+                                     enum units units)
+{
+}
+
+static void bch2_sb_print_clean(struct bch_sb *sb, struct bch_sb_field *f,
+                               enum units units)
+{
+}
+
+static void bch2_sb_print_journal_seq_blacklist(struct bch_sb *sb, struct bch_sb_field *f,
+                               enum units units)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl = field_to_type(f, journal_seq_blacklist);
+       unsigned i, nr = blacklist_nr_entries(bl);
+
+       for (i = 0; i < nr; i++) {
+               struct journal_seq_blacklist_entry *e =
+                       bl->start + i;
+
+               printf("  %llu-%llu\n",
+                      le64_to_cpu(e->start),
+                      le64_to_cpu(e->end));
+       }
+}
+
+typedef void (*sb_field_print_fn)(struct bch_sb *, struct bch_sb_field *, enum units);
+
+struct bch_sb_field_toolops {
+       sb_field_print_fn       print;
+};
+
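+/*
+ * One ops entry per superblock field type, generated from the
+ * BCH_SB_FIELDS() x-macro; field types with nothing useful to print
+ * (quota, disk_groups, clean) get the empty stubs above.
+ */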
+static const struct bch_sb_field_toolops bch2_sb_field_ops[] = {
+#define x(f, nr)                                       \
+       [BCH_SB_FIELD_##f] = {                          \
+               .print  = bch2_sb_print_##f,            \
+       },
+       BCH_SB_FIELDS()
+#undef x
+};
+
+static inline void bch2_sb_field_print(struct bch_sb *sb,
+                                      struct bch_sb_field *f,
+                                      enum units units)
+{
+       unsigned type = le32_to_cpu(f->type);
+
+       if (type < BCH_SB_FIELD_NR)
+               bch2_sb_field_ops[type].print(sb, f, units);
+       else
+               printf("(unknown field %u)\n", type);
+}
+
+void bch2_sb_print(struct bch_sb *sb, bool print_layout,
+                  unsigned fields, enum units units)
+{
+       struct bch_sb_field_members *mi;
+       char user_uuid_str[40], internal_uuid_str[40];
+       char features_str[200];
+       char fields_have_str[200];
+       char label[BCH_SB_LABEL_SIZE + 1];
+       char time_str[64];
+       char foreground_str[64];
+       char background_str[64];
+       char promote_str[64];
+       struct bch_sb_field *f;
+       u64 fields_have = 0;
+       unsigned nr_devices = 0;
+       time_t time_base = le64_to_cpu(sb->time_base_lo) / NSEC_PER_SEC;
+
+       memcpy(label, sb->label, BCH_SB_LABEL_SIZE);
+       label[BCH_SB_LABEL_SIZE] = '\0';
+
+       uuid_unparse(sb->user_uuid.b, user_uuid_str);
+       uuid_unparse(sb->uuid.b, internal_uuid_str);
+
+       if (time_base) {
+               struct tm *tm = localtime(&time_base);
+               size_t err = strftime(time_str, sizeof(time_str), "%c", tm);
+               if (!err)
+                       strcpy(time_str, "(formatting error)");
+       } else {
+               strcpy(time_str, "(not set)");
+       }
+
+       mi = bch2_sb_get_members(sb);
+       if (mi) {
+               struct bch_member *m;
+
+               for (m = mi->members;
+                    m < mi->members + sb->nr_devices;
+                    m++)
+                       nr_devices += bch2_member_exists(m);
+       }
+
+       bch2_sb_get_target(sb, foreground_str, sizeof(foreground_str),
+               BCH_SB_FOREGROUND_TARGET(sb));
+
+       bch2_sb_get_target(sb, background_str, sizeof(background_str),
+               BCH_SB_BACKGROUND_TARGET(sb));
+
+       bch2_sb_get_target(sb, promote_str, sizeof(promote_str),
+               BCH_SB_PROMOTE_TARGET(sb));
+
+       bch2_flags_to_text(&PBUF(features_str),
+                          bch2_sb_features,
+                          le64_to_cpu(sb->features[0]));
+
+       vstruct_for_each(sb, f)
+               fields_have |= 1 << le32_to_cpu(f->type);
+       bch2_flags_to_text(&PBUF(fields_have_str),
+                          bch2_sb_fields, fields_have);
+
+       printf("External UUID:                  %s\n"
+              "Internal UUID:                  %s\n"
+              "Label:                          %s\n"
+              "Version:                        %llu\n"
+              "Created:                        %s\n"
+              "Sequence number:                %llu\n"
+              "Block size:                     %s\n"
+              "Btree node size:                %s\n"
+              "Error action:                   %s\n"
+              "Clean:                          %llu\n"
+              "Features:                       %s\n"
+
+              "Metadata replicas:              %llu\n"
+              "Data replicas:                  %llu\n"
+
+              "Metadata checksum type:         %s (%llu)\n"
+              "Data checksum type:             %s (%llu)\n"
+              "Compression type:               %s (%llu)\n"
+
+              "Foreground write target:        %s\n"
+              "Background write target:        %s\n"
+              "Promote target:                 %s\n"
+
+              "String hash type:               %s (%llu)\n"
+              "32 bit inodes:                  %llu\n"
+              "GC reserve percentage:          %llu%%\n"
+              "Root reserve percentage:        %llu%%\n"
+
+              "Devices:                        %u live, %u total\n"
+              "Sections:                       %s\n"
+              "Superblock size:                %llu\n",
+              user_uuid_str,
+              internal_uuid_str,
+              label,
+              le64_to_cpu(sb->version),
+              time_str,
+              le64_to_cpu(sb->seq),
+              pr_units(le16_to_cpu(sb->block_size), units),
+              pr_units(BCH_SB_BTREE_NODE_SIZE(sb), units),
+
+              BCH_SB_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
+              ? bch2_error_actions[BCH_SB_ERROR_ACTION(sb)]
+              : "unknown",
+
+              BCH_SB_CLEAN(sb),
+              features_str,
+
+              BCH_SB_META_REPLICAS_WANT(sb),
+              BCH_SB_DATA_REPLICAS_WANT(sb),
+
+              BCH_SB_META_CSUM_TYPE(sb) < BCH_CSUM_OPT_NR
+              ? bch2_csum_opts[BCH_SB_META_CSUM_TYPE(sb)]
+              : "unknown",
+              BCH_SB_META_CSUM_TYPE(sb),
+
+              BCH_SB_DATA_CSUM_TYPE(sb) < BCH_CSUM_OPT_NR
+              ? bch2_csum_opts[BCH_SB_DATA_CSUM_TYPE(sb)]
+              : "unknown",
+              BCH_SB_DATA_CSUM_TYPE(sb),
+
+              BCH_SB_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_OPT_NR
+              ? bch2_compression_opts[BCH_SB_COMPRESSION_TYPE(sb)]
+              : "unknown",
+              BCH_SB_COMPRESSION_TYPE(sb),
+
+              foreground_str,
+              background_str,
+              promote_str,
+
+              BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
+              ? bch2_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)]
+              : "unknown",
+              BCH_SB_STR_HASH_TYPE(sb),
+
+              BCH_SB_INODE_32BIT(sb),
+              BCH_SB_GC_RESERVE(sb),
+              BCH_SB_ROOT_RESERVE(sb),
+
+              nr_devices, sb->nr_devices,
+              fields_have_str,
+              vstruct_bytes(sb));
+
+       if (print_layout) {
+               printf("\n"
+                      "Layout:\n");
+               bch2_sb_print_layout(sb, units);
+       }
+
+       vstruct_for_each(sb, f) {
+               unsigned type = le32_to_cpu(f->type);
+               char name[60];
+
+               if (!(fields & (1 << type)))
+                       continue;
+
+               if (type < BCH_SB_FIELD_NR) {
+                       scnprintf(name, sizeof(name), "%s", bch2_sb_fields[type]);
+                       name[0] = toupper(name[0]);
+               } else {
+                       scnprintf(name, sizeof(name), "(unknown field %u)", type);
+               }
+
+               printf("\n%s (size %llu):\n", name, vstruct_bytes(f));
+               if (type < BCH_SB_FIELD_NR)
+                       bch2_sb_field_print(sb, f, units);
+       }
+}
+
+/* ioctl interface: */
+
+/* Global control device: */
+int bcachectl_open(void)
+{
+       return xopen("/dev/bcachefs-ctl", O_RDWR);
+}
+
+/* Filesystem handles (ioctl, sysfs dir): */
+
+#define SYSFS_BASE "/sys/fs/bcachefs/"
+
+void bcache_fs_close(struct bchfs_handle fs)
+{
+       close(fs.ioctl_fd);
+       close(fs.sysfs_fd);
+}
+
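+/*
+ * Open a filesystem by mount path or by its external UUID string; usage
+ * sketch (illustrative):
+ *
+ *     struct bchfs_handle fs = bcache_fs_open(path_or_uuid);
+ *     ...
+ *     bcache_fs_close(fs);
+ */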
+struct bchfs_handle bcache_fs_open(const char *path)
+{
+       struct bchfs_handle ret;
+
+       if (!uuid_parse(path, ret.uuid.b)) {
+               /* It's a UUID, look it up in sysfs: */
+               char *sysfs = mprintf(SYSFS_BASE "%s", path);
+               ret.sysfs_fd = xopen(sysfs, O_RDONLY);
+
+               char *minor = read_file_str(ret.sysfs_fd, "minor");
+               char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
+               ret.ioctl_fd = xopen(ctl, O_RDWR);
+
+               free(sysfs);
+               free(minor);
+               free(ctl);
+       } else {
+               /* It's a path: */
+               ret.ioctl_fd = xopen(path, O_RDONLY);
+
+               struct bch_ioctl_query_uuid uuid;
+               if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid) < 0)
+                       die("error opening %s: not a bcachefs filesystem", path);
+
+               ret.uuid = uuid.uuid;
+
+               char uuid_str[40];
+               uuid_unparse(uuid.uuid.b, uuid_str);
+
+               char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
+               ret.sysfs_fd = xopen(sysfs, O_RDONLY);
+               free(sysfs);
+       }
+
+       return ret;
+}
+
+/*
+ * Given a path to a block device, open the filesystem it belongs to; also
+ * return the device's idx:
+ */
+struct bchfs_handle bchu_fs_open_by_dev(const char *path, unsigned *idx)
+{
+       char buf[1024], *uuid_str;
+
+       struct stat stat = xstat(path);
+
+       if (!S_ISBLK(stat.st_mode))
+               die("%s is not a block device", path);
+
+       char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
+                             major(stat.st_dev),
+                             minor(stat.st_dev));
+       ssize_t len = readlink(sysfs, buf, sizeof(buf) - 1);
+       free(sysfs);
+
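+       /*
+        * If the device is part of a running filesystem, sysfs gives us a
+        * symlink to its dev-N directory; otherwise fall back to reading the
+        * superblock directly:
+        */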
+       if (len > 0) {
+               buf[len] = '\0';        /* readlink() doesn't NUL-terminate */
+               char *p = strrchr(buf, '/');
+               if (!p || sscanf(p + 1, "dev-%u", idx) != 1)
+                       die("error parsing sysfs");
+
+               *p = '\0';
+               p = strrchr(buf, '/');
+               uuid_str = p + 1;
+       } else {
+               struct bch_opts opts = bch2_opts_empty();
+
+               opt_set(opts, noexcl,   true);
+               opt_set(opts, nochanges, true);
+
+               struct bch_sb_handle sb;
+               int ret = bch2_read_super(path, &opts, &sb);
+               if (ret)
+                       die("Error opening %s: %s", path, strerror(-ret));
+
+               *idx = sb.sb->dev_idx;
+               uuid_str = buf;
+               uuid_unparse(sb.sb->user_uuid.b, uuid_str);
+
+               bch2_free_super(&sb);
+       }
+
+       return bcache_fs_open(uuid_str);
+}
+
+int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
+{
+       int progress_fd = xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd);
+
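+       /*
+        * BCH_IOCTL_DATA hands back a file descriptor; each read yields a
+        * bch_ioctl_data_event describing the job's current position:
+        */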
+       while (1) {
+               struct bch_ioctl_data_event e;
+
+               if (read(progress_fd, &e, sizeof(e)) != sizeof(e))
+                       die("error reading from progress fd: %m");
+
+               if (e.type)
+                       continue;
+
+               if (e.p.data_type == U8_MAX)
+                       break;
+
+               printf("\33[2K\r");
+
+               printf("%llu%% complete: current position %s",
+                      e.p.sectors_total
+                      ? e.p.sectors_done * 100 / e.p.sectors_total
+                      : 0,
+                      bch2_data_types[e.p.data_type]);
+
+               switch (e.p.data_type) {
+               case BCH_DATA_btree:
+               case BCH_DATA_user:
+                       printf(" %s:%llu:%llu",
+                              bch2_btree_ids[e.p.btree_id],
+                              e.p.pos.inode,
+                              e.p.pos.offset);
+               }
+
+               fflush(stdout);
+               sleep(1);
+       }
+       printf("\nDone\n");
+
+       close(progress_fd);
+       return 0;
+}
+
+/* option parsing */
+
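+/*
+ * Scan argv for --option[=value] arguments matching the given option types,
+ * consume them (shrinking argv in place) and record their string values.
+ *
+ * Illustrative example: with argv = { "cmd", "--foo", "--metadata_replicas=2" },
+ * the recognized option is removed, leaving argv = { "cmd", "--foo" }, and
+ * its value "2" is recorded in the returned struct.
+ */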
+struct bch_opt_strs bch2_cmdline_opts_get(int *argc, char *argv[],
+                                         unsigned opt_types)
+{
+       struct bch_opt_strs opts;
+       unsigned i = 1;
+
+       memset(&opts, 0, sizeof(opts));
+
+       while (i < *argc) {
+               char *optstr = strcmp_prefix(argv[i], "--");
+               char *valstr = NULL, *p;
+               int optid, nr_args = 1;
+
+               if (!optstr) {
+                       i++;
+                       continue;
+               }
+
+               optstr = strdup(optstr);
+
+               p = optstr;
+               while (isalpha(*p) || *p == '_')
+                       p++;
+
+               if (*p == '=') {
+                       *p = '\0';
+                       valstr = p + 1;
+               }
+
+               optid = bch2_opt_lookup(optstr);
+               if (optid < 0 ||
+                   !(bch2_opt_table[optid].mode & opt_types)) {
+                       free(optstr);
+                       i++;
+                       continue;
+               }
+
+               if (!valstr &&
+                   bch2_opt_table[optid].type != BCH_OPT_BOOL) {
+                       nr_args = 2;
+                       valstr = argv[i + 1];
+               }
+
+               if (!valstr)
+                       valstr = "1";
+
+               opts.by_id[optid] = valstr;
+
+               *argc -= nr_args;
+               memmove(&argv[i],
+                       &argv[i + nr_args],
+                       sizeof(char *) * (*argc - i));
+               argv[*argc] = NULL;
+       }
+
+       return opts;
+}
+
+struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
+{
+       struct bch_opts opts = bch2_opts_empty();
+       unsigned i;
+       int ret;
+       u64 v;
+
+       for (i = 0; i < bch2_opts_nr; i++) {
+               if (!strs.by_id[i] ||
+                   bch2_opt_table[i].type == BCH_OPT_FN)
+                       continue;
+
+               ret = bch2_opt_parse(NULL, &bch2_opt_table[i],
+                                    strs.by_id[i], &v);
+               if (ret < 0)
+                       die("Invalid %s: %s",
+                           bch2_opt_table[i].attr.name,
+                           strerror(-ret));
+
+               bch2_opt_set_by_id(&opts, i, v);
+       }
+
+       return opts;
+}
+
+void bch2_opts_usage(unsigned opt_types)
+{
+       const struct bch_option *opt;
+       unsigned i, c = 0, helpcol = 30;
+
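+       /*
+        * GCC nested functions: 'c' tracks the current output column so the
+        * help text can be aligned at 'helpcol'.
+        */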
+       void tabalign() {
+               while (c < helpcol) {
+                       putchar(' ');
+                       c++;
+               }
+       }
+
+       void newline() {
+               printf("\n");
+               c = 0;
+       }
+
+       for (opt = bch2_opt_table;
+            opt < bch2_opt_table + bch2_opts_nr;
+            opt++) {
+               if (!(opt->mode & opt_types))
+                       continue;
+
+               c += printf("      --%s", opt->attr.name);
+
+               switch (opt->type) {
+               case BCH_OPT_BOOL:
+                       break;
+               case BCH_OPT_STR:
+                       c += printf("=(");
+                       for (i = 0; opt->choices[i]; i++) {
+                               if (i)
+                                       c += printf("|");
+                               c += printf("%s", opt->choices[i]);
+                       }
+                       c += printf(")");
+                       break;
+               default:
+                       c += printf("=%s", opt->hint);
+                       break;
+               }
+
+               if (opt->help) {
+                       const char *l = opt->help;
+
+                       if (c >= helpcol)
+                               newline();
+
+                       while (1) {
+                               const char *n = strchrnul(l, '\n');
+
+                               tabalign();
+                               printf("%.*s", (int) (n - l), l);
+                               newline();
+
+                               if (!*n)
+                                       break;
+                               l = n + 1;
+                       }
+               } else {
+                       newline();
+               }
+       }
+}
+
+dev_names bchu_fs_get_devices(struct bchfs_handle fs)
+{
+       DIR *dir = fdopendir(fs.sysfs_fd);
+       struct dirent *d;
+       dev_names devs;
+
+       darray_init(devs);
+
+       while ((errno = 0), (d = readdir(dir))) {
+               struct dev_name n = { 0, NULL, NULL };
+
+               if (sscanf(d->d_name, "dev-%u", &n.idx) != 1)
+                       continue;
+
+               char *block_attr = mprintf("dev-%u/block", n.idx);
+
+               char sysfs_block_buf[4096];
+               ssize_t r = readlinkat(fs.sysfs_fd, block_attr,
+                                      sysfs_block_buf, sizeof(sysfs_block_buf) - 1);
+               if (r > 0) {
+                       sysfs_block_buf[r] = '\0';
+                       n.dev = strdup(basename(sysfs_block_buf));
+               }
+
+               free(block_attr);
+
+               char *label_attr = mprintf("dev-%u/label", n.idx);
+               n.label = read_file_str(fs.sysfs_fd, label_attr);
+               free(label_attr);
+
+               darray_append(devs, n);
+       }
+
+       closedir(dir);
+
+       return devs;
+}
diff --git a/libbcachefs.h b/libbcachefs.h
new file mode 100644 (file)
index 0000000..30add92
--- /dev/null
@@ -0,0 +1,229 @@
+#ifndef _LIBBCACHE_H
+#define _LIBBCACHE_H
+
+#include <linux/uuid.h>
+#include <stdbool.h>
+
+#include "libbcachefs/bcachefs_format.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/vstructs.h"
+#include "tools-util.h"
+
+/* option parsing */
+
+struct bch_opt_strs {
+       union {
+               char                    *by_id[bch2_opts_nr];
+               struct {
+#define x(_name, ...)  char    *_name;
+                       BCH_OPTS()
+#undef x
+               };
+       };
+};
+
+struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned);
+struct bch_opts bch2_parse_opts(struct bch_opt_strs);
+void bch2_opts_usage(unsigned);
+
+struct format_opts {
+       char            *label;
+       uuid_le         uuid;
+
+       unsigned        encoded_extent_max;
+
+       bool            encrypted;
+       char            *passphrase;
+};
+
+static inline struct format_opts format_opts_default()
+{
+       return (struct format_opts) {
+               .encoded_extent_max     = 128,
+       };
+}
+
+struct dev_opts {
+       int             fd;
+       char            *path;
+       u64             size; /* 512 byte sectors */
+       unsigned        bucket_size;
+       const char      *group;
+       unsigned        data_allowed;
+       unsigned        durability;
+       bool            discard;
+
+       u64             nbuckets;
+
+       u64             sb_offset;
+       u64             sb_end;
+};
+
+static inline struct dev_opts dev_opts_default()
+{
+       return (struct dev_opts) {
+               .data_allowed           = ~0U << 2,
+               .durability             = 1,
+       };
+}
+
+void bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+struct bch_sb *bch2_format(struct bch_opt_strs,
+                          struct bch_opts,
+                          struct format_opts, struct dev_opts *, size_t);
+
+void bch2_super_write(int, struct bch_sb *);
+struct bch_sb *__bch2_super_read(int, u64);
+
+void bch2_sb_print(struct bch_sb *, bool, unsigned, enum units);
+
+/* ioctl interface: */
+
+int bcachectl_open(void);
+
+struct bchfs_handle {
+       uuid_le uuid;
+       int     ioctl_fd;
+       int     sysfs_fd;
+};
+
+void bcache_fs_close(struct bchfs_handle);
+struct bchfs_handle bcache_fs_open(const char *);
+struct bchfs_handle bchu_fs_open_by_dev(const char *, unsigned *);
+
+static inline void bchu_disk_add(struct bchfs_handle fs, char *dev)
+{
+       struct bch_ioctl_disk i = { .dev = (unsigned long) dev, };
+
+       xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &i);
+}
+
+static inline void bchu_disk_remove(struct bchfs_handle fs, unsigned dev_idx,
+                                   unsigned flags)
+{
+       struct bch_ioctl_disk i = {
+               .flags  = flags|BCH_BY_INDEX,
+               .dev    = dev_idx,
+       };
+
+       xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &i);
+}
+
+static inline void bchu_disk_online(struct bchfs_handle fs, char *dev)
+{
+       struct bch_ioctl_disk i = { .dev = (unsigned long) dev, };
+
+       xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ONLINE, &i);
+}
+
+static inline void bchu_disk_offline(struct bchfs_handle fs, unsigned dev_idx,
+                                    unsigned flags)
+{
+       struct bch_ioctl_disk i = {
+               .flags  = flags|BCH_BY_INDEX,
+               .dev    = dev_idx,
+       };
+
+       xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_OFFLINE, &i);
+}
+
+static inline void bchu_disk_set_state(struct bchfs_handle fs, unsigned dev,
+                                      unsigned new_state, unsigned flags)
+{
+       struct bch_ioctl_disk_set_state i = {
+               .flags          = flags|BCH_BY_INDEX,
+               .new_state      = new_state,
+               .dev            = dev,
+       };
+
+       xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &i);
+}
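+
+/*
+ * Usage sketch for the wrappers above (illustrative): take a member device
+ * offline by index, then bring it back online by path:
+ *
+ *     bchu_disk_offline(fs, dev_idx, 0);
+ *     bchu_disk_online(fs, "/dev/sda1");
+ */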
+
+static inline struct bch_ioctl_fs_usage *bchu_fs_usage(struct bchfs_handle fs)
+{
+       struct bch_ioctl_fs_usage *u = NULL;
+       size_t replica_entries_bytes = 4096;
+
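+       /*
+        * Grow the buffer until the replicas list fits; the kernel signals a
+        * too-small buffer with ERANGE:
+        */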
+       while (1) {
+               u = xrealloc(u, sizeof(*u) + replica_entries_bytes);
+               u->replica_entries_bytes = replica_entries_bytes;
+
+               if (!ioctl(fs.ioctl_fd, BCH_IOCTL_FS_USAGE, u))
+                       return u;
+
+               if (errno != ERANGE)
+                       die("BCH_IOCTL_FS_USAGE error: %m");
+
+               replica_entries_bytes *= 2;
+       }
+}
+
+static inline struct bch_ioctl_dev_usage bchu_dev_usage(struct bchfs_handle fs,
+                                                       unsigned idx)
+{
+       struct bch_ioctl_dev_usage i = { .dev = idx, .flags = BCH_BY_INDEX};
+
+       if (xioctl(fs.ioctl_fd, BCH_IOCTL_DEV_USAGE, &i))
+               die("BCH_IOCTL_DEV_USAGE error: %m");
+       return i;
+}
+
+static inline struct bch_sb *bchu_read_super(struct bchfs_handle fs, unsigned idx)
+{
+       size_t size = 4096;
+       struct bch_sb *sb = NULL;
+
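+       /* As in bchu_fs_usage(), retry with a doubled buffer on ERANGE: */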
+       while (1) {
+               sb = xrealloc(sb, size);
+               struct bch_ioctl_read_super i = {
+                       .size   = size,
+                       .sb     = (unsigned long) sb,
+               };
+
+               if (idx != -1) {
+                       i.flags |= BCH_READ_DEV|BCH_BY_INDEX;
+                       i.dev = idx;
+               }
+
+               if (!ioctl(fs.ioctl_fd, BCH_IOCTL_READ_SUPER, &i))
+                       return sb;
+               if (errno != ERANGE)
+                       die("BCH_IOCTL_READ_SUPER error: %m");
+               size *= 2;
+       }
+}
+
+static inline unsigned bchu_disk_get_idx(struct bchfs_handle fs, dev_t dev)
+{
+       struct bch_ioctl_disk_get_idx i = { .dev = dev };
+
+       return xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_GET_IDX, &i);
+}
+
+static inline void bchu_disk_resize(struct bchfs_handle fs,
+                                   unsigned idx,
+                                   u64 nbuckets)
+{
+       struct bch_ioctl_disk_resize i = {
+               .flags  = BCH_BY_INDEX,
+               .dev    = idx,
+               .nbuckets = nbuckets,
+       };
+
+       xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_RESIZE, &i);
+}
+
+int bchu_data(struct bchfs_handle, struct bch_ioctl_data);
+
+struct dev_name {
+       unsigned        idx;
+       char            *dev;
+       char            *label;
+       uuid_le         uuid;
+};
+typedef darray(struct dev_name) dev_names;
+
+dev_names bchu_fs_get_devices(struct bchfs_handle);
+
+#endif /* _LIBBCACHE_H */
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
new file mode 100644 (file)
index 0000000..76c98dd
--- /dev/null
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+
+#include "bcachefs.h"
+
+#include <linux/fs.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "acl.h"
+#include "fs.h"
+#include "xattr.h"
+
+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long)
+{
+       return sizeof(bch_acl_header) +
+               sizeof(bch_acl_entry_short) * nr_short +
+               sizeof(bch_acl_entry) * nr_long;
+}
+
+static inline int acl_to_xattr_type(int type)
+{
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS;
+       case ACL_TYPE_DEFAULT:
+               return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT;
+       default:
+               BUG();
+       }
+}
+
+/*
+ * Convert from filesystem to in-memory representation.
+ */
+static struct posix_acl *bch2_acl_from_disk(const void *value, size_t size)
+{
+       const void *p, *end = value + size;
+       struct posix_acl *acl;
+       struct posix_acl_entry *out;
+       unsigned count = 0;
+
+       if (!value)
+               return NULL;
+       if (size < sizeof(bch_acl_header))
+               goto invalid;
+       if (((bch_acl_header *)value)->a_version !=
+           cpu_to_le32(BCH_ACL_VERSION))
+               goto invalid;
+
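+       /* First pass: validate the entries and count them: */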
+       p = value + sizeof(bch_acl_header);
+       while (p < end) {
+               const bch_acl_entry *entry = p;
+
+               if (p + sizeof(bch_acl_entry_short) > end)
+                       goto invalid;
+
+               switch (le16_to_cpu(entry->e_tag)) {
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       p += sizeof(bch_acl_entry_short);
+                       break;
+               case ACL_USER:
+               case ACL_GROUP:
+                       p += sizeof(bch_acl_entry);
+                       break;
+               default:
+                       goto invalid;
+               }
+
+               count++;
+       }
+
+       if (p > end)
+               goto invalid;
+
+       if (!count)
+               return NULL;
+
+       acl = posix_acl_alloc(count, GFP_KERNEL);
+       if (!acl)
+               return ERR_PTR(-ENOMEM);
+
+       out = acl->a_entries;
+
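+       /* Second pass: decode the entries into the in-memory representation: */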
+       p = value + sizeof(bch_acl_header);
+       while (p < end) {
+               const bch_acl_entry *in = p;
+
+               out->e_tag  = le16_to_cpu(in->e_tag);
+               out->e_perm = le16_to_cpu(in->e_perm);
+
+               switch (out->e_tag) {
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       p += sizeof(bch_acl_entry_short);
+                       break;
+               case ACL_USER:
+                       out->e_uid = make_kuid(&init_user_ns,
+                                              le32_to_cpu(in->e_id));
+                       p += sizeof(bch_acl_entry);
+                       break;
+               case ACL_GROUP:
+                       out->e_gid = make_kgid(&init_user_ns,
+                                              le32_to_cpu(in->e_id));
+                       p += sizeof(bch_acl_entry);
+                       break;
+               }
+
+               out++;
+       }
+
+       BUG_ON(out != acl->a_entries + acl->a_count);
+
+       return acl;
+invalid:
+       pr_err("invalid acl entry");
+       return ERR_PTR(-EINVAL);
+}
+
+#define acl_for_each_entry(acl, acl_e)                 \
+       for (acl_e = acl->a_entries;                    \
+            acl_e < acl->a_entries + acl->a_count;     \
+            acl_e++)
+
+/*
+ * Convert from in-memory to filesystem representation.
+ */
+static struct bkey_i_xattr *
+bch2_acl_to_xattr(struct btree_trans *trans,
+                 const struct posix_acl *acl,
+                 int type)
+{
+       struct bkey_i_xattr *xattr;
+       bch_acl_header *acl_header;
+       const struct posix_acl_entry *acl_e;
+       void *outptr;
+       unsigned nr_short = 0, nr_long = 0, acl_len, u64s;
+
+       acl_for_each_entry(acl, acl_e) {
+               switch (acl_e->e_tag) {
+               case ACL_USER:
+               case ACL_GROUP:
+                       nr_long++;
+                       break;
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       nr_short++;
+                       break;
+               default:
+                       return ERR_PTR(-EINVAL);
+               }
+       }
+
+       acl_len = bch2_acl_size(nr_short, nr_long);
+       u64s = BKEY_U64s + xattr_val_u64s(0, acl_len);
+
+       if (u64s > U8_MAX)
+               return ERR_PTR(-E2BIG);
+
+       xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
+       if (IS_ERR(xattr))
+               return xattr;
+
+       bkey_xattr_init(&xattr->k_i);
+       xattr->k.u64s           = u64s;
+       xattr->v.x_type         = acl_to_xattr_type(type);
+       xattr->v.x_name_len     = 0;
+       xattr->v.x_val_len      = cpu_to_le16(acl_len);
+
+       acl_header = xattr_val(&xattr->v);
+       acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION);
+
+       outptr = (void *) acl_header + sizeof(*acl_header);
+
+       acl_for_each_entry(acl, acl_e) {
+               bch_acl_entry *entry = outptr;
+
+               entry->e_tag = cpu_to_le16(acl_e->e_tag);
+               entry->e_perm = cpu_to_le16(acl_e->e_perm);
+               switch (acl_e->e_tag) {
+               case ACL_USER:
+                       entry->e_id = cpu_to_le32(
+                               from_kuid(&init_user_ns, acl_e->e_uid));
+                       outptr += sizeof(bch_acl_entry);
+                       break;
+               case ACL_GROUP:
+                       entry->e_id = cpu_to_le32(
+                               from_kgid(&init_user_ns, acl_e->e_gid));
+                       outptr += sizeof(bch_acl_entry);
+                       break;
+
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       outptr += sizeof(bch_acl_entry_short);
+                       break;
+               }
+       }
+
+       BUG_ON(outptr != xattr_val(&xattr->v) + acl_len);
+
+       return xattr;
+}
+
+struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
+{
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c_xattr xattr;
+       struct posix_acl *acl = NULL;
+
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+
+       iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
+                       &inode->ei_str_hash, inode->v.i_ino,
+                       &X_SEARCH(acl_to_xattr_type(type), "", 0),
+                       0);
+       if (IS_ERR(iter)) {
+               if (PTR_ERR(iter) == -EINTR)
+                       goto retry;
+
+               if (PTR_ERR(iter) != -ENOENT)
+                       acl = ERR_CAST(iter);
+               goto out;
+       }
+
+       xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
+
+       acl = bch2_acl_from_disk(xattr_val(xattr.v),
+                       le16_to_cpu(xattr.v->x_val_len));
+
+       if (!IS_ERR(acl))
+               set_cached_acl(&inode->v, type, acl);
+out:
+       bch2_trans_exit(&trans);
+       return acl;
+}
+
+int bch2_set_acl_trans(struct btree_trans *trans,
+                      struct bch_inode_unpacked *inode_u,
+                      const struct bch_hash_info *hash_info,
+                      struct posix_acl *acl, int type)
+{
+       int ret;
+
+       if (type == ACL_TYPE_DEFAULT &&
+           !S_ISDIR(inode_u->bi_mode))
+               return acl ? -EACCES : 0;
+
+       if (acl) {
+               struct bkey_i_xattr *xattr =
+                       bch2_acl_to_xattr(trans, acl, type);
+               if (IS_ERR(xattr))
+                       return PTR_ERR(xattr);
+
+               ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
+                                   inode_u->bi_inum, &xattr->k_i, 0);
+       } else {
+               struct xattr_search_key search =
+                       X_SEARCH(acl_to_xattr_type(type), "", 0);
+
+               ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, hash_info,
+                                      inode_u->bi_inum, &search);
+       }
+
+       return ret == -ENOENT ? 0 : ret;
+}
+
+int bch2_set_acl(struct inode *vinode, struct posix_acl *_acl, int type)
+{
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct btree_trans trans;
+       struct btree_iter *inode_iter;
+       struct bch_inode_unpacked inode_u;
+       struct posix_acl *acl;
+       umode_t mode;
+       int ret;
+
+       mutex_lock(&inode->ei_update_lock);
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+       acl = _acl;
+
+       inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
+                                    BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(inode_iter);
+       if (ret)
+               goto btree_err;
+
+       mode = inode_u.bi_mode;
+
+       if (type == ACL_TYPE_ACCESS) {
+               ret = posix_acl_update_mode(&inode->v, &mode, &acl);
+               if (ret)
+                       goto err;
+       }
+
+       ret = bch2_set_acl_trans(&trans, &inode_u,
+                                &inode->ei_str_hash,
+                                acl, type);
+       if (ret)
+               goto btree_err;
+
+       inode_u.bi_ctime        = bch2_current_time(c);
+       inode_u.bi_mode         = mode;
+
+       ret =   bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+               bch2_trans_commit(&trans, NULL,
+                                 &inode->ei_journal_seq,
+                                 BTREE_INSERT_NOUNLOCK);
+btree_err:
+       if (ret == -EINTR)
+               goto retry;
+       if (unlikely(ret))
+               goto err;
+
+       bch2_inode_update_after_write(c, inode, &inode_u,
+                                     ATTR_CTIME|ATTR_MODE);
+
+       set_cached_acl(&inode->v, type, acl);
+err:
+       bch2_trans_exit(&trans);
+       mutex_unlock(&inode->ei_update_lock);
+
+       return ret;
+}
+
+int bch2_acl_chmod(struct btree_trans *trans,
+                  struct bch_inode_info *inode,
+                  umode_t mode,
+                  struct posix_acl **new_acl)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c_xattr xattr;
+       struct bkey_i_xattr *new;
+       struct posix_acl *acl;
+       int ret = 0;
+
+       iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
+                       &inode->ei_str_hash, inode->v.i_ino,
+                       &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0),
+                       BTREE_ITER_INTENT);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter) != -ENOENT ? PTR_ERR(iter) : 0;
+
+       xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
+
+       acl = bch2_acl_from_disk(xattr_val(xattr.v),
+                       le16_to_cpu(xattr.v->x_val_len));
+       if (IS_ERR_OR_NULL(acl))
+               return PTR_ERR(acl);
+
+       ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
+       if (ret)
+               goto err;
+
+       new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS);
+       if (IS_ERR(new)) {
+               ret = PTR_ERR(new);
+               goto err;
+       }
+
+       new->k.p = iter->pos;
+       bch2_trans_update(trans, iter, &new->k_i, 0);
+       *new_acl = acl;
+       acl = NULL;
+err:
+       kfree(acl);
+       return ret;
+}
+
+#endif /* CONFIG_BCACHEFS_POSIX_ACL */
diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h
new file mode 100644 (file)
index 0000000..cb62d50
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ACL_H
+#define _BCACHEFS_ACL_H
+
+struct bch_inode_unpacked;
+struct bch_hash_info;
+struct bch_inode_info;
+struct posix_acl;
+
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+
+#define BCH_ACL_VERSION        0x0001
+
+typedef struct {
+       __le16          e_tag;
+       __le16          e_perm;
+       __le32          e_id;
+} bch_acl_entry;
+
+typedef struct {
+       __le16          e_tag;
+       __le16          e_perm;
+} bch_acl_entry_short;
+
+typedef struct {
+       __le32          a_version;
+} bch_acl_header;
+
+struct posix_acl *bch2_get_acl(struct inode *, int);
+
+int bch2_set_acl_trans(struct btree_trans *,
+                      struct bch_inode_unpacked *,
+                      const struct bch_hash_info *,
+                      struct posix_acl *, int);
+int bch2_set_acl(struct inode *, struct posix_acl *, int);
+int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *,
+                  umode_t, struct posix_acl **);
+
+#else
+
+static inline int bch2_set_acl_trans(struct btree_trans *trans,
+                                    struct bch_inode_unpacked *inode_u,
+                                    const struct bch_hash_info *hash_info,
+                                    struct posix_acl *acl, int type)
+{
+       return 0;
+}
+
+static inline int bch2_acl_chmod(struct btree_trans *trans,
+                                struct bch_inode_info *inode,
+                                umode_t mode,
+                                struct posix_acl **new_acl)
+{
+       return 0;
+}
+
+#endif /* CONFIG_BCACHEFS_POSIX_ACL */
+
+#endif /* _BCACHEFS_ACL_H */
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
new file mode 100644 (file)
index 0000000..54096e8
--- /dev/null
@@ -0,0 +1,1479 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_key_cache.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "clock.h"
+#include "debug.h"
+#include "ec.h"
+#include "error.h"
+#include "recovery.h"
+
+#include <linux/kthread.h>
+#include <linux/math64.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/task.h>
+#include <linux/sort.h>
+#include <trace/events/bcachefs.h>
+
+static const char * const bch2_alloc_field_names[] = {
+#define x(name, bytes) #name,
+       BCH_ALLOC_FIELDS()
+#undef x
+       NULL
+};
+
+static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int);
+
+/* Ratelimiting/PD controllers */
+
+static void pd_controllers_update(struct work_struct *work)
+{
+       struct bch_fs *c = container_of(to_delayed_work(work),
+                                          struct bch_fs,
+                                          pd_controllers_update);
+       struct bch_dev *ca;
+       s64 free = 0, fragmented = 0;
+       unsigned i;
+
+       for_each_member_device(ca, c, i) {
+               struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+
+               free += bucket_to_sector(ca,
+                               __dev_buckets_free(ca, stats)) << 9;
+               /*
+                * Bytes of internal fragmentation, which can be
+                * reclaimed by copy GC
+                */
+               fragmented += max_t(s64, 0, (bucket_to_sector(ca,
+                                       stats.buckets[BCH_DATA_user] +
+                                       stats.buckets[BCH_DATA_cached]) -
+                                 (stats.sectors[BCH_DATA_user] +
+                                  stats.sectors[BCH_DATA_cached])) << 9);
+       }
+
+       bch2_pd_controller_update(&c->copygc_pd, free, fragmented, -1);
+       schedule_delayed_work(&c->pd_controllers_update,
+                             c->pd_controllers_update_seconds * HZ);
+}
+
+/* Persistent alloc info: */
+
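+/*
+ * Alloc keys store a bitmap of present fields followed by their values,
+ * packed little-endian at the per-field widths from BCH_ALLOC_FIELD_BYTES;
+ * get_alloc_field()/put_alloc_field() walk that encoding, advancing *p by
+ * each field's width.
+ */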
+static inline u64 get_alloc_field(const struct bch_alloc *a,
+                                 const void **p, unsigned field)
+{
+       unsigned bytes = BCH_ALLOC_FIELD_BYTES[field];
+       u64 v;
+
+       if (!(a->fields & (1 << field)))
+               return 0;
+
+       switch (bytes) {
+       case 1:
+               v = *((const u8 *) *p);
+               break;
+       case 2:
+               v = le16_to_cpup(*p);
+               break;
+       case 4:
+               v = le32_to_cpup(*p);
+               break;
+       case 8:
+               v = le64_to_cpup(*p);
+               break;
+       default:
+               BUG();
+       }
+
+       *p += bytes;
+       return v;
+}
+
+static inline void put_alloc_field(struct bkey_i_alloc *a, void **p,
+                                  unsigned field, u64 v)
+{
+       unsigned bytes = BCH_ALLOC_FIELD_BYTES[field];
+
+       if (!v)
+               return;
+
+       a->v.fields |= 1 << field;
+
+       switch (bytes) {
+       case 1:
+               *((u8 *) *p) = v;
+               break;
+       case 2:
+               *((__le16 *) *p) = cpu_to_le16(v);
+               break;
+       case 4:
+               *((__le32 *) *p) = cpu_to_le32(v);
+               break;
+       case 8:
+               *((__le64 *) *p) = cpu_to_le64(v);
+               break;
+       default:
+               BUG();
+       }
+
+       *p += bytes;
+}
+
+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
+{
+       struct bkey_alloc_unpacked ret = { .gen = 0 };
+
+       if (k.k->type == KEY_TYPE_alloc) {
+               const struct bch_alloc *a = bkey_s_c_to_alloc(k).v;
+               const void *d = a->data;
+               unsigned idx = 0;
+
+               ret.gen = a->gen;
+
+#define x(_name, _bits)        ret._name = get_alloc_field(a, &d, idx++);
+               BCH_ALLOC_FIELDS()
+#undef  x
+       }
+       return ret;
+}
+
+void bch2_alloc_pack(struct bkey_i_alloc *dst,
+                    const struct bkey_alloc_unpacked src)
+{
+       unsigned idx = 0;
+       void *d = dst->v.data;
+       unsigned bytes;
+
+       dst->v.fields   = 0;
+       dst->v.gen      = src.gen;
+
+#define x(_name, _bits)        put_alloc_field(dst, &d, idx++, src._name);
+       BCH_ALLOC_FIELDS()
+#undef  x
+
+       bytes = (void *) d - (void *) &dst->v;
+       set_bkey_val_bytes(&dst->k, bytes);
+       memset_u64s_tail(&dst->v, 0, bytes);
+}
+
+static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
+{
+       unsigned i, bytes = offsetof(struct bch_alloc, data);
+
+       for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_FIELD_BYTES); i++)
+               if (a->fields & (1 << i))
+                       bytes += BCH_ALLOC_FIELD_BYTES[i];
+
+       return DIV_ROUND_UP(bytes, sizeof(u64));
+}
+
+const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
+
+       if (k.k->p.inode >= c->sb.nr_devices ||
+           !c->devs[k.k->p.inode])
+               return "invalid device";
+
+       /* allow for unknown fields */
+       if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v))
+               return "incorrect value size";
+
+       return NULL;
+}
+
+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
+                       struct bkey_s_c k)
+{
+       struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
+       const void *d = a.v->data;
+       unsigned i;
+
+       pr_buf(out, "gen %u", a.v->gen);
+
+       for (i = 0; i < BCH_ALLOC_FIELD_NR; i++)
+               if (a.v->fields & (1 << i))
+                       pr_buf(out, " %s %llu",
+                              bch2_alloc_field_names[i],
+                              get_alloc_field(a.v, &d, i));
+}
+
+static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
+                             unsigned level, struct bkey_s_c k)
+{
+       struct bch_dev *ca;
+       struct bucket *g;
+       struct bkey_alloc_unpacked u;
+
+       if (level || k.k->type != KEY_TYPE_alloc)
+               return 0;
+
+       ca = bch_dev_bkey_exists(c, k.k->p.inode);
+       g = __bucket(ca, k.k->p.offset, 0);
+       u = bch2_alloc_unpack(k);
+
+       g->_mark.gen            = u.gen;
+       g->_mark.data_type      = u.data_type;
+       g->_mark.dirty_sectors  = u.dirty_sectors;
+       g->_mark.cached_sectors = u.cached_sectors;
+       g->io_time[READ]        = u.read_time;
+       g->io_time[WRITE]       = u.write_time;
+       g->oldest_gen           = u.oldest_gen;
+       g->gen_valid            = 1;
+
+       return 0;
+}
+
+int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
+{
+       struct bch_dev *ca;
+       unsigned i;
+       int ret = 0;
+
+       down_read(&c->gc_lock);
+       ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
+                                         NULL, bch2_alloc_read_fn);
+       up_read(&c->gc_lock);
+
+       if (ret) {
+               bch_err(c, "error reading alloc info: %i", ret);
+               return ret;
+       }
+
+       percpu_down_write(&c->mark_lock);
+       bch2_dev_usage_from_buckets(c);
+       percpu_up_write(&c->mark_lock);
+
+       mutex_lock(&c->bucket_clock[READ].lock);
+       for_each_member_device(ca, c, i) {
+               down_read(&ca->bucket_lock);
+               bch2_recalc_oldest_io(c, ca, READ);
+               up_read(&ca->bucket_lock);
+       }
+       mutex_unlock(&c->bucket_clock[READ].lock);
+
+       mutex_lock(&c->bucket_clock[WRITE].lock);
+       for_each_member_device(ca, c, i) {
+               down_read(&ca->bucket_lock);
+               bch2_recalc_oldest_io(c, ca, WRITE);
+               up_read(&ca->bucket_lock);
+       }
+       mutex_unlock(&c->bucket_clock[WRITE].lock);
+
+       return 0;
+}
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+                               struct btree_iter *iter,
+                               unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c k;
+       struct bch_dev *ca;
+       struct bucket_array *ba;
+       struct bucket *g;
+       struct bucket_mark m;
+       struct bkey_alloc_unpacked old_u, new_u;
+       __BKEY_PADDED(k, 8) alloc_key; /* hack: */
+       struct bkey_i_alloc *a;
+       int ret;
+retry:
+       bch2_trans_begin(trans);
+
+       ret = bch2_btree_key_cache_flush(trans,
+                       BTREE_ID_ALLOC, iter->pos);
+       if (ret)
+               goto err;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       old_u = bch2_alloc_unpack(k);
+
+       percpu_down_read(&c->mark_lock);
+       ca      = bch_dev_bkey_exists(c, iter->pos.inode);
+       ba      = bucket_array(ca);
+
+       g       = &ba->b[iter->pos.offset];
+       m       = READ_ONCE(g->mark);
+       new_u   = alloc_mem_to_key(g, m);
+       percpu_up_read(&c->mark_lock);
+
+       if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+               return 0;
+
+       a = bkey_alloc_init(&alloc_key.k);
+       a->k.p = iter->pos;
+       bch2_alloc_pack(a, new_u);
+
+       bch2_trans_update(trans, iter, &a->k_i,
+                         BTREE_TRIGGER_NORUN);
+       ret = bch2_trans_commit(trans, NULL, NULL,
+                               BTREE_INSERT_NOFAIL|
+                               BTREE_INSERT_USE_RESERVE|
+                               flags);
+err:
+       if (ret == -EINTR)
+               goto retry;
+       return ret;
+}
+
+int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       u64 first_bucket, nbuckets;
+       int ret = 0;
+
+       percpu_down_read(&c->mark_lock);
+       first_bucket    = bucket_array(ca)->first_bucket;
+       nbuckets        = bucket_array(ca)->nbuckets;
+       percpu_up_read(&c->mark_lock);
+
+       BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
+                                  POS(ca->dev_idx, first_bucket),
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+       while (iter->pos.offset < nbuckets) {
+               bch2_trans_cond_resched(&trans);
+
+               ret = bch2_alloc_write_key(&trans, iter, flags);
+               if (ret)
+                       break;
+               bch2_btree_iter_next_slot(iter);
+       }
+
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
+
+int bch2_alloc_write(struct bch_fs *c, unsigned flags)
+{
+       struct bch_dev *ca;
+       unsigned i;
+       int ret = 0;
+
+       for_each_rw_member(ca, c, i) {
+               ret = bch2_dev_alloc_write(c, ca, flags);
+               if (ret) {
+                       percpu_ref_put(&ca->io_ref);
+                       break;
+               }
+       }
+
+       return ret;
+}
+
+/* Bucket IO clocks: */
+
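+/*
+ * Each bucket records a 16-bit last-IO time per direction; when the clock
+ * hand approaches U16_MAX, bch2_rescale_bucket_io_times() halves every
+ * bucket's age so the hands can keep advancing.
+ */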
+static void bch2_recalc_oldest_io(struct bch_fs *c, struct bch_dev *ca, int rw)
+{
+       struct bucket_clock *clock = &c->bucket_clock[rw];
+       struct bucket_array *buckets = bucket_array(ca);
+       struct bucket *g;
+       u16 max_last_io = 0;
+       unsigned i;
+
+       lockdep_assert_held(&c->bucket_clock[rw].lock);
+
+       /* Recalculate max_last_io for this device: */
+       for_each_bucket(g, buckets)
+               max_last_io = max(max_last_io, bucket_last_io(c, g, rw));
+
+       ca->max_last_bucket_io[rw] = max_last_io;
+
+       /* Recalculate global max_last_io: */
+       max_last_io = 0;
+
+       for_each_member_device(ca, c, i)
+               max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]);
+
+       clock->max_last_io = max_last_io;
+}
+
+static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw)
+{
+       struct bucket_clock *clock = &c->bucket_clock[rw];
+       struct bucket_array *buckets;
+       struct bch_dev *ca;
+       struct bucket *g;
+       unsigned i;
+
+       trace_rescale_prios(c);
+
+       for_each_member_device(ca, c, i) {
+               down_read(&ca->bucket_lock);
+               buckets = bucket_array(ca);
+
+               for_each_bucket(g, buckets)
+                       g->io_time[rw] = clock->hand -
+                       bucket_last_io(c, g, rw) / 2;
+
+               bch2_recalc_oldest_io(c, ca, rw);
+
+               up_read(&ca->bucket_lock);
+       }
+}
+
+static inline u64 bucket_clock_freq(u64 capacity)
+{
+       return max(capacity >> 10, 2028ULL);
+}
+
+static void bch2_inc_clock_hand(struct io_timer *timer)
+{
+       struct bucket_clock *clock = container_of(timer,
+                                               struct bucket_clock, rescale);
+       struct bch_fs *c = container_of(clock,
+                                       struct bch_fs, bucket_clock[clock->rw]);
+       struct bch_dev *ca;
+       u64 capacity;
+       unsigned i;
+
+       mutex_lock(&clock->lock);
+
+       /* if the clock can't be advanced any further, rescale prios */
+       if (clock->max_last_io >= U16_MAX - 2)
+               bch2_rescale_bucket_io_times(c, clock->rw);
+
+       BUG_ON(clock->max_last_io >= U16_MAX - 2);
+
+       for_each_member_device(ca, c, i)
+               ca->max_last_bucket_io[clock->rw]++;
+       clock->max_last_io++;
+       clock->hand++;
+
+       mutex_unlock(&clock->lock);
+
+       capacity = READ_ONCE(c->capacity);
+
+       if (!capacity)
+               return;
+
+       /*
+        * we only increment when 0.1% of the filesystem capacity has been read
+        * or written to; this determines when it's time to advance the hand
+        *
+        * XXX: we shouldn't really be going off of the capacity of devices in
+        * RW mode (that will be 0 when we're RO, yet we can still service
+        * reads)
+        */
+       timer->expire += bucket_clock_freq(capacity);
+
+       bch2_io_timer_add(&c->io_clock[clock->rw], timer);
+}
+
+static void bch2_bucket_clock_init(struct bch_fs *c, int rw)
+{
+       struct bucket_clock *clock = &c->bucket_clock[rw];
+
+       clock->hand             = 1;
+       clock->rw               = rw;
+       clock->rescale.fn       = bch2_inc_clock_hand;
+       clock->rescale.expire   = bucket_clock_freq(c->capacity);
+       mutex_init(&clock->lock);
+}
+
+int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
+                             size_t bucket_nr, int rw)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
+       struct btree_iter *iter;
+       struct bucket *g;
+       struct bkey_i_alloc *a;
+       struct bkey_alloc_unpacked u;
+       u16 *time;
+       int ret = 0;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, POS(dev, bucket_nr),
+                                  BTREE_ITER_CACHED|
+                                  BTREE_ITER_CACHED_NOFILL|
+                                  BTREE_ITER_INTENT);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               goto out;
+
+       percpu_down_read(&c->mark_lock);
+       g = bucket(ca, bucket_nr);
+       u = alloc_mem_to_key(g, READ_ONCE(g->mark));
+       percpu_up_read(&c->mark_lock);
+
+       bkey_alloc_init(&a->k_i);
+       a->k.p = iter->pos;
+
+       time = rw == READ ? &u.read_time : &u.write_time;
+       if (*time == c->bucket_clock[rw].hand)
+               goto out;
+
+       *time = c->bucket_clock[rw].hand;
+
+       bch2_alloc_pack(a, u);
+
+       ret   = bch2_trans_update(trans, iter, &a->k_i, 0) ?:
+               bch2_trans_commit(trans, NULL, NULL, 0);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+/* Background allocator thread: */
+
+/*
+ * Scans for buckets to be invalidated, invalidates them, rewrites prios/gens
+ * (marking them as invalidated on disk), then optionally issues discard
+ * commands to the newly free buckets, then puts them on the various freelists.
+ */
+
+#define BUCKET_GC_GEN_MAX      96U
+
+/**
+ * wait_buckets_available - wait on reclaimable buckets
+ *
+ * If there aren't enough available buckets to fill up free_inc, wait until
+ * there are.
+ */
+static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
+{
+       unsigned long gc_count = c->gc_count;
+       u64 available;
+       int ret = 0;
+
+       ca->allocator_state = ALLOCATOR_BLOCKED;
+       closure_wake_up(&c->freelist_wait);
+
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (kthread_should_stop()) {
+                       ret = 1;
+                       break;
+               }
+
+               if (gc_count != c->gc_count)
+                       ca->inc_gen_really_needs_gc = 0;
+
+               available = max_t(s64, 0, dev_buckets_available(ca) -
+                                 ca->inc_gen_really_needs_gc);
+
+               if (available > fifo_free(&ca->free_inc) ||
+                   (available &&
+                    (!fifo_full(&ca->free[RESERVE_BTREE]) ||
+                     !fifo_full(&ca->free[RESERVE_MOVINGGC]))))
+                       break;
+
+               up_read(&c->gc_lock);
+               schedule();
+               try_to_freeze();
+               down_read(&c->gc_lock);
+       }
+
+       __set_current_state(TASK_RUNNING);
+       ca->allocator_state = ALLOCATOR_RUNNING;
+       closure_wake_up(&c->freelist_wait);
+
+       return ret;
+}
+
+static bool bch2_can_invalidate_bucket(struct bch_dev *ca,
+                                      size_t bucket,
+                                      struct bucket_mark mark)
+{
+       u8 gc_gen;
+
+       if (!is_available_bucket(mark))
+               return false;
+
+       if (ca->buckets_nouse &&
+           test_bit(bucket, ca->buckets_nouse))
+               return false;
+
+       gc_gen = bucket_gc_gen(ca, bucket);
+
+       if (gc_gen >= BUCKET_GC_GEN_MAX / 2)
+               ca->inc_gen_needs_gc++;
+
+       if (gc_gen >= BUCKET_GC_GEN_MAX)
+               ca->inc_gen_really_needs_gc++;
+
+       return gc_gen < BUCKET_GC_GEN_MAX;
+}
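+
+/*
+ * Roughly, bucket_gc_gen() is how far a bucket's gen has advanced past the
+ * oldest gen gc knows about for that bucket (mod 256). For illustration:
+ * at gc gen >= 48 the bucket is still invalidated but makes kicking gc
+ * worthwhile; at >= 96 it is skipped entirely, since invalidating it again
+ * would risk 8-bit gen wraparound before gc can catch up.
+ */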
+
+/*
+ * Determines the order in which we reuse buckets: smallest bucket_sort_key()
+ * first.
+ *
+ * - We take into account the read prio of the bucket, which gives us an
+ *   indication of how hot the data is -- we scale the prio so that the prio
+ *   farthest from the clock is worth 1/8th of the closest.
+ *
+ * - The number of sectors of cached data in the bucket, which gives us an
+ *   indication of the cost in cache misses this eviction will cause.
+ *
+ * - If hotness * sectors used compares equal, we pick the bucket with the
+ *   smallest bucket_gc_gen() - since incrementing the same bucket's generation
+ *   number repeatedly forces us to run mark and sweep gc to avoid generation
+ *   number wraparound.
+ */
+
+static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca,
+                                    size_t b, struct bucket_mark m)
+{
+       unsigned last_io = bucket_last_io(c, bucket(ca, b), READ);
+       unsigned max_last_io = ca->max_last_bucket_io[READ];
+
+       /*
+        * Time since last read, scaled to [0, 8) where larger value indicates
+        * more recently read data:
+        */
+       unsigned long hotness = (max_last_io - last_io) * 7 / max_last_io;
+
+       /* How much we want to keep the data in this bucket: */
+       unsigned long data_wantness =
+               (hotness + 1) * bucket_sectors_used(m);
+
+       unsigned long needs_journal_commit =
+               bucket_needs_journal_commit(m, c->journal.last_seq_ondisk);
+
+       return  (data_wantness << 9) |
+               (needs_journal_commit << 8) |
+               (bucket_gc_gen(ca, b) / 16);
+}
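+
+/*
+ * Worked example of the packing above (a sketch): a cold bucket
+ * (hotness 0) holding 10 cached sectors that doesn't need a journal
+ * commit and has gc gen 32 gets the key
+ *
+ *	((0 + 1) * 10) << 9  |  0 << 8  |  32 / 16  =  5120 + 0 + 2  =  5122
+ *
+ * so wantness dominates the ordering, the journal-commit bit breaks ties,
+ * and gc gen breaks ties among those.
+ */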
+
+static inline int bucket_alloc_cmp(alloc_heap *h,
+                                  struct alloc_heap_entry l,
+                                  struct alloc_heap_entry r)
+{
+       return  cmp_int(l.key, r.key) ?:
+               cmp_int(r.nr, l.nr) ?:
+               cmp_int(l.bucket, r.bucket);
+}
+
+static inline int bucket_idx_cmp(const void *_l, const void *_r)
+{
+       const struct alloc_heap_entry *l = _l, *r = _r;
+
+       return cmp_int(l->bucket, r->bucket);
+}
+
+static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct bucket_array *buckets;
+       struct alloc_heap_entry e = { 0 };
+       size_t b, i, nr = 0;
+
+       ca->alloc_heap.used = 0;
+
+       mutex_lock(&c->bucket_clock[READ].lock);
+       down_read(&ca->bucket_lock);
+
+       buckets = bucket_array(ca);
+
+       bch2_recalc_oldest_io(c, ca, READ);
+
+       /*
+        * Find buckets with lowest read priority, by building a maxheap sorted
+        * by read priority and repeatedly replacing the maximum element until
+        * all buckets have been visited.
+        */
+       for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++) {
+               struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+               unsigned long key = bucket_sort_key(c, ca, b, m);
+
+               if (!bch2_can_invalidate_bucket(ca, b, m))
+                       continue;
+
+               if (e.nr && e.bucket + e.nr == b && e.key == key) {
+                       e.nr++;
+               } else {
+                       if (e.nr)
+                               heap_add_or_replace(&ca->alloc_heap, e,
+                                       -bucket_alloc_cmp, NULL);
+
+                       e = (struct alloc_heap_entry) {
+                               .bucket = b,
+                               .nr     = 1,
+                               .key    = key,
+                       };
+               }
+
+               cond_resched();
+       }
+
+       if (e.nr)
+               heap_add_or_replace(&ca->alloc_heap, e,
+                               -bucket_alloc_cmp, NULL);
+
+       for (i = 0; i < ca->alloc_heap.used; i++)
+               nr += ca->alloc_heap.data[i].nr;
+
+       while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) {
+               nr -= ca->alloc_heap.data[0].nr;
+               heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL);
+       }
+
+       up_read(&ca->bucket_lock);
+       mutex_unlock(&c->bucket_clock[READ].lock);
+}
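+
+/*
+ * The heap trick above in miniature (a sketch, not the real heap macros):
+ * keeping the N smallest keys with a bounded maxheap -
+ *
+ *	if (heap not full)
+ *		push(entry);
+ *	else if (entry.key < root.key)
+ *		replace_root(entry);
+ *
+ * which is what heap_add_or_replace() with the negated comparator does:
+ * the largest key sits at the root and is evicted first.
+ */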
+
+static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct bucket_array *buckets = bucket_array(ca);
+       struct bucket_mark m;
+       size_t b, start;
+
+       if (ca->fifo_last_bucket <  ca->mi.first_bucket ||
+           ca->fifo_last_bucket >= ca->mi.nbuckets)
+               ca->fifo_last_bucket = ca->mi.first_bucket;
+
+       start = ca->fifo_last_bucket;
+
+       do {
+               ca->fifo_last_bucket++;
+               if (ca->fifo_last_bucket == ca->mi.nbuckets)
+                       ca->fifo_last_bucket = ca->mi.first_bucket;
+
+               b = ca->fifo_last_bucket;
+               m = READ_ONCE(buckets->b[b].mark);
+
+               if (bch2_can_invalidate_bucket(ca, b, m)) {
+                       struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
+
+                       heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
+                       if (heap_full(&ca->alloc_heap))
+                               break;
+               }
+
+               cond_resched();
+       } while (ca->fifo_last_bucket != start);
+}
+
+static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct bucket_array *buckets = bucket_array(ca);
+       struct bucket_mark m;
+       size_t checked, i;
+
+       for (checked = 0;
+            checked < ca->mi.nbuckets / 2;
+            checked++) {
+               size_t b = bch2_rand_range(ca->mi.nbuckets -
+                                          ca->mi.first_bucket) +
+                       ca->mi.first_bucket;
+
+               m = READ_ONCE(buckets->b[b].mark);
+
+               if (bch2_can_invalidate_bucket(ca, b, m)) {
+                       struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
+
+                       heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
+                       if (heap_full(&ca->alloc_heap))
+                               break;
+               }
+
+               cond_resched();
+       }
+
+       sort(ca->alloc_heap.data,
+            ca->alloc_heap.used,
+            sizeof(ca->alloc_heap.data[0]),
+            bucket_idx_cmp, NULL);
+
+       /* remove duplicates: */
+       for (i = 0; i + 1 < ca->alloc_heap.used; i++)
+               if (ca->alloc_heap.data[i].bucket ==
+                   ca->alloc_heap.data[i + 1].bucket)
+                       ca->alloc_heap.data[i].nr = 0;
+}
+
+static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
+{
+       size_t i, nr = 0;
+
+       ca->inc_gen_needs_gc                    = 0;
+
+       switch (ca->mi.replacement) {
+       case CACHE_REPLACEMENT_LRU:
+               find_reclaimable_buckets_lru(c, ca);
+               break;
+       case CACHE_REPLACEMENT_FIFO:
+               find_reclaimable_buckets_fifo(c, ca);
+               break;
+       case CACHE_REPLACEMENT_RANDOM:
+               find_reclaimable_buckets_random(c, ca);
+               break;
+       }
+
+       heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL);
+
+       for (i = 0; i < ca->alloc_heap.used; i++)
+               nr += ca->alloc_heap.data[i].nr;
+
+       return nr;
+}
+
+static inline long next_alloc_bucket(struct bch_dev *ca)
+{
+       struct alloc_heap_entry e, *top = ca->alloc_heap.data;
+
+       while (ca->alloc_heap.used) {
+               if (top->nr) {
+                       size_t b = top->bucket;
+
+                       top->bucket++;
+                       top->nr--;
+                       return b;
+               }
+
+               heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
+       }
+
+       return -1;
+}
+
+/*
+ * returns sequence number of most recent journal entry that updated this
+ * bucket:
+ */
+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
+{
+       if (m.journal_seq_valid) {
+               u64 journal_seq = atomic64_read(&c->journal.seq);
+               u64 bucket_seq  = journal_seq;
+
+               bucket_seq &= ~((u64) U16_MAX);
+               bucket_seq |= m.journal_seq;
+
+               if (bucket_seq > journal_seq)
+                       bucket_seq -= 1 << 16;
+
+               return bucket_seq;
+       } else {
+               return 0;
+       }
+}
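+
+/*
+ * Worked example of the reconstruction above: if the current journal seq
+ * is 0x12345 and the bucket recorded low bits 0xf000, then
+ *
+ *	bucket_seq = (0x12345 & ~0xffff) | 0xf000 = 0x1f000
+ *
+ * which exceeds 0x12345, so the update happened in the previous 16-bit
+ * window: subtract 1 << 16 to get 0xf000.
+ */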
+
+static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
+                                      struct bch_dev *ca,
+                                      struct btree_iter *iter,
+                                      u64 *journal_seq, unsigned flags)
+{
+#if 0
+       __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
+#else
+       /* hack: */
+       __BKEY_PADDED(k, 8) alloc_key;
+#endif
+       struct bch_fs *c = trans->c;
+       struct bkey_i_alloc *a;
+       struct bkey_alloc_unpacked u;
+       struct bucket *g;
+       struct bucket_mark m;
+       bool invalidating_cached_data;
+       size_t b;
+       int ret = 0;
+
+       BUG_ON(!ca->alloc_heap.used ||
+              !ca->alloc_heap.data[0].nr);
+       b = ca->alloc_heap.data[0].bucket;
+
+       /* first, put on free_inc and mark as owned by allocator: */
+       percpu_down_read(&c->mark_lock);
+       spin_lock(&c->freelist_lock);
+
+       verify_not_on_freelist(c, ca, b);
+
+       BUG_ON(!fifo_push(&ca->free_inc, b));
+
+       g = bucket(ca, b);
+       m = READ_ONCE(g->mark);
+
+       invalidating_cached_data = m.cached_sectors != 0;
+
+       /*
+        * If we're not invalidating cached data, we only increment the bucket
+        * gen in memory here, the incremented gen will be updated in the btree
+        * by bch2_trans_mark_pointer():
+        */
+
+       if (!invalidating_cached_data)
+               bch2_invalidate_bucket(c, ca, b, &m);
+       else
+               bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
+
+       spin_unlock(&c->freelist_lock);
+       percpu_up_read(&c->mark_lock);
+
+       if (!invalidating_cached_data)
+               goto out;
+
+       /*
+        * If the read-only path is trying to shut down, we can't be generating
+        * new btree updates:
+        */
+       if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
+               ret = 1;
+               goto out;
+       }
+
+       BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
+       bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
+retry:
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               return ret;
+
+       percpu_down_read(&c->mark_lock);
+       g = bucket(ca, iter->pos.offset);
+       m = READ_ONCE(g->mark);
+       u = alloc_mem_to_key(g, m);
+       percpu_up_read(&c->mark_lock);
+
+       invalidating_cached_data = u.cached_sectors != 0;
+
+       u.gen++;
+       u.data_type     = 0;
+       u.dirty_sectors = 0;
+       u.cached_sectors = 0;
+       u.read_time     = c->bucket_clock[READ].hand;
+       u.write_time    = c->bucket_clock[WRITE].hand;
+
+       a = bkey_alloc_init(&alloc_key.k);
+       a->k.p = iter->pos;
+       bch2_alloc_pack(a, u);
+
+       bch2_trans_update(trans, iter, &a->k_i,
+                         BTREE_TRIGGER_BUCKET_INVALIDATE);
+
+       /*
+        * XXX:
+        * when using deferred btree updates, we have journal reclaim doing
+        * btree updates and thus requiring the allocator to make forward
+        * progress, and here the allocator is requiring space in the journal -
+        * so we need a journal pre-reservation:
+        */
+       ret = bch2_trans_commit(trans, NULL,
+                               invalidating_cached_data ? journal_seq : NULL,
+                               BTREE_INSERT_NOUNLOCK|
+                               BTREE_INSERT_NOCHECK_RW|
+                               BTREE_INSERT_NOFAIL|
+                               BTREE_INSERT_USE_RESERVE|
+                               BTREE_INSERT_USE_ALLOC_RESERVE|
+                               flags);
+       if (ret == -EINTR)
+               goto retry;
+out:
+       if (!ret) {
+               /* remove from alloc_heap: */
+               struct alloc_heap_entry e, *top = ca->alloc_heap.data;
+
+               top->bucket++;
+               top->nr--;
+
+               if (!top->nr)
+                       heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
+
+               /*
+                * Make sure we flush the last journal entry that updated this
+                * bucket (i.e. deleting the last reference) before writing to
+                * this bucket again:
+                */
+               *journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
+       } else {
+               size_t b2;
+
+               /* remove from free_inc: */
+               percpu_down_read(&c->mark_lock);
+               spin_lock(&c->freelist_lock);
+
+               bch2_mark_alloc_bucket(c, ca, b, false,
+                                      gc_pos_alloc(c, NULL), 0);
+
+               BUG_ON(!fifo_pop_back(&ca->free_inc, b2));
+               BUG_ON(b != b2);
+
+               spin_unlock(&c->freelist_lock);
+               percpu_up_read(&c->mark_lock);
+       }
+
+       return ret < 0 ? ret : 0;
+}
+
+/*
+ * Pull buckets off ca->alloc_heap, invalidate them, move them to ca->free_inc:
+ */
+static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       u64 journal_seq = 0;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
+                                  POS(ca->dev_idx, 0),
+                                  BTREE_ITER_CACHED|
+                                  BTREE_ITER_CACHED_NOFILL|
+                                  BTREE_ITER_INTENT);
+
+       /* Only use nowait if we've already invalidated at least one bucket: */
+       while (!ret &&
+              !fifo_full(&ca->free_inc) &&
+              ca->alloc_heap.used)
+               ret = bch2_invalidate_one_bucket2(&trans, ca, iter, &journal_seq,
+                               BTREE_INSERT_GC_LOCK_HELD|
+                               (!fifo_empty(&ca->free_inc)
+                                ? BTREE_INSERT_NOWAIT : 0));
+
+       bch2_trans_exit(&trans);
+
+       /* If we used NOWAIT, don't return the error: */
+       if (!fifo_empty(&ca->free_inc))
+               ret = 0;
+       if (ret) {
+               bch_err(ca, "error invalidating buckets: %i", ret);
+               return ret;
+       }
+
+       if (journal_seq)
+               ret = bch2_journal_flush_seq(&c->journal, journal_seq);
+       if (ret) {
+               bch_err(ca, "journal error: %i", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
+{
+       unsigned i;
+       int ret = 0;
+
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               spin_lock(&c->freelist_lock);
+               for (i = 0; i < RESERVE_NR; i++) {
+
+                       /*
+                        * Don't strand buckets on the copygc freelist until
+                        * after recovery is finished:
+                        */
+                       if (!test_bit(BCH_FS_STARTED, &c->flags) &&
+                           i == RESERVE_MOVINGGC)
+                               continue;
+
+                       if (fifo_push(&ca->free[i], bucket)) {
+                               fifo_pop(&ca->free_inc, bucket);
+
+                               closure_wake_up(&c->freelist_wait);
+                               ca->allocator_state = ALLOCATOR_RUNNING;
+
+                               spin_unlock(&c->freelist_lock);
+                               goto out;
+                       }
+               }
+
+               if (ca->allocator_state != ALLOCATOR_BLOCKED_FULL) {
+                       ca->allocator_state = ALLOCATOR_BLOCKED_FULL;
+                       closure_wake_up(&c->freelist_wait);
+               }
+
+               spin_unlock(&c->freelist_lock);
+
+               if ((current->flags & PF_KTHREAD) &&
+                   kthread_should_stop()) {
+                       ret = 1;
+                       break;
+               }
+
+               schedule();
+               try_to_freeze();
+       }
+out:
+       __set_current_state(TASK_RUNNING);
+       return ret;
+}
+
+/*
+ * Pulls buckets off free_inc, discards them (if enabled), then adds them to
+ * freelists, waiting until there's room if necessary:
+ */
+static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
+{
+       while (!fifo_empty(&ca->free_inc)) {
+               size_t bucket = fifo_peek(&ca->free_inc);
+
+               if (ca->mi.discard &&
+                   blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
+                       blkdev_issue_discard(ca->disk_sb.bdev,
+                                            bucket_to_sector(ca, bucket),
+                                            ca->mi.bucket_size, GFP_NOIO, 0);
+
+               if (push_invalidated_bucket(c, ca, bucket))
+                       return 1;
+       }
+
+       return 0;
+}
+
+/**
+ * bch2_allocator_thread - move buckets from free_inc to reserves
+ *
+ * The free_inc FIFO is populated by find_reclaimable_buckets(), and
+ * the reserves are depleted by bucket allocation. When we run out
+ * of free_inc, try to invalidate some buckets and write out
+ * prios and gens.
+ */
+static int bch2_allocator_thread(void *arg)
+{
+       struct bch_dev *ca = arg;
+       struct bch_fs *c = ca->fs;
+       size_t nr;
+       int ret;
+
+       set_freezable();
+       ca->allocator_state = ALLOCATOR_RUNNING;
+
+       while (1) {
+               cond_resched();
+               if (kthread_should_stop())
+                       break;
+
+               pr_debug("discarding %zu invalidated buckets",
+                        fifo_used(&ca->free_inc));
+
+               ret = discard_invalidated_buckets(c, ca);
+               if (ret)
+                       goto stop;
+
+               down_read(&c->gc_lock);
+
+               ret = bch2_invalidate_buckets(c, ca);
+               if (ret) {
+                       up_read(&c->gc_lock);
+                       goto stop;
+               }
+
+               if (!fifo_empty(&ca->free_inc)) {
+                       up_read(&c->gc_lock);
+                       continue;
+               }
+
+               pr_debug("free_inc now empty");
+
+               do {
+                       /*
+                        * Find some buckets that we can invalidate: either
+                        * they're completely unused, or they only contain
+                        * clean data that's been written back to the backing
+                        * device or another cache tier.
+                        */
+
+                       pr_debug("scanning for reclaimable buckets");
+
+                       nr = find_reclaimable_buckets(c, ca);
+
+                       pr_debug("found %zu buckets", nr);
+
+                       trace_alloc_batch(ca, nr, ca->alloc_heap.size);
+
+                       if ((ca->inc_gen_needs_gc >= ALLOC_SCAN_BATCH(ca) ||
+                            ca->inc_gen_really_needs_gc) &&
+                           c->gc_thread) {
+                               atomic_inc(&c->kick_gc);
+                               wake_up_process(c->gc_thread);
+                       }
+
+                       /*
+                        * If we found any buckets, we have to invalidate them
+                        * before we scan for more - but if we didn't find very
+                        * many we may want to wait on more buckets being
+                        * available so we don't spin:
+                        */
+                       if (!nr ||
+                           (nr < ALLOC_SCAN_BATCH(ca) &&
+                            !fifo_empty(&ca->free[RESERVE_NONE]))) {
+                               ret = wait_buckets_available(c, ca);
+                               if (ret) {
+                                       up_read(&c->gc_lock);
+                                       goto stop;
+                               }
+                       }
+               } while (!nr);
+
+               up_read(&c->gc_lock);
+
+               pr_debug("%zu buckets to invalidate", nr);
+
+               /*
+                * alloc_heap is now full of newly-invalidated buckets: next,
+                * write out the new bucket gens:
+                */
+       }
+
+stop:
+       pr_debug("alloc thread stopping (ret %i)", ret);
+       ca->allocator_state = ALLOCATOR_STOPPED;
+       closure_wake_up(&c->freelist_wait);
+       return 0;
+}
+
+/* Startup/shutdown (ro/rw): */
+
+void bch2_recalc_capacity(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       u64 capacity = 0, reserved_sectors = 0, gc_reserve, copygc_threshold = 0;
+       unsigned bucket_size_max = 0;
+       unsigned long ra_pages = 0;
+       unsigned i, j;
+
+       lockdep_assert_held(&c->state_lock);
+
+       for_each_online_member(ca, c, i) {
+               struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_bdi;
+
+               ra_pages += bdi->ra_pages;
+       }
+
+       bch2_set_ra_pages(c, ra_pages);
+
+       for_each_rw_member(ca, c, i) {
+               u64 dev_reserve = 0;
+
+               /*
+                * We need to reserve buckets (from the number
+                * of currently available buckets) against
+                * foreground writes so that mainly copygc can
+                * make forward progress.
+                *
+                * We need enough to refill the various reserves
+                * from scratch - copygc will use its entire
+                * reserve all at once, then run again when
+                * its reserve is refilled (from the formerly
+                * available buckets).
+                *
+                * This reserve is only used when deciding whether
+                * allocations for foreground writes must wait -
+                * not for -ENOSPC calculations.
+                */
+               for (j = 0; j < RESERVE_NONE; j++)
+                       dev_reserve += ca->free[j].size;
+
+               dev_reserve += 1;       /* btree write point */
+               dev_reserve += 1;       /* copygc write point */
+               dev_reserve += 1;       /* rebalance write point */
+
+               dev_reserve *= ca->mi.bucket_size;
+
+               copygc_threshold += dev_reserve;
+
+               capacity += bucket_to_sector(ca, ca->mi.nbuckets -
+                                            ca->mi.first_bucket);
+
+               reserved_sectors += dev_reserve * 2;
+
+               bucket_size_max = max_t(unsigned, bucket_size_max,
+                                       ca->mi.bucket_size);
+       }
+
+       gc_reserve = c->opts.gc_reserve_bytes
+               ? c->opts.gc_reserve_bytes >> 9
+               : div64_u64(capacity * c->opts.gc_reserve_percent, 100);
+
+       reserved_sectors = max(gc_reserve, reserved_sectors);
+
+       reserved_sectors = min(reserved_sectors, capacity);
+
+       c->copygc_threshold = copygc_threshold;
+       c->capacity = capacity - reserved_sectors;
+
+       c->bucket_size_max = bucket_size_max;
+
+       /* Wake up in case someone was waiting for buckets */
+       closure_wake_up(&c->freelist_wait);
+}
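+
+/*
+ * A rough worked example with made-up numbers: one rw device with
+ * 1024-sector buckets and free[] fifos totalling 29 buckets gives
+ * dev_reserve = (29 + 3) * 1024 = 32768 sectors, contributing
+ * 65536 reserved sectors after doubling; if the gc reserve (percent or
+ * bytes) is larger, it wins, and usable capacity is what remains.
+ */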
+
+static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct open_bucket *ob;
+       bool ret = false;
+
+       for (ob = c->open_buckets;
+            ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
+            ob++) {
+               spin_lock(&ob->lock);
+               if (ob->valid && !ob->on_partial_list &&
+                   ob->ptr.dev == ca->dev_idx)
+                       ret = true;
+               spin_unlock(&ob->lock);
+       }
+
+       return ret;
+}
+
+/* device goes ro: */
+void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
+{
+       unsigned i;
+
+       BUG_ON(ca->alloc_thread);
+
+       /* First, remove device from allocation groups: */
+
+       for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
+               clear_bit(ca->dev_idx, c->rw_devs[i].d);
+
+       /*
+        * Capacity is calculated based off of devices in allocation groups:
+        */
+       bch2_recalc_capacity(c);
+
+       /* Next, close write points that point to this device... */
+       for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
+               bch2_writepoint_stop(c, ca, &c->write_points[i]);
+
+       bch2_writepoint_stop(c, ca, &c->copygc_write_point);
+       bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
+       bch2_writepoint_stop(c, ca, &c->btree_write_point);
+
+       mutex_lock(&c->btree_reserve_cache_lock);
+       while (c->btree_reserve_cache_nr) {
+               struct btree_alloc *a =
+                       &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+
+               bch2_open_buckets_put(c, &a->ob);
+       }
+       mutex_unlock(&c->btree_reserve_cache_lock);
+
+       while (1) {
+               struct open_bucket *ob;
+
+               spin_lock(&c->freelist_lock);
+               if (!ca->open_buckets_partial_nr) {
+                       spin_unlock(&c->freelist_lock);
+                       break;
+               }
+               ob = c->open_buckets +
+                       ca->open_buckets_partial[--ca->open_buckets_partial_nr];
+               ob->on_partial_list = false;
+               spin_unlock(&c->freelist_lock);
+
+               bch2_open_bucket_put(c, ob);
+       }
+
+       bch2_ec_stop_dev(c, ca);
+
+       /*
+        * Wake up threads that were blocked on allocation, so they can notice
+        * the device can no longer be removed and the capacity has changed:
+        */
+       closure_wake_up(&c->freelist_wait);
+
+       /*
+        * journal_res_get() can block waiting for free space in the journal -
+        * it needs to notice there may not be devices to allocate from anymore:
+        */
+       wake_up(&c->journal.wait);
+
+       /* Now wait for any in flight writes: */
+
+       closure_wait_event(&c->open_buckets_wait,
+                          !bch2_dev_has_open_write_point(c, ca));
+}
+
+/* device goes rw: */
+void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
+               if (ca->mi.data_allowed & (1 << i))
+                       set_bit(ca->dev_idx, c->rw_devs[i].d);
+}
+
+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
+{
+       if (ca->alloc_thread)
+               closure_wait_event(&c->freelist_wait,
+                                  ca->allocator_state != ALLOCATOR_RUNNING);
+}
+
+/* stop allocator thread: */
+void bch2_dev_allocator_stop(struct bch_dev *ca)
+{
+       struct task_struct *p;
+
+       p = rcu_dereference_protected(ca->alloc_thread, 1);
+       ca->alloc_thread = NULL;
+
+       /*
+        * We need an rcu barrier between setting ca->alloc_thread = NULL and
+        * the thread shutting down to avoid bch2_wake_allocator() racing:
+        *
+        * XXX: it would be better to have the rcu barrier be asynchronous
+        * instead of blocking us here
+        */
+       synchronize_rcu();
+
+       if (p) {
+               kthread_stop(p);
+               put_task_struct(p);
+       }
+}
+
+/* start allocator thread: */
+int bch2_dev_allocator_start(struct bch_dev *ca)
+{
+       struct task_struct *p;
+
+       /* allocator thread already started? */
+       if (ca->alloc_thread)
+               return 0;
+
+       p = kthread_create(bch2_allocator_thread, ca,
+                          "bch_alloc[%s]", ca->name);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       get_task_struct(p);
+       rcu_assign_pointer(ca->alloc_thread, p);
+       wake_up_process(p);
+       return 0;
+}
+
+void bch2_fs_allocator_background_init(struct bch_fs *c)
+{
+       spin_lock_init(&c->freelist_lock);
+       bch2_bucket_clock_init(c, READ);
+       bch2_bucket_clock_init(c, WRITE);
+
+       c->pd_controllers_update_seconds = 5;
+       INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);
+}
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
new file mode 100644 (file)
index 0000000..870714f
--- /dev/null
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H
+#define _BCACHEFS_ALLOC_BACKGROUND_H
+
+#include "bcachefs.h"
+#include "alloc_types.h"
+#include "debug.h"
+
+struct bkey_alloc_unpacked {
+       u8              gen;
+#define x(_name, _bits)        u##_bits _name;
+       BCH_ALLOC_FIELDS()
+#undef  x
+};
+
+/* returns true if not equal */
+static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
+                                          struct bkey_alloc_unpacked r)
+{
+       return l.gen != r.gen
+#define x(_name, _bits)        || l._name != r._name
+       BCH_ALLOC_FIELDS()
+#undef  x
+       ;
+}
+
+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
+void bch2_alloc_pack(struct bkey_i_alloc *,
+                    const struct bkey_alloc_unpacked);
+
+int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
+
+static inline struct bkey_alloc_unpacked
+alloc_mem_to_key(struct bucket *g, struct bucket_mark m)
+{
+       return (struct bkey_alloc_unpacked) {
+               .gen            = m.gen,
+               .oldest_gen     = g->oldest_gen,
+               .data_type      = m.data_type,
+               .dirty_sectors  = m.dirty_sectors,
+               .cached_sectors = m.cached_sectors,
+               .read_time      = g->io_time[READ],
+               .write_time     = g->io_time[WRITE],
+       };
+}
+
+#define ALLOC_SCAN_BATCH(ca)           max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
+
+const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_alloc (struct bkey_ops) {                \
+       .key_invalid    = bch2_alloc_invalid,           \
+       .val_to_text    = bch2_alloc_to_text,           \
+}
+
+struct journal_keys;
+int bch2_alloc_read(struct bch_fs *, struct journal_keys *);
+
+static inline void bch2_wake_allocator(struct bch_dev *ca)
+{
+       struct task_struct *p;
+
+       rcu_read_lock();
+       p = rcu_dereference(ca->alloc_thread);
+       if (p) {
+               wake_up_process(p);
+               ca->allocator_state = ALLOCATOR_RUNNING;
+       }
+       rcu_read_unlock();
+}
+
+static inline void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
+                                         size_t bucket)
+{
+       if (expensive_debug_checks(c)) {
+               size_t iter;
+               long i;
+               unsigned j;
+
+               for (j = 0; j < RESERVE_NR; j++)
+                       fifo_for_each_entry(i, &ca->free[j], iter)
+                               BUG_ON(i == bucket);
+               fifo_for_each_entry(i, &ca->free_inc, iter)
+                       BUG_ON(i == bucket);
+       }
+}
+
+void bch2_recalc_capacity(struct bch_fs *);
+
+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
+
+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
+void bch2_dev_allocator_stop(struct bch_dev *);
+int bch2_dev_allocator_start(struct bch_dev *);
+
+int bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned);
+int bch2_alloc_write(struct bch_fs *, unsigned);
+void bch2_fs_allocator_background_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
new file mode 100644 (file)
index 0000000..7a92e3d
--- /dev/null
@@ -0,0 +1,990 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Primary bucket allocation code
+ *
+ * Copyright 2012 Google, Inc.
+ *
+ * Allocation in bcache is done in terms of buckets:
+ *
+ * Each bucket has an associated 8 bit gen; this gen corresponds to the gen in
+ * btree pointers - they must match for the pointer to be considered valid.
+ *
+ * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a
+ * bucket simply by incrementing its gen.
+ *
+ * The gens (along with the priorities; it's really the gens that matter, but
+ * the code is named as if it's the priorities) are written in an arbitrary list
+ * of buckets on disk, with a pointer to them in the journal header.
+ *
+ * When we invalidate a bucket, we have to write its new gen to disk and wait
+ * for that write to complete before we use it - otherwise after a crash we
+ * could have pointers that appeared to be good but pointed to data that had
+ * been overwritten.
+ *
+ * Since the gens and priorities are all stored contiguously on disk, we can
+ * batch this up: We fill up the free_inc list with freshly invalidated buckets,
+ * call prio_write(), and when prio_write() finishes we pull buckets off the
+ * free_inc list and optionally discard them.
+ *
+ * free_inc isn't the only freelist - if it was, we'd often have to sleep while
+ * priorities and gens were being written before we could allocate. c->free is a
+ * smaller freelist, and buckets on that list are always ready to be used.
+ *
+ * If we've got discards enabled, that happens when a bucket moves from the
+ * free_inc list to the free list.
+ *
+ * It's important to ensure that gens don't wrap around - with respect to
+ * either the oldest gen in the btree or the gen on disk. This is quite
+ * difficult to do in practice, but we explicitly guard against it anyway - if
+ * a bucket is in danger of wrapping around we simply skip invalidating it that
+ * time around, and we garbage collect or rewrite the priorities sooner than we
+ * would have otherwise.
+ *
+ * bch2_bucket_alloc() allocates a single bucket from a specific device.
+ *
+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices
+ * in a given filesystem.
+ *
+ * invalidate_buckets() drives all the processes described above. It's called
+ * from bch2_bucket_alloc() and a few other places that need to make sure free
+ * buckets are ready.
+ *
+ * invalidate_buckets_(lru|fifo)() find buckets that are available to be
+ * invalidated, and then invalidate them and stick them on the free_inc list -
+ * in either lru or fifo order.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "clock.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "io.h"
+
+#include <linux/math64.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <trace/events/bcachefs.h>
+
+/*
+ * Open buckets represent a bucket that's currently being allocated from.  They
+ * serve two purposes:
+ *
+ *  - They track buckets that have been partially allocated, allowing for
+ *    sub-bucket sized allocations - they're used by the sector allocator below
+ *
+ *  - They provide a reference to the buckets they own that mark and sweep GC
+ *    can find, until the new allocation has a pointer to it inserted into the
+ *    btree
+ *
+ * When allocating some space with the sector allocator, the allocation comes
+ * with a reference to an open bucket - the caller is required to put that
+ * reference _after_ doing the index update that makes its allocation reachable.
+ */
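+
+/*
+ * The reference discipline above, as a sketch (error handling elided;
+ * write_data_to() and update_index() are stand-ins, not real helpers):
+ *
+ *	ob = bch2_bucket_alloc(c, ca, reserve, false, cl);
+ *	write_data_to(ob);		-- issue the data write
+ *	update_index(ob);		-- make the allocation reachable
+ *	bch2_open_bucket_put(c, ob);	-- only now drop the reference
+ *
+ * Putting the reference before the index update would leave a window where
+ * the bucket could be invalidated and reused while nothing points at it.
+ */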
+
+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+       if (ob->ec) {
+               bch2_ec_bucket_written(c, ob);
+               return;
+       }
+
+       percpu_down_read(&c->mark_lock);
+       spin_lock(&ob->lock);
+
+       bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
+                              false, gc_pos_alloc(c, ob), 0);
+       ob->valid = false;
+       ob->type = 0;
+
+       spin_unlock(&ob->lock);
+       percpu_up_read(&c->mark_lock);
+
+       spin_lock(&c->freelist_lock);
+       ob->freelist = c->open_buckets_freelist;
+       c->open_buckets_freelist = ob - c->open_buckets;
+       c->open_buckets_nr_free++;
+       spin_unlock(&c->freelist_lock);
+
+       closure_wake_up(&c->open_buckets_wait);
+}
+
+void bch2_open_bucket_write_error(struct bch_fs *c,
+                                 struct open_buckets *obs,
+                                 unsigned dev)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, obs, ob, i)
+               if (ob->ptr.dev == dev &&
+                   ob->ec)
+                       bch2_ec_bucket_cancel(c, ob);
+}
+
+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
+{
+       struct open_bucket *ob;
+
+       BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);
+
+       ob = c->open_buckets + c->open_buckets_freelist;
+       c->open_buckets_freelist = ob->freelist;
+       atomic_set(&ob->pin, 1);
+       ob->type = 0;
+
+       c->open_buckets_nr_free--;
+       return ob;
+}
+
+static void open_bucket_free_unused(struct bch_fs *c,
+                                   struct write_point *wp,
+                                   struct open_bucket *ob)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+       bool may_realloc = wp->type == BCH_DATA_user;
+
+       BUG_ON(ca->open_buckets_partial_nr >
+              ARRAY_SIZE(ca->open_buckets_partial));
+
+       if (ca->open_buckets_partial_nr <
+           ARRAY_SIZE(ca->open_buckets_partial) &&
+           may_realloc) {
+               spin_lock(&c->freelist_lock);
+               ob->on_partial_list = true;
+               ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
+                       ob - c->open_buckets;
+               spin_unlock(&c->freelist_lock);
+
+               closure_wake_up(&c->open_buckets_wait);
+               closure_wake_up(&c->freelist_wait);
+       } else {
+               bch2_open_bucket_put(c, ob);
+       }
+}
+
+static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, obs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+               BUG_ON(ptr_stale(ca, &ob->ptr));
+       }
+#endif
+}
+
+/* _only_ for allocating the journal on a new device: */
+long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
+{
+       struct bucket_array *buckets;
+       ssize_t b;
+
+       rcu_read_lock();
+       buckets = bucket_array(ca);
+
+       for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
+               if (is_available_bucket(buckets->b[b].mark))
+                       goto success;
+       b = -1;
+success:
+       rcu_read_unlock();
+       return b;
+}
+
+static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
+{
+       switch (reserve) {
+       case RESERVE_ALLOC:
+               return 0;
+       case RESERVE_BTREE:
+               return OPEN_BUCKETS_COUNT / 4;
+       default:
+               return OPEN_BUCKETS_COUNT / 2;
+       }
+}
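+
+/*
+ * E.g.: ordinary allocations fail once fewer than half the open buckets
+ * remain free, btree allocations once fewer than a quarter remain, and
+ * RESERVE_ALLOC may drain the freelist completely - the most critical
+ * allocations are the last to starve.
+ */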
+
+/**
+ * bch2_bucket_alloc - allocate a single bucket from a specific device
+ *
+ * Returns a pointer to the newly allocated open_bucket on success, or an
+ * ERR_PTR() on failure.
+ */
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+                                     enum alloc_reserve reserve,
+                                     bool may_alloc_partial,
+                                     struct closure *cl)
+{
+       struct bucket_array *buckets;
+       struct open_bucket *ob;
+       long bucket = 0;
+
+       spin_lock(&c->freelist_lock);
+
+       if (may_alloc_partial) {
+               int i;
+
+               for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
+                       ob = c->open_buckets + ca->open_buckets_partial[i];
+
+                       if (reserve <= ob->alloc_reserve) {
+                               array_remove_item(ca->open_buckets_partial,
+                                                 ca->open_buckets_partial_nr,
+                                                 i);
+                               ob->on_partial_list = false;
+                               ob->alloc_reserve = reserve;
+                               spin_unlock(&c->freelist_lock);
+                               return ob;
+                       }
+               }
+       }
+
+       if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
+               if (cl)
+                       closure_wait(&c->open_buckets_wait, cl);
+
+               if (!c->blocked_allocate_open_bucket)
+                       c->blocked_allocate_open_bucket = local_clock();
+
+               spin_unlock(&c->freelist_lock);
+               trace_open_bucket_alloc_fail(ca, reserve);
+               return ERR_PTR(-OPEN_BUCKETS_EMPTY);
+       }
+
+       if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket)))
+               goto out;
+
+       switch (reserve) {
+       case RESERVE_ALLOC:
+               if (fifo_pop(&ca->free[RESERVE_BTREE], bucket))
+                       goto out;
+               break;
+       case RESERVE_BTREE:
+               if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >=
+                   ca->free[RESERVE_BTREE].size &&
+                   fifo_pop(&ca->free[RESERVE_BTREE], bucket))
+                       goto out;
+               break;
+       case RESERVE_MOVINGGC:
+               if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket))
+                       goto out;
+               break;
+       default:
+               break;
+       }
+
+       if (cl)
+               closure_wait(&c->freelist_wait, cl);
+
+       if (!c->blocked_allocate)
+               c->blocked_allocate = local_clock();
+
+       spin_unlock(&c->freelist_lock);
+
+       trace_bucket_alloc_fail(ca, reserve);
+       return ERR_PTR(-FREELIST_EMPTY);
+out:
+       verify_not_on_freelist(c, ca, bucket);
+
+       ob = bch2_open_bucket_alloc(c);
+
+       spin_lock(&ob->lock);
+       buckets = bucket_array(ca);
+
+       ob->valid       = true;
+       ob->sectors_free = ca->mi.bucket_size;
+       ob->alloc_reserve = reserve;
+       ob->ptr         = (struct bch_extent_ptr) {
+               .type   = 1 << BCH_EXTENT_ENTRY_ptr,
+               .gen    = buckets->b[bucket].mark.gen,
+               .offset = bucket_to_sector(ca, bucket),
+               .dev    = ca->dev_idx,
+       };
+
+       spin_unlock(&ob->lock);
+
+       if (c->blocked_allocate_open_bucket) {
+               bch2_time_stats_update(
+                       &c->times[BCH_TIME_blocked_allocate_open_bucket],
+                       c->blocked_allocate_open_bucket);
+               c->blocked_allocate_open_bucket = 0;
+       }
+
+       if (c->blocked_allocate) {
+               bch2_time_stats_update(
+                       &c->times[BCH_TIME_blocked_allocate],
+                       c->blocked_allocate);
+               c->blocked_allocate = 0;
+       }
+
+       spin_unlock(&c->freelist_lock);
+
+       bch2_wake_allocator(ca);
+
+       trace_bucket_alloc(ca, reserve);
+       return ob;
+}
+
+static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
+                           unsigned l, unsigned r)
+{
+       return ((stripe->next_alloc[l] > stripe->next_alloc[r]) -
+               (stripe->next_alloc[l] < stripe->next_alloc[r]));
+}
+
+#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r)
+
+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
+                                         struct dev_stripe_state *stripe,
+                                         struct bch_devs_mask *devs)
+{
+       struct dev_alloc_list ret = { .nr = 0 };
+       unsigned i;
+
+       for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
+               ret.devs[ret.nr++] = i;
+
+       bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
+       return ret;
+}
+
+void bch2_dev_stripe_increment(struct bch_dev *ca,
+                              struct dev_stripe_state *stripe)
+{
+       u64 *v = stripe->next_alloc + ca->dev_idx;
+       u64 free_space = dev_buckets_free(ca);
+       u64 free_space_inv = free_space
+               ? div64_u64(1ULL << 48, free_space)
+               : 1ULL << 48;
+       u64 scale = *v / 4;
+
+       if (*v + free_space_inv >= *v)
+               *v += free_space_inv;
+       else
+               *v = U64_MAX;
+
+       for (v = stripe->next_alloc;
+            v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++)
+               *v = *v < scale ? 0 : *v - scale;
+}
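+
+/*
+ * Worked example of the weighting above (a sketch): a device with 2^20
+ * free buckets has next_alloc bumped by 2^48 / 2^20 = 2^28 per allocation,
+ * while one with 2^10 free buckets gets 2^38 - the emptier device's
+ * counter grows ~1024x faster, so it sorts later in bch2_dev_alloc_list()
+ * and receives proportionally fewer allocations. The final loop decays
+ * every counter so none of them saturate at U64_MAX.
+ */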
+
+#define BUCKET_MAY_ALLOC_PARTIAL       (1 << 0)
+#define BUCKET_ALLOC_USE_DURABILITY    (1 << 1)
+
+static void add_new_bucket(struct bch_fs *c,
+                          struct open_buckets *ptrs,
+                          struct bch_devs_mask *devs_may_alloc,
+                          unsigned *nr_effective,
+                          bool *have_cache,
+                          unsigned flags,
+                          struct open_bucket *ob)
+{
+       unsigned durability =
+               bch_dev_bkey_exists(c, ob->ptr.dev)->mi.durability;
+
+       __clear_bit(ob->ptr.dev, devs_may_alloc->d);
+       *nr_effective   += (flags & BUCKET_ALLOC_USE_DURABILITY)
+               ? durability : 1;
+       *have_cache     |= !durability;
+
+       ob_push(c, ptrs, ob);
+}
+
+enum bucket_alloc_ret
+bch2_bucket_alloc_set(struct bch_fs *c,
+                     struct open_buckets *ptrs,
+                     struct dev_stripe_state *stripe,
+                     struct bch_devs_mask *devs_may_alloc,
+                     unsigned nr_replicas,
+                     unsigned *nr_effective,
+                     bool *have_cache,
+                     enum alloc_reserve reserve,
+                     unsigned flags,
+                     struct closure *cl)
+{
+       struct dev_alloc_list devs_sorted =
+               bch2_dev_alloc_list(c, stripe, devs_may_alloc);
+       struct bch_dev *ca;
+       enum bucket_alloc_ret ret = INSUFFICIENT_DEVICES;
+       unsigned i;
+
+       BUG_ON(*nr_effective >= nr_replicas);
+
+       for (i = 0; i < devs_sorted.nr; i++) {
+               struct open_bucket *ob;
+
+               ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
+               if (!ca)
+                       continue;
+
+               if (!ca->mi.durability && *have_cache)
+                       continue;
+
+               ob = bch2_bucket_alloc(c, ca, reserve,
+                               flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
+               if (IS_ERR(ob)) {
+                       ret = -PTR_ERR(ob);
+
+                       if (cl)
+                               return ret;
+                       continue;
+               }
+
+               add_new_bucket(c, ptrs, devs_may_alloc,
+                              nr_effective, have_cache, flags, ob);
+
+               bch2_dev_stripe_increment(ca, stripe);
+
+               if (*nr_effective >= nr_replicas)
+                       return ALLOC_SUCCESS;
+       }
+
+       return ret;
+}
+
+/* Allocate from stripes: */
+
+/*
+ * if we can't allocate a new stripe because there are already too many
+ * partially filled stripes, force allocating from an existing stripe even when
+ * it's to a device we don't want:
+ */
+
+static void bucket_alloc_from_stripe(struct bch_fs *c,
+                                    struct open_buckets *ptrs,
+                                    struct write_point *wp,
+                                    struct bch_devs_mask *devs_may_alloc,
+                                    u16 target,
+                                    unsigned erasure_code,
+                                    unsigned nr_replicas,
+                                    unsigned *nr_effective,
+                                    bool *have_cache,
+                                    unsigned flags)
+{
+       struct dev_alloc_list devs_sorted;
+       struct ec_stripe_head *h;
+       struct open_bucket *ob;
+       struct bch_dev *ca;
+       unsigned i, ec_idx;
+
+       if (!erasure_code)
+               return;
+
+       if (nr_replicas < 2)
+               return;
+
+       if (ec_open_bucket(c, ptrs))
+               return;
+
+       h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1);
+       if (!h)
+               return;
+
+       devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
+
+       for (i = 0; i < devs_sorted.nr; i++)
+               open_bucket_for_each(c, &h->s->blocks, ob, ec_idx)
+                       if (ob->ptr.dev == devs_sorted.devs[i] &&
+                           !test_and_set_bit(h->s->data_block_idx[ec_idx],
+                                             h->s->blocks_allocated))
+                               goto got_bucket;
+       goto out_put_head;
+got_bucket:
+       ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+       ob->ec_idx      = h->s->data_block_idx[ec_idx];
+       ob->ec          = h->s;
+
+       add_new_bucket(c, ptrs, devs_may_alloc,
+                      nr_effective, have_cache, flags, ob);
+       atomic_inc(&h->s->pin);
+out_put_head:
+       bch2_ec_stripe_head_put(c, h);
+}
+
+/* Sector allocator */
+
+static void get_buckets_from_writepoint(struct bch_fs *c,
+                                       struct open_buckets *ptrs,
+                                       struct write_point *wp,
+                                       struct bch_devs_mask *devs_may_alloc,
+                                       unsigned nr_replicas,
+                                       unsigned *nr_effective,
+                                       bool *have_cache,
+                                       unsigned flags,
+                                       bool need_ec)
+{
+       struct open_buckets ptrs_skip = { .nr = 0 };
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+               if (*nr_effective < nr_replicas &&
+                   test_bit(ob->ptr.dev, devs_may_alloc->d) &&
+                   (ca->mi.durability ||
+                    (wp->type == BCH_DATA_user && !*have_cache)) &&
+                   (ob->ec || !need_ec)) {
+                       add_new_bucket(c, ptrs, devs_may_alloc,
+                                      nr_effective, have_cache,
+                                      flags, ob);
+               } else {
+                       ob_push(c, &ptrs_skip, ob);
+               }
+       }
+       wp->ptrs = ptrs_skip;
+}
+
+static enum bucket_alloc_ret
+open_bucket_add_buckets(struct bch_fs *c,
+                       struct open_buckets *ptrs,
+                       struct write_point *wp,
+                       struct bch_devs_list *devs_have,
+                       u16 target,
+                       unsigned erasure_code,
+                       unsigned nr_replicas,
+                       unsigned *nr_effective,
+                       bool *have_cache,
+                       enum alloc_reserve reserve,
+                       unsigned flags,
+                       struct closure *_cl)
+{
+       struct bch_devs_mask devs;
+       struct open_bucket *ob;
+       struct closure *cl = NULL;
+       enum bucket_alloc_ret ret;
+       unsigned i;
+
+       rcu_read_lock();
+       devs = target_rw_devs(c, wp->type, target);
+       rcu_read_unlock();
+
+       /* Don't allocate from devices we already have pointers to: */
+       for (i = 0; i < devs_have->nr; i++)
+               __clear_bit(devs_have->devs[i], devs.d);
+
+       open_bucket_for_each(c, ptrs, ob, i)
+               __clear_bit(ob->ptr.dev, devs.d);
+
+       if (erasure_code) {
+               if (!ec_open_bucket(c, ptrs)) {
+                       get_buckets_from_writepoint(c, ptrs, wp, &devs,
+                                                   nr_replicas, nr_effective,
+                                                   have_cache, flags, true);
+                       if (*nr_effective >= nr_replicas)
+                               return 0;
+               }
+
+               if (!ec_open_bucket(c, ptrs)) {
+                       bucket_alloc_from_stripe(c, ptrs, wp, &devs,
+                                                target, erasure_code,
+                                                nr_replicas, nr_effective,
+                                                have_cache, flags);
+                       if (*nr_effective >= nr_replicas)
+                               return 0;
+               }
+       }
+
+       get_buckets_from_writepoint(c, ptrs, wp, &devs,
+                                   nr_replicas, nr_effective,
+                                   have_cache, flags, false);
+       if (*nr_effective >= nr_replicas)
+               return 0;
+
+       percpu_down_read(&c->mark_lock);
+       rcu_read_lock();
+
+retry_blocking:
+       /*
+        * Try nonblocking first, so that if one device is full we'll try from
+        * other devices:
+        */
+       ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
+                               nr_replicas, nr_effective, have_cache,
+                               reserve, flags, cl);
+       if (ret && ret != INSUFFICIENT_DEVICES && !cl && _cl) {
+               cl = _cl;
+               goto retry_blocking;
+       }
+
+       rcu_read_unlock();
+       percpu_up_read(&c->mark_lock);
+
+       return ret;
+}
+
+void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
+                               struct open_buckets *obs)
+{
+       struct open_buckets ptrs = { .nr = 0 };
+       struct open_bucket *ob, *ob2;
+       unsigned i, j;
+
+       open_bucket_for_each(c, obs, ob, i) {
+               bool drop = !ca || ob->ptr.dev == ca->dev_idx;
+
+               if (!drop && ob->ec) {
+                       mutex_lock(&ob->ec->lock);
+                       open_bucket_for_each(c, &ob->ec->blocks, ob2, j)
+                               drop |= ob2->ptr.dev == ca->dev_idx;
+                       open_bucket_for_each(c, &ob->ec->parity, ob2, j)
+                               drop |= ob2->ptr.dev == ca->dev_idx;
+                       mutex_unlock(&ob->ec->lock);
+               }
+
+               if (drop)
+                       bch2_open_bucket_put(c, ob);
+               else
+                       ob_push(c, &ptrs, ob);
+       }
+
+       *obs = ptrs;
+}
+
+void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
+                         struct write_point *wp)
+{
+       mutex_lock(&wp->lock);
+       bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
+       mutex_unlock(&wp->lock);
+}
+
+static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
+                                                unsigned long write_point)
+{
+       unsigned hash =
+               hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
+
+       return &c->write_points_hash[hash];
+}
+
+static struct write_point *__writepoint_find(struct hlist_head *head,
+                                            unsigned long write_point)
+{
+       struct write_point *wp;
+
+       hlist_for_each_entry_rcu(wp, head, node)
+               if (wp->write_point == write_point)
+                       return wp;
+
+       return NULL;
+}
+
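+/*
+ * Illustration of the heuristic below: "stranded" space is what could sit
+ * unused in partially filled open buckets. E.g. (numbers hypothetical): with
+ * 32 write points and a 1MB max bucket size, stranded = 32MB, so with
+ * factor == 32 we only allow another write point if over 1GB is free.
+ */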
+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
+{
+       u64 stranded    = c->write_points_nr * c->bucket_size_max;
+       u64 free        = bch2_fs_usage_read_short(c).free;
+
+       return stranded * factor > free;
+}
+
+static bool try_increase_writepoints(struct bch_fs *c)
+{
+       struct write_point *wp;
+
+       if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
+           too_many_writepoints(c, 32))
+               return false;
+
+       wp = c->write_points + c->write_points_nr++;
+       hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
+       return true;
+}
+
+static bool try_decrease_writepoints(struct bch_fs *c,
+                                    unsigned old_nr)
+{
+       struct write_point *wp;
+
+       mutex_lock(&c->write_points_hash_lock);
+       if (c->write_points_nr < old_nr) {
+               mutex_unlock(&c->write_points_hash_lock);
+               return true;
+       }
+
+       if (c->write_points_nr == 1 ||
+           !too_many_writepoints(c, 8)) {
+               mutex_unlock(&c->write_points_hash_lock);
+               return false;
+       }
+
+       wp = c->write_points + --c->write_points_nr;
+
+       hlist_del_rcu(&wp->node);
+       mutex_unlock(&c->write_points_hash_lock);
+
+       bch2_writepoint_stop(c, NULL, wp);
+       return true;
+}
+
+static struct write_point *writepoint_find(struct bch_fs *c,
+                                          unsigned long write_point)
+{
+       struct write_point *wp, *oldest;
+       struct hlist_head *head;
+
+       if (!(write_point & 1UL)) {
+               wp = (struct write_point *) write_point;
+               mutex_lock(&wp->lock);
+               return wp;
+       }
+
+       head = writepoint_hash(c, write_point);
+restart_find:
+       wp = __writepoint_find(head, write_point);
+       if (wp) {
+lock_wp:
+               mutex_lock(&wp->lock);
+               if (wp->write_point == write_point)
+                       goto out;
+               mutex_unlock(&wp->lock);
+               goto restart_find;
+       }
+restart_find_oldest:
+       oldest = NULL;
+       for (wp = c->write_points;
+            wp < c->write_points + c->write_points_nr; wp++)
+               if (!oldest || time_before64(wp->last_used, oldest->last_used))
+                       oldest = wp;
+
+       mutex_lock(&oldest->lock);
+       mutex_lock(&c->write_points_hash_lock);
+       if (oldest >= c->write_points + c->write_points_nr ||
+           try_increase_writepoints(c)) {
+               mutex_unlock(&c->write_points_hash_lock);
+               mutex_unlock(&oldest->lock);
+               goto restart_find_oldest;
+       }
+
+       wp = __writepoint_find(head, write_point);
+       if (wp && wp != oldest) {
+               mutex_unlock(&c->write_points_hash_lock);
+               mutex_unlock(&oldest->lock);
+               goto lock_wp;
+       }
+
+       wp = oldest;
+       hlist_del_rcu(&wp->node);
+       wp->write_point = write_point;
+       hlist_add_head_rcu(&wp->node, head);
+       mutex_unlock(&c->write_points_hash_lock);
+out:
+       wp->last_used = sched_clock();
+       return wp;
+}
+
+/*
+ * Get us an open_bucket we can allocate from, return with it locked:
+ */
+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
+                               unsigned target,
+                               unsigned erasure_code,
+                               struct write_point_specifier write_point,
+                               struct bch_devs_list *devs_have,
+                               unsigned nr_replicas,
+                               unsigned nr_replicas_required,
+                               enum alloc_reserve reserve,
+                               unsigned flags,
+                               struct closure *cl)
+{
+       struct write_point *wp;
+       struct open_bucket *ob;
+       struct open_buckets ptrs;
+       unsigned nr_effective, write_points_nr;
+       unsigned ob_flags = 0;
+       bool have_cache;
+       enum bucket_alloc_ret ret;
+       int i;
+
+       if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
+               ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
+
+       BUG_ON(!nr_replicas || !nr_replicas_required);
+retry:
+       ptrs.nr         = 0;
+       nr_effective    = 0;
+       write_points_nr = c->write_points_nr;
+       have_cache      = false;
+
+       wp = writepoint_find(c, write_point.v);
+
+       if (wp->type == BCH_DATA_user)
+               ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
+
+       /* metadata may not allocate on cache devices: */
+       if (wp->type != BCH_DATA_user)
+               have_cache = true;
+
+       if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+                                             target, erasure_code,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve,
+                                             ob_flags, cl);
+       } else {
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+                                             target, erasure_code,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve,
+                                             ob_flags, NULL);
+               if (!ret)
+                       goto alloc_done;
+
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+                                             0, erasure_code,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve,
+                                             ob_flags, cl);
+       }
+alloc_done:
+       BUG_ON(!ret && nr_effective < nr_replicas);
+
+       if (erasure_code && !ec_open_bucket(c, &ptrs))
+               pr_debug("failed to get ec bucket: ret %u", ret);
+
+       if (ret == INSUFFICIENT_DEVICES &&
+           nr_effective >= nr_replicas_required)
+               ret = 0;
+
+       if (ret)
+               goto err;
+
+       /* Free buckets we didn't use: */
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               open_bucket_free_unused(c, wp, ob);
+
+       wp->ptrs = ptrs;
+
+       wp->sectors_free = UINT_MAX;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
+
+       BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
+
+       verify_not_stale(c, &wp->ptrs);
+
+       return wp;
+err:
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               if (ptrs.nr < ARRAY_SIZE(ptrs.v))
+                       ob_push(c, &ptrs, ob);
+               else
+                       open_bucket_free_unused(c, wp, ob);
+       wp->ptrs = ptrs;
+
+       mutex_unlock(&wp->lock);
+
+       if (ret == FREELIST_EMPTY &&
+           try_decrease_writepoints(c, write_points_nr))
+               goto retry;
+
+       switch (ret) {
+       case OPEN_BUCKETS_EMPTY:
+       case FREELIST_EMPTY:
+               return cl ? ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC);
+       case INSUFFICIENT_DEVICES:
+               return ERR_PTR(-EROFS);
+       default:
+               BUG();
+       }
+}
+
+/*
+ * Append pointers to the space we just allocated to @k, and mark @sectors space
+ * as allocated out of @wp
+ */
+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
+                                   struct bkey_i *k, unsigned sectors)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       BUG_ON(sectors > wp->sectors_free);
+       wp->sectors_free -= sectors;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+               struct bch_extent_ptr tmp = ob->ptr;
+
+               tmp.cached = !ca->mi.durability &&
+                       wp->type == BCH_DATA_user;
+
+               tmp.offset += ca->mi.bucket_size - ob->sectors_free;
+               bch2_bkey_append_ptr(k, tmp);
+
+               BUG_ON(sectors > ob->sectors_free);
+               ob->sectors_free -= sectors;
+       }
+}
+
+/*
+ * Finish an allocation from @wp: release the open buckets that are now full,
+ * keep the rest for the next allocation, and unlock the write point
+ */
+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
+{
+       struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 };
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+       wp->ptrs = keep;
+
+       mutex_unlock(&wp->lock);
+
+       bch2_open_buckets_put(c, &ptrs);
+}
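+
+/*
+ * Typical calling sequence (illustration only - error handling elided,
+ * surrounding variables hypothetical):
+ *
+ *	wp = bch2_alloc_sectors_start(c, target, 0, writepoint_hashed(v),
+ *				      &devs_have, nr_replicas, nr_replicas,
+ *				      RESERVE_NONE, 0, &cl);
+ *	if (!IS_ERR(wp)) {
+ *		bch2_alloc_sectors_append_ptrs(c, wp, k, sectors);
+ *		bch2_alloc_sectors_done(c, wp);
+ *	}
+ */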
+
+static inline void writepoint_init(struct write_point *wp,
+                                  enum bch_data_type type)
+{
+       mutex_init(&wp->lock);
+       wp->type = type;
+}
+
+void bch2_fs_allocator_foreground_init(struct bch_fs *c)
+{
+       struct open_bucket *ob;
+       struct write_point *wp;
+
+       mutex_init(&c->write_points_hash_lock);
+       c->write_points_nr = ARRAY_SIZE(c->write_points);
+
+       /* open bucket 0 is a sentinel NULL: */
+       spin_lock_init(&c->open_buckets[0].lock);
+
+       for (ob = c->open_buckets + 1;
+            ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
+               spin_lock_init(&ob->lock);
+               c->open_buckets_nr_free++;
+
+               ob->freelist = c->open_buckets_freelist;
+               c->open_buckets_freelist = ob - c->open_buckets;
+       }
+
+       writepoint_init(&c->btree_write_point,          BCH_DATA_btree);
+       writepoint_init(&c->rebalance_write_point,      BCH_DATA_user);
+       writepoint_init(&c->copygc_write_point,         BCH_DATA_user);
+
+       for (wp = c->write_points;
+            wp < c->write_points + c->write_points_nr; wp++) {
+               writepoint_init(wp, BCH_DATA_user);
+
+               wp->last_used   = sched_clock();
+               wp->write_point = (unsigned long) wp;
+               hlist_add_head_rcu(&wp->node,
+                                  writepoint_hash(c, wp->write_point));
+       }
+}
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
new file mode 100644 (file)
index 0000000..c658295
--- /dev/null
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H
+#define _BCACHEFS_ALLOC_FOREGROUND_H
+
+#include "bcachefs.h"
+#include "alloc_types.h"
+
+#include <linux/hash.h>
+
+struct bkey;
+struct bch_dev;
+struct bch_fs;
+struct bch_devs_list;
+
+enum bucket_alloc_ret {
+       ALLOC_SUCCESS,
+       OPEN_BUCKETS_EMPTY,
+       FREELIST_EMPTY,         /* Allocator thread not keeping up */
+       INSUFFICIENT_DEVICES,
+};
+
+struct dev_alloc_list {
+       unsigned        nr;
+       u8              devs[BCH_SB_MEMBERS_MAX];
+};
+
+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
+                                         struct dev_stripe_state *,
+                                         struct bch_devs_mask *);
+void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
+
+long bch2_bucket_alloc_new_fs(struct bch_dev *);
+
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
+                                     enum alloc_reserve, bool,
+                                     struct closure *);
+
+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
+                          struct open_bucket *ob)
+{
+       BUG_ON(obs->nr >= ARRAY_SIZE(obs->v));
+
+       obs->v[obs->nr++] = ob - c->open_buckets;
+}
+
+#define open_bucket_for_each(_c, _obs, _ob, _i)                                \
+       for ((_i) = 0;                                                  \
+            (_i) < (_obs)->nr &&                                       \
+            ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true);        \
+            (_i)++)
+
+static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
+                                                struct open_buckets *obs)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, obs, ob, i)
+               if (ob->ec)
+                       return ob;
+
+       return NULL;
+}
+
+void bch2_open_bucket_write_error(struct bch_fs *,
+                       struct open_buckets *, unsigned);
+
+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
+
+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
+{
+       if (atomic_dec_and_test(&ob->pin))
+               __bch2_open_bucket_put(c, ob);
+}
+
+static inline void bch2_open_buckets_put(struct bch_fs *c,
+                                        struct open_buckets *ptrs)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, ptrs, ob, i)
+               bch2_open_bucket_put(c, ob);
+       ptrs->nr = 0;
+}
+
+static inline void bch2_open_bucket_get(struct bch_fs *c,
+                                       struct write_point *wp,
+                                       struct open_buckets *ptrs)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               ob->type = wp->type;
+               atomic_inc(&ob->pin);
+               ob_push(c, ptrs, ob);
+       }
+}
+
+enum bucket_alloc_ret
+bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
+                     struct dev_stripe_state *, struct bch_devs_mask *,
+                     unsigned, unsigned *, bool *, enum alloc_reserve,
+                     unsigned, struct closure *);
+
+struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
+                                            unsigned, unsigned,
+                                            struct write_point_specifier,
+                                            struct bch_devs_list *,
+                                            unsigned, unsigned,
+                                            enum alloc_reserve,
+                                            unsigned,
+                                            struct closure *);
+
+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
+                                   struct bkey_i *, unsigned);
+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
+
+void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
+                               struct open_buckets *);
+
+void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
+                         struct write_point *);
+
+static inline struct write_point_specifier writepoint_hashed(unsigned long v)
+{
+       return (struct write_point_specifier) { .v = v | 1 };
+}
+
+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp)
+{
+       return (struct write_point_specifier) { .v = (unsigned long) wp };
+}
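+
+/*
+ * Note: write_point pointers are word aligned, so bit 0 is free to act as a
+ * tag - writepoint_hashed() sets it so that writepoint_find() treats .v as a
+ * hash key instead of a direct pointer. E.g. (hypothetical caller):
+ *
+ *	wp = bch2_alloc_sectors_start(c, 0, 0,
+ *			writepoint_hashed((unsigned long) inum), ...);
+ */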
+
+void bch2_fs_allocator_foreground_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h
new file mode 100644 (file)
index 0000000..2070546
--- /dev/null
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_TYPES_H
+#define _BCACHEFS_ALLOC_TYPES_H
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+#include "clock_types.h"
+#include "fifo.h"
+
+struct ec_bucket_buf;
+
+/* There are two of these clocks, one for reads and one for writes: */
+struct bucket_clock {
+       /*
+        * "now" in (read/write) IO time - incremented whenever we do X amount
+        * of reads or writes.
+        *
+        * Goes with the bucket read/write prios: when we read or write to a
+        * bucket we reset the bucket's prio to the current hand; thus hand -
+        * prio = time since bucket was last read/written.
+        *
+        * The units are some amount (bytes/sectors) of data read/written, and
+        * the units can change on the fly if we need to rescale to fit
+        * everything in a u16 - your only guarantee is that the units are
+        * consistent.
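+        *
+        * E.g. (illustrative numbers): if hand is 1000 and a bucket's prio was
+        * set to 400 at its last IO, that bucket last saw IO 600 units ago;
+        * u16 subtraction handles wraparound.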
+        */
+       u16                     hand;
+       u16                     max_last_io;
+
+       int                     rw;
+
+       struct io_timer         rescale;
+       struct mutex            lock;
+};
+
+/* There is one reserve for each type of btree, one for prios and gens
+ * and one for moving GC */
+enum alloc_reserve {
+       RESERVE_ALLOC           = -1,
+       RESERVE_BTREE           = 0,
+       RESERVE_MOVINGGC        = 1,
+       RESERVE_NONE            = 2,
+       RESERVE_NR              = 3,
+};
+
+typedef FIFO(long)     alloc_fifo;
+
+#define OPEN_BUCKETS_COUNT     1024
+
+#define WRITE_POINT_HASH_NR    32
+#define WRITE_POINT_MAX                32
+
+typedef u16                    open_bucket_idx_t;
+
+struct open_bucket {
+       spinlock_t              lock;
+       atomic_t                pin;
+       open_bucket_idx_t       freelist;
+
+       /*
+        * When an open bucket has an ec_stripe attached, this is the index of
+        * the block in the stripe this open_bucket corresponds to:
+        */
+       u8                      ec_idx;
+       u8                      type;
+       unsigned                valid:1;
+       unsigned                on_partial_list:1;
+       int                     alloc_reserve:3;
+       unsigned                sectors_free;
+       struct bch_extent_ptr   ptr;
+       struct ec_stripe_new    *ec;
+};
+
+#define OPEN_BUCKET_LIST_MAX   15
+
+struct open_buckets {
+       open_bucket_idx_t       nr;
+       open_bucket_idx_t       v[OPEN_BUCKET_LIST_MAX];
+};
+
+struct dev_stripe_state {
+       u64                     next_alloc[BCH_SB_MEMBERS_MAX];
+};
+
+struct write_point {
+       struct hlist_node       node;
+       struct mutex            lock;
+       u64                     last_used;
+       unsigned long           write_point;
+       enum bch_data_type      type;
+       bool                    is_ec;
+
+       /* calculated based on how many pointers we're actually going to use: */
+       unsigned                sectors_free;
+
+       struct open_buckets     ptrs;
+       struct dev_stripe_state stripe;
+};
+
+struct write_point_specifier {
+       unsigned long           v;
+};
+
+struct alloc_heap_entry {
+       size_t                  bucket;
+       size_t                  nr;
+       unsigned long           key;
+};
+
+typedef HEAP(struct alloc_heap_entry) alloc_heap;
+
+#endif /* _BCACHEFS_ALLOC_TYPES_H */
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
new file mode 100644 (file)
index 0000000..29f4116
--- /dev/null
@@ -0,0 +1,882 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_H
+#define _BCACHEFS_H
+
+/*
+ * SOME HIGH LEVEL CODE DOCUMENTATION:
+ *
+ * Bcache mostly works with cache sets, cache devices, and backing devices.
+ *
+ * Support for multiple cache devices hasn't quite been finished off yet, but
+ * it's about 95% plumbed through. A cache set and its cache devices is sort of
+ * like a md raid array and its component devices. Most of the code doesn't care
+ * about individual cache devices, the main abstraction is the cache set.
+ *
+ * Multiple cache devices is intended to give us the ability to mirror dirty
+ * cached data and metadata, without mirroring clean cached data.
+ *
+ * Backing devices are different, in that they have a lifetime independent of a
+ * cache set. When you register a newly formatted backing device it'll come up
+ * in passthrough mode, and then you can attach and detach a backing device from
+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly
+ * invalidates any cached data for that backing device.
+ *
+ * A cache set can have multiple (many) backing devices attached to it.
+ *
+ * There's also flash only volumes - this is the reason for the distinction
+ * between struct cached_dev and struct bcache_device. A flash only volume
+ * works much like a bcache device that has a backing device, except the
+ * "cached" data is always dirty. The end result is that we get thin
+ * provisioning with very little additional code.
+ *
+ * Flash only volumes work but they're not production ready because the moving
+ * garbage collector needs more work. More on that later.
+ *
+ * BUCKETS/ALLOCATION:
+ *
+ * Bcache is primarily designed for caching, which means that in normal
+ * operation all of our available space will be allocated. Thus, we need an
+ * efficient way of deleting things from the cache so we can write new things to
+ * it.
+ *
+ * To do this, we first divide the cache device up into buckets. A bucket is the
+ * unit of allocation; they're typically around 1MB - anywhere from 128k to 2MB+
+ * works efficiently.
+ *
+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with
+ * it. The gens and priorities for all the buckets are stored contiguously and
+ * packed on disk (in a linked list of buckets - aside from the superblock, all
+ * of bcache's metadata is stored in buckets).
+ *
+ * The priority is used to implement an LRU. We reset a bucket's priority when
+ * we allocate it or on a cache hit, and every so often we decrement the priority
+ * of each bucket. It could be used to implement something more sophisticated,
+ * if anyone ever gets around to it.
+ *
+ * The generation is used for invalidating buckets. Each pointer also has an 8
+ * bit generation embedded in it; for a pointer to be considered valid, its gen
+ * must match the gen of the bucket it points into.  Thus, to reuse a bucket all
+ * we have to do is increment its gen (and write its new gen to disk; we batch
+ * this up).
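+ *
+ * For example (illustrative numbers): a pointer with gen 3 into a bucket
+ * whose gen is 3 is valid; once the bucket's gen is incremented to 4 the
+ * pointer is stale, and keys carrying it are filtered out as described
+ * under THE BTREE below.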
+ *
+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that
+ * contain metadata (including btree nodes).
+ *
+ * THE BTREE:
+ *
+ * Bcache is in large part designed around the btree.
+ *
+ * At a high level, the btree is just an index of key -> ptr tuples.
+ *
+ * Keys represent extents, and thus have a size field. Keys also have a variable
+ * number of pointers attached to them (potentially zero, which is handy for
+ * invalidating the cache).
+ *
+ * The key itself is an inode:offset pair. The inode number corresponds to a
+ * backing device or a flash only volume. The offset is the ending offset of the
+ * extent within the inode - not the starting offset; this makes lookups
+ * slightly more convenient.
+ *
+ * Pointers contain the cache device id, the offset on that device, and an 8 bit
+ * generation number. More on the gen later.
+ *
+ * Index lookups are not fully abstracted - cache lookups in particular are
+ * still somewhat mixed in with the btree code, but things are headed in that
+ * direction.
+ *
+ * Updates are fairly well abstracted, though. There are two different ways of
+ * updating the btree; insert and replace.
+ *
+ * BTREE_INSERT will just take a list of keys and insert them into the btree -
+ * overwriting (possibly only partially) any extents they overlap with. This is
+ * used to update the index after a write.
+ *
+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is
+ * overwriting a key that matches another given key. This is used for inserting
+ * data into the cache after a cache miss, and for background writeback, and for
+ * the moving garbage collector.
+ *
+ * There is no "delete" operation; deleting things from the index is
+ * accomplished either by invalidating pointers (by incrementing a bucket's
+ * gen) or by inserting a key with 0 pointers - which will overwrite anything
+ * previously present at that location in the index.
+ *
+ * This means that there are always stale/invalid keys in the btree. They're
+ * filtered out by the code that iterates through a btree node, and removed when
+ * a btree node is rewritten.
+ *
+ * BTREE NODES:
+ *
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
+ * free smaller than a bucket - so, that's how big our btree nodes are.
+ *
+ * (If buckets are really big we'll only use part of the bucket for a btree node
+ * - no less than 1/4th - but a bucket still contains no more than a single
+ * btree node. I'd actually like to change this, but for now we rely on the
+ * bucket's gen for deleting btree nodes when we rewrite/split a node.)
+ *
+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook
+ * btree implementation.
+ *
+ * The way this is solved is that btree nodes are internally log structured; we
+ * can append new keys to an existing btree node without rewriting it. This
+ * means each set of keys we write is sorted, but the node is not.
+ *
+ * We maintain this log structure in memory - keeping 1MB of keys sorted would
+ * be expensive, and we have to distinguish between the keys we have written and
+ * the keys we haven't. So to do a lookup in a btree node, we have to search
+ * each sorted set. But we do merge written sets together lazily, so the cost of
+ * these extra searches is quite low (normally most of the keys in a btree node
+ * will be in one big set, and then there'll be one or two sets that are much
+ * smaller).
+ *
+ * This log structure makes bcache's btree more of a hybrid between a
+ * conventional btree and a compacting data structure, with some of the
+ * advantages of both.
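+ *
+ * Concretely (illustration only): a lookup binary searches each sorted set
+ * in the node, so with one big merged set plus one or two small unmerged
+ * ones the total cost stays close to that of a single binary search.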
+ *
+ * GARBAGE COLLECTION:
+ *
+ * We can't just invalidate any bucket - it might contain dirty data or
+ * metadata. If it once contained dirty data, other writes might overwrite it
+ * later, leaving no valid pointers into that bucket in the index.
+ *
+ * Thus, the primary purpose of garbage collection is to find buckets to reuse.
+ * It also counts how much valid data each bucket currently contains, so that
+ * allocation can reuse buckets sooner when they've been mostly overwritten.
+ *
+ * It also does some things that are really internal to the btree
+ * implementation. If a btree node contains pointers that are stale by more than
+ * some threshold, it rewrites the btree node to avoid the bucket's generation
+ * wrapping around. It also merges adjacent btree nodes if they're empty enough.
+ *
+ * THE JOURNAL:
+ *
+ * Bcache's journal is not necessary for consistency; we always strictly
+ * order metadata writes so that the btree and everything else is consistent on
+ * disk in the event of an unclean shutdown, and in fact bcache had writeback
+ * caching (with recovery from unclean shutdown) before journalling was
+ * implemented.
+ *
+ * Rather, the journal is purely a performance optimization; we can't complete a
+ * write until we've updated the index on disk, otherwise the cache would be
+ * inconsistent in the event of an unclean shutdown. This means that without the
+ * journal, on random write workloads we constantly have to update all the leaf
+ * nodes in the btree, and those writes will be mostly empty (appending at most
+ * a few keys each) - highly inefficient in terms of amount of metadata writes,
+ * and it puts more strain on the various btree resorting/compacting code.
+ *
+ * The journal is just a log of keys we've inserted; on startup we just reinsert
+ * all the keys in the open journal entries. That means that when we're updating
+ * a node in the btree, we can wait until a 4k block of keys fills up before
+ * writing them out.
+ *
+ * For simplicity, we only journal updates to leaf nodes; updates to parent
+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth
+ * the complexity to deal with journalling them (in particular, journal replay)
+ * - updates to non leaf nodes just happen synchronously (see btree_split()).
+ */
+
+#undef pr_fmt
+#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
+
+#include <linux/bug.h>
+#include <linux/bio.h>
+#include <linux/closure.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/math64.h>
+#include <linux/mutex.h>
+#include <linux/percpu-refcount.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rhashtable.h>
+#include <linux/rwsem.h>
+#include <linux/semaphore.h>
+#include <linux/seqlock.h>
+#include <linux/shrinker.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/zstd.h>
+
+#include "bcachefs_format.h"
+#include "fifo.h"
+#include "opts.h"
+#include "util.h"
+
+#define dynamic_fault(...)             0
+#define race_fault(...)                        0
+
+#define bch2_fs_init_fault(name)                                       \
+       dynamic_fault("bcachefs:bch_fs_init:" name)
+#define bch2_meta_read_fault(name)                                     \
+        dynamic_fault("bcachefs:meta:read:" name)
+#define bch2_meta_write_fault(name)                                    \
+        dynamic_fault("bcachefs:meta:write:" name)
+
+#ifdef __KERNEL__
+#define bch2_fmt(_c, fmt)      "bcachefs (%s): " fmt "\n", ((_c)->name)
+#else
+#define bch2_fmt(_c, fmt)      fmt "\n"
+#endif
+
+#define bch_info(c, fmt, ...) \
+       printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_notice(c, fmt, ...) \
+       printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_warn(c, fmt, ...) \
+       printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_warn_ratelimited(c, fmt, ...) \
+       printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_err(c, fmt, ...) \
+       printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_err_ratelimited(c, fmt, ...) \
+       printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
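+
+/*
+ * E.g. bch_info(c, "mounted") prints "bcachefs (<name>): mounted" in the
+ * kernel build (via bch2_fmt()), and just "mounted" in userspace.
+ */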
+
+#define bch_verbose(c, fmt, ...)                                       \
+do {                                                                   \
+       if ((c)->opts.verbose)                                          \
+               bch_info(c, fmt, ##__VA_ARGS__);                        \
+} while (0)
+
+#define pr_verbose_init(opts, fmt, ...)                                        \
+do {                                                                   \
+       if (opt_get(opts, verbose))                                     \
+               pr_info(fmt, ##__VA_ARGS__);                            \
+} while (0)
+
+/* Parameters that are useful for debugging, but should always be compiled in: */
+#define BCH_DEBUG_PARAMS_ALWAYS()                                      \
+       BCH_DEBUG_PARAM(key_merging_disabled,                           \
+               "Disables merging of extents")                          \
+       BCH_DEBUG_PARAM(btree_gc_always_rewrite,                        \
+               "Causes mark and sweep to compact and rewrite every "   \
+               "btree node it traverses")                              \
+       BCH_DEBUG_PARAM(btree_gc_rewrite_disabled,                      \
+               "Disables rewriting of btree nodes during mark and sweep")\
+       BCH_DEBUG_PARAM(btree_shrinker_disabled,                        \
+               "Disables the shrinker callback for the btree node cache")
+
+/* Parameters that should only be compiled in in debug mode: */
+#define BCH_DEBUG_PARAMS_DEBUG()                                       \
+       BCH_DEBUG_PARAM(expensive_debug_checks,                         \
+               "Enables various runtime debugging checks that "        \
+               "significantly affect performance")                     \
+       BCH_DEBUG_PARAM(debug_check_iterators,                          \
+               "Enables extra verification for btree iterators")       \
+       BCH_DEBUG_PARAM(debug_check_bkeys,                              \
+               "Run bkey_debugcheck (primarily checking GC/allocation "\
+               "information) when iterating over keys")                \
+       BCH_DEBUG_PARAM(verify_btree_ondisk,                            \
+               "Reread btree nodes at various points to verify the "   \
+               "mergesort in the read path against modifications "     \
+               "done in memory")                                       \
+       BCH_DEBUG_PARAM(journal_seq_verify,                             \
+               "Store the journal sequence number in the version "     \
+               "number of every btree key, and verify that btree "     \
+               "update ordering is preserved during recovery")         \
+       BCH_DEBUG_PARAM(inject_invalid_keys,                            \
+               "Store an invalid version number in every new btree "   \
+               "key, to test handling of invalid keys")                \
+       BCH_DEBUG_PARAM(test_alloc_startup,                             \
+               "Force allocator startup to use the slowpath where it"  \
+               "can't find enough free buckets without invalidating"   \
+               "cached data")                                          \
+       BCH_DEBUG_PARAM(force_reconstruct_read,                         \
+               "Force reads to use the reconstruct path, when reading" \
+               "from erasure coded extents")                           \
+       BCH_DEBUG_PARAM(test_restart_gc,                                \
+               "Test restarting mark and sweep gc when bucket gens change")
+
+#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL()
+#else
+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS()
+#endif
+
+#define BCH_TIME_STATS()                       \
+       x(btree_node_mem_alloc)                 \
+       x(btree_node_split)                     \
+       x(btree_node_sort)                      \
+       x(btree_node_read)                      \
+       x(btree_gc)                             \
+       x(btree_lock_contended_read)            \
+       x(btree_lock_contended_intent)          \
+       x(btree_lock_contended_write)           \
+       x(data_write)                           \
+       x(data_read)                            \
+       x(data_promote)                         \
+       x(journal_write)                        \
+       x(journal_delay)                        \
+       x(journal_flush_seq)                    \
+       x(blocked_journal)                      \
+       x(blocked_allocate)                     \
+       x(blocked_allocate_open_bucket)
+
+enum bch_time_stats {
+#define x(name) BCH_TIME_##name,
+       BCH_TIME_STATS()
+#undef x
+       BCH_TIME_STAT_NR
+};
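+
+/*
+ * The x-macro expands each BCH_TIME_STATS() entry to an enum constant, e.g.
+ * x(btree_node_split) -> BCH_TIME_btree_node_split; BCH_TIME_STAT_NR is the
+ * number of entries.
+ */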
+
+#include "alloc_types.h"
+#include "btree_types.h"
+#include "buckets_types.h"
+#include "clock_types.h"
+#include "ec_types.h"
+#include "journal_types.h"
+#include "keylist_types.h"
+#include "quota_types.h"
+#include "rebalance_types.h"
+#include "replicas_types.h"
+#include "super_types.h"
+
+/* Number of nodes btree coalesce will try to coalesce at once */
+#define GC_MERGE_NODES         4U
+
+/* Maximum number of nodes we might need to allocate atomically: */
+#define BTREE_RESERVE_MAX      (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
+
+/* Size of the freelist we allocate btree nodes from: */
+#define BTREE_NODE_RESERVE     (BTREE_RESERVE_MAX * 4)
+
+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)
+
+struct btree;
+
+enum gc_phase {
+       GC_PHASE_NOT_RUNNING,
+       GC_PHASE_START,
+       GC_PHASE_SB,
+
+       GC_PHASE_BTREE_EC,
+       GC_PHASE_BTREE_EXTENTS,
+       GC_PHASE_BTREE_INODES,
+       GC_PHASE_BTREE_DIRENTS,
+       GC_PHASE_BTREE_XATTRS,
+       GC_PHASE_BTREE_ALLOC,
+       GC_PHASE_BTREE_QUOTAS,
+       GC_PHASE_BTREE_REFLINK,
+
+       GC_PHASE_PENDING_DELETE,
+       GC_PHASE_ALLOC,
+};
+
+struct gc_pos {
+       enum gc_phase           phase;
+       struct bpos             pos;
+       unsigned                level;
+};
+
+struct io_count {
+       u64                     sectors[2][BCH_DATA_NR];
+};
+
+struct bch_dev {
+       struct kobject          kobj;
+       struct percpu_ref       ref;
+       struct completion       ref_completion;
+       struct percpu_ref       io_ref;
+       struct completion       io_ref_completion;
+
+       struct bch_fs           *fs;
+
+       u8                      dev_idx;
+       /*
+        * Cached version of this device's member info from superblock
+        * Committed by bch2_write_super() -> bch_fs_mi_update()
+        */
+       struct bch_member_cpu   mi;
+       uuid_le                 uuid;
+       char                    name[BDEVNAME_SIZE];
+
+       struct bch_sb_handle    disk_sb;
+       struct bch_sb           *sb_read_scratch;
+       int                     sb_write_error;
+
+       struct bch_devs_mask    self;
+
+       /* biosets used in cloned bios for writing multiple replicas */
+       struct bio_set          replica_set;
+
+       /*
+        * Buckets:
+        * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
+        * gc_lock, for device resize - holding any is sufficient for access:
+        * Or rcu_read_lock(), but only for ptr_stale():
+        */
+       struct bucket_array __rcu *buckets[2];
+       unsigned long           *buckets_nouse;
+       struct rw_semaphore     bucket_lock;
+
+       struct bch_dev_usage __percpu *usage[2];
+
+       /* Allocator: */
+       struct task_struct __rcu *alloc_thread;
+
+       /*
+        * free: Buckets that are ready to be used
+        *
+        * free_inc: Incoming buckets - these are buckets that currently have
+        * cached data in them, and we can't reuse them until after we write
+        * their new gen to disk. After prio_write() finishes writing the new
+        * gens/prios, they'll be moved to the free list (and possibly discarded
+        * in the process)
+        */
+       alloc_fifo              free[RESERVE_NR];
+       alloc_fifo              free_inc;
+
+       open_bucket_idx_t       open_buckets_partial[OPEN_BUCKETS_COUNT];
+       open_bucket_idx_t       open_buckets_partial_nr;
+
+       size_t                  fifo_last_bucket;
+
+       /* last calculated minimum prio */
+       u16                     max_last_bucket_io[2];
+
+       size_t                  inc_gen_needs_gc;
+       size_t                  inc_gen_really_needs_gc;
+
+       /*
+        * XXX: this should be an enum for allocator state, so as to include
+        * error state
+        */
+       enum {
+               ALLOCATOR_STOPPED,
+               ALLOCATOR_RUNNING,
+               ALLOCATOR_BLOCKED,
+               ALLOCATOR_BLOCKED_FULL,
+       }                       allocator_state;
+
+       alloc_heap              alloc_heap;
+
+       atomic64_t              rebalance_work;
+
+       struct journal_device   journal;
+
+       struct work_struct      io_error_work;
+
+       /* The rest of this all shows up in sysfs */
+       atomic64_t              cur_latency[2];
+       struct time_stats       io_latency[2];
+
+#define CONGESTED_MAX          1024
+       atomic_t                congested;
+       u64                     congested_last;
+
+       struct io_count __percpu *io_done;
+};
+
+enum {
+       /* startup: */
+       BCH_FS_ALLOC_READ_DONE,
+       BCH_FS_ALLOC_CLEAN,
+       BCH_FS_ALLOCATOR_RUNNING,
+       BCH_FS_ALLOCATOR_STOPPING,
+       BCH_FS_INITIAL_GC_DONE,
+       BCH_FS_BTREE_INTERIOR_REPLAY_DONE,
+       BCH_FS_FSCK_DONE,
+       BCH_FS_STARTED,
+       BCH_FS_RW,
+
+       /* shutdown: */
+       BCH_FS_STOPPING,
+       BCH_FS_EMERGENCY_RO,
+       BCH_FS_WRITE_DISABLE_COMPLETE,
+
+       /* errors: */
+       BCH_FS_ERROR,
+       BCH_FS_ERRORS_FIXED,
+
+       /* misc: */
+       BCH_FS_FIXED_GENS,
+       BCH_FS_ALLOC_WRITTEN,
+       BCH_FS_REBUILD_REPLICAS,
+       BCH_FS_HOLD_BTREE_WRITES,
+};
+
+struct btree_debug {
+       unsigned                id;
+       struct dentry           *btree;
+       struct dentry           *btree_format;
+       struct dentry           *failed;
+};
+
+struct bch_fs_pcpu {
+       u64                     sectors_available;
+};
+
+struct journal_seq_blacklist_table {
+       size_t                  nr;
+       struct journal_seq_blacklist_table_entry {
+               u64             start;
+               u64             end;
+               bool            dirty;
+       }                       entries[0];
+};
+
+struct journal_keys {
+       struct journal_key {
+               enum btree_id   btree_id:8;
+               unsigned        level:8;
+               struct bkey_i   *k;
+               u32             journal_seq;
+               u32             journal_offset;
+       }                       *d;
+       size_t                  nr;
+       u64                     journal_seq_base;
+};
+
+struct bch_fs {
+       struct closure          cl;
+
+       struct list_head        list;
+       struct kobject          kobj;
+       struct kobject          internal;
+       struct kobject          opts_dir;
+       struct kobject          time_stats;
+       unsigned long           flags;
+
+       int                     minor;
+       struct device           *chardev;
+       struct super_block      *vfs_sb;
+       char                    name[40];
+
+       /* ro/rw, add/remove/resize devices: */
+       struct rw_semaphore     state_lock;
+
+       /* Counts outstanding writes, for clean transition to read-only */
+       struct percpu_ref       writes;
+       struct work_struct      read_only_work;
+
+       struct bch_dev __rcu    *devs[BCH_SB_MEMBERS_MAX];
+
+       struct bch_replicas_cpu replicas;
+       struct bch_replicas_cpu replicas_gc;
+       struct mutex            replicas_gc_lock;
+
+       struct journal_entry_res replicas_journal_res;
+
+       struct bch_disk_groups_cpu __rcu *disk_groups;
+
+       struct bch_opts         opts;
+
+       /* Updated by bch2_sb_update(): */
+       struct {
+               uuid_le         uuid;
+               uuid_le         user_uuid;
+
+               u16             version;
+               u16             encoded_extent_max;
+
+               u8              nr_devices;
+               u8              clean;
+
+               u8              encryption_type;
+
+               u64             time_base_lo;
+               u32             time_base_hi;
+               u32             time_precision;
+               u64             features;
+               u64             compat;
+       }                       sb;
+
+       struct bch_sb_handle    disk_sb;
+
+       unsigned short          block_bits;     /* ilog2(block_size) */
+
+       u16                     btree_foreground_merge_threshold;
+
+       struct closure          sb_write;
+       struct mutex            sb_lock;
+
+       /* BTREE CACHE */
+       struct bio_set          btree_bio;
+
+       struct btree_root       btree_roots[BTREE_ID_NR];
+       struct mutex            btree_root_lock;
+
+       struct btree_cache      btree_cache;
+
+       /*
+        * Cache of allocated btree nodes - if we allocate a btree node and
+        * don't use it, if we free it that space can't be reused until going
+        * _all_ the way through the allocator (which exposes us to a livelock
+        * when allocating btree reserves fail halfway through) - instead, we
+        * can stick them here:
+        */
+       struct btree_alloc      btree_reserve_cache[BTREE_NODE_RESERVE * 2];
+       unsigned                btree_reserve_cache_nr;
+       struct mutex            btree_reserve_cache_lock;
+
+       mempool_t               btree_interior_update_pool;
+       struct list_head        btree_interior_update_list;
+       struct list_head        btree_interior_updates_unwritten;
+       struct mutex            btree_interior_update_lock;
+       struct closure_waitlist btree_interior_update_wait;
+
+       struct workqueue_struct *btree_interior_update_worker;
+       struct work_struct      btree_interior_update_work;
+
+       /* btree_iter.c: */
+       struct mutex            btree_trans_lock;
+       struct list_head        btree_trans_list;
+       mempool_t               btree_iters_pool;
+
+       struct btree_key_cache  btree_key_cache;
+
+       struct workqueue_struct *wq;
+       /* copygc needs its own workqueue for index updates */
+       struct workqueue_struct *copygc_wq;
+       struct workqueue_struct *journal_reclaim_wq;
+
+       /* ALLOCATION */
+       struct delayed_work     pd_controllers_update;
+       unsigned                pd_controllers_update_seconds;
+
+       struct bch_devs_mask    rw_devs[BCH_DATA_NR];
+
+       u64                     capacity; /* sectors */
+
+       /*
+        * When capacity _decreases_ (due to a disk being removed), we
+        * increment capacity_gen - this invalidates outstanding reservations
+        * and forces them to be revalidated
+        */
+       u32                     capacity_gen;
+       unsigned                bucket_size_max;
+
+       atomic64_t              sectors_available;
+
+       struct bch_fs_pcpu __percpu     *pcpu;
+
+       struct percpu_rw_semaphore      mark_lock;
+
+       seqcount_t                      usage_lock;
+       struct bch_fs_usage             *usage_base;
+       struct bch_fs_usage __percpu    *usage[2];
+       struct bch_fs_usage __percpu    *usage_gc;
+
+       /* single element mempool: */
+       struct mutex            usage_scratch_lock;
+       struct bch_fs_usage     *usage_scratch;
+
+       /*
+        * When we invalidate buckets, we use both the priority and the amount
+        * of good data to determine which buckets to reuse first - to weight
+        * those together consistently we keep track of the smallest nonzero
+        * priority of any bucket.
+        */
+       struct bucket_clock     bucket_clock[2];
+
+       struct io_clock         io_clock[2];
+
+       /* JOURNAL SEQ BLACKLIST */
+       struct journal_seq_blacklist_table *
+                               journal_seq_blacklist_table;
+       struct work_struct      journal_seq_blacklist_gc_work;
+
+       /* ALLOCATOR */
+       spinlock_t              freelist_lock;
+       struct closure_waitlist freelist_wait;
+       u64                     blocked_allocate;
+       u64                     blocked_allocate_open_bucket;
+       open_bucket_idx_t       open_buckets_freelist;
+       open_bucket_idx_t       open_buckets_nr_free;
+       struct closure_waitlist open_buckets_wait;
+       struct open_bucket      open_buckets[OPEN_BUCKETS_COUNT];
+
+       struct write_point      btree_write_point;
+       struct write_point      rebalance_write_point;
+
+       struct write_point      write_points[WRITE_POINT_MAX];
+       struct hlist_head       write_points_hash[WRITE_POINT_HASH_NR];
+       struct mutex            write_points_hash_lock;
+       unsigned                write_points_nr;
+
+       /* GARBAGE COLLECTION */
+       struct task_struct      *gc_thread;
+       atomic_t                kick_gc;
+       unsigned long           gc_count;
+
+       /*
+        * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos]
+        * has been marked by GC.
+        *
+        * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.)
+        *
+        * Protected by gc_pos_lock. Only written to by GC thread, so GC thread
+        * can read without a lock.
+        */
+       seqcount_t              gc_pos_lock;
+       struct gc_pos           gc_pos;
+
+       /*
+        * The allocation code needs gc_mark in struct bucket to be correct, but
+        * it's not while a gc is in progress.
+        */
+       struct rw_semaphore     gc_lock;
+
+       /* IO PATH */
+       struct semaphore        io_in_flight;
+       struct bio_set          bio_read;
+       struct bio_set          bio_read_split;
+       struct bio_set          bio_write;
+       struct mutex            bio_bounce_pages_lock;
+       mempool_t               bio_bounce_pages;
+       struct rhashtable       promote_table;
+
+       mempool_t               compression_bounce[2];
+       mempool_t               compress_workspace[BCH_COMPRESSION_TYPE_NR];
+       mempool_t               decompress_workspace;
+       ZSTD_parameters         zstd_params;
+
+       struct crypto_shash     *sha256;
+       struct crypto_sync_skcipher *chacha20;
+       struct crypto_shash     *poly1305;
+
+       atomic64_t              key_version;
+
+       mempool_t               large_bkey_pool;
+
+       /* REBALANCE */
+       struct bch_fs_rebalance rebalance;
+
+       /* COPYGC */
+       struct task_struct      *copygc_thread;
+       copygc_heap             copygc_heap;
+       struct bch_pd_controller copygc_pd;
+       struct write_point      copygc_write_point;
+       u64                     copygc_threshold;
+
+       /* STRIPES: */
+       GENRADIX(struct stripe) stripes[2];
+
+       ec_stripes_heap         ec_stripes_heap;
+       spinlock_t              ec_stripes_heap_lock;
+
+       /* ERASURE CODING */
+       struct list_head        ec_stripe_head_list;
+       struct mutex            ec_stripe_head_lock;
+
+       struct list_head        ec_stripe_new_list;
+       struct mutex            ec_stripe_new_lock;
+
+       struct work_struct      ec_stripe_create_work;
+       u64                     ec_stripe_hint;
+
+       struct bio_set          ec_bioset;
+
+       struct work_struct      ec_stripe_delete_work;
+       struct llist_head       ec_stripe_delete_list;
+
+       /* REFLINK */
+       u64                     reflink_hint;
+
+       /* VFS IO PATH - fs-io.c */
+       struct bio_set          writepage_bioset;
+       struct bio_set          dio_write_bioset;
+       struct bio_set          dio_read_bioset;
+
+       struct bio_list         btree_write_error_list;
+       struct work_struct      btree_write_error_work;
+       spinlock_t              btree_write_error_lock;
+
+       /* ERRORS */
+       struct list_head        fsck_errors;
+       struct mutex            fsck_error_lock;
+       bool                    fsck_alloc_err;
+
+       /* QUOTAS */
+       struct bch_memquota_type quotas[QTYP_NR];
+
+       /* DEBUG JUNK */
+       struct dentry           *debug;
+       struct btree_debug      btree_debug[BTREE_ID_NR];
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct btree            *verify_data;
+       struct btree_node       *verify_ondisk;
+       struct mutex            verify_lock;
+#endif
+
+       u64                     unused_inode_hint;
+
+       /*
+        * A btree node on disk could have too many bsets for an iterator to fit
+        * on the stack - have to dynamically allocate them
+        */
+       mempool_t               fill_iter;
+
+       mempool_t               btree_bounce_pool;
+
+       struct journal          journal;
+       struct list_head        journal_entries;
+       struct journal_keys     journal_keys;
+
+       u64                     last_bucket_seq_cleanup;
+
+       /* The rest of this all shows up in sysfs */
+       atomic_long_t           read_realloc_races;
+       atomic_long_t           extent_migrate_done;
+       atomic_long_t           extent_migrate_raced;
+
+       unsigned                btree_gc_periodic:1;
+       unsigned                copy_gc_enabled:1;
+       bool                    promote_whole_extents;
+
+#define BCH_DEBUG_PARAM(name, description) bool name;
+       BCH_DEBUG_PARAMS_ALL()
+#undef BCH_DEBUG_PARAM
+
+       struct time_stats       times[BCH_TIME_STAT_NR];
+};
+
+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
+{
+#ifndef NO_BCACHEFS_FS
+       if (c->vfs_sb)
+               c->vfs_sb->s_bdi->ra_pages = ra_pages;
+#endif
+}
+
+static inline unsigned bucket_bytes(const struct bch_dev *ca)
+{
+       return ca->mi.bucket_size << 9;
+}
+
+static inline unsigned block_bytes(const struct bch_fs *c)
+{
+       return c->opts.block_size << 9;
+}
+
+static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
+{
+       return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
+}
+
+static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
+{
+       s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
+
+       if (c->sb.time_precision == 1)
+               return ns;
+
+       return div_s64(ns, c->sb.time_precision);
+}
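+
+/*
+ * Illustrative example (not upstream code), assuming a filesystem with
+ * time_precision == 1 and time_base_lo == 0: bch2 times are then plain
+ * nanoseconds since the epoch, and the two helpers above are exact inverses:
+ *
+ *	struct timespec64 ts = { .tv_sec = 1, .tv_nsec = 500 };
+ *	s64 t = timespec_to_bch2_time(c, ts);	// 1000000500
+ *	ts = bch2_time_to_timespec(c, t);	// { 1, 500 } again
+ *
+ * With a coarser time_precision, div_s64() truncates, so sub-precision
+ * nanoseconds are lost in the round trip.
+ */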
+
+static inline s64 bch2_current_time(struct bch_fs *c)
+{
+       struct timespec64 now;
+
+       ktime_get_coarse_real_ts64(&now);
+       return timespec_to_bch2_time(c, now);
+}
+
+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
+{
+       return dev < c->sb.nr_devices && c->devs[dev];
+}
+
+#endif /* _BCACHEFS_H */
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
new file mode 100644 (file)
index 0000000..d5a2230
--- /dev/null
@@ -0,0 +1,1671 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FORMAT_H
+#define _BCACHEFS_FORMAT_H
+
+/*
+ * bcachefs on disk data structures
+ *
+ * OVERVIEW:
+ *
+ * There are three main types of on disk data structures in bcachefs (this is
+ * reduced from 5 in bcache)
+ *
+ *  - superblock
+ *  - journal
+ *  - btree
+ *
+ * The btree is the primary structure; most metadata exists as keys in the
+ * various btrees. There are only a small number of btrees, and they're not
+ * sharded - we have one btree for extents, another for inodes, et cetera.
+ *
+ * SUPERBLOCK:
+ *
+ * The superblock contains the location of the journal, the list of devices in
+ * the filesystem, and in general any metadata we need in order to decide
+ * whether we can start the filesystem, or that we need prior to reading the
+ * journal/btree roots.
+ *
+ * The superblock is extensible, and most of the contents of the superblock are
+ * in variable length, type tagged fields; see struct bch_sb_field.
+ *
+ * Backup superblocks do not reside in a fixed location; also, superblocks do
+ * not have a fixed size. To locate backup superblocks we have struct
+ * bch_sb_layout; we store a copy of this inside every superblock, and also
+ * before the first superblock.
+ *
+ * JOURNAL:
+ *
+ * The journal primarily records btree updates in the order they occurred;
+ * journal replay consists of just iterating over all the keys in the open
+ * journal entries and re-inserting them into the btrees.
+ *
+ * The journal also contains entry types for the btree roots, and blacklisted
+ * journal sequence numbers (see journal_seq_blacklist.c).
+ *
+ * BTREE:
+ *
+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically
+ * 128k-256k) and log structured. We use struct btree_node for writing the first
+ * entry in a given node (offset 0), and struct btree_node_entry for all
+ * subsequent writes.
+ *
+ * After the header, btree node entries contain a list of keys in sorted order.
+ * Values are stored inline with the keys; since values are variable length (and
+ * keys effectively are variable length too, due to packing) we can't do random
+ * access without building up additional in memory tables in the btree node read
+ * path.
+ *
+ * BTREE KEYS (struct bkey):
+ *
+ * The various btrees share a common format for the key - so as to avoid
+ * switching in fastpath lookup/comparison code - but define their own
+ * structures for the key values.
+ *
+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max
+ * size is just under 2k. The common part also contains a type tag for the
+ * value, and a format field indicating whether the key is packed or not (and
+ * also meant to allow adding new key fields in the future, if desired).
+ *
+ * bkeys, when stored within a btree node, may also be packed. In that case, the
+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can
+ * be generous with field sizes in the common part of the key format (64 bit
+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost.
+ */
+
+#include <asm/types.h>
+#include <asm/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/uuid.h>
+
+#define LE_BITMASK(_bits, name, type, field, offset, end)              \
+static const unsigned  name##_OFFSET = offset;                         \
+static const unsigned  name##_BITS = (end - offset);                   \
+static const __u##_bits        name##_MAX = (1ULL << (end - offset)) - 1;      \
+                                                                       \
+static inline __u64 name(const type *k)                                        \
+{                                                                      \
+       return (__le##_bits##_to_cpu(k->field) >> offset) &             \
+               ~(~0ULL << (end - offset));                             \
+}                                                                      \
+                                                                       \
+static inline void SET_##name(type *k, __u64 v)                                \
+{                                                                      \
+       __u##_bits new = __le##_bits##_to_cpu(k->field);                \
+                                                                       \
+       new &= ~(~(~0ULL << (end - offset)) << offset);                 \
+       new |= (v & ~(~0ULL << (end - offset))) << offset;              \
+       k->field = __cpu_to_le##_bits(new);                             \
+}
+
+#define LE16_BITMASK(n, t, f, o, e)    LE_BITMASK(16, n, t, f, o, e)
+#define LE32_BITMASK(n, t, f, o, e)    LE_BITMASK(32, n, t, f, o, e)
+#define LE64_BITMASK(n, t, f, o, e)    LE_BITMASK(64, n, t, f, o, e)
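+
+/*
+ * Illustrative sketch (not upstream code): an invocation such as
+ *
+ *	LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2);
+ *
+ * (a real one from later in this header) expands to the accessor pair
+ *
+ *	__u64 BCH_SB_CLEAN(const struct bch_sb *k);
+ *	void SET_BCH_SB_CLEAN(struct bch_sb *k, __u64 v);
+ *
+ * which read and write bit 1 of the little-endian flags[0] word, performing
+ * the endianness conversion on every access.
+ */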
+
+struct bkey_format {
+       __u8            key_u64s;
+       __u8            nr_fields;
+       /* One unused slot for now: */
+       __u8            bits_per_field[6];
+       __le64          field_offset[6];
+};
+
+/* Btree keys - all units are in sectors */
+
+struct bpos {
+       /*
+        * Word order matches machine byte order - btree code treats a bpos as a
+        * single large integer, for search/comparison purposes
+        *
+        * Note that wherever a bpos is embedded in another on disk data
+        * structure, it has to be byte swabbed when reading in metadata that
+        * wasn't written in native endian order:
+        */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+       __u32           snapshot;
+       __u64           offset;
+       __u64           inode;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+       __u64           inode;
+       __u64           offset;         /* Points to end of extent - sectors */
+       __u32           snapshot;
+#else
+#error edit for your odd byteorder.
+#endif
+} __attribute__((packed, aligned(4)));
+
+#define KEY_INODE_MAX                  ((__u64)~0ULL)
+#define KEY_OFFSET_MAX                 ((__u64)~0ULL)
+#define KEY_SNAPSHOT_MAX               ((__u32)~0U)
+#define KEY_SIZE_MAX                   ((__u32)~0U)
+
+static inline struct bpos POS(__u64 inode, __u64 offset)
+{
+       struct bpos ret;
+
+       ret.inode       = inode;
+       ret.offset      = offset;
+       ret.snapshot    = 0;
+
+       return ret;
+}
+
+#define POS_MIN                                POS(0, 0)
+#define POS_MAX                                POS(KEY_INODE_MAX, KEY_OFFSET_MAX)
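+
+/*
+ * Illustrative note: since inode is the most significant word of the "single
+ * large integer", bpos comparison orders by inode, then offset, then
+ * snapshot - e.g. POS(1, 0) sorts after POS(0, KEY_OFFSET_MAX), so all the
+ * keys for a given inode are adjacent in the btree, in offset order.
+ */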
+
+/* Empty placeholder struct, for container_of() */
+struct bch_val {
+       __u64           __nothing[0];
+};
+
+struct bversion {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+       __u64           lo;
+       __u32           hi;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+       __u32           hi;
+       __u64           lo;
+#endif
+} __attribute__((packed, aligned(4)));
+
+struct bkey {
+       /* Size of combined key and value, in u64s */
+       __u8            u64s;
+
+       /* Format of key (0 for format local to btree node) */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8            format:7,
+                       needs_whiteout:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u8            needs_whiteout:1,
+                       format:7;
+#else
+#error edit for your odd byteorder.
+#endif
+
+       /* Type of the value */
+       __u8            type;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+       __u8            pad[1];
+
+       struct bversion version;
+       __u32           size;           /* extent size, in sectors */
+       struct bpos     p;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+       struct bpos     p;
+       __u32           size;           /* extent size, in sectors */
+       struct bversion version;
+
+       __u8            pad[1];
+#endif
+} __attribute__((packed, aligned(8)));
+
+struct bkey_packed {
+       __u64           _data[0];
+
+       /* Size of combined key and value, in u64s */
+       __u8            u64s;
+
+       /* Format of key (0 for format local to btree node) */
+
+       /*
+        * XXX: next incompat on disk format change, switch format and
+        * needs_whiteout - bkey_packed() will be cheaper if format is the high
+        * bits of the bitfield
+        */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8            format:7,
+                       needs_whiteout:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u8            needs_whiteout:1,
+                       format:7;
+#endif
+
+       /* Type of the value */
+       __u8            type;
+       __u8            key_start[0];
+
+       /*
+        * We copy bkeys with struct assignment in various places, and while
+        * that shouldn't be done with packed bkeys we can't disallow it in C,
+        * and it's legal to cast a bkey to a bkey_packed - so padding it out
+        * to the same size as struct bkey should hopefully be safest.
+        */
+       __u8            pad[sizeof(struct bkey) - 3];
+} __attribute__((packed, aligned(8)));
+
+#define BKEY_U64s                      (sizeof(struct bkey) / sizeof(__u64))
+#define BKEY_U64s_MAX                  U8_MAX
+#define BKEY_VAL_U64s_MAX              (BKEY_U64s_MAX - BKEY_U64s)
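+
+/*
+ * Worked out from the definitions above (illustrative): struct bkey is 40
+ * bytes - 3 header bytes (u64s, format, type), 1 pad byte, a 12 byte
+ * bversion, a 4 byte size and a 20 byte bpos - so BKEY_U64s is 5, and a value
+ * can be up to BKEY_VAL_U64s_MAX = 250 u64s (2000 bytes): "just under 2k", as
+ * the overview says.
+ */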
+
+#define KEY_PACKED_BITS_START          24
+
+#define KEY_FORMAT_LOCAL_BTREE         0
+#define KEY_FORMAT_CURRENT             1
+
+enum bch_bkey_fields {
+       BKEY_FIELD_INODE,
+       BKEY_FIELD_OFFSET,
+       BKEY_FIELD_SNAPSHOT,
+       BKEY_FIELD_SIZE,
+       BKEY_FIELD_VERSION_HI,
+       BKEY_FIELD_VERSION_LO,
+       BKEY_NR_FIELDS,
+};
+
+#define bkey_format_field(name, field)                                 \
+       [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8)
+
+#define BKEY_FORMAT_CURRENT                                            \
+((struct bkey_format) {                                                        \
+       .key_u64s       = BKEY_U64s,                                    \
+       .nr_fields      = BKEY_NR_FIELDS,                               \
+       .bits_per_field = {                                             \
+               bkey_format_field(INODE,        p.inode),               \
+               bkey_format_field(OFFSET,       p.offset),              \
+               bkey_format_field(SNAPSHOT,     p.snapshot),            \
+               bkey_format_field(SIZE,         size),                  \
+               bkey_format_field(VERSION_HI,   version.hi),            \
+               bkey_format_field(VERSION_LO,   version.lo),            \
+       },                                                              \
+})
+
+/* bkey with inline value */
+struct bkey_i {
+       __u64                   _data[0];
+
+       union {
+       struct {
+               /* Size of combined key and value, in u64s */
+               __u8            u64s;
+       };
+       struct {
+               struct bkey     k;
+               struct bch_val  v;
+       };
+       };
+};
+
+#define KEY(_inode, _offset, _size)                                    \
+((struct bkey) {                                                       \
+       .u64s           = BKEY_U64s,                                    \
+       .format         = KEY_FORMAT_CURRENT,                           \
+       .p              = POS(_inode, _offset),                         \
+       .size           = _size,                                        \
+})
+
+static inline void bkey_init(struct bkey *k)
+{
+       *k = KEY(0, 0, 0);
+}
+
+#define bkey_bytes(_k)         ((_k)->u64s * sizeof(__u64))
+
+#define __BKEY_PADDED(key, pad)                                        \
+       struct { struct bkey_i key; __u64 key ## _pad[pad]; }
+
+/*
+ * - DELETED keys are used internally to mark keys that should be ignored but
+ *   override keys in composition order.  Their version number is ignored.
+ *
+ * - DISCARDED keys indicate that the data is all 0s because it has been
+ *   discarded. DISCARDs may have a version; if the version is nonzero the key
+ *   will be persistent, otherwise the key will be dropped whenever the btree
+ *   node is rewritten (like DELETED keys).
+ *
+ * - ERROR: any read of the data returns a read error, as the data was lost due
+ *   to a failing device. Like DISCARDED keys, they can be removed (overridden)
+ *   by new writes or cluster-wide GC. Node repair can also overwrite them with
+ *   the same or a more recent version number, but not with an older version
+ *   number.
+ *
+ * - WHITEOUT: for hash table btrees
+ */
+#define BCH_BKEY_TYPES()                               \
+       x(deleted,              0)                      \
+       x(discard,              1)                      \
+       x(error,                2)                      \
+       x(cookie,               3)                      \
+       x(whiteout,             4)                      \
+       x(btree_ptr,            5)                      \
+       x(extent,               6)                      \
+       x(reservation,          7)                      \
+       x(inode,                8)                      \
+       x(inode_generation,     9)                      \
+       x(dirent,               10)                     \
+       x(xattr,                11)                     \
+       x(alloc,                12)                     \
+       x(quota,                13)                     \
+       x(stripe,               14)                     \
+       x(reflink_p,            15)                     \
+       x(reflink_v,            16)                     \
+       x(inline_data,          17)                     \
+       x(btree_ptr_v2,         18)
+
+enum bch_bkey_type {
+#define x(name, nr) KEY_TYPE_##name    = nr,
+       BCH_BKEY_TYPES()
+#undef x
+       KEY_TYPE_MAX,
+};
+
+struct bch_cookie {
+       struct bch_val          v;
+       __le64                  cookie;
+};
+
+/* Extents */
+
+/*
+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
+ * preceded by checksum/compression information (bch_extent_crc32 or
+ * bch_extent_crc64).
+ *
+ * One major determining factor in the format of extents is how we handle and
+ * represent extents that have been partially overwritten and thus trimmed:
+ *
+ * If an extent is not checksummed or compressed, when the extent is trimmed we
+ * don't have to remember the extent we originally allocated and wrote: we can
+ * merely adjust ptr->offset to point to the start of the data that is currently
+ * live. The size field in struct bkey records the current (live) size of the
+ * extent, and is also used to mean "size of region on disk that we point to" in
+ * this case.
+ *
+ * Thus an extent that is not checksummed or compressed will consist only of a
+ * list of bch_extent_ptrs, with none of the fields in
+ * bch_extent_crc32/bch_extent_crc64.
+ *
+ * When an extent is checksummed or compressed, it's not possible to read only
+ * the data that is currently live: we have to read the entire extent that was
+ * originally written, and then return only the part of the extent that is
+ * currently live.
+ *
+ * Thus, in addition to the current size of the extent in struct bkey, we need
+ * to store the size of the originally allocated space - this is the
+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
+ * when the extent is trimmed, instead of modifying the offset field of the
+ * pointer, we keep a second smaller offset field - "offset into the original
+ * extent of the currently live region".
+ *
+ * The other major determining factor is replication and data migration:
+ *
+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
+ * write, we will initially write all the replicas in the same format, with the
+ * same checksum type and compression format - however, when copygc runs later (or
+ * tiering/cache promotion, anything that moves data), it is not in general
+ * going to rewrite all the pointers at once - one of the replicas may be in a
+ * bucket on one device that has very little fragmentation while another lives
+ * in a bucket that has become heavily fragmented, and thus is being rewritten
+ * sooner than the rest.
+ *
+ * Thus it will only move a subset of the pointers (or in the case of
+ * tiering/cache promotion perhaps add a single pointer without dropping any
+ * current pointers), and if the extent has been partially overwritten it must
+ * write only the currently live portion (or copygc would not be able to reduce
+ * fragmentation!) - which necessitates a different bch_extent_crc format for
+ * the new pointer.
+ *
+ * But in the interests of space efficiency, we don't want to store one
+ * bch_extent_crc for each pointer if we don't have to.
+ *
+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the
+ * type of a given entry with a scheme similar to utf8 (except we're encoding a
+ * type, not a size), encoding the type in the position of the first set bit:
+ *
+ * bch_extent_ptr      - 0b1
+ * bch_extent_crc32    - 0b10
+ * bch_extent_crc64    - 0b100
+ *
+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
+ * bch_extent_crc64 is the least constrained).
+ *
+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
+ * until the next bch_extent_crc32/64.
+ *
+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
+ * is neither checksummed nor compressed.
+ */
+
+/* 128 bits, sufficient for cryptographic MACs: */
+struct bch_csum {
+       __le64                  lo;
+       __le64                  hi;
+} __attribute__((packed, aligned(8)));
+
+#define BCH_EXTENT_ENTRY_TYPES()               \
+       x(ptr,                  0)              \
+       x(crc32,                1)              \
+       x(crc64,                2)              \
+       x(crc128,               3)              \
+       x(stripe_ptr,           4)
+#define BCH_EXTENT_ENTRY_MAX   5
+
+enum bch_extent_entry_type {
+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
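+
+/*
+ * Sketch (not upstream code) of decoding this scheme: entry type n is stored
+ * as the bit pattern 1 << n in the entry's leading bits, so given a
+ * union bch_extent_entry *e (defined below), find-first-set recovers the
+ * enum value:
+ *
+ *	unsigned long t = e->type & ((1UL << BCH_EXTENT_ENTRY_MAX) - 1);
+ *	enum bch_extent_entry_type type = __ffs(t);	// 0b1 -> ptr, 0b10 -> crc32
+ */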
+
+/* Compressed/uncompressed size are stored biased by 1: */
+struct bch_extent_crc32 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u32                   type:2,
+                               _compressed_size:7,
+                               _uncompressed_size:7,
+                               offset:7,
+                               _unused:1,
+                               csum_type:4,
+                               compression_type:4;
+       __u32                   csum;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u32                   csum;
+       __u32                   compression_type:4,
+                               csum_type:4,
+                               _unused:1,
+                               offset:7,
+                               _uncompressed_size:7,
+                               _compressed_size:7,
+                               type:2;
+#endif
+} __attribute__((packed, aligned(8)));
+
+#define CRC32_SIZE_MAX         (1U << 7)
+#define CRC32_NONCE_MAX                0
+
+struct bch_extent_crc64 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:3,
+                               _compressed_size:9,
+                               _uncompressed_size:9,
+                               offset:9,
+                               nonce:10,
+                               csum_type:4,
+                               compression_type:4,
+                               csum_hi:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   csum_hi:16,
+                               compression_type:4,
+                               csum_type:4,
+                               nonce:10,
+                               offset:9,
+                               _uncompressed_size:9,
+                               _compressed_size:9,
+                               type:3;
+#endif
+       __u64                   csum_lo;
+} __attribute__((packed, aligned(8)));
+
+#define CRC64_SIZE_MAX         (1U << 9)
+#define CRC64_NONCE_MAX                ((1U << 10) - 1)
+
+struct bch_extent_crc128 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:4,
+                               _compressed_size:13,
+                               _uncompressed_size:13,
+                               offset:13,
+                               nonce:13,
+                               csum_type:4,
+                               compression_type:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   compression_type:4,
+                               csum_type:4,
+                               nonce:13,
+                               offset:13,
+                               _uncompressed_size:13,
+                               _compressed_size:13,
+                               type:4;
+#endif
+       struct bch_csum         csum;
+} __attribute__((packed, aligned(8)));
+
+#define CRC128_SIZE_MAX                (1U << 13)
+#define CRC128_NONCE_MAX       ((1U << 13) - 1)
+
+/*
+ * @reservation - pointer hasn't been written to, just reserved
+ */
+struct bch_extent_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:1,
+                               cached:1,
+                               unused:1,
+                               reservation:1,
+                               offset:44, /* 8 petabytes */
+                               dev:8,
+                               gen:8;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   gen:8,
+                               dev:8,
+                               offset:44,
+                               reservation:1,
+                               unused:1,
+                               cached:1,
+                               type:1;
+#endif
+} __attribute__((packed, aligned(8)));
+
+struct bch_extent_stripe_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:5,
+                               block:8,
+                               idx:51;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   idx:51,
+                               block:8,
+                               type:5;
+#endif
+};
+
+struct bch_extent_reservation {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:6,
+                               unused:22,
+                               replicas:4,
+                               generation:32;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   generation:32,
+                               replicas:4,
+                               unused:22,
+                               type:6;
+#endif
+};
+
+union bch_extent_entry {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ||  __BITS_PER_LONG == 64
+       unsigned long                   type;
+#elif __BITS_PER_LONG == 32
+       struct {
+               unsigned long           pad;
+               unsigned long           type;
+       };
+#else
+#error edit for your odd byteorder.
+#endif
+
+#define x(f, n) struct bch_extent_##f  f;
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
+
+struct bch_btree_ptr {
+       struct bch_val          v;
+
+       struct bch_extent_ptr   start[0];
+       __u64                   _data[0];
+} __attribute__((packed, aligned(8)));
+
+struct bch_btree_ptr_v2 {
+       struct bch_val          v;
+
+       __u64                   mem_ptr;
+       __le64                  seq;
+       __le16                  sectors_written;
+       /* In case we ever decide to do variable size btree nodes: */
+       __le16                  sectors;
+       struct bpos             min_key;
+       struct bch_extent_ptr   start[0];
+       __u64                   _data[0];
+} __attribute__((packed, aligned(8)));
+
+struct bch_extent {
+       struct bch_val          v;
+
+       union bch_extent_entry  start[0];
+       __u64                   _data[0];
+} __attribute__((packed, aligned(8)));
+
+struct bch_reservation {
+       struct bch_val          v;
+
+       __le32                  generation;
+       __u8                    nr_replicas;
+       __u8                    pad[3];
+} __attribute__((packed, aligned(8)));
+
+/* Maximum size (in u64s) a single pointer could be: */
+#define BKEY_EXTENT_PTR_U64s_MAX\
+       ((sizeof(struct bch_extent_crc128) +                    \
+         sizeof(struct bch_extent_ptr)) / sizeof(u64))
+
+/* Maximum possible size of an entire extent value: */
+#define BKEY_EXTENT_VAL_U64s_MAX                               \
+       (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
+
+#define BKEY_PADDED(key)       __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
+
+/* Maximum possible size of an entire extent, key + value: */
+#define BKEY_EXTENT_U64s_MAX           (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+
+/* Btree pointers don't carry around checksums: */
+#define BKEY_BTREE_PTR_VAL_U64s_MAX                            \
+       ((sizeof(struct bch_btree_ptr_v2) +                     \
+         sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
+#define BKEY_BTREE_PTR_U64s_MAX                                        \
+       (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
+
+/* Inodes */
+
+#define BLOCKDEV_INODE_MAX     4096
+
+#define BCACHEFS_ROOT_INO      4096
+
+struct bch_inode {
+       struct bch_val          v;
+
+       __le64                  bi_hash_seed;
+       __le32                  bi_flags;
+       __le16                  bi_mode;
+       __u8                    fields[0];
+} __attribute__((packed, aligned(8)));
+
+struct bch_inode_generation {
+       struct bch_val          v;
+
+       __le32                  bi_generation;
+       __le32                  pad;
+} __attribute__((packed, aligned(8)));
+
+#define BCH_INODE_FIELDS()                     \
+       x(bi_atime,                     64)     \
+       x(bi_ctime,                     64)     \
+       x(bi_mtime,                     64)     \
+       x(bi_otime,                     64)     \
+       x(bi_size,                      64)     \
+       x(bi_sectors,                   64)     \
+       x(bi_uid,                       32)     \
+       x(bi_gid,                       32)     \
+       x(bi_nlink,                     32)     \
+       x(bi_generation,                32)     \
+       x(bi_dev,                       32)     \
+       x(bi_data_checksum,             8)      \
+       x(bi_compression,               8)      \
+       x(bi_project,                   32)     \
+       x(bi_background_compression,    8)      \
+       x(bi_data_replicas,             8)      \
+       x(bi_promote_target,            16)     \
+       x(bi_foreground_target,         16)     \
+       x(bi_background_target,         16)     \
+       x(bi_erasure_code,              16)     \
+       x(bi_fields_set,                16)
+
+/* subset of BCH_INODE_FIELDS */
+#define BCH_INODE_OPTS()                       \
+       x(data_checksum,                8)      \
+       x(compression,                  8)      \
+       x(project,                      32)     \
+       x(background_compression,       8)      \
+       x(data_replicas,                8)      \
+       x(promote_target,               16)     \
+       x(foreground_target,            16)     \
+       x(background_target,            16)     \
+       x(erasure_code,                 16)
+
+enum inode_opt_id {
+#define x(name, ...)                           \
+       Inode_opt_##name,
+       BCH_INODE_OPTS()
+#undef  x
+       Inode_opt_nr,
+};
+
+enum {
+       /*
+        * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
+        * flags)
+        */
+       __BCH_INODE_SYNC        = 0,
+       __BCH_INODE_IMMUTABLE   = 1,
+       __BCH_INODE_APPEND      = 2,
+       __BCH_INODE_NODUMP      = 3,
+       __BCH_INODE_NOATIME     = 4,
+
+       __BCH_INODE_I_SIZE_DIRTY        = 5,
+       __BCH_INODE_I_SECTORS_DIRTY     = 6,
+       __BCH_INODE_UNLINKED    = 7,
+
+       /* bits 20+ reserved for packed fields below: */
+};
+
+#define BCH_INODE_SYNC         (1 << __BCH_INODE_SYNC)
+#define BCH_INODE_IMMUTABLE    (1 << __BCH_INODE_IMMUTABLE)
+#define BCH_INODE_APPEND       (1 << __BCH_INODE_APPEND)
+#define BCH_INODE_NODUMP       (1 << __BCH_INODE_NODUMP)
+#define BCH_INODE_NOATIME      (1 << __BCH_INODE_NOATIME)
+#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
+#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
+#define BCH_INODE_UNLINKED     (1 << __BCH_INODE_UNLINKED)
+
+LE32_BITMASK(INODE_STR_HASH,   struct bch_inode, bi_flags, 20, 24);
+LE32_BITMASK(INODE_NR_FIELDS,  struct bch_inode, bi_flags, 24, 32);
+
+/* Dirents */
+
+/*
+ * Dirents (and xattrs) have to implement string lookups; since our b-tree
+ * doesn't support arbitrary length strings for the key, we instead index by a
+ * 64 bit hash (crc32c, crc64 or siphash - see enum bch_str_hash_type) of the
+ * string, stored in the offset field of the key - using linear probing to
+ * resolve hash collisions. This also
+ * provides us with the readdir cookie posix requires.
+ *
+ * Linear probing requires us to use whiteouts for deletions, in the event of a
+ * collision:
+ */
+
+struct bch_dirent {
+       struct bch_val          v;
+
+       /* Target inode number: */
+       __le64                  d_inum;
+
+       /*
+        * Copy of mode bits 12-15 from the target inode - so userspace can get
+        * the filetype without having to do a stat()
+        */
+       __u8                    d_type;
+
+       __u8                    d_name[];
+} __attribute__((packed, aligned(8)));
+
+#define BCH_NAME_MAX   (U8_MAX * sizeof(u64) -                         \
+                        sizeof(struct bkey) -                          \
+                        offsetof(struct bch_dirent, d_name))
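+
+/*
+ * Worked out from the definitions above (illustrative): 255 u64s for the
+ * whole key/value is 2040 bytes; subtracting the 40 byte struct bkey and the
+ * 9 byte dirent header (d_inum plus d_type) leaves BCH_NAME_MAX = 1991 bytes.
+ */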
+
+/* Xattrs */
+
+#define KEY_TYPE_XATTR_INDEX_USER                      0
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS  1
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
+#define KEY_TYPE_XATTR_INDEX_TRUSTED                   3
+#define KEY_TYPE_XATTR_INDEX_SECURITY          4
+
+struct bch_xattr {
+       struct bch_val          v;
+       __u8                    x_type;
+       __u8                    x_name_len;
+       __le16                  x_val_len;
+       __u8                    x_name[];
+} __attribute__((packed, aligned(8)));
+
+/* Bucket/allocation information: */
+
+struct bch_alloc {
+       struct bch_val          v;
+       __u8                    fields;
+       __u8                    gen;
+       __u8                    data[];
+} __attribute__((packed, aligned(8)));
+
+#define BCH_ALLOC_FIELDS()                     \
+       x(read_time,            16)             \
+       x(write_time,           16)             \
+       x(data_type,            8)              \
+       x(dirty_sectors,        16)             \
+       x(cached_sectors,       16)             \
+       x(oldest_gen,           8)
+
+enum {
+#define x(name, bytes) BCH_ALLOC_FIELD_##name,
+       BCH_ALLOC_FIELDS()
+#undef x
+       BCH_ALLOC_FIELD_NR
+};
+
+static const unsigned BCH_ALLOC_FIELD_BYTES[] = {
+#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8,
+       BCH_ALLOC_FIELDS()
+#undef x
+};
+
+#define x(name, bits) + (bits / 8)
+static const unsigned BKEY_ALLOC_VAL_U64s_MAX =
+       DIV_ROUND_UP(offsetof(struct bch_alloc, data)
+                    BCH_ALLOC_FIELDS(), sizeof(u64));
+#undef x
+
+#define BKEY_ALLOC_U64s_MAX    (BKEY_U64s + BKEY_ALLOC_VAL_U64s_MAX)
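+
+/*
+ * Illustrative expansion of the x-macro sum above: the field widths add up to
+ * 2 + 2 + 1 + 2 + 2 + 1 = 10 bytes, plus offsetof(struct bch_alloc, data)
+ * (2 bytes: fields and gen) gives 12, so BKEY_ALLOC_VAL_U64s_MAX is
+ * DIV_ROUND_UP(12, 8) = 2 and BKEY_ALLOC_U64s_MAX is 5 + 2 = 7.
+ */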
+
+/* Quotas: */
+
+enum quota_types {
+       QTYP_USR                = 0,
+       QTYP_GRP                = 1,
+       QTYP_PRJ                = 2,
+       QTYP_NR                 = 3,
+};
+
+enum quota_counters {
+       Q_SPC                   = 0,
+       Q_INO                   = 1,
+       Q_COUNTERS              = 2,
+};
+
+struct bch_quota_counter {
+       __le64                  hardlimit;
+       __le64                  softlimit;
+};
+
+struct bch_quota {
+       struct bch_val          v;
+       struct bch_quota_counter c[Q_COUNTERS];
+} __attribute__((packed, aligned(8)));
+
+/* Erasure coding */
+
+struct bch_stripe {
+       struct bch_val          v;
+       __le16                  sectors;
+       __u8                    algorithm;
+       __u8                    nr_blocks;
+       __u8                    nr_redundant;
+
+       __u8                    csum_granularity_bits;
+       __u8                    csum_type;
+       __u8                    pad;
+
+       struct bch_extent_ptr   ptrs[0];
+} __attribute__((packed, aligned(8)));
+
+/* Reflink: */
+
+struct bch_reflink_p {
+       struct bch_val          v;
+       __le64                  idx;
+
+       __le32                  reservation_generation;
+       __u8                    nr_replicas;
+       __u8                    pad[3];
+};
+
+struct bch_reflink_v {
+       struct bch_val          v;
+       __le64                  refcount;
+       union bch_extent_entry  start[0];
+       __u64                   _data[0];
+};
+
+/* Inline data */
+
+struct bch_inline_data {
+       struct bch_val          v;
+       u8                      data[0];
+};
+
+/* Optional/variable size superblock sections: */
+
+struct bch_sb_field {
+       __u64                   _data[0];
+       __le32                  u64s;
+       __le32                  type;
+};
+
+#define BCH_SB_FIELDS()                \
+       x(journal,      0)      \
+       x(members,      1)      \
+       x(crypt,        2)      \
+       x(replicas_v0,  3)      \
+       x(quota,        4)      \
+       x(disk_groups,  5)      \
+       x(clean,        6)      \
+       x(replicas,     7)      \
+       x(journal_seq_blacklist, 8)
+
+enum bch_sb_field_type {
+#define x(f, nr)       BCH_SB_FIELD_##f = nr,
+       BCH_SB_FIELDS()
+#undef x
+       BCH_SB_FIELD_NR
+};
+
+/* BCH_SB_FIELD_journal: */
+
+struct bch_sb_field_journal {
+       struct bch_sb_field     field;
+       __le64                  buckets[0];
+};
+
+/* BCH_SB_FIELD_members: */
+
+#define BCH_MIN_NR_NBUCKETS    (1 << 6)
+
+struct bch_member {
+       uuid_le                 uuid;
+       __le64                  nbuckets;       /* device size */
+       __le16                  first_bucket;   /* index of first bucket used */
+       __le16                  bucket_size;    /* sectors */
+       __le32                  pad;
+       __le64                  last_mount;     /* time_t */
+
+       __le64                  flags[2];
+};
+
+LE64_BITMASK(BCH_MEMBER_STATE,         struct bch_member, flags[0],  0,  4)
+/* 4-10 unused, was TIER, HAS_(META)DATA */
+LE64_BITMASK(BCH_MEMBER_REPLACEMENT,   struct bch_member, flags[0], 10, 14)
+LE64_BITMASK(BCH_MEMBER_DISCARD,       struct bch_member, flags[0], 14, 15)
+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED,  struct bch_member, flags[0], 15, 20)
+LE64_BITMASK(BCH_MEMBER_GROUP,         struct bch_member, flags[0], 20, 28)
+LE64_BITMASK(BCH_MEMBER_DURABILITY,    struct bch_member, flags[0], 28, 30)
+
+#define BCH_TIER_MAX                   4U
+
+#if 0
+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS,        struct bch_member, flags[1], 0,  20);
+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
+#endif
+
+enum bch_member_state {
+       BCH_MEMBER_STATE_RW             = 0,
+       BCH_MEMBER_STATE_RO             = 1,
+       BCH_MEMBER_STATE_FAILED         = 2,
+       BCH_MEMBER_STATE_SPARE          = 3,
+       BCH_MEMBER_STATE_NR             = 4,
+};
+
+enum cache_replacement {
+       CACHE_REPLACEMENT_LRU           = 0,
+       CACHE_REPLACEMENT_FIFO          = 1,
+       CACHE_REPLACEMENT_RANDOM        = 2,
+       CACHE_REPLACEMENT_NR            = 3,
+};
+
+struct bch_sb_field_members {
+       struct bch_sb_field     field;
+       struct bch_member       members[0];
+};
+
+/* BCH_SB_FIELD_crypt: */
+
+struct nonce {
+       __le32                  d[4];
+};
+
+struct bch_key {
+       __le64                  key[4];
+};
+
+#define BCH_KEY_MAGIC                                  \
+       (((u64) 'b' <<  0)|((u64) 'c' <<  8)|           \
+        ((u64) 'h' << 16)|((u64) '*' << 24)|           \
+        ((u64) '*' << 32)|((u64) 'k' << 40)|           \
+        ((u64) 'e' << 48)|((u64) 'y' << 56))
+
+struct bch_encrypted_key {
+       __le64                  magic;
+       struct bch_key          key;
+};
+
+/*
+ * If this field is present in the superblock, it stores an encryption key that
+ * is used to encrypt all other data/metadata. The key will normally be encrypted
+ * with the key userspace provides, but if encryption has been turned off we'll
+ * just store the master key unencrypted in the superblock so we can access the
+ * previously encrypted data.
+ */
+struct bch_sb_field_crypt {
+       struct bch_sb_field     field;
+
+       __le64                  flags;
+       __le64                  kdf_flags;
+       struct bch_encrypted_key key;
+};
+
+LE64_BITMASK(BCH_CRYPT_KDF_TYPE,       struct bch_sb_field_crypt, flags, 0, 4);
+
+enum bch_kdf_types {
+       BCH_KDF_SCRYPT          = 0,
+       BCH_KDF_NR              = 1,
+};
+
+/* stored as base 2 log of scrypt params: */
+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags,  0, 16);
+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
+
+/* BCH_SB_FIELD_replicas: */
+
+#define BCH_DATA_TYPES()               \
+       x(none,         0)              \
+       x(sb,           1)              \
+       x(journal,      2)              \
+       x(btree,        3)              \
+       x(user,         4)              \
+       x(cached,       5)
+
+enum bch_data_type {
+#define x(t, n) BCH_DATA_##t,
+       BCH_DATA_TYPES()
+#undef x
+       BCH_DATA_NR
+};
+
+struct bch_replicas_entry_v0 {
+       __u8                    data_type;
+       __u8                    nr_devs;
+       __u8                    devs[0];
+} __attribute__((packed));
+
+struct bch_sb_field_replicas_v0 {
+       struct bch_sb_field     field;
+       struct bch_replicas_entry_v0 entries[0];
+} __attribute__((packed, aligned(8)));
+
+struct bch_replicas_entry {
+       __u8                    data_type;
+       __u8                    nr_devs;
+       __u8                    nr_required;
+       __u8                    devs[0];
+} __attribute__((packed));
+
+#define replicas_entry_bytes(_i)                                       \
+       (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
+
+struct bch_sb_field_replicas {
+       struct bch_sb_field     field;
+       struct bch_replicas_entry entries[0];
+} __attribute__((packed, aligned(8)));
+
+/* BCH_SB_FIELD_quota: */
+
+struct bch_sb_quota_counter {
+       __le32                          timelimit;
+       __le32                          warnlimit;
+};
+
+struct bch_sb_quota_type {
+       __le64                          flags;
+       struct bch_sb_quota_counter     c[Q_COUNTERS];
+};
+
+struct bch_sb_field_quota {
+       struct bch_sb_field             field;
+       struct bch_sb_quota_type        q[QTYP_NR];
+} __attribute__((packed, aligned(8)));
+
+/* BCH_SB_FIELD_disk_groups: */
+
+#define BCH_SB_LABEL_SIZE              32
+
+struct bch_disk_group {
+       __u8                    label[BCH_SB_LABEL_SIZE];
+       __le64                  flags[2];
+} __attribute__((packed, aligned(8)));
+
+LE64_BITMASK(BCH_GROUP_DELETED,                struct bch_disk_group, flags[0], 0,  1)
+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED,   struct bch_disk_group, flags[0], 1,  6)
+LE64_BITMASK(BCH_GROUP_PARENT,         struct bch_disk_group, flags[0], 6, 24)
+
+struct bch_sb_field_disk_groups {
+       struct bch_sb_field     field;
+       struct bch_disk_group   entries[0];
+} __attribute__((packed, aligned(8)));
+
+/*
+ * On clean shutdown, store btree roots and current journal sequence number in
+ * the superblock:
+ */
+struct jset_entry {
+       __le16                  u64s;
+       __u8                    btree_id;
+       __u8                    level;
+       __u8                    type; /* designates what this jset entry holds */
+       __u8                    pad[3];
+
+       union {
+               struct bkey_i   start[0];
+               __u64           _data[0];
+       };
+};
+
+struct bch_sb_field_clean {
+       struct bch_sb_field     field;
+
+       __le32                  flags;
+       __le16                  read_clock;
+       __le16                  write_clock;
+       __le64                  journal_seq;
+
+       union {
+               struct jset_entry start[0];
+               __u64           _data[0];
+       };
+};
+
+struct journal_seq_blacklist_entry {
+       __le64                  start;
+       __le64                  end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+       struct bch_sb_field     field;
+
+       union {
+               struct journal_seq_blacklist_entry start[0];
+               __u64           _data[0];
+       };
+};
+
+/* Superblock: */
+
+/*
+ * New versioning scheme:
+ * One common version number for all on disk data structures - superblock, btree
+ * nodes, journal entries
+ */
+#define BCH_JSET_VERSION_OLD                   2
+#define BCH_BSET_VERSION_OLD                   3
+
+enum bcachefs_metadata_version {
+       bcachefs_metadata_version_min                   = 9,
+       bcachefs_metadata_version_new_versioning        = 10,
+       bcachefs_metadata_version_bkey_renumber         = 10,
+       bcachefs_metadata_version_inode_btree_change    = 11,
+       bcachefs_metadata_version_max                   = 12,
+};
+
+#define bcachefs_metadata_version_current      (bcachefs_metadata_version_max - 1)
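+/* i.e. 11, bcachefs_metadata_version_inode_btree_change, with _max == 12 */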
+
+#define BCH_SB_SECTOR                  8
+#define BCH_SB_MEMBERS_MAX             64 /* XXX kill */
+
+struct bch_sb_layout {
+       uuid_le                 magic;  /* bcachefs superblock UUID */
+       __u8                    layout_type;
+       __u8                    sb_max_size_bits; /* log2 of max superblock size, in 512 byte sectors */
+       __u8                    nr_superblocks;
+       __u8                    pad[5];
+       __le64                  sb_offset[61];
+} __attribute__((packed, aligned(8)));
+
+#define BCH_SB_LAYOUT_SECTOR   7
+
+/*
+ * @offset     - sector where this sb was written
+ * @version    - on disk format version
+ * @version_min        - oldest metadata version this filesystem contains, so we can
+ *               safely drop compatibility code and refuse to mount filesystems
+ *               that would need it
+ * @magic      - identifies as a bcachefs superblock (BCACHE_MAGIC)
+ * @seq                - identifies the most recent superblock; incremented each time
+ *               the superblock is written
+ * @uuid       - used for generating various magic numbers and identifying
+ *                member devices, never changes
+ * @user_uuid  - user visible UUID, may be changed
+ * @label      - filesystem label
+ * @features   - enabled incompatible features
+ */
+struct bch_sb {
+       struct bch_csum         csum;
+       __le16                  version;
+       __le16                  version_min;
+       __le16                  pad[2];
+       uuid_le                 magic;
+       uuid_le                 uuid;
+       uuid_le                 user_uuid;
+       __u8                    label[BCH_SB_LABEL_SIZE];
+       __le64                  offset;
+       __le64                  seq;
+
+       __le16                  block_size;
+       __u8                    dev_idx;
+       __u8                    nr_devices;
+       __le32                  u64s;
+
+       __le64                  time_base_lo;
+       __le32                  time_base_hi;
+       __le32                  time_precision;
+
+       __le64                  flags[8];
+       __le64                  features[2];
+       __le64                  compat[2];
+
+       struct bch_sb_layout    layout;
+
+       union {
+               struct bch_sb_field start[0];
+               __le64          _data[0];
+       };
+} __attribute__((packed, aligned(8)));
+
+/*
+ * Flags:
+ * BCH_SB_INITIALIZED  - set on first mount
+ * BCH_SB_CLEAN                - did we shut down cleanly? Just a hint, doesn't affect
+ *                       behaviour of mount/recovery path:
+ * BCH_SB_INODE_32BIT  - limit inode numbers to 32 bits
+ * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80
+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides
+ *                        DATA/META_CSUM_TYPE. Also indicates encryption
+ *                        algorithm in use, if/when we get more than one
+ */
+
+LE16_BITMASK(BCH_SB_BLOCK_SIZE,                struct bch_sb, block_size, 0, 16);
+
+LE64_BITMASK(BCH_SB_INITIALIZED,       struct bch_sb, flags[0],  0,  1);
+LE64_BITMASK(BCH_SB_CLEAN,             struct bch_sb, flags[0],  1,  2);
+LE64_BITMASK(BCH_SB_CSUM_TYPE,         struct bch_sb, flags[0],  2,  8);
+LE64_BITMASK(BCH_SB_ERROR_ACTION,      struct bch_sb, flags[0],  8, 12);
+
+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE,   struct bch_sb, flags[0], 12, 28);
+
+LE64_BITMASK(BCH_SB_GC_RESERVE,                struct bch_sb, flags[0], 28, 33);
+LE64_BITMASK(BCH_SB_ROOT_RESERVE,      struct bch_sb, flags[0], 33, 40);
+
+LE64_BITMASK(BCH_SB_META_CSUM_TYPE,    struct bch_sb, flags[0], 40, 44);
+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE,    struct bch_sb, flags[0], 44, 48);
+
+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT,        struct bch_sb, flags[0], 48, 52);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT,        struct bch_sb, flags[0], 52, 56);
+
+LE64_BITMASK(BCH_SB_POSIX_ACL,         struct bch_sb, flags[0], 56, 57);
+LE64_BITMASK(BCH_SB_USRQUOTA,          struct bch_sb, flags[0], 57, 58);
+LE64_BITMASK(BCH_SB_GRPQUOTA,          struct bch_sb, flags[0], 58, 59);
+LE64_BITMASK(BCH_SB_PRJQUOTA,          struct bch_sb, flags[0], 59, 60);
+
+LE64_BITMASK(BCH_SB_HAS_ERRORS,                struct bch_sb, flags[0], 60, 61);
+
+LE64_BITMASK(BCH_SB_REFLINK,           struct bch_sb, flags[0], 61, 62);
+
+/* 62-64 unused */
+
+LE64_BITMASK(BCH_SB_STR_HASH_TYPE,     struct bch_sb, flags[1],  0,  4);
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE,  struct bch_sb, flags[1],  4,  8);
+LE64_BITMASK(BCH_SB_INODE_32BIT,       struct bch_sb, flags[1],  8,  9);
+
+LE64_BITMASK(BCH_SB_128_BIT_MACS,      struct bch_sb, flags[1],  9, 10);
+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE,   struct bch_sb, flags[1], 10, 14);
+
+/*
+ * Max size of an extent that may require bouncing to read or write
+ * (checksummed, compressed): 64k
+ */
+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS,
+                                       struct bch_sb, flags[1], 14, 20);
+
+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
+
+LE64_BITMASK(BCH_SB_PROMOTE_TARGET,    struct bch_sb, flags[1], 28, 40);
+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
+
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE,
+                                       struct bch_sb, flags[2],  0,  4);
+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES,  struct bch_sb, flags[2],  4, 64);
+
+LE64_BITMASK(BCH_SB_ERASURE_CODE,      struct bch_sb, flags[3],  0, 16);
+
+/*
+ * Features:
+ *
+ * journal_seq_blacklist_v3:   gates BCH_SB_FIELD_journal_seq_blacklist
+ * reflink:                    gates KEY_TYPE_reflink
+ * inline_data:                        gates KEY_TYPE_inline_data
+ * new_siphash:                        gates BCH_STR_HASH_SIPHASH
+ * new_extent_overwrite:       gates BTREE_NODE_NEW_EXTENT_OVERWRITE
+ */
+#define BCH_SB_FEATURES()                      \
+       x(lz4,                          0)      \
+       x(gzip,                         1)      \
+       x(zstd,                         2)      \
+       x(atomic_nlink,                 3)      \
+       x(ec,                           4)      \
+       x(journal_seq_blacklist_v3,     5)      \
+       x(reflink,                      6)      \
+       x(new_siphash,                  7)      \
+       x(inline_data,                  8)      \
+       x(new_extent_overwrite,         9)      \
+       x(incompressible,               10)     \
+       x(btree_ptr_v2,                 11)     \
+       x(extents_above_btree_updates,  12)     \
+       x(btree_updates_journalled,     13)
+
+#define BCH_SB_FEATURES_ALL                            \
+       ((1ULL << BCH_FEATURE_new_siphash)|             \
+        (1ULL << BCH_FEATURE_new_extent_overwrite)|    \
+        (1ULL << BCH_FEATURE_btree_ptr_v2)|            \
+        (1ULL << BCH_FEATURE_extents_above_btree_updates))
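+/* with the feature bit numbers above (7, 9, 11, 12), this mask is 0x1a80 */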
+
+enum bch_sb_feature {
+#define x(f, n) BCH_FEATURE_##f,
+       BCH_SB_FEATURES()
+#undef x
+       BCH_FEATURE_NR,
+};
+
+enum bch_sb_compat {
+       BCH_COMPAT_FEAT_ALLOC_INFO      = 0,
+       BCH_COMPAT_FEAT_ALLOC_METADATA  = 1,
+};
+
+/* options: */
+
+#define BCH_REPLICAS_MAX               4U
+
+enum bch_error_actions {
+       BCH_ON_ERROR_CONTINUE           = 0,
+       BCH_ON_ERROR_RO                 = 1,
+       BCH_ON_ERROR_PANIC              = 2,
+       BCH_NR_ERROR_ACTIONS            = 3,
+};
+
+enum bch_str_hash_type {
+       BCH_STR_HASH_CRC32C             = 0,
+       BCH_STR_HASH_CRC64              = 1,
+       BCH_STR_HASH_SIPHASH_OLD        = 2,
+       BCH_STR_HASH_SIPHASH            = 3,
+       BCH_STR_HASH_NR                 = 4,
+};
+
+enum bch_str_hash_opts {
+       BCH_STR_HASH_OPT_CRC32C         = 0,
+       BCH_STR_HASH_OPT_CRC64          = 1,
+       BCH_STR_HASH_OPT_SIPHASH        = 2,
+       BCH_STR_HASH_OPT_NR             = 3,
+};
+
+enum bch_csum_type {
+       BCH_CSUM_NONE                   = 0,
+       BCH_CSUM_CRC32C_NONZERO         = 1,
+       BCH_CSUM_CRC64_NONZERO          = 2,
+       BCH_CSUM_CHACHA20_POLY1305_80   = 3,
+       BCH_CSUM_CHACHA20_POLY1305_128  = 4,
+       BCH_CSUM_CRC32C                 = 5,
+       BCH_CSUM_CRC64                  = 6,
+       BCH_CSUM_NR                     = 7,
+};
+
+static const unsigned bch_crc_bytes[] = {
+       [BCH_CSUM_NONE]                         = 0,
+       [BCH_CSUM_CRC32C_NONZERO]               = 4,
+       [BCH_CSUM_CRC32C]                       = 4,
+       [BCH_CSUM_CRC64_NONZERO]                = 8,
+       [BCH_CSUM_CRC64]                        = 8,
+       [BCH_CSUM_CHACHA20_POLY1305_80]         = 10,
+       [BCH_CSUM_CHACHA20_POLY1305_128]        = 16,
+};
+
+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
+{
+       switch (type) {
+       case BCH_CSUM_CHACHA20_POLY1305_80:
+       case BCH_CSUM_CHACHA20_POLY1305_128:
+               return true;
+       default:
+               return false;
+       }
+}
+
+enum bch_csum_opts {
+       BCH_CSUM_OPT_NONE               = 0,
+       BCH_CSUM_OPT_CRC32C             = 1,
+       BCH_CSUM_OPT_CRC64              = 2,
+       BCH_CSUM_OPT_NR                 = 3,
+};
+
+#define BCH_COMPRESSION_TYPES()                \
+       x(none,                 0)      \
+       x(lz4_old,              1)      \
+       x(gzip,                 2)      \
+       x(lz4,                  3)      \
+       x(zstd,                 4)      \
+       x(incompressible,       5)
+
+enum bch_compression_type {
+#define x(t, n) BCH_COMPRESSION_TYPE_##t,
+       BCH_COMPRESSION_TYPES()
+#undef x
+       BCH_COMPRESSION_TYPE_NR
+};
+
+#define BCH_COMPRESSION_OPTS()         \
+       x(none,         0)              \
+       x(lz4,          1)              \
+       x(gzip,         2)              \
+       x(zstd,         3)
+
+enum bch_compression_opts {
+#define x(t, n) BCH_COMPRESSION_OPT_##t,
+       BCH_COMPRESSION_OPTS()
+#undef x
+       BCH_COMPRESSION_OPT_NR
+};
+
+/*
+ * Magic numbers
+ *
+ * The various other data structures have their own magic numbers, which are
+ * xored with the first part of the filesystem's UUID
+ */
+
+#define BCACHE_MAGIC                                                   \
+       UUID_LE(0xf67385c6, 0x1a4e, 0xca45,                             \
+               0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
+
+#define BCACHEFS_STATFS_MAGIC          0xca451a4e
+
+#define JSET_MAGIC             __cpu_to_le64(0x245235c1a3625032ULL)
+#define BSET_MAGIC             __cpu_to_le64(0x90135c78b99e07f5ULL)
+
+static inline __le64 __bch2_sb_magic(struct bch_sb *sb)
+{
+       __le64 ret;
+       memcpy(&ret, &sb->uuid, sizeof(ret));
+       return ret;
+}
+
+static inline __u64 __jset_magic(struct bch_sb *sb)
+{
+       return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC);
+}
+
+static inline __u64 __bset_magic(struct bch_sb *sb)
+{
+       return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC);
+}
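+
+/*
+ * Sketch (not upstream code) of how these are used: each journal entry / bset
+ * stores its xored magic, so one comparison checks both that a structure is
+ * valid and that it belongs to this filesystem, e.g.
+ *
+ *	if (le64_to_cpu(j->magic) != __jset_magic(sb))
+ *		return -EINVAL;	// not a journal entry of this filesystem
+ */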
+
+/* Journal */
+
+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
+
+#define BCH_JSET_ENTRY_TYPES()                 \
+       x(btree_keys,           0)              \
+       x(btree_root,           1)              \
+       x(prio_ptrs,            2)              \
+       x(blacklist,            3)              \
+       x(blacklist_v2,         4)              \
+       x(usage,                5)              \
+       x(data_usage,           6)
+
+enum {
+#define x(f, nr)       BCH_JSET_ENTRY_##f      = nr,
+       BCH_JSET_ENTRY_TYPES()
+#undef x
+       BCH_JSET_ENTRY_NR
+};
+
+/*
+ * Journal sequence numbers can be blacklisted: bsets record the max sequence
+ * number of all the journal entries they contain updates for, so that on
+ * recovery we can ignore those bsets that contain index updates newer than what
+ * made it into the journal.
+ *
+ * This means that we can't reuse that journal_seq - we have to skip it, and
+ * then record that we skipped it so that the next time we crash and recover we
+ * don't think there was a missing journal entry.
+ */
+struct jset_entry_blacklist {
+       struct jset_entry       entry;
+       __le64                  seq;
+};
+
+struct jset_entry_blacklist_v2 {
+       struct jset_entry       entry;
+       __le64                  start;
+       __le64                  end;
+};
+
+enum {
+       FS_USAGE_RESERVED               = 0,
+       FS_USAGE_INODES                 = 1,
+       FS_USAGE_KEY_VERSION            = 2,
+       FS_USAGE_NR                     = 3
+};
+
+struct jset_entry_usage {
+       struct jset_entry       entry;
+       __le64                  v;
+} __attribute__((packed));
+
+struct jset_entry_data_usage {
+       struct jset_entry       entry;
+       __le64                  v;
+       struct bch_replicas_entry r;
+} __attribute__((packed));
+
+/*
+ * On disk format for a journal entry:
+ * seq is monotonically increasing; every journal entry has its own unique
+ * sequence number.
+ *
+ * last_seq is the oldest journal entry that still has keys the btree hasn't
+ * flushed to disk yet.
+ *
+ * version is for on disk format changes.
+ */
+struct jset {
+       struct bch_csum         csum;
+
+       __le64                  magic;
+       __le64                  seq;
+       __le32                  version;
+       __le32                  flags;
+
+       __le32                  u64s; /* size of _data[] in u64s */
+
+       __u8                    encrypted_start[0];
+
+       __le16                  read_clock;
+       __le16                  write_clock;
+
+       /* Sequence number of oldest dirty journal entry */
+       __le64                  last_seq;
+
+       union {
+               struct jset_entry start[0];
+               __u64           _data[0];
+       };
+} __attribute__((packed, aligned(8)));
+
+LE32_BITMASK(JSET_CSUM_TYPE,   struct jset, flags, 0, 4);
+LE32_BITMASK(JSET_BIG_ENDIAN,  struct jset, flags, 4, 5);
+
+#define BCH_JOURNAL_BUCKETS_MIN                8
+
+/* Btree: */
+
+#define BCH_BTREE_IDS()                                        \
+       x(EXTENTS,      0, "extents")                   \
+       x(INODES,       1, "inodes")                    \
+       x(DIRENTS,      2, "dirents")                   \
+       x(XATTRS,       3, "xattrs")                    \
+       x(ALLOC,        4, "alloc")                     \
+       x(QUOTAS,       5, "quotas")                    \
+       x(EC,           6, "stripes")                   \
+       x(REFLINK,      7, "reflink")
+
+enum btree_id {
+#define x(kwd, val, name) BTREE_ID_##kwd = val,
+       BCH_BTREE_IDS()
+#undef x
+       BTREE_ID_NR
+};
+
+#define BTREE_MAX_DEPTH                4U
+
+/* Btree nodes */
+
+/*
+ * Btree nodes
+ *
+ * On disk a btree node is a list/log of these; within each set the keys are
+ * sorted
+ */
+struct bset {
+       __le64                  seq;
+
+       /*
+        * Highest journal entry this bset contains keys for.
+        * If on recovery we don't see that journal entry, this bset is ignored:
+        * this allows us to preserve the order of all index updates after a
+        * crash, since the journal records a total order of all index updates
+        * and anything that didn't make it to the journal doesn't get used.
+        */
+       __le64                  journal_seq;
+
+       __le32                  flags;
+       __le16                  version;
+       __le16                  u64s; /* count of d[] in u64s */
+
+       union {
+               struct bkey_packed start[0];
+               __u64           _data[0];
+       };
+} __attribute__((packed, aligned(8)));
+
+LE32_BITMASK(BSET_CSUM_TYPE,   struct bset, flags, 0, 4);
+
+LE32_BITMASK(BSET_BIG_ENDIAN,  struct bset, flags, 4, 5);
+LE32_BITMASK(BSET_SEPARATE_WHITEOUTS,
+                               struct bset, flags, 5, 6);
+
+struct btree_node {
+       struct bch_csum         csum;
+       __le64                  magic;
+
+       /* this flags field is encrypted, unlike bset->flags: */
+       __le64                  flags;
+
+       /* Closed interval: */
+       struct bpos             min_key;
+       struct bpos             max_key;
+       struct bch_extent_ptr   ptr;
+       struct bkey_format      format;
+
+       union {
+       struct bset             keys;
+       struct {
+               __u8            pad[22];
+               __le16          u64s;
+               __u64           _data[0];
+
+       };
+       };
+} __attribute__((packed, aligned(8)));
+
+LE64_BITMASK(BTREE_NODE_ID,    struct btree_node, flags,  0,  4);
+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags,  4,  8);
+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE,
+                               struct btree_node, flags,  8,  9);
+/* 9-32 unused */
+LE64_BITMASK(BTREE_NODE_SEQ,   struct btree_node, flags, 32, 64);
+
+struct btree_node_entry {
+       struct bch_csum         csum;
+
+       union {
+       struct bset             keys;
+       struct {
+               __u8            pad[22];
+               __le16          u64s;
+               __u64           _data[0];
+
+       };
+       };
+} __attribute__((packed, aligned(8)));
+
+#endif /* _BCACHEFS_FORMAT_H */
diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h
new file mode 100644 (file)
index 0000000..d71157a
--- /dev/null
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IOCTL_H
+#define _BCACHEFS_IOCTL_H
+
+#include <linux/uuid.h>
+#include <asm/ioctl.h>
+#include "bcachefs_format.h"
+
+/*
+ * Flags common to multiple ioctls:
+ */
+#define BCH_FORCE_IF_DATA_LOST         (1 << 0)
+#define BCH_FORCE_IF_METADATA_LOST     (1 << 1)
+#define BCH_FORCE_IF_DATA_DEGRADED     (1 << 2)
+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
+
+#define BCH_FORCE_IF_DEGRADED                  \
+       (BCH_FORCE_IF_DATA_DEGRADED|            \
+        BCH_FORCE_IF_METADATA_DEGRADED)
+
+/*
+ * If cleared, ioctls that refer to a device pass it as a pointer to a pathname
+ * (e.g. /dev/sda1); if set, the dev field is the device's index within the
+ * filesystem:
+ */
+#define BCH_BY_INDEX                   (1 << 4)
+
+/*
+ * For BCH_IOCTL_READ_SUPER: get the superblock of a specific device, not the
+ * filesystem-wide superblock:
+ */
+#define BCH_READ_DEV                   (1 << 5)
+
+/* global control dev: */
+
+/* These are currently broken, and probably unnecessary: */
+#if 0
+#define BCH_IOCTL_ASSEMBLE     _IOW(0xbc, 1, struct bch_ioctl_assemble)
+#define BCH_IOCTL_INCREMENTAL  _IOW(0xbc, 2, struct bch_ioctl_incremental)
+
+struct bch_ioctl_assemble {
+       __u32                   flags;
+       __u32                   nr_devs;
+       __u64                   pad;
+       __u64                   devs[];
+};
+
+struct bch_ioctl_incremental {
+       __u32                   flags;
+       __u64                   pad;
+       __u64                   dev;
+};
+#endif
+
+/* filesystem ioctls: */
+
+#define BCH_IOCTL_QUERY_UUID   _IOR(0xbc,      1,  struct bch_ioctl_query_uuid)
+
+/* These only make sense when we also have incremental assembly */
+#if 0
+#define BCH_IOCTL_START                _IOW(0xbc,      2,  struct bch_ioctl_start)
+#define BCH_IOCTL_STOP         _IO(0xbc,       3)
+#endif
+
+#define BCH_IOCTL_DISK_ADD     _IOW(0xbc,      4,  struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_REMOVE  _IOW(0xbc,      5,  struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_ONLINE  _IOW(0xbc,      6,  struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc,      7,  struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc,    8,  struct bch_ioctl_disk_set_state)
+#define BCH_IOCTL_DATA         _IOW(0xbc,      10, struct bch_ioctl_data)
+#define BCH_IOCTL_FS_USAGE     _IOWR(0xbc,     11, struct bch_ioctl_fs_usage)
+#define BCH_IOCTL_DEV_USAGE    _IOWR(0xbc,     11, struct bch_ioctl_dev_usage)
+#define BCH_IOCTL_READ_SUPER   _IOW(0xbc,      12, struct bch_ioctl_read_super)
+#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc,      13,  struct bch_ioctl_disk_get_idx)
+#define BCH_IOCTL_DISK_RESIZE  _IOW(0xbc,      14,  struct bch_ioctl_disk_resize)
+
+/* ioctls below act on a particular file, not the filesystem as a whole: */
+
+#define BCHFS_IOC_REINHERIT_ATTRS      _IOR(0xbc, 64, const char __user *)
+
+/*
+ * BCH_IOCTL_QUERY_UUID: get filesystem UUID
+ *
+ * Returns user visible UUID, not internal UUID (which may never be changed);
+ * the filesystem's sysfs directory may be found under /sys/fs/bcachefs with
+ * this UUID.
+ */
+struct bch_ioctl_query_uuid {
+       uuid_le                 uuid;
+};
+
+#if 0
+struct bch_ioctl_start {
+       __u32                   flags;
+       __u32                   pad;
+};
+#endif
+
+/*
+ * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem
+ *
+ * The specified device must not be open or in use. On success, the new device
+ * will be an online member of the filesystem just like any other member.
+ *
+ * The device must first be prepared by userspace by formatting with a bcachefs
+ * superblock, which is only used for passing in superblock options/parameters
+ * for that device (in struct bch_member). The new device's superblock should
+ * not claim to be a member of any existing filesystem - UUIDs on it will be
+ * ignored.
+ */
+
+/*
+ * BCH_IOCTL_DISK_REMOVE: permanently remove a member device from a filesystem
+ *
+ * Any data present on @dev will be permanently deleted, and @dev will be
+ * removed from its slot in the filesystem's list of member devices. The device
+ * may be either online or offline.
+ *
+ * Will fail if removing @dev would leave us with insufficient read/write
+ * devices or degraded/unavailable data, unless the appropriate BCH_FORCE_IF_*
+ * flags are set.
+ */
+
+/*
+ * BCH_IOCTL_DISK_ONLINE: given a disk that is already a member of a filesystem
+ * but is not open (e.g. because we started in degraded mode), bring it online
+ *
+ * all existing data on @dev will be available once the device is online,
+ * exactly as if @dev was present when the filesystem was first mounted
+ */
+
+/*
+ * BCH_IOCTL_DISK_OFFLINE: offline a disk, causing the kernel to close that
+ * block device, without removing it from the filesystem (so it can be brought
+ * back online later)
+ *
+ * Data present on @dev will be unavailable while @dev is offline (unless
+ * replicated), but will still be intact and untouched if @dev is brought back
+ * online
+ *
+ * Will fail (similarly to BCH_IOCTL_DISK_SET_STATE) if offlining @dev would
+ * leave us with insufficient read/write devices or degraded/unavailable data,
+ * unless the appropriate BCH_FORCE_IF_* flags are set.
+ */
+
+struct bch_ioctl_disk {
+       __u32                   flags;
+       __u32                   pad;
+       __u64                   dev;
+};
+
+/*
+ * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem
+ *
+ * @new_state          - one of the bch_member_state states (rw, ro, failed,
+ *                       spare)
+ *
+ * Will refuse to change member state if we would then have insufficient devices
+ * to write to, or if it would result in degraded data (when @new_state is
+ * failed or spare) unless the appropriate BCH_FORCE_IF_* flags are set.
+ */
+struct bch_ioctl_disk_set_state {
+       __u32                   flags;
+       __u8                    new_state;
+       __u8                    pad[3];
+       __u64                   dev;
+};
+
+enum bch_data_ops {
+       BCH_DATA_OP_SCRUB       = 0,
+       BCH_DATA_OP_REREPLICATE = 1,
+       BCH_DATA_OP_MIGRATE     = 2,
+       BCH_DATA_OP_NR          = 3,
+};
+
+/*
+ * BCH_IOCTL_DATA: operations that walk and manipulate filesystem data (e.g.
+ * scrub, rereplicate, migrate).
+ *
+ * This ioctl kicks off a job in the background, and returns a file descriptor.
+ * Reading from the file descriptor returns a struct bch_ioctl_data_event,
+ * indicating current progress, and closing the file descriptor will stop the
+ * job. The file descriptor is O_CLOEXEC.
+ */
+struct bch_ioctl_data {
+       __u32                   op;
+       __u32                   flags;
+
+       struct bpos             start;
+       struct bpos             end;
+
+       union {
+       struct {
+               __u32           dev;
+               __u32           pad;
+       }                       migrate;
+       struct {
+               __u64           pad[8];
+       };
+       };
+} __attribute__((packed, aligned(8)));
+
+enum bch_data_event {
+       BCH_DATA_EVENT_PROGRESS = 0,
+       /* XXX: add an event for reporting errors */
+       BCH_DATA_EVENT_NR       = 1,
+};
+
+struct bch_ioctl_data_progress {
+       __u8                    data_type;
+       __u8                    btree_id;
+       __u8                    pad[2];
+       struct bpos             pos;
+
+       __u64                   sectors_done;
+       __u64                   sectors_total;
+} __attribute__((packed, aligned(8)));
+
+struct bch_ioctl_data_event {
+       __u8                    type;
+       __u8                    pad[7];
+       union {
+       struct bch_ioctl_data_progress p;
+       __u64                   pad2[15];
+       };
+} __attribute__((packed, aligned(8)));
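+
+/*
+ * A minimal userspace sketch of driving BCH_IOCTL_DATA; the control-device
+ * path is an assumption here (the kernel creates a per-filesystem chardev),
+ * not something this header defines:
+ *
+ *     struct bch_ioctl_data d = {
+ *             .op     = BCH_DATA_OP_REREPLICATE,
+ *             .start  = POS_MIN,
+ *             .end    = POS_MAX,
+ *     };
+ *     int ctl = open("/dev/bcachefs0-ctl", O_RDONLY); /* assumed path */
+ *     int job = ioctl(ctl, BCH_IOCTL_DATA, &d);       /* returns new fd */
+ *     struct bch_ioctl_data_event e;
+ *
+ *     while (read(job, &e, sizeof(e)) == sizeof(e) &&
+ *            e.type == BCH_DATA_EVENT_PROGRESS)
+ *             printf("%llu/%llu sectors\n",
+ *                    (unsigned long long) e.p.sectors_done,
+ *                    (unsigned long long) e.p.sectors_total);
+ *     close(job);                     /* closing the fd stops the job */
+ */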
+
+struct bch_replicas_usage {
+       __u64                   sectors;
+       struct bch_replicas_entry r;
+} __attribute__((packed));
+
+static inline struct bch_replicas_usage *
+replicas_usage_next(struct bch_replicas_usage *u)
+{
+       return (void *) u + replicas_entry_bytes(&u->r) + 8;
+}
+
+/*
+ * BCH_IOCTL_FS_USAGE: query filesystem disk space usage
+ *
+ * Returns disk space usage broken out by data type, number of replicas, and
+ * by component device
+ *
+ * @replica_entries_bytes - size, in bytes, allocated for replica usage entries
+ *
+ * On success, @replica_entries_bytes will be changed to indicate the number of
+ * bytes actually used.
+ *
+ * Returns -ERANGE if @replica_entries_bytes was too small
+ */
+struct bch_ioctl_fs_usage {
+       __u64                   capacity;
+       __u64                   used;
+       __u64                   online_reserved;
+       __u64                   persistent_reserved[BCH_REPLICAS_MAX];
+
+       __u32                   replica_entries_bytes;
+       __u32                   pad;
+
+       struct bch_replicas_usage replicas[0];
+};
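+
+/*
+ * Sketch of consuming BCH_IOCTL_FS_USAGE, assuming 4096 bytes is enough for
+ * the replica entries (on -ERANGE, retry with a bigger buffer):
+ *
+ *     struct bch_ioctl_fs_usage *u = calloc(1, sizeof(*u) + 4096);
+ *
+ *     u->replica_entries_bytes = 4096;
+ *
+ *     if (!ioctl(ctl, BCH_IOCTL_FS_USAGE, u)) {
+ *             struct bch_replicas_usage *r = u->replicas;
+ *             void *end = (void *) u->replicas + u->replica_entries_bytes;
+ *
+ *             for (; (void *) r < end; r = replicas_usage_next(r))
+ *                     printf("%llu sectors\n",
+ *                            (unsigned long long) r->sectors);
+ *     }
+ */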
+
+/*
+ * BCH_IOCTL_DEV_USAGE: query device disk space usage
+ *
+ * Returns disk space usage broken out by data type - both by buckets and
+ * sectors.
+ */
+struct bch_ioctl_dev_usage {
+       __u64                   dev;
+       __u32                   flags;
+       __u8                    state;
+       __u8                    pad[7];
+
+       __u32                   bucket_size;
+       __u64                   nr_buckets;
+       __u64                   available_buckets;
+
+       __u64                   buckets[BCH_DATA_NR];
+       __u64                   sectors[BCH_DATA_NR];
+
+       __u64                   ec_buckets;
+       __u64                   ec_sectors;
+};
+
+/*
+ * BCH_IOCTL_READ_SUPER: read filesystem superblock
+ *
+ * Equivalent to reading the superblock directly from the block device, except
+ * avoids racing with the kernel writing the superblock or having to figure out
+ * which block device to read
+ *
+ * @sb         - buffer to read into
+ * @size       - size of userspace allocated buffer
+ * @dev                - device to read superblock for, if BCH_READ_DEV flag is
+ *               specified
+ *
+ * Returns -ERANGE if buffer provided is too small
+ */
+struct bch_ioctl_read_super {
+       __u32                   flags;
+       __u32                   pad;
+       __u64                   dev;
+       __u64                   size;
+       __u64                   sb;
+};
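+
+/*
+ * Usage sketch, assuming one megabyte is enough for the superblock (on
+ * -ERANGE, retry with a bigger buffer):
+ *
+ *     char *buf = malloc(1 << 20);
+ *     struct bch_ioctl_read_super s = {
+ *             .size   = 1 << 20,
+ *             .sb     = (__u64) (unsigned long) buf,
+ *     };
+ *
+ *     if (!ioctl(ctl, BCH_IOCTL_READ_SUPER, &s)) {
+ *             /* buf now holds the filesystem's struct bch_sb */
+ *     }
+ */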
+
+/*
+ * BCH_IOCTL_DISK_GET_IDX: given a path to a block device, query the filesystem
+ * to determine if the disk is an (online) member - if so, returns the device's
+ * index
+ *
+ * Returns -ENOENT if not found
+ */
+struct bch_ioctl_disk_get_idx {
+       __u64                   dev;
+};
+
+/*
+ * BCH_IOCTL_DISK_RESIZE: resize filesystem on a device
+ *
+ * @dev                - member to resize
+ * @nbuckets   - new number of buckets
+ */
+struct bch_ioctl_disk_resize {
+       __u32                   flags;
+       __u32                   pad;
+       __u64                   dev;
+       __u64                   nbuckets;
+};
+
+#endif /* _BCACHEFS_IOCTL_H */
diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c
new file mode 100644 (file)
index 0000000..4d0c912
--- /dev/null
@@ -0,0 +1,1154 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey.h"
+#include "bkey_methods.h"
+#include "bset.h"
+#include "util.h"
+
+#undef EBUG_ON
+
+#ifdef DEBUG_BKEYS
+#define EBUG_ON(cond)          BUG_ON(cond)
+#else
+#define EBUG_ON(cond)
+#endif
+
+const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT;
+
+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *,
+                             const struct bkey_packed *);
+
+void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits)
+{
+       unsigned bit = high_bit_offset, done = 0;
+
+       while (1) {
+               while (bit < 64) {
+                       if (done && !(done % 8))
+                               *out++ = ' ';
+                       *out++ = *p & (1ULL << (63 - bit)) ? '1' : '0';
+                       bit++;
+                       done++;
+                       if (done == nr_bits) {
+                               *out++ = '\0';
+                               return;
+                       }
+               }
+
+               p = next_word(p);
+               bit = 0;
+       }
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
+                                const struct bkey *unpacked,
+                                const struct bkey_format *format)
+{
+       struct bkey tmp;
+
+       BUG_ON(bkeyp_val_u64s(format, packed) !=
+              bkey_val_u64s(unpacked));
+
+       BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed));
+
+       tmp = __bch2_bkey_unpack_key(format, packed);
+
+       if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
+               char buf1[160], buf2[160];
+               char buf3[160], buf4[160];
+
+               bch2_bkey_to_text(&PBUF(buf1), unpacked);
+               bch2_bkey_to_text(&PBUF(buf2), &tmp);
+               bch2_to_binary(buf3, (void *) unpacked, 80);
+               bch2_to_binary(buf4, high_word(format, packed), 80);
+
+               panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n",
+                     format->key_u64s,
+                     format->bits_per_field[0],
+                     format->bits_per_field[1],
+                     format->bits_per_field[2],
+                     format->bits_per_field[3],
+                     format->bits_per_field[4],
+                     buf1, buf2, buf3, buf4);
+       }
+}
+
+#else
+static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed,
+                                       const struct bkey *unpacked,
+                                       const struct bkey_format *format) {}
+#endif
+
+struct pack_state {
+       const struct bkey_format *format;
+       unsigned                bits;   /* bits remaining in current word */
+       u64                     w;      /* current word */
+       u64                     *p;     /* pointer to next word */
+};
+
+__always_inline
+static struct pack_state pack_state_init(const struct bkey_format *format,
+                                        struct bkey_packed *k)
+{
+       u64 *p = high_word(format, k);
+
+       return (struct pack_state) {
+               .format = format,
+               .bits   = 64 - high_bit_offset,
+               .w      = 0,
+               .p      = p,
+       };
+}
+
+__always_inline
+static void pack_state_finish(struct pack_state *state,
+                             struct bkey_packed *k)
+{
+       EBUG_ON(state->p <  k->_data);
+       EBUG_ON(state->p >= k->_data + state->format->key_u64s);
+
+       *state->p = state->w;
+}
+
+struct unpack_state {
+       const struct bkey_format *format;
+       unsigned                bits;   /* bits remaining in current word */
+       u64                     w;      /* current word */
+       const u64               *p;     /* pointer to next word */
+};
+
+__always_inline
+static struct unpack_state unpack_state_init(const struct bkey_format *format,
+                                            const struct bkey_packed *k)
+{
+       const u64 *p = high_word(format, k);
+
+       return (struct unpack_state) {
+               .format = format,
+               .bits   = 64 - high_bit_offset,
+               .w      = *p << high_bit_offset,
+               .p      = p,
+       };
+}
+
+__always_inline
+static u64 get_inc_field(struct unpack_state *state, unsigned field)
+{
+       unsigned bits = state->format->bits_per_field[field];
+       u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]);
+
+       if (bits >= state->bits) {
+               v = state->w >> (64 - bits);
+               bits -= state->bits;
+
+               state->p = next_word(state->p);
+               state->w = *state->p;
+               state->bits = 64;
+       }
+
+       /* avoid shift by 64 if bits is 0 - bits is never 64 here: */
+       v |= (state->w >> 1) >> (63 - bits);
+       state->w <<= bits;
+       state->bits -= bits;
+
+       return v + offset;
+}
+
+__always_inline
+static bool set_inc_field(struct pack_state *state, unsigned field, u64 v)
+{
+       unsigned bits = state->format->bits_per_field[field];
+       u64 offset = le64_to_cpu(state->format->field_offset[field]);
+
+       if (v < offset)
+               return false;
+
+       v -= offset;
+
+       if (fls64(v) > bits)
+               return false;
+
+       if (bits > state->bits) {
+               bits -= state->bits;
+               /* avoid shift by 64 if bits is 0 - bits is never 64 here: */
+               state->w |= (v >> 1) >> (bits - 1);
+
+               *state->p = state->w;
+               state->p = next_word(state->p);
+               state->w = 0;
+               state->bits = 64;
+       }
+
+       state->bits -= bits;
+       state->w |= v << state->bits;
+
+       return true;
+}
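+
+/*
+ * Worked example: with bits_per_field[field] = 20 and field_offset = 4096,
+ * set_inc_field(state, field, 5000) stores the 20-bit value 904
+ * (5000 - 4096) at the current bit position; get_inc_field() reads those 20
+ * bits back and re-adds the offset to recover 5000. A value below 4096, or
+ * one needing more than 20 bits after subtracting the offset, fails to pack.
+ */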
+
+/*
+ * Note: does NOT set out->format (we don't know what it should be here!)
+ *
+ * Also: doesn't work on extents - it doesn't preserve the invariant that
+ * if k is packed bkey_start_pos(k) will successfully pack
+ */
+static bool bch2_bkey_transform_key(const struct bkey_format *out_f,
+                                  struct bkey_packed *out,
+                                  const struct bkey_format *in_f,
+                                  const struct bkey_packed *in)
+{
+       struct pack_state out_s = pack_state_init(out_f, out);
+       struct unpack_state in_s = unpack_state_init(in_f, in);
+       unsigned i;
+
+       out->_data[0] = 0;
+
+       for (i = 0; i < BKEY_NR_FIELDS; i++)
+               if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i)))
+                       return false;
+
+       /* Can't happen because the val would be too big to unpack: */
+       EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX);
+
+       pack_state_finish(&out_s, out);
+       out->u64s       = out_f->key_u64s + in->u64s - in_f->key_u64s;
+       out->needs_whiteout = in->needs_whiteout;
+       out->type       = in->type;
+
+       return true;
+}
+
+bool bch2_bkey_transform(const struct bkey_format *out_f,
+                       struct bkey_packed *out,
+                       const struct bkey_format *in_f,
+                       const struct bkey_packed *in)
+{
+       if (!bch2_bkey_transform_key(out_f, out, in_f, in))
+               return false;
+
+       memcpy_u64s((u64 *) out + out_f->key_u64s,
+                   (u64 *) in + in_f->key_u64s,
+                   (in->u64s - in_f->key_u64s));
+       return true;
+}
+
+#define bkey_fields()                                                  \
+       x(BKEY_FIELD_INODE,             p.inode)                        \
+       x(BKEY_FIELD_OFFSET,            p.offset)                       \
+       x(BKEY_FIELD_SNAPSHOT,          p.snapshot)                     \
+       x(BKEY_FIELD_SIZE,              size)                           \
+       x(BKEY_FIELD_VERSION_HI,        version.hi)                     \
+       x(BKEY_FIELD_VERSION_LO,        version.lo)
+
+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format,
+                             const struct bkey_packed *in)
+{
+       struct unpack_state state = unpack_state_init(format, in);
+       struct bkey out;
+
+       EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
+       EBUG_ON(in->u64s < format->key_u64s);
+       EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
+       EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX);
+
+       out.u64s        = BKEY_U64s + in->u64s - format->key_u64s;
+       out.format      = KEY_FORMAT_CURRENT;
+       out.needs_whiteout = in->needs_whiteout;
+       out.type        = in->type;
+       out.pad[0]      = 0;
+
+#define x(id, field)   out.field = get_inc_field(&state, id);
+       bkey_fields()
+#undef x
+
+       return out;
+}
+
+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK
+struct bpos __bkey_unpack_pos(const struct bkey_format *format,
+                                    const struct bkey_packed *in)
+{
+       struct unpack_state state = unpack_state_init(format, in);
+       struct bpos out;
+
+       EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
+       EBUG_ON(in->u64s < format->key_u64s);
+       EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
+
+       out.inode       = get_inc_field(&state, BKEY_FIELD_INODE);
+       out.offset      = get_inc_field(&state, BKEY_FIELD_OFFSET);
+       out.snapshot    = get_inc_field(&state, BKEY_FIELD_SNAPSHOT);
+
+       return out;
+}
+#endif
+
+/**
+ * bch2_bkey_pack_key -- pack just the key, not the value
+ */
+bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
+                  const struct bkey_format *format)
+{
+       struct pack_state state = pack_state_init(format, out);
+
+       EBUG_ON((void *) in == (void *) out);
+       EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
+       EBUG_ON(in->format != KEY_FORMAT_CURRENT);
+
+       out->_data[0] = 0;
+
+#define x(id, field)   if (!set_inc_field(&state, id, in->field)) return false;
+       bkey_fields()
+#undef x
+
+       /*
+        * Extents - we have to guarantee that if an extent is packed, a trimmed
+        * version will also pack:
+        */
+       if (bkey_start_offset(in) <
+           le64_to_cpu(format->field_offset[BKEY_FIELD_OFFSET]))
+               return false;
+
+       pack_state_finish(&state, out);
+       out->u64s       = format->key_u64s + in->u64s - BKEY_U64s;
+       out->format     = KEY_FORMAT_LOCAL_BTREE;
+       out->needs_whiteout = in->needs_whiteout;
+       out->type       = in->type;
+
+       bch2_bkey_pack_verify(out, in, format);
+       return true;
+}
+
+/**
+ * bch2_bkey_unpack -- unpack the key and the value
+ */
+void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
+                const struct bkey_packed *src)
+{
+       __bkey_unpack_key(b, &dst->k, src);
+
+       memcpy_u64s(&dst->v,
+                   bkeyp_val(&b->format, src),
+                   bkeyp_val_u64s(&b->format, src));
+}
+
+/**
+ * bch2_bkey_pack -- pack the key and the value
+ */
+bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in,
+              const struct bkey_format *format)
+{
+       struct bkey_packed tmp;
+
+       if (!bch2_bkey_pack_key(&tmp, &in->k, format))
+               return false;
+
+       memmove_u64s((u64 *) out + format->key_u64s,
+                    &in->v,
+                    bkey_val_u64s(&in->k));
+       memcpy_u64s(out, &tmp, format->key_u64s);
+
+       return true;
+}
+
+__always_inline
+static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v)
+{
+       unsigned bits = state->format->bits_per_field[field];
+       u64 offset = le64_to_cpu(state->format->field_offset[field]);
+       bool ret = true;
+
+       EBUG_ON(v < offset);
+       v -= offset;
+
+       if (fls64(v) > bits) {
+               v = ~(~0ULL << bits);
+               ret = false;
+       }
+
+       if (bits > state->bits) {
+               bits -= state->bits;
+               state->w |= (v >> 1) >> (bits - 1);
+
+               *state->p = state->w;
+               state->p = next_word(state->p);
+               state->w = 0;
+               state->bits = 64;
+       }
+
+       state->bits -= bits;
+       state->w |= v << state->bits;
+
+       return ret;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+static bool bkey_packed_successor(struct bkey_packed *out,
+                                 const struct btree *b,
+                                 struct bkey_packed k)
+{
+       const struct bkey_format *f = &b->format;
+       unsigned nr_key_bits = b->nr_key_bits;
+       unsigned first_bit, offset;
+       u64 *p;
+
+       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+       if (!nr_key_bits)
+               return false;
+
+       *out = k;
+
+       first_bit = high_bit_offset + nr_key_bits - 1;
+       p = nth_word(high_word(f, out), first_bit >> 6);
+       offset = 63 - (first_bit & 63);
+
+       while (nr_key_bits) {
+               unsigned bits = min(64 - offset, nr_key_bits);
+               u64 mask = (~0ULL >> (64 - bits)) << offset;
+
+               if ((*p & mask) != mask) {
+                       *p += 1ULL << offset;
+                       EBUG_ON(bkey_cmp_packed(b, out, &k) <= 0);
+                       return true;
+               }
+
+               *p &= ~mask;
+               p = prev_word(p);
+               nr_key_bits -= bits;
+               offset = 0;
+       }
+
+       return false;
+}
+#endif
+
+/*
+ * Returns a packed key that compares <= in
+ *
+ * This is used in bset_search_tree(), where we need a packed pos in order to be
+ * able to compare against the keys in the auxiliary search tree - and it's
+ * legal to use a packed pos that isn't equivalent to the original pos,
+ * _provided_ it compares <= to the original pos.
+ */
+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
+                                          struct bpos in,
+                                          const struct btree *b)
+{
+       const struct bkey_format *f = &b->format;
+       struct pack_state state = pack_state_init(f, out);
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct bpos orig = in;
+#endif
+       bool exact = true;
+
+       out->_data[0] = 0;
+
+       if (unlikely(in.snapshot <
+                    le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) {
+               if (!in.offset-- &&
+                   !in.inode--)
+                       return BKEY_PACK_POS_FAIL;
+               in.snapshot     = KEY_SNAPSHOT_MAX;
+               exact = false;
+       }
+
+       if (unlikely(in.offset <
+                    le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) {
+               if (!in.inode--)
+                       return BKEY_PACK_POS_FAIL;
+               in.offset       = KEY_OFFSET_MAX;
+               in.snapshot     = KEY_SNAPSHOT_MAX;
+               exact = false;
+       }
+
+       if (unlikely(in.inode <
+                    le64_to_cpu(f->field_offset[BKEY_FIELD_INODE])))
+               return BKEY_PACK_POS_FAIL;
+
+       if (!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode)) {
+               in.offset       = KEY_OFFSET_MAX;
+               in.snapshot     = KEY_SNAPSHOT_MAX;
+               exact = false;
+       }
+
+       if (!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset)) {
+               in.snapshot     = KEY_SNAPSHOT_MAX;
+               exact = false;
+       }
+
+       if (!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot))
+               exact = false;
+
+       pack_state_finish(&state, out);
+       out->u64s       = f->key_u64s;
+       out->format     = KEY_FORMAT_LOCAL_BTREE;
+       out->type       = KEY_TYPE_deleted;
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       if (exact) {
+               BUG_ON(bkey_cmp_left_packed(b, out, &orig));
+       } else {
+               struct bkey_packed successor;
+
+               BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0);
+               BUG_ON(bkey_packed_successor(&successor, b, *out) &&
+                      bkey_cmp_left_packed(b, &successor, &orig) < 0);
+       }
+#endif
+
+       return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER;
+}
+
+void bch2_bkey_format_init(struct bkey_format_state *s)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(s->field_min); i++)
+               s->field_min[i] = U64_MAX;
+
+       for (i = 0; i < ARRAY_SIZE(s->field_max); i++)
+               s->field_max[i] = 0;
+
+       /* Make sure we can store a size of 0: */
+       s->field_min[BKEY_FIELD_SIZE] = 0;
+}
+
+static void __bkey_format_add(struct bkey_format_state *s,
+                             unsigned field, u64 v)
+{
+       s->field_min[field] = min(s->field_min[field], v);
+       s->field_max[field] = max(s->field_max[field], v);
+}
+
+/*
+ * Changes @s so that @k can be successfully packed with the format @s generates
+ */
+void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k)
+{
+#define x(id, field) __bkey_format_add(s, id, k->field);
+       bkey_fields()
+#undef x
+       __bkey_format_add(s, BKEY_FIELD_OFFSET, bkey_start_offset(k));
+}
+
+void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p)
+{
+       unsigned field = 0;
+
+       __bkey_format_add(s, field++, p.inode);
+       __bkey_format_add(s, field++, p.offset);
+       __bkey_format_add(s, field++, p.snapshot);
+}
+
+/*
+ * We don't want it to be possible for the packed format to represent fields
+ * bigger than a u64... that will cause confusion and issues (like with
+ * bkey_packed_successor())
+ */
+static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i,
+                            unsigned bits, u64 offset)
+{
+       offset = bits == 64 ? 0 : min(offset, U64_MAX - ((1ULL << bits) - 1));
+
+       f->bits_per_field[i]    = bits;
+       f->field_offset[i]      = cpu_to_le64(offset);
+}
+
+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
+{
+       unsigned i, bits = KEY_PACKED_BITS_START;
+       struct bkey_format ret = {
+               .nr_fields = BKEY_NR_FIELDS,
+       };
+
+       for (i = 0; i < ARRAY_SIZE(s->field_min); i++) {
+               s->field_min[i] = min(s->field_min[i], s->field_max[i]);
+
+               set_format_field(&ret, i,
+                                fls64(s->field_max[i] - s->field_min[i]),
+                                s->field_min[i]);
+
+               bits += ret.bits_per_field[i];
+       }
+
+       /* allow for extent merging: */
+       if (ret.bits_per_field[BKEY_FIELD_SIZE]) {
+               ret.bits_per_field[BKEY_FIELD_SIZE] += 4;
+               bits += 4;
+       }
+
+       ret.key_u64s = DIV_ROUND_UP(bits, 64);
+
+       /* if we have enough spare bits, round fields up to nearest byte */
+       bits = ret.key_u64s * 64 - bits;
+
+       for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) {
+               unsigned r = round_up(ret.bits_per_field[i], 8) -
+                       ret.bits_per_field[i];
+
+               if (r <= bits) {
+                       set_format_field(&ret, i,
+                                        ret.bits_per_field[i] + r,
+                                        le64_to_cpu(ret.field_offset[i]));
+                       bits -= r;
+               }
+       }
+
+       EBUG_ON(bch2_bkey_format_validate(&ret));
+       return ret;
+}
+
+const char *bch2_bkey_format_validate(struct bkey_format *f)
+{
+       unsigned i, bits = KEY_PACKED_BITS_START;
+
+       if (f->nr_fields != BKEY_NR_FIELDS)
+               return "incorrect number of fields";
+
+       for (i = 0; i < f->nr_fields; i++) {
+               u64 field_offset = le64_to_cpu(f->field_offset[i]);
+
+               if (f->bits_per_field[i] > 64)
+                       return "field too large";
+
+               if (field_offset &&
+                   (f->bits_per_field[i] == 64 ||
+                   (field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
+                    field_offset)))
+                       return "offset + bits overflow";
+
+               bits += f->bits_per_field[i];
+       }
+
+       if (f->key_u64s != DIV_ROUND_UP(bits, 64))
+               return "incorrect key_u64s";
+
+       return NULL;
+}
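+
+/*
+ * Putting the pieces above together - a sketch of how a packed format is
+ * typically built for a set of keys (the loop condition is pseudocode):
+ *
+ *     struct bkey_format_state s;
+ *     struct bkey_format f;
+ *
+ *     bch2_bkey_format_init(&s);
+ *     for (each bkey k that must pack)
+ *             bch2_bkey_format_add_key(&s, &k);
+ *
+ *     f = bch2_bkey_format_done(&s);
+ *     BUG_ON(bch2_bkey_format_validate(&f));  /* NULL return means valid */
+ */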
+
+/*
+ * Most significant differing bit
+ * Bits are indexed from 0 - return is [0, nr_key_bits)
+ */
+__pure
+unsigned bch2_bkey_greatest_differing_bit(const struct btree *b,
+                                         const struct bkey_packed *l_k,
+                                         const struct bkey_packed *r_k)
+{
+       const u64 *l = high_word(&b->format, l_k);
+       const u64 *r = high_word(&b->format, r_k);
+       unsigned nr_key_bits = b->nr_key_bits;
+       unsigned word_bits = 64 - high_bit_offset;
+       u64 l_v, r_v;
+
+       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
+
+       /* for big endian, skip past header */
+       l_v = *l & (~0ULL >> high_bit_offset);
+       r_v = *r & (~0ULL >> high_bit_offset);
+
+       while (nr_key_bits) {
+               if (nr_key_bits < word_bits) {
+                       l_v >>= word_bits - nr_key_bits;
+                       r_v >>= word_bits - nr_key_bits;
+                       nr_key_bits = 0;
+               } else {
+                       nr_key_bits -= word_bits;
+               }
+
+               if (l_v != r_v)
+                       return fls64(l_v ^ r_v) - 1 + nr_key_bits;
+
+               l = next_word(l);
+               r = next_word(r);
+
+               l_v = *l;
+               r_v = *r;
+               word_bits = 64;
+       }
+
+       return 0;
+}
+
+/*
+ * First set bit
+ * Bits are indexed from 0 - return is [0, nr_key_bits)
+ */
+__pure
+unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
+{
+       const u64 *p = high_word(&b->format, k);
+       unsigned nr_key_bits = b->nr_key_bits;
+       unsigned ret = 0, offset;
+
+       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
+
+       offset = nr_key_bits;
+       while (offset > 64) {
+               p = next_word(p);
+               offset -= 64;
+       }
+
+       offset = 64 - offset;
+
+       while (nr_key_bits) {
+               unsigned bits = nr_key_bits + offset < 64
+                       ? nr_key_bits
+                       : 64 - offset;
+
+               u64 mask = (~0ULL >> (64 - bits)) << offset;
+
+               if (*p & mask)
+                       return ret + __ffs64(*p & mask) - offset;
+
+               p = prev_word(p);
+               nr_key_bits -= bits;
+               ret += bits;
+               offset = 0;
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_X86_64
+
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+                                 unsigned nr_key_bits)
+{
+       long d0, d1, d2, d3;
+       int cmp;
+
+       /* we shouldn't need asm for this, but gcc generates poor code for it: */
+
+       asm(".intel_syntax noprefix;"
+           "xor eax, eax;"
+           "xor edx, edx;"
+           "1:;"
+           "mov r8, [rdi];"
+           "mov r9, [rsi];"
+           "sub ecx, 64;"
+           "jl 2f;"
+
+           "cmp r8, r9;"
+           "jnz 3f;"
+
+           "lea rdi, [rdi - 8];"
+           "lea rsi, [rsi - 8];"
+           "jmp 1b;"
+
+           "2:;"
+           "not ecx;"
+           "shr r8, 1;"
+           "shr r9, 1;"
+           "shr r8, cl;"
+           "shr r9, cl;"
+           "cmp r8, r9;"
+
+           "3:\n"
+           "seta al;"
+           "setb dl;"
+           "sub eax, edx;"
+           ".att_syntax prefix;"
+           : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
+           : "0" (l), "1" (r), "3" (nr_key_bits)
+           : "r8", "r9", "cc", "memory");
+
+       return cmp;
+}
+
+#define I(_x)                  (*(out)++ = (_x))
+#define I1(i0)                                         I(i0)
+#define I2(i0, i1)             (I1(i0),                I(i1))
+#define I3(i0, i1, i2)         (I2(i0, i1),            I(i2))
+#define I4(i0, i1, i2, i3)     (I3(i0, i1, i2),        I(i3))
+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3),    I(i4))
+
+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
+                             enum bch_bkey_fields field,
+                             unsigned dst_offset, unsigned dst_size,
+                             bool *eax_zeroed)
+{
+       unsigned bits = format->bits_per_field[field];
+       u64 offset = le64_to_cpu(format->field_offset[field]);
+       unsigned i, byte, bit_offset, align, shl, shr;
+
+       if (!bits && !offset) {
+               if (!*eax_zeroed) {
+                       /* xor eax, eax */
+                       I2(0x31, 0xc0);
+               }
+
+               *eax_zeroed = true;
+               goto set_field;
+       }
+
+       if (!bits) {
+               /* just return offset: */
+
+               switch (dst_size) {
+               case 8:
+                       if (offset > S32_MAX) {
+                               /* mov [rdi + dst_offset], offset */
+                               I3(0xc7, 0x47, dst_offset);
+                               memcpy(out, &offset, 4);
+                               out += 4;
+
+                               I3(0xc7, 0x47, dst_offset + 4);
+                               memcpy(out, (void *) &offset + 4, 4);
+                               out += 4;
+                       } else {
+                               /* mov [rdi + dst_offset], offset */
+                               /* sign extended */
+                               I4(0x48, 0xc7, 0x47, dst_offset);
+                               memcpy(out, &offset, 4);
+                               out += 4;
+                       }
+                       break;
+               case 4:
+                       /* mov [rdi + dst_offset], offset */
+                       I3(0xc7, 0x47, dst_offset);
+                       memcpy(out, &offset, 4);
+                       out += 4;
+                       break;
+               default:
+                       BUG();
+               }
+
+               return out;
+       }
+
+       bit_offset = format->key_u64s * 64;
+       for (i = 0; i <= field; i++)
+               bit_offset -= format->bits_per_field[i];
+
+       byte = bit_offset / 8;
+       bit_offset -= byte * 8;
+
+       *eax_zeroed = false;
+
+       if (bit_offset == 0 && bits == 8) {
+               /* movzx eax, BYTE PTR [rsi + imm8] */
+               I4(0x0f, 0xb6, 0x46, byte);
+       } else if (bit_offset == 0 && bits == 16) {
+               /* movzx eax, WORD PTR [rsi + imm8] */
+               I4(0x0f, 0xb7, 0x46, byte);
+       } else if (bit_offset + bits <= 32) {
+               align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
+               byte -= align;
+               bit_offset += align * 8;
+
+               BUG_ON(bit_offset + bits > 32);
+
+               /* mov eax, [rsi + imm8] */
+               I3(0x8b, 0x46, byte);
+
+               if (bit_offset) {
+                       /* shr eax, imm8 */
+                       I3(0xc1, 0xe8, bit_offset);
+               }
+
+               if (bit_offset + bits < 32) {
+                       unsigned mask = ~0U >> (32 - bits);
+
+                       /* and eax, imm32 */
+                       I1(0x25);
+                       memcpy(out, &mask, 4);
+                       out += 4;
+               }
+       } else if (bit_offset + bits <= 64) {
+               align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7);
+               byte -= align;
+               bit_offset += align * 8;
+
+               BUG_ON(bit_offset + bits > 64);
+
+               /* mov rax, [rsi + imm8] */
+               I4(0x48, 0x8b, 0x46, byte);
+
+               shl = 64 - bit_offset - bits;
+               shr = bit_offset + shl;
+
+               if (shl) {
+                       /* shl rax, imm8 */
+                       I4(0x48, 0xc1, 0xe0, shl);
+               }
+
+               if (shr) {
+                       /* shr rax, imm8 */
+                       I4(0x48, 0xc1, 0xe8, shr);
+               }
+       } else {
+               align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
+               byte -= align;
+               bit_offset += align * 8;
+
+               BUG_ON(bit_offset + bits > 96);
+
+               /* mov rax, [rsi + byte] */
+               I4(0x48, 0x8b, 0x46, byte);
+
+               /* mov edx, [rsi + byte + 8] */
+               I3(0x8b, 0x56, byte + 8);
+
+               /* bits from next word: */
+               shr = bit_offset + bits - 64;
+               BUG_ON(shr > bit_offset);
+
+               /* shr rax, bit_offset */
+               I4(0x48, 0xc1, 0xe8, shr);
+
+               /* shl rdx, imm8 */
+               I4(0x48, 0xc1, 0xe2, 64 - shr);
+
+               /* or rax, rdx */
+               I3(0x48, 0x09, 0xd0);
+
+               shr = bit_offset - shr;
+
+               if (shr) {
+                       /* shr rax, imm8 */
+                       I4(0x48, 0xc1, 0xe8, shr);
+               }
+       }
+
+       /* rax += offset: */
+       if (offset > S32_MAX) {
+               /* mov rdx, imm64 */
+               I2(0x48, 0xba);
+               memcpy(out, &offset, 8);
+               out += 8;
+               /* add %rdx, %rax */
+               I3(0x48, 0x01, 0xd0);
+       } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) {
+               /* add rax, imm32 */
+               I2(0x48, 0x05);
+               memcpy(out, &offset, 4);
+               out += 4;
+       } else if (offset) {
+               /* add eax, imm32 */
+               I1(0x05);
+               memcpy(out, &offset, 4);
+               out += 4;
+       }
+set_field:
+       switch (dst_size) {
+       case 8:
+               /* mov [rdi + dst_offset], rax */
+               I4(0x48, 0x89, 0x47, dst_offset);
+               break;
+       case 4:
+               /* mov [rdi + dst_offset], eax */
+               I3(0x89, 0x47, dst_offset);
+               break;
+       default:
+               BUG();
+       }
+
+       return out;
+}
+
+int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
+{
+       bool eax_zeroed = false;
+       u8 *out = _out;
+
+       /*
+        * rdi: dst - unpacked key
+        * rsi: src - packed key
+        */
+
+       /* k->u64s, k->format, k->type */
+
+       /* mov eax, [rsi] */
+       I2(0x8b, 0x06);
+
+       /* add eax, BKEY_U64s - format->key_u64s */
+       I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0);
+
+       /* and eax, imm32: mask out k->pad: */
+       I5(0x25, 0xff, 0xff, 0xff, 0);
+
+       /* mov [rdi], eax */
+       I2(0x89, 0x07);
+
+#define x(id, field)                                                   \
+       out = compile_bkey_field(format, out, id,                       \
+                                offsetof(struct bkey, field),          \
+                                sizeof(((struct bkey *) NULL)->field), \
+                                &eax_zeroed);
+       bkey_fields()
+#undef x
+
+       /* retq */
+       I1(0xc3);
+
+       return (void *) out - _out;
+}
+
+#else
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+                                 unsigned nr_key_bits)
+{
+       u64 l_v, r_v;
+
+       if (!nr_key_bits)
+               return 0;
+
+       /* for big endian, skip past header */
+       nr_key_bits += high_bit_offset;
+       l_v = *l & (~0ULL >> high_bit_offset);
+       r_v = *r & (~0ULL >> high_bit_offset);
+
+       while (1) {
+               if (nr_key_bits < 64) {
+                       l_v >>= 64 - nr_key_bits;
+                       r_v >>= 64 - nr_key_bits;
+                       nr_key_bits = 0;
+               } else {
+                       nr_key_bits -= 64;
+               }
+
+               if (!nr_key_bits || l_v != r_v)
+                       break;
+
+               l = next_word(l);
+               r = next_word(r);
+
+               l_v = *l;
+               r_v = *r;
+       }
+
+       return cmp_int(l_v, r_v);
+}
+#endif
+
+__pure
+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
+                                         const struct bkey_packed *r,
+                                         const struct btree *b)
+{
+       const struct bkey_format *f = &b->format;
+       int ret;
+
+       EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
+       EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+       ret = __bkey_cmp_bits(high_word(f, l),
+                             high_word(f, r),
+                             b->nr_key_bits);
+
+       EBUG_ON(ret != bkey_cmp(bkey_unpack_pos(b, l),
+                               bkey_unpack_pos(b, r)));
+       return ret;
+}
+
+__pure __flatten
+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b,
+                                              const struct bkey_packed *l,
+                                              const struct bpos *r)
+{
+       return bkey_cmp(bkey_unpack_pos_format_checked(b, l), *r);
+}
+
+__pure __flatten
+int __bch2_bkey_cmp_packed(const struct bkey_packed *l,
+                          const struct bkey_packed *r,
+                          const struct btree *b)
+{
+       struct bkey unpacked;
+
+       if (likely(bkey_packed(l) && bkey_packed(r)))
+               return __bch2_bkey_cmp_packed_format_checked(l, r, b);
+
+       if (bkey_packed(l)) {
+               __bkey_unpack_key_format_checked(b, &unpacked, l);
+               l = (void*) &unpacked;
+       } else if (bkey_packed(r)) {
+               __bkey_unpack_key_format_checked(b, &unpacked, r);
+               r = (void*) &unpacked;
+       }
+
+       return bkey_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+}
+
+__pure __flatten
+int __bch2_bkey_cmp_left_packed(const struct btree *b,
+                               const struct bkey_packed *l,
+                               const struct bpos *r)
+{
+       const struct bkey *l_unpacked;
+
+       return unlikely(l_unpacked = packed_to_bkey_c(l))
+               ? bkey_cmp(l_unpacked->p, *r)
+               : __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
+}
+
+void bch2_bpos_swab(struct bpos *p)
+{
+       u8 *l = (u8 *) p;
+       u8 *h = ((u8 *) &p[1]) - 1;
+
+       while (l < h) {
+               swap(*l, *h);
+               l++;
+               --h;
+       }
+}
+
+void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
+{
+       const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
+       u8 *l = k->key_start;
+       u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
+
+       while (l < h) {
+               swap(*l, *h);
+               l++;
+               --h;
+       }
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_bkey_pack_test(void)
+{
+       struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0);
+       struct bkey_packed p;
+
+       struct bkey_format test_format = {
+               .key_u64s       = 2,
+               .nr_fields      = BKEY_NR_FIELDS,
+               .bits_per_field = {
+                       13,
+                       64,
+               },
+       };
+
+       struct unpack_state in_s =
+               unpack_state_init(&bch2_bkey_format_current, (void *) &t);
+       struct pack_state out_s = pack_state_init(&test_format, &p);
+       unsigned i;
+
+       for (i = 0; i < out_s.format->nr_fields; i++) {
+               u64 a, v = get_inc_field(&in_s, i);
+
+               switch (i) {
+#define x(id, field)   case id: a = t.field; break;
+       bkey_fields()
+#undef x
+               default:
+                       BUG();
+               }
+
+               if (a != v)
+                       panic("got %llu actual %llu i %u\n", v, a, i);
+
+               if (!set_inc_field(&out_s, i, v))
+                       panic("failed at %u\n", i);
+       }
+
+       BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format));
+}
+#endif
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
new file mode 100644 (file)
index 0000000..cbcfbd2
--- /dev/null
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_H
+#define _BCACHEFS_BKEY_H
+
+#include <linux/bug.h>
+#include "bcachefs_format.h"
+
+#include "util.h"
+#include "vstructs.h"
+
+#ifdef CONFIG_X86_64
+#define HAVE_BCACHEFS_COMPILED_UNPACK  1
+#endif
+
+void bch2_to_binary(char *, const u64 *, unsigned);
+
+/* bkey with split value, const */
+struct bkey_s_c {
+       const struct bkey       *k;
+       const struct bch_val    *v;
+};
+
+/* bkey with split value */
+struct bkey_s {
+       union {
+       struct {
+               struct bkey     *k;
+               struct bch_val  *v;
+       };
+       struct bkey_s_c         s_c;
+       };
+};
+
+#define bkey_next(_k)          vstruct_next(_k)
+
+static inline struct bkey_packed *bkey_next_skip_noops(struct bkey_packed *k,
+                                                      struct bkey_packed *end)
+{
+       k = bkey_next(k);
+
+       while (k != end && !k->u64s)
+               k = (void *) ((u64 *) k + 1);
+       return k;
+}
+
+#define bkey_val_u64s(_k)      ((_k)->u64s - BKEY_U64s)
+
+static inline size_t bkey_val_bytes(const struct bkey *k)
+{
+       return bkey_val_u64s(k) * sizeof(u64);
+}
+
+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
+{
+       k->u64s = BKEY_U64s + val_u64s;
+}
+
+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
+{
+       k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
+}
+
+#define bkey_val_end(_k)       ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
+
+#define bkey_deleted(_k)       ((_k)->type == KEY_TYPE_deleted)
+
+#define bkey_whiteout(_k)                              \
+       ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard)
+
+#define bkey_packed_typecheck(_k)                                      \
+({                                                                     \
+       BUILD_BUG_ON(!type_is(_k, struct bkey *) &&                     \
+                    !type_is(_k, struct bkey_packed *));               \
+       type_is(_k, struct bkey_packed *);                              \
+})
+
+enum bkey_lr_packed {
+       BKEY_PACKED_BOTH,
+       BKEY_PACKED_RIGHT,
+       BKEY_PACKED_LEFT,
+       BKEY_PACKED_NONE,
+};
+
+#define bkey_lr_packed_typecheck(_l, _r)                               \
+       (!bkey_packed_typecheck(_l) + ((!bkey_packed_typecheck(_r)) << 1))
+
+#define bkey_lr_packed(_l, _r)                                         \
+       ((_l)->format + ((_r)->format << 1))
+
+#define bkey_copy(_dst, _src)                                  \
+do {                                                           \
+       BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) &&         \
+                    !type_is(_dst, struct bkey_packed *));     \
+       BUILD_BUG_ON(!type_is(_src, struct bkey_i *) &&         \
+                    !type_is(_src, struct bkey_packed *));     \
+       EBUG_ON((u64 *) (_dst) > (u64 *) (_src) &&              \
+               (u64 *) (_dst) < (u64 *) (_src) +               \
+               ((struct bkey *) (_src))->u64s);                \
+                                                               \
+       memcpy_u64s_small((_dst), (_src),                       \
+                         ((struct bkey *) (_src))->u64s);      \
+} while (0)
+
+struct btree;
+
+struct bkey_format_state {
+       u64 field_min[BKEY_NR_FIELDS];
+       u64 field_max[BKEY_NR_FIELDS];
+};
+
+void bch2_bkey_format_init(struct bkey_format_state *);
+void bch2_bkey_format_add_key(struct bkey_format_state *, const struct bkey *);
+void bch2_bkey_format_add_pos(struct bkey_format_state *, struct bpos);
+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *);
+const char *bch2_bkey_format_validate(struct bkey_format *);
+
+__pure
+unsigned bch2_bkey_greatest_differing_bit(const struct btree *,
+                                         const struct bkey_packed *,
+                                         const struct bkey_packed *);
+__pure
+unsigned bch2_bkey_ffs(const struct btree *, const struct bkey_packed *);
+
+__pure
+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *,
+                                    const struct bkey_packed *,
+                                    const struct btree *);
+
+__pure
+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *,
+                                         const struct bkey_packed *,
+                                         const struct bpos *);
+
+__pure
+int __bch2_bkey_cmp_packed(const struct bkey_packed *,
+                          const struct bkey_packed *,
+                          const struct btree *);
+
+__pure
+int __bch2_bkey_cmp_left_packed(const struct btree *,
+                               const struct bkey_packed *,
+                               const struct bpos *);
+
+static inline __pure
+int bkey_cmp_left_packed(const struct btree *b,
+                        const struct bkey_packed *l, const struct bpos *r)
+{
+       return __bch2_bkey_cmp_left_packed(b, l, r);
+}
+
+/*
+ * we prefer to pass bpos by ref, but it's often terribly convenient to pass
+ * it by val... as much as I hate c++, const ref would be nice here:
+ */
+__pure __flatten
+static inline int bkey_cmp_left_packed_byval(const struct btree *b,
+                                            const struct bkey_packed *l,
+                                            struct bpos r)
+{
+       return bkey_cmp_left_packed(b, l, &r);
+}
+
+/*
+ * If @_l or @_r are struct bkey * (not bkey_packed *), uses type information to
+ * skip dispatching on k->format:
+ */
+#define bkey_cmp_packed(_b, _l, _r)                                    \
+({                                                                     \
+       int _cmp;                                                       \
+                                                                       \
+       switch (bkey_lr_packed_typecheck(_l, _r)) {                     \
+       case BKEY_PACKED_NONE:                                          \
+               _cmp = bkey_cmp(((struct bkey *) (_l))->p,              \
+                               ((struct bkey *) (_r))->p);             \
+               break;                                                  \
+       case BKEY_PACKED_LEFT:                                          \
+               _cmp = bkey_cmp_left_packed((_b),                       \
+                                 (struct bkey_packed *) (_l),          \
+                                 &((struct bkey *) (_r))->p);          \
+               break;                                                  \
+       case BKEY_PACKED_RIGHT:                                         \
+               _cmp = -bkey_cmp_left_packed((_b),                      \
+                                 (struct bkey_packed *) (_r),          \
+                                 &((struct bkey *) (_l))->p);          \
+               break;                                                  \
+       case BKEY_PACKED_BOTH:                                          \
+               _cmp = __bch2_bkey_cmp_packed((void *) (_l),            \
+                                        (void *) (_r), (_b));          \
+               break;                                                  \
+       }                                                               \
+       _cmp;                                                           \
+})
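+
+/*
+ * Editor's sketch of the dispatch above: with an unpacked left key and a
+ * packed right key, bkey_lr_packed_typecheck() resolves at compile time to
+ * BKEY_PACKED_RIGHT, so only the negated left-packed comparison is emitted:
+ *
+ *	struct bkey *l = ...;
+ *	struct bkey_packed *r = ...;
+ *	int cmp = bkey_cmp_packed(b, l, r);
+ */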
+
+#if 1
+static __always_inline int bkey_cmp(struct bpos l, struct bpos r)
+{
+       if (l.inode != r.inode)
+               return l.inode < r.inode ? -1 : 1;
+       if (l.offset != r.offset)
+               return l.offset < r.offset ? -1 : 1;
+       if (l.snapshot != r.snapshot)
+               return l.snapshot < r.snapshot ? -1 : 1;
+       return 0;
+}
+#else
+int bkey_cmp(struct bpos l, struct bpos r);
+#endif
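+
+/*
+ * Example (editor's note): positions order lexicographically by
+ * (inode, offset, snapshot), so e.g.
+ *
+ *	bkey_cmp(POS(1, 100), POS(1, 200)) < 0
+ *	bkey_cmp(POS(2,   0), POS(1, 200)) > 0
+ *
+ * (assuming the usual POS() constructor building a struct bpos with
+ * snapshot 0)
+ */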
+
+static inline struct bpos bpos_min(struct bpos l, struct bpos r)
+{
+       return bkey_cmp(l, r) < 0 ? l : r;
+}
+
+void bch2_bpos_swab(struct bpos *);
+void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
+
+static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
+{
+       return  cmp_int(l.hi, r.hi) ?:
+               cmp_int(l.lo, r.lo);
+}
+
+#define ZERO_VERSION   ((struct bversion) { .hi = 0, .lo = 0 })
+#define MAX_VERSION    ((struct bversion) { .hi = ~0, .lo = ~0ULL })
+
+static __always_inline int bversion_zero(struct bversion v)
+{
+       return !bversion_cmp(v, ZERO_VERSION);
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+/* statement expressions confusing unlikely()? */
+#define bkey_packed(_k)                                                        \
+       ({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT);                  \
+        (_k)->format != KEY_FORMAT_CURRENT; })
+#else
+#define bkey_packed(_k)                ((_k)->format != KEY_FORMAT_CURRENT)
+#endif
+
+/*
+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse
+ */
+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k)
+{
+       return (struct bkey_packed *) k;
+}
+
+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k)
+{
+       return (const struct bkey_packed *) k;
+}
+
+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k)
+{
+       return bkey_packed(k) ? NULL : (struct bkey_i *) k;
+}
+
+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k)
+{
+       return bkey_packed(k) ? NULL : (const struct bkey *) k;
+}
+
+static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
+{
+       return format->bits_per_field[BKEY_FIELD_INODE] +
+               format->bits_per_field[BKEY_FIELD_OFFSET] +
+               format->bits_per_field[BKEY_FIELD_SNAPSHOT];
+}
+
+static inline struct bpos bkey_successor(struct bpos p)
+{
+       struct bpos ret = p;
+
+       if (!++ret.offset)
+               BUG_ON(!++ret.inode);
+
+       return ret;
+}
+
+static inline struct bpos bkey_predecessor(struct bpos p)
+{
+       struct bpos ret = p;
+
+       if (!ret.offset--)
+               BUG_ON(!ret.inode--);
+
+       return ret;
+}
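+
+/*
+ * Editor's note on the wraparound above: offset carries into inode, so the
+ * successor of (inode 1, offset U64_MAX) is (inode 2, offset 0); the
+ * BUG_ON()s only fire if the inode field itself wraps.
+ */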
+
+static inline u64 bkey_start_offset(const struct bkey *k)
+{
+       return k->p.offset - k->size;
+}
+
+static inline struct bpos bkey_start_pos(const struct bkey *k)
+{
+       return (struct bpos) {
+               .inode          = k->p.inode,
+               .offset         = bkey_start_offset(k),
+               .snapshot       = k->p.snapshot,
+       };
+}
+
+/* Packed helpers */
+
+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
+                                     const struct bkey_packed *k)
+{
+       unsigned ret = bkey_packed(k) ? format->key_u64s : BKEY_U64s;
+
+       EBUG_ON(k->u64s < ret);
+       return ret;
+}
+
+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
+                                      const struct bkey_packed *k)
+{
+       return bkeyp_key_u64s(format, k) * sizeof(u64);
+}
+
+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format,
+                                     const struct bkey_packed *k)
+{
+       return k->u64s - bkeyp_key_u64s(format, k);
+}
+
+static inline size_t bkeyp_val_bytes(const struct bkey_format *format,
+                                    const struct bkey_packed *k)
+{
+       return bkeyp_val_u64s(format, k) * sizeof(u64);
+}
+
+static inline void set_bkeyp_val_u64s(const struct bkey_format *format,
+                                     struct bkey_packed *k, unsigned val_u64s)
+{
+       k->u64s = bkeyp_key_u64s(format, k) + val_u64s;
+}
+
+#define bkeyp_val(_format, _k)                                         \
+        ((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k)))
+
+extern const struct bkey_format bch2_bkey_format_current;
+
+bool bch2_bkey_transform(const struct bkey_format *,
+                        struct bkey_packed *,
+                        const struct bkey_format *,
+                        const struct bkey_packed *);
+
+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *,
+                                  const struct bkey_packed *);
+
+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK
+struct bpos __bkey_unpack_pos(const struct bkey_format *,
+                             const struct bkey_packed *);
+#endif
+
+bool bch2_bkey_pack_key(struct bkey_packed *, const struct bkey *,
+                  const struct bkey_format *);
+
+enum bkey_pack_pos_ret {
+       BKEY_PACK_POS_EXACT,
+       BKEY_PACK_POS_SMALLER,
+       BKEY_PACK_POS_FAIL,
+};
+
+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *, struct bpos,
+                                          const struct btree *);
+
+static inline bool bkey_pack_pos(struct bkey_packed *out, struct bpos in,
+                                const struct btree *b)
+{
+       return bch2_bkey_pack_pos_lossy(out, in, b) == BKEY_PACK_POS_EXACT;
+}
+
+void bch2_bkey_unpack(const struct btree *, struct bkey_i *,
+                const struct bkey_packed *);
+bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *,
+              const struct bkey_format *);
+
+static inline u64 bkey_field_max(const struct bkey_format *f,
+                                enum bch_bkey_fields nr)
+{
+       return f->bits_per_field[nr] < 64
+               ? (le64_to_cpu(f->field_offset[nr]) +
+                  ~(~0ULL << f->bits_per_field[nr]))
+               : U64_MAX;
+}
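+
+/*
+ * Worked example (editor's illustration): with bits_per_field[nr] = 20 and
+ * field_offset[nr] = 4096, ~(~0ULL << 20) == 0xfffff, so the largest
+ * representable value for the field is 4096 + 1048575.
+ */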
+
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+
+int bch2_compile_bkey_format(const struct bkey_format *, void *);
+
+#else
+
+static inline int bch2_compile_bkey_format(const struct bkey_format *format,
+                                         void *out) { return 0; }
+
+#endif
+
+static inline void bkey_reassemble(struct bkey_i *dst,
+                                  struct bkey_s_c src)
+{
+       dst->k = *src.k;
+       memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k));
+}
+
+#define bkey_s_null            ((struct bkey_s)   { .k = NULL })
+#define bkey_s_c_null          ((struct bkey_s_c) { .k = NULL })
+
+#define bkey_s_err(err)                ((struct bkey_s)   { .k = ERR_PTR(err) })
+#define bkey_s_c_err(err)      ((struct bkey_s_c) { .k = ERR_PTR(err) })
+
+static inline struct bkey_s bkey_to_s(struct bkey *k)
+{
+       return (struct bkey_s) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k)
+{
+       return (struct bkey_s_c) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
+{
+       return (struct bkey_s) { .k = &k->k, .v = &k->v };
+}
+
+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
+{
+       return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
+}
+
+/*
+ * For a given type of value (e.g. struct bch_extent), generates the types for
+ * bkey + bch_extent - inline, split, split const - and also all the conversion
+ * functions, which also check that the value is of the correct type.
+ *
+ * We use anonymous unions for upcasting - e.g. converting from a
+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
+ * functions.
+ */
+#define BKEY_VAL_ACCESSORS(name)                                       \
+struct bkey_i_##name {                                                 \
+       union {                                                         \
+               struct bkey             k;                              \
+               struct bkey_i           k_i;                            \
+       };                                                              \
+       struct bch_##name               v;                              \
+};                                                                     \
+                                                                       \
+struct bkey_s_c_##name {                                               \
+       union {                                                         \
+       struct {                                                        \
+               const struct bkey       *k;                             \
+               const struct bch_##name *v;                             \
+       };                                                              \
+       struct bkey_s_c                 s_c;                            \
+       };                                                              \
+};                                                                     \
+                                                                       \
+struct bkey_s_##name {                                                 \
+       union {                                                         \
+       struct {                                                        \
+               struct bkey             *k;                             \
+               struct bch_##name       *v;                             \
+       };                                                              \
+       struct bkey_s_c_##name          c;                              \
+       struct bkey_s                   s;                              \
+       struct bkey_s_c                 s_c;                            \
+       };                                                              \
+};                                                                     \
+                                                                       \
+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
+{                                                                      \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
+       return container_of(&k->k, struct bkey_i_##name, k);            \
+}                                                                      \
+                                                                       \
+static inline const struct bkey_i_##name *                             \
+bkey_i_to_##name##_c(const struct bkey_i *k)                           \
+{                                                                      \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
+       return container_of(&k->k, struct bkey_i_##name, k);            \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k)   \
+{                                                                      \
+       EBUG_ON(k.k->type != KEY_TYPE_##name);                          \
+       return (struct bkey_s_##name) {                                 \
+               .k = k.k,                                               \
+               .v = container_of(k.v, struct bch_##name, v),           \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
+{                                                                      \
+       EBUG_ON(k.k->type != KEY_TYPE_##name);                          \
+       return (struct bkey_s_c_##name) {                               \
+               .k = k.k,                                               \
+               .v = container_of(k.v, struct bch_##name, v),           \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\
+{                                                                      \
+       return (struct bkey_s_##name) {                                 \
+               .k = &k->k,                                             \
+               .v = &k->v,                                             \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_c_##name                                   \
+name##_i_to_s_c(const struct bkey_i_##name *k)                         \
+{                                                                      \
+       return (struct bkey_s_c_##name) {                               \
+               .k = &k->k,                                             \
+               .v = &k->v,                                             \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k)        \
+{                                                                      \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
+       return (struct bkey_s_##name) {                                 \
+               .k = &k->k,                                             \
+               .v = container_of(&k->v, struct bch_##name, v),         \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_c_##name                                   \
+bkey_i_to_s_c_##name(const struct bkey_i *k)                           \
+{                                                                      \
+       EBUG_ON(k->k.type != KEY_TYPE_##name);                          \
+       return (struct bkey_s_c_##name) {                               \
+               .k = &k->k,                                             \
+               .v = container_of(&k->v, struct bch_##name, v),         \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
+{                                                                      \
+       struct bkey_i_##name *k =                                       \
+               container_of(&_k->k, struct bkey_i_##name, k);          \
+                                                                       \
+       bkey_init(&k->k);                                               \
+       memset(&k->v, 0, sizeof(k->v));                                 \
+       k->k.type = KEY_TYPE_##name;                                    \
+       set_bkey_val_bytes(&k->k, sizeof(k->v));                        \
+                                                                       \
+       return k;                                                       \
+}
+
+BKEY_VAL_ACCESSORS(cookie);
+BKEY_VAL_ACCESSORS(btree_ptr);
+BKEY_VAL_ACCESSORS(extent);
+BKEY_VAL_ACCESSORS(reservation);
+BKEY_VAL_ACCESSORS(inode);
+BKEY_VAL_ACCESSORS(inode_generation);
+BKEY_VAL_ACCESSORS(dirent);
+BKEY_VAL_ACCESSORS(xattr);
+BKEY_VAL_ACCESSORS(alloc);
+BKEY_VAL_ACCESSORS(quota);
+BKEY_VAL_ACCESSORS(stripe);
+BKEY_VAL_ACCESSORS(reflink_p);
+BKEY_VAL_ACCESSORS(reflink_v);
+BKEY_VAL_ACCESSORS(inline_data);
+BKEY_VAL_ACCESSORS(btree_ptr_v2);
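+
+/*
+ * Usage sketch (editor's illustration, not upstream code): the generated
+ * helpers convert between generic and typed keys, with EBUG_ON() verifying
+ * the type in debug builds:
+ *
+ *	struct bkey_s_c k = ...;
+ *
+ *	if (k.k->type == KEY_TYPE_extent) {
+ *		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ *		(e.v now has type const struct bch_extent *)
+ *	}
+ */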
+
+/* byte order helpers */
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+static inline unsigned high_word_offset(const struct bkey_format *f)
+{
+       return f->key_u64s - 1;
+}
+
+#define high_bit_offset                0
+#define nth_word(p, n)         ((p) - (n))
+
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+
+static inline unsigned high_word_offset(const struct bkey_format *f)
+{
+       return 0;
+}
+
+#define high_bit_offset                KEY_PACKED_BITS_START
+#define nth_word(p, n)         ((p) + (n))
+
+#else
+#error edit for your odd byteorder.
+#endif
+
+#define high_word(f, k)                ((k)->_data + high_word_offset(f))
+#define next_word(p)           nth_word(p, 1)
+#define prev_word(p)           nth_word(p, -1)
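+
+/*
+ * Editor's note: for a key packed into three u64s, high_word() is _data[2]
+ * on little endian and _data[0] on big endian; next_word() then steps toward
+ * the less significant words in either case.
+ */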
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_bkey_pack_test(void);
+#else
+static inline void bch2_bkey_pack_test(void) {}
+#endif
+
+#endif /* _BCACHEFS_BKEY_H */
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
new file mode 100644 (file)
index 0000000..36e0c51
--- /dev/null
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_types.h"
+#include "alloc_background.h"
+#include "dirent.h"
+#include "ec.h"
+#include "error.h"
+#include "extents.h"
+#include "inode.h"
+#include "quota.h"
+#include "reflink.h"
+#include "xattr.h"
+
+const char * const bch2_bkey_types[] = {
+#define x(name, nr) #name,
+       BCH_BKEY_TYPES()
+#undef x
+       NULL
+};
+
+static const char *deleted_key_invalid(const struct bch_fs *c,
+                                       struct bkey_s_c k)
+{
+       return NULL;
+}
+
+#define bch2_bkey_ops_deleted (struct bkey_ops) {      \
+       .key_invalid = deleted_key_invalid,             \
+}
+
+#define bch2_bkey_ops_discard (struct bkey_ops) {      \
+       .key_invalid = deleted_key_invalid,             \
+}
+
+static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       if (bkey_val_bytes(k.k))
+               return "value size should be zero";
+
+       return NULL;
+}
+
+#define bch2_bkey_ops_error (struct bkey_ops) {                \
+       .key_invalid = empty_val_key_invalid,           \
+}
+
+static const char *key_type_cookie_invalid(const struct bch_fs *c,
+                                          struct bkey_s_c k)
+{
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie))
+               return "incorrect value size";
+
+       return NULL;
+}
+
+#define bch2_bkey_ops_cookie (struct bkey_ops) {       \
+       .key_invalid = key_type_cookie_invalid,         \
+}
+
+#define bch2_bkey_ops_whiteout (struct bkey_ops) {     \
+       .key_invalid = empty_val_key_invalid,           \
+}
+
+static const char *key_type_inline_data_invalid(const struct bch_fs *c,
+                                          struct bkey_s_c k)
+{
+       return NULL;
+}
+
+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
+                                        struct bkey_s_c k)
+{
+       pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k));
+}
+
+#define bch2_bkey_ops_inline_data (struct bkey_ops) {  \
+       .key_invalid    = key_type_inline_data_invalid, \
+       .val_to_text    = key_type_inline_data_to_text, \
+}
+
+static const struct bkey_ops bch2_bkey_ops[] = {
+#define x(name, nr) [KEY_TYPE_##name]  = bch2_bkey_ops_##name,
+       BCH_BKEY_TYPES()
+#undef x
+};
+
+const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
+{
+       if (k.k->type >= KEY_TYPE_MAX)
+               return "invalid type";
+
+       return bch2_bkey_ops[k.k->type].key_invalid(c, k);
+}
+
+const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
+                               enum btree_node_type type)
+{
+       if (k.k->u64s < BKEY_U64s)
+               return "u64s too small";
+
+       if (type == BKEY_TYPE_BTREE &&
+           bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
+               return "value too big";
+
+       if (btree_node_type_is_extents(type)) {
+               if ((k.k->size == 0) != bkey_deleted(k.k))
+                       return "bad size field";
+
+               if (k.k->size > k.k->p.offset)
+                       return "size greater than offset";
+       } else {
+               if (k.k->size)
+                       return "nonzero size field";
+       }
+
+       if (k.k->p.snapshot)
+               return "nonzero snapshot";
+
+       if (type != BKEY_TYPE_BTREE &&
+           !bkey_cmp(k.k->p, POS_MAX))
+               return "POS_MAX key";
+
+       return NULL;
+}
+
+const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
+                             enum btree_node_type type)
+{
+       return __bch2_bkey_invalid(c, k, type) ?:
+               bch2_bkey_val_invalid(c, k);
+}
+
+const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k)
+{
+       if (bkey_cmp(k.k->p, b->data->min_key) < 0)
+               return "key before start of btree node";
+
+       if (bkey_cmp(k.k->p, b->data->max_key) > 0)
+               return "key past end of btree node";
+
+       return NULL;
+}
+
+void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
+       const char *invalid;
+
+       BUG_ON(!k.k->u64s);
+
+       invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?:
+               bch2_bkey_in_btree_node(b, k);
+       if (invalid) {
+               char buf[160];
+
+               bch2_bkey_val_to_text(&PBUF(buf), c, k);
+               bch2_fs_inconsistent(c, "invalid bkey %s: %s", buf, invalid);
+               return;
+       }
+
+       if (ops->key_debugcheck)
+               ops->key_debugcheck(c, k);
+}
+
+void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
+{
+       if (!bkey_cmp(pos, POS_MIN))
+               pr_buf(out, "POS_MIN");
+       else if (!bkey_cmp(pos, POS_MAX))
+               pr_buf(out, "POS_MAX");
+       else
+               pr_buf(out, "%llu:%llu", pos.inode, pos.offset);
+}
+
+void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
+{
+       if (k) {
+               pr_buf(out, "u64s %u type %s ", k->u64s,
+                      bch2_bkey_types[k->type]);
+
+               bch2_bpos_to_text(out, k->p);
+
+               pr_buf(out, " snap %u len %u ver %llu",
+                      k->p.snapshot, k->size, k->version.lo);
+       } else {
+               pr_buf(out, "(null)");
+       }
+}
+
+void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
+                     struct bkey_s_c k)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
+
+       if (likely(ops->val_to_text))
+               ops->val_to_text(out, c, k);
+}
+
+void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
+                          struct bkey_s_c k)
+{
+       bch2_bkey_to_text(out, k.k);
+
+       if (k.k) {
+               pr_buf(out, ": ");
+               bch2_val_to_text(out, c, k);
+       }
+}
+
+void bch2_bkey_swab_val(struct bkey_s k)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
+
+       if (ops->swab)
+               ops->swab(k);
+}
+
+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];
+
+       return ops->key_normalize
+               ? ops->key_normalize(c, k)
+               : false;
+}
+
+enum merge_result bch2_bkey_merge(struct bch_fs *c,
+                                 struct bkey_s l, struct bkey_s r)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type];
+       enum merge_result ret;
+
+       if (key_merging_disabled(c) ||
+           !ops->key_merge ||
+           l.k->type != r.k->type ||
+           bversion_cmp(l.k->version, r.k->version) ||
+           bkey_cmp(l.k->p, bkey_start_pos(r.k)))
+               return BCH_MERGE_NOMERGE;
+
+       ret = ops->key_merge(c, l, r);
+
+       if (ret != BCH_MERGE_NOMERGE)
+               l.k->needs_whiteout |= r.k->needs_whiteout;
+       return ret;
+}
+
+static const struct old_bkey_type {
+       u8              btree_node_type;
+       u8              old;
+       u8              new;
+} bkey_renumber_table[] = {
+       {BKEY_TYPE_BTREE,       128, KEY_TYPE_btree_ptr         },
+       {BKEY_TYPE_EXTENTS,     128, KEY_TYPE_extent            },
+       {BKEY_TYPE_EXTENTS,     129, KEY_TYPE_extent            },
+       {BKEY_TYPE_EXTENTS,     130, KEY_TYPE_reservation       },
+       {BKEY_TYPE_INODES,      128, KEY_TYPE_inode             },
+       {BKEY_TYPE_INODES,      130, KEY_TYPE_inode_generation  },
+       {BKEY_TYPE_DIRENTS,     128, KEY_TYPE_dirent            },
+       {BKEY_TYPE_DIRENTS,     129, KEY_TYPE_whiteout          },
+       {BKEY_TYPE_XATTRS,      128, KEY_TYPE_xattr             },
+       {BKEY_TYPE_XATTRS,      129, KEY_TYPE_whiteout          },
+       {BKEY_TYPE_ALLOC,       128, KEY_TYPE_alloc             },
+       {BKEY_TYPE_QUOTAS,      128, KEY_TYPE_quota             },
+};
+
+void bch2_bkey_renumber(enum btree_node_type btree_node_type,
+                       struct bkey_packed *k,
+                       int write)
+{
+       const struct old_bkey_type *i;
+
+       for (i = bkey_renumber_table;
+            i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table);
+            i++)
+               if (btree_node_type == i->btree_node_type &&
+                   k->type == (write ? i->new : i->old)) {
+                       k->type = write ? i->old : i->new;
+                       break;
+               }
+}
+
+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+                       unsigned version, unsigned big_endian,
+                       int write,
+                       struct bkey_format *f,
+                       struct bkey_packed *k)
+{
+       const struct bkey_ops *ops;
+       struct bkey uk;
+       struct bkey_s u;
+       int i;
+
+       /*
+        * Do these operations in reverse order in the write path:
+        */
+
+       for (i = 0; i < 4; i++)
+       switch (!write ? i : 3 - i) {
+       case 0:
+               if (big_endian != CPU_BIG_ENDIAN)
+                       bch2_bkey_swab_key(f, k);
+               break;
+       case 1:
+               if (version < bcachefs_metadata_version_bkey_renumber)
+                       bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+               break;
+       case 2:
+               if (version < bcachefs_metadata_version_inode_btree_change &&
+                   btree_id == BTREE_ID_INODES) {
+                       if (!bkey_packed(k)) {
+                               struct bkey_i *u = packed_to_bkey(k);
+                               swap(u->k.p.inode, u->k.p.offset);
+                       } else if (f->bits_per_field[BKEY_FIELD_INODE] &&
+                                  f->bits_per_field[BKEY_FIELD_OFFSET]) {
+                               struct bkey_format tmp = *f, *in = f, *out = &tmp;
+
+                               swap(tmp.bits_per_field[BKEY_FIELD_INODE],
+                                    tmp.bits_per_field[BKEY_FIELD_OFFSET]);
+                               swap(tmp.field_offset[BKEY_FIELD_INODE],
+                                    tmp.field_offset[BKEY_FIELD_OFFSET]);
+
+                               if (!write)
+                                       swap(in, out);
+
+                               uk = __bch2_bkey_unpack_key(in, k);
+                               swap(uk.p.inode, uk.p.offset);
+                               BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+                       }
+               }
+               break;
+       case 3:
+               if (!bkey_packed(k)) {
+                       u = bkey_i_to_s(packed_to_bkey(k));
+               } else {
+                       uk = __bch2_bkey_unpack_key(f, k);
+                       u.k = &uk;
+                       u.v = bkeyp_val(f, k);
+               }
+
+               if (big_endian != CPU_BIG_ENDIAN)
+                       bch2_bkey_swab_val(u);
+
+               ops = &bch2_bkey_ops[k->type];
+
+               if (ops->compat)
+                       ops->compat(btree_id, version, big_endian, write, u);
+               break;
+       default:
+               BUG();
+       }
+}
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
new file mode 100644 (file)
index 0000000..0bca725
--- /dev/null
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_METHODS_H
+#define _BCACHEFS_BKEY_METHODS_H
+
+#include "bkey.h"
+
+struct bch_fs;
+struct btree;
+struct bkey;
+enum btree_node_type;
+
+extern const char * const bch2_bkey_types[];
+
+enum merge_result {
+       BCH_MERGE_NOMERGE,
+
+       /*
+        * The keys were mergeable, but would have overflowed size - so instead
+        * l was changed to the maximum size, and both keys were modified:
+        */
+       BCH_MERGE_PARTIAL,
+       BCH_MERGE_MERGE,
+};
+
+struct bkey_ops {
+       /* Returns reason for being invalid if invalid, else NULL: */
+       const char *    (*key_invalid)(const struct bch_fs *,
+                                      struct bkey_s_c);
+       void            (*key_debugcheck)(struct bch_fs *, struct bkey_s_c);
+       void            (*val_to_text)(struct printbuf *, struct bch_fs *,
+                                      struct bkey_s_c);
+       void            (*swab)(struct bkey_s);
+       bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
+       enum merge_result (*key_merge)(struct bch_fs *,
+                                      struct bkey_s, struct bkey_s);
+       void            (*compat)(enum btree_id id, unsigned version,
+                                 unsigned big_endian, int write,
+                                 struct bkey_s);
+};
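+
+/*
+ * A minimal ops definition looks like the ones in bkey_methods.c (editor's
+ * sketch; "example" is a hypothetical key type):
+ *
+ *	#define bch2_bkey_ops_example (struct bkey_ops) {	\
+ *		.key_invalid	= example_key_invalid,		\
+ *	}
+ */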
+
+const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
+const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
+                               enum btree_node_type);
+const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
+                             enum btree_node_type);
+const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c);
+
+void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
+
+void bch2_bpos_to_text(struct printbuf *, struct bpos);
+void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
+void bch2_val_to_text(struct printbuf *, struct bch_fs *,
+                     struct bkey_s_c);
+void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
+                          struct bkey_s_c);
+
+void bch2_bkey_swab_val(struct bkey_s);
+
+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
+
+enum merge_result bch2_bkey_merge(struct bch_fs *,
+                                 struct bkey_s, struct bkey_s);
+
+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
+
+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
+                       int, struct bkey_format *, struct bkey_packed *);
+
+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+                              unsigned version, unsigned big_endian,
+                              int write,
+                              struct bkey_format *f,
+                              struct bkey_packed *k)
+{
+       if (version < bcachefs_metadata_version_current ||
+           big_endian != CPU_BIG_ENDIAN)
+               __bch2_bkey_compat(level, btree_id, version,
+                                  big_endian, write, f, k);
+}
+
+#endif /* _BCACHEFS_BKEY_METHODS_H */
diff --git a/libbcachefs/bkey_on_stack.h b/libbcachefs/bkey_on_stack.h
new file mode 100644 (file)
index 0000000..f607a0c
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_ON_STACK_H
+#define _BCACHEFS_BKEY_ON_STACK_H
+
+#include "bcachefs.h"
+
+struct bkey_on_stack {
+       struct bkey_i   *k;
+       u64             onstack[12];
+};
+
+static inline void bkey_on_stack_realloc(struct bkey_on_stack *s,
+                                        struct bch_fs *c, unsigned u64s)
+{
+       if (s->k == (void *) s->onstack &&
+           u64s > ARRAY_SIZE(s->onstack)) {
+               s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
+               memcpy(s->k, s->onstack, sizeof(s->onstack));
+       }
+}
+
+static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s,
+                                           struct bch_fs *c,
+                                           struct bkey_s_c k)
+{
+       bkey_on_stack_realloc(s, c, k.k->u64s);
+       bkey_reassemble(s->k, k);
+}
+
+static inline void bkey_on_stack_init(struct bkey_on_stack *s)
+{
+       s->k = (void *) s->onstack;
+}
+
+static inline void bkey_on_stack_exit(struct bkey_on_stack *s,
+                                     struct bch_fs *c)
+{
+       if (s->k != (void *) s->onstack)
+               mempool_free(s->k, &c->large_bkey_pool);
+       s->k = NULL;
+}
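+
+/*
+ * Typical usage (editor's sketch, assuming a valid struct bch_fs *c and a
+ * struct bkey_s_c k to copy):
+ *
+ *	struct bkey_on_stack ks;
+ *
+ *	bkey_on_stack_init(&ks);
+ *	bkey_on_stack_reassemble(&ks, c, k);
+ *	(... use ks.k as a mutable copy of k ...)
+ *	bkey_on_stack_exit(&ks, c);
+ */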
+
+#endif /* _BCACHEFS_BKEY_ON_STACK_H */
diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
new file mode 100644 (file)
index 0000000..839e78d
--- /dev/null
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bkey_on_stack.h"
+#include "bkey_sort.h"
+#include "bset.h"
+#include "extents.h"
+
+typedef int (*sort_cmp_fn)(struct btree *,
+                          struct bkey_packed *,
+                          struct bkey_packed *);
+
+static inline bool sort_iter_end(struct sort_iter *iter)
+{
+       return !iter->used;
+}
+
+static inline void __sort_iter_sift(struct sort_iter *iter,
+                                   unsigned from,
+                                   sort_cmp_fn cmp)
+{
+       unsigned i;
+
+       for (i = from;
+            i + 1 < iter->used &&
+            cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
+            i++)
+               swap(iter->data[i], iter->data[i + 1]);
+}
+
+static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp)
+{
+       __sort_iter_sift(iter, 0, cmp);
+}
+
+static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
+{
+       unsigned i = iter->used;
+
+       while (i--)
+               __sort_iter_sift(iter, i, cmp);
+}
+
+static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
+{
+       return !sort_iter_end(iter) ? iter->data->k : NULL;
+}
+
+static inline void __sort_iter_advance(struct sort_iter *iter,
+                                      unsigned idx, sort_cmp_fn cmp)
+{
+       struct sort_iter_set *i = iter->data + idx;
+
+       BUG_ON(idx >= iter->used);
+
+       i->k = bkey_next_skip_noops(i->k, i->end);
+
+       BUG_ON(i->k > i->end);
+
+       if (i->k == i->end)
+               array_remove_item(iter->data, iter->used, idx);
+       else
+               __sort_iter_sift(iter, idx, cmp);
+}
+
+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
+{
+       __sort_iter_advance(iter, 0, cmp);
+}
+
+static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
+                                                sort_cmp_fn cmp)
+{
+       struct bkey_packed *ret = sort_iter_peek(iter);
+
+       if (ret)
+               sort_iter_advance(iter, cmp);
+
+       return ret;
+}
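+
+/*
+ * Together these helpers form a small k-way merge (editor's sketch, with cmp
+ * being one of the comparators defined below):
+ *
+ *	sort_iter_sort(iter, cmp);
+ *	while ((k = sort_iter_next(iter, cmp)))
+ *		(... emit k ...)
+ */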
+
+/*
+ * If keys compare equal, compare by pointer order:
+ */
+static inline int key_sort_fix_overlapping_cmp(struct btree *b,
+                                              struct bkey_packed *l,
+                                              struct bkey_packed *r)
+{
+       return bkey_cmp_packed(b, l, r) ?:
+               cmp_int((unsigned long) l, (unsigned long) r);
+}
+
+static inline bool should_drop_next_key(struct sort_iter *iter)
+{
+       /*
+        * key_sort_fix_overlapping_cmp() ensures that when keys compare equal
+        * the older key comes first; so if l->k compares equal to r->k then
+        * l->k is older and should be dropped.
+        */
+       return iter->used >= 2 &&
+               !bkey_cmp_packed(iter->b,
+                                iter->data[0].k,
+                                iter->data[1].k);
+}
+
+struct btree_nr_keys
+bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
+                             struct sort_iter *iter)
+{
+       struct bkey_packed *out = dst->start;
+       struct bkey_packed *k;
+       struct btree_nr_keys nr;
+
+       memset(&nr, 0, sizeof(nr));
+
+       sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
+
+       while ((k = sort_iter_peek(iter))) {
+               if (!bkey_whiteout(k) &&
+                   !should_drop_next_key(iter)) {
+                       bkey_copy(out, k);
+                       btree_keys_account_key_add(&nr, 0, out);
+                       out = bkey_next(out);
+               }
+
+               sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
+       }
+
+       dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+       return nr;
+}
+
+static void extent_sort_append(struct bch_fs *c,
+                              struct bkey_format *f,
+                              struct btree_nr_keys *nr,
+                              struct bkey_packed **out,
+                              struct bkey_s k)
+{
+       if (!bkey_whiteout(k.k)) {
+               if (!bch2_bkey_pack_key(*out, k.k, f))
+                       memcpy_u64s_small(*out, k.k, BKEY_U64s);
+
+               memcpy_u64s_small(bkeyp_val(f, *out), k.v, bkey_val_u64s(k.k));
+
+               btree_keys_account_key_add(nr, 0, *out);
+               *out = bkey_next(*out);
+       }
+}
+
+/* Sort + repack in a new format: */
+struct btree_nr_keys
+bch2_sort_repack(struct bset *dst, struct btree *src,
+                struct btree_node_iter *src_iter,
+                struct bkey_format *out_f,
+                bool filter_whiteouts)
+{
+       struct bkey_format *in_f = &src->format;
+       struct bkey_packed *in, *out = vstruct_last(dst);
+       struct btree_nr_keys nr;
+
+       memset(&nr, 0, sizeof(nr));
+
+       while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
+               if (filter_whiteouts && bkey_whiteout(in))
+                       continue;
+
+               if (bch2_bkey_transform(out_f, out, bkey_packed(in)
+                                      ? in_f : &bch2_bkey_format_current, in))
+                       out->format = KEY_FORMAT_LOCAL_BTREE;
+               else
+                       bch2_bkey_unpack(src, (void *) out, in);
+
+               btree_keys_account_key_add(&nr, 0, out);
+               out = bkey_next(out);
+       }
+
+       dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+       return nr;
+}
+
+/* Sort, repack, and call bch2_bkey_normalize() to drop stale pointers: */
+struct btree_nr_keys
+bch2_sort_repack_merge(struct bch_fs *c,
+                      struct bset *dst, struct btree *src,
+                      struct btree_node_iter *iter,
+                      struct bkey_format *out_f,
+                      bool filter_whiteouts)
+{
+       struct bkey_packed *out = vstruct_last(dst), *k_packed;
+       struct bkey_on_stack k;
+       struct btree_nr_keys nr;
+
+       memset(&nr, 0, sizeof(nr));
+       bkey_on_stack_init(&k);
+
+       while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
+               if (filter_whiteouts && bkey_whiteout(k_packed))
+                       continue;
+
+               /*
+                * NOTE:
+                * bch2_bkey_normalize may modify the key we pass it (dropping
+                * stale pointers) and we don't have a write lock on the src
+                * node; we have to make a copy of the entire key before calling
+                * normalize
+                */
+               bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s);
+               bch2_bkey_unpack(src, k.k, k_packed);
+
+               if (filter_whiteouts &&
+                   bch2_bkey_normalize(c, bkey_i_to_s(k.k)))
+                       continue;
+
+               extent_sort_append(c, out_f, &nr, &out, bkey_i_to_s(k.k));
+       }
+
+       dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+       bkey_on_stack_exit(&k, c);
+       return nr;
+}
+
+static inline int sort_keys_cmp(struct btree *b,
+                               struct bkey_packed *l,
+                               struct bkey_packed *r)
+{
+       return bkey_cmp_packed(b, l, r) ?:
+               (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
+               (int) l->needs_whiteout - (int) r->needs_whiteout;
+}
+
+unsigned bch2_sort_keys(struct bkey_packed *dst,
+                       struct sort_iter *iter,
+                       bool filter_whiteouts)
+{
+       const struct bkey_format *f = &iter->b->format;
+       struct bkey_packed *in, *next, *out = dst;
+
+       sort_iter_sort(iter, sort_keys_cmp);
+
+       while ((in = sort_iter_next(iter, sort_keys_cmp))) {
+               bool needs_whiteout = false;
+
+               if (bkey_whiteout(in) &&
+                   (filter_whiteouts || !in->needs_whiteout))
+                       continue;
+
+               while ((next = sort_iter_peek(iter)) &&
+                      !bkey_cmp_packed(iter->b, in, next)) {
+                       BUG_ON(in->needs_whiteout &&
+                              next->needs_whiteout);
+                       needs_whiteout |= in->needs_whiteout;
+                       in = sort_iter_next(iter, sort_keys_cmp);
+               }
+
+               if (bkey_whiteout(in)) {
+                       memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
+                       set_bkeyp_val_u64s(f, out, 0);
+               } else {
+                       bkey_copy(out, in);
+               }
+               out->needs_whiteout |= needs_whiteout;
+               out = bkey_next(out);
+       }
+
+       return (u64 *) out - (u64 *) dst;
+}
+
+/* Compat code for btree_node_old_extent_overwrite: */
+
+/*
+ * If keys compare equal, compare by pointer order:
+ *
+ * Necessary for sort_fix_overlapping() - if there are multiple keys that
+ * compare equal in different sets, we have to process them newest to oldest.
+ */
+static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
+                                                 struct bkey_packed *l,
+                                                 struct bkey_packed *r)
+{
+       struct bkey ul = bkey_unpack_key(b, l);
+       struct bkey ur = bkey_unpack_key(b, r);
+
+       return bkey_cmp(bkey_start_pos(&ul),
+                       bkey_start_pos(&ur)) ?:
+               cmp_int((unsigned long) r, (unsigned long) l);
+}
+
+/*
+ * The algorithm in extent_sort_fix_overlapping() relies on keys in the same
+ * bset being ordered by start offset - but 0 size whiteouts (which are always
+ * KEY_TYPE_deleted) break this ordering, so we need to skip over them:
+ */
+static void extent_iter_advance(struct sort_iter *iter, unsigned idx)
+{
+       struct sort_iter_set *i = iter->data + idx;
+
+       do {
+               i->k = bkey_next_skip_noops(i->k, i->end);
+       } while (i->k != i->end && bkey_deleted(i->k));
+
+       if (i->k == i->end)
+               array_remove_item(iter->data, iter->used, idx);
+       else
+               __sort_iter_sift(iter, idx, extent_sort_fix_overlapping_cmp);
+}
+
+struct btree_nr_keys
+bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
+                                struct sort_iter *iter)
+{
+       struct btree *b = iter->b;
+       struct bkey_format *f = &b->format;
+       struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
+       struct bkey_packed *out = dst->start;
+       struct bkey l_unpacked, r_unpacked;
+       struct bkey_s l, r;
+       struct btree_nr_keys nr;
+       struct bkey_on_stack split;
+       unsigned i;
+
+       memset(&nr, 0, sizeof(nr));
+       bkey_on_stack_init(&split);
+
+       sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
+       for (i = 0; i < iter->used;) {
+               if (bkey_deleted(iter->data[i].k))
+                       __sort_iter_advance(iter, i,
+                                           extent_sort_fix_overlapping_cmp);
+               else
+                       i++;
+       }
+
+       while (!sort_iter_end(iter)) {
+               l = __bkey_disassemble(b, _l->k, &l_unpacked);
+
+               if (iter->used == 1) {
+                       extent_sort_append(c, f, &nr, &out, l);
+                       extent_iter_advance(iter, 0);
+                       continue;
+               }
+
+               r = __bkey_disassemble(b, _r->k, &r_unpacked);
+
+               /* If current key and next key don't overlap, just append */
+               if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
+                       extent_sort_append(c, f, &nr, &out, l);
+                       extent_iter_advance(iter, 0);
+                       continue;
+               }
+
+               /* Skip 0 size keys */
+               if (!r.k->size) {
+                       extent_iter_advance(iter, 1);
+                       continue;
+               }
+
+               /*
+                * overlap: keep the newer key and trim the older key so they
+                * don't overlap. comparing pointers tells us which one is
+                * newer, since the bsets are appended one after the other.
+                */
+
+               /* can't happen because of comparison func */
+               BUG_ON(_l->k < _r->k &&
+                      !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
+
+               if (_l->k > _r->k) {
+                       /* l wins, trim r */
+                       if (bkey_cmp(l.k->p, r.k->p) >= 0) {
+                               extent_iter_advance(iter, 1);
+                       } else {
+                               bch2_cut_front_s(l.k->p, r);
+                               extent_save(b, _r->k, r.k);
+                               __sort_iter_sift(iter, 1,
+                                        extent_sort_fix_overlapping_cmp);
+                       }
+               } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
+
+                       /*
+                        * r wins, but it overlaps in the middle of l - split l:
+                        */
+                       bkey_on_stack_reassemble(&split, c, l.s_c);
+                       bch2_cut_back(bkey_start_pos(r.k), split.k);
+
+                       bch2_cut_front_s(r.k->p, l);
+                       extent_save(b, _l->k, l.k);
+
+                       __sort_iter_sift(iter, 0,
+                                        extent_sort_fix_overlapping_cmp);
+
+                       extent_sort_append(c, f, &nr, &out,
+                                          bkey_i_to_s(split.k));
+               } else {
+                       bch2_cut_back_s(bkey_start_pos(r.k), l);
+                       extent_save(b, _l->k, l.k);
+               }
+       }
+
+       dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+
+       bkey_on_stack_exit(&split, c);
+       return nr;
+}
+
+static inline int sort_extents_cmp(struct btree *b,
+                                  struct bkey_packed *l,
+                                  struct bkey_packed *r)
+{
+       return bkey_cmp_packed(b, l, r) ?:
+               (int) bkey_deleted(l) - (int) bkey_deleted(r);
+}
+
+unsigned bch2_sort_extents(struct bkey_packed *dst,
+                          struct sort_iter *iter,
+                          bool filter_whiteouts)
+{
+       struct bkey_packed *in, *out = dst;
+
+       sort_iter_sort(iter, sort_extents_cmp);
+
+       while ((in = sort_iter_next(iter, sort_extents_cmp))) {
+               if (bkey_deleted(in))
+                       continue;
+
+               if (bkey_whiteout(in) &&
+                   (filter_whiteouts || !in->needs_whiteout))
+                       continue;
+
+               bkey_copy(out, in);
+               out = bkey_next(out);
+       }
+
+       return (u64 *) out - (u64 *) dst;
+}
+
+static inline int sort_extent_whiteouts_cmp(struct btree *b,
+                                           struct bkey_packed *l,
+                                           struct bkey_packed *r)
+{
+       struct bkey ul = bkey_unpack_key(b, l);
+       struct bkey ur = bkey_unpack_key(b, r);
+
+       return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur));
+}
+
+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst,
+                                   struct sort_iter *iter)
+{
+       const struct bkey_format *f = &iter->b->format;
+       struct bkey_packed *in, *out = dst;
+       struct bkey_i l, r;
+       bool prev = false, l_packed = false;
+       u64 max_packed_size     = bkey_field_max(f, BKEY_FIELD_SIZE);
+       u64 max_packed_offset   = bkey_field_max(f, BKEY_FIELD_OFFSET);
+       u64 new_size;
+
+       max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX);
+
+       sort_iter_sort(iter, sort_extent_whiteouts_cmp);
+
+       while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) {
+               if (bkey_deleted(in))
+                       continue;
+
+               EBUG_ON(bkeyp_val_u64s(f, in));
+               EBUG_ON(in->type != KEY_TYPE_discard);
+
+               r.k = bkey_unpack_key(iter->b, in);
+
+               if (prev &&
+                   bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) {
+                       if (bkey_cmp(l.k.p, r.k.p) >= 0)
+                               continue;
+
+                       new_size = l_packed
+                               ? min(max_packed_size, max_packed_offset -
+                                     bkey_start_offset(&l.k))
+                               : KEY_SIZE_MAX;
+
+                       new_size = min(new_size, r.k.p.offset -
+                                      bkey_start_offset(&l.k));
+
+                       BUG_ON(new_size < l.k.size);
+
+                       bch2_key_resize(&l.k, new_size);
+
+                       if (bkey_cmp(l.k.p, r.k.p) >= 0)
+                               continue;
+
+                       bch2_cut_front(l.k.p, &r);
+               }
+
+               if (prev) {
+                       if (!bch2_bkey_pack(out, &l, f)) {
+                               BUG_ON(l_packed);
+                               bkey_copy(out, &l);
+                       }
+                       out = bkey_next(out);
+               }
+
+               l = r;
+               prev = true;
+               l_packed = bkey_packed(in);
+       }
+
+       if (prev) {
+               if (!bch2_bkey_pack(out, &l, f)) {
+                       BUG_ON(l_packed);
+                       bkey_copy(out, &l);
+               }
+               out = bkey_next(out);
+       }
+
+       return (u64 *) out - (u64 *) dst;
+}
diff --git a/libbcachefs/bkey_sort.h b/libbcachefs/bkey_sort.h
new file mode 100644 (file)
index 0000000..458a051
--- /dev/null
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_SORT_H
+#define _BCACHEFS_BKEY_SORT_H
+
+struct sort_iter {
+       struct btree            *b;
+       unsigned                used;
+       unsigned                size;
+
+       struct sort_iter_set {
+               struct bkey_packed *k, *end;
+       } data[MAX_BSETS + 1];
+};
+
+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b)
+{
+       iter->b = b;
+       iter->used = 0;
+       iter->size = ARRAY_SIZE(iter->data);
+}
+
+static inline void sort_iter_add(struct sort_iter *iter,
+                                struct bkey_packed *k,
+                                struct bkey_packed *end)
+{
+       BUG_ON(iter->used >= iter->size);
+
+       if (k != end)
+               iter->data[iter->used++] = (struct sort_iter_set) { k, end };
+}
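+
+/*
+ * Editor's sketch of the intended calling convention (btree_bkey_first() and
+ * btree_bkey_last() are assumed helpers returning a bset's key range):
+ *
+ *	struct sort_iter iter;
+ *
+ *	sort_iter_init(&iter, b);
+ *	for_each_bset(b, t)
+ *		sort_iter_add(&iter,
+ *			      btree_bkey_first(b, t),
+ *			      btree_bkey_last(b, t));
+ *	u64s = bch2_sort_keys(dst, &iter, false);
+ */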
+
+struct btree_nr_keys
+bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *,
+                             struct sort_iter *);
+struct btree_nr_keys
+bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *,
+                                struct sort_iter *);
+
+struct btree_nr_keys
+bch2_sort_repack(struct bset *, struct btree *,
+                struct btree_node_iter *,
+                struct bkey_format *, bool);
+struct btree_nr_keys
+bch2_sort_repack_merge(struct bch_fs *,
+                      struct bset *, struct btree *,
+                      struct btree_node_iter *,
+                      struct bkey_format *, bool);
+
+unsigned bch2_sort_keys(struct bkey_packed *,
+                       struct sort_iter *, bool);
+unsigned bch2_sort_extents(struct bkey_packed *,
+                          struct sort_iter *, bool);
+
+unsigned bch2_sort_extent_whiteouts(struct bkey_packed *,
+                                   struct sort_iter *);
+
+#endif /* _BCACHEFS_BKEY_SORT_H */
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
new file mode 100644 (file)
index 0000000..f7c2841
--- /dev/null
@@ -0,0 +1,1742 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for working with individual keys, and sorted sets of keys within a
+ * btree node
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "bset.h"
+#include "eytzinger.h"
+#include "util.h"
+
+#include <asm/unaligned.h>
+#include <linux/console.h>
+#include <linux/random.h>
+#include <linux/prefetch.h>
+
+/* hack.. */
+#include "alloc_types.h"
+#include <trace/events/bcachefs.h>
+
+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
+                                                 struct btree *);
+
+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
+{
+       unsigned n = ARRAY_SIZE(iter->data);
+
+       while (n && __btree_node_iter_set_end(iter, n - 1))
+               --n;
+
+       return n;
+}
+
+struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
+{
+       unsigned offset = __btree_node_key_to_offset(b, k);
+       struct bset_tree *t;
+
+       for_each_bset(b, t)
+               if (offset <= t->end_offset) {
+                       EBUG_ON(offset < btree_bkey_first_offset(t));
+                       return t;
+               }
+
+       BUG();
+}
+
+/*
+ * There are never duplicate live keys in the btree - but including keys that
+ * have been flagged as deleted (and will be cleaned up later) we _will_ see
+ * duplicates.
+ *
+ * Thus the sort order is: usual key comparison first, but for keys that compare
+ * equal the deleted key(s) come first, and the (at most one) live version comes
+ * last.
+ *
+ * The main reason for this is insertion: to handle overwrites, we first iterate
+ * over keys that compare equal to our insert key, and then insert immediately
+ * prior to the first key greater than the key we're inserting - our insert
+ * position will be after all keys that compare equal to our insert key, which
+ * by the time we actually do the insert will all be deleted.
+ */
+
+void bch2_dump_bset(struct bch_fs *c, struct btree *b,
+                   struct bset *i, unsigned set)
+{
+       struct bkey_packed *_k, *_n;
+       struct bkey uk, n;
+       struct bkey_s_c k;
+       char buf[200];
+
+       if (!i->u64s)
+               return;
+
+       for (_k = i->start;
+            _k < vstruct_last(i);
+            _k = _n) {
+               _n = bkey_next_skip_noops(_k, vstruct_last(i));
+
+               k = bkey_disassemble(b, _k, &uk);
+               if (c)
+                       bch2_bkey_val_to_text(&PBUF(buf), c, k);
+               else
+                       bch2_bkey_to_text(&PBUF(buf), k.k);
+               printk(KERN_ERR "block %u key %5zu: %s\n", set,
+                      _k->_data - i->_data, buf);
+
+               if (_n == vstruct_last(i))
+                       continue;
+
+               n = bkey_unpack_key(b, _n);
+
+               if (bkey_cmp(bkey_start_pos(&n), k.k->p) < 0) {
+                       printk(KERN_ERR "Key skipped backwards\n");
+                       continue;
+               }
+
+               if (!bkey_deleted(k.k) &&
+                   !bkey_cmp(n.p, k.k->p))
+                       printk(KERN_ERR "Duplicate keys\n");
+       }
+}
+
+void bch2_dump_btree_node(struct bch_fs *c, struct btree *b)
+{
+       struct bset_tree *t;
+
+       console_lock();
+       for_each_bset(b, t)
+               bch2_dump_bset(c, b, bset(b, t), t - b->set);
+       console_unlock();
+}
+
+void bch2_dump_btree_node_iter(struct btree *b,
+                             struct btree_node_iter *iter)
+{
+       struct btree_node_iter_set *set;
+
+       printk(KERN_ERR "btree node iter with %u/%u sets:\n",
+              __btree_node_iter_used(iter), b->nsets);
+
+       btree_node_iter_for_each(iter, set) {
+               struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
+               struct bset_tree *t = bch2_bkey_to_bset(b, k);
+               struct bkey uk = bkey_unpack_key(b, k);
+               char buf[100];
+
+               bch2_bkey_to_text(&PBUF(buf), &uk);
+               printk(KERN_ERR "set %zu key %u: %s\n",
+                      t - b->set, set->k, buf);
+       }
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *b)
+{
+       struct bset_tree *t;
+       struct bkey_packed *k;
+       struct btree_nr_keys nr = { 0 };
+
+       for_each_bset(b, t)
+               bset_tree_for_each_key(b, t, k)
+                       if (!bkey_whiteout(k))
+                               btree_keys_account_key_add(&nr, t - b->set, k);
+
+       BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
+}
+
+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
+                                           struct btree *b)
+{
+       struct btree_node_iter iter = *_iter;
+       const struct bkey_packed *k, *n;
+
+       k = bch2_btree_node_iter_peek_all(&iter, b);
+       __bch2_btree_node_iter_advance(&iter, b);
+       n = bch2_btree_node_iter_peek_all(&iter, b);
+
+       bkey_unpack_key(b, k);
+
+       if (n &&
+           bkey_iter_cmp(b, k, n) > 0) {
+               struct btree_node_iter_set *set;
+               struct bkey ku = bkey_unpack_key(b, k);
+               struct bkey nu = bkey_unpack_key(b, n);
+               char buf1[80], buf2[80];
+
+               bch2_dump_btree_node(NULL, b);
+               bch2_bkey_to_text(&PBUF(buf1), &ku);
+               bch2_bkey_to_text(&PBUF(buf2), &nu);
+               printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
+                      buf1, buf2);
+               printk(KERN_ERR "iter was:");
+
+               btree_node_iter_for_each(_iter, set) {
+                       struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
+                       struct bset_tree *t = bch2_bkey_to_bset(b, k);
+                       printk(" [%zi %zi]", t - b->set,
+                              k->_data - bset(b, t)->_data);
+               }
+               panic("\n");
+       }
+}
+
+void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
+                                struct btree *b)
+{
+       struct btree_node_iter_set *set, *s2;
+       struct bkey_packed *k, *p;
+       struct bset_tree *t;
+
+       if (bch2_btree_node_iter_end(iter))
+               return;
+
+       /* Verify no duplicates: */
+       btree_node_iter_for_each(iter, set)
+               btree_node_iter_for_each(iter, s2)
+                       BUG_ON(set != s2 && set->end == s2->end);
+
+       /* Verify that set->end is correct: */
+       btree_node_iter_for_each(iter, set) {
+               for_each_bset(b, t)
+                       if (set->end == t->end_offset)
+                               goto found;
+               BUG();
+found:
+               BUG_ON(set->k < btree_bkey_first_offset(t) ||
+                      set->k >= t->end_offset);
+       }
+
+       /* Verify iterator is sorted: */
+       btree_node_iter_for_each(iter, set)
+               BUG_ON(set != iter->data &&
+                      btree_node_iter_cmp(b, set[-1], set[0]) > 0);
+
+       k = bch2_btree_node_iter_peek_all(iter, b);
+
+       for_each_bset(b, t) {
+               if (iter->data[0].end == t->end_offset)
+                       continue;
+
+               p = bch2_bkey_prev_all(b, t,
+                       bch2_btree_node_iter_bset_pos(iter, b, t));
+
+               BUG_ON(p && bkey_iter_cmp(b, k, p) < 0);
+       }
+}
+
+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
+                           struct bkey_packed *insert, unsigned clobber_u64s)
+{
+       struct bset_tree *t = bch2_bkey_to_bset(b, where);
+       struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
+       struct bkey_packed *next = (void *) (where->_data + clobber_u64s);
+#if 0
+       BUG_ON(prev &&
+              bkey_iter_cmp(b, prev, insert) > 0);
+#else
+       if (prev &&
+           bkey_iter_cmp(b, prev, insert) > 0) {
+               struct bkey k1 = bkey_unpack_key(b, prev);
+               struct bkey k2 = bkey_unpack_key(b, insert);
+               char buf1[100];
+               char buf2[100];
+
+               bch2_dump_btree_node(NULL, b);
+               bch2_bkey_to_text(&PBUF(buf1), &k1);
+               bch2_bkey_to_text(&PBUF(buf2), &k2);
+
+               panic("prev > insert:\n"
+                     "prev    key %s\n"
+                     "insert  key %s\n",
+                     buf1, buf2);
+       }
+#endif
+#if 0
+       BUG_ON(next != btree_bkey_last(b, t) &&
+              bkey_iter_cmp(b, insert, next) > 0);
+#else
+       if (next != btree_bkey_last(b, t) &&
+           bkey_iter_cmp(b, insert, next) > 0) {
+               struct bkey k1 = bkey_unpack_key(b, insert);
+               struct bkey k2 = bkey_unpack_key(b, next);
+               char buf1[100];
+               char buf2[100];
+
+               bch2_dump_btree_node(NULL, b);
+               bch2_bkey_to_text(&PBUF(buf1), &k1);
+               bch2_bkey_to_text(&PBUF(buf2), &k2);
+
+               panic("insert > next:\n"
+                     "insert  key %s\n"
+                     "next    key %s\n",
+                     buf1, buf2);
+       }
+#endif
+}
+
+#else
+
+static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
+                                                  struct btree *b) {}
+
+#endif
+
+/* Auxiliary search trees */
+
+#define BFLOAT_FAILED_UNPACKED U8_MAX
+#define BFLOAT_FAILED          U8_MAX
+
+struct bkey_float {
+       u8              exponent;
+       u8              key_offset;
+       u16             mantissa;
+};
+#define BKEY_MANTISSA_BITS     16
+
+static unsigned bkey_float_byte_offset(unsigned idx)
+{
+       return idx * sizeof(struct bkey_float);
+}
+
+struct ro_aux_tree {
+       struct bkey_float       f[0];
+};
+
+struct rw_aux_tree {
+       u16             offset;
+       struct bpos     k;
+};
+
+static unsigned bset_aux_tree_buf_end(const struct bset_tree *t)
+{
+       BUG_ON(t->aux_data_offset == U16_MAX);
+
+       switch (bset_aux_tree_type(t)) {
+       case BSET_NO_AUX_TREE:
+               return t->aux_data_offset;
+       case BSET_RO_AUX_TREE:
+               return t->aux_data_offset +
+                       DIV_ROUND_UP(t->size * sizeof(struct bkey_float) +
+                                    t->size * sizeof(u8), 8);
+       case BSET_RW_AUX_TREE:
+               return t->aux_data_offset +
+                       DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8);
+       default:
+               BUG();
+       }
+}
+
+static unsigned bset_aux_tree_buf_start(const struct btree *b,
+                                       const struct bset_tree *t)
+{
+       return t == b->set
+               ? DIV_ROUND_UP(b->unpack_fn_len, 8)
+               : bset_aux_tree_buf_end(t - 1);
+}
+
+static void *__aux_tree_base(const struct btree *b,
+                            const struct bset_tree *t)
+{
+       return b->aux_data + t->aux_data_offset * 8;
+}
+
+static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b,
+                                           const struct bset_tree *t)
+{
+       EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
+
+       return __aux_tree_base(b, t);
+}
+
+static u8 *ro_aux_tree_prev(const struct btree *b,
+                           const struct bset_tree *t)
+{
+       EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
+
+       return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size);
+}
+
+static struct bkey_float *bkey_float(const struct btree *b,
+                                    const struct bset_tree *t,
+                                    unsigned idx)
+{
+       return ro_aux_tree_base(b, t)->f + idx;
+}
+
+static void bset_aux_tree_verify(struct btree *b)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct bset_tree *t;
+
+       for_each_bset(b, t) {
+               if (t->aux_data_offset == U16_MAX)
+                       continue;
+
+               BUG_ON(t != b->set &&
+                      t[-1].aux_data_offset == U16_MAX);
+
+               BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
+               BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
+               BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
+       }
+#endif
+}
+
+void bch2_btree_keys_init(struct btree *b, bool *expensive_debug_checks)
+{
+       unsigned i;
+
+       b->nsets                = 0;
+       memset(&b->nr, 0, sizeof(b->nr));
+#ifdef CONFIG_BCACHEFS_DEBUG
+       b->expensive_debug_checks = expensive_debug_checks;
+#endif
+       for (i = 0; i < MAX_BSETS; i++)
+               b->set[i].data_offset = U16_MAX;
+
+       bch2_bset_set_no_aux_tree(b, b->set);
+}
+
+/* Binary tree stuff for auxiliary search trees */
+
+/*
+ * Cacheline/offset <-> bkey pointer arithmetic:
+ *
+ * t->tree is a binary search tree in an array; each node corresponds to a key
+ * in one cacheline in t->set (BSET_CACHELINE bytes).
+ *
+ * This means we don't have to store the full index of the key that a node in
+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and
+ * then bkey_float->key_offset gives us the offset within that cacheline, in
+ * units of 8 bytes.
+ *
+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
+ * make this work.
+ *
+ * To construct the bfloat for an arbitrary key we need to know what the key
+ * immediately preceding it is: we have to check if the two keys differ in the
+ * bits we're going to store in bkey_float->mantissa. ro_aux_tree_prev(b, t)[j]
+ * stores the size of the previous key so we can walk backwards to it from the
+ * key tree_to_bkey(b, t, j) points at.
+ */
+
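+/*
+ * Worked example (illustrative; assumes BSET_CACHELINE is 128 bytes):
+ * cacheline 3 with key_offset 5 addresses the key starting 3 * 128 + 5 * 8
+ * bytes past the rounded-down start of the set - see cacheline_to_bkey()
+ * below.
+ */
+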
+static inline void *bset_cacheline(const struct btree *b,
+                                  const struct bset_tree *t,
+                                  unsigned cacheline)
+{
+       return (void *) round_down((unsigned long) btree_bkey_first(b, t),
+                                  L1_CACHE_BYTES) +
+               cacheline * BSET_CACHELINE;
+}
+
+static struct bkey_packed *cacheline_to_bkey(const struct btree *b,
+                                            const struct bset_tree *t,
+                                            unsigned cacheline,
+                                            unsigned offset)
+{
+       return bset_cacheline(b, t, cacheline) + offset * 8;
+}
+
+static unsigned bkey_to_cacheline(const struct btree *b,
+                                 const struct bset_tree *t,
+                                 const struct bkey_packed *k)
+{
+       return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE;
+}
+
+static ssize_t __bkey_to_cacheline_offset(const struct btree *b,
+                                         const struct bset_tree *t,
+                                         unsigned cacheline,
+                                         const struct bkey_packed *k)
+{
+       return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline);
+}
+
+static unsigned bkey_to_cacheline_offset(const struct btree *b,
+                                        const struct bset_tree *t,
+                                        unsigned cacheline,
+                                        const struct bkey_packed *k)
+{
+       size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k);
+
+       EBUG_ON(m > U8_MAX);
+       return m;
+}
+
+static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
+                                              const struct bset_tree *t,
+                                              unsigned j)
+{
+       return cacheline_to_bkey(b, t,
+                       __eytzinger1_to_inorder(j, t->size, t->extra),
+                       bkey_float(b, t, j)->key_offset);
+}
+
+static struct bkey_packed *tree_to_prev_bkey(const struct btree *b,
+                                            const struct bset_tree *t,
+                                            unsigned j)
+{
+       unsigned prev_u64s = ro_aux_tree_prev(b, t)[j];
+
+       return (void *) (tree_to_bkey(b, t, j)->_data - prev_u64s);
+}
+
+static struct rw_aux_tree *rw_aux_tree(const struct btree *b,
+                                      const struct bset_tree *t)
+{
+       EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
+
+       return __aux_tree_base(b, t);
+}
+
+/*
+ * For the write set - the one we're currently inserting keys into - we don't
+ * maintain a full search tree, we just keep a simple lookup table in t->prev.
+ */
+static struct bkey_packed *rw_aux_to_bkey(const struct btree *b,
+                                         struct bset_tree *t,
+                                         unsigned j)
+{
+       return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset);
+}
+
+static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t,
+                           unsigned j, struct bkey_packed *k)
+{
+       EBUG_ON(k >= btree_bkey_last(b, t));
+
+       rw_aux_tree(b, t)[j] = (struct rw_aux_tree) {
+               .offset = __btree_node_key_to_offset(b, k),
+               .k      = bkey_unpack_pos(b, k),
+       };
+}
+
+static void bch2_bset_verify_rw_aux_tree(struct btree *b,
+                                       struct bset_tree *t)
+{
+       struct bkey_packed *k = btree_bkey_first(b, t);
+       unsigned j = 0;
+
+       if (!btree_keys_expensive_checks(b))
+               return;
+
+       BUG_ON(bset_has_ro_aux_tree(t));
+
+       if (!bset_has_rw_aux_tree(t))
+               return;
+
+       BUG_ON(t->size < 1);
+       BUG_ON(rw_aux_to_bkey(b, t, j) != k);
+
+       goto start;
+       while (1) {
+               if (rw_aux_to_bkey(b, t, j) == k) {
+                       BUG_ON(bkey_cmp(rw_aux_tree(b, t)[j].k,
+                                       bkey_unpack_pos(b, k)));
+start:
+                       if (++j == t->size)
+                               break;
+
+                       BUG_ON(rw_aux_tree(b, t)[j].offset <=
+                              rw_aux_tree(b, t)[j - 1].offset);
+               }
+
+               k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
+               BUG_ON(k >= btree_bkey_last(b, t));
+       }
+}
+
+/* returns idx of first entry >= offset: */
+static unsigned rw_aux_tree_bsearch(struct btree *b,
+                                   struct bset_tree *t,
+                                   unsigned offset)
+{
+       unsigned bset_offs = offset - btree_bkey_first_offset(t);
+       unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t);
+       unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0;
+
+       EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
+       EBUG_ON(!t->size);
+       EBUG_ON(idx > t->size);
+
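+       /* idx is only an interpolated first guess; walk it to the exact answer: */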
+       while (idx < t->size &&
+              rw_aux_tree(b, t)[idx].offset < offset)
+               idx++;
+
+       while (idx &&
+              rw_aux_tree(b, t)[idx - 1].offset >= offset)
+               idx--;
+
+       EBUG_ON(idx < t->size &&
+               rw_aux_tree(b, t)[idx].offset < offset);
+       EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset);
+       EBUG_ON(idx + 1 < t->size &&
+               rw_aux_tree(b, t)[idx].offset ==
+               rw_aux_tree(b, t)[idx + 1].offset);
+
+       return idx;
+}
+
+static inline unsigned bkey_mantissa(const struct bkey_packed *k,
+                                    const struct bkey_float *f,
+                                    unsigned idx)
+{
+       u64 v;
+
+       EBUG_ON(!bkey_packed(k));
+
+       v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3)));
+
+       /*
+        * In little endian, we're shifting off low bits (and then the bits we
+        * want are at the low end), in big endian we're shifting off high bits
+        * (and then the bits we want are at the high end, so we shift them
+        * back down):
+        */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+       v >>= f->exponent & 7;
+#else
+       v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS;
+#endif
+       return (u16) v;
+}
+
+static void make_bfloat(struct btree *b, struct bset_tree *t,
+                       unsigned j,
+                       struct bkey_packed *min_key,
+                       struct bkey_packed *max_key)
+{
+       struct bkey_float *f = bkey_float(b, t, j);
+       struct bkey_packed *m = tree_to_bkey(b, t, j);
+       struct bkey_packed *l, *r;
+       unsigned mantissa;
+       int shift, exponent, high_bit;
+
+       if (is_power_of_2(j)) {
+               l = min_key;
+
+               if (!l->u64s) {
+                       if (!bkey_pack_pos(l, b->data->min_key, b)) {
+                               struct bkey_i tmp;
+
+                               bkey_init(&tmp.k);
+                               tmp.k.p = b->data->min_key;
+                               bkey_copy(l, &tmp);
+                       }
+               }
+       } else {
+               l = tree_to_prev_bkey(b, t, j >> ffs(j));
+
+               EBUG_ON(m < l);
+       }
+
+       if (is_power_of_2(j + 1)) {
+               r = max_key;
+
+               if (!r->u64s) {
+                       if (!bkey_pack_pos(r, t->max_key, b)) {
+                               struct bkey_i tmp;
+
+                               bkey_init(&tmp.k);
+                               tmp.k.p = t->max_key;
+                               bkey_copy(r, &tmp);
+                       }
+               }
+       } else {
+               r = tree_to_bkey(b, t, j >> (ffz(j) + 1));
+
+               EBUG_ON(m > r);
+       }
+
+       /*
+        * for failed bfloats, the lookup code falls back to comparing against
+        * the original key.
+        */
+
+       if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) ||
+           !b->nr_key_bits) {
+               f->exponent = BFLOAT_FAILED_UNPACKED;
+               return;
+       }
+
+       /*
+        * The greatest differing bit of l and r is the first bit we must
+        * include in the bfloat mantissa we're creating in order to do
+        * comparisons - that bit always becomes the high bit of
+        * bfloat->mantissa, and thus the exponent we're calculating here is
+        * the position of what will become the low bit in bfloat->mantissa:
+        *
+        * Note that this may be negative - we may be running off the low end
+        * of the key: we handle this later:
+        */
+       high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r),
+                      min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1);
+       exponent = high_bit - (BKEY_MANTISSA_BITS - 1);
+
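+       /*
+        * Worked example (illustrative): with BKEY_MANTISSA_BITS == 16, if
+        * the greatest differing bit of l and r is bit 40, then high_bit ==
+        * 40 and exponent == 40 - 15 == 25: the mantissa will hold key bits
+        * 40..25.
+        */
+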
+       /*
+        * Then we calculate the actual shift value, from the start of the key
+        * (k->_data), to get the key bits starting at exponent:
+        */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+       shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent;
+
+       EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64);
+#else
+       shift = high_bit_offset +
+               b->nr_key_bits -
+               exponent -
+               BKEY_MANTISSA_BITS;
+
+       EBUG_ON(shift < KEY_PACKED_BITS_START);
+#endif
+       EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED);
+
+       f->exponent = shift;
+       mantissa = bkey_mantissa(m, f, j);
+
+       /*
+        * If we've got garbage bits, set them to all 1s - it's legal for the
+        * bfloat to compare larger than the original key, but not smaller:
+        */
+       if (exponent < 0)
+               mantissa |= ~(~0U << -exponent);
+
+       f->mantissa = mantissa;
+}
+
+/* bytes remaining - only valid for last bset: */
+static unsigned __bset_tree_capacity(struct btree *b, struct bset_tree *t)
+{
+       bset_aux_tree_verify(b);
+
+       return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64);
+}
+
+static unsigned bset_ro_tree_capacity(struct btree *b, struct bset_tree *t)
+{
+       return __bset_tree_capacity(b, t) /
+               (sizeof(struct bkey_float) + sizeof(u8));
+}
+
+static unsigned bset_rw_tree_capacity(struct btree *b, struct bset_tree *t)
+{
+       return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree);
+}
+
+static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
+{
+       struct bkey_packed *k;
+
+       t->size = 1;
+       t->extra = BSET_RW_AUX_TREE_VAL;
+       rw_aux_tree(b, t)[0].offset =
+               __btree_node_key_to_offset(b, btree_bkey_first(b, t));
+
+       bset_tree_for_each_key(b, t, k) {
+               if (t->size == bset_rw_tree_capacity(b, t))
+                       break;
+
+               if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) >
+                   L1_CACHE_BYTES)
+                       rw_aux_tree_set(b, t, t->size++, k);
+       }
+}
+
+static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
+{
+       struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t);
+       struct bkey_packed min_key, max_key;
+       unsigned j, cacheline = 1;
+
+       /* signal to make_bfloat() that they're uninitialized: */
+       min_key.u64s = max_key.u64s = 0;
+
+       t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)),
+                     bset_ro_tree_capacity(b, t));
+retry:
+       if (t->size < 2) {
+               t->size = 0;
+               t->extra = BSET_NO_AUX_TREE_VAL;
+               return;
+       }
+
+       t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
+
+       /* First we figure out where the first key in each cacheline is */
+       eytzinger1_for_each(j, t->size) {
+               while (bkey_to_cacheline(b, t, k) < cacheline)
+                       prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
+
+               if (k >= btree_bkey_last(b, t)) {
+                       /* XXX: this path sucks */
+                       t->size--;
+                       goto retry;
+               }
+
+               ro_aux_tree_prev(b, t)[j] = prev->u64s;
+               bkey_float(b, t, j)->key_offset =
+                       bkey_to_cacheline_offset(b, t, cacheline++, k);
+
+               EBUG_ON(tree_to_prev_bkey(b, t, j) != prev);
+               EBUG_ON(tree_to_bkey(b, t, j) != k);
+       }
+
+       while (k != btree_bkey_last(b, t))
+               prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
+
+       t->max_key = bkey_unpack_pos(b, prev);
+
+       /* Then we build the tree */
+       eytzinger1_for_each(j, t->size)
+               make_bfloat(b, t, j, &min_key, &max_key);
+}
+
+static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
+{
+       struct bset_tree *i;
+
+       for (i = b->set; i != t; i++)
+               BUG_ON(bset_has_rw_aux_tree(i));
+
+       bch2_bset_set_no_aux_tree(b, t);
+
+       /* round up to next cacheline: */
+       t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t),
+                                     SMP_CACHE_BYTES / sizeof(u64));
+
+       bset_aux_tree_verify(b);
+}
+
+void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t,
+                            bool writeable)
+{
+       if (writeable
+           ? bset_has_rw_aux_tree(t)
+           : bset_has_ro_aux_tree(t))
+               return;
+
+       bset_alloc_tree(b, t);
+
+       if (!__bset_tree_capacity(b, t))
+               return;
+
+       if (writeable)
+               __build_rw_aux_tree(b, t);
+       else
+               __build_ro_aux_tree(b, t);
+
+       bset_aux_tree_verify(b);
+}
+
+void bch2_bset_init_first(struct btree *b, struct bset *i)
+{
+       struct bset_tree *t;
+
+       BUG_ON(b->nsets);
+
+       memset(i, 0, sizeof(*i));
+       get_random_bytes(&i->seq, sizeof(i->seq));
+       SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
+       t = &b->set[b->nsets++];
+       set_btree_bset(b, t, i);
+}
+
+void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
+                        struct btree_node_entry *bne)
+{
+       struct bset *i = &bne->keys;
+       struct bset_tree *t;
+
+       BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c));
+       BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b)));
+       BUG_ON(b->nsets >= MAX_BSETS);
+
+       memset(i, 0, sizeof(*i));
+       i->seq = btree_bset_first(b)->seq;
+       SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
+       t = &b->set[b->nsets++];
+       set_btree_bset(b, t, i);
+}
+
+/*
+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the
+ * immediate predecessor:
+ */
+static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
+                                      struct bkey_packed *k)
+{
+       struct bkey_packed *p;
+       unsigned offset;
+       int j;
+
+       EBUG_ON(k < btree_bkey_first(b, t) ||
+               k > btree_bkey_last(b, t));
+
+       if (k == btree_bkey_first(b, t))
+               return NULL;
+
+       switch (bset_aux_tree_type(t)) {
+       case BSET_NO_AUX_TREE:
+               p = btree_bkey_first(b, t);
+               break;
+       case BSET_RO_AUX_TREE:
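+               /*
+                * Walk tree entries backwards from @k's cacheline until one
+                * points before @k:
+                */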
+               j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k));
+
+               do {
+                       p = j ? tree_to_bkey(b, t,
+                                       __inorder_to_eytzinger1(j--,
+                                                       t->size, t->extra))
+                             : btree_bkey_first(b, t);
+               } while (p >= k);
+               break;
+       case BSET_RW_AUX_TREE:
+               offset = __btree_node_key_to_offset(b, k);
+               j = rw_aux_tree_bsearch(b, t, offset);
+               p = j ? rw_aux_to_bkey(b, t, j - 1)
+                     : btree_bkey_first(b, t);
+               break;
+       }
+
+       return p;
+}
+
+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
+                                         struct bset_tree *t,
+                                         struct bkey_packed *k,
+                                         unsigned min_key_type)
+{
+       struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;
+
+       while ((p = __bkey_prev(b, t, k)) && !ret) {
+               for (i = p; i != k; i = bkey_next_skip_noops(i, k))
+                       if (i->type >= min_key_type)
+                               ret = i;
+
+               k = p;
+       }
+
+       if (btree_keys_expensive_checks(b)) {
+               BUG_ON(ret >= orig_k);
+
+               for (i = ret
+                       ? bkey_next_skip_noops(ret, orig_k)
+                       : btree_bkey_first(b, t);
+                    i != orig_k;
+                    i = bkey_next_skip_noops(i, orig_k))
+                       BUG_ON(i->type >= min_key_type);
+       }
+
+       return ret;
+}
+
+/* Insert */
+
+static void rw_aux_tree_fix_invalidated_key(struct btree *b,
+                                           struct bset_tree *t,
+                                           struct bkey_packed *k)
+{
+       unsigned offset = __btree_node_key_to_offset(b, k);
+       unsigned j = rw_aux_tree_bsearch(b, t, offset);
+
+       if (j < t->size &&
+           rw_aux_tree(b, t)[j].offset == offset)
+               rw_aux_tree_set(b, t, j, k);
+
+       bch2_bset_verify_rw_aux_tree(b, t);
+}
+
+static void ro_aux_tree_fix_invalidated_key(struct btree *b,
+                                           struct bset_tree *t,
+                                           struct bkey_packed *k)
+{
+       struct bkey_packed min_key, max_key;
+       unsigned inorder, j;
+
+       EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
+
+       /* signal to make_bfloat() that they're uninitialized: */
+       min_key.u64s = max_key.u64s = 0;
+
+       if (bkey_next_skip_noops(k, btree_bkey_last(b, t)) == btree_bkey_last(b, t)) {
+               t->max_key = bkey_unpack_pos(b, k);
+
+               for (j = 1; j < t->size; j = j * 2 + 1)
+                       make_bfloat(b, t, j, &min_key, &max_key);
+       }
+
+       inorder = bkey_to_cacheline(b, t, k);
+
+       if (inorder &&
+           inorder < t->size) {
+               j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
+
+               if (k == tree_to_bkey(b, t, j)) {
+                       /* Fix the node this key corresponds to */
+                       make_bfloat(b, t, j, &min_key, &max_key);
+
+                       /* Children for which this key is the right boundary */
+                       for (j = eytzinger1_left_child(j);
+                            j < t->size;
+                            j = eytzinger1_right_child(j))
+                               make_bfloat(b, t, j, &min_key, &max_key);
+               }
+       }
+
+       if (inorder + 1 < t->size) {
+               j = __inorder_to_eytzinger1(inorder + 1, t->size, t->extra);
+
+               if (k == tree_to_prev_bkey(b, t, j)) {
+                       make_bfloat(b, t, j, &min_key, &max_key);
+
+                       /* Children for which this key is the left boundary */
+                       for (j = eytzinger1_right_child(j);
+                            j < t->size;
+                            j = eytzinger1_left_child(j))
+                               make_bfloat(b, t, j, &min_key, &max_key);
+               }
+       }
+}
+
+/**
+ * bch2_bset_fix_invalidated_key() - given an existing key @k that has been
+ * modified, fix any auxiliary search tree by remaking all the nodes in the
+ * auxiliary search tree that @k corresponds to
+ */
+void bch2_bset_fix_invalidated_key(struct btree *b, struct bkey_packed *k)
+{
+       struct bset_tree *t = bch2_bkey_to_bset(b, k);
+
+       switch (bset_aux_tree_type(t)) {
+       case BSET_NO_AUX_TREE:
+               break;
+       case BSET_RO_AUX_TREE:
+               ro_aux_tree_fix_invalidated_key(b, t, k);
+               break;
+       case BSET_RW_AUX_TREE:
+               rw_aux_tree_fix_invalidated_key(b, t, k);
+               break;
+       }
+}
+
+static void bch2_bset_fix_lookup_table(struct btree *b,
+                                      struct bset_tree *t,
+                                      struct bkey_packed *_where,
+                                      unsigned clobber_u64s,
+                                      unsigned new_u64s)
+{
+       int shift = new_u64s - clobber_u64s;
+       unsigned l, j, where = __btree_node_key_to_offset(b, _where);
+
+       EBUG_ON(bset_has_ro_aux_tree(t));
+
+       if (!bset_has_rw_aux_tree(t))
+               return;
+
+       /* returns first entry >= where */
+       l = rw_aux_tree_bsearch(b, t, where);
+
+       if (!l) /* never delete first entry */
+               l++;
+       else if (l < t->size &&
+                where < t->end_offset &&
+                rw_aux_tree(b, t)[l].offset == where)
+               rw_aux_tree_set(b, t, l++, _where);
+
+       /* l now > where */
+
+       for (j = l;
+            j < t->size &&
+            rw_aux_tree(b, t)[j].offset < where + clobber_u64s;
+            j++)
+               ;
+
+       if (j < t->size &&
+           rw_aux_tree(b, t)[j].offset + shift ==
+           rw_aux_tree(b, t)[l - 1].offset)
+               j++;
+
+       memmove(&rw_aux_tree(b, t)[l],
+               &rw_aux_tree(b, t)[j],
+               (void *) &rw_aux_tree(b, t)[t->size] -
+               (void *) &rw_aux_tree(b, t)[j]);
+       t->size -= j - l;
+
+       for (j = l; j < t->size; j++)
+              rw_aux_tree(b, t)[j].offset += shift;
+
+       EBUG_ON(l < t->size &&
+               rw_aux_tree(b, t)[l].offset ==
+               rw_aux_tree(b, t)[l - 1].offset);
+
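+       /*
+        * If the edit opened up a gap of more than a cacheline's worth of
+        * u64s between adjacent lookup table entries, add a new entry so that
+        * linear searches stay bounded by roughly one cacheline:
+        */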
+       if (t->size < bset_rw_tree_capacity(b, t) &&
+           (l < t->size
+            ? rw_aux_tree(b, t)[l].offset
+            : t->end_offset) -
+           rw_aux_tree(b, t)[l - 1].offset >
+           L1_CACHE_BYTES / sizeof(u64)) {
+               struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1);
+               struct bkey_packed *end = l < t->size
+                       ? rw_aux_to_bkey(b, t, l)
+                       : btree_bkey_last(b, t);
+               struct bkey_packed *k = start;
+
+               while (1) {
+                       k = bkey_next_skip_noops(k, end);
+                       if (k == end)
+                               break;
+
+                       if ((void *) k - (void *) start >= L1_CACHE_BYTES) {
+                               memmove(&rw_aux_tree(b, t)[l + 1],
+                                       &rw_aux_tree(b, t)[l],
+                                       (void *) &rw_aux_tree(b, t)[t->size] -
+                                       (void *) &rw_aux_tree(b, t)[l]);
+                               t->size++;
+                               rw_aux_tree_set(b, t, l, k);
+                               break;
+                       }
+               }
+       }
+
+       bch2_bset_verify_rw_aux_tree(b, t);
+       bset_aux_tree_verify(b);
+}
+
+void bch2_bset_insert(struct btree *b,
+                     struct btree_node_iter *iter,
+                     struct bkey_packed *where,
+                     struct bkey_i *insert,
+                     unsigned clobber_u64s)
+{
+       struct bkey_format *f = &b->format;
+       struct bset_tree *t = bset_tree_last(b);
+       struct bkey_packed packed, *src = bkey_to_packed(insert);
+
+       bch2_bset_verify_rw_aux_tree(b, t);
+       bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s);
+
+       if (bch2_bkey_pack_key(&packed, &insert->k, f))
+               src = &packed;
+
+       if (!bkey_whiteout(&insert->k))
+               btree_keys_account_key_add(&b->nr, t - b->set, src);
+
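+       /* resize the gap at @where to @src's size, shifting later keys: */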
+       if (src->u64s != clobber_u64s) {
+               u64 *src_p = where->_data + clobber_u64s;
+               u64 *dst_p = where->_data + src->u64s;
+
+               EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) <
+                       (int) clobber_u64s - src->u64s);
+
+               memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p);
+               le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s);
+               set_btree_bset_end(b, t);
+       }
+
+       memcpy_u64s(where, src,
+                   bkeyp_key_u64s(f, src));
+       memcpy_u64s(bkeyp_val(f, where), &insert->v,
+                   bkeyp_val_u64s(f, src));
+
+       if (src->u64s != clobber_u64s)
+               bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
+
+       bch2_verify_btree_nr_keys(b);
+}
+
+void bch2_bset_delete(struct btree *b,
+                     struct bkey_packed *where,
+                     unsigned clobber_u64s)
+{
+       struct bset_tree *t = bset_tree_last(b);
+       u64 *src_p = where->_data + clobber_u64s;
+       u64 *dst_p = where->_data;
+
+       bch2_bset_verify_rw_aux_tree(b, t);
+
+       EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s);
+
+       memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p);
+       le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s);
+       set_btree_bset_end(b, t);
+
+       bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0);
+}
+
+/* Lookup */
+
+__flatten
+static struct bkey_packed *bset_search_write_set(const struct btree *b,
+                               struct bset_tree *t,
+                               struct bpos *search,
+                               const struct bkey_packed *packed_search)
+{
+       unsigned l = 0, r = t->size;
+
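+       /* binary search for the last table entry with key < *search (else entry 0): */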
+       while (l + 1 != r) {
+               unsigned m = (l + r) >> 1;
+
+               if (bkey_cmp(rw_aux_tree(b, t)[m].k, *search) < 0)
+                       l = m;
+               else
+                       r = m;
+       }
+
+       return rw_aux_to_bkey(b, t, l);
+}
+
+static inline void prefetch_four_cachelines(void *p)
+{
+#ifdef CONFIG_X86_64
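+       /*
+        * The pointer is biased by +127 so all four displacements (-127, -63,
+        * +1, +65) fit in a signed 8-bit immediate, keeping the prefetch
+        * instruction encodings short.
+        */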
+       asm(".intel_syntax noprefix;"
+           "prefetcht0 [%0 - 127 + 64 * 0];"
+           "prefetcht0 [%0 - 127 + 64 * 1];"
+           "prefetcht0 [%0 - 127 + 64 * 2];"
+           "prefetcht0 [%0 - 127 + 64 * 3];"
+           ".att_syntax prefix;"
+           :
+           : "r" (p + 127));
+#else
+       prefetch(p + L1_CACHE_BYTES * 0);
+       prefetch(p + L1_CACHE_BYTES * 1);
+       prefetch(p + L1_CACHE_BYTES * 2);
+       prefetch(p + L1_CACHE_BYTES * 3);
+#endif
+}
+
+static inline bool bkey_mantissa_bits_dropped(const struct btree *b,
+                                             const struct bkey_float *f,
+                                             unsigned idx)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+       unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits;
+
+       return f->exponent > key_bits_start;
+#else
+       unsigned key_bits_end = high_bit_offset + b->nr_key_bits;
+
+       return f->exponent + BKEY_MANTISSA_BITS < key_bits_end;
+#endif
+}
+
+__flatten
+static struct bkey_packed *bset_search_tree(const struct btree *b,
+                               struct bset_tree *t,
+                               struct bpos *search,
+                               const struct bkey_packed *packed_search)
+{
+       struct ro_aux_tree *base = ro_aux_tree_base(b, t);
+       struct bkey_float *f;
+       struct bkey_packed *k;
+       unsigned inorder, n = 1, l, r;
+       int cmp;
+
+       do {
+               if (likely(n << 4 < t->size))
+                       prefetch(&base->f[n << 4]);
+
+               f = &base->f[n];
+
+               if (unlikely(!packed_search))
+                       goto slowpath;
+               if (unlikely(f->exponent >= BFLOAT_FAILED))
+                       goto slowpath;
+
+               l = f->mantissa;
+               r = bkey_mantissa(packed_search, f, n);
+
+               if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n))
+                       goto slowpath;
+
+               n = n * 2 + (l < r);
+               continue;
+slowpath:
+               k = tree_to_bkey(b, t, n);
+               cmp = bkey_cmp_p_or_unp(b, k, packed_search, search);
+               if (!cmp)
+                       return k;
+
+               n = n * 2 + (cmp < 0);
+       } while (n < t->size);
+
+       inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra);
+
+       /*
+        * n would have been the node we recursed to - the low bit tells us if
+        * we recursed left or recursed right.
+        */
+       if (likely(!(n & 1))) {
+               --inorder;
+               if (unlikely(!inorder))
+                       return btree_bkey_first(b, t);
+
+               f = &base->f[eytzinger1_prev(n >> 1, t->size)];
+       }
+
+       return cacheline_to_bkey(b, t, inorder, f->key_offset);
+}
+
+static __always_inline __flatten
+struct bkey_packed *__bch2_bset_search(struct btree *b,
+                               struct bset_tree *t,
+                               struct bpos *search,
+                               const struct bkey_packed *lossy_packed_search)
+{
+
+       /*
+        * First we search for a cacheline, then we do a linear search within
+        * that cacheline.
+        *
+        * To search for the cacheline, there are three different possibilities:
+        *  * The set is too small to have a search tree, so we just do a linear
+        *    search over the whole set.
+        *  * The set is the one we're currently inserting into; keeping a full
+        *    auxiliary search tree up to date would be too expensive, so we
+        *    use a much simpler lookup table to do a binary search -
+        *    bset_search_write_set().
+        *  * Or we use the auxiliary search tree we constructed earlier -
+        *    bset_search_tree()
+        */
+
+       switch (bset_aux_tree_type(t)) {
+       case BSET_NO_AUX_TREE:
+               return btree_bkey_first(b, t);
+       case BSET_RW_AUX_TREE:
+               return bset_search_write_set(b, t, search, lossy_packed_search);
+       case BSET_RO_AUX_TREE:
+               /*
+                * Each node in the auxiliary search tree covers a certain range
+                * of bits, and keys above and below the set it covers might
+                * differ outside those bits, so we have to special case the
+                * start and end; handle that here:
+                */
+
+               if (bkey_cmp(*search, t->max_key) > 0)
+                       return btree_bkey_last(b, t);
+
+               return bset_search_tree(b, t, search, lossy_packed_search);
+       default:
+               unreachable();
+       }
+}
+
+static __always_inline __flatten
+struct bkey_packed *bch2_bset_search_linear(struct btree *b,
+                               struct bset_tree *t,
+                               struct bpos *search,
+                               struct bkey_packed *packed_search,
+                               const struct bkey_packed *lossy_packed_search,
+                               struct bkey_packed *m)
+{
+       if (lossy_packed_search)
+               while (m != btree_bkey_last(b, t) &&
+                      bkey_iter_cmp_p_or_unp(b, m,
+                                       lossy_packed_search, search) < 0)
+                       m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
+
+       if (!packed_search)
+               while (m != btree_bkey_last(b, t) &&
+                      bkey_iter_pos_cmp(b, m, search) < 0)
+                       m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
+
+       if (btree_keys_expensive_checks(b)) {
+               struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
+
+               BUG_ON(prev &&
+                      bkey_iter_cmp_p_or_unp(b, prev,
+                                       packed_search, search) >= 0);
+       }
+
+       return m;
+}
+
+/*
+ * Returns the first key greater than or equal to @search
+ */
+static __always_inline __flatten
+struct bkey_packed *bch2_bset_search(struct btree *b,
+                               struct bset_tree *t,
+                               struct bpos *search,
+                               struct bkey_packed *packed_search,
+                               const struct bkey_packed *lossy_packed_search)
+{
+       struct bkey_packed *m = __bch2_bset_search(b, t, search,
+                                                  lossy_packed_search);
+
+       return bch2_bset_search_linear(b, t, search,
+                                packed_search, lossy_packed_search, m);
+}
+
+/* Btree node iterator */
+
+static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
+                             struct btree *b,
+                             const struct bkey_packed *k,
+                             const struct bkey_packed *end)
+{
+       if (k != end) {
+               struct btree_node_iter_set *pos;
+
+               btree_node_iter_for_each(iter, pos)
+                       ;
+
+               BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data));
+               *pos = (struct btree_node_iter_set) {
+                       __btree_node_key_to_offset(b, k),
+                       __btree_node_key_to_offset(b, end)
+               };
+       }
+}
+
+void bch2_btree_node_iter_push(struct btree_node_iter *iter,
+                              struct btree *b,
+                              const struct bkey_packed *k,
+                              const struct bkey_packed *end)
+{
+       __bch2_btree_node_iter_push(iter, b, k, end);
+       bch2_btree_node_iter_sort(iter, b);
+}
+
+noinline __flatten __attribute__((cold))
+static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
+                             struct btree *b, struct bpos *search)
+{
+       struct bset_tree *t;
+
+       trace_bkey_pack_pos_fail(search);
+
+       for_each_bset(b, t)
+               __bch2_btree_node_iter_push(iter, b,
+                       bch2_bset_search(b, t, search, NULL, NULL),
+                       btree_bkey_last(b, t));
+
+       bch2_btree_node_iter_sort(iter, b);
+}
+
+/**
+ * bch2_btree_node_iter_init - initialize a btree node iterator, starting from a
+ * given position
+ *
+ * Main entry point to the lookup code for individual btree nodes:
+ *
+ * NOTE:
+ *
+ * When you don't filter out deleted keys, btree nodes _do_ contain duplicate
+ * keys. This doesn't matter for most code, but it does matter for lookups.
+ *
+ * Consider a sequence of adjacent keys containing a string of equal keys:
+ *     i j k k k k l m
+ *
+ * If you search for k, the lookup code isn't guaranteed to return you any
+ * specific k. The lookup code is conceptually doing a binary search and
+ * iterating backwards is very expensive so if the pivot happens to land at the
+ * last k that's what you'll get.
+ *
+ * This works out ok, but it's something to be aware of:
+ *
+ *  - For non extents, we guarantee that the live key comes last - see
+ *    btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't
+ *    see will only be deleted keys you don't care about.
+ *
+ *  - For extents, deleted keys sort last (see the comment at the top of this
+ *    file). But when you're searching for extents, you actually want the first
+ *    key strictly greater than your search key - an extent that compares equal
+ *    to the search key is going to have 0 sectors after the search key.
+ *
+ *    But this does mean that we can't just search for
+ *    bkey_successor(start_of_range) to get the first extent that overlaps with
+ *    the range we want - if we're unlucky and there's an extent that ends
+ *    exactly where we searched, then there could be a deleted key at the same
+ *    position and we'd get that when we search instead of the preceding extent
+ *    we needed.
+ *
+ *    So we've got to search for start_of_range, then after the lookup iterate
+ *    past any extents that compare equal to the position we searched for.
+ */
+__flatten
+void bch2_btree_node_iter_init(struct btree_node_iter *iter,
+                              struct btree *b, struct bpos *search)
+{
+       struct bkey_packed p, *packed_search = NULL;
+       struct btree_node_iter_set *pos = iter->data;
+       struct bkey_packed *k[MAX_BSETS];
+       unsigned i;
+
+       EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0);
+       bset_aux_tree_verify(b);
+
+       memset(iter, 0, sizeof(*iter));
+
+       switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) {
+       case BKEY_PACK_POS_EXACT:
+               packed_search = &p;
+               break;
+       case BKEY_PACK_POS_SMALLER:
+               packed_search = NULL;
+               break;
+       case BKEY_PACK_POS_FAIL:
+               btree_node_iter_init_pack_failed(iter, b, search);
+               return;
+       }
+
+       for (i = 0; i < b->nsets; i++) {
+               k[i] = __bch2_bset_search(b, b->set + i, search, &p);
+               prefetch_four_cachelines(k[i]);
+       }
+
+       for (i = 0; i < b->nsets; i++) {
+               struct bset_tree *t = b->set + i;
+               struct bkey_packed *end = btree_bkey_last(b, t);
+
+               k[i] = bch2_bset_search_linear(b, t, search,
+                                              packed_search, &p, k[i]);
+               if (k[i] != end)
+                       *pos++ = (struct btree_node_iter_set) {
+                               __btree_node_key_to_offset(b, k[i]),
+                               __btree_node_key_to_offset(b, end)
+                       };
+       }
+
+       bch2_btree_node_iter_sort(iter, b);
+}
+
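+/*
+ * Illustrative sketch (editorial, not upstream code) of the extent lookup
+ * pattern described in the comment above bch2_btree_node_iter_init():
+ * search for the start of the range, then step past any keys comparing
+ * equal to the position searched for. @start_of_range is a hypothetical
+ * struct bpos; the helpers are assumed to be the real ones from this file
+ * and bkey.h:
+ */
+#if 0
+       struct btree_node_iter iter;
+       struct bkey_packed *k;
+
+       bch2_btree_node_iter_init(&iter, b, &start_of_range);
+       while ((k = bch2_btree_node_iter_peek(&iter, b)) &&
+              !bkey_cmp_left_packed(b, k, &start_of_range))
+               bch2_btree_node_iter_advance(&iter, b);
+#endif
+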
+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter,
+                                         struct btree *b)
+{
+       struct bset_tree *t;
+
+       memset(iter, 0, sizeof(*iter));
+
+       for_each_bset(b, t)
+               __bch2_btree_node_iter_push(iter, b,
+                                          btree_bkey_first(b, t),
+                                          btree_bkey_last(b, t));
+       bch2_btree_node_iter_sort(iter, b);
+}
+
+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter,
+                                                 struct btree *b,
+                                                 struct bset_tree *t)
+{
+       struct btree_node_iter_set *set;
+
+       btree_node_iter_for_each(iter, set)
+               if (set->end == t->end_offset)
+                       return __btree_node_offset_to_key(b, set->k);
+
+       return btree_bkey_last(b, t);
+}
+
+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
+                                           struct btree *b,
+                                           unsigned first)
+{
+       bool ret;
+
+       if ((ret = (btree_node_iter_cmp(b,
+                                       iter->data[first],
+                                       iter->data[first + 1]) > 0)))
+               swap(iter->data[first], iter->data[first + 1]);
+       return ret;
+}
+
+void bch2_btree_node_iter_sort(struct btree_node_iter *iter,
+                              struct btree *b)
+{
+       /* unrolled bubble sort: */
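+       /* (the iterator has at most MAX_BSETS == 3 sets, so two passes suffice) */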
+
+       if (!__btree_node_iter_set_end(iter, 2)) {
+               btree_node_iter_sort_two(iter, b, 0);
+               btree_node_iter_sort_two(iter, b, 1);
+       }
+
+       if (!__btree_node_iter_set_end(iter, 1))
+               btree_node_iter_sort_two(iter, b, 0);
+}
+
+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter,
+                                  struct btree_node_iter_set *set)
+{
+       struct btree_node_iter_set *last =
+               iter->data + ARRAY_SIZE(iter->data) - 1;
+
+       memmove(&set[0], &set[1], (void *) last - (void *) set);
+       *last = (struct btree_node_iter_set) { 0, 0 };
+}
+
+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
+                                                 struct btree *b)
+{
+       iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s;
+
+       EBUG_ON(iter->data->k > iter->data->end);
+
+       while (!__btree_node_iter_set_end(iter, 0) &&
+              !__bch2_btree_node_iter_peek_all(iter, b)->u64s)
+               iter->data->k++;
+
+       if (unlikely(__btree_node_iter_set_end(iter, 0))) {
+               bch2_btree_node_iter_set_drop(iter, iter->data);
+               return;
+       }
+
+       if (__btree_node_iter_set_end(iter, 1))
+               return;
+
+       if (!btree_node_iter_sort_two(iter, b, 0))
+               return;
+
+       if (__btree_node_iter_set_end(iter, 2))
+               return;
+
+       btree_node_iter_sort_two(iter, b, 1);
+}
+
+void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
+                                 struct btree *b)
+{
+       if (btree_keys_expensive_checks(b)) {
+               bch2_btree_node_iter_verify(iter, b);
+               bch2_btree_node_iter_next_check(iter, b);
+       }
+
+       __bch2_btree_node_iter_advance(iter, b);
+}
+
+/*
+ * Expensive:
+ */
+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
+                                                 struct btree *b)
+{
+       struct bkey_packed *k, *prev = NULL;
+       struct btree_node_iter_set *set;
+       struct bset_tree *t;
+       unsigned end = 0;
+
+       if (btree_keys_expensive_checks(b))
+               bch2_btree_node_iter_verify(iter, b);
+
+       for_each_bset(b, t) {
+               k = bch2_bkey_prev_all(b, t,
+                       bch2_btree_node_iter_bset_pos(iter, b, t));
+               if (k &&
+                   (!prev || bkey_iter_cmp(b, k, prev) > 0)) {
+                       prev = k;
+                       end = t->end_offset;
+               }
+       }
+
+       if (!prev)
+               return NULL;
+
+       /*
+        * We're manually memmoving instead of just calling sort() to ensure the
+        * prev we picked ends up in slot 0 - sort won't necessarily put it
+        * there because of duplicate deleted keys:
+        */
+       btree_node_iter_for_each(iter, set)
+               if (set->end == end)
+                       goto found;
+
+       BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]);
+found:
+       BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data));
+
+       memmove(&iter->data[1],
+               &iter->data[0],
+               (void *) set - (void *) &iter->data[0]);
+
+       iter->data[0].k = __btree_node_key_to_offset(b, prev);
+       iter->data[0].end = end;
+
+       if (btree_keys_expensive_checks(b))
+               bch2_btree_node_iter_verify(iter, b);
+       return prev;
+}
+
+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *iter,
+                                                    struct btree *b,
+                                                    unsigned min_key_type)
+{
+       struct bkey_packed *prev;
+
+       do {
+               prev = bch2_btree_node_iter_prev_all(iter, b);
+       } while (prev && prev->type < min_key_type);
+
+       return prev;
+}
+
+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
+                                                struct btree *b,
+                                                struct bkey *u)
+{
+       struct bkey_packed *k = bch2_btree_node_iter_peek(iter, b);
+
+       return k ? bkey_disassemble(b, k, u) : bkey_s_c_null;
+}
+
+/* Stats */
+
+void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats)
+{
+       struct bset_tree *t;
+
+       for_each_bset(b, t) {
+               enum bset_aux_tree_type type = bset_aux_tree_type(t);
+               size_t j;
+
+               stats->sets[type].nr++;
+               stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) *
+                       sizeof(u64);
+
+               if (bset_has_ro_aux_tree(t)) {
+                       stats->floats += t->size - 1;
+
+                       for (j = 1; j < t->size; j++)
+                               stats->failed +=
+                                       bkey_float(b, t, j)->exponent ==
+                                       BFLOAT_FAILED;
+               }
+       }
+}
+
+void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
+                        struct bkey_packed *k)
+{
+       struct bset_tree *t = bch2_bkey_to_bset(b, k);
+       struct bkey uk;
+       unsigned j, inorder;
+
+       if (out->pos != out->end)
+               *out->pos = '\0';
+
+       if (!bset_has_ro_aux_tree(t))
+               return;
+
+       inorder = bkey_to_cacheline(b, t, k);
+       if (!inorder || inorder >= t->size)
+               return;
+
+       j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
+       if (k != tree_to_bkey(b, t, j))
+               return;
+
+       switch (bkey_float(b, t, j)->exponent) {
+       case BFLOAT_FAILED:
+               uk = bkey_unpack_key(b, k);
+               pr_buf(out,
+                      "    failed unpacked at depth %u\n"
+                      "\t%llu:%llu\n",
+                      ilog2(j),
+                      uk.p.inode, uk.p.offset);
+               break;
+       }
+}
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
new file mode 100644 (file)
index 0000000..5921cf6
--- /dev/null
@@ -0,0 +1,661 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BSET_H
+#define _BCACHEFS_BSET_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "bcachefs_format.h"
+#include "bkey.h"
+#include "bkey_methods.h"
+#include "btree_types.h"
+#include "util.h" /* for time_stats */
+#include "vstructs.h"
+
+/*
+ * BKEYS:
+ *
+ * A bkey contains a key, a size field, a variable number of pointers, and some
+ * ancillary flag bits.
+ *
+ * We use two different functions for validating bkeys, bkey_invalid() and
+ * bkey_deleted().
+ *
+ * The one exception to the rule that ptr_invalid() filters out invalid keys is
+ * that it also filters out keys of size 0 - these are keys that have been
+ * completely overwritten. It'd be safe to delete these in memory while leaving
+ * them on disk, just unnecessary work - so we filter them out when resorting
+ * instead.
+ *
+ * We can't filter out stale keys when we're resorting, because garbage
+ * collection needs to find them to ensure bucket gens don't wrap around -
+ * unless we're rewriting the btree node those stale keys still exist on disk.
+ *
+ * We also implement functions here for removing some number of sectors from the
+ * front or the back of a bkey - this is mainly used for fixing overlapping
+ * extents, by removing the overlapping sectors from the older key.
+ *
+ * BSETS:
+ *
+ * A bset is an array of bkeys laid out contiguously in memory in sorted order,
+ * along with a header. A btree node is made up of a number of these, written at
+ * different times.
+ *
+ * There could be many of them on disk, but we never allow there to be more than
+ * 4 in memory - we lazily resort as needed.
+ *
+ * We implement code here for creating and maintaining auxiliary search trees
+ * (described below) for searching an individual bset, and on top of that we
+ * implement a btree iterator.
+ *
+ * BTREE ITERATOR:
+ *
+ * Most of the code in bcache doesn't care about an individual bset - it needs
+ * to search entire btree nodes and iterate over them in sorted order.
+ *
+ * The btree iterator code serves both functions; it iterates through the keys
+ * in a btree node in sorted order, starting from either keys after a specific
+ * point (if you pass it a search key) or the start of the btree node.
+ *
+ * AUXILIARY SEARCH TREES:
+ *
+ * Since keys are variable length, we can't use a binary search on a bset - we
+ * wouldn't be able to find the start of the next key. But binary searches are
+ * slow anyways, due to terrible cache behaviour; bcache originally used binary
+ * searches and that code topped out at under 50k lookups/second.
+ *
+ * So we need to construct some sort of lookup table. Since we only insert keys
+ * into the last (unwritten) set, most of the keys within a given btree node are
+ * usually in sets that are mostly constant. We use two different types of
+ * lookup tables to take advantage of this.
+ *
+ * What both lookup tables have in common is that they don't index every key in
+ * the set; they index one key every BSET_CACHELINE bytes, and then a linear
+ * search is used for the rest.
+ *
+ * For sets that have been written to disk and are no longer being inserted
+ * into, we construct a binary search tree in an array - traversing a binary
+ * search tree in an array gives excellent locality of reference and is very
+ * fast, since both children of any node are adjacent to each other in memory
+ * (and their grandchildren, and great grandchildren...) - this means
+ * prefetching can be used to great effect.
+ *
+ * It's quite useful performance-wise to keep these nodes small - not just
+ * because they're more likely to be in L2, but also because we can prefetch
+ * more nodes on a single cacheline and thus prefetch more iterations in advance
+ * when traversing this tree.
+ *
+ * Nodes in the auxiliary search tree must contain both a key to compare against
+ * (we don't want to fetch the key from the set, that would defeat the purpose),
+ * and a pointer to the key. We use a few tricks to compress both of these.
+ *
+ * To compress the pointer, we take advantage of the fact that one node in the
+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have
+ * a function (to_inorder()) that takes the index of a node in a binary tree and
+ * returns what its index would be in an inorder traversal, so we only have to
+ * store the low bits of the offset.
+ *
+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To
+ * compress that, we take advantage of the fact that when we're traversing the
+ * search tree at every iteration we know that both our search key and the key
+ * we're looking for lie within some range - bounded by our previous
+ * comparisons. (We special case the start of a search so that this is true even
+ * at the root of the tree).
+ *
+ * So if we know the key we're looking for is between a and b, and a and b
+ * don't differ above bit 50, we don't need to check anything higher than bit
+ * 50.
+ *
+ * We don't usually need the rest of the bits, either; we only need enough bits
+ * to partition the key range we're currently checking.  Consider key n - the
+ * key our auxiliary search tree node corresponds to, and key p, the key
+ * immediately preceding n.  The lowest bit we need to store in the auxiliary
+ * search tree is the highest bit that differs between n and p.
+ *
+ * Note that this could be bit 0 - we might sometimes need all 84 bits to do the
+ * comparison. But we'd really like our nodes in the auxiliary search tree to be
+ * of fixed size.
+ *
+ * The solution is to make them fixed size, and when we're constructing a node
+ * check if p and n differ in the bits we need them to. If they don't, we flag
+ * that node, and when doing lookups we fall back to comparing against the
+ * real key. As long as this doesn't happen too often (and it seems to reliably
+ * happen a bit less than 1% of the time), we win - even on failures, that key
+ * is then more likely to be in cache than if we were doing binary searches all
+ * the way, since we're touching so much less memory.
+ *
+ * The keys in the auxiliary search tree are stored in (software) floating
+ * point, with an exponent and a mantissa. The exponent needs to be big enough
+ * to address all the bits in the original key, but the number of bits in the
+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures.
+ *
+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys
+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes.
+ * We need one node per 128 bytes in the btree node, which means the auxiliary
+ * search trees take up 3% as much memory as the btree itself.
+ *
+ * Constructing these auxiliary search trees is moderately expensive, and we
+ * don't want to be constantly rebuilding the search tree for the last set
+ * whenever we insert another key into it. For the unwritten set, we use a much
+ * simpler lookup table - it's just a flat array, so index i in the lookup table
+ * corresponds to the i'th range of BSET_CACHELINE bytes in the set. Indexing
+ * within each byte range works the same as with the auxiliary search trees.
+ *
+ * These are much easier to keep up to date when we insert a key - we do it
+ * somewhat lazily; when we shift a key up we usually just increment the pointer
+ * to it, only when it would overflow do we go to the trouble of finding the
+ * first key in that range of bytes again.
+ */
+
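+/*
+ * Illustrative sketch only - not bcachefs' real struct bkey_float. This is
+ * the 4 byte auxiliary search tree node described above, written out as a
+ * bitfield; the field names and widths here are assumptions taken from the
+ * comment, not from the actual definition:
+ */
+struct bkey_float_sketch {
+       u32             exponent:7;     /* bit position the mantissa was taken from */
+       u32             key_offset:3;   /* low bits of the key's offset (keys are 8 byte aligned) */
+       u32             mantissa:22;    /* high bits that differ from the preceding key */
+};
+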
+extern bool bch2_expensive_debug_checks;
+
+static inline bool btree_keys_expensive_checks(const struct btree *b)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       return bch2_expensive_debug_checks || *b->expensive_debug_checks;
+#else
+       return false;
+#endif
+}
+
+enum bset_aux_tree_type {
+       BSET_NO_AUX_TREE,
+       BSET_RO_AUX_TREE,
+       BSET_RW_AUX_TREE,
+};
+
+#define BSET_TREE_NR_TYPES     3
+
+#define BSET_NO_AUX_TREE_VAL   (U16_MAX)
+#define BSET_RW_AUX_TREE_VAL   (U16_MAX - 1)
+
+static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t)
+{
+       switch (t->extra) {
+       case BSET_NO_AUX_TREE_VAL:
+               EBUG_ON(t->size);
+               return BSET_NO_AUX_TREE;
+       case BSET_RW_AUX_TREE_VAL:
+               EBUG_ON(!t->size);
+               return BSET_RW_AUX_TREE;
+       default:
+               EBUG_ON(!t->size);
+               return BSET_RO_AUX_TREE;
+       }
+}
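+
+/*
+ * Example of the encoding above: t->extra == U16_MAX means no aux tree,
+ * U16_MAX - 1 means the flat read/write lookup table; any other value is
+ * interpreted as the eytzinger layout's "extra" parameter for the read-only
+ * tree (see the __inorder_to_eytzinger1() call in bch2_bfloat_to_text()).
+ */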
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bkey_float tree we
+ * have this many bytes left, over which we do a linear search.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+
+#define BSET_CACHELINE         128
+
+static inline size_t btree_keys_cachelines(struct btree *b)
+{
+       return (1U << b->byte_order) / BSET_CACHELINE;
+}
+
+static inline size_t btree_aux_data_bytes(struct btree *b)
+{
+       return btree_keys_cachelines(b) * 8;
+}
+
+static inline size_t btree_aux_data_u64s(struct btree *b)
+{
+       return btree_aux_data_bytes(b) / sizeof(u64);
+}
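+
+/*
+ * Worked example: for a 64k btree node, b->byte_order = ilog2(65536) = 16, so
+ * btree_keys_cachelines() = 65536 / 128 = 512, btree_aux_data_bytes() =
+ * 512 * 8 = 4096, and btree_aux_data_u64s() = 512.
+ */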
+
+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
+
+static inline void
+__bkey_unpack_key_format_checked(const struct btree *b,
+                              struct bkey *dst,
+                              const struct bkey_packed *src)
+{
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+       {
+               compiled_unpack_fn unpack_fn = b->aux_data;
+               unpack_fn(dst, src);
+
+               if (btree_keys_expensive_checks(b)) {
+                       struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
+
+                       BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
+               }
+       }
+#else
+       *dst = __bch2_bkey_unpack_key(&b->format, src);
+#endif
+}
+
+static inline struct bkey
+bkey_unpack_key_format_checked(const struct btree *b,
+                              const struct bkey_packed *src)
+{
+       struct bkey dst;
+
+       __bkey_unpack_key_format_checked(b, &dst, src);
+       return dst;
+}
+
+static inline void __bkey_unpack_key(const struct btree *b,
+                                    struct bkey *dst,
+                                    const struct bkey_packed *src)
+{
+       if (likely(bkey_packed(src)))
+               __bkey_unpack_key_format_checked(b, dst, src);
+       else
+               *dst = *packed_to_bkey_c(src);
+}
+
+/**
+ * bkey_unpack_key - unpack just the key, not the value
+ */
+static inline struct bkey bkey_unpack_key(const struct btree *b,
+                                         const struct bkey_packed *src)
+{
+       return likely(bkey_packed(src))
+               ? bkey_unpack_key_format_checked(b, src)
+               : *packed_to_bkey_c(src);
+}
+
+static inline struct bpos
+bkey_unpack_pos_format_checked(const struct btree *b,
+                              const struct bkey_packed *src)
+{
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+       return bkey_unpack_key_format_checked(b, src).p;
+#else
+       return __bkey_unpack_pos(&b->format, src);
+#endif
+}
+
+static inline struct bpos bkey_unpack_pos(const struct btree *b,
+                                         const struct bkey_packed *src)
+{
+       return likely(bkey_packed(src))
+               ? bkey_unpack_pos_format_checked(b, src)
+               : packed_to_bkey_c(src)->p;
+}
+
+/* Disassembled bkeys */
+
+static inline struct bkey_s_c bkey_disassemble(struct btree *b,
+                                              const struct bkey_packed *k,
+                                              struct bkey *u)
+{
+       __bkey_unpack_key(b, u, k);
+
+       return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
+}
+
+/* non const version: */
+static inline struct bkey_s __bkey_disassemble(struct btree *b,
+                                              struct bkey_packed *k,
+                                              struct bkey *u)
+{
+       __bkey_unpack_key(b, u, k);
+
+       return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
+}
+
+#define for_each_bset(_b, _t)                                          \
+       for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
+
+#define bset_tree_for_each_key(_b, _t, _k)                             \
+       for (_k = btree_bkey_first(_b, _t);                             \
+            _k != btree_bkey_last(_b, _t);                             \
+            _k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t)))
+
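+/*
+ * Usage sketch (hypothetical helper, not part of this commit): count the live
+ * keys in a node with the two iteration macros above.
+ */
+static inline unsigned example_count_live_keys(struct btree *b)
+{
+       struct bset_tree *t;
+       struct bkey_packed *k;
+       unsigned nr = 0;
+
+       for_each_bset(b, t)
+               bset_tree_for_each_key(b, t, k)
+                       if (!bkey_deleted(k))
+                               nr++;
+       return nr;
+}
+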
+static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
+{
+       return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
+}
+
+static inline bool bset_has_rw_aux_tree(struct bset_tree *t)
+{
+       return bset_aux_tree_type(t) == BSET_RW_AUX_TREE;
+}
+
+static inline void bch2_bset_set_no_aux_tree(struct btree *b,
+                                           struct bset_tree *t)
+{
+       BUG_ON(t < b->set);
+
+       for (; t < b->set + ARRAY_SIZE(b->set); t++) {
+               t->size = 0;
+               t->extra = BSET_NO_AUX_TREE_VAL;
+               t->aux_data_offset = U16_MAX;
+       }
+}
+
+static inline void btree_node_set_format(struct btree *b,
+                                        struct bkey_format f)
+{
+       int len;
+
+       b->format       = f;
+       b->nr_key_bits  = bkey_format_key_bits(&f);
+
+       len = bch2_compile_bkey_format(&b->format, b->aux_data);
+       BUG_ON(len < 0 || len > U8_MAX);
+
+       b->unpack_fn_len = len;
+
+       bch2_bset_set_no_aux_tree(b, b->set);
+}
+
+static inline struct bset *bset_next_set(struct btree *b,
+                                        unsigned block_bytes)
+{
+       struct bset *i = btree_bset_last(b);
+
+       EBUG_ON(!is_power_of_2(block_bytes));
+
+       return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
+}
+
+void bch2_btree_keys_init(struct btree *, bool *);
+
+void bch2_bset_init_first(struct btree *, struct bset *);
+void bch2_bset_init_next(struct bch_fs *, struct btree *,
+                        struct btree_node_entry *);
+void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
+void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *);
+
+void bch2_bset_insert(struct btree *, struct btree_node_iter *,
+                    struct bkey_packed *, struct bkey_i *, unsigned);
+void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned);
+
+/* Bkey utility code */
+
+/* packed or unpacked */
+static inline int bkey_cmp_p_or_unp(const struct btree *b,
+                                   const struct bkey_packed *l,
+                                   const struct bkey_packed *r_packed,
+                                   const struct bpos *r)
+{
+       EBUG_ON(r_packed && !bkey_packed(r_packed));
+
+       if (unlikely(!bkey_packed(l)))
+               return bkey_cmp(packed_to_bkey_c(l)->p, *r);
+
+       if (likely(r_packed))
+               return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b);
+
+       return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
+}
+
+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
+
+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
+                                         struct bkey_packed *, unsigned);
+
+static inline struct bkey_packed *
+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
+{
+       return bch2_bkey_prev_filter(b, t, k, 0);
+}
+
+static inline struct bkey_packed *
+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
+{
+       return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1);
+}
+
+enum bch_extent_overlap {
+       BCH_EXTENT_OVERLAP_ALL          = 0,
+       BCH_EXTENT_OVERLAP_BACK         = 1,
+       BCH_EXTENT_OVERLAP_FRONT        = 2,
+       BCH_EXTENT_OVERLAP_MIDDLE       = 3,
+};
+
+/* Returns how k overlaps with m */
+static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
+                                                         const struct bkey *m)
+{
+       int cmp1 = bkey_cmp(k->p, m->p) < 0;
+       int cmp2 = bkey_cmp(bkey_start_pos(k),
+                           bkey_start_pos(m)) > 0;
+
+       return (cmp1 << 1) + cmp2;
+}
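+
+/*
+ * Worked example: k = [0,10) and m = [5,15): k->p (10) < m->p (15) gives
+ * cmp1 = 1; start(k) (0) > start(m) (5) is false, so cmp2 = 0;
+ * (1 << 1) + 0 = BCH_EXTENT_OVERLAP_FRONT - k overlaps the front of m.
+ */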
+
+/* Btree key iteration */
+
+void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
+                             const struct bkey_packed *,
+                             const struct bkey_packed *);
+void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
+                              struct bpos *);
+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
+                                         struct btree *);
+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
+                                                struct btree *,
+                                                struct bset_tree *);
+
+void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *);
+void bch2_btree_node_iter_set_drop(struct btree_node_iter *,
+                                  struct btree_node_iter_set *);
+void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *);
+
+#define btree_node_iter_for_each(_iter, _set)                          \
+       for (_set = (_iter)->data;                                      \
+            _set < (_iter)->data + ARRAY_SIZE((_iter)->data) &&        \
+            (_set)->k != (_set)->end;                                  \
+            _set++)
+
+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter,
+                                            unsigned i)
+{
+       return iter->data[i].k == iter->data[i].end;
+}
+
+static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
+{
+       return __btree_node_iter_set_end(iter, 0);
+}
+
+/*
+ * When keys compare equal, deleted keys compare first:
+ *
+ * XXX: only need to compare pointers for keys that are both within a
+ * btree_node_iterator - we need to break ties for prev() to work correctly
+ */
+static inline int bkey_iter_cmp(const struct btree *b,
+                               const struct bkey_packed *l,
+                               const struct bkey_packed *r)
+{
+       return bkey_cmp_packed(b, l, r)
+               ?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
+               ?: cmp_int(l, r);
+}
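+
+/*
+ * Example of the tie break above: for two keys at the same position where l is
+ * deleted and r is not, (int) bkey_deleted(r) - (int) bkey_deleted(l) = -1, so
+ * the deleted key sorts first; pointer order breaks any remaining tie.
+ */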
+
+static inline int btree_node_iter_cmp(const struct btree *b,
+                                     struct btree_node_iter_set l,
+                                     struct btree_node_iter_set r)
+{
+       return bkey_iter_cmp(b,
+                       __btree_node_offset_to_key(b, l.k),
+                       __btree_node_offset_to_key(b, r.k));
+}
+
+/* These assume r (the search key) is not a deleted key: */
+static inline int bkey_iter_pos_cmp(const struct btree *b,
+                       const struct bkey_packed *l,
+                       const struct bpos *r)
+{
+       return bkey_cmp_left_packed(b, l, r)
+               ?: -((int) bkey_deleted(l));
+}
+
+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b,
+                                   const struct bkey_packed *l,
+                                   const struct bkey_packed *r_packed,
+                                   const struct bpos *r)
+{
+       return bkey_cmp_p_or_unp(b, l, r_packed, r)
+               ?: -((int) bkey_deleted(l));
+}
+
+static inline struct bkey_packed *
+__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
+                               struct btree *b)
+{
+       return __btree_node_offset_to_key(b, iter->data->k);
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter,
+                                struct btree *b,
+                                unsigned min_key_type)
+{
+       while (!bch2_btree_node_iter_end(iter)) {
+               struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b);
+
+               if (k->type >= min_key_type)
+                       return k;
+
+               bch2_btree_node_iter_advance(iter, b);
+       }
+
+       return NULL;
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
+                             struct btree *b)
+{
+       return bch2_btree_node_iter_peek_filter(iter, b, 0);
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
+{
+       return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1);
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
+{
+       struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b);
+
+       if (ret)
+               bch2_btree_node_iter_advance(iter, b);
+
+       return ret;
+}
+
+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
+                                                 struct btree *);
+struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *,
+                                                    struct btree *, unsigned);
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
+{
+       return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1);
+}
+
+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
+                                               struct btree *,
+                                               struct bkey *);
+
+#define for_each_btree_node_key_unpack(b, k, iter, unpacked)           \
+       for (bch2_btree_node_iter_init_from_start((iter), (b));         \
+            (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
+            bch2_btree_node_iter_advance(iter, b))
+
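+/*
+ * Usage sketch (hypothetical helper, assuming the node is already locked): sum
+ * the sizes of all keys in a node with the iteration macro above.
+ */
+static inline u64 example_btree_node_key_sectors(struct btree *b)
+{
+       struct btree_node_iter iter;
+       struct bkey unpacked;
+       struct bkey_s_c k;
+       u64 sectors = 0;
+
+       for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
+               sectors += k.k->size;
+
+       return sectors;
+}
+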
+/* Accounting: */
+
+static inline void btree_keys_account_key(struct btree_nr_keys *n,
+                                         unsigned bset,
+                                         struct bkey_packed *k,
+                                         int sign)
+{
+       n->live_u64s            += k->u64s * sign;
+       n->bset_u64s[bset]      += k->u64s * sign;
+
+       if (bkey_packed(k))
+               n->packed_keys  += sign;
+       else
+               n->unpacked_keys += sign;
+}
+
+static inline void btree_keys_account_val_delta(struct btree *b,
+                                               struct bkey_packed *k,
+                                               int delta)
+{
+       struct bset_tree *t = bch2_bkey_to_bset(b, k);
+
+       b->nr.live_u64s                 += delta;
+       b->nr.bset_u64s[t - b->set]     += delta;
+}
+
+#define btree_keys_account_key_add(_nr, _bset_idx, _k)         \
+       btree_keys_account_key(_nr, _bset_idx, _k, 1)
+#define btree_keys_account_key_drop(_nr, _bset_idx, _k)        \
+       btree_keys_account_key(_nr, _bset_idx, _k, -1)
+
+#define btree_account_key_add(_b, _k)                          \
+       btree_keys_account_key(&(_b)->nr,                       \
+               bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1)
+#define btree_account_key_drop(_b, _k)                         \
+       btree_keys_account_key(&(_b)->nr,                       \
+               bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1)
+
+struct bset_stats {
+       struct {
+               size_t nr, bytes;
+       } sets[BSET_TREE_NR_TYPES];
+
+       size_t floats;
+       size_t failed;
+};
+
+void bch2_btree_keys_stats(struct btree *, struct bset_stats *);
+void bch2_bfloat_to_text(struct printbuf *, struct btree *,
+                        struct bkey_packed *);
+
+/* Debug stuff */
+
+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned);
+void bch2_dump_btree_node(struct bch_fs *, struct btree *);
+void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *);
+void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
+                           struct bkey_packed *, unsigned);
+
+#else
+
+static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
+static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
+                                             struct btree *b) {}
+static inline void bch2_verify_insert_pos(struct btree *b,
+                                         struct bkey_packed *where,
+                                         struct bkey_packed *insert,
+                                         unsigned clobber_u64s) {}
+#endif
+
+static inline void bch2_verify_btree_nr_keys(struct btree *b)
+{
+       if (btree_keys_expensive_checks(b))
+               __bch2_verify_btree_nr_keys(b);
+}
+
+#endif /* _BCACHEFS_BSET_H */
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
new file mode 100644 (file)
index 0000000..bb94fa2
--- /dev/null
@@ -0,0 +1,1063 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "debug.h"
+
+#include <linux/prefetch.h>
+#include <linux/sched/mm.h>
+#include <trace/events/bcachefs.h>
+
+const char * const bch2_btree_ids[] = {
+#define x(kwd, val, name) name,
+       BCH_BTREE_IDS()
+#undef x
+       NULL
+};
+
+void bch2_recalc_btree_reserve(struct bch_fs *c)
+{
+       unsigned i, reserve = 16;
+
+       if (!c->btree_roots[0].b)
+               reserve += 8;
+
+       for (i = 0; i < BTREE_ID_NR; i++)
+               if (c->btree_roots[i].b)
+                       reserve += min_t(unsigned, 1,
+                                        c->btree_roots[i].b->c.level) * 8;
+
+       c->btree_cache.reserve = reserve;
+}
+
+static inline unsigned btree_cache_can_free(struct btree_cache *bc)
+{
+       return max_t(int, 0, bc->used - bc->reserve);
+}
+
+static void __btree_node_data_free(struct bch_fs *c, struct btree *b)
+{
+       EBUG_ON(btree_node_write_in_flight(b));
+
+       kvpfree(b->data, btree_bytes(c));
+       b->data = NULL;
+       vfree(b->aux_data);
+       b->aux_data = NULL;
+}
+
+static void btree_node_data_free(struct bch_fs *c, struct btree *b)
+{
+       struct btree_cache *bc = &c->btree_cache;
+
+       __btree_node_data_free(c, b);
+       bc->used--;
+       list_move(&b->list, &bc->freed);
+}
+
+static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
+                                  const void *obj)
+{
+       const struct btree *b = obj;
+       const u64 *v = arg->key;
+
+       return b->hash_val == *v ? 0 : 1;
+}
+
+static const struct rhashtable_params bch_btree_cache_params = {
+       .head_offset    = offsetof(struct btree, hash),
+       .key_offset     = offsetof(struct btree, hash_val),
+       .key_len        = sizeof(u64),
+       .obj_cmpfn      = bch2_btree_cache_cmp_fn,
+};
+
+static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
+{
+       BUG_ON(b->data || b->aux_data);
+
+       b->data = kvpmalloc(btree_bytes(c), gfp);
+       if (!b->data)
+               return -ENOMEM;
+
+       b->aux_data = vmalloc_exec(btree_aux_data_bytes(b), gfp);
+       if (!b->aux_data) {
+               kvpfree(b->data, btree_bytes(c));
+               b->data = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static struct btree *__btree_node_mem_alloc(struct bch_fs *c)
+{
+       struct btree *b = kzalloc(sizeof(struct btree), GFP_KERNEL);
+       if (!b)
+               return NULL;
+
+       bkey_btree_ptr_init(&b->key);
+       six_lock_init(&b->c.lock);
+       INIT_LIST_HEAD(&b->list);
+       INIT_LIST_HEAD(&b->write_blocked);
+       b->byte_order = ilog2(btree_bytes(c));
+       return b;
+}
+
+static struct btree *btree_node_mem_alloc(struct bch_fs *c)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b = __btree_node_mem_alloc(c);
+       if (!b)
+               return NULL;
+
+       if (btree_node_data_alloc(c, b, GFP_KERNEL)) {
+               kfree(b);
+               return NULL;
+       }
+
+       bc->used++;
+       list_add(&b->list, &bc->freeable);
+       return b;
+}
+
+/* Btree in memory cache - hash table */
+
+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
+{
+       rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
+
+       /* Cause future lookups for this node to fail: */
+       b->hash_val = 0;
+
+       six_lock_wakeup_all(&b->c.lock);
+}
+
+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
+{
+       BUG_ON(b->hash_val);
+       b->hash_val = btree_ptr_hash_val(&b->key);
+
+       return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
+                                            bch_btree_cache_params);
+}
+
+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
+                               unsigned level, enum btree_id id)
+{
+       int ret;
+
+       b->c.level      = level;
+       b->c.btree_id   = id;
+
+       mutex_lock(&bc->lock);
+       ret = __bch2_btree_node_hash_insert(bc, b);
+       if (!ret)
+               list_add(&b->list, &bc->live);
+       mutex_unlock(&bc->lock);
+
+       return ret;
+}
+
+__flatten
+static inline struct btree *btree_cache_find(struct btree_cache *bc,
+                                    const struct bkey_i *k)
+{
+       u64 v = btree_ptr_hash_val(k);
+
+       return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
+}
+
+/*
+ * this version is for btree nodes that have already been freed (we're not
+ * reaping a real btree node)
+ */
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       int ret = 0;
+
+       lockdep_assert_held(&bc->lock);
+
+       if (!six_trylock_intent(&b->c.lock))
+               return -ENOMEM;
+
+       if (!six_trylock_write(&b->c.lock))
+               goto out_unlock_intent;
+
+       if (btree_node_noevict(b))
+               goto out_unlock;
+
+       if (!btree_node_may_write(b))
+               goto out_unlock;
+
+       if (btree_node_dirty(b) &&
+           test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
+               goto out_unlock;
+
+       if (btree_node_dirty(b) ||
+           btree_node_write_in_flight(b) ||
+           btree_node_read_in_flight(b)) {
+               if (!flush)
+                       goto out_unlock;
+
+               wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                              TASK_UNINTERRUPTIBLE);
+
+               /*
+                * Using the underscore version because we don't want to compact
+                * bsets after the write, since this node is about to be evicted
+                * - unless btree verify mode is enabled, since it runs out of
+                * the post write cleanup:
+                */
+               if (verify_btree_ondisk(c))
+                       bch2_btree_node_write(c, b, SIX_LOCK_intent);
+               else
+                       __bch2_btree_node_write(c, b, SIX_LOCK_read);
+
+               /* wait for any in flight btree write */
+               btree_node_wait_on_io(b);
+       }
+out:
+       if (b->hash_val && !ret)
+               trace_btree_node_reap(c, b);
+       return ret;
+out_unlock:
+       six_unlock_write(&b->c.lock);
+out_unlock_intent:
+       six_unlock_intent(&b->c.lock);
+       ret = -ENOMEM;
+       goto out;
+}
+
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
+{
+       return __btree_node_reclaim(c, b, false);
+}
+
+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
+{
+       return __btree_node_reclaim(c, b, true);
+}
+
+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
+                                          struct shrink_control *sc)
+{
+       struct bch_fs *c = container_of(shrink, struct bch_fs,
+                                       btree_cache.shrink);
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b, *t;
+       unsigned long nr = sc->nr_to_scan;
+       unsigned long can_free;
+       unsigned long touched = 0;
+       unsigned long freed = 0;
+       unsigned i, flags;
+
+       if (btree_shrinker_disabled(c))
+               return SHRINK_STOP;
+
+       /* Return -1 if we can't do anything right now */
+       if (sc->gfp_mask & __GFP_FS)
+               mutex_lock(&bc->lock);
+       else if (!mutex_trylock(&bc->lock))
+               return -1;
+
+       flags = memalloc_nofs_save();
+
+       /*
+        * It's _really_ critical that we don't free too many btree nodes - we
+        * have to always leave ourselves a reserve. The reserve is how we
+        * guarantee that allocating memory for a new btree node can always
+        * succeed, so that inserting keys into the btree can always succeed and
+        * IO can always make forward progress:
+        */
+       nr /= btree_pages(c);
+       can_free = btree_cache_can_free(bc);
+       nr = min_t(unsigned long, nr, can_free);
+
+       i = 0;
+       list_for_each_entry_safe(b, t, &bc->freeable, list) {
+               touched++;
+
+               if (freed >= nr)
+                       break;
+
+               if (++i > 3 &&
+                   !btree_node_reclaim(c, b)) {
+                       btree_node_data_free(c, b);
+                       six_unlock_write(&b->c.lock);
+                       six_unlock_intent(&b->c.lock);
+                       freed++;
+               }
+       }
+restart:
+       list_for_each_entry_safe(b, t, &bc->live, list) {
+               touched++;
+
+               if (freed >= nr) {
+                       /* Save position */
+                       if (&t->list != &bc->live)
+                               list_move_tail(&bc->live, &t->list);
+                       break;
+               }
+
+               if (!btree_node_accessed(b) &&
+                   !btree_node_reclaim(c, b)) {
+                       /* can't call bch2_btree_node_hash_remove under lock  */
+                       freed++;
+                       if (&t->list != &bc->live)
+                               list_move_tail(&bc->live, &t->list);
+
+                       btree_node_data_free(c, b);
+                       mutex_unlock(&bc->lock);
+
+                       bch2_btree_node_hash_remove(bc, b);
+                       six_unlock_write(&b->c.lock);
+                       six_unlock_intent(&b->c.lock);
+
+                       if (freed >= nr)
+                               goto out;
+
+                       if (sc->gfp_mask & __GFP_FS)
+                               mutex_lock(&bc->lock);
+                       else if (!mutex_trylock(&bc->lock))
+                               goto out;
+                       goto restart;
+               } else
+                       clear_btree_node_accessed(b);
+       }
+
+       memalloc_nofs_restore(flags);
+       mutex_unlock(&bc->lock);
+out:
+       return (unsigned long) freed * btree_pages(c);
+}
+
+static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
+                                           struct shrink_control *sc)
+{
+       struct bch_fs *c = container_of(shrink, struct bch_fs,
+                                       btree_cache.shrink);
+       struct btree_cache *bc = &c->btree_cache;
+
+       if (btree_shrinker_disabled(c))
+               return 0;
+
+       return btree_cache_can_free(bc) * btree_pages(c);
+}
+
+void bch2_fs_btree_cache_exit(struct bch_fs *c)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b;
+       unsigned i, flags;
+
+       if (bc->shrink.list.next)
+               unregister_shrinker(&bc->shrink);
+
+       /* vfree() can allocate memory: */
+       flags = memalloc_nofs_save();
+       mutex_lock(&bc->lock);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       if (c->verify_data)
+               list_move(&c->verify_data->list, &bc->live);
+
+       kvpfree(c->verify_ondisk, btree_bytes(c));
+#endif
+
+       for (i = 0; i < BTREE_ID_NR; i++)
+               if (c->btree_roots[i].b)
+                       list_add(&c->btree_roots[i].b->list, &bc->live);
+
+       list_splice(&bc->freeable, &bc->live);
+
+       while (!list_empty(&bc->live)) {
+               b = list_first_entry(&bc->live, struct btree, list);
+
+               BUG_ON(btree_node_read_in_flight(b) ||
+                      btree_node_write_in_flight(b));
+
+               if (btree_node_dirty(b))
+                       bch2_btree_complete_write(c, b, btree_current_write(b));
+               clear_btree_node_dirty(b);
+
+               btree_node_data_free(c, b);
+       }
+
+       while (!list_empty(&bc->freed)) {
+               b = list_first_entry(&bc->freed, struct btree, list);
+               list_del(&b->list);
+               kfree(b);
+       }
+
+       mutex_unlock(&bc->lock);
+       memalloc_nofs_restore(flags);
+
+       if (bc->table_init_done)
+               rhashtable_destroy(&bc->table);
+}
+
+int bch2_fs_btree_cache_init(struct bch_fs *c)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       unsigned i;
+       int ret = 0;
+
+       pr_verbose_init(c->opts, "");
+
+       ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
+       if (ret)
+               goto out;
+
+       bc->table_init_done = true;
+
+       bch2_recalc_btree_reserve(c);
+
+       for (i = 0; i < bc->reserve; i++)
+               if (!btree_node_mem_alloc(c)) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+       list_splice_init(&bc->live, &bc->freeable);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       mutex_init(&c->verify_lock);
+
+       c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+       if (!c->verify_ondisk) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       c->verify_data = btree_node_mem_alloc(c);
+       if (!c->verify_data) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       list_del_init(&c->verify_data->list);
+#endif
+
+       bc->shrink.count_objects        = bch2_btree_cache_count;
+       bc->shrink.scan_objects         = bch2_btree_cache_scan;
+       bc->shrink.seeks                = 4;
+       bc->shrink.batch                = btree_pages(c) * 2;
+       register_shrinker(&bc->shrink);
+out:
+       pr_verbose_init(c->opts, "ret %i", ret);
+       return ret;
+}
+
+void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
+{
+       mutex_init(&bc->lock);
+       INIT_LIST_HEAD(&bc->live);
+       INIT_LIST_HEAD(&bc->freeable);
+       INIT_LIST_HEAD(&bc->freed);
+}
+
+/*
+ * We can only have one thread cannibalizing other cached btree nodes at a time,
+ * or we'll deadlock. We use an open coded mutex (bc->alloc_lock) to ensure
+ * that, taken by bch2_btree_cache_cannibalize_lock(). This means every time
+ * we unlock the root of the btree, we need to release this lock if we have it
+ * held.
+ */
+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c)
+{
+       struct btree_cache *bc = &c->btree_cache;
+
+       if (bc->alloc_lock == current) {
+               trace_btree_node_cannibalize_unlock(c);
+               bc->alloc_lock = NULL;
+               closure_wake_up(&bc->alloc_wait);
+       }
+}
+
+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct task_struct *old;
+
+       old = cmpxchg(&bc->alloc_lock, NULL, current);
+       if (old == NULL || old == current)
+               goto success;
+
+       if (!cl) {
+               trace_btree_node_cannibalize_lock_fail(c);
+               return -ENOMEM;
+       }
+
+       closure_wait(&bc->alloc_wait, cl);
+
+       /* Try again, after adding ourselves to waitlist */
+       old = cmpxchg(&bc->alloc_lock, NULL, current);
+       if (old == NULL || old == current) {
+               /* We raced */
+               closure_wake_up(&bc->alloc_wait);
+               goto success;
+       }
+
+       trace_btree_node_cannibalize_lock_fail(c);
+       return -EAGAIN;
+
+success:
+       trace_btree_node_cannibalize_lock(c);
+       return 0;
+}
+
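+/*
+ * Usage sketch (hypothetical caller, not part of this commit): on allocation
+ * failure, take the cannibalize lock and retry - bch2_btree_node_mem_alloc()
+ * will then reclaim a node from the live list - and always drop the lock
+ * afterwards (the unlock is a no-op if we never took it).
+ */
+static __maybe_unused struct btree *
+example_node_alloc_may_cannibalize(struct bch_fs *c, struct closure *cl)
+{
+       struct btree *b = bch2_btree_node_mem_alloc(c);
+
+       if (IS_ERR(b) && !bch2_btree_cache_cannibalize_lock(c, cl))
+               b = bch2_btree_node_mem_alloc(c);
+
+       bch2_btree_cache_cannibalize_unlock(c);
+       return b;
+}
+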
+static struct btree *btree_node_cannibalize(struct bch_fs *c)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b;
+
+       list_for_each_entry_reverse(b, &bc->live, list)
+               if (!btree_node_reclaim(c, b))
+                       return b;
+
+       while (1) {
+               list_for_each_entry_reverse(b, &bc->live, list)
+                       if (!btree_node_write_and_reclaim(c, b))
+                               return b;
+
+               /*
+                * Rare case: all nodes were intent-locked.
+                * Just busy-wait.
+                */
+               WARN_ONCE(1, "btree cache cannibalize failed\n");
+               cond_resched();
+       }
+}
+
+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b;
+       u64 start_time = local_clock();
+       unsigned flags;
+
+       flags = memalloc_nofs_save();
+       mutex_lock(&bc->lock);
+
+       /*
+        * btree_free() doesn't free memory; it sticks the node on the end of
+        * the list. Check if there's any freed nodes there:
+        */
+       list_for_each_entry(b, &bc->freeable, list)
+               if (!btree_node_reclaim(c, b))
+                       goto got_node;
+
+       /*
+        * We never free struct btree itself, just the memory that holds the on
+        * disk node. Check the freed list before allocating a new one:
+        */
+       list_for_each_entry(b, &bc->freed, list)
+               if (!btree_node_reclaim(c, b))
+                       goto got_node;
+
+       b = NULL;
+got_node:
+       if (b)
+               list_del_init(&b->list);
+       mutex_unlock(&bc->lock);
+
+       if (!b) {
+               b = __btree_node_mem_alloc(c);
+               if (!b)
+                       goto err;
+
+               BUG_ON(!six_trylock_intent(&b->c.lock));
+               BUG_ON(!six_trylock_write(&b->c.lock));
+       }
+
+       if (!b->data) {
+               if (btree_node_data_alloc(c, b, __GFP_NOWARN|GFP_KERNEL))
+                       goto err;
+
+               mutex_lock(&bc->lock);
+               bc->used++;
+               mutex_unlock(&bc->lock);
+       }
+
+       BUG_ON(btree_node_hashed(b));
+       BUG_ON(btree_node_write_in_flight(b));
+out:
+       b->flags                = 0;
+       b->written              = 0;
+       b->nsets                = 0;
+       b->sib_u64s[0]          = 0;
+       b->sib_u64s[1]          = 0;
+       b->whiteout_u64s        = 0;
+       bch2_btree_keys_init(b, &c->expensive_debug_checks);
+
+       bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
+                              start_time);
+
+       memalloc_nofs_restore(flags);
+       return b;
+err:
+       mutex_lock(&bc->lock);
+
+       if (b) {
+               list_add(&b->list, &bc->freed);
+               six_unlock_write(&b->c.lock);
+               six_unlock_intent(&b->c.lock);
+       }
+
+       /* Try to cannibalize another cached btree node: */
+       if (bc->alloc_lock == current) {
+               b = btree_node_cannibalize(c);
+               list_del_init(&b->list);
+               mutex_unlock(&bc->lock);
+
+               bch2_btree_node_hash_remove(bc, b);
+
+               trace_btree_node_cannibalize(c);
+               goto out;
+       }
+
+       mutex_unlock(&bc->lock);
+       memalloc_nofs_restore(flags);
+       return ERR_PTR(-ENOMEM);
+}
+
+/* Slowpath, don't want it inlined into btree_iter_traverse() */
+static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
+                               struct btree_iter *iter,
+                               const struct bkey_i *k,
+                               enum btree_id btree_id,
+                               unsigned level,
+                               enum six_lock_type lock_type,
+                               bool sync)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b;
+
+       BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+       /*
+        * Parent node must be locked, else we could read in a btree node that's
+        * been freed:
+        */
+       if (iter && !bch2_btree_node_relock(iter, level + 1))
+               return ERR_PTR(-EINTR);
+
+       b = bch2_btree_node_mem_alloc(c);
+       if (IS_ERR(b))
+               return b;
+
+       bkey_copy(&b->key, k);
+       if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
+               /* raced with another fill: */
+
+               /* mark as unhashed... */
+               b->hash_val = 0;
+
+               mutex_lock(&bc->lock);
+               list_add(&b->list, &bc->freeable);
+               mutex_unlock(&bc->lock);
+
+               six_unlock_write(&b->c.lock);
+               six_unlock_intent(&b->c.lock);
+               return NULL;
+       }
+
+       /*
+        * Unlock before doing IO:
+        *
+        * XXX: ideally should be dropping all btree node locks here
+        */
+       if (iter && btree_node_read_locked(iter, level + 1))
+               btree_node_unlock(iter, level + 1);
+
+       bch2_btree_node_read(c, b, sync);
+
+       six_unlock_write(&b->c.lock);
+
+       if (!sync) {
+               six_unlock_intent(&b->c.lock);
+               return NULL;
+       }
+
+       if (lock_type == SIX_LOCK_read)
+               six_lock_downgrade(&b->c.lock);
+
+       return b;
+}
+
+static int lock_node_check_fn(struct six_lock *lock, void *p)
+{
+       struct btree *b = container_of(lock, struct btree, c.lock);
+       const struct bkey_i *k = p;
+
+       return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1;
+}
+
+/**
+ * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
+ * in from disk if necessary.
+ *
+ * If IO is necessary and running under generic_make_request, returns -EAGAIN.
+ *
+ * The btree node will have either a read or a write lock held, depending on
+ * the @lock_type parameter.
+ */
+struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
+                                 const struct bkey_i *k, unsigned level,
+                                 enum six_lock_type lock_type)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b;
+       struct bset_tree *t;
+
+       EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+       b = btree_node_mem_ptr(k);
+       if (b)
+               goto lock_node;
+retry:
+       b = btree_cache_find(bc, k);
+       if (unlikely(!b)) {
+               /*
+                * We must have the parent locked to call bch2_btree_node_fill(),
+                * else we could read in a btree node from disk that's been
+                * freed:
+                */
+               b = bch2_btree_node_fill(c, iter, k, iter->btree_id,
+                                        level, lock_type, true);
+
+               /* We raced and found the btree node in the cache */
+               if (!b)
+                       goto retry;
+
+               if (IS_ERR(b))
+                       return b;
+       } else {
+lock_node:
+               /*
+                * There's a potential deadlock with splits and insertions into
+                * interior nodes we have to avoid:
+                *
+                * The other thread might be holding an intent lock on the node
+                * we want, and they want to update its parent node so they're
+                * going to upgrade their intent lock on the parent node to a
+                * write lock.
+                *
+                * But if we're holding a read lock on the parent, and we're
+                * trying to get the intent lock they're holding, we deadlock.
+                *
+                * So to avoid this we drop the read locks on parent nodes when
+                * we're starting to take intent locks - and handle the race.
+                *
+                * The race is that they might be about to free the node we
+                * want, and dropping our read lock on the parent node lets them
+                * update the parent marking the node we want as freed, and then
+                * free it:
+                *
+                * To guard against this, btree nodes are evicted from the cache
+                * when they're freed - and b->hash_val is zeroed out, which we
+                * check for after we lock the node.
+                *
+                * Then, bch2_btree_node_relock() on the parent will fail - because
+                * the parent was modified, when the pointer to the node we want
+                * was removed - and we'll bail out:
+                */
+               if (btree_node_read_locked(iter, level + 1))
+                       btree_node_unlock(iter, level + 1);
+
+               if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
+                                    lock_node_check_fn, (void *) k)) {
+                       if (b->hash_val != btree_ptr_hash_val(k))
+                               goto retry;
+                       return ERR_PTR(-EINTR);
+               }
+
+               if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
+                            b->c.level != level ||
+                            race_fault())) {
+                       six_unlock_type(&b->c.lock, lock_type);
+                       if (bch2_btree_node_relock(iter, level + 1))
+                               goto retry;
+
+                       trace_trans_restart_btree_node_reused(iter->trans->ip);
+                       return ERR_PTR(-EINTR);
+               }
+       }
+
+       /* XXX: waiting on IO with btree locks held: */
+       wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                      TASK_UNINTERRUPTIBLE);
+
+       prefetch(b->aux_data);
+
+       for_each_bset(b, t) {
+               void *p = (u64 *) b->aux_data + t->aux_data_offset;
+
+               prefetch(p + L1_CACHE_BYTES * 0);
+               prefetch(p + L1_CACHE_BYTES * 1);
+               prefetch(p + L1_CACHE_BYTES * 2);
+       }
+
+       /* avoid atomic set bit if it's not needed: */
+       if (!btree_node_accessed(b))
+               set_btree_node_accessed(b);
+
+       if (unlikely(btree_node_read_error(b))) {
+               six_unlock_type(&b->c.lock, lock_type);
+               return ERR_PTR(-EIO);
+       }
+
+       EBUG_ON(b->c.btree_id != iter->btree_id ||
+               BTREE_NODE_LEVEL(b->data) != level ||
+               bkey_cmp(b->data->max_key, k->k.p));
+
+       return b;
+}
+
+struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
+                                        const struct bkey_i *k,
+                                        enum btree_id btree_id,
+                                        unsigned level)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b;
+       struct bset_tree *t;
+       int ret;
+
+       EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+       b = btree_node_mem_ptr(k);
+       if (b)
+               goto lock_node;
+retry:
+       b = btree_cache_find(bc, k);
+       if (unlikely(!b)) {
+               b = bch2_btree_node_fill(c, NULL, k, btree_id,
+                                        level, SIX_LOCK_read, true);
+
+               /* We raced and found the btree node in the cache */
+               if (!b)
+                       goto retry;
+
+               if (IS_ERR(b))
+                       return b;
+       } else {
+lock_node:
+               ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
+               if (ret)
+                       goto retry;
+
+               if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
+                            b->c.btree_id != btree_id ||
+                            b->c.level != level)) {
+                       six_unlock_read(&b->c.lock);
+                       goto retry;
+               }
+       }
+
+       /* XXX: waiting on IO with btree locks held: */
+       wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                      TASK_UNINTERRUPTIBLE);
+
+       prefetch(b->aux_data);
+
+       for_each_bset(b, t) {
+               void *p = (u64 *) b->aux_data + t->aux_data_offset;
+
+               prefetch(p + L1_CACHE_BYTES * 0);
+               prefetch(p + L1_CACHE_BYTES * 1);
+               prefetch(p + L1_CACHE_BYTES * 2);
+       }
+
+       /* avoid atomic set bit if it's not needed: */
+       if (!btree_node_accessed(b))
+               set_btree_node_accessed(b);
+
+       if (unlikely(btree_node_read_error(b))) {
+               six_unlock_read(&b->c.lock);
+               return ERR_PTR(-EIO);
+       }
+
+       EBUG_ON(b->c.btree_id != btree_id ||
+               BTREE_NODE_LEVEL(b->data) != level ||
+               bkey_cmp(b->data->max_key, k->k.p));
+
+       return b;
+}
+
+struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
+                                         struct btree_iter *iter,
+                                         struct btree *b,
+                                         enum btree_node_sibling sib)
+{
+       struct btree_trans *trans = iter->trans;
+       struct btree *parent;
+       struct btree_node_iter node_iter;
+       struct bkey_packed *k;
+       BKEY_PADDED(k) tmp;
+       struct btree *ret = NULL;
+       unsigned level = b->c.level;
+
+       parent = btree_iter_node(iter, level + 1);
+       if (!parent)
+               return NULL;
+
+       /*
+        * There's a corner case where a btree_iter might have a node locked
+        * that is just outside its current pos - when
+        * bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
+        *
+        * But the lock ordering checks in __bch2_btree_node_lock() go off of
+        * iter->pos, not the node's key: so if the iterator is marked as
+        * needing to be traversed, we risk deadlock if we don't bail out here:
+        */
+       if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
+               return ERR_PTR(-EINTR);
+
+       if (!bch2_btree_node_relock(iter, level + 1)) {
+               ret = ERR_PTR(-EINTR);
+               goto out;
+       }
+
+       node_iter = iter->l[parent->c.level].iter;
+
+       k = bch2_btree_node_iter_peek_all(&node_iter, parent);
+       BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p));
+
+       k = sib == btree_prev_sib
+               ? bch2_btree_node_iter_prev(&node_iter, parent)
+               : (bch2_btree_node_iter_advance(&node_iter, parent),
+                  bch2_btree_node_iter_peek(&node_iter, parent));
+       if (!k)
+               goto out;
+
+       bch2_bkey_unpack(parent, &tmp.k, k);
+
+       ret = bch2_btree_node_get(c, iter, &tmp.k, level,
+                                 SIX_LOCK_intent);
+
+       if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
+               struct btree_iter *linked;
+
+               if (!bch2_btree_node_relock(iter, level + 1))
+                       goto out;
+
+               /*
+                * We might have got -EINTR because trylock failed, and we're
+                * holding other locks that would cause us to deadlock:
+                */
+               trans_for_each_iter(trans, linked)
+                       if (btree_iter_cmp(iter, linked) < 0)
+                               __bch2_btree_iter_unlock(linked);
+
+               if (sib == btree_prev_sib)
+                       btree_node_unlock(iter, level);
+
+               ret = bch2_btree_node_get(c, iter, &tmp.k, level,
+                                         SIX_LOCK_intent);
+
+               /*
+                * before btree_iter_relock() calls btree_iter_verify_locks():
+                */
+               if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
+                       btree_node_unlock(iter, level + 1);
+
+               if (!bch2_btree_node_relock(iter, level)) {
+                       btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+
+                       if (!IS_ERR(ret)) {
+                               six_unlock_intent(&ret->c.lock);
+                               ret = ERR_PTR(-EINTR);
+                       }
+               }
+
+               bch2_trans_relock(trans);
+       }
+out:
+       if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
+               btree_node_unlock(iter, level + 1);
+
+       if (PTR_ERR_OR_ZERO(ret) == -EINTR)
+               bch2_btree_iter_upgrade(iter, level + 2);
+
+       BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level));
+
+       if (!IS_ERR_OR_NULL(ret)) {
+               struct btree *n1 = ret, *n2 = b;
+
+               if (sib != btree_prev_sib)
+                       swap(n1, n2);
+
+               BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
+                               n2->data->min_key));
+       }
+
+       bch2_btree_trans_verify_locks(trans);
+
+       return ret;
+}
+
+void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
+                             const struct bkey_i *k, unsigned level)
+{
+       struct btree_cache *bc = &c->btree_cache;
+       struct btree *b;
+
+       BUG_ON(!btree_node_locked(iter, level + 1));
+       BUG_ON(level >= BTREE_MAX_DEPTH);
+
+       b = btree_cache_find(bc, k);
+       if (b)
+               return;
+
+       bch2_btree_node_fill(c, iter, k, iter->btree_id,
+                            level, SIX_LOCK_read, false);
+}
+
+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
+                            struct btree *b)
+{
+       const struct bkey_format *f = &b->format;
+       struct bset_stats stats;
+
+       memset(&stats, 0, sizeof(stats));
+
+       bch2_btree_keys_stats(b, &stats);
+
+       pr_buf(out,
+              "l %u %llu:%llu - %llu:%llu:\n"
+              "    ptrs: ",
+              b->c.level,
+              b->data->min_key.inode,
+              b->data->min_key.offset,
+              b->data->max_key.inode,
+              b->data->max_key.offset);
+       bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+       pr_buf(out, "\n"
+              "    format: u64s %u fields %u %u %u %u %u\n"
+              "    unpack fn len: %u\n"
+              "    bytes used %zu/%zu (%zu%% full)\n"
+              "    sib u64s: %u, %u (merge threshold %zu)\n"
+              "    nr packed keys %u\n"
+              "    nr unpacked keys %u\n"
+              "    floats %zu\n"
+              "    failed unpacked %zu\n",
+              f->key_u64s,
+              f->bits_per_field[0],
+              f->bits_per_field[1],
+              f->bits_per_field[2],
+              f->bits_per_field[3],
+              f->bits_per_field[4],
+              b->unpack_fn_len,
+              b->nr.live_u64s * sizeof(u64),
+              btree_bytes(c) - sizeof(struct btree_node),
+              b->nr.live_u64s * 100 / btree_max_u64s(c),
+              b->sib_u64s[0],
+              b->sib_u64s[1],
+              BTREE_FOREGROUND_MERGE_THRESHOLD(c),
+              b->nr.packed_keys,
+              b->nr.unpacked_keys,
+              stats.floats,
+              stats.failed);
+}
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
new file mode 100644 (file)
index 0000000..d0d3a85
--- /dev/null
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_CACHE_H
+#define _BCACHEFS_BTREE_CACHE_H
+
+#include "bcachefs.h"
+#include "btree_types.h"
+
+struct btree_iter;
+
+extern const char * const bch2_btree_ids[];
+
+void bch2_recalc_btree_reserve(struct bch_fs *);
+
+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
+                               unsigned, enum btree_id);
+
+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *);
+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
+
+struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
+
+struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
+                                 const struct bkey_i *, unsigned,
+                                 enum six_lock_type);
+
+struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
+                                        enum btree_id, unsigned);
+
+struct btree *bch2_btree_node_get_sibling(struct bch_fs *, struct btree_iter *,
+                               struct btree *, enum btree_node_sibling);
+
+void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
+                             const struct bkey_i *, unsigned);
+
+void bch2_fs_btree_cache_exit(struct bch_fs *);
+int bch2_fs_btree_cache_init(struct bch_fs *);
+void bch2_fs_btree_cache_init_early(struct btree_cache *);
+
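+/*
+ * Hash value used to index btree nodes in the btree node cache: for v2 btree
+ * pointers this is the node's unique sequence number; for v1 pointers we fall
+ * back to the first 64 bits of the value (the first pointer):
+ */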
+static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
+{
+       switch (k->k.type) {
+       case KEY_TYPE_btree_ptr:
+               return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
+       case KEY_TYPE_btree_ptr_v2:
+               return bkey_i_to_btree_ptr_v2_c(k)->v.seq;
+       default:
+               return 0;
+       }
+}
+
+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k)
+{
+       return k->k.type == KEY_TYPE_btree_ptr_v2
+               ? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr
+               : NULL;
+}
+
+/* is btree node in hash table? */
+static inline bool btree_node_hashed(struct btree *b)
+{
+       return b->hash_val != 0;
+}
+
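+/* Iterate over every btree node in the btree cache's rhashtable, under RCU: */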
+#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos)               \
+       for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl,  \
+                                         &(_c)->btree_cache.table),    \
+            _iter = 0; _iter < (_tbl)->size; _iter++)                  \
+               rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
+
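+/* btree_node_size is in units of 512 byte sectors: */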
+static inline size_t btree_bytes(struct bch_fs *c)
+{
+       return c->opts.btree_node_size << 9;
+}
+
+static inline size_t btree_max_u64s(struct bch_fs *c)
+{
+       return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
+}
+
+static inline size_t btree_pages(struct bch_fs *c)
+{
+       return btree_bytes(c) / PAGE_SIZE;
+}
+
+static inline unsigned btree_blocks(struct bch_fs *c)
+{
+       return c->opts.btree_node_size >> c->block_bits;
+}
+
+#define BTREE_SPLIT_THRESHOLD(c)               (btree_max_u64s(c) * 2 / 3)
+
+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c)    (btree_max_u64s(c) * 1 / 3)
+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c)                   \
+       (BTREE_FOREGROUND_MERGE_THRESHOLD(c) +                  \
+        (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2))
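+
+/*
+ * Worked example with hypothetical numbers: if btree_max_u64s(c) were 3000,
+ * the split threshold would be 2000 u64s, the foreground merge threshold
+ * 1000, and the hysteresis 5x the merge threshold (5000) - the latter keeps
+ * recently split nodes from immediately becoming merge candidates again.
+ */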
+
+#define btree_node_root(_c, _b)        ((_c)->btree_roots[(_b)->c.btree_id].b)
+
+void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *,
+                            struct btree *);
+
+#endif /* _BCACHEFS_BTREE_CACHE_H */
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
new file mode 100644 (file)
index 0000000..e8c1e75
--- /dev/null
@@ -0,0 +1,1438 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright (C) 2014 Datera Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "bkey_methods.h"
+#include "bkey_on_stack.h"
+#include "btree_locking.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "clock.h"
+#include "debug.h"
+#include "ec.h"
+#include "error.h"
+#include "extents.h"
+#include "journal.h"
+#include "keylist.h"
+#include "move.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "super-io.h"
+
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/task.h>
+#include <trace/events/bcachefs.h>
+
+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+       preempt_disable();
+       write_seqcount_begin(&c->gc_pos_lock);
+       c->gc_pos = new_pos;
+       write_seqcount_end(&c->gc_pos_lock);
+       preempt_enable();
+}
+
+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+       BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
+       __gc_pos_set(c, new_pos);
+}
+
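+/*
+ * Check that an interior node's children tile the keyspace exactly: each
+ * child's min_key must pick up where the previous child's max_key left off,
+ * and the last child must end at the parent's max_key:
+ */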
+static int bch2_gc_check_topology(struct bch_fs *c,
+                                 struct bkey_s_c k,
+                                 struct bpos *expected_start,
+                                 struct bpos expected_end,
+                                 bool is_last)
+{
+       int ret = 0;
+
+       if (k.k->type == KEY_TYPE_btree_ptr_v2) {
+               struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
+
+               if (fsck_err_on(bkey_cmp(*expected_start, bp.v->min_key), c,
+                               "btree node with incorrect min_key: got %llu:%llu, should be %llu:%llu",
+                               bp.v->min_key.inode,
+                               bp.v->min_key.offset,
+                               expected_start->inode,
+                               expected_start->offset)) {
+                       BUG();
+               }
+       }
+
+       *expected_start = bkey_cmp(k.k->p, POS_MAX)
+               ? bkey_successor(k.k->p)
+               : k.k->p;
+
+       if (fsck_err_on(is_last &&
+                       bkey_cmp(k.k->p, expected_end), c,
+                       "btree node with incorrect max_key: got %llu:%llu, should be %llu:%llu",
+                       k.k->p.inode,
+                       k.k->p.offset,
+                       expected_end.inode,
+                       expected_end.offset)) {
+               BUG();
+       }
+fsck_err:
+       return ret;
+}
+
+/* marking of btree keys/nodes: */
+
+static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
+                           u8 *max_stale, bool initial)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+       unsigned flags =
+               BTREE_TRIGGER_GC|
+               (initial ? BTREE_TRIGGER_NOATOMIC : 0);
+       int ret = 0;
+
+       if (initial) {
+               BUG_ON(journal_seq_verify(c) &&
+                      k.k->version.lo > journal_cur_seq(&c->journal));
+
+               /* XXX change to fsck check */
+               if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
+                               "key version number higher than recorded: %llu > %llu",
+                               k.k->version.lo,
+                               atomic64_read(&c->key_version)))
+                       atomic64_set(&c->key_version, k.k->version.lo);
+
+               if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
+                   fsck_err_on(!bch2_bkey_replicas_marked(c, k), c,
+                               "superblock not marked as containing replicas (type %u)",
+                               k.k->type)) {
+                       ret = bch2_mark_bkey_replicas(c, k);
+                       if (ret)
+                               return ret;
+               }
+
+               bkey_for_each_ptr(ptrs, ptr) {
+                       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+                       struct bucket *g = PTR_BUCKET(ca, ptr, true);
+                       struct bucket *g2 = PTR_BUCKET(ca, ptr, false);
+
+                       if (mustfix_fsck_err_on(!g->gen_valid, c,
+                                       "bucket %u:%zu data type %s ptr gen %u missing in alloc btree",
+                                       ptr->dev, PTR_BUCKET_NR(ca, ptr),
+                                       bch2_data_types[ptr_data_type(k.k, ptr)],
+                                       ptr->gen)) {
+                               g2->_mark.gen   = g->_mark.gen          = ptr->gen;
+                               g2->gen_valid   = g->gen_valid          = true;
+                       }
+
+                       if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+                                       "bucket %u:%zu data type %s ptr gen in the future: %u > %u",
+                                       ptr->dev, PTR_BUCKET_NR(ca, ptr),
+                                       bch2_data_types[ptr_data_type(k.k, ptr)],
+                                       ptr->gen, g->mark.gen)) {
+                               g2->_mark.gen   = g->_mark.gen          = ptr->gen;
+                               g2->gen_valid   = g->gen_valid          = true;
+                               g2->_mark.data_type             = 0;
+                               g2->_mark.dirty_sectors         = 0;
+                               g2->_mark.cached_sectors        = 0;
+                               set_bit(BCH_FS_FIXED_GENS, &c->flags);
+                       }
+               }
+       }
+
+       bkey_for_each_ptr(ptrs, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct bucket *g = PTR_BUCKET(ca, ptr, true);
+
+               if (gen_after(g->oldest_gen, ptr->gen))
+                       g->oldest_gen = ptr->gen;
+
+               *max_stale = max(*max_stale, ptr_stale(ca, ptr));
+       }
+
+       bch2_mark_key(c, k, 0, k.k->size, NULL, 0, flags);
+fsck_err:
+       return ret;
+}
+
+static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale,
+                             bool initial)
+{
+       struct bpos next_node_start = b->data->min_key;
+       struct btree_node_iter iter;
+       struct bkey unpacked;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       *max_stale = 0;
+
+       if (!btree_node_type_needs_gc(btree_node_type(b)))
+               return 0;
+
+       bch2_btree_node_iter_init_from_start(&iter, b);
+
+       while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
+               bch2_bkey_debugcheck(c, b, k);
+
+               ret = bch2_gc_mark_key(c, k, max_stale, initial);
+               if (ret)
+                       break;
+
+               bch2_btree_node_iter_advance(&iter, b);
+
+               if (b->c.level) {
+                       ret = bch2_gc_check_topology(c, k,
+                                       &next_node_start,
+                                       b->data->max_key,
+                                       bch2_btree_node_iter_end(&iter));
+                       if (ret)
+                               break;
+               }
+       }
+
+       return ret;
+}
+
+static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
+                        bool initial, bool metadata_only)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       unsigned depth = metadata_only                  ? 1
+               : expensive_debug_checks(c)             ? 0
+               : !btree_node_type_needs_gc(btree_id)   ? 1
+               : 0;
+       u8 max_stale = 0;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
+
+       __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
+                             0, depth, BTREE_ITER_PREFETCH, b) {
+               bch2_verify_btree_nr_keys(b);
+
+               gc_pos_set(c, gc_pos_btree_node(b));
+
+               ret = btree_gc_mark_node(c, b, &max_stale, initial);
+               if (ret)
+                       break;
+
+               if (!initial) {
+                       if (max_stale > 64)
+                               bch2_btree_node_rewrite(c, iter,
+                                               b->data->keys.seq,
+                                               BTREE_INSERT_USE_RESERVE|
+                                               BTREE_INSERT_NOWAIT|
+                                               BTREE_INSERT_GC_LOCK_HELD);
+                       else if (!btree_gc_rewrite_disabled(c) &&
+                                (btree_gc_always_rewrite(c) || max_stale > 16))
+                               bch2_btree_node_rewrite(c, iter,
+                                               b->data->keys.seq,
+                                               BTREE_INSERT_NOWAIT|
+                                               BTREE_INSERT_GC_LOCK_HELD);
+               }
+
+               bch2_trans_cond_resched(&trans);
+       }
+       ret = bch2_trans_exit(&trans) ?: ret;
+       if (ret)
+               return ret;
+
+       mutex_lock(&c->btree_root_lock);
+       b = c->btree_roots[btree_id].b;
+       if (!btree_node_fake(b))
+               ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+                                      &max_stale, initial);
+       gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
+       mutex_unlock(&c->btree_root_lock);
+
+       return ret;
+}
+
+static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
+                                     struct journal_keys *journal_keys,
+                                     unsigned target_depth)
+{
+       struct btree_and_journal_iter iter;
+       struct bkey_s_c k;
+       struct bpos next_node_start = b->data->min_key;
+       u8 max_stale = 0;
+       int ret = 0;
+
+       bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
+
+       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+               bch2_bkey_debugcheck(c, b, k);
+
+               BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0);
+               BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0);
+
+               ret = bch2_gc_mark_key(c, k, &max_stale, true);
+               if (ret)
+                       break;
+
+               if (b->c.level) {
+                       struct btree *child;
+                       BKEY_PADDED(k) tmp;
+
+                       bkey_reassemble(&tmp.k, k);
+                       k = bkey_i_to_s_c(&tmp.k);
+
+                       bch2_btree_and_journal_iter_advance(&iter);
+
+                       ret = bch2_gc_check_topology(c, k,
+                                       &next_node_start,
+                                       b->data->max_key,
+                                       !bch2_btree_and_journal_iter_peek(&iter).k);
+                       if (ret)
+                               break;
+
+                       if (b->c.level > target_depth) {
+                               child = bch2_btree_node_get_noiter(c, &tmp.k,
+                                                       b->c.btree_id, b->c.level - 1);
+                               ret = PTR_ERR_OR_ZERO(child);
+                               if (ret)
+                                       break;
+
+                               ret = bch2_gc_btree_init_recurse(c, child,
+                                               journal_keys, target_depth);
+                               six_unlock_read(&child->c.lock);
+
+                               if (ret)
+                                       break;
+                       }
+               } else {
+                       bch2_btree_and_journal_iter_advance(&iter);
+               }
+       }
+
+       return ret;
+}
+
+static int bch2_gc_btree_init(struct bch_fs *c,
+                             struct journal_keys *journal_keys,
+                             enum btree_id btree_id,
+                             bool metadata_only)
+{
+       struct btree *b;
+       unsigned target_depth = metadata_only           ? 1
+               : expensive_debug_checks(c)             ? 0
+               : !btree_node_type_needs_gc(btree_id)   ? 1
+               : 0;
+       u8 max_stale = 0;
+       int ret = 0;
+
+       b = c->btree_roots[btree_id].b;
+
+       if (btree_node_fake(b))
+               return 0;
+
+       six_lock_read(&b->c.lock, NULL, NULL);
+       if (fsck_err_on(bkey_cmp(b->data->min_key, POS_MIN), c,
+                       "btree root with incorrect min_key: %llu:%llu",
+                       b->data->min_key.inode,
+                       b->data->min_key.offset)) {
+               BUG();
+       }
+
+       if (fsck_err_on(bkey_cmp(b->data->max_key, POS_MAX), c,
+                       "btree root with incorrect max_key: %llu:%llu",
+                       b->data->max_key.inode,
+                       b->data->max_key.offset)) {
+               BUG();
+       }
+
+       if (b->c.level >= target_depth)
+               ret = bch2_gc_btree_init_recurse(c, b,
+                                       journal_keys, target_depth);
+
+       if (!ret)
+               ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+                                      &max_stale, true);
+fsck_err:
+       six_unlock_read(&b->c.lock);
+
+       return ret;
+}
+
+static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
+{
+       return  (int) btree_id_to_gc_phase(l) -
+               (int) btree_id_to_gc_phase(r);
+}
+
+static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys,
+                         bool initial, bool metadata_only)
+{
+       enum btree_id ids[BTREE_ID_NR];
+       unsigned i;
+
+       for (i = 0; i < BTREE_ID_NR; i++)
+               ids[i] = i;
+       bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
+
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               enum btree_id id = ids[i];
+               int ret = initial
+                       ? bch2_gc_btree_init(c, journal_keys,
+                                            id, metadata_only)
+                       : bch2_gc_btree(c, id, initial, metadata_only);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
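+/*
+ * Mark the buckets overlapping the sector range [start, end) as holding the
+ * given metadata type, walking the range bucket by bucket:
+ */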
+static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
+                                 u64 start, u64 end,
+                                 enum bch_data_type type,
+                                 unsigned flags)
+{
+       u64 b = sector_to_bucket(ca, start);
+
+       do {
+               unsigned sectors =
+                       min_t(u64, bucket_to_sector(ca, b + 1), end) - start;
+
+               bch2_mark_metadata_bucket(c, ca, b, type, sectors,
+                                         gc_phase(GC_PHASE_SB), flags);
+               b++;
+               start += sectors;
+       } while (start < end);
+}
+
+void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
+                             unsigned flags)
+{
+       struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+       unsigned i;
+       u64 b;
+
+       /*
+        * This conditional is kind of gross, but we may be called from the
+        * device add path, before the new device has actually been added to the
+        * running filesystem:
+        */
+       if (c) {
+               lockdep_assert_held(&c->sb_lock);
+               percpu_down_read(&c->mark_lock);
+       }
+
+       for (i = 0; i < layout->nr_superblocks; i++) {
+               u64 offset = le64_to_cpu(layout->sb_offset[i]);
+
+               if (offset == BCH_SB_SECTOR)
+                       mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
+                                             BCH_DATA_sb, flags);
+
+               mark_metadata_sectors(c, ca, offset,
+                                     offset + (1 << layout->sb_max_size_bits),
+                                     BCH_DATA_sb, flags);
+       }
+
+       for (i = 0; i < ca->journal.nr; i++) {
+               b = ca->journal.buckets[i];
+               bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
+                                         ca->mi.bucket_size,
+                                         gc_phase(GC_PHASE_SB), flags);
+       }
+
+       if (c)
+               percpu_up_read(&c->mark_lock);
+}
+
+static void bch2_mark_superblocks(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i;
+
+       mutex_lock(&c->sb_lock);
+       gc_pos_set(c, gc_phase(GC_PHASE_SB));
+
+       for_each_online_member(ca, c, i)
+               bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC);
+       mutex_unlock(&c->sb_lock);
+}
+
+#if 0
+/* Also see bch2_pending_btree_node_free_insert_done() */
+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
+{
+       struct btree_update *as;
+       struct pending_btree_node_free *d;
+
+       mutex_lock(&c->btree_interior_update_lock);
+       gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE));
+
+       for_each_pending_btree_node_free(c, as, d)
+               if (d->index_update_done)
+                       bch2_mark_key(c, bkey_i_to_s_c(&d->key),
+                                     0, 0, NULL, 0,
+                                     BTREE_TRIGGER_GC);
+
+       mutex_unlock(&c->btree_interior_update_lock);
+}
+#endif
+
+static void bch2_mark_allocator_buckets(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       struct open_bucket *ob;
+       size_t i, j, iter;
+       unsigned ci;
+
+       percpu_down_read(&c->mark_lock);
+
+       spin_lock(&c->freelist_lock);
+       gc_pos_set(c, gc_pos_alloc(c, NULL));
+
+       for_each_member_device(ca, c, ci) {
+               fifo_for_each_entry(i, &ca->free_inc, iter)
+                       bch2_mark_alloc_bucket(c, ca, i, true,
+                                              gc_pos_alloc(c, NULL),
+                                              BTREE_TRIGGER_GC);
+
+               for (j = 0; j < RESERVE_NR; j++)
+                       fifo_for_each_entry(i, &ca->free[j], iter)
+                               bch2_mark_alloc_bucket(c, ca, i, true,
+                                                      gc_pos_alloc(c, NULL),
+                                                      BTREE_TRIGGER_GC);
+       }
+
+       spin_unlock(&c->freelist_lock);
+
+       for (ob = c->open_buckets;
+            ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
+            ob++) {
+               spin_lock(&ob->lock);
+               if (ob->valid) {
+                       gc_pos_set(c, gc_pos_alloc(c, ob));
+                       ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+                       bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), true,
+                                              gc_pos_alloc(c, ob),
+                                              BTREE_TRIGGER_GC);
+               }
+               spin_unlock(&ob->lock);
+       }
+
+       percpu_up_read(&c->mark_lock);
+}
+
+static void bch2_gc_free(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i;
+
+       genradix_free(&c->stripes[1]);
+
+       for_each_member_device(ca, c, i) {
+               kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
+                       sizeof(struct bucket_array) +
+                       ca->mi.nbuckets * sizeof(struct bucket));
+               ca->buckets[1] = NULL;
+
+               free_percpu(ca->usage[1]);
+               ca->usage[1] = NULL;
+       }
+
+       free_percpu(c->usage_gc);
+       c->usage_gc = NULL;
+}
+
+static int bch2_gc_done(struct bch_fs *c,
+                       bool initial, bool metadata_only)
+{
+       struct bch_dev *ca;
+       bool verify = !metadata_only &&
+               (!initial ||
+                (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
+       unsigned i;
+       int ret = 0;
+
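+       /*
+        * These macros compare the primary copy of the accounting (index 0,
+        * @dst) against the copy GC just recomputed (index 1, @src): on a
+        * mismatch, report it via fsck_err() when @verify is set, then fix up
+        * the primary copy from GC's version:
+        */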
+#define copy_field(_f, _msg, ...)                                      \
+       if (dst->_f != src->_f) {                                       \
+               if (verify)                                             \
+                       fsck_err(c, _msg ": got %llu, should be %llu"   \
+                               , ##__VA_ARGS__, dst->_f, src->_f);     \
+               dst->_f = src->_f;                                      \
+               ret = 1;                                                \
+       }
+#define copy_stripe_field(_f, _msg, ...)                               \
+       if (dst->_f != src->_f) {                                       \
+               if (verify)                                             \
+                       fsck_err(c, "stripe %zu has wrong "_msg         \
+                               ": got %u, should be %u",               \
+                               dst_iter.pos, ##__VA_ARGS__,            \
+                               dst->_f, src->_f);                      \
+               dst->_f = src->_f;                                      \
+               dst->dirty = true;                                      \
+               ret = 1;                                                \
+       }
+#define copy_bucket_field(_f)                                          \
+       if (dst->b[b].mark._f != src->b[b].mark._f) {                   \
+               if (verify)                                             \
+                       fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f  \
+                               ": got %u, should be %u", i, b,         \
+                               dst->b[b].mark.gen,                     \
+                               bch2_data_types[dst->b[b].mark.data_type],\
+                               dst->b[b].mark._f, src->b[b].mark._f);  \
+               dst->b[b]._mark._f = src->b[b].mark._f;                 \
+               ret = 1;                                                \
+       }
+#define copy_dev_field(_f, _msg, ...)                                  \
+       copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
+#define copy_fs_field(_f, _msg, ...)                                   \
+       copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
+
+       if (!metadata_only) {
+               struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
+               struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
+               struct stripe *dst, *src;
+               unsigned i;
+
+               c->ec_stripes_heap.used = 0;
+
+               while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
+                      (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
+                       BUG_ON(src_iter.pos != dst_iter.pos);
+
+                       copy_stripe_field(alive,        "alive");
+                       copy_stripe_field(sectors,      "sectors");
+                       copy_stripe_field(algorithm,    "algorithm");
+                       copy_stripe_field(nr_blocks,    "nr_blocks");
+                       copy_stripe_field(nr_redundant, "nr_redundant");
+                       copy_stripe_field(blocks_nonempty,
+                                         "blocks_nonempty");
+
+                       for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
+                               copy_stripe_field(block_sectors[i],
+                                                 "block_sectors[%u]", i);
+
+                       if (dst->alive) {
+                               spin_lock(&c->ec_stripes_heap_lock);
+                               bch2_stripes_heap_insert(c, dst, dst_iter.pos);
+                               spin_unlock(&c->ec_stripes_heap_lock);
+                       }
+
+                       genradix_iter_advance(&dst_iter, &c->stripes[0]);
+                       genradix_iter_advance(&src_iter, &c->stripes[1]);
+               }
+       }
+
+       for_each_member_device(ca, c, i) {
+               struct bucket_array *dst = __bucket_array(ca, 0);
+               struct bucket_array *src = __bucket_array(ca, 1);
+               size_t b;
+
+               for (b = 0; b < src->nbuckets; b++) {
+                       copy_bucket_field(gen);
+                       copy_bucket_field(data_type);
+                       copy_bucket_field(owned_by_allocator);
+                       copy_bucket_field(stripe);
+                       copy_bucket_field(dirty_sectors);
+                       copy_bucket_field(cached_sectors);
+
+                       dst->b[b].oldest_gen = src->b[b].oldest_gen;
+               }
+       }
+
+       bch2_fs_usage_acc_to_base(c, 0);
+       bch2_fs_usage_acc_to_base(c, 1);
+
+       bch2_dev_usage_from_buckets(c);
+
+       {
+               unsigned nr = fs_usage_u64s(c);
+               struct bch_fs_usage *dst = c->usage_base;
+               struct bch_fs_usage *src = (void *)
+                       bch2_acc_percpu_u64s((void *) c->usage_gc, nr);
+
+               copy_fs_field(hidden,           "hidden");
+               copy_fs_field(btree,            "btree");
+
+               if (!metadata_only) {
+                       copy_fs_field(data,     "data");
+                       copy_fs_field(cached,   "cached");
+                       copy_fs_field(reserved, "reserved");
+                       copy_fs_field(nr_inodes,"nr_inodes");
+
+                       for (i = 0; i < BCH_REPLICAS_MAX; i++)
+                               copy_fs_field(persistent_reserved[i],
+                                             "persistent_reserved[%i]", i);
+               }
+
+               for (i = 0; i < c->replicas.nr; i++) {
+                       struct bch_replicas_entry *e =
+                               cpu_replicas_entry(&c->replicas, i);
+                       char buf[80];
+
+                       if (metadata_only &&
+                           (e->data_type == BCH_DATA_user ||
+                            e->data_type == BCH_DATA_cached))
+                               continue;
+
+                       bch2_replicas_entry_to_text(&PBUF(buf), e);
+
+                       copy_fs_field(replicas[i], "%s", buf);
+               }
+       }
+
+#undef copy_fs_field
+#undef copy_dev_field
+#undef copy_bucket_field
+#undef copy_stripe_field
+#undef copy_field
+fsck_err:
+       return ret;
+}
+
+static int bch2_gc_start(struct bch_fs *c,
+                        bool metadata_only)
+{
+       struct bch_dev *ca;
+       unsigned i;
+       int ret;
+
+       BUG_ON(c->usage_gc);
+
+       c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
+                                        sizeof(u64), GFP_KERNEL);
+       if (!c->usage_gc) {
+               bch_err(c, "error allocating c->usage_gc");
+               return -ENOMEM;
+       }
+
+       for_each_member_device(ca, c, i) {
+               BUG_ON(ca->buckets[1]);
+               BUG_ON(ca->usage[1]);
+
+               ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) +
+                               ca->mi.nbuckets * sizeof(struct bucket),
+                               GFP_KERNEL|__GFP_ZERO);
+               if (!ca->buckets[1]) {
+                       percpu_ref_put(&ca->ref);
+                       bch_err(c, "error allocating ca->buckets[gc]");
+                       return -ENOMEM;
+               }
+
+               ca->usage[1] = alloc_percpu(struct bch_dev_usage);
+               if (!ca->usage[1]) {
+                       bch_err(c, "error allocating ca->usage[gc]");
+                       percpu_ref_put(&ca->ref);
+                       return -ENOMEM;
+               }
+       }
+
+       ret = bch2_ec_mem_alloc(c, true);
+       if (ret) {
+               bch_err(c, "error allocating ec gc mem");
+               return ret;
+       }
+
+       percpu_down_write(&c->mark_lock);
+
+       /*
+        * indicate to stripe code that we need to allocate for the gc stripes
+        * radix tree, too
+        */
+       gc_pos_set(c, gc_phase(GC_PHASE_START));
+
+       for_each_member_device(ca, c, i) {
+               struct bucket_array *dst = __bucket_array(ca, 1);
+               struct bucket_array *src = __bucket_array(ca, 0);
+               size_t b;
+
+               dst->first_bucket       = src->first_bucket;
+               dst->nbuckets           = src->nbuckets;
+
+               for (b = 0; b < src->nbuckets; b++) {
+                       struct bucket *d = &dst->b[b];
+                       struct bucket *s = &src->b[b];
+
+                       d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
+                       d->gen_valid = s->gen_valid;
+
+                       if (metadata_only &&
+                           (s->mark.data_type == BCH_DATA_user ||
+                            s->mark.data_type == BCH_DATA_cached)) {
+                               d->_mark = s->mark;
+                               d->_mark.owned_by_allocator = 0;
+                       }
+               }
+       }
+
+       percpu_up_write(&c->mark_lock);
+
+       return 0;
+}
+
+/**
+ * bch2_gc - walk _all_ references to buckets, and recompute them:
+ *
+ * Order matters here:
+ *  - Concurrent GC relies on the fact that we have a total ordering for
+ *    everything that GC walks - see gc_will_visit_node(),
+ *    gc_will_visit_root()
+ *
+ *  - also, references move around in the course of index updates and
+ *    various other crap: everything needs to agree on the ordering
+ *    references are allowed to move around in - e.g., we're allowed to
+ *    start with a reference owned by an open_bucket (the allocator) and
+ *    move it to the btree, but not the reverse.
+ *
+ *    This is necessary to ensure that gc doesn't miss references that
+ *    move around - if references move backwards in the ordering GC
+ *    uses, GC could skip past them
+ */
+int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys,
+           bool initial, bool metadata_only)
+{
+       struct bch_dev *ca;
+       u64 start_time = local_clock();
+       unsigned i, iter = 0;
+       int ret;
+
+       lockdep_assert_held(&c->state_lock);
+       trace_gc_start(c);
+
+       down_write(&c->gc_lock);
+
+       /* flush interior btree updates: */
+       closure_wait_event(&c->btree_interior_update_wait,
+                          !bch2_btree_interior_updates_nr_pending(c));
+again:
+       ret = bch2_gc_start(c, metadata_only);
+       if (ret)
+               goto out;
+
+       bch2_mark_superblocks(c);
+
+       ret = bch2_gc_btrees(c, journal_keys, initial, metadata_only);
+       if (ret)
+               goto out;
+
+#if 0
+       bch2_mark_pending_btree_node_frees(c);
+#endif
+       bch2_mark_allocator_buckets(c);
+
+       c->gc_count++;
+out:
+       if (!ret &&
+           (test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
+            (!iter && test_restart_gc(c)))) {
+               /*
+                * XXX: make sure gens we fixed got saved
+                */
+               if (iter++ <= 2) {
+                       bch_info(c, "Fixed gens, restarting mark and sweep:");
+                       clear_bit(BCH_FS_FIXED_GENS, &c->flags);
+                       __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+
+                       percpu_down_write(&c->mark_lock);
+                       bch2_gc_free(c);
+                       percpu_up_write(&c->mark_lock);
+                       /* flush fsck errors, reset counters */
+                       bch2_flush_fsck_errs(c);
+
+                       goto again;
+               }
+
+               bch_info(c, "Unable to fix bucket gens, looping");
+               ret = -EINVAL;
+       }
+
+       if (!ret) {
+               bch2_journal_block(&c->journal);
+
+               percpu_down_write(&c->mark_lock);
+               ret = bch2_gc_done(c, initial, metadata_only);
+
+               bch2_journal_unblock(&c->journal);
+       } else {
+               percpu_down_write(&c->mark_lock);
+       }
+
+       /* Indicates that gc is no longer in progress: */
+       __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+
+       bch2_gc_free(c);
+       percpu_up_write(&c->mark_lock);
+
+       up_write(&c->gc_lock);
+
+       trace_gc_end(c);
+       bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
+
+       /*
+        * Wake up allocator in case it was waiting for buckets
+        * because of not being able to inc gens
+        */
+       for_each_member_device(ca, c, i)
+               bch2_wake_allocator(ca);
+
+       /*
+        * At startup, allocations can happen directly instead of via the
+        * allocator thread - issue wakeup in case they blocked on gc_lock:
+        */
+       closure_wake_up(&c->freelist_wait);
+       return ret;
+}
+
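+/*
+ * Returns true if the key should be rewritten to drop stale pointers - i.e.
+ * if any pointer is more than 16 generations older than its bucket's current
+ * gen; otherwise, just pulls each bucket's gc_gen back to the oldest gen this
+ * key references:
+ */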
+static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       percpu_down_read(&c->mark_lock);
+       bkey_for_each_ptr(ptrs, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct bucket *g = PTR_BUCKET(ca, ptr, false);
+
+               if (gen_after(g->mark.gen, ptr->gen) > 16) {
+                       percpu_up_read(&c->mark_lock);
+                       return true;
+               }
+       }
+
+       bkey_for_each_ptr(ptrs, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct bucket *g = PTR_BUCKET(ca, ptr, false);
+
+               if (gen_after(g->gc_gen, ptr->gen))
+                       g->gc_gen = ptr->gen;
+       }
+       percpu_up_read(&c->mark_lock);
+
+       return false;
+}
+
+/*
+ * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree
+ * node pointers currently never have cached pointers that can become stale:
+ */
+static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_on_stack sk;
+       int ret = 0;
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
+                                  BTREE_ITER_PREFETCH);
+
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k))) {
+               if (gc_btree_gens_key(c, k)) {
+                       bkey_on_stack_reassemble(&sk, c, k);
+                       bch2_extent_normalize(c, bkey_i_to_s(sk.k));
+
+                       bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
+
+                       bch2_trans_update(&trans, iter, sk.k, 0);
+
+                       ret = bch2_trans_commit(&trans, NULL, NULL,
+                                               BTREE_INSERT_NOFAIL);
+                       if (ret == -EINTR)
+                               continue;
+                       if (ret)
+                               break;
+               }
+
+               bch2_btree_iter_next(iter);
+       }
+
+       bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
+
+       return ret;
+}
+
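+/*
+ * Recompute each bucket's oldest_gen: gc_gen starts at the bucket's current
+ * gen, is pulled back to the oldest gen still referenced by a key in a leaf
+ * node, and is then committed as the new oldest_gen:
+ */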
+int bch2_gc_gens(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       struct bucket_array *buckets;
+       struct bucket *g;
+       unsigned i;
+       int ret = 0;
+
+       /*
+        * Ideally we would be using state_lock and not gc_lock here, but that
+        * introduces a deadlock in the RO path - we currently take the state
+        * lock at the start of going RO, thus the gc thread may get stuck:
+        */
+       down_read(&c->gc_lock);
+
+       for_each_member_device(ca, c, i) {
+               down_read(&ca->bucket_lock);
+               buckets = bucket_array(ca);
+
+               for_each_bucket(g, buckets)
+                       g->gc_gen = g->mark.gen;
+               up_read(&ca->bucket_lock);
+       }
+
+       for (i = 0; i < BTREE_ID_NR; i++)
+               if (btree_node_type_needs_gc(i)) {
+                       ret = bch2_gc_btree_gens(c, i);
+                       if (ret) {
+                               bch_err(c, "error recalculating oldest_gen: %i", ret);
+                               goto err;
+                       }
+               }
+
+       for_each_member_device(ca, c, i) {
+               down_read(&ca->bucket_lock);
+               buckets = bucket_array(ca);
+
+               for_each_bucket(g, buckets)
+                       g->oldest_gen = g->gc_gen;
+               up_read(&ca->bucket_lock);
+       }
+
+       c->gc_count++;
+err:
+       up_read(&c->gc_lock);
+       return ret;
+}
+
+/* Btree coalescing */
+
+static void recalc_packed_keys(struct btree *b)
+{
+       struct bset *i = btree_bset_first(b);
+       struct bkey_packed *k;
+
+       memset(&b->nr, 0, sizeof(b->nr));
+
+       BUG_ON(b->nsets != 1);
+
+       vstruct_for_each(i, k)
+               btree_keys_account_key_add(&b->nr, 0, k);
+}
+
+static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
+                               struct btree *old_nodes[GC_MERGE_NODES])
+{
+       struct btree *parent = btree_node_parent(iter, old_nodes[0]);
+       unsigned i, nr_old_nodes, nr_new_nodes, u64s = 0;
+       unsigned blocks = btree_blocks(c) * 2 / 3;
+       struct btree *new_nodes[GC_MERGE_NODES];
+       struct btree_update *as;
+       struct keylist keylist;
+       struct bkey_format_state format_state;
+       struct bkey_format new_format;
+
+       memset(new_nodes, 0, sizeof(new_nodes));
+       bch2_keylist_init(&keylist, NULL);
+
+       /* Count keys that are not deleted */
+       for (i = 0; i < GC_MERGE_NODES && old_nodes[i]; i++)
+               u64s += old_nodes[i]->nr.live_u64s;
+
+       nr_old_nodes = nr_new_nodes = i;
+
+       /* Check if all keys in @old_nodes could fit in one fewer node */
+       if (nr_old_nodes <= 1 ||
+           __vstruct_blocks(struct btree_node, c->block_bits,
+                            DIV_ROUND_UP(u64s, nr_old_nodes - 1)) > blocks)
+               return;
+
+       /* Find a format that all keys in @old_nodes can pack into */
+       bch2_bkey_format_init(&format_state);
+
+       for (i = 0; i < nr_old_nodes; i++)
+               __bch2_btree_calc_format(&format_state, old_nodes[i]);
+
+       new_format = bch2_bkey_format_done(&format_state);
+
+       /* Check if repacking would make any nodes too big to fit */
+       for (i = 0; i < nr_old_nodes; i++)
+               if (!bch2_btree_node_format_fits(c, old_nodes[i], &new_format)) {
+                       trace_btree_gc_coalesce_fail(c,
+                                       BTREE_GC_COALESCE_FAIL_FORMAT_FITS);
+                       return;
+               }
+
+       if (bch2_keylist_realloc(&keylist, NULL, 0,
+                       (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) {
+               trace_btree_gc_coalesce_fail(c,
+                               BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC);
+               return;
+       }
+
+       as = bch2_btree_update_start(iter->trans, iter->btree_id,
+                       btree_update_reserve_required(c, parent) + nr_old_nodes,
+                       BTREE_INSERT_NOFAIL|
+                       BTREE_INSERT_USE_RESERVE,
+                       NULL);
+       if (IS_ERR(as)) {
+               trace_btree_gc_coalesce_fail(c,
+                               BTREE_GC_COALESCE_FAIL_RESERVE_GET);
+               bch2_keylist_free(&keylist, NULL);
+               return;
+       }
+
+       trace_btree_gc_coalesce(c, old_nodes[0]);
+
+       for (i = 0; i < nr_old_nodes; i++)
+               bch2_btree_interior_update_will_free_node(as, old_nodes[i]);
+
+       /* Repack everything with @new_format and sort down to one bset */
+       for (i = 0; i < nr_old_nodes; i++)
+               new_nodes[i] =
+                       __bch2_btree_node_alloc_replacement(as, old_nodes[i],
+                                                           new_format);
+
+       /*
+        * Conceptually we concatenate the nodes together and slice them
+        * up at different boundaries.
+        */
+       for (i = nr_new_nodes - 1; i > 0; --i) {
+               struct btree *n1 = new_nodes[i];
+               struct btree *n2 = new_nodes[i - 1];
+
+               struct bset *s1 = btree_bset_first(n1);
+               struct bset *s2 = btree_bset_first(n2);
+               struct bkey_packed *k, *last = NULL;
+
+               /* Calculate how many keys from @n2 we could fit inside @n1 */
+               u64s = 0;
+
+               for (k = s2->start;
+                    k < vstruct_last(s2) &&
+                    vstruct_blocks_plus(n1->data, c->block_bits,
+                                        u64s + k->u64s) <= blocks;
+                    k = bkey_next_skip_noops(k, vstruct_last(s2))) {
+                       last = k;
+                       u64s += k->u64s;
+               }
+
+               if (u64s == le16_to_cpu(s2->u64s)) {
+                       /* n2 fits entirely in n1 */
+                       n1->key.k.p = n1->data->max_key = n2->data->max_key;
+
+                       memcpy_u64s(vstruct_last(s1),
+                                   s2->start,
+                                   le16_to_cpu(s2->u64s));
+                       le16_add_cpu(&s1->u64s, le16_to_cpu(s2->u64s));
+
+                       set_btree_bset_end(n1, n1->set);
+
+                       six_unlock_write(&n2->c.lock);
+                       bch2_btree_node_free_never_inserted(c, n2);
+                       six_unlock_intent(&n2->c.lock);
+
+                       memmove(new_nodes + i - 1,
+                               new_nodes + i,
+                               sizeof(new_nodes[0]) * (nr_new_nodes - i));
+                       new_nodes[--nr_new_nodes] = NULL;
+               } else if (u64s) {
+                       /* move part of n2 into n1 */
+                       n1->key.k.p = n1->data->max_key =
+                               bkey_unpack_pos(n1, last);
+
+                       n2->data->min_key = bkey_successor(n1->data->max_key);
+
+                       memcpy_u64s(vstruct_last(s1),
+                                   s2->start, u64s);
+                       le16_add_cpu(&s1->u64s, u64s);
+
+                       memmove(s2->start,
+                               vstruct_idx(s2, u64s),
+                               (le16_to_cpu(s2->u64s) - u64s) * sizeof(u64));
+                       s2->u64s = cpu_to_le16(le16_to_cpu(s2->u64s) - u64s);
+
+                       set_btree_bset_end(n1, n1->set);
+                       set_btree_bset_end(n2, n2->set);
+               }
+       }
+
+       for (i = 0; i < nr_new_nodes; i++) {
+               struct btree *n = new_nodes[i];
+
+               recalc_packed_keys(n);
+               btree_node_reset_sib_u64s(n);
+
+               bch2_btree_build_aux_trees(n);
+
+               bch2_btree_update_add_new_node(as, n);
+               six_unlock_write(&n->c.lock);
+
+               bch2_btree_node_write(c, n, SIX_LOCK_intent);
+       }
+
+       /*
+        * The keys for the old nodes get deleted. We don't want to insert keys
+        * that compare equal to the keys for the new nodes we'll also be
+        * inserting - we can't because keys on a keylist must be strictly
+        * greater than the previous keys, and we also don't need to since the
+        * key for the new node will serve the same purpose (overwriting the key
+        * for the old node).
+        */
+       for (i = 0; i < nr_old_nodes; i++) {
+               struct bkey_i delete;
+               unsigned j;
+
+               for (j = 0; j < nr_new_nodes; j++)
+                       if (!bkey_cmp(old_nodes[i]->key.k.p,
+                                     new_nodes[j]->key.k.p))
+                               goto next;
+
+               bkey_init(&delete.k);
+               delete.k.p = old_nodes[i]->key.k.p;
+               bch2_keylist_add_in_order(&keylist, &delete);
+next:
+               i = i;  /* no-op: a label must be followed by a statement */
+       }
+
+       /*
+        * Keys for the new nodes get inserted: bch2_btree_insert_keys() only
+        * does the lookup once and thus expects the keys to be in sorted order
+        * so we have to make sure the new keys are correctly ordered with
+        * respect to the deleted keys added in the previous loop
+        */
+       for (i = 0; i < nr_new_nodes; i++)
+               bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key);
+
+       /* Insert the newly coalesced nodes */
+       bch2_btree_insert_node(as, parent, iter, &keylist, 0);
+
+       BUG_ON(!bch2_keylist_empty(&keylist));
+
+       BUG_ON(iter->l[old_nodes[0]->c.level].b != old_nodes[0]);
+
+       bch2_btree_iter_node_replace(iter, new_nodes[0]);
+
+       for (i = 0; i < nr_new_nodes; i++)
+               bch2_btree_update_get_open_buckets(as, new_nodes[i]);
+
+       /* Free the old nodes and update our sliding window */
+       for (i = 0; i < nr_old_nodes; i++) {
+               bch2_btree_node_free_inmem(c, old_nodes[i], iter);
+
+               /*
+                * the index update might have triggered a split, in which case
+                * the nodes we coalesced - the new nodes we just created -
+                * might not be sibling nodes anymore - don't add them to the
+                * sliding window (except the first):
+                */
+               if (!i)
+                       old_nodes[i] = new_nodes[i];
+               else
+                       old_nodes[i] = NULL;
+       }
+
+       for (i = 0; i < nr_new_nodes; i++)
+               six_unlock_intent(&new_nodes[i]->c.lock);
+
+       bch2_btree_update_done(as);
+       bch2_keylist_free(&keylist, NULL);
+}
+
+static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       bool kthread = (current->flags & PF_KTHREAD) != 0;
+       unsigned i;
+
+       /* Sliding window of adjacent btree nodes */
+       struct btree *merge[GC_MERGE_NODES];
+       u32 lock_seq[GC_MERGE_NODES];
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       /*
+        * XXX: We don't have a good way of positively matching on sibling nodes
+        * that have the same parent - this code works by handling the cases
+        * where they might not have the same parent, and is thus fragile. Ugh.
+        *
+        * Perhaps redo this to use multiple linked iterators?
+        */
+       memset(merge, 0, sizeof(merge));
+
+       __for_each_btree_node(&trans, iter, btree_id, POS_MIN,
+                             BTREE_MAX_DEPTH, 0,
+                             BTREE_ITER_PREFETCH, b) {
+               memmove(merge + 1, merge,
+                       sizeof(merge) - sizeof(merge[0]));
+               memmove(lock_seq + 1, lock_seq,
+                       sizeof(lock_seq) - sizeof(lock_seq[0]));
+
+               merge[0] = b;
+
+               for (i = 1; i < GC_MERGE_NODES; i++) {
+                       if (!merge[i] ||
+                           !six_relock_intent(&merge[i]->c.lock, lock_seq[i]))
+                               break;
+
+                       if (merge[i]->c.level != merge[0]->c.level) {
+                               six_unlock_intent(&merge[i]->c.lock);
+                               break;
+                       }
+               }
+               memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0]));
+
+               bch2_coalesce_nodes(c, iter, merge);
+
+               for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) {
+                       lock_seq[i] = merge[i]->c.lock.state.seq;
+                       six_unlock_intent(&merge[i]->c.lock);
+               }
+
+               lock_seq[0] = merge[0]->c.lock.state.seq;
+
+               if (kthread && kthread_should_stop()) {
+                       bch2_trans_exit(&trans);
+                       return -ESHUTDOWN;
+               }
+
+               bch2_trans_cond_resched(&trans);
+
+               /*
+                * If the parent node wasn't relocked, it might have been split
+                * and the nodes in our sliding window might not have the same
+                * parent anymore - blow away the sliding window:
+                */
+               if (btree_iter_node(iter, iter->level + 1) &&
+                   !btree_node_intent_locked(iter, iter->level + 1))
+                       memset(merge + 1, 0,
+                              (GC_MERGE_NODES - 1) * sizeof(merge[0]));
+       }
+       return bch2_trans_exit(&trans);
+}
+
+/**
+ * bch2_coalesce - coalesce adjacent btree nodes with low occupancy
+ */
+void bch2_coalesce(struct bch_fs *c)
+{
+       enum btree_id id;
+
+       down_read(&c->gc_lock);
+       trace_gc_coalesce_start(c);
+
+       for (id = 0; id < BTREE_ID_NR; id++) {
+               int ret = c->btree_roots[id].b
+                       ? bch2_coalesce_btree(c, id)
+                       : 0;
+
+               if (ret) {
+                       if (ret != -ESHUTDOWN)
+                               bch_err(c, "btree coalescing failed: %d", ret);
+                       up_read(&c->gc_lock);
+                       return;
+               }
+       }
+
+       trace_gc_coalesce_end(c);
+       up_read(&c->gc_lock);
+}
+
+static int bch2_gc_thread(void *arg)
+{
+       struct bch_fs *c = arg;
+       struct io_clock *clock = &c->io_clock[WRITE];
+       unsigned long last = atomic_long_read(&clock->now);
+       unsigned last_kick = atomic_read(&c->kick_gc);
+       int ret;
+
+       set_freezable();
+
+       while (1) {
+               while (1) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+
+                       if (kthread_should_stop()) {
+                               __set_current_state(TASK_RUNNING);
+                               return 0;
+                       }
+
+                       if (atomic_read(&c->kick_gc) != last_kick)
+                               break;
+
+                       if (c->btree_gc_periodic) {
+                               unsigned long next = last + c->capacity / 16;
+
+                               if (atomic_long_read(&clock->now) >= next)
+                                       break;
+
+                               bch2_io_clock_schedule_timeout(clock, next);
+                       } else {
+                               schedule();
+                       }
+
+                       try_to_freeze();
+               }
+               __set_current_state(TASK_RUNNING);
+
+               last = atomic_long_read(&clock->now);
+               last_kick = atomic_read(&c->kick_gc);
+
+               /*
+                * Full gc is currently incompatible with btree key cache:
+                */
+#if 0
+               ret = bch2_gc(c, NULL, false, false);
+#else
+               ret = bch2_gc_gens(c);
+#endif
+               if (ret < 0)
+                       bch_err(c, "btree gc failed: %i", ret);
+
+               debug_check_no_locks_held();
+       }
+
+       return 0;
+}
+
+void bch2_gc_thread_stop(struct bch_fs *c)
+{
+       struct task_struct *p;
+
+       p = c->gc_thread;
+       c->gc_thread = NULL;
+
+       if (p) {
+               kthread_stop(p);
+               put_task_struct(p);
+       }
+}
+
+int bch2_gc_thread_start(struct bch_fs *c)
+{
+       struct task_struct *p;
+
+       BUG_ON(c->gc_thread);
+
+       p = kthread_create(bch2_gc_thread, c, "bch_gc");
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       get_task_struct(p);
+       c->gc_thread = p;
+       wake_up_process(p);
+       return 0;
+}
diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h
new file mode 100644 (file)
index 0000000..3694a3d
--- /dev/null
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_GC_H
+#define _BCACHEFS_BTREE_GC_H
+
+#include "btree_types.h"
+
+void bch2_coalesce(struct bch_fs *);
+
+struct journal_keys;
+int bch2_gc(struct bch_fs *, struct journal_keys *, bool, bool);
+int bch2_gc_gens(struct bch_fs *);
+void bch2_gc_thread_stop(struct bch_fs *);
+int bch2_gc_thread_start(struct bch_fs *);
+void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
+
+/*
+ * For concurrent mark and sweep (with other index updates), we define a total
+ * ordering of _all_ references GC walks:
+ *
+ * Note that some references will have the same GC position as others - e.g.
+ * everything within the same btree node; in those cases we're relying on
+ * whatever locking exists for where those references live, i.e. the write lock
+ * on a btree node.
+ *
+ * That locking is also required to ensure GC doesn't pass the updater in
+ * between the updater adding/removing the reference and updating the GC marks;
+ * without that, we would at best double count sometimes.
+ *
+ * That part is important - whenever calling bch2_mark_pointers(), a lock _must_
+ * be held that prevents GC from passing the position the updater is at.
+ *
+ * (What about the start of gc, when we're clearing all the marks? GC clears the
+ * mark with the gc pos seqlock held, and the mark functions check against the
+ * gc position inside their cmpxchg loops, so the two can't race; see the
+ * illustrative sketch after gc_visited() below).
+ */
+
+/* Position of (the start of) a gc phase: */
+static inline struct gc_pos gc_phase(enum gc_phase phase)
+{
+       return (struct gc_pos) {
+               .phase  = phase,
+               .pos    = POS_MIN,
+               .level  = 0,
+       };
+}
+
+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
+{
+       if (l.phase != r.phase)
+               return l.phase < r.phase ? -1 : 1;
+       if (bkey_cmp(l.pos, r.pos))
+               return bkey_cmp(l.pos, r.pos);
+       if (l.level != r.level)
+               return l.level < r.level ? -1 : 1;
+       return 0;
+}
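+
+/*
+ * Illustrative note (not upstream code): positions compare lexicographically
+ * by (phase, pos, level), so e.g. for two positions in the same phase,
+ *
+ *	l = { .phase = p, .pos = POS_MIN, .level = 0 }
+ *	r = { .phase = p, .pos = POS_MIN, .level = 1 }
+ *
+ * gc_pos_cmp(l, r) < 0: with phase and pos equal, the lower level orders
+ * first.
+ */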
+
+static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
+{
+       switch (id) {
+#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n;
+       BCH_BTREE_IDS()
+#undef x
+       default:
+               BUG();
+       }
+}
+
+static inline struct gc_pos gc_pos_btree(enum btree_id id,
+                                        struct bpos pos, unsigned level)
+{
+       return (struct gc_pos) {
+               .phase  = btree_id_to_gc_phase(id),
+               .pos    = pos,
+               .level  = level,
+       };
+}
+
+/*
+ * GC position of the pointers within a btree node: note, _not_ for &b->key
+ * itself, that lives in the parent node:
+ */
+static inline struct gc_pos gc_pos_btree_node(struct btree *b)
+{
+       return gc_pos_btree(b->c.btree_id, b->key.k.p, b->c.level);
+}
+
+/*
+ * GC position of the pointer to a btree root: we don't use
+ * gc_pos_pointer_to_btree_node() here to avoid a potential race with
+ * btree_split() increasing the tree depth - the new root will have level > the
+ * old root and thus have a greater gc position than the old root, but that
+ * would be incorrect since once gc has marked the root it's not coming back.
+ */
+static inline struct gc_pos gc_pos_btree_root(enum btree_id id)
+{
+       return gc_pos_btree(id, POS_MAX, BTREE_MAX_DEPTH);
+}
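+
+/*
+ * Illustrative consequence (not upstream code): since node levels are always
+ * < BTREE_MAX_DEPTH and no node position exceeds POS_MAX, for any node b in
+ * btree id,
+ *
+ *	gc_pos_cmp(gc_pos_btree_node(b), gc_pos_btree_root(id)) < 0
+ *
+ * i.e. within a btree's gc phase, the root pointer is always marked last.
+ */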
+
+static inline struct gc_pos gc_pos_alloc(struct bch_fs *c, struct open_bucket *ob)
+{
+       return (struct gc_pos) {
+               .phase  = GC_PHASE_ALLOC,
+               .pos    = POS(ob ? ob - c->open_buckets : 0, 0),
+       };
+}
+
+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
+{
+       unsigned seq;
+       bool ret;
+
+       do {
+               seq = read_seqcount_begin(&c->gc_pos_lock);
+               ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
+       } while (read_seqcount_retry(&c->gc_pos_lock, seq));
+
+       return ret;
+}
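+
+/*
+ * Minimal sketch of the updater-side protocol described at the top of this
+ * header (not upstream code; update_primary_marks() and
+ * update_gc_copy_of_marks() are hypothetical helpers):
+ *
+ *	static void mark_update_example(struct bch_fs *c, struct btree *b)
+ *	{
+ *		// caller holds the btree node write lock, so gc can't pass
+ *		// this position while we're between the two updates:
+ *		update_primary_marks(c, b);
+ *
+ *		// if gc already walked past us, its copy of the marks must
+ *		// be updated too:
+ *		if (gc_visited(c, gc_pos_btree_node(b)))
+ *			update_gc_copy_of_marks(c, b);
+ *	}
+ */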
+
+#endif /* _BCACHEFS_BTREE_GC_H */
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
new file mode 100644 (file)
index 0000000..2f50972
--- /dev/null
@@ -0,0 +1,1834 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "bkey_sort.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "debug.h"
+#include "error.h"
+#include "extents.h"
+#include "io.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "super-io.h"
+
+#include <linux/sched/mm.h>
+#include <trace/events/bcachefs.h>
+
+static void verify_no_dups(struct btree *b,
+                          struct bkey_packed *start,
+                          struct bkey_packed *end,
+                          bool extents)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct bkey_packed *k, *p;
+
+       if (start == end)
+               return;
+
+       for (p = start, k = bkey_next_skip_noops(start, end);
+            k != end;
+            p = k, k = bkey_next_skip_noops(k, end)) {
+               struct bkey l = bkey_unpack_key(b, p);
+               struct bkey r = bkey_unpack_key(b, k);
+
+               BUG_ON(extents
+                      ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
+                      : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
+               //BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0);
+       }
+#endif
+}
+
+static void set_needs_whiteout(struct bset *i, int v)
+{
+       struct bkey_packed *k;
+
+       for (k = i->start;
+            k != vstruct_last(i);
+            k = bkey_next_skip_noops(k, vstruct_last(i)))
+               k->needs_whiteout = v;
+}
+
+static void btree_bounce_free(struct bch_fs *c, size_t size,
+                             bool used_mempool, void *p)
+{
+       if (used_mempool)
+               mempool_free(p, &c->btree_bounce_pool);
+       else
+               vpfree(p, size);
+}
+
+static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
+                               bool *used_mempool)
+{
+       unsigned flags = memalloc_nofs_save();
+       void *p;
+
+       BUG_ON(size > btree_bytes(c));
+
+       *used_mempool = false;
+       p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
+       if (!p) {
+               *used_mempool = true;
+               p = mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
+       }
+       memalloc_nofs_restore(flags);
+       return p;
+}
+
+static void sort_bkey_ptrs(const struct btree *bt,
+                          struct bkey_packed **ptrs, unsigned nr)
+{
+       unsigned n = nr, a = nr / 2, b, c, d;
+
+       if (!a)
+               return;
+
+       /*
+        * Bottom-up heap sort, ala lib/sort.c: first heapify (while a > 0),
+        * then repeatedly swap the max to the end and re-sift:
+        */
+       while (1) {
+               if (a)
+                       a--;
+               else if (--n)
+                       swap(ptrs[0], ptrs[n]);
+               else
+                       break;
+
+               /* Sift ptrs[a] down: descend to a leaf via the larger child: */
+               for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
+                       b = bkey_cmp_packed(bt,
+                                           ptrs[c],
+                                           ptrs[d]) >= 0 ? c : d;
+               if (d == n)
+                       b = c;
+
+               /* Climb back up to where ptrs[a] actually belongs: */
+               while (b != a &&
+                      bkey_cmp_packed(bt,
+                                      ptrs[a],
+                                      ptrs[b]) >= 0)
+                       b = (b - 1) / 2;
+
+               /* Rotate the path, moving ptrs[a] down to the spot found above: */
+               c = b;
+               while (b != a) {
+                       b = (b - 1) / 2;
+                       swap(ptrs[b], ptrs[c]);
+               }
+       }
+}
+
+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
+{
+       struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
+       bool used_mempool = false;
+       size_t bytes = b->whiteout_u64s * sizeof(u64);
+
+       if (!b->whiteout_u64s)
+               return;
+
+       new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
+
+       ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
+
+       for (k = unwritten_whiteouts_start(c, b);
+            k != unwritten_whiteouts_end(c, b);
+            k = bkey_next(k))
+               *--ptrs = k;
+
+       sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs);
+
+       k = new_whiteouts;
+
+       while (ptrs != ptrs_end) {
+               bkey_copy(k, *ptrs);
+               k = bkey_next(k);
+               ptrs++;
+       }
+
+       verify_no_dups(b, new_whiteouts,
+                      (void *) ((u64 *) new_whiteouts + b->whiteout_u64s),
+                      btree_node_old_extent_overwrite(b));
+
+       memcpy_u64s(unwritten_whiteouts_start(c, b),
+                   new_whiteouts, b->whiteout_u64s);
+
+       btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
+}
+
+static bool should_compact_bset(struct btree *b, struct bset_tree *t,
+                               bool compacting, enum compact_mode mode)
+{
+       if (!bset_dead_u64s(b, t))
+               return false;
+
+       switch (mode) {
+       case COMPACT_LAZY:
+               return should_compact_bset_lazy(b, t) ||
+                       (compacting && !bset_written(b, bset(b, t)));
+       case COMPACT_ALL:
+               return true;
+       default:
+               BUG();
+       }
+}
+
+static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
+                                         struct btree *b,
+                                         enum compact_mode mode)
+{
+       const struct bkey_format *f = &b->format;
+       struct bset_tree *t;
+       struct bkey_packed *whiteouts = NULL;
+       struct bkey_packed *u_start, *u_pos;
+       struct sort_iter sort_iter;
+       unsigned bytes, whiteout_u64s = 0, u64s;
+       bool used_mempool, compacting = false;
+
+       BUG_ON(!btree_node_is_extents(b));
+
+       for_each_bset(b, t)
+               if (should_compact_bset(b, t, whiteout_u64s != 0, mode))
+                       whiteout_u64s += bset_dead_u64s(b, t);
+
+       if (!whiteout_u64s)
+               return false;
+
+       bch2_sort_whiteouts(c, b);
+
+       sort_iter_init(&sort_iter, b);
+
+       whiteout_u64s += b->whiteout_u64s;
+       bytes = whiteout_u64s * sizeof(u64);
+
+       whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
+       u_start = u_pos = whiteouts;
+
+       memcpy_u64s(u_pos, unwritten_whiteouts_start(c, b),
+                   b->whiteout_u64s);
+       u_pos = (void *) u_pos + b->whiteout_u64s * sizeof(u64);
+
+       sort_iter_add(&sort_iter, u_start, u_pos);
+
+       for_each_bset(b, t) {
+               struct bset *i = bset(b, t);
+               struct bkey_packed *k, *n, *out, *start, *end;
+               struct btree_node_entry *src = NULL, *dst = NULL;
+
+               if (t != b->set && !bset_written(b, i)) {
+                       src = container_of(i, struct btree_node_entry, keys);
+                       dst = max(write_block(b),
+                                 (void *) btree_bkey_last(b, t - 1));
+               }
+
+               if (src != dst)
+                       compacting = true;
+
+               if (!should_compact_bset(b, t, compacting, mode)) {
+                       if (src != dst) {
+                               memmove(dst, src, sizeof(*src) +
+                                       le16_to_cpu(src->keys.u64s) *
+                                       sizeof(u64));
+                               i = &dst->keys;
+                               set_btree_bset(b, t, i);
+                       }
+                       continue;
+               }
+
+               compacting = true;
+               u_start = u_pos;
+               start = i->start;
+               end = vstruct_last(i);
+
+               if (src != dst) {
+                       memmove(dst, src, sizeof(*src));
+                       i = &dst->keys;
+                       set_btree_bset(b, t, i);
+               }
+
+               out = i->start;
+
+               for (k = start; k != end; k = n) {
+                       n = bkey_next_skip_noops(k, end);
+
+                       if (bkey_deleted(k))
+                               continue;
+
+                       BUG_ON(bkey_whiteout(k) &&
+                              k->needs_whiteout &&
+                              bkey_written(b, k));
+
+                       if (bkey_whiteout(k) && !k->needs_whiteout)
+                               continue;
+
+                       if (bkey_whiteout(k)) {
+                               memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k));
+                               set_bkeyp_val_u64s(f, u_pos, 0);
+                               u_pos = bkey_next(u_pos);
+                       } else {
+                               bkey_copy(out, k);
+                               out = bkey_next(out);
+                       }
+               }
+
+               sort_iter_add(&sort_iter, u_start, u_pos);
+
+               i->u64s = cpu_to_le16((u64 *) out - i->_data);
+               set_btree_bset_end(b, t);
+               bch2_bset_set_no_aux_tree(b, t);
+       }
+
+       b->whiteout_u64s = (u64 *) u_pos - (u64 *) whiteouts;
+
+       BUG_ON((void *) unwritten_whiteouts_start(c, b) <
+              (void *) btree_bkey_last(b, bset_tree_last(b)));
+
+       u64s = bch2_sort_extent_whiteouts(unwritten_whiteouts_start(c, b),
+                                         &sort_iter);
+
+       BUG_ON(u64s > b->whiteout_u64s);
+       BUG_ON(u_pos != whiteouts && !u64s);
+
+       if (u64s != b->whiteout_u64s) {
+               void *src = unwritten_whiteouts_start(c, b);
+
+               b->whiteout_u64s = u64s;
+               memmove_u64s_up(unwritten_whiteouts_start(c, b), src, u64s);
+       }
+
+       verify_no_dups(b,
+                      unwritten_whiteouts_start(c, b),
+                      unwritten_whiteouts_end(c, b),
+                      true);
+
+       btree_bounce_free(c, bytes, used_mempool, whiteouts);
+
+       bch2_btree_build_aux_trees(b);
+
+       bch_btree_keys_u64s_remaining(c, b);
+       bch2_verify_btree_nr_keys(b);
+
+       return true;
+}
+
+static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
+{
+       struct bset_tree *t;
+       bool ret = false;
+
+       for_each_bset(b, t) {
+               struct bset *i = bset(b, t);
+               struct bkey_packed *k, *n, *out, *start, *end;
+               struct btree_node_entry *src = NULL, *dst = NULL;
+
+               if (t != b->set && !bset_written(b, i)) {
+                       src = container_of(i, struct btree_node_entry, keys);
+                       dst = max(write_block(b),
+                                 (void *) btree_bkey_last(b, t - 1));
+               }
+
+               if (src != dst)
+                       ret = true;
+
+               if (!should_compact_bset(b, t, ret, mode)) {
+                       if (src != dst) {
+                               memmove(dst, src, sizeof(*src) +
+                                       le16_to_cpu(src->keys.u64s) *
+                                       sizeof(u64));
+                               i = &dst->keys;
+                               set_btree_bset(b, t, i);
+                       }
+                       continue;
+               }
+
+               start   = btree_bkey_first(b, t);
+               end     = btree_bkey_last(b, t);
+
+               if (src != dst) {
+                       memmove(dst, src, sizeof(*src));
+                       i = &dst->keys;
+                       set_btree_bset(b, t, i);
+               }
+
+               out = i->start;
+
+               for (k = start; k != end; k = n) {
+                       n = bkey_next_skip_noops(k, end);
+
+                       if (!bkey_whiteout(k)) {
+                               bkey_copy(out, k);
+                               out = bkey_next(out);
+                       } else {
+                               BUG_ON(k->needs_whiteout);
+                       }
+               }
+
+               i->u64s = cpu_to_le16((u64 *) out - i->_data);
+               set_btree_bset_end(b, t);
+               bch2_bset_set_no_aux_tree(b, t);
+               ret = true;
+       }
+
+       bch2_verify_btree_nr_keys(b);
+
+       bch2_btree_build_aux_trees(b);
+
+       return ret;
+}
+
+bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
+                           enum compact_mode mode)
+{
+       return !btree_node_old_extent_overwrite(b)
+               ? bch2_drop_whiteouts(b, mode)
+               : bch2_compact_extent_whiteouts(c, b, mode);
+}
+
+static void btree_node_sort(struct bch_fs *c, struct btree *b,
+                           struct btree_iter *iter,
+                           unsigned start_idx,
+                           unsigned end_idx,
+                           bool filter_whiteouts)
+{
+       struct btree_node *out;
+       struct sort_iter sort_iter;
+       struct bset_tree *t;
+       struct bset *start_bset = bset(b, &b->set[start_idx]);
+       bool used_mempool = false;
+       u64 start_time, seq = 0;
+       unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1;
+       bool sorting_entire_node = start_idx == 0 &&
+               end_idx == b->nsets;
+
+       sort_iter_init(&sort_iter, b);
+
+       for (t = b->set + start_idx;
+            t < b->set + end_idx;
+            t++) {
+               u64s += le16_to_cpu(bset(b, t)->u64s);
+               sort_iter_add(&sort_iter,
+                             btree_bkey_first(b, t),
+                             btree_bkey_last(b, t));
+       }
+
+       bytes = sorting_entire_node
+               ? btree_bytes(c)
+               : __vstruct_bytes(struct btree_node, u64s);
+
+       out = btree_bounce_alloc(c, bytes, &used_mempool);
+
+       start_time = local_clock();
+
+       if (btree_node_old_extent_overwrite(b))
+               filter_whiteouts = bset_written(b, start_bset);
+
+       u64s = (btree_node_old_extent_overwrite(b)
+               ? bch2_sort_extents
+               : bch2_sort_keys)(out->keys.start,
+                                 &sort_iter,
+                                 filter_whiteouts);
+
+       out->keys.u64s = cpu_to_le16(u64s);
+
+       BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);
+
+       if (sorting_entire_node)
+               bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
+                                      start_time);
+
+       /* Make sure we preserve bset journal_seq: */
+       for (t = b->set + start_idx; t < b->set + end_idx; t++)
+               seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq));
+       start_bset->journal_seq = cpu_to_le64(seq);
+
+       if (sorting_entire_node) {
+               unsigned u64s = le16_to_cpu(out->keys.u64s);
+
+               BUG_ON(bytes != btree_bytes(c));
+
+               /*
+                * Our temporary buffer is the same size as the btree node's
+                * buffer, we can just swap buffers instead of doing a big
+                * memcpy()
+                */
+               *out = *b->data;
+               out->keys.u64s = cpu_to_le16(u64s);
+               swap(out, b->data);
+               set_btree_bset(b, b->set, &b->data->keys);
+       } else {
+               start_bset->u64s = out->keys.u64s;
+               memcpy_u64s(start_bset->start,
+                           out->keys.start,
+                           le16_to_cpu(out->keys.u64s));
+       }
+
+       for (i = start_idx + 1; i < end_idx; i++)
+               b->nr.bset_u64s[start_idx] +=
+                       b->nr.bset_u64s[i];
+
+       b->nsets -= shift;
+
+       for (i = start_idx + 1; i < b->nsets; i++) {
+               b->nr.bset_u64s[i]      = b->nr.bset_u64s[i + shift];
+               b->set[i]               = b->set[i + shift];
+       }
+
+       for (i = b->nsets; i < MAX_BSETS; i++)
+               b->nr.bset_u64s[i] = 0;
+
+       set_btree_bset_end(b, &b->set[start_idx]);
+       bch2_bset_set_no_aux_tree(b, &b->set[start_idx]);
+
+       btree_bounce_free(c, bytes, used_mempool, out);
+
+       bch2_verify_btree_nr_keys(b);
+}
+
+void bch2_btree_sort_into(struct bch_fs *c,
+                        struct btree *dst,
+                        struct btree *src)
+{
+       struct btree_nr_keys nr;
+       struct btree_node_iter src_iter;
+       u64 start_time = local_clock();
+
+       BUG_ON(dst->nsets != 1);
+
+       bch2_bset_set_no_aux_tree(dst, dst->set);
+
+       bch2_btree_node_iter_init_from_start(&src_iter, src);
+
+       if (btree_node_is_extents(src))
+               nr = bch2_sort_repack_merge(c, btree_bset_first(dst),
+                               src, &src_iter,
+                               &dst->format,
+                               true);
+       else
+               nr = bch2_sort_repack(btree_bset_first(dst),
+                               src, &src_iter,
+                               &dst->format,
+                               true);
+
+       bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
+                              start_time);
+
+       set_btree_bset_end(dst, dst->set);
+
+       dst->nr.live_u64s       += nr.live_u64s;
+       dst->nr.bset_u64s[0]    += nr.bset_u64s[0];
+       dst->nr.packed_keys     += nr.packed_keys;
+       dst->nr.unpacked_keys   += nr.unpacked_keys;
+
+       bch2_verify_btree_nr_keys(dst);
+}
+
+#define SORT_CRIT      (4096 / sizeof(u64))
+
+/*
+ * We're about to add another bset to the btree node, so if there's currently
+ * too many bsets - sort some of them together:
+ */
+static bool btree_node_compact(struct bch_fs *c, struct btree *b,
+                              struct btree_iter *iter)
+{
+       unsigned unwritten_idx;
+       bool ret = false;
+
+       for (unwritten_idx = 0;
+            unwritten_idx < b->nsets;
+            unwritten_idx++)
+               if (!bset_written(b, bset(b, &b->set[unwritten_idx])))
+                       break;
+
+       if (b->nsets - unwritten_idx > 1) {
+               btree_node_sort(c, b, iter, unwritten_idx,
+                               b->nsets, false);
+               ret = true;
+       }
+
+       if (unwritten_idx > 1) {
+               btree_node_sort(c, b, iter, 0, unwritten_idx, false);
+               ret = true;
+       }
+
+       return ret;
+}
+
+void bch2_btree_build_aux_trees(struct btree *b)
+{
+       struct bset_tree *t;
+
+       for_each_bset(b, t)
+               bch2_bset_build_aux_tree(b, t,
+                               !bset_written(b, bset(b, t)) &&
+                               t == bset_tree_last(b));
+}
+
+/*
+ * bch2_btree_init_next - initialize a new (unwritten) bset that can then be
+ * inserted into
+ *
+ * Safe to call if there already is an unwritten bset - will only add a new
+ * bset if @b doesn't already have one.
+ *
+ * If this sorted (invalidating iterators), @iter is reinitialized via
+ * bch2_btree_iter_reinit_node() rather than signalled by a return value.
+ */
+void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
+                         struct btree_iter *iter)
+{
+       struct btree_node_entry *bne;
+       bool did_sort;
+
+       EBUG_ON(!(b->c.lock.state.seq & 1));
+       EBUG_ON(iter && iter->l[b->c.level].b != b);
+
+       did_sort = btree_node_compact(c, b, iter);
+
+       bne = want_new_bset(c, b);
+       if (bne)
+               bch2_bset_init_next(c, b, bne);
+
+       bch2_btree_build_aux_trees(b);
+
+       if (iter && did_sort)
+               bch2_btree_iter_reinit_node(iter, b);
+}
+
+static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
+                         struct btree *b, struct bset *i,
+                         unsigned offset, int write)
+{
+       pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n"
+              "pos ",
+              write ? "before write " : "",
+              b->c.btree_id, b->c.level,
+              c->btree_roots[b->c.btree_id].level);
+       bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+
+       pr_buf(out, " node offset %u", b->written);
+       if (i)
+               pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s));
+}
+
+enum btree_err_type {
+       BTREE_ERR_FIXABLE,
+       BTREE_ERR_WANT_RETRY,
+       BTREE_ERR_MUST_RETRY,
+       BTREE_ERR_FATAL,
+};
+
+enum btree_validate_ret {
+       BTREE_RETRY_READ = 64,
+};
+
+#define btree_err(type, c, b, i, msg, ...)                             \
+({                                                                     \
+       __label__ out;                                                  \
+       char _buf[300];                                                 \
+       struct printbuf out = PBUF(_buf);                               \
+                                                                       \
+       btree_err_msg(&out, c, b, i, b->written, write);                \
+       pr_buf(&out, ": " msg, ##__VA_ARGS__);                          \
+                                                                       \
+       if (type == BTREE_ERR_FIXABLE &&                                \
+           write == READ &&                                            \
+           !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {             \
+               mustfix_fsck_err(c, "%s", _buf);                        \
+               goto out;                                               \
+       }                                                               \
+                                                                       \
+       switch (write) {                                                \
+       case READ:                                                      \
+               bch_err(c, "%s", _buf);                                 \
+                                                                       \
+               switch (type) {                                         \
+               case BTREE_ERR_FIXABLE:                                 \
+                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       goto fsck_err;                                  \
+               case BTREE_ERR_WANT_RETRY:                              \
+                       if (have_retry) {                               \
+                               ret = BTREE_RETRY_READ;                 \
+                               goto fsck_err;                          \
+                       }                                               \
+                       break;                                          \
+               case BTREE_ERR_MUST_RETRY:                              \
+                       ret = BTREE_RETRY_READ;                         \
+                       goto fsck_err;                                  \
+               case BTREE_ERR_FATAL:                                   \
+                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       goto fsck_err;                                  \
+               }                                                       \
+               break;                                                  \
+       case WRITE:                                                     \
+               bch_err(c, "corrupt metadata before write: %s", _buf);  \
+                                                                       \
+               if (bch2_fs_inconsistent(c)) {                          \
+                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       goto fsck_err;                                  \
+               }                                                       \
+               break;                                                  \
+       }                                                               \
+out:                                                                   \
+       true;                                                           \
+})
+
+#define btree_err_on(cond, ...)        ((cond) ? btree_err(__VA_ARGS__) : false)
+
+static int validate_bset(struct bch_fs *c, struct btree *b,
+                        struct bset *i, unsigned sectors,
+                        int write, bool have_retry)
+{
+       unsigned version = le16_to_cpu(i->version);
+       const char *err;
+       int ret = 0;
+
+       btree_err_on((version != BCH_BSET_VERSION_OLD &&
+                     version < bcachefs_metadata_version_min) ||
+                    version >= bcachefs_metadata_version_max,
+                    BTREE_ERR_FATAL, c, b, i,
+                    "unsupported bset version");
+
+       if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
+                        BTREE_ERR_FIXABLE, c, b, i,
+                        "bset past end of btree node")) {
+               i->u64s = 0;
+               return 0;
+       }
+
+       btree_err_on(b->written && !i->u64s,
+                    BTREE_ERR_FIXABLE, c, b, i,
+                    "empty bset");
+
+       if (!b->written) {
+               struct btree_node *bn =
+                       container_of(i, struct btree_node, keys);
+               /* These indicate that we read the wrong btree node: */
+
+               if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+                       struct bch_btree_ptr_v2 *bp =
+                               &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+                       /* XXX endianness */
+                       btree_err_on(bp->seq != bn->keys.seq,
+                                    BTREE_ERR_MUST_RETRY, c, b, NULL,
+                                    "incorrect sequence number (wrong btree node)");
+               }
+
+               btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id,
+                            BTREE_ERR_MUST_RETRY, c, b, i,
+                            "incorrect btree id");
+
+               btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level,
+                            BTREE_ERR_MUST_RETRY, c, b, i,
+                            "incorrect level");
+
+               if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
+                       u64 *p = (u64 *) &bn->ptr;
+
+                       *p = swab64(*p);
+               }
+
+               if (!write)
+                       compat_btree_node(b->c.level, b->c.btree_id, version,
+                                         BSET_BIG_ENDIAN(i), write, bn);
+
+               if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+                       struct bch_btree_ptr_v2 *bp =
+                               &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+                       btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
+                                    BTREE_ERR_MUST_RETRY, c, b, NULL,
+                                    "incorrect min_key: got %llu:%llu should be %llu:%llu",
+                                    b->data->min_key.inode,
+                                    b->data->min_key.offset,
+                                    bp->min_key.inode,
+                                    bp->min_key.offset);
+               }
+
+               btree_err_on(bkey_cmp(bn->max_key, b->key.k.p),
+                            BTREE_ERR_MUST_RETRY, c, b, i,
+                            "incorrect max key");
+
+               if (write)
+                       compat_btree_node(b->c.level, b->c.btree_id, version,
+                                         BSET_BIG_ENDIAN(i), write, bn);
+
+               /* XXX: ideally we would be validating min_key too */
+#if 0
+               /*
+                * not correct anymore, due to btree node write error
+                * handling
+                *
+                * need to add bn->seq to btree keys and verify
+                * against that
+                */
+               btree_err_on(!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
+                                                 bn->ptr),
+                            BTREE_ERR_FATAL, c, b, i,
+                            "incorrect backpointer");
+#endif
+               err = bch2_bkey_format_validate(&bn->format);
+               btree_err_on(err,
+                            BTREE_ERR_FATAL, c, b, i,
+                            "invalid bkey format: %s", err);
+
+               compat_bformat(b->c.level, b->c.btree_id, version,
+                              BSET_BIG_ENDIAN(i), write,
+                              &bn->format);
+       }
+fsck_err:
+       return ret;
+}
+
+static int validate_bset_keys(struct bch_fs *c, struct btree *b,
+                        struct bset *i, unsigned *whiteout_u64s,
+                        int write, bool have_retry)
+{
+       unsigned version = le16_to_cpu(i->version);
+       struct bkey_packed *k, *prev = NULL;
+       bool seen_non_whiteout = false;
+       int ret = 0;
+
+       if (!BSET_SEPARATE_WHITEOUTS(i)) {
+               seen_non_whiteout = true;
+               *whiteout_u64s = 0;
+       }
+
+       for (k = i->start;
+            k != vstruct_last(i);) {
+               struct bkey_s u;
+               struct bkey tmp;
+               const char *invalid;
+
+               if (btree_err_on(bkey_next(k) > vstruct_last(i),
+                                BTREE_ERR_FIXABLE, c, b, i,
+                                "key extends past end of bset")) {
+                       i->u64s = cpu_to_le16((u64 *) k - i->_data);
+                       break;
+               }
+
+               if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
+                                BTREE_ERR_FIXABLE, c, b, i,
+                                "invalid bkey format %u", k->format)) {
+                       i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
+                       memmove_u64s_down(k, bkey_next(k),
+                                         (u64 *) vstruct_end(i) - (u64 *) k);
+                       continue;
+               }
+
+               /* XXX: validate k->u64s */
+               if (!write)
+                       bch2_bkey_compat(b->c.level, b->c.btree_id, version,
+                                   BSET_BIG_ENDIAN(i), write,
+                                   &b->format, k);
+
+               u = __bkey_disassemble(b, k, &tmp);
+
+               invalid = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b)) ?:
+                       bch2_bkey_in_btree_node(b, u.s_c) ?:
+                       (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL);
+               if (invalid) {
+                       char buf[160];
+
+                       bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
+                       btree_err(BTREE_ERR_FIXABLE, c, b, i,
+                                 "invalid bkey:\n%s\n%s", invalid, buf);
+
+                       i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
+                       memmove_u64s_down(k, bkey_next(k),
+                                         (u64 *) vstruct_end(i) - (u64 *) k);
+                       continue;
+               }
+
+               if (write)
+                       bch2_bkey_compat(b->c.level, b->c.btree_id, version,
+                                   BSET_BIG_ENDIAN(i), write,
+                                   &b->format, k);
+
+               /*
+                * with the separate whiteouts thing (used for extents), the
+                * second set of keys actually can have whiteouts too, so we
+                * can't solely go off bkey_whiteout()...
+                */
+
+               if (!seen_non_whiteout &&
+                   (!bkey_whiteout(k) ||
+                    (prev && bkey_iter_cmp(b, prev, k) > 0))) {
+                       *whiteout_u64s = k->_data - i->_data;
+                       seen_non_whiteout = true;
+               } else if (prev && bkey_iter_cmp(b, prev, k) > 0) {
+                       char buf1[80];
+                       char buf2[80];
+                       struct bkey up = bkey_unpack_key(b, prev);
+
+                       bch2_bkey_to_text(&PBUF(buf1), &up);
+                       bch2_bkey_to_text(&PBUF(buf2), u.k);
+
+                       bch2_dump_bset(c, b, i, 0);
+                       btree_err(BTREE_ERR_FATAL, c, b, i,
+                                 "keys out of order: %s > %s",
+                                 buf1, buf2);
+                       /* XXX: repair this */
+               }
+
+               prev = k;
+               k = bkey_next_skip_noops(k, vstruct_last(i));
+       }
+fsck_err:
+       return ret;
+}
+
+int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry)
+{
+       struct btree_node_entry *bne;
+       struct sort_iter *iter;
+       struct btree_node *sorted;
+       struct bkey_packed *k;
+       struct bch_extent_ptr *ptr;
+       struct bset *i;
+       bool used_mempool, blacklisted;
+       unsigned u64s;
+       int ret, retry_read = 0, write = READ;
+
+       iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
+       sort_iter_init(iter, b);
+       iter->size = (btree_blocks(c) + 1) * 2;
+
+       if (bch2_meta_read_fault("btree"))
+               btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
+                         "dynamic fault");
+
+       btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
+                    BTREE_ERR_MUST_RETRY, c, b, NULL,
+                    "bad magic");
+
+       btree_err_on(!b->data->keys.seq,
+                    BTREE_ERR_MUST_RETRY, c, b, NULL,
+                    "bad btree header");
+
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               struct bch_btree_ptr_v2 *bp =
+                       &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+               btree_err_on(b->data->keys.seq != bp->seq,
+                            BTREE_ERR_MUST_RETRY, c, b, NULL,
+                            "got wrong btree node (seq %llx want %llx)",
+                            b->data->keys.seq, bp->seq);
+       }
+
+       while (b->written < c->opts.btree_node_size) {
+               unsigned sectors, whiteout_u64s = 0;
+               struct nonce nonce;
+               struct bch_csum csum;
+               bool first = !b->written;
+
+               if (!b->written) {
+                       i = &b->data->keys;
+
+                       btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
+                                    BTREE_ERR_WANT_RETRY, c, b, i,
+                                    "unknown checksum type");
+
+                       nonce = btree_nonce(i, b->written << 9);
+                       csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+
+                       btree_err_on(bch2_crc_cmp(csum, b->data->csum),
+                                    BTREE_ERR_WANT_RETRY, c, b, i,
+                                    "invalid checksum");
+
+                       bset_encrypt(c, i, b->written << 9);
+
+                       if (btree_node_is_extents(b) &&
+                           !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) {
+                               set_btree_node_old_extent_overwrite(b);
+                               set_btree_node_need_rewrite(b);
+                       }
+
+                       sectors = vstruct_sectors(b->data, c->block_bits);
+               } else {
+                       bne = write_block(b);
+                       i = &bne->keys;
+
+                       if (i->seq != b->data->keys.seq)
+                               break;
+
+                       btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
+                                    BTREE_ERR_WANT_RETRY, c, b, i,
+                                    "unknown checksum type");
+
+                       nonce = btree_nonce(i, b->written << 9);
+                       csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
+                       btree_err_on(bch2_crc_cmp(csum, bne->csum),
+                                    BTREE_ERR_WANT_RETRY, c, b, i,
+                                    "invalid checksum");
+
+                       bset_encrypt(c, i, b->written << 9);
+
+                       sectors = vstruct_sectors(bne, c->block_bits);
+               }
+
+               ret = validate_bset(c, b, i, sectors,
+                                   READ, have_retry);
+               if (ret)
+                       goto fsck_err;
+
+               if (!b->written)
+                       btree_node_set_format(b, b->data->format);
+
+               ret = validate_bset_keys(c, b, i, &whiteout_u64s,
+                                   READ, have_retry);
+               if (ret)
+                       goto fsck_err;
+
+               SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
+               b->written += sectors;
+
+               blacklisted = bch2_journal_seq_is_blacklisted(c,
+                                       le64_to_cpu(i->journal_seq),
+                                       true);
+
+               btree_err_on(blacklisted && first,
+                            BTREE_ERR_FIXABLE, c, b, i,
+                            "first btree node bset has blacklisted journal seq");
+               if (blacklisted && !first)
+                       continue;
+
+               sort_iter_add(iter, i->start,
+                             vstruct_idx(i, whiteout_u64s));
+
+               sort_iter_add(iter,
+                             vstruct_idx(i, whiteout_u64s),
+                             vstruct_last(i));
+       }
+
+       for (bne = write_block(b);
+            bset_byte_offset(b, bne) < btree_bytes(c);
+            bne = (void *) bne + block_bytes(c))
+               btree_err_on(bne->keys.seq == b->data->keys.seq,
+                            BTREE_ERR_WANT_RETRY, c, b, NULL,
+                            "found bset signature after last bset");
+
+       sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
+       sorted->keys.u64s = 0;
+
+       set_btree_bset(b, b->set, &b->data->keys);
+
+       b->nr = (btree_node_old_extent_overwrite(b)
+                ? bch2_extent_sort_fix_overlapping
+                : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter);
+
+       u64s = le16_to_cpu(sorted->keys.u64s);
+       *sorted = *b->data;
+       sorted->keys.u64s = cpu_to_le16(u64s);
+       swap(sorted, b->data);
+       set_btree_bset(b, b->set, &b->data->keys);
+       b->nsets = 1;
+
+       BUG_ON(b->nr.live_u64s != u64s);
+
+       btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
+
+       i = &b->data->keys;
+       for (k = i->start; k != vstruct_last(i);) {
+               struct bkey tmp;
+               struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+               const char *invalid = bch2_bkey_val_invalid(c, u.s_c);
+
+               if (invalid ||
+                   (inject_invalid_keys(c) &&
+                    !bversion_cmp(u.k->version, MAX_VERSION))) {
+                       char buf[160];
+
+                       bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
+                       btree_err(BTREE_ERR_FIXABLE, c, b, i,
+                                 "invalid bkey %s: %s", buf, invalid);
+
+                       btree_keys_account_key_drop(&b->nr, 0, k);
+
+                       i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
+                       memmove_u64s_down(k, bkey_next(k),
+                                         (u64 *) vstruct_end(i) - (u64 *) k);
+                       set_btree_bset_end(b, b->set);
+                       continue;
+               }
+
+               if (u.k->type == KEY_TYPE_btree_ptr_v2) {
+                       struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
+
+                       bp.v->mem_ptr = 0;
+               }
+
+               k = bkey_next_skip_noops(k, vstruct_last(i));
+       }
+
+       bch2_bset_build_aux_tree(b, b->set, false);
+
+       set_needs_whiteout(btree_bset_first(b), true);
+
+       btree_node_reset_sib_u64s(b);
+
+       bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+               if (ca->mi.state != BCH_MEMBER_STATE_RW)
+                       set_btree_node_need_rewrite(b);
+       }
+out:
+       mempool_free(iter, &c->fill_iter);
+       return retry_read;
+fsck_err:
+       if (ret == BTREE_RETRY_READ) {
+               retry_read = 1;
+       } else {
+               bch2_inconsistent_error(c);
+               set_btree_node_read_error(b);
+       }
+       goto out;
+}
+
+static void btree_node_read_work(struct work_struct *work)
+{
+       struct btree_read_bio *rb =
+               container_of(work, struct btree_read_bio, work);
+       struct bch_fs *c        = rb->c;
+       struct bch_dev *ca      = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
+       struct btree *b         = rb->bio.bi_private;
+       struct bio *bio         = &rb->bio;
+       struct bch_io_failures failed = { .nr = 0 };
+       bool can_retry;
+
+       goto start;
+       while (1) {
+               bch_info(c, "retrying read");
+               ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
+               rb->have_ioref          = bch2_dev_get_ioref(ca, READ);
+               bio_reset(bio);
+               bio->bi_opf             = REQ_OP_READ|REQ_SYNC|REQ_META;
+               bio->bi_iter.bi_sector  = rb->pick.ptr.offset;
+               bio->bi_iter.bi_size    = btree_bytes(c);
+
+               if (rb->have_ioref) {
+                       bio_set_dev(bio, ca->disk_sb.bdev);
+                       submit_bio_wait(bio);
+               } else {
+                       bio->bi_status = BLK_STS_REMOVED;
+               }
+start:
+               bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s",
+                                  bch2_blk_status_to_str(bio->bi_status));
+               if (rb->have_ioref)
+                       percpu_ref_put(&ca->io_ref);
+               rb->have_ioref = false;
+
+               bch2_mark_io_failure(&failed, &rb->pick);
+
+               can_retry = bch2_bkey_pick_read_device(c,
+                               bkey_i_to_s_c(&b->key),
+                               &failed, &rb->pick) > 0;
+
+               if (!bio->bi_status &&
+                   !bch2_btree_node_read_done(c, b, can_retry))
+                       break;
+
+               if (!can_retry) {
+                       set_btree_node_read_error(b);
+                       break;
+               }
+       }
+
+       bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+                              rb->start_time);
+       bio_put(&rb->bio);
+       clear_btree_node_read_in_flight(b);
+       wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
+}
+
+static void btree_node_read_endio(struct bio *bio)
+{
+       struct btree_read_bio *rb =
+               container_of(bio, struct btree_read_bio, bio);
+       struct bch_fs *c        = rb->c;
+
+       if (rb->have_ioref) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
+               bch2_latency_acct(ca, rb->start_time, READ);
+       }
+
+       queue_work(system_unbound_wq, &rb->work);
+}
+
+void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
+                         bool sync)
+{
+       struct extent_ptr_decoded pick;
+       struct btree_read_bio *rb;
+       struct bch_dev *ca;
+       struct bio *bio;
+       int ret;
+
+       trace_btree_read(c, b);
+
+       ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
+                                        NULL, &pick);
+       if (bch2_fs_fatal_err_on(ret <= 0, c,
+                       "btree node read error: no device to read from")) {
+               set_btree_node_read_error(b);
+               return;
+       }
+
+       ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+       bio = bio_alloc_bioset(GFP_NOIO, buf_pages(b->data,
+                                                  btree_bytes(c)),
+                              &c->btree_bio);
+       rb = container_of(bio, struct btree_read_bio, bio);
+       rb->c                   = c;
+       rb->start_time          = local_clock();
+       rb->have_ioref          = bch2_dev_get_ioref(ca, READ);
+       rb->pick                = pick;
+       INIT_WORK(&rb->work, btree_node_read_work);
+       bio->bi_opf             = REQ_OP_READ|REQ_SYNC|REQ_META;
+       bio->bi_iter.bi_sector  = pick.ptr.offset;
+       bio->bi_end_io          = btree_node_read_endio;
+       bio->bi_private         = b;
+       bch2_bio_map(bio, b->data, btree_bytes(c));
+
+       set_btree_node_read_in_flight(b);
+
+       if (rb->have_ioref) {
+               this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
+                            bio_sectors(bio));
+               bio_set_dev(bio, ca->disk_sb.bdev);
+
+               if (sync) {
+                       submit_bio_wait(bio);
+
+                       bio->bi_private = b;
+                       btree_node_read_work(&rb->work);
+               } else {
+                       submit_bio(bio);
+               }
+       } else {
+               bio->bi_status = BLK_STS_REMOVED;
+
+               if (sync)
+                       btree_node_read_work(&rb->work);
+               else
+                       queue_work(system_unbound_wq, &rb->work);
+       }
+}
+
+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
+                       const struct bkey_i *k, unsigned level)
+{
+       struct closure cl;
+       struct btree *b;
+       int ret;
+
+       closure_init_stack(&cl);
+
+       do {
+               ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+               closure_sync(&cl);
+       } while (ret);
+
+       b = bch2_btree_node_mem_alloc(c);
+       bch2_btree_cache_cannibalize_unlock(c);
+
+       BUG_ON(IS_ERR(b));
+
+       bkey_copy(&b->key, k);
+       BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
+
+       bch2_btree_node_read(c, b, true);
+
+       if (btree_node_read_error(b)) {
+               bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+               mutex_lock(&c->btree_cache.lock);
+               list_move(&b->list, &c->btree_cache.freeable);
+               mutex_unlock(&c->btree_cache.lock);
+
+               ret = -EIO;
+               goto err;
+       }
+
+       bch2_btree_set_root_for_read(c, b);
+err:
+       six_unlock_write(&b->c.lock);
+       six_unlock_intent(&b->c.lock);
+
+       return ret;
+}
+
+void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
+                             struct btree_write *w)
+{
+       unsigned long old, new, v = READ_ONCE(b->will_make_reachable);
+
+       do {
+               old = new = v;
+               if (!(old & 1))
+                       break;
+
+               new &= ~1UL;
+       } while ((v = cmpxchg(&b->will_make_reachable, old, new)) != old);
+
+       if (old & 1)
+               closure_put(&((struct btree_update *) new)->cl);
+
+       bch2_journal_pin_drop(&c->journal, &w->journal);
+}
+
+static void btree_node_write_done(struct bch_fs *c, struct btree *b)
+{
+       struct btree_write *w = btree_prev_write(b);
+
+       bch2_btree_complete_write(c, b, w);
+       btree_node_io_unlock(b);
+}
+
+static void bch2_btree_node_write_error(struct bch_fs *c,
+                                       struct btree_write_bio *wbio)
+{
+       struct btree *b         = wbio->wbio.bio.bi_private;
+       __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+       struct bch_extent_ptr *ptr;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p,
+                                       BTREE_MAX_DEPTH, b->c.level, 0);
+retry:
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               goto err;
+
+       /* has the node been freed? */
+       if (iter->l[b->c.level].b != b) {
+               BUG_ON(!btree_node_dying(b));
+               goto out;
+       }
+
+       BUG_ON(!btree_node_hashed(b));
+
+       bkey_copy(&tmp.k, &b->key);
+
+       bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr,
+               bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
+
+       if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k)))
+               goto err;
+
+       ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
+       if (ret == -EINTR)
+               goto retry;
+       if (ret)
+               goto err;
+out:
+       bch2_trans_exit(&trans);
+       bio_put(&wbio->wbio.bio);
+       btree_node_write_done(c, b);
+       return;
+err:
+       set_btree_node_noevict(b);
+       bch2_fs_fatal_error(c, "fatal error writing btree node");
+       goto out;
+}
+
+void bch2_btree_write_error_work(struct work_struct *work)
+{
+       struct bch_fs *c = container_of(work, struct bch_fs,
+                                       btree_write_error_work);
+       struct bio *bio;
+
+       while (1) {
+               spin_lock_irq(&c->btree_write_error_lock);
+               bio = bio_list_pop(&c->btree_write_error_list);
+               spin_unlock_irq(&c->btree_write_error_lock);
+
+               if (!bio)
+                       break;
+
+               bch2_btree_node_write_error(c,
+                       container_of(bio, struct btree_write_bio, wbio.bio));
+       }
+}
+
+static void btree_node_write_work(struct work_struct *work)
+{
+       struct btree_write_bio *wbio =
+               container_of(work, struct btree_write_bio, work);
+       struct bch_fs *c        = wbio->wbio.c;
+       struct btree *b         = wbio->wbio.bio.bi_private;
+
+       btree_bounce_free(c,
+               wbio->bytes,
+               wbio->wbio.used_mempool,
+               wbio->data);
+
+       if (wbio->wbio.failed.nr) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&c->btree_write_error_lock, flags);
+               bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio);
+               spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
+
+               queue_work(c->wq, &c->btree_write_error_work);
+               return;
+       }
+
+       bio_put(&wbio->wbio.bio);
+       btree_node_write_done(c, b);
+}
+
+static void btree_node_write_endio(struct bio *bio)
+{
+       struct bch_write_bio *wbio      = to_wbio(bio);
+       struct bch_write_bio *parent    = wbio->split ? wbio->parent : NULL;
+       struct bch_write_bio *orig      = parent ?: wbio;
+       struct bch_fs *c                = wbio->c;
+       struct bch_dev *ca              = bch_dev_bkey_exists(c, wbio->dev);
+       unsigned long flags;
+
+       if (wbio->have_ioref)
+               bch2_latency_acct(ca, wbio->submit_time, WRITE);
+
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s",
+                              bch2_blk_status_to_str(bio->bi_status)) ||
+           bch2_meta_write_fault("btree")) {
+               spin_lock_irqsave(&c->btree_write_error_lock, flags);
+               bch2_dev_list_add_dev(&orig->failed, wbio->dev);
+               spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
+       }
+
+       if (wbio->have_ioref)
+               percpu_ref_put(&ca->io_ref);
+
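+       /*
+        * If this bio was a split, propagate completion to the parent bio;
+        * otherwise punt the rest of the work (error handling, freeing the
+        * bounce buffer) out of interrupt context to a workqueue:
+        */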
+       if (parent) {
+               bio_put(bio);
+               bio_endio(&parent->bio);
+       } else {
+               struct btree_write_bio *wb =
+                       container_of(orig, struct btree_write_bio, wbio);
+
+               INIT_WORK(&wb->work, btree_node_write_work);
+               queue_work(system_unbound_wq, &wb->work);
+       }
+}
+
+static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
+                                  struct bset *i, unsigned sectors)
+{
+       unsigned whiteout_u64s = 0;
+       int ret;
+
+       if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE))
+               return -1;
+
+       ret = validate_bset(c, b, i, sectors, WRITE, false) ?:
+               validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
+       if (ret)
+               bch2_inconsistent_error(c);
+
+       return ret;
+}
+
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
+                           enum six_lock_type lock_type_held)
+{
+       struct btree_write_bio *wbio;
+       struct bset_tree *t;
+       struct bset *i;
+       struct btree_node *bn = NULL;
+       struct btree_node_entry *bne = NULL;
+       BKEY_PADDED(key) k;
+       struct bch_extent_ptr *ptr;
+       struct sort_iter sort_iter;
+       struct nonce nonce;
+       unsigned bytes_to_write, sectors_to_write, bytes, u64s;
+       u64 seq = 0;
+       bool used_mempool;
+       unsigned long old, new;
+       bool validate_before_checksum = false;
+       void *data;
+
+       if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
+               return;
+
+       /*
+        * We may only have a read lock on the btree node - the dirty bit is our
+        * "lock" against racing with other threads that may be trying to start
+        * a write, we do a write iff we clear the dirty bit. Since setting the
+        * dirty bit requires a write lock, we can't race with other threads
+        * redirtying it:
+        */
+       do {
+               old = new = READ_ONCE(b->flags);
+
+               if (!(old & (1 << BTREE_NODE_dirty)))
+                       return;
+
+               if (!btree_node_may_write(b))
+                       return;
+
+               if (old & (1 << BTREE_NODE_write_in_flight)) {
+                       btree_node_wait_on_io(b);
+                       continue;
+               }
+
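+               /*
+                * Clear dirty/need_write, mark the write in flight, and flip
+                * write_idx so new updates go to the other btree_write:
+                */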
+               new &= ~(1 << BTREE_NODE_dirty);
+               new &= ~(1 << BTREE_NODE_need_write);
+               new |=  (1 << BTREE_NODE_write_in_flight);
+               new |=  (1 << BTREE_NODE_just_written);
+               new ^=  (1 << BTREE_NODE_write_idx);
+       } while (cmpxchg_acquire(&b->flags, old, new) != old);
+
+       BUG_ON(btree_node_fake(b));
+       BUG_ON((b->will_make_reachable != 0) != !b->written);
+
+       BUG_ON(b->written >= c->opts.btree_node_size);
+       BUG_ON(b->written & (c->opts.block_size - 1));
+       BUG_ON(bset_written(b, btree_bset_last(b)));
+       BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
+       BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
+
+       bch2_sort_whiteouts(c, b);
+
+       sort_iter_init(&sort_iter, b);
+
+       bytes = !b->written
+               ? sizeof(struct btree_node)
+               : sizeof(struct btree_node_entry);
+
+       bytes += b->whiteout_u64s * sizeof(u64);
+
+       for_each_bset(b, t) {
+               i = bset(b, t);
+
+               if (bset_written(b, i))
+                       continue;
+
+               bytes += le16_to_cpu(i->u64s) * sizeof(u64);
+               sort_iter_add(&sort_iter,
+                             btree_bkey_first(b, t),
+                             btree_bkey_last(b, t));
+               seq = max(seq, le64_to_cpu(i->journal_seq));
+       }
+
+       data = btree_bounce_alloc(c, bytes, &used_mempool);
+
+       if (!b->written) {
+               bn = data;
+               *bn = *b->data;
+               i = &bn->keys;
+       } else {
+               bne = data;
+               bne->keys = b->data->keys;
+               i = &bne->keys;
+       }
+
+       i->journal_seq  = cpu_to_le64(seq);
+       i->u64s         = 0;
+
+       if (!btree_node_old_extent_overwrite(b)) {
+               sort_iter_add(&sort_iter,
+                             unwritten_whiteouts_start(c, b),
+                             unwritten_whiteouts_end(c, b));
+               SET_BSET_SEPARATE_WHITEOUTS(i, false);
+       } else {
+               memcpy_u64s(i->start,
+                           unwritten_whiteouts_start(c, b),
+                           b->whiteout_u64s);
+               i->u64s = cpu_to_le16(b->whiteout_u64s);
+               SET_BSET_SEPARATE_WHITEOUTS(i, true);
+       }
+
+       b->whiteout_u64s = 0;
+
+       u64s = btree_node_old_extent_overwrite(b)
+               ? bch2_sort_extents(vstruct_last(i), &sort_iter, false)
+               : bch2_sort_keys(i->start, &sort_iter, false);
+       le16_add_cpu(&i->u64s, u64s);
+
+       set_needs_whiteout(i, false);
+
+       /* do we have data to write? */
+       if (b->written && !i->u64s)
+               goto nowrite;
+
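+       /* pad the write out to a whole number of blocks, zeroing the tail: */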
+       bytes_to_write = vstruct_end(i) - data;
+       sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
+
+       memset(data + bytes_to_write, 0,
+              (sectors_to_write << 9) - bytes_to_write);
+
+       BUG_ON(b->written + sectors_to_write > c->opts.btree_node_size);
+       BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
+       BUG_ON(i->seq != b->data->keys.seq);
+
+       i->version = c->sb.version < bcachefs_metadata_version_new_versioning
+               ? cpu_to_le16(BCH_BSET_VERSION_OLD)
+               : cpu_to_le16(c->sb.version);
+       SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
+
+       if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)))
+               validate_before_checksum = true;
+
+       /* validate_bset may modify the bset, so it has to run before checksumming: */
+       if (le16_to_cpu(i->version) < bcachefs_metadata_version_max)
+               validate_before_checksum = true;
+
+       /* if we're going to be encrypting, check metadata validity first: */
+       if (validate_before_checksum &&
+           validate_bset_for_write(c, b, i, sectors_to_write))
+               goto err;
+
+       bset_encrypt(c, i, b->written << 9);
+
+       nonce = btree_nonce(i, b->written << 9);
+
+       if (bn)
+               bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
+       else
+               bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
+       /* if we're not encrypting, check metadata after checksumming: */
+       if (!validate_before_checksum &&
+           validate_bset_for_write(c, b, i, sectors_to_write))
+               goto err;
+
+       /*
+        * We handle btree write errors by immediately halting the journal -
+        * after we've done that, we can't issue any subsequent btree writes
+        * because they might have pointers to new nodes that failed to write.
+        *
+        * Furthermore, there's no point in doing any more btree writes because
+        * with the journal stopped, we're never going to update the journal to
+        * reflect that those writes were done and the data flushed from the
+        * journal:
+        *
+        * Also on journal error, the pending write may have updates that were
+        * never journalled (interior nodes, see btree_update_nodes_written()) -
+        * it's critical that we don't do the write in that case otherwise we
+        * will have updates visible that weren't in the journal:
+        *
+        * Make sure to update b->written so bch2_btree_init_next() doesn't
+        * break:
+        */
+       if (bch2_journal_error(&c->journal) ||
+           c->opts.nochanges)
+               goto err;
+
+       trace_btree_write(b, bytes_to_write, sectors_to_write);
+
+       wbio = container_of(bio_alloc_bioset(GFP_NOIO,
+                               buf_pages(data, sectors_to_write << 9),
+                               &c->btree_bio),
+                           struct btree_write_bio, wbio.bio);
+       wbio_init(&wbio->wbio.bio);
+       wbio->data                      = data;
+       wbio->bytes                     = bytes;
+       wbio->wbio.used_mempool         = used_mempool;
+       wbio->wbio.bio.bi_opf           = REQ_OP_WRITE|REQ_META;
+       wbio->wbio.bio.bi_end_io        = btree_node_write_endio;
+       wbio->wbio.bio.bi_private       = b;
+
+       bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
+
+       /*
+        * If we're appending to a leaf node, we don't technically need FUA -
+        * this write just needs to be persisted before the next journal write,
+        * which will be marked FLUSH|FUA.
+        *
+        * Similarly if we're writing a new btree root - the pointer is going to
+        * be in the next journal entry.
+        *
+        * But if we're writing a new btree node (that isn't a root) or
+        * appending to a non leaf btree node, we need either FUA or a flush
+        * when we write the parent with the new pointer. FUA is cheaper than a
+        * flush, and writes appending to leaf nodes aren't blocking anything so
+        * just make all btree node writes FUA to keep things sane.
+        */
+
+       bkey_copy(&k.key, &b->key);
+
+       bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr)
+               ptr->offset += b->written;
+
+       b->written += sectors_to_write;
+
+       /* XXX: submitting IO with btree locks held: */
+       bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key);
+       return;
+err:
+       set_btree_node_noevict(b);
+       b->written += sectors_to_write;
+nowrite:
+       btree_bounce_free(c, bytes, used_mempool, data);
+       btree_node_write_done(c, b);
+}
+
+/*
+ * Work that must be done with write lock held:
+ */
+bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
+{
+       bool invalidated_iter = false;
+       struct btree_node_entry *bne;
+       struct bset_tree *t;
+
+       if (!btree_node_just_written(b))
+               return false;
+
+       BUG_ON(b->whiteout_u64s);
+
+       clear_btree_node_just_written(b);
+
+       /*
+        * Note: immediately after write, bset_written() doesn't work - the
+        * amount of data we had to write after compaction might have been
+        * smaller than the offset of the last bset.
+        *
+        * However, we know that all bsets have been written here, as long as
+        * we're still holding the write lock:
+        */
+
+       /*
+        * XXX: decide if we really want to unconditionally sort down to a
+        * single bset:
+        */
+       if (b->nsets > 1) {
+               btree_node_sort(c, b, NULL, 0, b->nsets, true);
+               invalidated_iter = true;
+       } else {
+               invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
+       }
+
+       for_each_bset(b, t)
+               set_needs_whiteout(bset(b, t), true);
+
+       bch2_btree_verify(c, b);
+
+       /*
+        * If later we don't unconditionally sort down to a single bset, we have
+        * to ensure this is still true:
+        */
+       BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b));
+
+       bne = want_new_bset(c, b);
+       if (bne)
+               bch2_bset_init_next(c, b, bne);
+
+       bch2_btree_build_aux_trees(b);
+
+       return invalidated_iter;
+}
+
+/*
+ * Use this one if the node is intent or read locked, not write locked:
+ */
+void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
+                         enum six_lock_type lock_type_held)
+{
+       BUG_ON(lock_type_held == SIX_LOCK_write);
+
+       if (lock_type_held == SIX_LOCK_intent ||
+           six_lock_tryupgrade(&b->c.lock)) {
+               __bch2_btree_node_write(c, b, SIX_LOCK_intent);
+
+               /* don't cycle lock unnecessarily: */
+               if (btree_node_just_written(b) &&
+                   six_trylock_write(&b->c.lock)) {
+                       bch2_btree_post_write_cleanup(c, b);
+                       six_unlock_write(&b->c.lock);
+               }
+
+               if (lock_type_held == SIX_LOCK_read)
+                       six_lock_downgrade(&b->c.lock);
+       } else {
+               __bch2_btree_node_write(c, b, SIX_LOCK_read);
+       }
+}
+
+static void __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
+{
+       struct bucket_table *tbl;
+       struct rhash_head *pos;
+       struct btree *b;
+       unsigned i;
+restart:
+       rcu_read_lock();
+       for_each_cached_btree(b, c, tbl, i, pos)
+               if (test_bit(flag, &b->flags)) {
+                       rcu_read_unlock();
+                       wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
+                       goto restart;
+               }
+       rcu_read_unlock();
+}
+
+void bch2_btree_flush_all_reads(struct bch_fs *c)
+{
+       __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
+}
+
+void bch2_btree_flush_all_writes(struct bch_fs *c)
+{
+       __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
+}
+
+void bch2_btree_verify_flushed(struct bch_fs *c)
+{
+       struct bucket_table *tbl;
+       struct rhash_head *pos;
+       struct btree *b;
+       unsigned i;
+
+       rcu_read_lock();
+       for_each_cached_btree(b, c, tbl, i, pos) {
+               unsigned long flags = READ_ONCE(b->flags);
+
+               BUG_ON((flags & (1 << BTREE_NODE_dirty)) ||
+                      (flags & (1 << BTREE_NODE_write_in_flight)));
+       }
+       rcu_read_unlock();
+}
+
+void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct bucket_table *tbl;
+       struct rhash_head *pos;
+       struct btree *b;
+       unsigned i;
+
+       rcu_read_lock();
+       for_each_cached_btree(b, c, tbl, i, pos) {
+               unsigned long flags = READ_ONCE(b->flags);
+
+               if (!(flags & (1 << BTREE_NODE_dirty)))
+                       continue;
+
+               pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
+                      b,
+                      (flags & (1 << BTREE_NODE_dirty)) != 0,
+                      (flags & (1 << BTREE_NODE_need_write)) != 0,
+                      b->c.level,
+                      b->written,
+                      !list_empty_careful(&b->write_blocked),
+                      b->will_make_reachable != 0,
+                      b->will_make_reachable & 1);
+       }
+       rcu_read_unlock();
+}
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
new file mode 100644 (file)
index 0000000..626d0f0
--- /dev/null
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_IO_H
+#define _BCACHEFS_BTREE_IO_H
+
+#include "bkey_methods.h"
+#include "bset.h"
+#include "btree_locking.h"
+#include "checksum.h"
+#include "extents.h"
+#include "io_types.h"
+
+struct bch_fs;
+struct btree_write;
+struct btree;
+struct btree_iter;
+
+struct btree_read_bio {
+       struct bch_fs           *c;
+       u64                     start_time;
+       unsigned                have_ioref:1;
+       struct extent_ptr_decoded       pick;
+       struct work_struct      work;
+       struct bio              bio;
+};
+
+struct btree_write_bio {
+       struct work_struct      work;
+       void                    *data;
+       unsigned                bytes;
+       struct bch_write_bio    wbio;
+};
+
+static inline void btree_node_io_unlock(struct btree *b)
+{
+       EBUG_ON(!btree_node_write_in_flight(b));
+       clear_btree_node_write_in_flight(b);
+       wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
+}
+
+static inline void btree_node_io_lock(struct btree *b)
+{
+       wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
+                           TASK_UNINTERRUPTIBLE);
+}
+
+static inline void btree_node_wait_on_io(struct btree *b)
+{
+       wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
+                      TASK_UNINTERRUPTIBLE);
+}
+
+static inline bool btree_node_may_write(struct btree *b)
+{
+       return list_empty_careful(&b->write_blocked) &&
+               (!b->written || !b->will_make_reachable);
+}
+
+enum compact_mode {
+       COMPACT_LAZY,
+       COMPACT_ALL,
+};
+
+bool bch2_compact_whiteouts(struct bch_fs *, struct btree *,
+                           enum compact_mode);
+
+static inline bool should_compact_bset_lazy(struct btree *b,
+                                           struct bset_tree *t)
+{
+       unsigned total_u64s = bset_u64s(t);
+       unsigned dead_u64s = bset_dead_u64s(b, t);
+
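+       /*
+        * compact lazily when more than a third of the bset is dead space,
+        * and there's enough of it to be worth the work:
+        */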
+       return dead_u64s > 64 && dead_u64s * 3 > total_u64s;
+}
+
+static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b)
+{
+       struct bset_tree *t;
+
+       for_each_bset(b, t)
+               if (should_compact_bset_lazy(b, t))
+                       return bch2_compact_whiteouts(c, b, COMPACT_LAZY);
+
+       return false;
+}
+
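+/*
+ * The nonce mixes the bset's seq and journal_seq with its byte offset within
+ * the btree node, so every bset gets a distinct nonce:
+ */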
+static inline struct nonce btree_nonce(struct bset *i, unsigned offset)
+{
+       return (struct nonce) {{
+               [0] = cpu_to_le32(offset),
+               [1] = ((__le32 *) &i->seq)[0],
+               [2] = ((__le32 *) &i->seq)[1],
+               [3] = ((__le32 *) &i->journal_seq)[0] ^ BCH_NONCE_BTREE,
+       }};
+}
+
+static inline void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
+{
+       struct nonce nonce = btree_nonce(i, offset);
+
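+       /*
+        * The first bset shares the btree node header: encrypt the header
+        * fields (from flags to the start of the keys) first, then advance
+        * the nonce for the keys themselves:
+        */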
+       if (!offset) {
+               struct btree_node *bn = container_of(i, struct btree_node, keys);
+               unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+               bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags,
+                            bytes);
+
+               nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
+       }
+
+       bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
+                    vstruct_end(i) - (void *) i->_data);
+}
+
+void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);
+
+void bch2_btree_build_aux_trees(struct btree *);
+void bch2_btree_init_next(struct bch_fs *, struct btree *,
+                        struct btree_iter *);
+
+int bch2_btree_node_read_done(struct bch_fs *, struct btree *, bool);
+void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
+int bch2_btree_root_read(struct bch_fs *, enum btree_id,
+                        const struct bkey_i *, unsigned);
+
+void bch2_btree_complete_write(struct bch_fs *, struct btree *,
+                             struct btree_write *);
+void bch2_btree_write_error_work(struct work_struct *);
+
+void __bch2_btree_node_write(struct bch_fs *, struct btree *,
+                           enum six_lock_type);
+bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
+
+void bch2_btree_node_write(struct bch_fs *, struct btree *,
+                         enum six_lock_type);
+
+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
+                                           enum six_lock_type lock_held)
+{
+       while (b->written &&
+              btree_node_need_write(b) &&
+              btree_node_may_write(b)) {
+               if (!btree_node_write_in_flight(b)) {
+                       bch2_btree_node_write(c, b, lock_held);
+                       break;
+               }
+
+               six_unlock_type(&b->c.lock, lock_held);
+               btree_node_wait_on_io(b);
+               btree_node_lock_type(c, b, lock_held);
+       }
+}
+
+#define bch2_btree_node_write_cond(_c, _b, cond)                       \
+do {                                                                   \
+       unsigned long old, new, v = READ_ONCE((_b)->flags);             \
+                                                                       \
+       do {                                                            \
+               old = new = v;                                          \
+                                                                       \
+               if (!(old & (1 << BTREE_NODE_dirty)) || !(cond))        \
+                       break;                                          \
+                                                                       \
+               new |= (1 << BTREE_NODE_need_write);                    \
+       } while ((v = cmpxchg(&(_b)->flags, old, new)) != old);         \
+                                                                       \
+       btree_node_write_if_need(_c, _b, SIX_LOCK_read);                \
+} while (0)
+
+void bch2_btree_flush_all_reads(struct bch_fs *);
+void bch2_btree_flush_all_writes(struct bch_fs *);
+void bch2_btree_verify_flushed(struct bch_fs *);
+void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
+
+static inline void compat_bformat(unsigned level, enum btree_id btree_id,
+                                unsigned version, unsigned big_endian,
+                                int write, struct bkey_format *f)
+{
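+       /*
+        * Old-version metadata had the inode and offset fields of inodes
+        * btree keys swapped - fix up the key format to match:
+        */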
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_id == BTREE_ID_INODES) {
+               swap(f->bits_per_field[BKEY_FIELD_INODE],
+                    f->bits_per_field[BKEY_FIELD_OFFSET]);
+               swap(f->field_offset[BKEY_FIELD_INODE],
+                    f->field_offset[BKEY_FIELD_OFFSET]);
+       }
+}
+
+static inline void compat_bpos(unsigned level, enum btree_id btree_id,
+                              unsigned version, unsigned big_endian,
+                              int write, struct bpos *p)
+{
+       if (big_endian != CPU_BIG_ENDIAN)
+               bch2_bpos_swab(p);
+
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_id == BTREE_ID_INODES)
+               swap(p->inode, p->offset);
+}
+
+static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
+                                    unsigned version, unsigned big_endian,
+                                    int write,
+                                    struct btree_node *bn)
+{
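+       /*
+        * Old-version extent btree nodes stored the predecessor of the
+        * in-memory min_key: convert when writing old-version metadata out,
+        * and undo the conversion when reading it back in:
+        */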
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_node_type_is_extents(btree_id) &&
+           bkey_cmp(bn->min_key, POS_MIN) &&
+           write)
+               bn->min_key = bkey_predecessor(bn->min_key);
+
+       compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
+       compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
+
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_node_type_is_extents(btree_id) &&
+           bkey_cmp(bn->min_key, POS_MIN) &&
+           !write)
+               bn->min_key = bkey_successor(bn->min_key);
+}
+
+#endif /* _BCACHEFS_BTREE_IO_H */
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
new file mode 100644 (file)
index 0000000..6fab76c
--- /dev/null
@@ -0,0 +1,2445 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_cache.h"
+#include "btree_iter.h"
+#include "btree_key_cache.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "debug.h"
+#include "extents.h"
+#include "journal.h"
+
+#include <linux/prefetch.h>
+#include <trace/events/bcachefs.h>
+
+static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
+{
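+       /*
+        * values below 128 are the BTREE_ITER_NO_NODE_* sentinels, not
+        * pointers to real btree nodes:
+        */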
+       return l < BTREE_MAX_DEPTH &&
+               (unsigned long) iter->l[l].b >= 128;
+}
+
+static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
+{
+       struct bpos pos = iter->pos;
+
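+       /*
+        * extents are keyed by their end position - to find extents
+        * overlapping pos, search from its successor:
+        */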
+       if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+           bkey_cmp(pos, POS_MAX))
+               pos = bkey_successor(pos);
+       return pos;
+}
+
+static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
+                                             struct btree *b)
+{
+       return bkey_cmp(btree_iter_search_key(iter), b->data->min_key) < 0;
+}
+
+static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
+                                            struct btree *b)
+{
+       return bkey_cmp(b->key.k.p, btree_iter_search_key(iter)) < 0;
+}
+
+static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
+                                         struct btree *b)
+{
+       return iter->btree_id == b->c.btree_id &&
+               !btree_iter_pos_before_node(iter, b) &&
+               !btree_iter_pos_after_node(iter, b);
+}
+
+/* Btree node locking: */
+
+void bch2_btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
+{
+       bch2_btree_node_unlock_write_inlined(b, iter);
+}
+
+void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
+{
+       struct btree_iter *linked;
+       unsigned readers = 0;
+
+       EBUG_ON(!btree_node_intent_locked(iter, b->c.level));
+
+       trans_for_each_iter(iter->trans, linked)
+               if (linked->l[b->c.level].b == b &&
+                   btree_node_read_locked(linked, b->c.level))
+                       readers++;
+
+       /*
+        * Must drop our read locks before calling six_lock_write() -
+        * six_unlock() won't do wakeups until the reader count
+        * goes to 0, and it's safe because we have the node intent
+        * locked:
+        */
+       atomic64_sub(__SIX_VAL(read_lock, readers),
+                    &b->c.lock.state.counter);
+       btree_node_lock_type(iter->trans->c, b, SIX_LOCK_write);
+       atomic64_add(__SIX_VAL(read_lock, readers),
+                    &b->c.lock.state.counter);
+}
+
+bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
+{
+       struct btree *b = btree_iter_node(iter, level);
+       int want = __btree_lock_want(iter, level);
+
+       if (!is_btree_node(iter, level))
+               return false;
+
+       if (race_fault())
+               return false;
+
+       if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) ||
+           (btree_node_lock_seq_matches(iter, b, level) &&
+            btree_node_lock_increment(iter->trans, b, level, want))) {
+               mark_btree_node_locked(iter, level, want);
+               return true;
+       } else {
+               return false;
+       }
+}
+
+static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
+{
+       struct btree *b = iter->l[level].b;
+
+       EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED);
+
+       if (!is_btree_node(iter, level))
+               return false;
+
+       if (btree_node_intent_locked(iter, level))
+               return true;
+
+       if (race_fault())
+               return false;
+
+       if (btree_node_locked(iter, level)
+           ? six_lock_tryupgrade(&b->c.lock)
+           : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq))
+               goto success;
+
+       if (btree_node_lock_seq_matches(iter, b, level) &&
+           btree_node_lock_increment(iter->trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
+               btree_node_unlock(iter, level);
+               goto success;
+       }
+
+       return false;
+success:
+       mark_btree_node_intent_locked(iter, level);
+       return true;
+}
+
+static inline bool btree_iter_get_locks(struct btree_iter *iter,
+                                       bool upgrade, bool trace)
+{
+       unsigned l = iter->level;
+       int fail_idx = -1;
+
+       do {
+               if (!btree_iter_node(iter, l))
+                       break;
+
+               if (!(upgrade
+                     ? bch2_btree_node_upgrade(iter, l)
+                     : bch2_btree_node_relock(iter, l))) {
+                       if (trace)
+                               (upgrade
+                                ? trace_node_upgrade_fail
+                                : trace_node_relock_fail)(l, iter->l[l].lock_seq,
+                                               is_btree_node(iter, l)
+                                               ? 0
+                                               : (unsigned long) iter->l[l].b,
+                                               is_btree_node(iter, l)
+                                               ? iter->l[l].b->c.lock.state.seq
+                                               : 0);
+
+                       fail_idx = l;
+                       btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+               }
+
+               l++;
+       } while (l < iter->locks_want);
+
+       /*
+        * When we fail to get a lock, we have to ensure that any child nodes
+        * can't be relocked so bch2_btree_iter_traverse has to walk back up to
+        * the node that we failed to relock:
+        */
+       while (fail_idx >= 0) {
+               btree_node_unlock(iter, fail_idx);
+               iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+               --fail_idx;
+       }
+
+       if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
+               iter->uptodate = BTREE_ITER_NEED_PEEK;
+
+       bch2_btree_trans_verify_locks(iter->trans);
+
+       return iter->uptodate < BTREE_ITER_NEED_RELOCK;
+}
+
+static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
+                                 enum btree_iter_type type)
+{
+       return  type != BTREE_ITER_CACHED
+               ? container_of(_b, struct btree, c)->key.k.p
+               : container_of(_b, struct bkey_cached, c)->key.pos;
+}
+
+/* Slowpath: */
+bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
+                           unsigned level, struct btree_iter *iter,
+                           enum six_lock_type type,
+                           six_lock_should_sleep_fn should_sleep_fn,
+                           void *p)
+{
+       struct btree_trans *trans = iter->trans;
+       struct btree_iter *linked;
+       u64 start_time = local_clock();
+       bool ret = true;
+
+       /* Check if it's safe to block: */
+       trans_for_each_iter(trans, linked) {
+               if (!linked->nodes_locked)
+                       continue;
+
+               /*
+                * Can't block taking an intent lock if we have _any_ nodes read
+                * locked:
+                *
+                * - Our read lock blocks another thread with an intent lock on
+                *   the same node from getting a write lock, and thus from
+                *   dropping its intent lock
+                *
+                * - And the other thread may have multiple nodes intent locked:
+                *   both the node we want to intent lock, and the node we
+                *   already have read locked - deadlock:
+                */
+               if (type == SIX_LOCK_intent &&
+                   linked->nodes_locked != linked->nodes_intent_locked) {
+                       if (!(trans->nounlock)) {
+                               linked->locks_want = max_t(unsigned,
+                                               linked->locks_want,
+                                               __fls(linked->nodes_locked) + 1);
+                               if (!btree_iter_get_locks(linked, true, false))
+                                       ret = false;
+                       } else {
+                               ret = false;
+                       }
+               }
+
+               /*
+                * Interior nodes must be locked before their descendants: if
+                * another iterator has possible descendants locked of the node
+                * we're about to lock, it must have the ancestors locked too:
+                */
+               if (linked->btree_id == iter->btree_id &&
+                   level > __fls(linked->nodes_locked)) {
+                       if (!(trans->nounlock)) {
+                               linked->locks_want =
+                                       max(level + 1, max_t(unsigned,
+                                           linked->locks_want,
+                                           iter->locks_want));
+                               if (!btree_iter_get_locks(linked, true, false))
+                                       ret = false;
+                       } else {
+                               ret = false;
+                       }
+               }
+
+               /* Must lock btree nodes in key order: */
+               if ((cmp_int(iter->btree_id, linked->btree_id) ?:
+                    -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0)
+                       ret = false;
+
+               if (iter->btree_id == linked->btree_id &&
+                   btree_node_locked(linked, level) &&
+                   bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b,
+                                                btree_iter_type(linked))) <= 0)
+                       ret = false;
+
+               /*
+                * Recheck if this is a node we already have locked - since one
+                * of the get_locks() calls might've successfully
+                * upgraded/relocked it:
+                */
+               if (linked->l[level].b == b &&
+                   btree_node_locked_type(linked, level) >= type) {
+                       six_lock_increment(&b->c.lock, type);
+                       return true;
+               }
+       }
+
+       if (unlikely(!ret)) {
+               trace_trans_restart_would_deadlock(iter->trans->ip);
+               return false;
+       }
+
+       if (six_trylock_type(&b->c.lock, type))
+               return true;
+
+       if (six_lock_type(&b->c.lock, type, should_sleep_fn, p))
+               return false;
+
+       bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
+                              start_time);
+       return true;
+}
+
+/* Btree iterator locking: */
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+static void bch2_btree_iter_verify_locks(struct btree_iter *iter)
+{
+       unsigned l;
+
+       if (!(iter->trans->iters_linked & (1ULL << iter->idx))) {
+               BUG_ON(iter->nodes_locked);
+               return;
+       }
+
+       for (l = 0; is_btree_node(iter, l); l++) {
+               if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
+                   !btree_node_locked(iter, l))
+                       continue;
+
+               BUG_ON(btree_lock_want(iter, l) !=
+                      btree_node_locked_type(iter, l));
+       }
+}
+
+void bch2_btree_trans_verify_locks(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+
+       trans_for_each_iter_all(trans, iter)
+               bch2_btree_iter_verify_locks(iter);
+}
+#else
+static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
+#endif
+
+__flatten
+bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace)
+{
+       return btree_iter_get_locks(iter, false, trace);
+}
+
+bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
+                              unsigned new_locks_want)
+{
+       struct btree_iter *linked;
+
+       EBUG_ON(iter->locks_want >= new_locks_want);
+
+       iter->locks_want = new_locks_want;
+
+       if (btree_iter_get_locks(iter, true, true))
+               return true;
+
+       /*
+        * Ancestor nodes must be locked before child nodes, so set locks_want
+        * on iterators that might lock ancestors before us to avoid getting
+        * -EINTR later:
+        */
+       trans_for_each_iter(iter->trans, linked)
+               if (linked != iter &&
+                   linked->btree_id == iter->btree_id &&
+                   linked->locks_want < new_locks_want) {
+                       linked->locks_want = new_locks_want;
+                       btree_iter_get_locks(linked, true, false);
+               }
+
+       return false;
+}
+
+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *iter,
+                                       unsigned new_locks_want)
+{
+       unsigned l = iter->level;
+
+       EBUG_ON(iter->locks_want >= new_locks_want);
+
+       iter->locks_want = new_locks_want;
+
+       do {
+               if (!btree_iter_node(iter, l))
+                       break;
+
+               if (!bch2_btree_node_upgrade(iter, l)) {
+                       iter->locks_want = l;
+                       return false;
+               }
+
+               l++;
+       } while (l < iter->locks_want);
+
+       return true;
+}
+
+void __bch2_btree_iter_downgrade(struct btree_iter *iter,
+                                unsigned downgrade_to)
+{
+       unsigned l, new_locks_want = downgrade_to ?:
+               (iter->flags & BTREE_ITER_INTENT ? 1 : 0);
+
+       if (iter->locks_want > new_locks_want) {
+               iter->locks_want = new_locks_want;
+
+               while (iter->nodes_locked &&
+                      (l = __fls(iter->nodes_locked)) >= iter->locks_want) {
+                       if (l > iter->level) {
+                               btree_node_unlock(iter, l);
+                       } else {
+                               if (btree_node_intent_locked(iter, l)) {
+                                       six_lock_downgrade(&iter->l[l].b->c.lock);
+                                       iter->nodes_intent_locked ^= 1 << l;
+                               }
+                               break;
+                       }
+               }
+       }
+
+       bch2_btree_trans_verify_locks(iter->trans);
+}
+
+void bch2_trans_downgrade(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+
+       trans_for_each_iter(trans, iter)
+               bch2_btree_iter_downgrade(iter);
+}
+
+/* Btree transaction locking: */
+
+bool bch2_trans_relock(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+       bool ret = true;
+
+       trans_for_each_iter(trans, iter)
+               if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
+                       ret &= bch2_btree_iter_relock(iter, true);
+
+       return ret;
+}
+
+void bch2_trans_unlock(struct btree_trans *trans)
+{
+       struct btree_iter *iter;
+
+       trans_for_each_iter(trans, iter)
+               __bch2_btree_iter_unlock(iter);
+}
+
+/* Btree iterator: */
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+static void bch2_btree_iter_verify_cached(struct btree_iter *iter)
+{
+       struct bkey_cached *ck;
+       bool locked = btree_node_locked(iter, 0);
+
+       if (!bch2_btree_node_relock(iter, 0))
+               return;
+
+       ck = (void *) iter->l[0].b;
+       BUG_ON(ck->key.btree_id != iter->btree_id ||
+              bkey_cmp(ck->key.pos, iter->pos));
+
+       if (!locked)
+               btree_node_unlock(iter, 0);
+}
+
+static void bch2_btree_iter_verify_level(struct btree_iter *iter,
+                                        unsigned level)
+{
+       struct bpos pos = btree_iter_search_key(iter);
+       struct btree_iter_level *l = &iter->l[level];
+       struct btree_node_iter tmp = l->iter;
+       bool locked = btree_node_locked(iter, level);
+       struct bkey_packed *p, *k;
+       char buf1[100], buf2[100];
+       const char *msg;
+
+       if (!debug_check_iterators(iter->trans->c))
+               return;
+
+       if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
+               if (!level)
+                       bch2_btree_iter_verify_cached(iter);
+               return;
+       }
+
+       BUG_ON(iter->level < iter->min_depth);
+
+       if (!btree_iter_node(iter, level))
+               return;
+
+       if (!bch2_btree_node_relock(iter, level))
+               return;
+
+       /*
+        * Ideally this invariant would always be true, and hopefully in the
+        * future it will be, but for now set_pos_same_leaf() breaks it:
+        */
+       BUG_ON(iter->uptodate < BTREE_ITER_NEED_TRAVERSE &&
+              !btree_iter_pos_in_node(iter, l->b));
+
+       /* node iterators don't use the leaf node iterator: */
+       if (btree_iter_type(iter) == BTREE_ITER_NODES &&
+           level <= iter->min_depth)
+               goto unlock;
+
+       bch2_btree_node_iter_verify(&l->iter, l->b);
+
+       /*
+        * For interior nodes, the iterator will have skipped past
+        * deleted keys:
+        *
+        * For extents, the iterator may have skipped past deleted keys (but not
+        * whiteouts)
+        */
+       p = level || btree_node_type_is_extents(iter->btree_id)
+               ? bch2_btree_node_iter_prev_filter(&tmp, l->b, KEY_TYPE_discard)
+               : bch2_btree_node_iter_prev_all(&tmp, l->b);
+       k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+       if (p && bkey_iter_pos_cmp(l->b, p, &pos) >= 0) {
+               msg = "before";
+               goto err;
+       }
+
+       if (k && bkey_iter_pos_cmp(l->b, k, &pos) < 0) {
+               msg = "after";
+               goto err;
+       }
+unlock:
+       if (!locked)
+               btree_node_unlock(iter, level);
+       return;
+err:
+       strcpy(buf1, "(none)");
+       strcpy(buf2, "(none)");
+
+       if (p) {
+               struct bkey uk = bkey_unpack_key(l->b, p);
+               bch2_bkey_to_text(&PBUF(buf1), &uk);
+       }
+
+       if (k) {
+               struct bkey uk = bkey_unpack_key(l->b, k);
+               bch2_bkey_to_text(&PBUF(buf2), &uk);
+       }
+
+       panic("iterator should be %s key at level %u:\n"
+             "iter pos %s %llu:%llu\n"
+             "prev key %s\n"
+             "cur  key %s\n",
+             msg, level,
+             iter->flags & BTREE_ITER_IS_EXTENTS ? ">" : "=>",
+             iter->pos.inode, iter->pos.offset,
+             buf1, buf2);
+}
+
+static void bch2_btree_iter_verify(struct btree_iter *iter)
+{
+       unsigned i;
+
+       bch2_btree_trans_verify_locks(iter->trans);
+
+       for (i = 0; i < BTREE_MAX_DEPTH; i++)
+               bch2_btree_iter_verify_level(iter, i);
+}
+
+void bch2_btree_trans_verify_iters(struct btree_trans *trans, struct btree *b)
+{
+       struct btree_iter *iter;
+
+       if (!debug_check_iterators(trans->c))
+               return;
+
+       trans_for_each_iter_with_node(trans, b, iter)
+               bch2_btree_iter_verify_level(iter, b->c.level);
+}
+
+#else
+
+static inline void bch2_btree_iter_verify_level(struct btree_iter *iter, unsigned l) {}
+static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
+
+#endif
+
+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
+                                       struct btree *b,
+                                       struct bset_tree *t,
+                                       struct bkey_packed *k)
+{
+       struct btree_node_iter_set *set;
+
+       btree_node_iter_for_each(iter, set)
+               if (set->end == t->end_offset) {
+                       set->k = __btree_node_key_to_offset(b, k);
+                       bch2_btree_node_iter_sort(iter, b);
+                       return;
+               }
+
+       bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
+}
+
+static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter,
+                                              struct btree *b,
+                                              struct bkey_packed *where)
+{
+       struct btree_iter_level *l = &iter->l[b->c.level];
+       struct bpos pos = btree_iter_search_key(iter);
+
+       if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
+               return;
+
+       if (bkey_iter_pos_cmp(l->b, where, &pos) < 0)
+               bch2_btree_node_iter_advance(&l->iter, l->b);
+
+       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+}
+
+void bch2_btree_iter_fix_key_modified(struct btree_iter *iter,
+                                     struct btree *b,
+                                     struct bkey_packed *where)
+{
+       struct btree_iter *linked;
+
+       trans_for_each_iter_with_node(iter->trans, b, linked) {
+               __bch2_btree_iter_fix_key_modified(linked, b, where);
+               bch2_btree_iter_verify_level(linked, b->c.level);
+       }
+}
+
+static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
+                                     struct btree *b,
+                                     struct btree_node_iter *node_iter,
+                                     struct bset_tree *t,
+                                     struct bkey_packed *where,
+                                     unsigned clobber_u64s,
+                                     unsigned new_u64s)
+{
+       const struct bkey_packed *end = btree_bkey_last(b, t);
+       struct btree_node_iter_set *set;
+       unsigned offset = __btree_node_key_to_offset(b, where);
+       int shift = new_u64s - clobber_u64s;
+       unsigned old_end = t->end_offset - shift;
+       unsigned orig_iter_pos = node_iter->data[0].k;
+       bool iter_current_key_modified =
+               orig_iter_pos >= offset &&
+               orig_iter_pos <= offset + clobber_u64s;
+       struct bpos iter_pos = btree_iter_search_key(iter);
+
+       btree_node_iter_for_each(node_iter, set)
+               if (set->end == old_end)
+                       goto found;
+
+       /* didn't find the bset in the iterator - might have to re-add it: */
+       if (new_u64s &&
+           bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) {
+               bch2_btree_node_iter_push(node_iter, b, where, end);
+               goto fixup_done;
+       } else {
+               /* Iterator is after key that changed */
+               return;
+       }
+found:
+       set->end = t->end_offset;
+
+       /* Iterator hasn't gotten to the key that changed yet: */
+       if (set->k < offset)
+               return;
+
+       if (new_u64s &&
+           bkey_iter_pos_cmp(b, where, &iter_pos) >= 0) {
+               set->k = offset;
+       } else if (set->k < offset + clobber_u64s) {
+               set->k = offset + new_u64s;
+               if (set->k == set->end)
+                       bch2_btree_node_iter_set_drop(node_iter, set);
+       } else {
+               /* Iterator is after key that changed */
+               set->k = (int) set->k + shift;
+               return;
+       }
+
+       bch2_btree_node_iter_sort(node_iter, b);
+fixup_done:
+       if (node_iter->data[0].k != orig_iter_pos)
+               iter_current_key_modified = true;
+
+       /*
+        * When a new key is added, and the node iterator now points to that
+        * key, the iterator might have skipped past deleted keys that should
+        * come after the key the iterator now points to. We have to rewind to
+        * before those deleted keys - otherwise
+        * bch2_btree_node_iter_prev_all() breaks:
+        */
+       if (!bch2_btree_node_iter_end(node_iter) &&
+           iter_current_key_modified &&
+           (b->c.level ||
+            btree_node_type_is_extents(iter->btree_id))) {
+               struct bset_tree *t;
+               struct bkey_packed *k, *k2, *p;
+
+               k = bch2_btree_node_iter_peek_all(node_iter, b);
+
+               for_each_bset(b, t) {
+                       bool set_pos = false;
+
+                       if (node_iter->data[0].end == t->end_offset)
+                               continue;
+
+                       k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);
+
+                       while ((p = bch2_bkey_prev_all(b, t, k2)) &&
+                              bkey_iter_cmp(b, k, p) < 0) {
+                               k2 = p;
+                               set_pos = true;
+                       }
+
+                       if (set_pos)
+                               btree_node_iter_set_set_pos(node_iter,
+                                                           b, t, k2);
+               }
+       }
+
+       if (!b->c.level &&
+           node_iter == &iter->l[0].iter &&
+           iter_current_key_modified)
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+}
+
+void bch2_btree_node_iter_fix(struct btree_iter *iter,
+                             struct btree *b,
+                             struct btree_node_iter *node_iter,
+                             struct bkey_packed *where,
+                             unsigned clobber_u64s,
+                             unsigned new_u64s)
+{
+       struct bset_tree *t = bch2_bkey_to_bset(b, where);
+       struct btree_iter *linked;
+
+       if (node_iter != &iter->l[b->c.level].iter) {
+               __bch2_btree_node_iter_fix(iter, b, node_iter, t,
+                                          where, clobber_u64s, new_u64s);
+
+               if (debug_check_iterators(iter->trans->c))
+                       bch2_btree_node_iter_verify(node_iter, b);
+       }
+
+       trans_for_each_iter_with_node(iter->trans, b, linked) {
+               __bch2_btree_node_iter_fix(linked, b,
+                                          &linked->l[b->c.level].iter, t,
+                                          where, clobber_u64s, new_u64s);
+               bch2_btree_iter_verify_level(linked, b->c.level);
+       }
+}
+
+static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
+                                                 struct btree_iter_level *l,
+                                                 struct bkey *u,
+                                                 struct bkey_packed *k)
+{
+       struct bkey_s_c ret;
+
+       if (unlikely(!k)) {
+               /*
+                * signal to bch2_btree_iter_peek_slot() that we're currently at
+                * a hole
+                */
+               u->type = KEY_TYPE_deleted;
+               return bkey_s_c_null;
+       }
+
+       ret = bkey_disassemble(l->b, k, u);
+
+       if (debug_check_bkeys(iter->trans->c))
+               bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
+
+       return ret;
+}
+
+/* peek_all() doesn't skip deleted keys */
+static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *iter,
+                                                   struct btree_iter_level *l,
+                                                   struct bkey *u)
+{
+       return __btree_iter_unpack(iter, l, u,
+                       bch2_btree_node_iter_peek_all(&l->iter, l->b));
+}
+
+static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter,
+                                               struct btree_iter_level *l)
+{
+       return __btree_iter_unpack(iter, l, &iter->k,
+                       bch2_btree_node_iter_peek(&l->iter, l->b));
+}
+
+static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter,
+                                               struct btree_iter_level *l)
+{
+       return __btree_iter_unpack(iter, l, &iter->k,
+                       bch2_btree_node_iter_prev(&l->iter, l->b));
+}
+
+static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
+                                            struct btree_iter_level *l,
+                                            int max_advance)
+{
+       struct bpos pos = btree_iter_search_key(iter);
+       struct bkey_packed *k;
+       int nr_advanced = 0;
+
+       while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
+              bkey_iter_pos_cmp(l->b, k, &pos) < 0) {
+               if (max_advance > 0 && nr_advanced >= max_advance)
+                       return false;
+
+               bch2_btree_node_iter_advance(&l->iter, l->b);
+               nr_advanced++;
+       }
+
+       return true;
+}
+
+/*
+ * Verify that iterator for parent node points to child node:
+ */
+static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
+{
+       struct btree_iter_level *l;
+       unsigned plevel;
+       bool parent_locked;
+       struct bkey_packed *k;
+
+       if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+               return;
+
+       plevel = b->c.level + 1;
+       if (!btree_iter_node(iter, plevel))
+               return;
+
+       parent_locked = btree_node_locked(iter, plevel);
+
+       if (!bch2_btree_node_relock(iter, plevel))
+               return;
+
+       l = &iter->l[plevel];
+       k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+       if (!k ||
+           bkey_deleted(k) ||
+           bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
+               char buf[100];
+
+               strcpy(buf, "(none)");
+               if (k) {
+                       /* the key is packed in the parent node's format: */
+                       struct bkey uk = bkey_unpack_key(l->b, k);
+
+                       bch2_bkey_to_text(&PBUF(buf), &uk);
+               }
+
+               panic("parent iter doesn't point to new node:\n%s\n%llu:%llu\n",
+                     buf, b->key.k.p.inode, b->key.k.p.offset);
+       }
+
+       if (!parent_locked)
+               btree_node_unlock(iter, b->c.level + 1);
+}
+
+static inline void __btree_iter_init(struct btree_iter *iter,
+                                    unsigned level)
+{
+       struct bpos pos = btree_iter_search_key(iter);
+       struct btree_iter_level *l = &iter->l[level];
+
+       bch2_btree_node_iter_init(&l->iter, l->b, &pos);
+
+       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+}
+
+static inline void btree_iter_node_set(struct btree_iter *iter,
+                                      struct btree *b)
+{
+       BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
+
+       btree_iter_verify_new_node(iter, b);
+
+       EBUG_ON(!btree_iter_pos_in_node(iter, b));
+       EBUG_ON(b->c.lock.state.seq & 1);
+
+       iter->l[b->c.level].lock_seq = b->c.lock.state.seq;
+       iter->l[b->c.level].b = b;
+       __btree_iter_init(iter, b->c.level);
+}
+
+/*
+ * A btree node is being replaced - update the iterator to point to the new
+ * node:
+ */
+void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
+{
+       enum btree_node_locked_type t;
+       struct btree_iter *linked;
+
+       trans_for_each_iter(iter->trans, linked)
+               if (btree_iter_type(linked) != BTREE_ITER_CACHED &&
+                   btree_iter_pos_in_node(linked, b)) {
+                       /*
+                        * bch2_btree_iter_node_drop() has already been called -
+                        * the old node we're replacing has already been
+                        * unlocked and the pointer invalidated
+                        */
+                       BUG_ON(btree_node_locked(linked, b->c.level));
+
+                       t = btree_lock_want(linked, b->c.level);
+                       if (t != BTREE_NODE_UNLOCKED) {
+                               six_lock_increment(&b->c.lock, t);
+                               mark_btree_node_locked(linked, b->c.level, t);
+                       }
+
+                       btree_iter_node_set(linked, b);
+               }
+}
+
+void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
+{
+       struct btree_iter *linked;
+       unsigned level = b->c.level;
+
+       trans_for_each_iter(iter->trans, linked)
+               if (linked->l[level].b == b) {
+                       __btree_node_unlock(linked, level);
+                       linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
+               }
+}
+
+/*
+ * A btree node has been modified in such a way as to invalidate iterators - fix
+ * them:
+ */
+void bch2_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b)
+{
+       struct btree_iter *linked;
+
+       trans_for_each_iter_with_node(iter->trans, b, linked)
+               __btree_iter_init(linked, b->c.level);
+}
+
+static int lock_root_check_fn(struct six_lock *lock, void *p)
+{
+       struct btree *b = container_of(lock, struct btree, c.lock);
+       struct btree **rootp = p;
+
+       return b == *rootp ? 0 : -1;
+}
+
+static inline int btree_iter_lock_root(struct btree_iter *iter,
+                                      unsigned depth_want)
+{
+       struct bch_fs *c = iter->trans->c;
+       struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
+       enum six_lock_type lock_type;
+       unsigned i;
+
+       EBUG_ON(iter->nodes_locked);
+
+       while (1) {
+               b = READ_ONCE(*rootp);
+               iter->level = READ_ONCE(b->c.level);
+
+               if (unlikely(iter->level < depth_want)) {
+                       /*
+                        * the root is at a lower depth than the depth we want:
+                        * got to the end of the btree, or we're walking nodes
+                        * greater than some depth and there are no nodes >=
+                        * that depth
+                        */
+                       iter->level = depth_want;
+                       for (i = iter->level; i < BTREE_MAX_DEPTH; i++)
+                               iter->l[i].b = NULL;
+                       return 1;
+               }
+
+               lock_type = __btree_lock_want(iter, iter->level);
+               if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
+                                             iter, lock_type,
+                                             lock_root_check_fn, rootp)))
+                       return -EINTR;
+
+               if (likely(b == READ_ONCE(*rootp) &&
+                          b->c.level == iter->level &&
+                          !race_fault())) {
+                       for (i = 0; i < iter->level; i++)
+                               iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
+                       iter->l[iter->level].b = b;
+                       for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++)
+                               iter->l[i].b = NULL;
+
+                       mark_btree_node_locked(iter, iter->level, lock_type);
+                       btree_iter_node_set(iter, b);
+                       return 0;
+               }
+
+               six_unlock_type(&b->c.lock, lock_type);
+       }
+}
+
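+/*
+ * Prefetch some of the current node's children. The counts are heuristics:
+ * we prefetch much more aggressively before BCH_FS_STARTED is set,
+ * presumably because the big sequential traversals happen at startup.
+ */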
+noinline
+static void btree_iter_prefetch(struct btree_iter *iter)
+{
+       struct bch_fs *c = iter->trans->c;
+       struct btree_iter_level *l = &iter->l[iter->level];
+       struct btree_node_iter node_iter = l->iter;
+       struct bkey_packed *k;
+       BKEY_PADDED(k) tmp;
+       unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
+               ? (iter->level > 1 ? 0 : 2)
+               : (iter->level > 1 ? 1 : 16);
+       bool was_locked = btree_node_locked(iter, iter->level);
+
+       while (nr) {
+               if (!bch2_btree_node_relock(iter, iter->level))
+                       return;
+
+               bch2_btree_node_iter_advance(&node_iter, l->b);
+               k = bch2_btree_node_iter_peek(&node_iter, l->b);
+               if (!k)
+                       break;
+
+               bch2_bkey_unpack(l->b, &tmp.k, k);
+               bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1);
+       }
+
+       if (!was_locked)
+               btree_node_unlock(iter, iter->level);
+}
+
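+/*
+ * btree_ptr_v2 keys cache the address of the child node in memory; when
+ * btree_iter_down() finds the cached pointer is stale, this writes back the
+ * node we actually got, so later lookups can presumably skip the hash table.
+ */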
+static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
+                                           unsigned plevel, struct btree *b)
+{
+       struct btree_iter_level *l = &iter->l[plevel];
+       bool locked = btree_node_locked(iter, plevel);
+       struct bkey_packed *k;
+       struct bch_btree_ptr_v2 *bp;
+
+       if (!bch2_btree_node_relock(iter, plevel))
+               return;
+
+       k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+       BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);
+
+       bp = (void *) bkeyp_val(&l->b->format, k);
+       bp->mem_ptr = (unsigned long)b;
+
+       if (!locked)
+               btree_node_unlock(iter, plevel);
+}
+
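+/* Descend one level, locking the child node the node iterator points at: */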
+static __always_inline int btree_iter_down(struct btree_iter *iter)
+{
+       struct bch_fs *c = iter->trans->c;
+       struct btree_iter_level *l = &iter->l[iter->level];
+       struct btree *b;
+       unsigned level = iter->level - 1;
+       enum six_lock_type lock_type = __btree_lock_want(iter, level);
+       BKEY_PADDED(k) tmp;
+
+       EBUG_ON(!btree_node_locked(iter, iter->level));
+
+       bch2_bkey_unpack(l->b, &tmp.k,
+                        bch2_btree_node_iter_peek(&l->iter, l->b));
+
+       b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type);
+       if (unlikely(IS_ERR(b)))
+               return PTR_ERR(b);
+
+       mark_btree_node_locked(iter, level, lock_type);
+       btree_iter_node_set(iter, b);
+
+       if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 &&
+           unlikely(b != btree_node_mem_ptr(&tmp.k)))
+               btree_node_mem_ptr_set(iter, level + 1, b);
+
+       if (iter->flags & BTREE_ITER_PREFETCH)
+               btree_iter_prefetch(iter);
+
+       iter->level = level;
+
+       return 0;
+}
+
+static void btree_iter_up(struct btree_iter *iter)
+{
+       btree_node_unlock(iter, iter->level++);
+}
+
+static int btree_iter_traverse_one(struct btree_iter *);
+
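+/*
+ * Traverse every iterator in the transaction from scratch: unlock
+ * everything, then redo the traversals in sorted order, so that locks are
+ * always acquired in a consistent order across iterators:
+ */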
+static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *iter;
+       u8 sorted[BTREE_ITER_MAX];
+       unsigned i, nr_sorted = 0;
+
+       if (trans->in_traverse_all)
+               return -EINTR;
+
+       trans->in_traverse_all = true;
+retry_all:
+       nr_sorted = 0;
+
+       trans_for_each_iter(trans, iter)
+               sorted[nr_sorted++] = iter->idx;
+
+#define btree_iter_cmp_by_idx(_l, _r)                          \
+               btree_iter_cmp(&trans->iters[_l], &trans->iters[_r])
+
+       bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
+#undef btree_iter_cmp_by_idx
+       bch2_trans_unlock(trans);
+
+       if (unlikely(ret == -ENOMEM)) {
+               struct closure cl;
+
+               closure_init_stack(&cl);
+
+               do {
+                       ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+                       closure_sync(&cl);
+               } while (ret);
+       }
+
+       if (unlikely(ret == -EIO)) {
+               trans->error = true;
+               goto out;
+       }
+
+       BUG_ON(ret && ret != -EINTR);
+
+       /* Now, redo traversals in correct order: */
+       for (i = 0; i < nr_sorted; i++) {
+               unsigned idx = sorted[i];
+
+               /*
+                * successfully traversing one iterator can cause another to be
+                * unlinked, in btree_key_cache_fill()
+                */
+               if (!(trans->iters_linked & (1ULL << idx)))
+                       continue;
+
+               ret = btree_iter_traverse_one(&trans->iters[idx]);
+               if (ret)
+                       goto retry_all;
+       }
+
+       if (hweight64(trans->iters_live) > 1)
+               ret = -EINTR;
+       else
+               trans_for_each_iter(trans, iter)
+                       if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) {
+                               ret = -EINTR;
+                               break;
+                       }
+out:
+       bch2_btree_cache_cannibalize_unlock(c);
+
+       trans->in_traverse_all = false;
+       return ret;
+}
+
+int bch2_btree_iter_traverse_all(struct btree_trans *trans)
+{
+       return __btree_iter_traverse_all(trans, 0);
+}
+
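+/*
+ * A node is "good" if it's a real btree node, we can relock it, and the
+ * iterator's position lies inside it (on whichever side check_pos says we
+ * care about):
+ */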
+static inline bool btree_iter_good_node(struct btree_iter *iter,
+                                       unsigned l, int check_pos)
+{
+       if (!is_btree_node(iter, l) ||
+           !bch2_btree_node_relock(iter, l))
+               return false;
+
+       if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b))
+               return false;
+       if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b))
+               return false;
+       return true;
+}
+
+static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
+                                                    int check_pos)
+{
+       unsigned l = iter->level;
+
+       while (btree_iter_node(iter, l) &&
+              !btree_iter_good_node(iter, l, check_pos)) {
+               btree_node_unlock(iter, l);
+               iter->l[l].b = BTREE_ITER_NO_NODE_UP;
+               l++;
+       }
+
+       return l;
+}
+
+/*
+ * This is the main state machine for walking down the btree - walks down to a
+ * specified depth
+ *
+ * Returns 0 on success, -EIO on error (error reading in a btree node).
+ *
+ * On error, caller (peek_node()/peek_key()) must return NULL; the error is
+ * stashed in the iterator and returned from bch2_trans_exit().
+ */
+static int btree_iter_traverse_one(struct btree_iter *iter)
+{
+       unsigned depth_want = iter->level;
+
+       /*
+        * if we need interior nodes locked, call btree_iter_relock() to make
+        * sure we walk back up enough that we lock them:
+        */
+       if (iter->uptodate == BTREE_ITER_NEED_RELOCK ||
+           iter->locks_want > 1)
+               bch2_btree_iter_relock(iter, false);
+
+       if (btree_iter_type(iter) == BTREE_ITER_CACHED)
+               return bch2_btree_iter_traverse_cached(iter);
+
+       if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
+               return 0;
+
+       if (unlikely(iter->level >= BTREE_MAX_DEPTH))
+               return 0;
+
+       /*
+        * XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
+        * here unnecessary
+        */
+       iter->level = btree_iter_up_until_good_node(iter, 0);
+
+       /*
+        * If we've got a btree node locked (i.e. we aren't about to relock the
+        * root) - advance its node iterator if necessary:
+        *
+        * XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary
+        */
+       if (is_btree_node(iter, iter->level)) {
+               BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b));
+
+               btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1);
+       }
+
+       /*
+        * Note: iter->l[iter->level].b may be temporarily NULL here - that
+        * would indicate to other code that we got to the end of the btree,
+        * here it indicates that relocking the root failed - it's critical that
+        * btree_iter_lock_root() comes next and that it can't fail
+        */
+       while (iter->level > depth_want) {
+               int ret = btree_iter_node(iter, iter->level)
+                       ? btree_iter_down(iter)
+                       : btree_iter_lock_root(iter, depth_want);
+               if (unlikely(ret)) {
+                       if (ret == 1)
+                               return 0;
+
+                       iter->level = depth_want;
+
+                       if (ret == -EIO) {
+                               iter->flags |= BTREE_ITER_ERROR;
+                               iter->l[iter->level].b =
+                                       BTREE_ITER_NO_NODE_ERROR;
+                       } else {
+                               iter->l[iter->level].b =
+                                       BTREE_ITER_NO_NODE_DOWN;
+                       }
+                       return ret;
+               }
+       }
+
+       iter->uptodate = BTREE_ITER_NEED_PEEK;
+
+       bch2_btree_iter_verify(iter);
+       return 0;
+}
+
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+       struct btree_trans *trans = iter->trans;
+       int ret;
+
+       ret =   bch2_trans_cond_resched(trans) ?:
+               btree_iter_traverse_one(iter);
+       if (unlikely(ret))
+               ret = __btree_iter_traverse_all(trans, ret);
+
+       return ret;
+}
+
+static inline void bch2_btree_iter_checks(struct btree_iter *iter)
+{
+       enum btree_iter_type type = btree_iter_type(iter);
+
+       EBUG_ON(iter->btree_id >= BTREE_ID_NR);
+
+       BUG_ON((type == BTREE_ITER_KEYS ||
+               type == BTREE_ITER_CACHED) &&
+              (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
+               bkey_cmp(iter->pos, iter->k.p) > 0));
+
+       bch2_btree_iter_verify_locks(iter);
+       bch2_btree_iter_verify_level(iter, iter->level);
+}
+
+/* Iterate across nodes (leaf and interior nodes) */
+
+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
+{
+       struct btree *b;
+       int ret;
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES);
+       bch2_btree_iter_checks(iter);
+
+       if (iter->uptodate == BTREE_ITER_UPTODATE)
+               return iter->l[iter->level].b;
+
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               return NULL;
+
+       b = btree_iter_node(iter, iter->level);
+       if (!b)
+               return NULL;
+
+       BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0);
+
+       iter->pos = b->key.k.p;
+       iter->uptodate = BTREE_ITER_UPTODATE;
+
+       bch2_btree_iter_verify(iter);
+
+       return b;
+}
+
+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
+{
+       struct btree *b;
+       int ret;
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_NODES);
+       bch2_btree_iter_checks(iter);
+
+       /* already got to end? */
+       if (!btree_iter_node(iter, iter->level))
+               return NULL;
+
+       bch2_trans_cond_resched(iter->trans);
+
+       btree_iter_up(iter);
+
+       if (!bch2_btree_node_relock(iter, iter->level))
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               return NULL;
+
+       /* got to end? */
+       b = btree_iter_node(iter, iter->level);
+       if (!b)
+               return NULL;
+
+       if (bkey_cmp(iter->pos, b->key.k.p) < 0) {
+               /*
+                * Haven't gotten to the end of the parent node: go back down to
+                * the next child node
+                */
+
+               /*
+                * We don't really want to be unlocking here, but we have no
+                * way to tell btree_iter_traverse() "traverse to this level"
+                * other than by setting iter->level, so we have to unlock to
+                * avoid breaking our lock invariants:
+                */
+               if (btree_node_read_locked(iter, iter->level))
+                       btree_node_unlock(iter, iter->level);
+
+               iter->pos       = bkey_successor(iter->pos);
+               iter->level     = iter->min_depth;
+
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+               ret = bch2_btree_iter_traverse(iter);
+               if (ret)
+                       return NULL;
+
+               b = iter->l[iter->level].b;
+       }
+
+       iter->pos = b->key.k.p;
+       iter->uptodate = BTREE_ITER_UPTODATE;
+
+       bch2_btree_iter_verify(iter);
+
+       return b;
+}
+
+/* Iterate across keys (in leaf nodes only) */
+
+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos)
+{
+       struct btree_iter_level *l = &iter->l[0];
+
+       EBUG_ON(iter->level != 0);
+       EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0);
+       EBUG_ON(!btree_node_locked(iter, 0));
+       EBUG_ON(bkey_cmp(new_pos, l->b->key.k.p) > 0);
+
+       bkey_init(&iter->k);
+       iter->k.p = iter->pos = new_pos;
+       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+
+       btree_iter_advance_to_pos(iter, l, -1);
+
+       /*
+        * XXX:
+        * keeping a node locked that's outside (even just outside) iter->pos
+        * breaks __bch2_btree_node_lock(). This seems to only affect
+        * bch2_btree_node_get_sibling so for now it's fixed there, but we
+        * should try to get rid of this corner case.
+        *
+        * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK)
+        */
+
+       if (bch2_btree_node_iter_end(&l->iter) &&
+           btree_iter_pos_after_node(iter, l->b))
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+}
+
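+/*
+ * The iterator's search position changed (cmp is the direction relative to
+ * the old position): walk up until we're at a node that still covers the new
+ * position, then reposition the node iterator within it.
+ */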
+static void btree_iter_pos_changed(struct btree_iter *iter, int cmp)
+{
+       unsigned l = iter->level;
+
+       if (!cmp)
+               goto out;
+
+       if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) {
+               btree_node_unlock(iter, 0);
+               iter->l[0].b = BTREE_ITER_NO_NODE_UP;
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+               return;
+       }
+
+       l = btree_iter_up_until_good_node(iter, cmp);
+
+       if (btree_iter_node(iter, l)) {
+               /*
+                * We might have to skip over many keys, or just a few: try
+                * advancing the node iterator, and if we have to skip over too
+                * many keys just reinit it (or if we're rewinding, since that
+                * is expensive).
+                */
+               if (cmp < 0 ||
+                   !btree_iter_advance_to_pos(iter, &iter->l[l], 8))
+                       __btree_iter_init(iter, l);
+
+               /* Don't leave it locked if we're not supposed to: */
+               if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED)
+                       btree_node_unlock(iter, l);
+       }
+out:
+       if (l != iter->level)
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+       else
+               btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+}
+
+void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos,
+                              bool strictly_greater)
+{
+       struct bpos old = btree_iter_search_key(iter);
+       int cmp;
+
+       iter->flags &= ~BTREE_ITER_IS_EXTENTS;
+       iter->flags |= strictly_greater ? BTREE_ITER_IS_EXTENTS : 0;
+
+       bkey_init(&iter->k);
+       iter->k.p = iter->pos = new_pos;
+
+       cmp = bkey_cmp(btree_iter_search_key(iter), old);
+
+       btree_iter_pos_changed(iter, cmp);
+}
+
+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+{
+       int cmp = bkey_cmp(new_pos, iter->pos);
+
+       bkey_init(&iter->k);
+       iter->k.p = iter->pos = new_pos;
+
+       btree_iter_pos_changed(iter, cmp);
+}
+
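+/*
+ * Advance past the end of the current leaf to the start of the next one;
+ * returns false if we were already at the last leaf of the btree:
+ */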
+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       bool ret;
+
+       bkey_init(&iter->k);
+       iter->k.p = iter->pos = l->b->key.k.p;
+
+       ret = bkey_cmp(iter->pos, POS_MAX) != 0;
+       if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+               iter->k.p = iter->pos = bkey_successor(iter->pos);
+
+       btree_iter_pos_changed(iter, 1);
+       return ret;
+}
+
+static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       bool ret;
+
+       bkey_init(&iter->k);
+       iter->k.p = iter->pos = l->b->data->min_key;
+       iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+
+       ret = bkey_cmp(iter->pos, POS_MIN) != 0;
+       if (ret) {
+               iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+
+               if (iter->flags & BTREE_ITER_IS_EXTENTS)
+                       iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+       }
+
+       btree_iter_pos_changed(iter, -1);
+       return ret;
+}
+
+/**
+ * btree_iter_peek_uptodate - given an iterator that is uptodate, return the key
+ * it currently points to
+ */
+static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_s_c ret = { .k = &iter->k };
+
+       if (!bkey_deleted(&iter->k)) {
+               struct bkey_packed *_k =
+                       __bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+               ret.v = bkeyp_val(&l->b->format, _k);
+
+               if (debug_check_iterators(iter->trans->c)) {
+                       struct bkey k = bkey_unpack_key(l->b, _k);
+
+                       BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
+               }
+
+               if (debug_check_bkeys(iter->trans->c))
+                       bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
+       }
+
+       return ret;
+}
+
+/**
+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
+ * current position
+ */
+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_s_c k;
+       int ret;
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
+       bch2_btree_iter_checks(iter);
+
+       if (iter->uptodate == BTREE_ITER_UPTODATE &&
+           !bkey_deleted(&iter->k))
+               return btree_iter_peek_uptodate(iter);
+
+       while (1) {
+               ret = bch2_btree_iter_traverse(iter);
+               if (unlikely(ret))
+                       return bkey_s_c_err(ret);
+
+               k = __btree_iter_peek(iter, l);
+               if (likely(k.k))
+                       break;
+
+               if (!btree_iter_set_pos_to_next_leaf(iter))
+                       return bkey_s_c_null;
+       }
+
+       /*
+        * iter->pos should always be equal to the key we just
+        * returned - except extents can straddle iter->pos:
+        */
+       if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
+           bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+               iter->pos = bkey_start_pos(k.k);
+
+       iter->uptodate = BTREE_ITER_UPTODATE;
+
+       bch2_btree_iter_verify_level(iter, 0);
+       return k;
+}
+
+/**
+ * bch2_btree_iter_next: returns first key greater than iterator's current
+ * position
+ */
+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
+{
+       if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+               return bkey_s_c_null;
+
+       bch2_btree_iter_set_pos(iter,
+               (iter->flags & BTREE_ITER_IS_EXTENTS)
+               ? iter->k.p
+               : bkey_successor(iter->k.p));
+
+       return bch2_btree_iter_peek(iter);
+}
+
+static struct bkey_s_c __btree_trans_updates_peek(struct btree_iter *iter)
+{
+       struct bpos pos = btree_iter_search_key(iter);
+       struct btree_trans *trans = iter->trans;
+       struct btree_insert_entry *i;
+
+       trans_for_each_update2(trans, i)
+               if ((cmp_int(iter->btree_id,    i->iter->btree_id) ?:
+                    bkey_cmp(pos,              i->k->k.p)) <= 0)
+                       break;
+
+       return i < trans->updates2 + trans->nr_updates2 &&
+               iter->btree_id == i->iter->btree_id
+               ? bkey_i_to_s_c(i->k)
+               : bkey_s_c_null;
+}
+
+static struct bkey_s_c __bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_s_c k = __btree_iter_peek(iter, l);
+       struct bkey_s_c u = __btree_trans_updates_peek(iter);
+
+       if (k.k && (!u.k || bkey_cmp(k.k->p, u.k->p) < 0))
+               return k;
+       if (u.k && bkey_cmp(u.k->p, l->b->key.k.p) <= 0) {
+               iter->k = *u.k;
+               return u;
+       }
+       return bkey_s_c_null;
+}
+
+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
+{
+       struct bkey_s_c k;
+       int ret;
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
+       bch2_btree_iter_checks(iter);
+
+       while (1) {
+               ret = bch2_btree_iter_traverse(iter);
+               if (unlikely(ret))
+                       return bkey_s_c_err(ret);
+
+               k = __bch2_btree_iter_peek_with_updates(iter);
+
+               if (k.k && bkey_deleted(k.k)) {
+                       bch2_btree_iter_set_pos(iter,
+                               (iter->flags & BTREE_ITER_IS_EXTENTS)
+                               ? iter->k.p
+                               : bkey_successor(iter->k.p));
+                       continue;
+               }
+
+               if (likely(k.k))
+                       break;
+
+               if (!btree_iter_set_pos_to_next_leaf(iter))
+                       return bkey_s_c_null;
+       }
+
+       /*
+        * iter->pos should always be equal to the key we just
+        * returned - except extents can straddle iter->pos:
+        */
+       if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
+           bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+               iter->pos = bkey_start_pos(k.k);
+
+       iter->uptodate = BTREE_ITER_UPTODATE;
+       return k;
+}
+
+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
+{
+       if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+               return bkey_s_c_null;
+
+       bch2_btree_iter_set_pos(iter,
+               (iter->flags & BTREE_ITER_IS_EXTENTS)
+               ? iter->k.p
+               : bkey_successor(iter->k.p));
+
+       return bch2_btree_iter_peek_with_updates(iter);
+}
+
+/**
+ * bch2_btree_iter_peek_prev: returns first key less than or equal to
+ * iterator's current position
+ */
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
+{
+       struct bpos pos = iter->pos;
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_s_c k;
+       int ret;
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
+       bch2_btree_iter_checks(iter);
+
+       if (iter->uptodate == BTREE_ITER_UPTODATE &&
+           !bkey_deleted(&iter->k))
+               return btree_iter_peek_uptodate(iter);
+
+       while (1) {
+               ret = bch2_btree_iter_traverse(iter);
+               if (unlikely(ret))
+                       return bkey_s_c_err(ret);
+
+               k = __btree_iter_peek(iter, l);
+               if (!k.k || bkey_cmp(bkey_start_pos(k.k), pos) > 0)
+                       k = __btree_iter_prev(iter, l);
+
+               if (likely(k.k))
+                       break;
+
+               if (!btree_iter_set_pos_to_prev_leaf(iter))
+                       return bkey_s_c_null;
+       }
+
+       EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0);
+       iter->pos       = bkey_start_pos(k.k);
+       iter->uptodate  = BTREE_ITER_UPTODATE;
+       return k;
+}
+
+/**
+ * bch2_btree_iter_prev: returns first key less than iterator's current
+ * position
+ */
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+{
+       struct bpos pos = bkey_start_pos(&iter->k);
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
+       bch2_btree_iter_checks(iter);
+
+       if (unlikely(!bkey_cmp(pos, POS_MIN)))
+               return bkey_s_c_null;
+
+       bch2_btree_iter_set_pos(iter, bkey_predecessor(pos));
+
+       return bch2_btree_iter_peek_prev(iter);
+}
+
+static inline struct bkey_s_c
+__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct btree_node_iter node_iter;
+       struct bkey_s_c k;
+       struct bkey n;
+       int ret;
+
+       /* keys & holes can't span inode numbers: */
+       if (iter->pos.offset == KEY_OFFSET_MAX) {
+               if (iter->pos.inode == KEY_INODE_MAX)
+                       return bkey_s_c_null;
+
+               bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos));
+
+               ret = bch2_btree_iter_traverse(iter);
+               if (unlikely(ret))
+                       return bkey_s_c_err(ret);
+       }
+
+       /*
+        * iterator is now at the correct position for inserting at iter->pos,
+        * but we need to keep iterating until we find the first non-whiteout so
+        * we know how big a hole we have, if any:
+        */
+
+       node_iter = l->iter;
+       k = __btree_iter_unpack(iter, l, &iter->k,
+               bch2_btree_node_iter_peek(&node_iter, l->b));
+
+       if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
+               /*
+                * We're not setting iter->uptodate because the node iterator
+                * doesn't necessarily point at the key we're returning:
+                */
+
+               EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0);
+               bch2_btree_iter_verify_level(iter, 0);
+               return k;
+       }
+
+       /* hole */
+
+       if (!k.k)
+               k.k = &l->b->key.k;
+
+       bkey_init(&n);
+       n.p = iter->pos;
+       bch2_key_resize(&n,
+                       min_t(u64, KEY_SIZE_MAX,
+                             (k.k->p.inode == n.p.inode
+                              ? bkey_start_offset(k.k)
+                              : KEY_OFFSET_MAX) -
+                             n.p.offset));
+
+       EBUG_ON(!n.size);
+
+       iter->k = n;
+       iter->uptodate = BTREE_ITER_UPTODATE;
+
+       bch2_btree_iter_verify_level(iter, 0);
+       return (struct bkey_s_c) { &iter->k, NULL };
+}
+
+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct bkey_s_c k;
+       int ret;
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
+       bch2_btree_iter_checks(iter);
+
+       if (iter->uptodate == BTREE_ITER_UPTODATE)
+               return btree_iter_peek_uptodate(iter);
+
+       ret = bch2_btree_iter_traverse(iter);
+       if (unlikely(ret))
+               return bkey_s_c_err(ret);
+
+       if (iter->flags & BTREE_ITER_IS_EXTENTS)
+               return __bch2_btree_iter_peek_slot_extents(iter);
+
+       k = __btree_iter_peek_all(iter, l, &iter->k);
+
+       EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0);
+
+       if (!k.k || bkey_cmp(iter->pos, k.k->p)) {
+               /* hole */
+               bkey_init(&iter->k);
+               iter->k.p = iter->pos;
+               k = (struct bkey_s_c) { &iter->k, NULL };
+       }
+
+       iter->uptodate = BTREE_ITER_UPTODATE;
+       bch2_btree_iter_verify_level(iter, 0);
+       return k;
+}
+
+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
+{
+       if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+               return bkey_s_c_null;
+
+       bch2_btree_iter_set_pos(iter,
+               (iter->flags & BTREE_ITER_IS_EXTENTS)
+               ? iter->k.p
+               : bkey_successor(iter->k.p));
+
+       return bch2_btree_iter_peek_slot(iter);
+}
+
+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
+{
+       struct bkey_cached *ck;
+       int ret;
+
+       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED);
+       bch2_btree_iter_checks(iter);
+
+       ret = bch2_btree_iter_traverse(iter);
+       if (unlikely(ret))
+               return bkey_s_c_err(ret);
+
+       ck = (void *) iter->l[0].b;
+
+       EBUG_ON(iter->btree_id != ck->key.btree_id ||
+               bkey_cmp(iter->pos, ck->key.pos));
+       BUG_ON(!ck->valid);
+
+       return bkey_i_to_s_c(ck->k);
+}
+
+static inline void bch2_btree_iter_init(struct btree_trans *trans,
+                       struct btree_iter *iter, enum btree_id btree_id,
+                       struct bpos pos, unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       unsigned i;
+
+       if (btree_node_type_is_extents(btree_id) &&
+           !(flags & BTREE_ITER_NODES))
+               flags |= BTREE_ITER_IS_EXTENTS;
+
+       iter->trans                     = trans;
+       iter->pos                       = pos;
+       bkey_init(&iter->k);
+       iter->k.p                       = pos;
+       iter->flags                     = flags;
+       iter->uptodate                  = BTREE_ITER_NEED_TRAVERSE;
+       iter->btree_id                  = btree_id;
+       iter->level                     = 0;
+       iter->min_depth                 = 0;
+       iter->locks_want                = flags & BTREE_ITER_INTENT ? 1 : 0;
+       iter->nodes_locked              = 0;
+       iter->nodes_intent_locked       = 0;
+       for (i = 0; i < ARRAY_SIZE(iter->l); i++)
+               iter->l[i].b            = BTREE_ITER_NO_NODE_INIT;
+
+       prefetch(c->btree_roots[btree_id].b);
+}
+
+/* new transactional stuff: */
+
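+/*
+ * Iterator state is tracked in three bitmaps (roughly, as inferred from
+ * usage): iters_linked is the set of allocated slots, iters_live the
+ * iterators currently handed out to callers, and iters_touched those worth
+ * preserving across a reset because they're likely to be used again.
+ */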
+static inline void __bch2_trans_iter_free(struct btree_trans *trans,
+                                         unsigned idx)
+{
+       __bch2_btree_iter_unlock(&trans->iters[idx]);
+       trans->iters_linked             &= ~(1ULL << idx);
+       trans->iters_live               &= ~(1ULL << idx);
+       trans->iters_touched            &= ~(1ULL << idx);
+}
+
+int bch2_trans_iter_put(struct btree_trans *trans,
+                       struct btree_iter *iter)
+{
+       int ret;
+
+       if (IS_ERR_OR_NULL(iter))
+               return 0;
+
+       BUG_ON(trans->iters + iter->idx != iter);
+
+       ret = btree_iter_err(iter);
+
+       if (!(trans->iters_touched & (1ULL << iter->idx)) &&
+           !(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT))
+               __bch2_trans_iter_free(trans, iter->idx);
+
+       trans->iters_live       &= ~(1ULL << iter->idx);
+       return ret;
+}
+
+int bch2_trans_iter_free(struct btree_trans *trans,
+                        struct btree_iter *iter)
+{
+       if (IS_ERR_OR_NULL(iter))
+               return 0;
+
+       trans->iters_touched &= ~(1ULL << iter->idx);
+
+       return bch2_trans_iter_put(trans, iter);
+}
+
+static int bch2_trans_realloc_iters(struct btree_trans *trans,
+                                   unsigned new_size)
+{
+       void *p, *new_iters, *new_updates, *new_updates2;
+       size_t iters_bytes;
+       size_t updates_bytes;
+
+       new_size = roundup_pow_of_two(new_size);
+
+       BUG_ON(new_size > BTREE_ITER_MAX);
+
+       if (new_size <= trans->size)
+               return 0;
+
+       BUG_ON(trans->used_mempool);
+
+       bch2_trans_unlock(trans);
+
+       iters_bytes     = sizeof(struct btree_iter) * new_size;
+       updates_bytes   = sizeof(struct btree_insert_entry) * new_size;
+
+       p = kmalloc(iters_bytes +
+                   updates_bytes +
+                   updates_bytes, GFP_NOFS);
+       if (p)
+               goto success;
+
+       p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS);
+       new_size = BTREE_ITER_MAX;
+
+       trans->used_mempool = true;
+success:
+       new_iters       = p; p += iters_bytes;
+       new_updates     = p; p += updates_bytes;
+       new_updates2    = p; p += updates_bytes;
+
+       memcpy(new_iters, trans->iters,
+              sizeof(struct btree_iter) * trans->nr_iters);
+       memcpy(new_updates, trans->updates,
+              sizeof(struct btree_insert_entry) * trans->nr_updates);
+       memcpy(new_updates2, trans->updates2,
+              sizeof(struct btree_insert_entry) * trans->nr_updates2);
+
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+               memset(trans->iters, POISON_FREE,
+                      sizeof(struct btree_iter) * trans->nr_iters +
+                      sizeof(struct btree_insert_entry) * trans->nr_iters);
+
+       if (trans->iters != trans->iters_onstack)
+               kfree(trans->iters);
+
+       trans->iters            = new_iters;
+       trans->updates          = new_updates;
+       trans->updates2         = new_updates2;
+       trans->size             = new_size;
+
+       if (trans->iters_live) {
+               trace_trans_restart_iters_realloced(trans->ip, trans->size);
+               return -EINTR;
+       }
+
+       return 0;
+}
+
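+/*
+ * Allocate an iterator slot: iters_linked is a bitmap of slots in use, so
+ * the first clear bit is the first free slot. If every slot is linked we
+ * grow the arrays, or panic once BTREE_ITER_MAX slots are exhausted.
+ */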
+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
+{
+       unsigned idx = __ffs64(~trans->iters_linked);
+
+       if (idx < trans->nr_iters)
+               goto got_slot;
+
+       if (trans->nr_iters == trans->size) {
+               int ret;
+
+               if (trans->nr_iters >= BTREE_ITER_MAX) {
+                       struct btree_iter *iter;
+
+                       trans_for_each_iter(trans, iter) {
+                               pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
+                                      bch2_btree_ids[iter->btree_id],
+                                      iter->pos.inode,
+                                      iter->pos.offset,
+                                      (trans->iters_live & (1ULL << iter->idx)) ? " live" : "",
+                                      (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "",
+                                      iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "",
+                                      (void *) iter->ip_allocated);
+                       }
+
+                       panic("trans iter overflow\n");
+               }
+
+               ret = bch2_trans_realloc_iters(trans, trans->size * 2);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
+
+       idx = trans->nr_iters++;
+       BUG_ON(trans->nr_iters > trans->size);
+
+       trans->iters[idx].idx = idx;
+got_slot:
+       BUG_ON(trans->iters_linked & (1ULL << idx));
+       trans->iters_linked |= 1ULL << idx;
+       trans->iters[idx].flags = 0;
+       return &trans->iters[idx];
+}
+
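+/*
+ * Copying an iterator also has to take new references on the locks it
+ * holds: six_lock_increment() bumps the count on a lock this task already
+ * has.
+ */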
+static inline void btree_iter_copy(struct btree_iter *dst,
+                                  struct btree_iter *src)
+{
+       unsigned i, idx = dst->idx;
+
+       *dst = *src;
+       dst->idx = idx;
+
+       for (i = 0; i < BTREE_MAX_DEPTH; i++)
+               if (btree_node_locked(dst, i))
+                       six_lock_increment(&dst->l[i].b->c.lock,
+                                          __btree_lock_want(dst, i));
+
+       dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
+       dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT;
+}
+
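+/* Distance between two positions, for picking the closest existing iterator: */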
+static inline struct bpos bpos_diff(struct bpos l, struct bpos r)
+{
+       if (bkey_cmp(l, r) > 0)
+               swap(l, r);
+
+       return POS(r.inode - l.inode, r.offset - l.offset);
+}
+
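+/*
+ * Getting an iterator prefers reusing one we already have: find the
+ * existing iterator of the right type on this btree closest to pos; copy it
+ * if it's live or must be kept until commit, and only allocate a fresh one
+ * if nothing matches.
+ */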
+static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
+                                                unsigned btree_id, struct bpos pos,
+                                                unsigned flags)
+{
+       struct btree_iter *iter, *best = NULL;
+
+       BUG_ON(trans->nr_iters > BTREE_ITER_MAX);
+
+       trans_for_each_iter(trans, iter) {
+               if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE))
+                       continue;
+
+               if (iter->btree_id != btree_id)
+                       continue;
+
+               if (best &&
+                   bkey_cmp(bpos_diff(best->pos, pos),
+                            bpos_diff(iter->pos, pos)) < 0)
+                       continue;
+
+               best = iter;
+       }
+
+       if (!best) {
+               iter = btree_trans_iter_alloc(trans);
+               if (IS_ERR(iter))
+                       return iter;
+
+               bch2_btree_iter_init(trans, iter, btree_id, pos, flags);
+       } else if ((trans->iters_live & (1ULL << best->idx)) ||
+                  (best->flags & BTREE_ITER_KEEP_UNTIL_COMMIT)) {
+               iter = btree_trans_iter_alloc(trans);
+               if (IS_ERR(iter))
+                       return iter;
+
+               btree_iter_copy(iter, best);
+       } else {
+               iter = best;
+       }
+
+       iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
+       iter->flags &= ~BTREE_ITER_USER_FLAGS;
+       iter->flags |= flags & BTREE_ITER_USER_FLAGS;
+
+       if (iter->flags & BTREE_ITER_INTENT)
+               bch2_btree_iter_upgrade(iter, 1);
+       else
+               bch2_btree_iter_downgrade(iter);
+
+       BUG_ON(iter->btree_id != btree_id);
+       BUG_ON((iter->flags ^ flags) & BTREE_ITER_TYPE);
+       BUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
+       BUG_ON(iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT);
+       BUG_ON(trans->iters_live & (1ULL << iter->idx));
+
+       trans->iters_live       |= 1ULL << iter->idx;
+       trans->iters_touched    |= 1ULL << iter->idx;
+
+       return iter;
+}
+
+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
+                                        enum btree_id btree_id,
+                                        struct bpos pos, unsigned flags)
+{
+       struct btree_iter *iter =
+               __btree_trans_get_iter(trans, btree_id, pos, flags);
+
+       if (!IS_ERR(iter))
+               __bch2_btree_iter_set_pos(iter, pos,
+                       btree_node_type_is_extents(btree_id));
+       return iter;
+}
+
+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
+                                           enum btree_id btree_id,
+                                           struct bpos pos,
+                                           unsigned locks_want,
+                                           unsigned depth,
+                                           unsigned flags)
+{
+       struct btree_iter *iter =
+               __btree_trans_get_iter(trans, btree_id, pos,
+                                      flags|BTREE_ITER_NODES);
+       unsigned i;
+
+       BUG_ON(IS_ERR(iter));
+       BUG_ON(bkey_cmp(iter->pos, pos));
+
+       iter->locks_want = locks_want;
+       iter->level     = depth;
+       iter->min_depth = depth;
+
+       for (i = 0; i < ARRAY_SIZE(iter->l); i++)
+               iter->l[i].b            = NULL;
+       iter->l[iter->level].b          = BTREE_ITER_NO_NODE_INIT;
+
+       return iter;
+}
+
+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans,
+                                       struct btree_iter *src)
+{
+       struct btree_iter *iter;
+
+       iter = btree_trans_iter_alloc(trans);
+       if (IS_ERR(iter))
+               return iter;
+
+       btree_iter_copy(iter, src);
+
+       trans->iters_live |= 1ULL << iter->idx;
+       /*
+        * We don't need to preserve this iter since it's cheap to copy it
+        * again - this will cause trans_iter_put() to free it right away:
+        */
+       trans->iters_touched &= ~(1ULL << iter->idx);
+
+       return iter;
+}
+
+static int bch2_trans_preload_mem(struct btree_trans *trans, size_t size)
+{
+       if (size > trans->mem_bytes) {
+               size_t old_bytes = trans->mem_bytes;
+               size_t new_bytes = roundup_pow_of_two(size);
+               void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
+
+               if (!new_mem)
+                       return -ENOMEM;
+
+               trans->mem = new_mem;
+               trans->mem_bytes = new_bytes;
+
+               if (old_bytes) {
+                       trace_trans_restart_mem_realloced(trans->ip, new_bytes);
+                       return -EINTR;
+               }
+       }
+
+       return 0;
+}
+
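+/*
+ * Bump allocator for memory that lives until the transaction is reset.
+ * Returns an ERR_PTR on failure; growing the buffer can also force a
+ * transaction restart (-EINTR). Illustrative use:
+ *
+ *     struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
+ *     if (IS_ERR(k))
+ *             return PTR_ERR(k);
+ */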
+void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+{
+       void *p;
+       int ret;
+
+       ret = bch2_trans_preload_mem(trans, trans->mem_top + size);
+       if (ret)
+               return ERR_PTR(ret);
+
+       p = trans->mem + trans->mem_top;
+       trans->mem_top += size;
+       return p;
+}
+
+inline void bch2_trans_unlink_iters(struct btree_trans *trans)
+{
+       u64 iters = trans->iters_linked &
+               ~trans->iters_touched &
+               ~trans->iters_live;
+
+       while (iters) {
+               unsigned idx = __ffs64(iters);
+
+               iters &= ~(1ULL << idx);
+               __bch2_trans_iter_free(trans, idx);
+       }
+}
+
+void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
+{
+       struct btree_iter *iter;
+
+       trans_for_each_iter(trans, iter)
+               iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT|
+                                BTREE_ITER_SET_POS_AFTER_COMMIT);
+
+       bch2_trans_unlink_iters(trans);
+
+       trans->iters_touched &= trans->iters_live;
+
+       trans->need_reset               = 0;
+       trans->nr_updates               = 0;
+       trans->nr_updates2              = 0;
+       trans->mem_top                  = 0;
+
+       trans->extra_journal_entries    = NULL;
+       trans->extra_journal_entry_u64s = 0;
+
+       if (trans->fs_usage_deltas) {
+               trans->fs_usage_deltas->used = 0;
+               memset(&trans->fs_usage_deltas->memset_start, 0,
+                      (void *) &trans->fs_usage_deltas->memset_end -
+                      (void *) &trans->fs_usage_deltas->memset_start);
+       }
+
+       if (!(flags & TRANS_RESET_NOTRAVERSE))
+               bch2_btree_iter_traverse_all(trans);
+}
+
+void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
+                    unsigned expected_nr_iters,
+                    size_t expected_mem_bytes)
+{
+       memset(trans, 0, offsetof(struct btree_trans, iters_onstack));
+
+       /*
+        * reallocating iterators currently completely breaks
+        * bch2_trans_iter_put():
+        */
+       expected_nr_iters = BTREE_ITER_MAX;
+
+       trans->c                = c;
+       trans->ip               = _RET_IP_;
+       trans->size             = ARRAY_SIZE(trans->iters_onstack);
+       trans->iters            = trans->iters_onstack;
+       trans->updates          = trans->updates_onstack;
+       trans->updates2         = trans->updates2_onstack;
+       trans->fs_usage_deltas  = NULL;
+
+       if (expected_nr_iters > trans->size)
+               bch2_trans_realloc_iters(trans, expected_nr_iters);
+
+       if (expected_mem_bytes)
+               bch2_trans_preload_mem(trans, expected_mem_bytes);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       trans->pid = current->pid;
+       mutex_lock(&c->btree_trans_lock);
+       list_add(&trans->list, &c->btree_trans_list);
+       mutex_unlock(&c->btree_trans_lock);
+#endif
+}
+
+int bch2_trans_exit(struct btree_trans *trans)
+{
+       bch2_trans_unlock(trans);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       mutex_lock(&trans->c->btree_trans_lock);
+       list_del(&trans->list);
+       mutex_unlock(&trans->c->btree_trans_lock);
+#endif
+
+       bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
+
+       kfree(trans->fs_usage_deltas);
+       kfree(trans->mem);
+       if (trans->used_mempool)
+               mempool_free(trans->iters, &trans->c->btree_iters_pool);
+       else if (trans->iters != trans->iters_onstack)
+               kfree(trans->iters);
+       trans->mem      = (void *) 0x1;
+       trans->iters    = (void *) 0x1;
+
+       return trans->error ? -EIO : 0;
+}
+
+static void bch2_btree_iter_node_to_text(struct printbuf *out,
+                                struct btree_bkey_cached_common *_b,
+                                enum btree_iter_type type)
+{
+       pr_buf(out, "    %px l=%u %s:",
+              _b, _b->level, bch2_btree_ids[_b->btree_id]);
+       bch2_bpos_to_text(out, btree_node_pos(_b, type));
+}
+
+void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct btree_trans *trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       unsigned l;
+
+       mutex_lock(&c->btree_trans_lock);
+       list_for_each_entry(trans, &c->btree_trans_list, list) {
+               pr_buf(out, "%i %px %ps\n", trans->pid, trans, (void *) trans->ip);
+
+               trans_for_each_iter(trans, iter) {
+                       if (!iter->nodes_locked)
+                               continue;
+
+                       pr_buf(out, "  iter %u %s:",
+                              iter->idx,
+                              bch2_btree_ids[iter->btree_id]);
+                       bch2_bpos_to_text(out, iter->pos);
+                       pr_buf(out, "\n");
+
+                       for (l = 0; l < BTREE_MAX_DEPTH; l++) {
+                               if (btree_node_locked(iter, l)) {
+                                       pr_buf(out, "    %s l=%u ",
+                                              btree_node_intent_locked(iter, l) ? "i" : "r", l);
+                                       bch2_btree_iter_node_to_text(out,
+                                                       (void *) iter->l[l].b,
+                                                       btree_iter_type(iter));
+                                       pr_buf(out, "\n");
+                               }
+                       }
+               }
+
+               b = READ_ONCE(trans->locking);
+               if (b) {
+                       pr_buf(out, "  locking iter %u l=%u %s:",
+                              trans->locking_iter_idx,
+                              trans->locking_level,
+                              bch2_btree_ids[trans->locking_btree_id]);
+                       bch2_bpos_to_text(out, trans->locking_pos);
+
+                       pr_buf(out, " node ");
+                       bch2_btree_iter_node_to_text(out,
+                                       (void *) b,
+                                       btree_iter_type(&trans->iters[trans->locking_iter_idx]));
+                       pr_buf(out, "\n");
+               }
+       }
+       mutex_unlock(&c->btree_trans_lock);
+#endif
+}
+
+void bch2_fs_btree_iter_exit(struct bch_fs *c)
+{
+       mempool_exit(&c->btree_iters_pool);
+}
+
+int bch2_fs_btree_iter_init(struct bch_fs *c)
+{
+       unsigned nr = BTREE_ITER_MAX;
+
+       INIT_LIST_HEAD(&c->btree_trans_list);
+       mutex_init(&c->btree_trans_lock);
+
+       return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
+                       sizeof(struct btree_iter) * nr +
+                       sizeof(struct btree_insert_entry) * nr +
+                       sizeof(struct btree_insert_entry) * nr);
+}
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
new file mode 100644 (file)
index 0000000..bd9ec3e
--- /dev/null
@@ -0,0 +1,314 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_ITER_H
+#define _BCACHEFS_BTREE_ITER_H
+
+#include "bset.h"
+#include "btree_types.h"
+
+static inline void btree_iter_set_dirty(struct btree_iter *iter,
+                                       enum btree_iter_uptodate u)
+{
+       iter->uptodate = max_t(unsigned, iter->uptodate, u);
+}
+
+static inline struct btree *btree_iter_node(struct btree_iter *iter,
+                                           unsigned level)
+{
+       return level < BTREE_MAX_DEPTH ? iter->l[level].b : NULL;
+}
+
+static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter,
+                                       const struct btree *b, unsigned level)
+{
+       /*
+        * We don't compare the low bits of the lock sequence numbers because
+        * @iter might have taken a write lock on @b, and we don't want to skip
+        * the linked iterator if the sequence numbers were equal before taking
+        * that write lock. The lock sequence number is incremented by taking
+        * and releasing write locks and is even when unlocked:
+        */
+       return iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1;
+}
+
+static inline struct btree *btree_node_parent(struct btree_iter *iter,
+                                             struct btree *b)
+{
+       return btree_iter_node(iter, b->c.level + 1);
+}
+
+static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans)
+{
+       return hweight64(trans->iters_linked) > 1;
+}
+
+static inline int btree_iter_err(const struct btree_iter *iter)
+{
+       return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
+}
+
+/* Iterate over iters within a transaction: */
+
+#define trans_for_each_iter_all(_trans, _iter)                         \
+       for (_iter = (_trans)->iters;                                   \
+            _iter < (_trans)->iters + (_trans)->nr_iters;              \
+            _iter++)
+
+static inline struct btree_iter *
+__trans_next_iter(struct btree_trans *trans, unsigned idx)
+{
+       EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
+
+       for (; idx < trans->nr_iters; idx++)
+               if (trans->iters_linked & (1ULL << idx))
+                       return &trans->iters[idx];
+
+       return NULL;
+}
+
+#define trans_for_each_iter(_trans, _iter)                             \
+       for (_iter = __trans_next_iter((_trans), 0);                    \
+            (_iter);                                                   \
+            _iter = __trans_next_iter((_trans), (_iter)->idx + 1))
+
+static inline bool __iter_has_node(const struct btree_iter *iter,
+                                  const struct btree *b)
+{
+       return iter->l[b->c.level].b == b &&
+               btree_node_lock_seq_matches(iter, b, b->c.level);
+}
+
+static inline struct btree_iter *
+__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
+                           unsigned idx)
+{
+       struct btree_iter *iter = __trans_next_iter(trans, idx);
+
+       while (iter && !__iter_has_node(iter, b))
+               iter = __trans_next_iter(trans, iter->idx + 1);
+
+       return iter;
+}
+
+#define trans_for_each_iter_with_node(_trans, _b, _iter)               \
+       for (_iter = __trans_next_iter_with_node((_trans), (_b), 0);    \
+            (_iter);                                                   \
+            _iter = __trans_next_iter_with_node((_trans), (_b),        \
+                                                (_iter)->idx + 1))
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_trans_verify_iters(struct btree_trans *, struct btree *);
+void bch2_btree_trans_verify_locks(struct btree_trans *);
+#else
+static inline void bch2_btree_trans_verify_iters(struct btree_trans *trans,
+                                                struct btree *b) {}
+static inline void bch2_btree_trans_verify_locks(struct btree_trans *iter) {}
+#endif
+
+void bch2_btree_iter_fix_key_modified(struct btree_iter *, struct btree *,
+                                          struct bkey_packed *);
+void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
+                             struct btree_node_iter *, struct bkey_packed *,
+                             unsigned, unsigned);
+
+bool bch2_btree_iter_relock(struct btree_iter *, bool);
+bool bch2_trans_relock(struct btree_trans *);
+void bch2_trans_unlock(struct btree_trans *);
+
+bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
+bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned);
+
+static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
+                                          unsigned new_locks_want)
+{
+       new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
+
+       return iter->locks_want < new_locks_want
+               ? (!iter->trans->nounlock
+                  ? __bch2_btree_iter_upgrade(iter, new_locks_want)
+                  : __bch2_btree_iter_upgrade_nounlock(iter, new_locks_want))
+               : iter->uptodate <= BTREE_ITER_NEED_PEEK;
+}
+
+void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned);
+
+static inline void bch2_btree_iter_downgrade(struct btree_iter *iter)
+{
+       if (iter->locks_want > (iter->flags & BTREE_ITER_INTENT ? 1 : 0))
+               __bch2_btree_iter_downgrade(iter, 0);
+}
+
+void bch2_trans_downgrade(struct btree_trans *);
+
+void bch2_btree_iter_node_replace(struct btree_iter *, struct btree *);
+void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
+
+void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
+
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
+
+static inline int __must_check
+bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+       return iter->uptodate >= BTREE_ITER_NEED_RELOCK
+               ? __bch2_btree_iter_traverse(iter)
+               : 0;
+}
+
+int bch2_btree_iter_traverse_all(struct btree_trans *);
+
+struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
+struct btree *bch2_btree_iter_next_node(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *);
+
+void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
+void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
+void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
+
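+/*
+ * Sort order for iterators: by btree, then type, then position. This is the
+ * order __btree_iter_traverse_all() retraverses in, so it's effectively the
+ * lock acquisition order:
+ */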
+static inline int btree_iter_cmp(const struct btree_iter *l,
+                                const struct btree_iter *r)
+{
+       return   cmp_int(l->btree_id, r->btree_id) ?:
+               -cmp_int(btree_iter_type(l), btree_iter_type(r)) ?:
+                bkey_cmp(l->pos, r->pos);
+}
+
+/*
+ * Unlocks before scheduling
+ * Note: does not revalidate iterator
+ */
+static inline int bch2_trans_cond_resched(struct btree_trans *trans)
+{
+       if (need_resched() || race_fault()) {
+               bch2_trans_unlock(trans);
+               schedule();
+               return bch2_trans_relock(trans) ? 0 : -EINTR;
+       } else {
+               return 0;
+       }
+}
+
+#define __for_each_btree_node(_trans, _iter, _btree_id, _start,        \
+                             _locks_want, _depth, _flags, _b)          \
+       for (iter = bch2_trans_get_node_iter((_trans), (_btree_id),     \
+                               _start, _locks_want, _depth, _flags),   \
+            _b = bch2_btree_iter_peek_node(_iter);                     \
+            (_b);                                                      \
+            (_b) = bch2_btree_iter_next_node(_iter))
+
+#define for_each_btree_node(_trans, _iter, _btree_id, _start,          \
+                           _flags, _b)                                 \
+       __for_each_btree_node(_trans, _iter, _btree_id, _start,         \
+                             0, 0, _flags, _b)
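+
+/*
+ * Illustrative sketch (not part of the upstream file) of walking every node
+ * of one btree with for_each_btree_node(). `c` is an assumed struct bch_fs
+ * pointer; BTREE_ID_EXTENTS and POS_MIN come from the wider tree:
+ *
+ *     struct btree_trans trans;
+ *     struct btree_iter *iter;
+ *     struct btree *b;
+ *
+ *     bch2_trans_init(&trans, c, 0, 0);
+ *
+ *     for_each_btree_node(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, b) {
+ *             (inspect b here - it is locked while the body runs)
+ *     }
+ *
+ *     bch2_trans_exit(&trans);
+ */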
+
+static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
+                                                    unsigned flags)
+{
+       if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED)
+               return bch2_btree_iter_peek_cached(iter);
+       else
+               return flags & BTREE_ITER_SLOTS
+                       ? bch2_btree_iter_peek_slot(iter)
+                       : bch2_btree_iter_peek(iter);
+}
+
+static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
+                                                    unsigned flags)
+{
+       return flags & BTREE_ITER_SLOTS
+               ? bch2_btree_iter_next_slot(iter)
+               : bch2_btree_iter_next(iter);
+}
+
+static inline int bkey_err(struct bkey_s_c k)
+{
+       return PTR_ERR_OR_ZERO(k.k);
+}
+
+#define for_each_btree_key(_trans, _iter, _btree_id,                   \
+                          _start, _flags, _k, _ret)                    \
+       for ((_ret) = PTR_ERR_OR_ZERO((_iter) =                         \
+                       bch2_trans_get_iter((_trans), (_btree_id),      \
+                                           (_start), (_flags))) ?:     \
+                     PTR_ERR_OR_ZERO(((_k) =                           \
+                       __bch2_btree_iter_peek(_iter, _flags)).k);      \
+            !_ret && (_k).k;                                           \
+            (_ret) = PTR_ERR_OR_ZERO(((_k) =                           \
+                       __bch2_btree_iter_next(_iter, _flags)).k))
+
+#define for_each_btree_key_continue(_iter, _flags, _k, _ret)           \
+       for ((_k) = __bch2_btree_iter_peek(_iter, _flags);              \
+            !((_ret) = bkey_err(_k)) && (_k).k;                        \
+            (_k) = __bch2_btree_iter_next(_iter, _flags))
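+
+/*
+ * Illustrative sketch (not part of the upstream file) of a key scan with
+ * for_each_btree_key(). `c` is an assumed struct bch_fs pointer; _ret
+ * carries both iterator allocation errors and peek errors, and -EINTR
+ * means the whole transaction must be restarted:
+ *
+ *     struct btree_trans trans;
+ *     struct btree_iter *iter;
+ *     struct bkey_s_c k;
+ *     int ret;
+ *
+ *     bch2_trans_init(&trans, c, 0, 0);
+ *
+ *     for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret) {
+ *             (use k here; bch2_trans_cond_resched() may be called on long
+ *              scans, but it unlocks and does not revalidate the iterator)
+ *     }
+ *
+ *     ret = bch2_trans_exit(&trans) ?: ret;
+ */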
+
+/* new multiple iterator interface: */
+
+int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *);
+int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *);
+
+void bch2_trans_unlink_iters(struct btree_trans *);
+
+struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id,
+                                        struct bpos, unsigned);
+
+static inline struct btree_iter *
+bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id,
+                   struct bpos pos, unsigned flags)
+{
+       struct btree_iter *iter =
+               __bch2_trans_get_iter(trans, btree_id, pos, flags);
+
+       if (!IS_ERR(iter))
+               iter->ip_allocated = _THIS_IP_;
+       return iter;
+}
+
+struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *,
+                                       struct btree_iter *);
+static inline struct btree_iter *
+bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src)
+{
+       struct btree_iter *iter =
+               __bch2_trans_copy_iter(trans, src);
+
+       if (!IS_ERR(iter))
+               iter->ip_allocated = _THIS_IP_;
+       return iter;
+}
+
+struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *,
+                               enum btree_id, struct bpos,
+                               unsigned, unsigned, unsigned);
+
+#define TRANS_RESET_NOTRAVERSE         (1 << 0)
+
+void bch2_trans_reset(struct btree_trans *, unsigned);
+
+static inline void bch2_trans_begin(struct btree_trans *trans)
+{
+       bch2_trans_reset(trans, 0);
+}
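+
+/*
+ * Illustrative sketch (not part of the upstream file): the canonical retry
+ * loop around a transaction. do_work() is a stand-in for any sequence of
+ * lookups/updates ending in bch2_trans_commit(); bch2_trans_begin() resets
+ * state between attempts:
+ *
+ *     struct btree_trans trans;
+ *     int ret;
+ *
+ *     bch2_trans_init(&trans, c, 0, 0);
+ *     do {
+ *             bch2_trans_begin(&trans);
+ *             ret = do_work(&trans);
+ *     } while (ret == -EINTR);
+ *     ret = bch2_trans_exit(&trans) ?: ret;
+ */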
+
+void *bch2_trans_kmalloc(struct btree_trans *, size_t);
+void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
+int bch2_trans_exit(struct btree_trans *);
+
+void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_fs_btree_iter_exit(struct bch_fs *);
+int bch2_fs_btree_iter_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_ITER_H */
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
new file mode 100644 (file)
index 0000000..6166275
--- /dev/null
@@ -0,0 +1,519 @@
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_iter.h"
+#include "btree_key_cache.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+
+#include <trace/events/bcachefs.h>
+
+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
+                                      const void *obj)
+{
+       const struct bkey_cached *ck = obj;
+       const struct bkey_cached_key *key = arg->key;
+
+       return cmp_int(ck->key.btree_id, key->btree_id) ?:
+               bkey_cmp(ck->key.pos, key->pos);
+}
+
+static const struct rhashtable_params bch2_btree_key_cache_params = {
+       .head_offset    = offsetof(struct bkey_cached, hash),
+       .key_offset     = offsetof(struct bkey_cached, key),
+       .key_len        = sizeof(struct bkey_cached_key),
+       .obj_cmpfn      = bch2_btree_key_cache_cmp_fn,
+};
+
+__flatten
+static inline struct bkey_cached *
+btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
+{
+       struct bkey_cached_key key = {
+               .btree_id       = btree_id,
+               .pos            = pos,
+       };
+
+       return rhashtable_lookup_fast(&c->btree_key_cache.table, &key,
+                                     bch2_btree_key_cache_params);
+}
+
+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
+{
+       if (!six_trylock_intent(&ck->c.lock))
+               return false;
+
+       if (!six_trylock_write(&ck->c.lock)) {
+               six_unlock_intent(&ck->c.lock);
+               return false;
+       }
+
+       if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+               six_unlock_write(&ck->c.lock);
+               six_unlock_intent(&ck->c.lock);
+               return false;
+       }
+
+       return true;
+}
+
+static void bkey_cached_evict(struct btree_key_cache *c,
+                             struct bkey_cached *ck)
+{
+       BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
+                                     bch2_btree_key_cache_params));
+       memset(&ck->key, ~0, sizeof(ck->key));
+}
+
+static void bkey_cached_free(struct btree_key_cache *c,
+                            struct bkey_cached *ck)
+{
+       list_move(&ck->list, &c->freed);
+
+       kfree(ck->k);
+       ck->k           = NULL;
+       ck->u64s        = 0;
+
+       six_unlock_write(&ck->c.lock);
+       six_unlock_intent(&ck->c.lock);
+}
+
+static struct bkey_cached *
+bkey_cached_alloc(struct btree_key_cache *c)
+{
+       struct bkey_cached *ck;
+
+       list_for_each_entry(ck, &c->freed, list)
+               if (bkey_cached_lock_for_evict(ck))
+                       return ck;
+
+       list_for_each_entry(ck, &c->clean, list)
+               if (bkey_cached_lock_for_evict(ck)) {
+                       bkey_cached_evict(c, ck);
+                       return ck;
+               }
+
+       ck = kzalloc(sizeof(*ck), GFP_NOFS);
+       if (!ck)
+               return NULL;
+
+       INIT_LIST_HEAD(&ck->list);
+       six_lock_init(&ck->c.lock);
+       BUG_ON(!six_trylock_intent(&ck->c.lock));
+       BUG_ON(!six_trylock_write(&ck->c.lock));
+
+       return ck;
+}
+
+static struct bkey_cached *
+btree_key_cache_create(struct btree_key_cache *c,
+                      enum btree_id btree_id,
+                      struct bpos pos)
+{
+       struct bkey_cached *ck;
+
+       ck = bkey_cached_alloc(c);
+       if (!ck)
+               return ERR_PTR(-ENOMEM);
+
+       ck->c.level             = 0;
+       ck->c.btree_id          = btree_id;
+       ck->key.btree_id        = btree_id;
+       ck->key.pos             = pos;
+       ck->valid               = false;
+
+       BUG_ON(ck->flags);
+
+       if (rhashtable_lookup_insert_fast(&c->table,
+                                         &ck->hash,
+                                         bch2_btree_key_cache_params)) {
+               /* We raced with another fill: */
+               bkey_cached_free(c, ck);
+               return NULL;
+       }
+
+       list_move(&ck->list, &c->clean);
+       six_unlock_write(&ck->c.lock);
+
+       return ck;
+}
+
+static int btree_key_cache_fill(struct btree_trans *trans,
+                               struct btree_iter *ck_iter,
+                               struct bkey_cached *ck)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       unsigned new_u64s = 0;
+       struct bkey_i *new_k = NULL;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, ck->key.btree_id,
+                                  ck->key.pos, BTREE_ITER_SLOTS);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret) {
+               bch2_trans_iter_put(trans, iter);
+               return ret;
+       }
+
+       if (!bch2_btree_node_relock(ck_iter, 0)) {
+               bch2_trans_iter_put(trans, iter);
+               trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+               return -EINTR;
+       }
+
+       if (k.k->u64s > ck->u64s) {
+               new_u64s = roundup_pow_of_two(k.k->u64s);
+               new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);
+               if (!new_k) {
+                       bch2_trans_iter_put(trans, iter);
+                       return -ENOMEM;
+               }
+       }
+
+       bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter);
+       if (new_k) {
+               kfree(ck->k);
+               ck->u64s = new_u64s;
+               ck->k = new_k;
+       }
+
+       bkey_reassemble(ck->k, k);
+       ck->valid = true;
+       bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter);
+
+       /* We're not likely to need this iterator again: */
+       bch2_trans_iter_free(trans, iter);
+
+       return 0;
+}
+
+static int bkey_cached_check_fn(struct six_lock *lock, void *p)
+{
+       struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
+       const struct btree_iter *iter = p;
+
+       return ck->key.btree_id == iter->btree_id &&
+               !bkey_cmp(ck->key.pos, iter->pos) ? 0 : -1;
+}
+
+int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
+{
+       struct btree_trans *trans = iter->trans;
+       struct bch_fs *c = trans->c;
+       struct bkey_cached *ck;
+       int ret = 0;
+
+       BUG_ON(iter->level);
+
+       if (btree_node_locked(iter, 0)) {
+               ck = (void *) iter->l[0].b;
+               goto fill;
+       }
+retry:
+       ck = btree_key_cache_find(c, iter->btree_id, iter->pos);
+       if (!ck) {
+               if (iter->flags & BTREE_ITER_CACHED_NOCREATE) {
+                       iter->l[0].b = NULL;
+                       return 0;
+               }
+
+               mutex_lock(&c->btree_key_cache.lock);
+               ck = btree_key_cache_create(&c->btree_key_cache,
+                                           iter->btree_id, iter->pos);
+               mutex_unlock(&c->btree_key_cache.lock);
+
+               ret = PTR_ERR_OR_ZERO(ck);
+               if (ret)
+                       goto err;
+               if (!ck)
+                       goto retry;
+
+               mark_btree_node_locked(iter, 0, SIX_LOCK_intent);
+               iter->locks_want = 1;
+       } else {
+               enum six_lock_type lock_want = __btree_lock_want(iter, 0);
+
+               if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
+                                    bkey_cached_check_fn, iter)) {
+                       if (ck->key.btree_id != iter->btree_id ||
+                           bkey_cmp(ck->key.pos, iter->pos)) {
+                               goto retry;
+                       }
+
+                       trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+                       ret = -EINTR;
+                       goto err;
+               }
+
+               if (ck->key.btree_id != iter->btree_id ||
+                   bkey_cmp(ck->key.pos, iter->pos)) {
+                       six_unlock_type(&ck->c.lock, lock_want);
+                       goto retry;
+               }
+
+               mark_btree_node_locked(iter, 0, lock_want);
+       }
+
+       iter->l[0].lock_seq     = ck->c.lock.state.seq;
+       iter->l[0].b            = (void *) ck;
+fill:
+       if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
+               if (!btree_node_intent_locked(iter, 0))
+                       bch2_btree_iter_upgrade(iter, 1);
+               if (!btree_node_intent_locked(iter, 0)) {
+                       trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+                       ret = -EINTR;
+                       goto err;
+               }
+
+               ret = btree_key_cache_fill(trans, iter, ck);
+               if (ret)
+                       goto err;
+       }
+
+       iter->uptodate = BTREE_ITER_NEED_PEEK;
+       bch2_btree_iter_downgrade(iter);
+       return ret;
+err:
+       if (ret != -EINTR) {
+               btree_node_unlock(iter, 0);
+               iter->flags |= BTREE_ITER_ERROR;
+               iter->l[0].b = BTREE_ITER_NO_NODE_ERROR;
+       }
+       return ret;
+}
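+
+/*
+ * Illustrative sketch (not part of the upstream file): reading a key through
+ * the cache. BTREE_ITER_CACHED routes traversal through
+ * bch2_btree_iter_traverse_cached() above; `trans`, `id` and `pos` are
+ * assumed to be in scope:
+ *
+ *     struct btree_iter *iter;
+ *     struct bkey_s_c k;
+ *     int ret;
+ *
+ *     iter = bch2_trans_get_iter(trans, id, pos, BTREE_ITER_CACHED);
+ *     ret = PTR_ERR_OR_ZERO(iter);
+ *     if (!ret) {
+ *             k = bch2_btree_iter_peek_cached(iter);
+ *             ret = bkey_err(k);
+ *             bch2_trans_iter_put(trans, iter);
+ *     }
+ */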
+
+static int btree_key_cache_flush_pos(struct btree_trans *trans,
+                                    struct bkey_cached_key key,
+                                    u64 journal_seq,
+                                    bool evict)
+{
+       struct bch_fs *c = trans->c;
+       struct journal *j = &c->journal;
+       struct btree_iter *c_iter = NULL, *b_iter = NULL;
+       struct bkey_cached *ck;
+       int ret;
+
+       b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
+                                    BTREE_ITER_SLOTS|
+                                    BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(b_iter);
+       if (ret)
+               goto out;
+
+       c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos,
+                                    BTREE_ITER_CACHED|
+                                    BTREE_ITER_CACHED_NOFILL|
+                                    BTREE_ITER_CACHED_NOCREATE|
+                                    BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(c_iter);
+       if (ret)
+               goto out;
+retry:
+       ret = bch2_btree_iter_traverse(c_iter);
+       if (ret)
+               goto err;
+
+       ck = (void *) c_iter->l[0].b;
+       if (!ck ||
+           (journal_seq && ck->journal.seq != journal_seq))
+               goto out;
+
+       if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+               if (!evict)
+                       goto out;
+               goto evict;
+       }
+
+       ret   = bch2_btree_iter_traverse(b_iter) ?:
+               bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BTREE_INSERT_NOUNLOCK|
+                                 BTREE_INSERT_NOCHECK_RW|
+                                 BTREE_INSERT_NOFAIL|
+                                 BTREE_INSERT_USE_RESERVE|
+                                 BTREE_INSERT_USE_ALLOC_RESERVE|
+                                 BTREE_INSERT_JOURNAL_RESERVED|
+                                 BTREE_INSERT_JOURNAL_RECLAIM);
+err:
+       if (ret == -EINTR)
+               goto retry;
+
+       BUG_ON(ret && !bch2_journal_error(j));
+
+       if (ret)
+               goto out;
+
+       bch2_journal_pin_drop(j, &ck->journal);
+       bch2_journal_preres_put(j, &ck->res);
+       clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+
+       if (!evict) {
+               mutex_lock(&c->btree_key_cache.lock);
+               list_move_tail(&ck->list, &c->btree_key_cache.clean);
+               mutex_unlock(&c->btree_key_cache.lock);
+       } else {
+evict:
+               BUG_ON(!btree_node_intent_locked(c_iter, 0));
+
+               mark_btree_node_unlocked(c_iter, 0);
+               c_iter->l[0].b = NULL;
+
+               six_lock_write(&ck->c.lock, NULL, NULL);
+
+               mutex_lock(&c->btree_key_cache.lock);
+               bkey_cached_evict(&c->btree_key_cache, ck);
+               bkey_cached_free(&c->btree_key_cache, ck);
+               mutex_unlock(&c->btree_key_cache.lock);
+       }
+out:
+       bch2_trans_iter_put(trans, b_iter);
+       bch2_trans_iter_put(trans, c_iter);
+       return ret;
+}
+
+static void btree_key_cache_journal_flush(struct journal *j,
+                                         struct journal_entry_pin *pin,
+                                         u64 seq)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct bkey_cached *ck =
+               container_of(pin, struct bkey_cached, journal);
+       struct bkey_cached_key key;
+       struct btree_trans trans;
+
+       six_lock_read(&ck->c.lock, NULL, NULL);
+       key = ck->key;
+
+       if (ck->journal.seq != seq ||
+           !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+               six_unlock_read(&ck->c.lock);
+               return;
+       }
+       six_unlock_read(&ck->c.lock);
+
+       bch2_trans_init(&trans, c, 0, 0);
+       btree_key_cache_flush_pos(&trans, key, seq, false);
+       bch2_trans_exit(&trans);
+}
+
+/*
+ * Flush and evict a key from the key cache:
+ */
+int bch2_btree_key_cache_flush(struct btree_trans *trans,
+                              enum btree_id id, struct bpos pos)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_cached_key key = { id, pos };
+
+       /* Fastpath - assume it won't be found: */
+       if (!btree_key_cache_find(c, id, pos))
+               return 0;
+
+       return btree_key_cache_flush_pos(trans, key, 0, true);
+}
+
+bool bch2_btree_insert_key_cached(struct btree_trans *trans,
+                                 struct btree_iter *iter,
+                                 struct bkey_i *insert)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_cached *ck = (void *) iter->l[0].b;
+
+       BUG_ON(insert->u64s > ck->u64s);
+
+       if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+               int difference;
+
+               BUG_ON(jset_u64s(insert->u64s) > trans->journal_preres.u64s);
+
+               difference = jset_u64s(insert->u64s) - ck->res.u64s;
+               if (difference > 0) {
+                       trans->journal_preres.u64s      -= difference;
+                       ck->res.u64s                    += difference;
+               }
+       }
+
+       bkey_copy(ck->k, insert);
+       ck->valid = true;
+
+       if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+               mutex_lock(&c->btree_key_cache.lock);
+               list_del_init(&ck->list);
+
+               set_bit(BKEY_CACHED_DIRTY, &ck->flags);
+               mutex_unlock(&c->btree_key_cache.lock);
+       }
+
+       bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
+                               &ck->journal, btree_key_cache_journal_flush);
+       return true;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
+                              enum btree_id id, struct bpos pos)
+{
+       BUG_ON(btree_key_cache_find(trans->c, id, pos));
+}
+#endif
+
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c)
+{
+       struct bkey_cached *ck, *n;
+
+       mutex_lock(&c->lock);
+       list_for_each_entry_safe(ck, n, &c->clean, list) {
+               kfree(ck->k);
+               kfree(ck);
+       }
+       list_for_each_entry_safe(ck, n, &c->freed, list)
+               kfree(ck);
+       mutex_unlock(&c->lock);
+
+       rhashtable_destroy(&c->table);
+}
+
+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
+{
+       mutex_init(&c->lock);
+       INIT_LIST_HEAD(&c->freed);
+       INIT_LIST_HEAD(&c->clean);
+}
+
+int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
+{
+       return rhashtable_init(&c->table, &bch2_btree_key_cache_params);
+}
+
+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
+{
+       struct bucket_table *tbl;
+       struct bkey_cached *ck;
+       struct rhash_head *pos;
+       size_t i;
+
+       mutex_lock(&c->lock);
+       tbl = rht_dereference_rcu(c->table.tbl, &c->table);
+
+       for (i = 0; i < tbl->size; i++) {
+               rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
+                       pr_buf(out, "%s:",
+                              bch2_btree_ids[ck->key.btree_id]);
+                       bch2_bpos_to_text(out, ck->key.pos);
+
+                       if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
+                               pr_buf(out, " journal seq %llu", ck->journal.seq);
+                       pr_buf(out, "\n");
+               }
+       }
+       mutex_unlock(&c->lock);
+}
diff --git a/libbcachefs/btree_key_cache.h b/libbcachefs/btree_key_cache.h
new file mode 100644 (file)
index 0000000..b1756c6
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
+#define _BCACHEFS_BTREE_KEY_CACHE_H
+
+int bch2_btree_iter_traverse_cached(struct btree_iter *);
+
+bool bch2_btree_insert_key_cached(struct btree_trans *,
+                       struct btree_iter *, struct bkey_i *);
+int bch2_btree_key_cache_flush(struct btree_trans *,
+                              enum btree_id, struct bpos);
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_key_cache_verify_clean(struct btree_trans *,
+                               enum btree_id, struct bpos);
+#else
+static inline void
+bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
+                               enum btree_id id, struct bpos pos) {}
+#endif
+
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
+int bch2_fs_btree_key_cache_init(struct btree_key_cache *);
+
+void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
new file mode 100644 (file)
index 0000000..81fbf3e
--- /dev/null
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_LOCKING_H
+#define _BCACHEFS_BTREE_LOCKING_H
+
+/*
+ * Only for internal btree use:
+ *
+ * The btree iterator tracks what locks it wants to take, and what locks it
+ * currently has - here we have wrappers for locking/unlocking btree nodes and
+ * updating the iterator state
+ */
+
+#include <linux/six.h>
+
+#include "btree_iter.h"
+
+/* matches six lock types */
+enum btree_node_locked_type {
+       BTREE_NODE_UNLOCKED             = -1,
+       BTREE_NODE_READ_LOCKED          = SIX_LOCK_read,
+       BTREE_NODE_INTENT_LOCKED        = SIX_LOCK_intent,
+};
+
+static inline int btree_node_locked_type(struct btree_iter *iter,
+                                        unsigned level)
+{
+       /*
+        * We're relying on the fact that if nodes_intent_locked is set
+        * nodes_locked must be set as well, so that we can compute without
+        * branches:
+        */
+       return BTREE_NODE_UNLOCKED +
+               ((iter->nodes_locked >> level) & 1) +
+               ((iter->nodes_intent_locked >> level) & 1);
+}
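+
+/*
+ * Worked example of the branchless encoding above: a read lock at @level
+ * sets only the nodes_locked bit, giving -1 + 1 + 0 = 0 = SIX_LOCK_read;
+ * an intent lock sets the bit in both masks, giving -1 + 1 + 1 = 1 =
+ * SIX_LOCK_intent; neither bit set leaves -1 = BTREE_NODE_UNLOCKED.
+ */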
+
+static inline bool btree_node_intent_locked(struct btree_iter *iter,
+                                           unsigned level)
+{
+       return btree_node_locked_type(iter, level) == BTREE_NODE_INTENT_LOCKED;
+}
+
+static inline bool btree_node_read_locked(struct btree_iter *iter,
+                                         unsigned level)
+{
+       return btree_node_locked_type(iter, level) == BTREE_NODE_READ_LOCKED;
+}
+
+static inline bool btree_node_locked(struct btree_iter *iter, unsigned level)
+{
+       return iter->nodes_locked & (1 << level);
+}
+
+static inline void mark_btree_node_unlocked(struct btree_iter *iter,
+                                           unsigned level)
+{
+       iter->nodes_locked &= ~(1 << level);
+       iter->nodes_intent_locked &= ~(1 << level);
+}
+
+static inline void mark_btree_node_locked(struct btree_iter *iter,
+                                         unsigned level,
+                                         enum six_lock_type type)
+{
+       /* relying on this to avoid a branch */
+       BUILD_BUG_ON(SIX_LOCK_read   != 0);
+       BUILD_BUG_ON(SIX_LOCK_intent != 1);
+
+       iter->nodes_locked |= 1 << level;
+       iter->nodes_intent_locked |= type << level;
+}
+
+static inline void mark_btree_node_intent_locked(struct btree_iter *iter,
+                                                unsigned level)
+{
+       mark_btree_node_locked(iter, level, SIX_LOCK_intent);
+}
+
+static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level)
+{
+       return level < iter->locks_want
+               ? SIX_LOCK_intent
+               : SIX_LOCK_read;
+}
+
+static inline enum btree_node_locked_type
+btree_lock_want(struct btree_iter *iter, int level)
+{
+       if (level < iter->level)
+               return BTREE_NODE_UNLOCKED;
+       if (level < iter->locks_want)
+               return BTREE_NODE_INTENT_LOCKED;
+       if (level == iter->level)
+               return BTREE_NODE_READ_LOCKED;
+       return BTREE_NODE_UNLOCKED;
+}
+
+static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level)
+{
+       int lock_type = btree_node_locked_type(iter, level);
+
+       EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+       if (lock_type != BTREE_NODE_UNLOCKED)
+               six_unlock_type(&iter->l[level].b->c.lock, lock_type);
+       mark_btree_node_unlocked(iter, level);
+}
+
+static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
+{
+       EBUG_ON(!level && iter->trans->nounlock);
+
+       __btree_node_unlock(iter, level);
+}
+
+static inline void __bch2_btree_iter_unlock(struct btree_iter *iter)
+{
+       btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+
+       while (iter->nodes_locked)
+               btree_node_unlock(iter, __ffs(iter->nodes_locked));
+}
+
+static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
+{
+       switch (type) {
+       case SIX_LOCK_read:
+               return BCH_TIME_btree_lock_contended_read;
+       case SIX_LOCK_intent:
+               return BCH_TIME_btree_lock_contended_intent;
+       case SIX_LOCK_write:
+               return BCH_TIME_btree_lock_contended_write;
+       default:
+               BUG();
+       }
+}
+
+/*
+ * wrapper around six locks that just traces lock contended time
+ */
+static inline void __btree_node_lock_type(struct bch_fs *c, struct btree *b,
+                                         enum six_lock_type type)
+{
+       u64 start_time = local_clock();
+
+       six_lock_type(&b->c.lock, type, NULL, NULL);
+       bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
+}
+
+static inline void btree_node_lock_type(struct bch_fs *c, struct btree *b,
+                                       enum six_lock_type type)
+{
+       if (!six_trylock_type(&b->c.lock, type))
+               __btree_node_lock_type(c, b, type);
+}
+
+/*
+ * Lock a btree node if we already have it locked on one of our linked
+ * iterators:
+ */
+static inline bool btree_node_lock_increment(struct btree_trans *trans,
+                                            struct btree *b, unsigned level,
+                                            enum btree_node_locked_type want)
+{
+       struct btree_iter *iter;
+
+       trans_for_each_iter(trans, iter)
+               if (iter->l[level].b == b &&
+                   btree_node_locked_type(iter, level) >= want) {
+                       six_lock_increment(&b->c.lock, want);
+                       return true;
+               }
+
+       return false;
+}
+
+bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
+                           struct btree_iter *, enum six_lock_type,
+                           six_lock_should_sleep_fn, void *);
+
+static inline bool btree_node_lock(struct btree *b,
+                       struct bpos pos, unsigned level,
+                       struct btree_iter *iter,
+                       enum six_lock_type type,
+                       six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+       struct btree_trans *trans = iter->trans;
+       bool ret;
+
+       EBUG_ON(level >= BTREE_MAX_DEPTH);
+       EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx)));
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       trans->locking          = b;
+       trans->locking_iter_idx = iter->idx;
+       trans->locking_pos      = pos;
+       trans->locking_btree_id = iter->btree_id;
+       trans->locking_level    = level;
+#endif
+       ret   = likely(six_trylock_type(&b->c.lock, type)) ||
+               btree_node_lock_increment(trans, b, level, type) ||
+               __bch2_btree_node_lock(b, pos, level, iter, type,
+                                      should_sleep_fn, p);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       trans->locking = NULL;
+#endif
+       return ret;
+}
+
+bool __bch2_btree_node_relock(struct btree_iter *, unsigned);
+
+static inline bool bch2_btree_node_relock(struct btree_iter *iter,
+                                         unsigned level)
+{
+       EBUG_ON(btree_node_locked(iter, level) &&
+               btree_node_locked_type(iter, level) !=
+               __btree_lock_want(iter, level));
+
+       return likely(btree_node_locked(iter, level)) ||
+               __bch2_btree_node_relock(iter, level);
+}
+
+/*
+ * Updates the saved lock sequence number, so that bch2_btree_node_relock() will
+ * succeed:
+ */
+static inline void
+bch2_btree_node_unlock_write_inlined(struct btree *b, struct btree_iter *iter)
+{
+       struct btree_iter *linked;
+
+       EBUG_ON(iter->l[b->c.level].b != b);
+       EBUG_ON(iter->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq);
+
+       trans_for_each_iter_with_node(iter->trans, b, linked)
+               linked->l[b->c.level].lock_seq += 2;
+
+       six_unlock_write(&b->c.lock);
+}
+
+void bch2_btree_node_unlock_write(struct btree *, struct btree_iter *);
+
+void __bch2_btree_node_lock_write(struct btree *, struct btree_iter *);
+
+static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
+{
+       EBUG_ON(iter->l[b->c.level].b != b);
+       EBUG_ON(iter->l[b->c.level].lock_seq != b->c.lock.state.seq);
+
+       if (unlikely(!six_trylock_write(&b->c.lock)))
+               __bch2_btree_node_lock_write(b, iter);
+}
+
+#endif /* _BCACHEFS_BTREE_LOCKING_H */
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
new file mode 100644 (file)
index 0000000..c1717b7
--- /dev/null
@@ -0,0 +1,662 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_TYPES_H
+#define _BCACHEFS_BTREE_TYPES_H
+
+#include <linux/list.h>
+#include <linux/rhashtable.h>
+#include <linux/six.h>
+
+#include "bkey_methods.h"
+#include "buckets_types.h"
+#include "journal_types.h"
+
+struct open_bucket;
+struct btree_update;
+struct btree_trans;
+
+#define MAX_BSETS              3U
+
+struct btree_nr_keys {
+
+       /*
+        * Amount of live metadata (i.e. size of node after a compaction) in
+        * units of u64s
+        */
+       u16                     live_u64s;
+       u16                     bset_u64s[MAX_BSETS];
+
+       /* live keys only: */
+       u16                     packed_keys;
+       u16                     unpacked_keys;
+};
+
+struct bset_tree {
+       /*
+        * We construct a binary tree in an array as if the array
+        * started at 1, so that things line up on the same cachelines
+        * better: see comments in bset.c at cacheline_to_bkey() for
+        * details
+        */
+
+       /* size of the binary tree and prev array */
+       u16                     size;
+
+       /* function of size - precalculated for to_inorder() */
+       u16                     extra;
+
+       u16                     data_offset;
+       u16                     aux_data_offset;
+       u16                     end_offset;
+
+       struct bpos             max_key;
+};
+
+struct btree_write {
+       struct journal_entry_pin        journal;
+};
+
+struct btree_alloc {
+       struct open_buckets     ob;
+       BKEY_PADDED(k);
+};
+
+struct btree_bkey_cached_common {
+       struct six_lock         lock;
+       u8                      level;
+       u8                      btree_id;
+};
+
+struct btree {
+       struct btree_bkey_cached_common c;
+
+       struct rhash_head       hash;
+       u64                     hash_val;
+
+       unsigned long           flags;
+       u16                     written;
+       u8                      nsets;
+       u8                      nr_key_bits;
+
+       struct bkey_format      format;
+
+       struct btree_node       *data;
+       void                    *aux_data;
+
+       /*
+        * Sets of sorted keys - the real btree node - plus a binary search tree
+        *
+        * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
+        * to the memory we have allocated for this btree node. Additionally,
+        * set[0]->data points to the entire btree node as it exists on disk.
+        */
+       struct bset_tree        set[MAX_BSETS];
+
+       struct btree_nr_keys    nr;
+       u16                     sib_u64s[2];
+       u16                     whiteout_u64s;
+       u8                      byte_order;
+       u8                      unpack_fn_len;
+
+       /*
+        * XXX: add a delete sequence number, so when bch2_btree_node_relock()
+        * fails because the lock sequence number has changed - i.e. the
+        * contents were modified - we can still relock the node if it's still
+        * the one we want, without redoing the traversal
+        */
+
+       /*
+        * For asynchronous splits/interior node updates:
+        * When we do a split, we allocate new child nodes and update the parent
+        * node to point to them: we update the parent in memory immediately,
+        * but then we must wait until the children have been written out before
+        * the update to the parent can be written - this is a list of the
+        * btree_updates that are blocking this node from being
+        * written:
+        */
+       struct list_head        write_blocked;
+
+       /*
+        * Also for asynchronous splits/interior node updates:
+        * If a btree node isn't reachable yet, we don't want to kick off
+        * another write - because that write also won't yet be reachable and
+        * marking it as completed before it's reachable would be incorrect:
+        */
+       unsigned long           will_make_reachable;
+
+       struct open_buckets     ob;
+
+       /* lru list */
+       struct list_head        list;
+
+       struct btree_write      writes[2];
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       bool                    *expensive_debug_checks;
+#endif
+
+       /* Key/pointer for this btree node */
+       __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
+};
+
+struct btree_cache {
+       struct rhashtable       table;
+       bool                    table_init_done;
+       /*
+        * We never free a struct btree, except on shutdown - we just put it on
+        * the btree_cache_freed list and reuse it later. This simplifies the
+        * code, and it doesn't cost us much memory as the memory usage is
+        * dominated by buffers that hold the actual btree node data and those
+        * can be freed - and the number of struct btrees allocated is
+        * effectively bounded.
+        *
+        * btree_cache_freeable effectively is a small cache - we use it because
+        * high order page allocations can be rather expensive, and it's quite
+        * common to delete and allocate btree nodes in quick succession. It
+        * should never grow past ~2-3 nodes in practice.
+        */
+       struct mutex            lock;
+       struct list_head        live;
+       struct list_head        freeable;
+       struct list_head        freed;
+
+       /* Number of elements in live + freeable lists */
+       unsigned                used;
+       unsigned                reserve;
+       struct shrinker         shrink;
+
+       /*
+        * If we need to allocate memory for a new btree node and that
+        * allocation fails, we can cannibalize another node in the btree cache
+        * to satisfy the allocation - lock to guarantee only one thread does
+        * this at a time:
+        */
+       struct task_struct      *alloc_lock;
+       struct closure_waitlist alloc_wait;
+};
+
+struct btree_node_iter {
+       struct btree_node_iter_set {
+               u16     k, end;
+       } data[MAX_BSETS];
+};
+
+enum btree_iter_type {
+       BTREE_ITER_KEYS,
+       BTREE_ITER_NODES,
+       BTREE_ITER_CACHED,
+};
+
+#define BTREE_ITER_TYPE                        ((1 << 2) - 1)
+
+/*
+ * Iterate over all possible positions, synthesizing deleted keys for holes:
+ */
+#define BTREE_ITER_SLOTS               (1 << 2)
+/*
+ * Indicates that intent locks should be taken on leaf nodes, because we expect
+ * to be doing updates:
+ */
+#define BTREE_ITER_INTENT              (1 << 3)
+/*
+ * Causes the btree iterator code to prefetch additional btree nodes from disk:
+ */
+#define BTREE_ITER_PREFETCH            (1 << 4)
+/*
+ * Indicates that this iterator should not be reused until transaction commit,
+ * either because a pending update references it or because the update depends
+ * on that particular key being locked (e.g. by the str_hash code, for hash
+ * table consistency)
+ */
+#define BTREE_ITER_KEEP_UNTIL_COMMIT   (1 << 5)
+/*
+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
+ * @pos or the first key strictly greater than @pos
+ */
+#define BTREE_ITER_IS_EXTENTS          (1 << 6)
+#define BTREE_ITER_ERROR               (1 << 7)
+#define BTREE_ITER_SET_POS_AFTER_COMMIT        (1 << 8)
+#define BTREE_ITER_CACHED_NOFILL       (1 << 9)
+#define BTREE_ITER_CACHED_NOCREATE     (1 << 10)
+
+#define BTREE_ITER_USER_FLAGS                          \
+       (BTREE_ITER_SLOTS                               \
+       |BTREE_ITER_INTENT                              \
+       |BTREE_ITER_PREFETCH                            \
+       |BTREE_ITER_CACHED_NOFILL                       \
+       |BTREE_ITER_CACHED_NOCREATE)
+
+enum btree_iter_uptodate {
+       BTREE_ITER_UPTODATE             = 0,
+       BTREE_ITER_NEED_PEEK            = 1,
+       BTREE_ITER_NEED_RELOCK          = 2,
+       BTREE_ITER_NEED_TRAVERSE        = 3,
+};
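+
+/*
+ * The uptodate ladder, least to most work required: UPTODATE means iter->k
+ * is current; NEED_PEEK means locks are held but the key must be re-peeked;
+ * NEED_RELOCK means the position is still valid but locks were dropped;
+ * NEED_TRAVERSE requires a full traversal. bch2_btree_iter_traverse() only
+ * does work from NEED_RELOCK upwards.
+ */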
+
+#define BTREE_ITER_NO_NODE_GET_LOCKS   ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_DROP                ((struct btree *) 2)
+#define BTREE_ITER_NO_NODE_LOCK_ROOT   ((struct btree *) 3)
+#define BTREE_ITER_NO_NODE_UP          ((struct btree *) 4)
+#define BTREE_ITER_NO_NODE_DOWN                ((struct btree *) 5)
+#define BTREE_ITER_NO_NODE_INIT                ((struct btree *) 6)
+#define BTREE_ITER_NO_NODE_ERROR       ((struct btree *) 7)
+
+/*
+ * @pos                        - iterator's current position
+ * @level              - current btree depth
+ * @locks_want         - btree level below which we start taking intent locks
+ * @nodes_locked       - bitmask indicating which nodes in @nodes are locked
+ * @nodes_intent_locked        - bitmask indicating which locks are intent locks
+ */
+struct btree_iter {
+       struct btree_trans      *trans;
+       struct bpos             pos;
+       struct bpos             pos_after_commit;
+
+       u16                     flags;
+       u8                      idx;
+
+       enum btree_id           btree_id:4;
+       enum btree_iter_uptodate uptodate:4;
+       unsigned                level:4,
+                               min_depth:4,
+                               locks_want:4,
+                               nodes_locked:4,
+                               nodes_intent_locked:4;
+
+       struct btree_iter_level {
+               struct btree    *b;
+               struct btree_node_iter iter;
+               u32             lock_seq;
+       }                       l[BTREE_MAX_DEPTH];
+
+       /*
+        * Current unpacked key - so that bch2_btree_iter_next()/
+        * bch2_btree_iter_next_slot() can correctly advance pos.
+        */
+       struct bkey             k;
+       unsigned long           ip_allocated;
+};
+
+static inline enum btree_iter_type
+btree_iter_type(const struct btree_iter *iter)
+{
+       return iter->flags & BTREE_ITER_TYPE;
+}
+
+static inline struct btree_iter_level *iter_l(struct btree_iter *iter)
+{
+       return iter->l + iter->level;
+}
+
+struct btree_key_cache {
+       struct mutex            lock;
+       struct rhashtable       table;
+       struct list_head        freed;
+       struct list_head        clean;
+};
+
+struct bkey_cached_key {
+       u32                     btree_id;
+       struct bpos             pos;
+} __attribute__((packed, aligned(4)));
+
+#define BKEY_CACHED_DIRTY              0
+
+struct bkey_cached {
+       struct btree_bkey_cached_common c;
+
+       unsigned long           flags;
+       u8                      u64s;
+       bool                    valid;
+       struct bkey_cached_key  key;
+
+       struct rhash_head       hash;
+       struct list_head        list;
+
+       struct journal_preres   res;
+       struct journal_entry_pin journal;
+
+       struct bkey_i           *k;
+};
+
+struct btree_insert_entry {
+       unsigned                trigger_flags;
+       unsigned                trans_triggers_run:1;
+       struct bkey_i           *k;
+       struct btree_iter       *iter;
+};
+
+#ifndef CONFIG_LOCKDEP
+#define BTREE_ITER_MAX         64
+#else
+#define BTREE_ITER_MAX         32
+#endif
+
+struct btree_trans {
+       struct bch_fs           *c;
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct list_head        list;
+       struct btree            *locking;
+       unsigned                locking_iter_idx;
+       struct bpos             locking_pos;
+       u8                      locking_btree_id;
+       u8                      locking_level;
+       pid_t                   pid;
+#endif
+       unsigned long           ip;
+
+       u64                     iters_linked;
+       u64                     iters_live;
+       u64                     iters_touched;
+
+       u8                      nr_iters;
+       u8                      nr_updates;
+       u8                      nr_updates2;
+       u8                      size;
+       unsigned                used_mempool:1;
+       unsigned                error:1;
+       unsigned                nounlock:1;
+       unsigned                need_reset:1;
+       unsigned                in_traverse_all:1;
+
+       unsigned                mem_top;
+       unsigned                mem_bytes;
+       void                    *mem;
+
+       struct btree_iter       *iters;
+       struct btree_insert_entry *updates;
+       struct btree_insert_entry *updates2;
+
+       /* update path: */
+       struct jset_entry       *extra_journal_entries;
+       unsigned                extra_journal_entry_u64s;
+       struct journal_entry_pin *journal_pin;
+
+       struct journal_res      journal_res;
+       struct journal_preres   journal_preres;
+       u64                     *journal_seq;
+       struct disk_reservation *disk_res;
+       unsigned                flags;
+       unsigned                journal_u64s;
+       unsigned                journal_preres_u64s;
+       struct replicas_delta_list *fs_usage_deltas;
+
+       struct btree_iter       iters_onstack[2];
+       struct btree_insert_entry updates_onstack[2];
+       struct btree_insert_entry updates2_onstack[2];
+};
+
+#define BTREE_FLAG(flag)                                               \
+static inline bool btree_node_ ## flag(struct btree *b)                        \
+{      return test_bit(BTREE_NODE_ ## flag, &b->flags); }              \
+                                                                       \
+static inline void set_btree_node_ ## flag(struct btree *b)            \
+{      set_bit(BTREE_NODE_ ## flag, &b->flags); }                      \
+                                                                       \
+static inline void clear_btree_node_ ## flag(struct btree *b)          \
+{      clear_bit(BTREE_NODE_ ## flag, &b->flags); }
+
+enum btree_flags {
+       BTREE_NODE_read_in_flight,
+       BTREE_NODE_read_error,
+       BTREE_NODE_dirty,
+       BTREE_NODE_need_write,
+       BTREE_NODE_noevict,
+       BTREE_NODE_write_idx,
+       BTREE_NODE_accessed,
+       BTREE_NODE_write_in_flight,
+       BTREE_NODE_just_written,
+       BTREE_NODE_dying,
+       BTREE_NODE_fake,
+       BTREE_NODE_old_extent_overwrite,
+       BTREE_NODE_need_rewrite,
+};
+
+BTREE_FLAG(read_in_flight);
+BTREE_FLAG(read_error);
+BTREE_FLAG(dirty);
+BTREE_FLAG(need_write);
+BTREE_FLAG(noevict);
+BTREE_FLAG(write_idx);
+BTREE_FLAG(accessed);
+BTREE_FLAG(write_in_flight);
+BTREE_FLAG(just_written);
+BTREE_FLAG(dying);
+BTREE_FLAG(fake);
+BTREE_FLAG(old_extent_overwrite);
+BTREE_FLAG(need_rewrite);
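+
+/*
+ * For reference, BTREE_FLAG(dirty) above expands to:
+ *
+ *     static inline bool btree_node_dirty(struct btree *b)
+ *     {       return test_bit(BTREE_NODE_dirty, &b->flags); }
+ *
+ *     static inline void set_btree_node_dirty(struct btree *b)
+ *     {       set_bit(BTREE_NODE_dirty, &b->flags); }
+ *
+ *     static inline void clear_btree_node_dirty(struct btree *b)
+ *     {       clear_bit(BTREE_NODE_dirty, &b->flags); }
+ */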
+
+static inline struct btree_write *btree_current_write(struct btree *b)
+{
+       return b->writes + btree_node_write_idx(b);
+}
+
+static inline struct btree_write *btree_prev_write(struct btree *b)
+{
+       return b->writes + (btree_node_write_idx(b) ^ 1);
+}
+
+static inline struct bset_tree *bset_tree_last(struct btree *b)
+{
+       EBUG_ON(!b->nsets);
+       return b->set + b->nsets - 1;
+}
+
+static inline void *
+__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
+{
+       return (void *) ((u64 *) b->data + 1 + offset);
+}
+
+static inline u16
+__btree_node_ptr_to_offset(const struct btree *b, const void *p)
+{
+       u16 ret = (u64 *) p - 1 - (u64 *) b->data;
+
+       EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
+       return ret;
+}
+
+static inline struct bset *bset(const struct btree *b,
+                               const struct bset_tree *t)
+{
+       return __btree_node_offset_to_ptr(b, t->data_offset);
+}
+
+static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
+{
+       t->end_offset =
+               __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t)));
+}
+
+static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
+                                 const struct bset *i)
+{
+       t->data_offset = __btree_node_ptr_to_offset(b, i);
+       set_btree_bset_end(b, t);
+}
+
+static inline struct bset *btree_bset_first(struct btree *b)
+{
+       return bset(b, b->set);
+}
+
+static inline struct bset *btree_bset_last(struct btree *b)
+{
+       return bset(b, bset_tree_last(b));
+}
+
+static inline u16
+__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k)
+{
+       return __btree_node_ptr_to_offset(b, k);
+}
+
+static inline struct bkey_packed *
+__btree_node_offset_to_key(const struct btree *b, u16 k)
+{
+       return __btree_node_offset_to_ptr(b, k);
+}
+
+static inline unsigned btree_bkey_first_offset(const struct bset_tree *t)
+{
+       return t->data_offset + offsetof(struct bset, _data) / sizeof(u64);
+}
+
+#define btree_bkey_first(_b, _t)                                       \
+({                                                                     \
+       EBUG_ON(bset(_b, _t)->start !=                                  \
+               __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\
+                                                                       \
+       bset(_b, _t)->start;                                            \
+})
+
+#define btree_bkey_last(_b, _t)                                                \
+({                                                                     \
+       EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) !=     \
+               vstruct_last(bset(_b, _t)));                            \
+                                                                       \
+       __btree_node_offset_to_key(_b, (_t)->end_offset);               \
+})
+
+static inline unsigned bset_u64s(struct bset_tree *t)
+{
+       return t->end_offset - t->data_offset -
+               sizeof(struct bset) / sizeof(u64);
+}
+
+static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t)
+{
+       return bset_u64s(t) - b->nr.bset_u64s[t - b->set];
+}
+
+static inline unsigned bset_byte_offset(struct btree *b, void *i)
+{
+       return i - (void *) b->data;
+}
+
+enum btree_node_type {
+#define x(kwd, val, name) BKEY_TYPE_##kwd = val,
+       BCH_BTREE_IDS()
+#undef x
+       BKEY_TYPE_BTREE,
+};
+
+/* Type of a key in btree @id at level @level: */
+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
+{
+       return level ? BKEY_TYPE_BTREE : (enum btree_node_type) id;
+}
+
+/* Type of keys @b contains: */
+static inline enum btree_node_type btree_node_type(struct btree *b)
+{
+       return __btree_node_type(b->c.level, b->c.btree_id);
+}
+
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
+{
+       switch (type) {
+       case BKEY_TYPE_EXTENTS:
+       case BKEY_TYPE_REFLINK:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static inline bool btree_node_is_extents(struct btree *b)
+{
+       return btree_node_type_is_extents(btree_node_type(b));
+}
+
+static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter)
+{
+       return __btree_node_type(iter->level, iter->btree_id);
+}
+
+static inline bool btree_iter_is_extents(struct btree_iter *iter)
+{
+       return btree_node_type_is_extents(btree_iter_key_type(iter));
+}
+
+#define BTREE_NODE_TYPE_HAS_TRIGGERS                   \
+       ((1U << BKEY_TYPE_EXTENTS)|                     \
+        (1U << BKEY_TYPE_ALLOC)|                       \
+        (1U << BKEY_TYPE_INODES)|                      \
+        (1U << BKEY_TYPE_REFLINK)|                     \
+        (1U << BKEY_TYPE_EC)|                          \
+        (1U << BKEY_TYPE_BTREE))
+
+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS             \
+       ((1U << BKEY_TYPE_EXTENTS)|                     \
+        (1U << BKEY_TYPE_INODES)|                      \
+        (1U << BKEY_TYPE_REFLINK))
+
+enum btree_trigger_flags {
+       __BTREE_TRIGGER_NORUN,          /* Don't run triggers at all */
+
+       __BTREE_TRIGGER_INSERT,
+       __BTREE_TRIGGER_OVERWRITE,
+       __BTREE_TRIGGER_OVERWRITE_SPLIT,
+
+       __BTREE_TRIGGER_GC,
+       __BTREE_TRIGGER_BUCKET_INVALIDATE,
+       __BTREE_TRIGGER_NOATOMIC,
+};
+
+#define BTREE_TRIGGER_NORUN            (1U << __BTREE_TRIGGER_NORUN)
+
+#define BTREE_TRIGGER_INSERT           (1U << __BTREE_TRIGGER_INSERT)
+#define BTREE_TRIGGER_OVERWRITE                (1U << __BTREE_TRIGGER_OVERWRITE)
+#define BTREE_TRIGGER_OVERWRITE_SPLIT  (1U << __BTREE_TRIGGER_OVERWRITE_SPLIT)
+
+#define BTREE_TRIGGER_GC               (1U << __BTREE_TRIGGER_GC)
+#define BTREE_TRIGGER_BUCKET_INVALIDATE        (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
+#define BTREE_TRIGGER_NOATOMIC         (1U << __BTREE_TRIGGER_NOATOMIC)
+
+static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+{
+       return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);
+}
+
+struct btree_root {
+       struct btree            *b;
+
+       /* On disk root - see async splits: */
+       __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
+       u8                      level;
+       u8                      alive;
+       s8                      error;
+};
+
+/*
+ * Optional hook that will be called just prior to a btree node update, when
+ * we're holding the write lock and we know what key is about to be overwritten:
+ */
+
+enum btree_insert_ret {
+       BTREE_INSERT_OK,
+       /* leaf node needs to be split */
+       BTREE_INSERT_BTREE_NODE_FULL,
+       BTREE_INSERT_ENOSPC,
+       BTREE_INSERT_NEED_MARK_REPLICAS,
+       BTREE_INSERT_NEED_JOURNAL_RES,
+};
+
+enum btree_gc_coalesce_fail_reason {
+       BTREE_GC_COALESCE_FAIL_RESERVE_GET,
+       BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
+       BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
+};
+
+enum btree_node_sibling {
+       btree_prev_sib,
+       btree_next_sib,
+};
+
+typedef struct btree_nr_keys (*sort_fix_overlapping_fn)(struct bset *,
+                                                       struct btree *,
+                                                       struct btree_node_iter *);
+
+#endif /* _BCACHEFS_BTREE_TYPES_H */
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
new file mode 100644 (file)
index 0000000..e0b1bde
--- /dev/null
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_UPDATE_H
+#define _BCACHEFS_BTREE_UPDATE_H
+
+#include "btree_iter.h"
+#include "journal.h"
+
+struct bch_fs;
+struct btree;
+
+void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *,
+                                    struct btree_iter *);
+bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *,
+                               struct btree_node_iter *, struct bkey_i *);
+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
+
+enum btree_insert_flags {
+       __BTREE_INSERT_NOUNLOCK,
+       __BTREE_INSERT_NOFAIL,
+       __BTREE_INSERT_NOCHECK_RW,
+       __BTREE_INSERT_LAZY_RW,
+       __BTREE_INSERT_USE_RESERVE,
+       __BTREE_INSERT_USE_ALLOC_RESERVE,
+       __BTREE_INSERT_JOURNAL_REPLAY,
+       __BTREE_INSERT_JOURNAL_RESERVED,
+       __BTREE_INSERT_JOURNAL_RECLAIM,
+       __BTREE_INSERT_NOWAIT,
+       __BTREE_INSERT_GC_LOCK_HELD,
+       __BCH_HASH_SET_MUST_CREATE,
+       __BCH_HASH_SET_MUST_REPLACE,
+};
+
+/*
+ * Don't drop locks _after_ successfully updating btree:
+ */
+#define BTREE_INSERT_NOUNLOCK          (1 << __BTREE_INSERT_NOUNLOCK)
+
+/* Don't check for -ENOSPC: */
+#define BTREE_INSERT_NOFAIL            (1 << __BTREE_INSERT_NOFAIL)
+
+#define BTREE_INSERT_NOCHECK_RW                (1 << __BTREE_INSERT_NOCHECK_RW)
+#define BTREE_INSERT_LAZY_RW           (1 << __BTREE_INSERT_LAZY_RW)
+
+/* for copygc, or when merging btree nodes */
+#define BTREE_INSERT_USE_RESERVE       (1 << __BTREE_INSERT_USE_RESERVE)
+#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
+
+/* Insert is for journal replay - don't get journal reservations: */
+#define BTREE_INSERT_JOURNAL_REPLAY    (1 << __BTREE_INSERT_JOURNAL_REPLAY)
+
+/* Indicates that we have pre-reserved space in the journal: */
+#define BTREE_INSERT_JOURNAL_RESERVED  (1 << __BTREE_INSERT_JOURNAL_RESERVED)
+
+/* Insert is being called from journal reclaim path: */
+#define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM)
+
+/* Don't block on allocation failure (for new btree nodes): */
+#define BTREE_INSERT_NOWAIT            (1 << __BTREE_INSERT_NOWAIT)
+#define BTREE_INSERT_GC_LOCK_HELD      (1 << __BTREE_INSERT_GC_LOCK_HELD)
+
+#define BCH_HASH_SET_MUST_CREATE       (1 << __BCH_HASH_SET_MUST_CREATE)
+#define BCH_HASH_SET_MUST_REPLACE      (1 << __BCH_HASH_SET_MUST_REPLACE)
+
+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
+
+int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *);
+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
+                    struct disk_reservation *, u64 *, int flags);
+
+int bch2_btree_delete_at_range(struct btree_trans *, struct btree_iter *,
+                              struct bpos, u64 *);
+int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
+                           struct bpos, struct bpos, u64 *);
+
+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
+                           __le64, unsigned);
+int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
+                              struct btree *, struct bkey_i *);
+
+int bch2_trans_update(struct btree_trans *, struct btree_iter *,
+                     struct bkey_i *, enum btree_trigger_flags);
+int __bch2_trans_commit(struct btree_trans *);
+
+/**
+ * bch2_trans_commit - insert keys at given iterator positions
+ *
+ * This is the main entry point for btree updates.
+ *
+ * Return values:
+ * -EINTR: locking changed, this function should be called again.
+ * -EROFS: filesystem read only
+ * -EIO: journal or btree node IO error
+ */
+static inline int bch2_trans_commit(struct btree_trans *trans,
+                                   struct disk_reservation *disk_res,
+                                   u64 *journal_seq,
+                                   unsigned flags)
+{
+       trans->disk_res         = disk_res;
+       trans->journal_seq      = journal_seq;
+       trans->flags            = flags;
+
+       return __bch2_trans_commit(trans);
+}
+
+#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do)  \
+({                                                                     \
+       int _ret;                                                       \
+                                                                       \
+       while (1) {                                                     \
+               _ret = (_do) ?: bch2_trans_commit(_trans, (_disk_res),  \
+                                       (_journal_seq), (_flags));      \
+               if (_ret != -EINTR)                                     \
+                       break;                                          \
+               bch2_trans_reset(_trans, 0);                            \
+       }                                                               \
+                                                                       \
+       _ret;                                                           \
+})
+
+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do)                \
+({                                                                     \
+       struct btree_trans trans;                                       \
+       int _ret, _ret2;                                                \
+                                                                       \
+       bch2_trans_init(&trans, (_c), 0, 0);                            \
+       _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \
+                              _do);                                    \
+       _ret2 = bch2_trans_exit(&trans);                                \
+                                                                       \
+       _ret ?: _ret2;                                                  \
+})
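+
+/*
+ * Illustrative use of bch2_trans_do() (not part of this interface; @id and
+ * @k are assumed from the caller). The hidden transaction is named "trans",
+ * so the _do expression refers to &trans, and -EINTR restarts are retried
+ * automatically:
+ *
+ *     ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
+ *                     __bch2_btree_insert(&trans, id, k));
+ */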
+
+#define trans_for_each_update(_trans, _i)                              \
+       for ((_i) = (_trans)->updates;                                  \
+            (_i) < (_trans)->updates + (_trans)->nr_updates;           \
+            (_i)++)
+
+#define trans_for_each_update2(_trans, _i)                             \
+       for ((_i) = (_trans)->updates2;                                 \
+            (_i) < (_trans)->updates2 + (_trans)->nr_updates2;         \
+            (_i)++)
+
+#endif /* _BCACHEFS_BTREE_UPDATE_H */
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
new file mode 100644 (file)
index 0000000..a2604b0
--- /dev/null
@@ -0,0 +1,2075 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_methods.h"
+#include "btree_cache.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "buckets.h"
+#include "extents.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+#include "keylist.h"
+#include "replicas.h"
+#include "super-io.h"
+
+#include <linux/random.h>
+#include <trace/events/bcachefs.h>
+
+/* Debug code: */
+
+/*
+ * Verify that child nodes correctly span the parent node's range:
+ */
+static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct bpos next_node = b->data->min_key;
+       struct btree_node_iter iter;
+       struct bkey_s_c k;
+       struct bkey_s_c_btree_ptr_v2 bp;
+       struct bkey unpacked;
+
+       BUG_ON(!b->c.level);
+
+       if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
+               return;
+
+       bch2_btree_node_iter_init_from_start(&iter, b);
+
+       while (1) {
+               k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
+               if (k.k->type != KEY_TYPE_btree_ptr_v2)
+                       break;
+               bp = bkey_s_c_to_btree_ptr_v2(k);
+
+               BUG_ON(bkey_cmp(next_node, bp.v->min_key));
+
+               bch2_btree_node_iter_advance(&iter, b);
+
+               if (bch2_btree_node_iter_end(&iter)) {
+                       BUG_ON(bkey_cmp(k.k->p, b->key.k.p));
+                       break;
+               }
+
+               next_node = bkey_successor(k.k->p);
+       }
+#endif
+}
+
+/* Calculate ideal packed bkey format for new btree nodes: */
+
+void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
+{
+       struct bkey_packed *k;
+       struct bset_tree *t;
+       struct bkey uk;
+
+       bch2_bkey_format_add_pos(s, b->data->min_key);
+
+       for_each_bset(b, t)
+               bset_tree_for_each_key(b, t, k)
+                       if (!bkey_whiteout(k)) {
+                               uk = bkey_unpack_key(b, k);
+                               bch2_bkey_format_add_key(s, &uk);
+                       }
+}
+
+static struct bkey_format bch2_btree_calc_format(struct btree *b)
+{
+       struct bkey_format_state s;
+
+       bch2_bkey_format_init(&s);
+       __bch2_btree_calc_format(&s, b);
+
+       return bch2_bkey_format_done(&s);
+}
+
+static size_t btree_node_u64s_with_format(struct btree *b,
+                                         struct bkey_format *new_f)
+{
+       struct bkey_format *old_f = &b->format;
+
+       /* stupid integer promotion rules */
+       ssize_t delta =
+           (((int) new_f->key_u64s - old_f->key_u64s) *
+            (int) b->nr.packed_keys) +
+           (((int) new_f->key_u64s - BKEY_U64s) *
+            (int) b->nr.unpacked_keys);
+
+       BUG_ON(delta + b->nr.live_u64s < 0);
+
+       return b->nr.live_u64s + delta;
+}
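+
+/*
+ * Worked example (numbers illustrative): repacking 100 packed keys from 3
+ * down to 2 u64s each contributes 100 * (2 - 3) = -100 u64s, and 10
+ * unpacked keys that would now pack to 2 u64s contribute 10 * (2 - BKEY_U64s)
+ * u64s (also negative, since an unpacked bkey occupies BKEY_U64s > 2 u64s);
+ * delta above is the sum of the two terms.
+ */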
+
+/**
+ * bch2_btree_node_format_fits - check if we could rewrite a node with a new format
+ *
+ * This assumes all keys can pack with the new format -- it just checks if
+ * the re-packed keys would fit inside the node itself.
+ */
+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
+                                struct bkey_format *new_f)
+{
+       size_t u64s = btree_node_u64s_with_format(b, new_f);
+
+       return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
+}
+
+/* Btree node freeing/allocation: */
+
+static void __btree_node_free(struct bch_fs *c, struct btree *b)
+{
+       trace_btree_node_free(c, b);
+
+       BUG_ON(btree_node_dirty(b));
+       BUG_ON(btree_node_need_write(b));
+       BUG_ON(b == btree_node_root(c, b));
+       BUG_ON(b->ob.nr);
+       BUG_ON(!list_empty(&b->write_blocked));
+       BUG_ON(b->will_make_reachable);
+
+       clear_btree_node_noevict(b);
+
+       bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+       mutex_lock(&c->btree_cache.lock);
+       list_move(&b->list, &c->btree_cache.freeable);
+       mutex_unlock(&c->btree_cache.lock);
+}
+
+void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
+{
+       struct open_buckets ob = b->ob;
+
+       b->ob.nr = 0;
+
+       clear_btree_node_dirty(b);
+
+       btree_node_lock_type(c, b, SIX_LOCK_write);
+       __btree_node_free(c, b);
+       six_unlock_write(&b->c.lock);
+
+       bch2_open_buckets_put(c, &ob);
+}
+
+void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
+                               struct btree_iter *iter)
+{
+       struct btree_iter *linked;
+
+       trans_for_each_iter(iter->trans, linked)
+               BUG_ON(linked->l[b->c.level].b == b);
+
+       six_lock_write(&b->c.lock, NULL, NULL);
+       __btree_node_free(c, b);
+       six_unlock_write(&b->c.lock);
+       six_unlock_intent(&b->c.lock);
+}
+
+static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
+                                            struct disk_reservation *res,
+                                            struct closure *cl,
+                                            unsigned flags)
+{
+       struct write_point *wp;
+       struct btree *b;
+       BKEY_PADDED(k) tmp;
+       struct open_buckets ob = { .nr = 0 };
+       struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
+       unsigned nr_reserve;
+       enum alloc_reserve alloc_reserve;
+
+       if (flags & BTREE_INSERT_USE_ALLOC_RESERVE) {
+               nr_reserve      = 0;
+               alloc_reserve   = RESERVE_ALLOC;
+       } else if (flags & BTREE_INSERT_USE_RESERVE) {
+               nr_reserve      = BTREE_NODE_RESERVE / 2;
+               alloc_reserve   = RESERVE_BTREE;
+       } else {
+               nr_reserve      = BTREE_NODE_RESERVE;
+               alloc_reserve   = RESERVE_NONE;
+       }
+
+       mutex_lock(&c->btree_reserve_cache_lock);
+       if (c->btree_reserve_cache_nr > nr_reserve) {
+               struct btree_alloc *a =
+                       &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+
+               ob = a->ob;
+               bkey_copy(&tmp.k, &a->k);
+               mutex_unlock(&c->btree_reserve_cache_lock);
+               goto mem_alloc;
+       }
+       mutex_unlock(&c->btree_reserve_cache_lock);
+
+retry:
+       wp = bch2_alloc_sectors_start(c, c->opts.foreground_target, 0,
+                                     writepoint_ptr(&c->btree_write_point),
+                                     &devs_have,
+                                     res->nr_replicas,
+                                     c->opts.metadata_replicas_required,
+                                     alloc_reserve, 0, cl);
+       if (IS_ERR(wp))
+               return ERR_CAST(wp);
+
+       if (wp->sectors_free < c->opts.btree_node_size) {
+               struct open_bucket *ob;
+               unsigned i;
+
+               open_bucket_for_each(c, &wp->ptrs, ob, i)
+                       if (ob->sectors_free < c->opts.btree_node_size)
+                               ob->sectors_free = 0;
+
+               bch2_alloc_sectors_done(c, wp);
+               goto retry;
+       }
+
+       if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2))
+               bkey_btree_ptr_v2_init(&tmp.k);
+       else
+               bkey_btree_ptr_init(&tmp.k);
+
+       bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
+
+       bch2_open_bucket_get(c, wp, &ob);
+       bch2_alloc_sectors_done(c, wp);
+mem_alloc:
+       b = bch2_btree_node_mem_alloc(c);
+
+       /* we hold cannibalize_lock: */
+       BUG_ON(IS_ERR(b));
+       BUG_ON(b->ob.nr);
+
+       bkey_copy(&b->key, &tmp.k);
+       b->ob = ob;
+
+       return b;
+}
+
+static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level)
+{
+       struct bch_fs *c = as->c;
+       struct btree *b;
+       int ret;
+
+       BUG_ON(level >= BTREE_MAX_DEPTH);
+       BUG_ON(!as->nr_prealloc_nodes);
+
+       b = as->prealloc_nodes[--as->nr_prealloc_nodes];
+
+       set_btree_node_accessed(b);
+       set_btree_node_dirty(b);
+       set_btree_node_need_write(b);
+
+       bch2_bset_init_first(b, &b->data->keys);
+       b->c.level      = level;
+       b->c.btree_id   = as->btree_id;
+
+       memset(&b->nr, 0, sizeof(b->nr));
+       b->data->magic = cpu_to_le64(bset_magic(c));
+       b->data->flags = 0;
+       SET_BTREE_NODE_ID(b->data, as->btree_id);
+       SET_BTREE_NODE_LEVEL(b->data, level);
+       b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
+
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
+
+               bp->v.mem_ptr           = 0;
+               bp->v.seq               = b->data->keys.seq;
+               bp->v.sectors_written   = 0;
+               bp->v.sectors           = cpu_to_le16(c->opts.btree_node_size);
+       }
+
+       if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
+               SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
+
+       if (btree_node_is_extents(b) &&
+           !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data)) {
+               set_btree_node_old_extent_overwrite(b);
+               set_btree_node_need_rewrite(b);
+       }
+
+       bch2_btree_build_aux_trees(b);
+
+       ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
+       BUG_ON(ret);
+
+       trace_btree_node_alloc(c, b);
+       return b;
+}
+
+static void btree_set_min(struct btree *b, struct bpos pos)
+{
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2)
+               bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
+       b->data->min_key = pos;
+}
+
+static void btree_set_max(struct btree *b, struct bpos pos)
+{
+       b->key.k.p = pos;
+       b->data->max_key = pos;
+}
+
+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
+                                                 struct btree *b,
+                                                 struct bkey_format format)
+{
+       struct btree *n;
+
+       n = bch2_btree_node_alloc(as, b->c.level);
+
+       SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
+
+       btree_set_min(n, b->data->min_key);
+       btree_set_max(n, b->data->max_key);
+
+       n->data->format         = format;
+       btree_node_set_format(n, format);
+
+       bch2_btree_sort_into(as->c, n, b);
+
+       btree_node_reset_sib_u64s(n);
+
+       n->key.k.p = b->key.k.p;
+       return n;
+}
+
+static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
+                                                      struct btree *b)
+{
+       struct bkey_format new_f = bch2_btree_calc_format(b);
+
+       /*
+        * The keys might expand with the new format - if they wouldn't fit in
+        * the btree node anymore, use the old format for now:
+        */
+       if (!bch2_btree_node_format_fits(as->c, b, &new_f))
+               new_f = b->format;
+
+       return __bch2_btree_node_alloc_replacement(as, b, new_f);
+}
+
+static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
+{
+       struct btree *b = bch2_btree_node_alloc(as, level);
+
+       btree_set_min(b, POS_MIN);
+       btree_set_max(b, POS_MAX);
+       b->data->format = bch2_btree_calc_format(b);
+
+       btree_node_set_format(b, b->data->format);
+       bch2_btree_build_aux_trees(b);
+
+       bch2_btree_update_add_new_node(as, b);
+       six_unlock_write(&b->c.lock);
+
+       return b;
+}
+
+static void bch2_btree_reserve_put(struct btree_update *as)
+{
+       struct bch_fs *c = as->c;
+
+       mutex_lock(&c->btree_reserve_cache_lock);
+
+       while (as->nr_prealloc_nodes) {
+               struct btree *b = as->prealloc_nodes[--as->nr_prealloc_nodes];
+
+               six_unlock_write(&b->c.lock);
+
+               if (c->btree_reserve_cache_nr <
+                   ARRAY_SIZE(c->btree_reserve_cache)) {
+                       struct btree_alloc *a =
+                               &c->btree_reserve_cache[c->btree_reserve_cache_nr++];
+
+                       a->ob = b->ob;
+                       b->ob.nr = 0;
+                       bkey_copy(&a->k, &b->key);
+               } else {
+                       bch2_open_buckets_put(c, &b->ob);
+               }
+
+               btree_node_lock_type(c, b, SIX_LOCK_write);
+               __btree_node_free(c, b);
+               six_unlock_write(&b->c.lock);
+
+               six_unlock_intent(&b->c.lock);
+       }
+
+       mutex_unlock(&c->btree_reserve_cache_lock);
+}
+
+static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
+                                 unsigned flags, struct closure *cl)
+{
+       struct bch_fs *c = as->c;
+       struct btree *b;
+       int ret;
+
+       BUG_ON(nr_nodes > BTREE_RESERVE_MAX);
+
+       /*
+        * Protects reaping from the btree node cache and using the btree node
+        * open bucket reserve:
+        */
+       ret = bch2_btree_cache_cannibalize_lock(c, cl);
+       if (ret)
+               return ret;
+
+       while (as->nr_prealloc_nodes < nr_nodes) {
+               b = __bch2_btree_node_alloc(c, &as->disk_res,
+                                           flags & BTREE_INSERT_NOWAIT
+                                           ? NULL : cl, flags);
+               if (IS_ERR(b)) {
+                       ret = PTR_ERR(b);
+                       goto err_free;
+               }
+
+               ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
+               if (ret)
+                       goto err_free;
+
+               as->prealloc_nodes[as->nr_prealloc_nodes++] = b;
+       }
+
+       bch2_btree_cache_cannibalize_unlock(c);
+       return 0;
+err_free:
+       bch2_btree_cache_cannibalize_unlock(c);
+       trace_btree_reserve_get_fail(c, nr_nodes, cl);
+       return ret;
+}
+
+/* Asynchronous interior node update machinery */
+
+static void bch2_btree_update_free(struct btree_update *as)
+{
+       struct bch_fs *c = as->c;
+
+       bch2_journal_preres_put(&c->journal, &as->journal_preres);
+
+       bch2_journal_pin_drop(&c->journal, &as->journal);
+       bch2_journal_pin_flush(&c->journal, &as->journal);
+       bch2_disk_reservation_put(c, &as->disk_res);
+       bch2_btree_reserve_put(as);
+
+       mutex_lock(&c->btree_interior_update_lock);
+       list_del(&as->unwritten_list);
+       list_del(&as->list);
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       closure_debug_destroy(&as->cl);
+       mempool_free(as, &c->btree_interior_update_pool);
+
+       closure_wake_up(&c->btree_interior_update_wait);
+}
+
+static void btree_update_will_delete_key(struct btree_update *as,
+                                        struct bkey_i *k)
+{
+       BUG_ON(bch2_keylist_u64s(&as->old_keys) + k->k.u64s >
+              ARRAY_SIZE(as->_old_keys));
+       bch2_keylist_add(&as->old_keys, k);
+}
+
+static void btree_update_will_add_key(struct btree_update *as,
+                                     struct bkey_i *k)
+{
+       BUG_ON(bch2_keylist_u64s(&as->new_keys) + k->k.u64s >
+              ARRAY_SIZE(as->_new_keys));
+       bch2_keylist_add(&as->new_keys, k);
+}
+
+/*
+ * The transactional part of an interior btree node update, where we journal the
+ * update we did to the interior node and update alloc info:
+ */
+static int btree_update_nodes_written_trans(struct btree_trans *trans,
+                                           struct btree_update *as)
+{
+       struct bkey_i *k;
+       int ret;
+
+       trans->extra_journal_entries = (void *) &as->journal_entries[0];
+       trans->extra_journal_entry_u64s = as->journal_u64s;
+       trans->journal_pin = &as->journal;
+
+       for_each_keylist_key(&as->new_keys, k) {
+               ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+                                         0, 0, BTREE_TRIGGER_INSERT);
+               if (ret)
+                       return ret;
+       }
+
+       for_each_keylist_key(&as->old_keys, k) {
+               ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+                                         0, 0, BTREE_TRIGGER_OVERWRITE);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static void btree_update_nodes_written(struct btree_update *as)
+{
+       struct bch_fs *c = as->c;
+       struct btree *b = as->b;
+       u64 journal_seq = 0;
+       unsigned i;
+       int ret;
+
+       /*
+        * We did an update to a parent node where the pointers we added pointed
+        * to child nodes that weren't written yet: now, the child nodes have
+        * been written so we can write out the update to the interior node.
+        */
+
+       /*
+        * We can't call into journal reclaim here: we'd block on the journal
+        * reclaim lock, but we may need to release the open buckets we have
+        * pinned in order for other btree updates to make forward progress, and
+        * journal reclaim does btree updates when flushing bkey_cached entries,
+        * which may require allocations as well.
+        */
+       ret = bch2_trans_do(c, &as->disk_res, &journal_seq,
+                           BTREE_INSERT_NOFAIL|
+                           BTREE_INSERT_USE_RESERVE|
+                           BTREE_INSERT_USE_ALLOC_RESERVE|
+                           BTREE_INSERT_NOCHECK_RW|
+                           BTREE_INSERT_JOURNAL_RECLAIM|
+                           BTREE_INSERT_JOURNAL_RESERVED,
+                           btree_update_nodes_written_trans(&trans, as));
+       BUG_ON(ret && !bch2_journal_error(&c->journal));
+
+       if (b) {
+               /*
+                * @b is the node we did the final insert into:
+                *
+                * On failure to get a journal reservation, we still have to
+                * unblock the write and allow most of the write path to happen
+                * so that shutdown works, but the i->journal_seq mechanism
+                * won't work to prevent the btree write from being visible (we
+                * didn't get a journal sequence number) - instead
+                * __bch2_btree_node_write() doesn't do the actual write if
+                * we're in journal error state:
+                */
+
+               btree_node_lock_type(c, b, SIX_LOCK_intent);
+               btree_node_lock_type(c, b, SIX_LOCK_write);
+               mutex_lock(&c->btree_interior_update_lock);
+
+               list_del(&as->write_blocked_list);
+
+               if (!ret && as->b == b) {
+                       struct bset *i = btree_bset_last(b);
+
+                       BUG_ON(!b->c.level);
+                       BUG_ON(!btree_node_dirty(b));
+
+                       i->journal_seq = cpu_to_le64(
+                               max(journal_seq,
+                                   le64_to_cpu(i->journal_seq)));
+
+                       bch2_btree_add_journal_pin(c, b, journal_seq);
+               }
+
+               mutex_unlock(&c->btree_interior_update_lock);
+               six_unlock_write(&b->c.lock);
+
+               btree_node_write_if_need(c, b, SIX_LOCK_intent);
+               six_unlock_intent(&b->c.lock);
+       }
+
+       bch2_journal_pin_drop(&c->journal, &as->journal);
+
+       bch2_journal_preres_put(&c->journal, &as->journal_preres);
+
+       mutex_lock(&c->btree_interior_update_lock);
+       for (i = 0; i < as->nr_new_nodes; i++) {
+               b = as->new_nodes[i];
+
+               BUG_ON(b->will_make_reachable != (unsigned long) as);
+               b->will_make_reachable = 0;
+       }
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       for (i = 0; i < as->nr_new_nodes; i++) {
+               b = as->new_nodes[i];
+
+               btree_node_lock_type(c, b, SIX_LOCK_read);
+               btree_node_write_if_need(c, b, SIX_LOCK_read);
+               six_unlock_read(&b->c.lock);
+       }
+
+       for (i = 0; i < as->nr_open_buckets; i++)
+               bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
+
+       bch2_btree_update_free(as);
+}
+
+static void btree_interior_update_work(struct work_struct *work)
+{
+       struct bch_fs *c =
+               container_of(work, struct bch_fs, btree_interior_update_work);
+       struct btree_update *as;
+
+       while (1) {
+               mutex_lock(&c->btree_interior_update_lock);
+               as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
+                                             struct btree_update, unwritten_list);
+               if (as && !as->nodes_written)
+                       as = NULL;
+               mutex_unlock(&c->btree_interior_update_lock);
+
+               if (!as)
+                       break;
+
+               btree_update_nodes_written(as);
+       }
+}
+
+static void btree_update_set_nodes_written(struct closure *cl)
+{
+       struct btree_update *as = container_of(cl, struct btree_update, cl);
+       struct bch_fs *c = as->c;
+
+       mutex_lock(&c->btree_interior_update_lock);
+       as->nodes_written = true;
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
+}
+
+/*
+ * We're updating @b with pointers to nodes that haven't finished writing yet:
+ * block @b from being written until @as completes
+ */
+static void btree_update_updated_node(struct btree_update *as, struct btree *b)
+{
+       struct bch_fs *c = as->c;
+
+       mutex_lock(&c->btree_interior_update_lock);
+       list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
+       BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+       BUG_ON(!btree_node_dirty(b));
+
+       as->mode        = BTREE_INTERIOR_UPDATING_NODE;
+       as->b           = b;
+       list_add(&as->write_blocked_list, &b->write_blocked);
+
+       mutex_unlock(&c->btree_interior_update_lock);
+}
+
+static void btree_update_reparent(struct btree_update *as,
+                                 struct btree_update *child)
+{
+       struct bch_fs *c = as->c;
+
+       lockdep_assert_held(&c->btree_interior_update_lock);
+
+       child->b = NULL;
+       child->mode = BTREE_INTERIOR_UPDATING_AS;
+
+       /*
+        * When we write a new btree root, we have to drop our journal pin
+        * _before_ the new nodes are technically reachable; see
+        * btree_update_nodes_written().
+        *
+        * This goes for journal pins that are recursively blocked on us - so,
+        * just transfer the journal pin to the new interior update so
+        * btree_update_nodes_written() can drop it.
+        */
+       bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL);
+       bch2_journal_pin_drop(&c->journal, &child->journal);
+}
+
+static void btree_update_updated_root(struct btree_update *as, struct btree *b)
+{
+       struct bkey_i *insert = &b->key;
+       struct bch_fs *c = as->c;
+
+       BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+
+       BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
+              ARRAY_SIZE(as->journal_entries));
+
+       as->journal_u64s +=
+               journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
+                                 BCH_JSET_ENTRY_btree_root,
+                                 b->c.btree_id, b->c.level,
+                                 insert, insert->k.u64s);
+
+       mutex_lock(&c->btree_interior_update_lock);
+       list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
+       as->mode        = BTREE_INTERIOR_UPDATING_ROOT;
+       mutex_unlock(&c->btree_interior_update_lock);
+}
+
+/*
+ * bch2_btree_update_add_new_node:
+ *
+ * This causes @as to wait on @b to be written, before it gets to
+ * btree_update_nodes_written()
+ *
+ * Additionally, it sets b->will_make_reachable to prevent any additional
+ * writes to @b besides the first, until @b is reachable on disk
+ *
+ * And it adds @b to the list of @as's new nodes, so that we can update sector
+ * counts in btree_update_nodes_written():
+ */
+void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b)
+{
+       struct bch_fs *c = as->c;
+
+       closure_get(&as->cl);
+
+       mutex_lock(&c->btree_interior_update_lock);
+       BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes));
+       BUG_ON(b->will_make_reachable);
+
+       as->new_nodes[as->nr_new_nodes++] = b;
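+       /*
+        * Tagged pointer: the low bit records that @b took a ref on as->cl,
+        * the remaining bits point at @as - see the xchg() in
+        * btree_update_drop_new_node():
+        */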
+       b->will_make_reachable = 1UL|(unsigned long) as;
+
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       btree_update_will_add_key(as, &b->key);
+}
+
+/*
+ * Drop @b from the list of new nodes a pending btree_update (if any) will
+ * make reachable, dropping the closure ref @b held on that update:
+ */
+static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
+{
+       struct btree_update *as;
+       unsigned long v;
+       unsigned i;
+
+       mutex_lock(&c->btree_interior_update_lock);
+       /*
+        * When b->will_make_reachable != 0, it owns a ref on as->cl that's
+        * dropped when it gets written by bch2_btree_complete_write - the
+        * xchg() is for synchronization with bch2_btree_complete_write:
+        */
+       v = xchg(&b->will_make_reachable, 0);
+       as = (struct btree_update *) (v & ~1UL);
+
+       if (!as) {
+               mutex_unlock(&c->btree_interior_update_lock);
+               return;
+       }
+
+       for (i = 0; i < as->nr_new_nodes; i++)
+               if (as->new_nodes[i] == b)
+                       goto found;
+
+       BUG();
+found:
+       array_remove_item(as->new_nodes, as->nr_new_nodes, i);
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       if (v & 1)
+               closure_put(&as->cl);
+}
+
+void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b)
+{
+       while (b->ob.nr)
+               as->open_buckets[as->nr_open_buckets++] =
+                       b->ob.v[--b->ob.nr];
+}
+
+/*
+ * @b is being split/rewritten: it may have pointers to not-yet-written btree
+ * nodes and thus outstanding btree_updates - redirect @b's
+ * btree_updates to point to this btree_update:
+ */
+void bch2_btree_interior_update_will_free_node(struct btree_update *as,
+                                              struct btree *b)
+{
+       struct bch_fs *c = as->c;
+       struct btree_update *p, *n;
+       struct btree_write *w;
+
+       set_btree_node_dying(b);
+
+       if (btree_node_fake(b))
+               return;
+
+       mutex_lock(&c->btree_interior_update_lock);
+
+       /*
+        * Does this node have any btree_update operations preventing
+        * it from being written?
+        *
+        * If so, redirect them to point to this btree_update: we can
+        * write out our new nodes, but we won't make them visible until those
+        * operations complete
+        */
+       list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
+               list_del_init(&p->write_blocked_list);
+               btree_update_reparent(as, p);
+
+               /*
+                * for flush_held_btree_writes() waiting on updates to flush or
+                * nodes to be writeable:
+                */
+               closure_wake_up(&c->btree_interior_update_wait);
+       }
+
+       clear_btree_node_dirty(b);
+       clear_btree_node_need_write(b);
+
+       /*
+        * Does this node have unwritten data that has a pin on the journal?
+        *
+        * If so, transfer that pin to the btree_update operation -
+        * note that if we're freeing multiple nodes, we only need to keep the
+        * oldest pin of any of the nodes we're freeing. We'll release the pin
+        * when the new nodes are persistent and reachable on disk:
+        */
+       w = btree_current_write(b);
+       bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
+       bch2_journal_pin_drop(&c->journal, &w->journal);
+
+       w = btree_prev_write(b);
+       bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
+       bch2_journal_pin_drop(&c->journal, &w->journal);
+
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       /*
+        * Is this a node that isn't reachable on disk yet?
+        *
+        * Nodes that aren't reachable yet have writes blocked until they're
+        * reachable - now that we've cancelled any pending writes and moved
+        * things waiting on that write to wait on this update, we can drop this
+        * node from the list of nodes that the other update is making
+        * reachable, prior to freeing it:
+        */
+       btree_update_drop_new_node(c, b);
+
+       btree_update_will_delete_key(as, &b->key);
+}
+
+void bch2_btree_update_done(struct btree_update *as)
+{
+       BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
+
+       bch2_btree_reserve_put(as);
+
+       continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq);
+}
+
+struct btree_update *
+bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
+                       unsigned nr_nodes, unsigned flags,
+                       struct closure *cl)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_update *as;
+       int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
+               ? BCH_DISK_RESERVATION_NOFAIL : 0;
+       int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED)
+               ? JOURNAL_RES_GET_RECLAIM : 0;
+       int ret = 0;
+
+       /*
+        * This check isn't necessary for correctness - it's just to potentially
+        * prevent us from doing a lot of work that'll end up being wasted:
+        */
+       ret = bch2_journal_error(&c->journal);
+       if (ret)
+               return ERR_PTR(ret);
+
+       as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
+       memset(as, 0, sizeof(*as));
+       closure_init(&as->cl, NULL);
+       as->c           = c;
+       as->mode        = BTREE_INTERIOR_NO_UPDATE;
+       as->btree_id    = id;
+       INIT_LIST_HEAD(&as->list);
+       INIT_LIST_HEAD(&as->unwritten_list);
+       INIT_LIST_HEAD(&as->write_blocked_list);
+       bch2_keylist_init(&as->old_keys, as->_old_keys);
+       bch2_keylist_init(&as->new_keys, as->_new_keys);
+       bch2_keylist_init(&as->parent_keys, as->inline_keys);
+
+       ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
+                                     BTREE_UPDATE_JOURNAL_RES,
+                                     journal_flags|JOURNAL_RES_GET_NONBLOCK);
+       if (ret == -EAGAIN) {
+               if (flags & BTREE_INSERT_NOUNLOCK)
+                       return ERR_PTR(-EINTR);
+
+               bch2_trans_unlock(trans);
+
+               ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
+                               BTREE_UPDATE_JOURNAL_RES,
+                               journal_flags);
+               if (ret)
+                       return ERR_PTR(ret);
+
+               if (!bch2_trans_relock(trans)) {
+                       ret = -EINTR;
+                       goto err;
+               }
+       }
+
+       ret = bch2_disk_reservation_get(c, &as->disk_res,
+                       nr_nodes * c->opts.btree_node_size,
+                       c->opts.metadata_replicas,
+                       disk_res_flags);
+       if (ret)
+               goto err;
+
+       ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl);
+       if (ret)
+               goto err;
+
+       mutex_lock(&c->btree_interior_update_lock);
+       list_add_tail(&as->list, &c->btree_interior_update_list);
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       return as;
+err:
+       bch2_btree_update_free(as);
+       return ERR_PTR(ret);
+}
+
+/* Btree root updates: */
+
+static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
+{
+       /* Root nodes cannot be reaped */
+       mutex_lock(&c->btree_cache.lock);
+       list_del_init(&b->list);
+       mutex_unlock(&c->btree_cache.lock);
+
+       mutex_lock(&c->btree_root_lock);
+       BUG_ON(btree_node_root(c, b) &&
+              (b->c.level < btree_node_root(c, b)->c.level ||
+               !btree_node_dying(btree_node_root(c, b))));
+
+       btree_node_root(c, b) = b;
+       mutex_unlock(&c->btree_root_lock);
+
+       bch2_recalc_btree_reserve(c);
+}
+
+/**
+ * bch2_btree_set_root - update the root in memory and on disk
+ *
+ * To ensure forward progress, the current task must not be holding any
+ * btree node write locks. However, you must hold an intent lock on the
+ * old root.
+ *
+ * Note: the new root is journalled as a BCH_JSET_ENTRY_btree_root entry
+ * by btree_update_updated_root(); btree roots are also included in every
+ * journal write, so this guarantees the root change reaches the journal.
+ */
+static void bch2_btree_set_root(struct btree_update *as, struct btree *b,
+                               struct btree_iter *iter)
+{
+       struct bch_fs *c = as->c;
+       struct btree *old;
+
+       trace_btree_set_root(c, b);
+       BUG_ON(!b->written &&
+              !test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags));
+
+       old = btree_node_root(c, b);
+
+       /*
+        * Ensure no one is using the old root while we switch to the
+        * new root:
+        */
+       bch2_btree_node_lock_write(old, iter);
+
+       bch2_btree_set_root_inmem(c, b);
+
+       btree_update_updated_root(as, b);
+
+       /*
+        * Unlock old root after new root is visible:
+        *
+        * The new root isn't persistent, but that's ok: we still have
+        * an intent lock on the new root, and any updates that would
+        * depend on the new root would have to update the new root.
+        */
+       bch2_btree_node_unlock_write(old, iter);
+}
+
+/* Interior node updates: */
+
+static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b,
+                                       struct btree_iter *iter,
+                                       struct bkey_i *insert,
+                                       struct btree_node_iter *node_iter)
+{
+       struct bkey_packed *k;
+
+       BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
+              ARRAY_SIZE(as->journal_entries));
+
+       as->journal_u64s +=
+               journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
+                                 BCH_JSET_ENTRY_btree_keys,
+                                 b->c.btree_id, b->c.level,
+                                 insert, insert->k.u64s);
+
+       while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
+              bkey_iter_pos_cmp(b, k, &insert->k.p) < 0)
+               bch2_btree_node_iter_advance(node_iter, b);
+
+       bch2_btree_bset_insert_key(iter, b, node_iter, insert);
+       set_btree_node_dirty(b);
+       set_btree_node_need_write(b);
+}
+
+/*
+ * Move keys from n1 (original replacement node, now lower node) to n2 (higher
+ * node)
+ */
+static struct btree *__btree_split_node(struct btree_update *as,
+                                       struct btree *n1,
+                                       struct btree_iter *iter)
+{
+       size_t nr_packed = 0, nr_unpacked = 0;
+       struct btree *n2;
+       struct bset *set1, *set2;
+       struct bkey_packed *k, *prev = NULL;
+
+       n2 = bch2_btree_node_alloc(as, n1->c.level);
+       bch2_btree_update_add_new_node(as, n2);
+
+       n2->data->max_key       = n1->data->max_key;
+       n2->data->format        = n1->format;
+       SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
+       n2->key.k.p = n1->key.k.p;
+
+       btree_node_set_format(n2, n2->data->format);
+
+       set1 = btree_bset_first(n1);
+       set2 = btree_bset_first(n2);
+
+       /*
+        * Has to be a linear search because we don't have an auxiliary
+        * search tree yet
+        */
+       k = set1->start;
+       while (1) {
+               struct bkey_packed *n = bkey_next_skip_noops(k, vstruct_last(set1));
+
+               if (n == vstruct_last(set1))
+                       break;
+               if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5)
+                       break;
+
+               if (bkey_packed(k))
+                       nr_packed++;
+               else
+                       nr_unpacked++;
+
+               prev = k;
+               k = n;
+       }
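+
+       /*
+        * Illustrative: with set1->u64s == 100, the loop above stops at the
+        * first key at or past u64 offset 60, so n1 keeps roughly 3/5 of the
+        * key data and the remainder is moved to n2 below.
+        */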
+
+       BUG_ON(!prev);
+
+       btree_set_max(n1, bkey_unpack_pos(n1, prev));
+       btree_set_min(n2, bkey_successor(n1->key.k.p));
+
+       set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
+       set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
+
+       set_btree_bset_end(n1, n1->set);
+       set_btree_bset_end(n2, n2->set);
+
+       n2->nr.live_u64s        = le16_to_cpu(set2->u64s);
+       n2->nr.bset_u64s[0]     = le16_to_cpu(set2->u64s);
+       n2->nr.packed_keys      = n1->nr.packed_keys - nr_packed;
+       n2->nr.unpacked_keys    = n1->nr.unpacked_keys - nr_unpacked;
+
+       n1->nr.live_u64s        = le16_to_cpu(set1->u64s);
+       n1->nr.bset_u64s[0]     = le16_to_cpu(set1->u64s);
+       n1->nr.packed_keys      = nr_packed;
+       n1->nr.unpacked_keys    = nr_unpacked;
+
+       BUG_ON(!set1->u64s);
+       BUG_ON(!set2->u64s);
+
+       memcpy_u64s(set2->start,
+                   vstruct_end(set1),
+                   le16_to_cpu(set2->u64s));
+
+       btree_node_reset_sib_u64s(n1);
+       btree_node_reset_sib_u64s(n2);
+
+       bch2_verify_btree_nr_keys(n1);
+       bch2_verify_btree_nr_keys(n2);
+
+       if (n1->c.level) {
+               btree_node_interior_verify(as->c, n1);
+               btree_node_interior_verify(as->c, n2);
+       }
+
+       return n2;
+}
+
+/*
+ * For updates to interior nodes, we've got to do the insert before we split
+ * because the stuff we're inserting has to be inserted atomically. Post split,
+ * the keys might have to go in different nodes and the split would no longer be
+ * atomic.
+ *
+ * Worse, if the insert is from btree node coalescing and we do the insert
+ * after the split (and pick the pivot), the pivot we pick might be between
+ * nodes that were coalesced, and thus in the middle of a child node post
+ * coalescing:
+ */
+static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
+                                   struct btree_iter *iter,
+                                   struct keylist *keys)
+{
+       struct btree_node_iter node_iter;
+       struct bkey_i *k = bch2_keylist_front(keys);
+       struct bkey_packed *src, *dst, *n;
+       struct bset *i;
+
+       BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
+
+       bch2_btree_node_iter_init(&node_iter, b, &k->k.p);
+
+       while (!bch2_keylist_empty(keys)) {
+               k = bch2_keylist_front(keys);
+
+               bch2_insert_fixup_btree_ptr(as, b, iter, k, &node_iter);
+               bch2_keylist_pop_front(keys);
+       }
+
+       /*
+        * We can't tolerate whiteouts here - with whiteouts there can be
+        * duplicate keys, and it would be rather bad if we picked a duplicate
+        * for the pivot:
+        */
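+       /*
+        * (Illustrative: if a whiteout at position P were chosen as the pivot
+        * while a live key at P remained, that key would end up in n2 below
+        * n2's min_key.)
+        */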
+       i = btree_bset_first(b);
+       src = dst = i->start;
+       while (src != vstruct_last(i)) {
+               n = bkey_next_skip_noops(src, vstruct_last(i));
+               if (!bkey_deleted(src)) {
+                       memmove_u64s_down(dst, src, src->u64s);
+                       dst = bkey_next(dst);
+               }
+               src = n;
+       }
+
+       i->u64s = cpu_to_le16((u64 *) dst - i->_data);
+       set_btree_bset_end(b, b->set);
+
+       BUG_ON(b->nsets != 1 ||
+              b->nr.live_u64s != le16_to_cpu(btree_bset_first(b)->u64s));
+
+       btree_node_interior_verify(as->c, b);
+}
+
+static void btree_split(struct btree_update *as, struct btree *b,
+                       struct btree_iter *iter, struct keylist *keys,
+                       unsigned flags)
+{
+       struct bch_fs *c = as->c;
+       struct btree *parent = btree_node_parent(iter, b);
+       struct btree *n1, *n2 = NULL, *n3 = NULL;
+       u64 start_time = local_clock();
+
+       BUG_ON(!parent && (b != btree_node_root(c, b)));
+       BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level));
+
+       bch2_btree_interior_update_will_free_node(as, b);
+
+       n1 = bch2_btree_node_alloc_replacement(as, b);
+       bch2_btree_update_add_new_node(as, n1);
+
+       if (keys)
+               btree_split_insert_keys(as, n1, iter, keys);
+
+       if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
+               trace_btree_split(c, b);
+
+               n2 = __btree_split_node(as, n1, iter);
+
+               bch2_btree_build_aux_trees(n2);
+               bch2_btree_build_aux_trees(n1);
+               six_unlock_write(&n2->c.lock);
+               six_unlock_write(&n1->c.lock);
+
+               bch2_btree_node_write(c, n2, SIX_LOCK_intent);
+
+               /*
+                * Note that on recursive splits, parent_keys == keys, so we
+                * can't start adding new keys to parent_keys before emptying it
+                * out (which we did with btree_split_insert_keys() above)
+                */
+               bch2_keylist_add(&as->parent_keys, &n1->key);
+               bch2_keylist_add(&as->parent_keys, &n2->key);
+
+               if (!parent) {
+                       /* Depth increases, make a new root */
+                       n3 = __btree_root_alloc(as, b->c.level + 1);
+
+                       n3->sib_u64s[0] = U16_MAX;
+                       n3->sib_u64s[1] = U16_MAX;
+
+                       btree_split_insert_keys(as, n3, iter, &as->parent_keys);
+
+                       bch2_btree_node_write(c, n3, SIX_LOCK_intent);
+               }
+       } else {
+               trace_btree_compact(c, b);
+
+               bch2_btree_build_aux_trees(n1);
+               six_unlock_write(&n1->c.lock);
+
+               if (parent)
+                       bch2_keylist_add(&as->parent_keys, &n1->key);
+       }
+
+       bch2_btree_node_write(c, n1, SIX_LOCK_intent);
+
+       /* New nodes all written, now make them visible: */
+
+       if (parent) {
+               /* Split a non-root node */
+               bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
+       } else if (n3) {
+               bch2_btree_set_root(as, n3, iter);
+       } else {
+               /* Root filled up but didn't need to be split */
+               bch2_btree_set_root(as, n1, iter);
+       }
+
+       bch2_btree_update_get_open_buckets(as, n1);
+       if (n2)
+               bch2_btree_update_get_open_buckets(as, n2);
+       if (n3)
+               bch2_btree_update_get_open_buckets(as, n3);
+
+       /* Successful split, update the iterator to point to the new nodes: */
+
+       six_lock_increment(&b->c.lock, SIX_LOCK_intent);
+       bch2_btree_iter_node_drop(iter, b);
+       if (n3)
+               bch2_btree_iter_node_replace(iter, n3);
+       if (n2)
+               bch2_btree_iter_node_replace(iter, n2);
+       bch2_btree_iter_node_replace(iter, n1);
+
+       /*
+        * The old node must be freed (in memory) _before_ unlocking the new
+        * nodes - else another thread could re-acquire a read lock on the old
+        * node after another thread has locked and updated the new node, thus
+        * seeing stale data:
+        */
+       bch2_btree_node_free_inmem(c, b, iter);
+
+       if (n3)
+               six_unlock_intent(&n3->c.lock);
+       if (n2)
+               six_unlock_intent(&n2->c.lock);
+       six_unlock_intent(&n1->c.lock);
+
+       bch2_btree_trans_verify_locks(iter->trans);
+
+       bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split],
+                              start_time);
+}
+
+static void
+bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
+                               struct btree_iter *iter, struct keylist *keys)
+{
+       struct btree_iter *linked;
+       struct btree_node_iter node_iter;
+       struct bkey_i *insert = bch2_keylist_front(keys);
+       struct bkey_packed *k;
+
+       /* Don't screw up @iter's position: */
+       node_iter = iter->l[b->c.level].iter;
+
+       /*
+        * btree_split(), btree_gc_coalesce() will insert keys before
+        * the iterator's current position - they know the keys go in
+        * the node the iterator points to:
+        */
+       while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
+              (bkey_cmp_packed(b, k, &insert->k) >= 0))
+               ;
+
+       for_each_keylist_key(keys, insert)
+               bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter);
+
+       btree_update_updated_node(as, b);
+
+       trans_for_each_iter_with_node(iter->trans, b, linked)
+               bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
+
+       bch2_btree_trans_verify_iters(iter->trans, b);
+}
+
+/**
+ * bch2_btree_insert_node - insert bkeys into a given btree node
+ *
+ * @as:                btree_update this insert is part of
+ * @b:         interior node to insert into
+ * @iter:      btree iterator
+ * @keys:      list of keys to insert
+ * @flags:     BTREE_INSERT_* flags
+ *
+ * Inserts as many keys as it can into a given btree node. Inserts into
+ * interior nodes must be atomic: if the keys don't all fit, the node is
+ * split and the keys are inserted as part of the split (see btree_split()).
+ */
+void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
+                           struct btree_iter *iter, struct keylist *keys,
+                           unsigned flags)
+{
+       struct bch_fs *c = as->c;
+       int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
+       int old_live_u64s = b->nr.live_u64s;
+       int live_u64s_added, u64s_added;
+
+       BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level));
+       BUG_ON(!b->c.level);
+       BUG_ON(!as || as->b);
+       bch2_verify_keylist_sorted(keys);
+
+       if (as->must_rewrite)
+               goto split;
+
+       bch2_btree_node_lock_for_insert(c, b, iter);
+
+       if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
+               bch2_btree_node_unlock_write(b, iter);
+               goto split;
+       }
+
+       bch2_btree_insert_keys_interior(as, b, iter, keys);
+
+       live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
+       u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
+
+       if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
+               b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
+       if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
+               b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
+
+       if (u64s_added > live_u64s_added &&
+           bch2_maybe_compact_whiteouts(c, b))
+               bch2_btree_iter_reinit_node(iter, b);
+
+       bch2_btree_node_unlock_write(b, iter);
+
+       btree_node_interior_verify(c, b);
+
+       /*
+        * when called from the btree_split path the new nodes aren't added to
+        * the btree iterator yet, so the merge path's unlock/wait/relock dance
+        * won't work:
+        */
+       bch2_foreground_maybe_merge(c, iter, b->c.level,
+                                   flags|BTREE_INSERT_NOUNLOCK);
+       return;
+split:
+       btree_split(as, b, iter, keys, flags);
+}
+
+int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
+                         unsigned flags)
+{
+       struct btree_trans *trans = iter->trans;
+       struct btree *b = iter_l(iter)->b;
+       struct btree_update *as;
+       struct closure cl;
+       int ret = 0;
+       struct btree_insert_entry *i;
+
+       /*
+        * We already have a disk reservation and open buckets pinned; this
+        * allocation must not block:
+        */
+       trans_for_each_update(trans, i)
+               if (btree_node_type_needs_gc(i->iter->btree_id))
+                       flags |= BTREE_INSERT_USE_RESERVE;
+
+       closure_init_stack(&cl);
+
+       /* Hack, because gc and splitting nodes don't mix yet: */
+       if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
+           !down_read_trylock(&c->gc_lock)) {
+               if (flags & BTREE_INSERT_NOUNLOCK) {
+                       trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+                       return -EINTR;
+               }
+
+               bch2_trans_unlock(trans);
+               down_read(&c->gc_lock);
+
+               if (!bch2_trans_relock(trans))
+                       ret = -EINTR;
+       }
+
+       /*
+        * XXX: figure out how far we might need to split,
+        * instead of locking/reserving all the way to the root:
+        */
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+               trace_trans_restart_iter_upgrade(trans->ip);
+               ret = -EINTR;
+               goto out;
+       }
+
+       as = bch2_btree_update_start(trans, iter->btree_id,
+               btree_update_reserve_required(c, b), flags,
+               !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
+       if (IS_ERR(as)) {
+               ret = PTR_ERR(as);
+               if (ret == -EAGAIN) {
+                       BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
+                       bch2_trans_unlock(trans);
+                       ret = -EINTR;
+
+                       trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+               }
+               goto out;
+       }
+
+       btree_split(as, b, iter, NULL, flags);
+       bch2_btree_update_done(as);
+
+       /*
+        * We haven't successfully inserted yet, so don't downgrade all the way
+        * back to read locks:
+        */
+       __bch2_btree_iter_downgrade(iter, 1);
+out:
+       if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
+               up_read(&c->gc_lock);
+       closure_sync(&cl);
+       return ret;
+}
+
+void __bch2_foreground_maybe_merge(struct bch_fs *c,
+                                  struct btree_iter *iter,
+                                  unsigned level,
+                                  unsigned flags,
+                                  enum btree_node_sibling sib)
+{
+       struct btree_trans *trans = iter->trans;
+       struct btree_update *as;
+       struct bkey_format_state new_s;
+       struct bkey_format new_f;
+       struct bkey_i delete;
+       struct btree *b, *m, *n, *prev, *next, *parent;
+       struct closure cl;
+       size_t sib_u64s;
+       int ret = 0;
+
+       BUG_ON(!btree_node_locked(iter, level));
+
+       closure_init_stack(&cl);
+retry:
+       BUG_ON(!btree_node_locked(iter, level));
+
+       b = iter->l[level].b;
+
+       parent = btree_node_parent(iter, b);
+       if (!parent)
+               goto out;
+
+       if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c))
+               goto out;
+
+       /* XXX: can't be holding read locks */
+       m = bch2_btree_node_get_sibling(c, iter, b, sib);
+       if (IS_ERR(m)) {
+               ret = PTR_ERR(m);
+               goto err;
+       }
+
+       /* NULL means no sibling: */
+       if (!m) {
+               b->sib_u64s[sib] = U16_MAX;
+               goto out;
+       }
+
+       if (sib == btree_prev_sib) {
+               prev = m;
+               next = b;
+       } else {
+               prev = b;
+               next = m;
+       }
+
+       bch2_bkey_format_init(&new_s);
+       __bch2_btree_calc_format(&new_s, b);
+       __bch2_btree_calc_format(&new_s, m);
+       new_f = bch2_bkey_format_done(&new_s);
+
+       sib_u64s = btree_node_u64s_with_format(b, &new_f) +
+               btree_node_u64s_with_format(m, &new_f);
+
+       if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) {
+               sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
+               sib_u64s /= 2;
+               sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
+       }
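+
+       /*
+        * E.g. (numbers illustrative): with a hysteresis threshold of 1000
+        * u64s and sib_u64s == 3000, the above computes (3000 - 1000) / 2 +
+        * 1000 == 2000: the excess over the threshold is halved.
+        */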
+
+       sib_u64s = min(sib_u64s, btree_max_u64s(c));
+       b->sib_u64s[sib] = sib_u64s;
+
+       if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) {
+               six_unlock_intent(&m->c.lock);
+               goto out;
+       }
+
+       /* We're changing btree topology, which doesn't mix with gc: */
+       if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
+           !down_read_trylock(&c->gc_lock))
+               goto err_cycle_gc_lock;
+
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+               ret = -EINTR;
+               goto err_unlock;
+       }
+
+       as = bch2_btree_update_start(trans, iter->btree_id,
+                        btree_update_reserve_required(c, parent) + 1,
+                        flags|
+                        BTREE_INSERT_NOFAIL|
+                        BTREE_INSERT_USE_RESERVE,
+                        !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
+       if (IS_ERR(as)) {
+               ret = PTR_ERR(as);
+               goto err_unlock;
+       }
+
+       trace_btree_merge(c, b);
+
+       bch2_btree_interior_update_will_free_node(as, b);
+       bch2_btree_interior_update_will_free_node(as, m);
+
+       n = bch2_btree_node_alloc(as, b->c.level);
+       bch2_btree_update_add_new_node(as, n);
+
+       btree_set_min(n, prev->data->min_key);
+       btree_set_max(n, next->data->max_key);
+       n->data->format         = new_f;
+
+       btree_node_set_format(n, new_f);
+
+       bch2_btree_sort_into(c, n, prev);
+       bch2_btree_sort_into(c, n, next);
+
+       bch2_btree_build_aux_trees(n);
+       six_unlock_write(&n->c.lock);
+
+       bkey_init(&delete.k);
+       delete.k.p = prev->key.k.p;
+       bch2_keylist_add(&as->parent_keys, &delete);
+       bch2_keylist_add(&as->parent_keys, &n->key);
+
+       bch2_btree_node_write(c, n, SIX_LOCK_intent);
+
+       bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
+
+       bch2_btree_update_get_open_buckets(as, n);
+
+       six_lock_increment(&b->c.lock, SIX_LOCK_intent);
+       bch2_btree_iter_node_drop(iter, b);
+       bch2_btree_iter_node_drop(iter, m);
+
+       bch2_btree_iter_node_replace(iter, n);
+
+       bch2_btree_trans_verify_iters(trans, n);
+
+       bch2_btree_node_free_inmem(c, b, iter);
+       bch2_btree_node_free_inmem(c, m, iter);
+
+       six_unlock_intent(&n->c.lock);
+
+       bch2_btree_update_done(as);
+
+       if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
+               up_read(&c->gc_lock);
+out:
+       bch2_btree_trans_verify_locks(trans);
+
+       /*
+        * Don't downgrade locks here: we're called after a successful insert,
+        * and the caller will downgrade locks after a successful insert anyway
+        * (in case e.g. a split was required first).
+        *
+        * We're also called when inserting into interior nodes in the split
+        * path, and downgrading to read locks there is potentially confusing:
+        */
+       closure_sync(&cl);
+       return;
+
+err_cycle_gc_lock:
+       six_unlock_intent(&m->c.lock);
+
+       if (flags & BTREE_INSERT_NOUNLOCK)
+               goto out;
+
+       bch2_trans_unlock(trans);
+
+       down_read(&c->gc_lock);
+       up_read(&c->gc_lock);
+       ret = -EINTR;
+       goto err;
+
+err_unlock:
+       six_unlock_intent(&m->c.lock);
+       if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
+               up_read(&c->gc_lock);
+err:
+       BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
+
+       if ((ret == -EAGAIN || ret == -EINTR) &&
+           !(flags & BTREE_INSERT_NOUNLOCK)) {
+               bch2_trans_unlock(trans);
+               closure_sync(&cl);
+               ret = bch2_btree_iter_traverse(iter);
+               if (ret)
+                       goto out;
+
+               goto retry;
+       }
+
+       goto out;
+}
+
+static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
+                               struct btree *b, unsigned flags,
+                               struct closure *cl)
+{
+       struct btree *n, *parent = btree_node_parent(iter, b);
+       struct btree_update *as;
+
+       as = bch2_btree_update_start(iter->trans, iter->btree_id,
+               (parent
+                ? btree_update_reserve_required(c, parent)
+                : 0) + 1,
+               flags, cl);
+       if (IS_ERR(as)) {
+               trace_btree_gc_rewrite_node_fail(c, b);
+               return PTR_ERR(as);
+       }
+
+       bch2_btree_interior_update_will_free_node(as, b);
+
+       n = bch2_btree_node_alloc_replacement(as, b);
+       bch2_btree_update_add_new_node(as, n);
+
+       bch2_btree_build_aux_trees(n);
+       six_unlock_write(&n->c.lock);
+
+       trace_btree_gc_rewrite_node(c, b);
+
+       bch2_btree_node_write(c, n, SIX_LOCK_intent);
+
+       if (parent) {
+               bch2_keylist_add(&as->parent_keys, &n->key);
+               bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
+       } else {
+               bch2_btree_set_root(as, n, iter);
+       }
+
+       bch2_btree_update_get_open_buckets(as, n);
+
+       six_lock_increment(&b->c.lock, SIX_LOCK_intent);
+       bch2_btree_iter_node_drop(iter, b);
+       bch2_btree_iter_node_replace(iter, n);
+       bch2_btree_node_free_inmem(c, b, iter);
+       six_unlock_intent(&n->c.lock);
+
+       bch2_btree_update_done(as);
+       return 0;
+}
+
+/**
+ * bch2_btree_node_rewrite - Rewrite/move a btree node
+ *
+ * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e.
+ * btree_check_reserve() has to wait)
+ */
+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
+                           __le64 seq, unsigned flags)
+{
+       struct btree_trans *trans = iter->trans;
+       struct closure cl;
+       struct btree *b;
+       int ret;
+
+       flags |= BTREE_INSERT_NOFAIL;
+
+       closure_init_stack(&cl);
+
+       bch2_btree_iter_upgrade(iter, U8_MAX);
+
+       if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
+               if (!down_read_trylock(&c->gc_lock)) {
+                       bch2_trans_unlock(trans);
+                       down_read(&c->gc_lock);
+               }
+       }
+
+       while (1) {
+               ret = bch2_btree_iter_traverse(iter);
+               if (ret)
+                       break;
+
+               b = bch2_btree_iter_peek_node(iter);
+               if (!b || b->data->keys.seq != seq)
+                       break;
+
+               ret = __btree_node_rewrite(c, iter, b, flags, &cl);
+               if (ret != -EAGAIN &&
+                   ret != -EINTR)
+                       break;
+
+               bch2_trans_unlock(trans);
+               closure_sync(&cl);
+       }
+
+       bch2_btree_iter_downgrade(iter);
+
+       if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
+               up_read(&c->gc_lock);
+
+       closure_sync(&cl);
+       return ret;
+}
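+
+/*
+ * A minimal usage sketch (hypothetical caller, error handling elided): peek
+ * at the node a node iterator points to and rewrite it, passing its sequence
+ * number so the rewrite is skipped if the node has since been replaced:
+ *
+ *     b = bch2_btree_iter_peek_node(iter);
+ *     if (b)
+ *             ret = bch2_btree_node_rewrite(c, iter, b->data->keys.seq, 0);
+ */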
+
+static void __bch2_btree_node_update_key(struct bch_fs *c,
+                                        struct btree_update *as,
+                                        struct btree_iter *iter,
+                                        struct btree *b, struct btree *new_hash,
+                                        struct bkey_i *new_key)
+{
+       struct btree *parent;
+       int ret;
+
+       btree_update_will_delete_key(as, &b->key);
+       btree_update_will_add_key(as, new_key);
+
+       parent = btree_node_parent(iter, b);
+       if (parent) {
+               if (new_hash) {
+                       bkey_copy(&new_hash->key, new_key);
+                       ret = bch2_btree_node_hash_insert(&c->btree_cache,
+                                       new_hash, b->c.level, b->c.btree_id);
+                       BUG_ON(ret);
+               }
+
+               bch2_keylist_add(&as->parent_keys, new_key);
+               bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0);
+
+               if (new_hash) {
+                       mutex_lock(&c->btree_cache.lock);
+                       bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
+
+                       bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+                       bkey_copy(&b->key, new_key);
+                       ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+                       BUG_ON(ret);
+                       mutex_unlock(&c->btree_cache.lock);
+               } else {
+                       bkey_copy(&b->key, new_key);
+               }
+       } else {
+               BUG_ON(btree_node_root(c, b) != b);
+
+               bch2_btree_node_lock_write(b, iter);
+               bkey_copy(&b->key, new_key);
+
+               if (btree_ptr_hash_val(&b->key) != b->hash_val) {
+                       mutex_lock(&c->btree_cache.lock);
+                       bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+                       ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+                       BUG_ON(ret);
+                       mutex_unlock(&c->btree_cache.lock);
+               }
+
+               btree_update_updated_root(as, b);
+               bch2_btree_node_unlock_write(b, iter);
+       }
+
+       bch2_btree_update_done(as);
+}
+
+int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
+                              struct btree *b,
+                              struct bkey_i *new_key)
+{
+       struct btree *parent = btree_node_parent(iter, b);
+       struct btree_update *as = NULL;
+       struct btree *new_hash = NULL;
+       struct closure cl;
+       int ret;
+
+       closure_init_stack(&cl);
+
+       if (!bch2_btree_iter_upgrade(iter, U8_MAX))
+               return -EINTR;
+
+       if (!down_read_trylock(&c->gc_lock)) {
+               bch2_trans_unlock(iter->trans);
+               down_read(&c->gc_lock);
+
+               if (!bch2_trans_relock(iter->trans)) {
+                       ret = -EINTR;
+                       goto err;
+               }
+       }
+
+       /*
+        * check btree_ptr_hash_val() after @b is locked by
+        * btree_iter_traverse():
+        */
+       if (btree_ptr_hash_val(new_key) != b->hash_val) {
+               /* bch2_btree_reserve_get will unlock */
+               ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+               if (ret) {
+                       bch2_trans_unlock(iter->trans);
+                       up_read(&c->gc_lock);
+                       closure_sync(&cl);
+                       down_read(&c->gc_lock);
+
+                       if (!bch2_trans_relock(iter->trans)) {
+                               ret = -EINTR;
+                               goto err;
+                       }
+               }
+
+               new_hash = bch2_btree_node_mem_alloc(c);
+       }
+retry:
+       as = bch2_btree_update_start(iter->trans, iter->btree_id,
+               parent ? btree_update_reserve_required(c, parent) : 0,
+               BTREE_INSERT_NOFAIL|
+               BTREE_INSERT_USE_RESERVE|
+               BTREE_INSERT_USE_ALLOC_RESERVE,
+               &cl);
+
+       if (IS_ERR(as)) {
+               ret = PTR_ERR(as);
+               if (ret == -EAGAIN)
+                       ret = -EINTR;
+
+               if (ret == -EINTR) {
+                       bch2_trans_unlock(iter->trans);
+                       up_read(&c->gc_lock);
+                       closure_sync(&cl);
+                       down_read(&c->gc_lock);
+
+                       if (bch2_trans_relock(iter->trans))
+                               goto retry;
+               }
+
+               goto err;
+       }
+
+       ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key));
+       if (ret)
+               goto err_free_update;
+
+       __bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key);
+
+       bch2_btree_iter_downgrade(iter);
+err:
+       if (new_hash) {
+               mutex_lock(&c->btree_cache.lock);
+               list_move(&new_hash->list, &c->btree_cache.freeable);
+               mutex_unlock(&c->btree_cache.lock);
+
+               six_unlock_write(&new_hash->c.lock);
+               six_unlock_intent(&new_hash->c.lock);
+       }
+       up_read(&c->gc_lock);
+       closure_sync(&cl);
+       return ret;
+err_free_update:
+       bch2_btree_update_free(as);
+       goto err;
+}
+
+/* Init code: */
+
+/*
+ * Only for filesystem bringup: when first reading the btree roots, or when
+ * allocating btree roots while initializing a new filesystem:
+ */
+void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
+{
+       BUG_ON(btree_node_root(c, b));
+
+       bch2_btree_set_root_inmem(c, b);
+}
+
+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+{
+       struct closure cl;
+       struct btree *b;
+       int ret;
+
+       closure_init_stack(&cl);
+
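+       /*
+        * Loop until we've reserved memory for the new node -
+        * bch2_btree_cache_cannibalize_lock() may ask us to wait on @cl:
+        */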
+       do {
+               ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+               closure_sync(&cl);
+       } while (ret);
+
+       b = bch2_btree_node_mem_alloc(c);
+       bch2_btree_cache_cannibalize_unlock(c);
+
+       set_btree_node_fake(b);
+       set_btree_node_need_rewrite(b);
+       b->c.level      = 0;
+       b->c.btree_id   = id;
+
+       bkey_btree_ptr_init(&b->key);
+       b->key.k.p = POS_MAX;
+       *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id;
+
+       bch2_bset_init_first(b, &b->data->keys);
+       bch2_btree_build_aux_trees(b);
+
+       b->data->flags = 0;
+       btree_set_min(b, POS_MIN);
+       btree_set_max(b, POS_MAX);
+       b->data->format = bch2_btree_calc_format(b);
+       btree_node_set_format(b, b->data->format);
+
+       ret = bch2_btree_node_hash_insert(&c->btree_cache, b,
+                                         b->c.level, b->c.btree_id);
+       BUG_ON(ret);
+
+       bch2_btree_set_root_inmem(c, b);
+
+       six_unlock_write(&b->c.lock);
+       six_unlock_intent(&b->c.lock);
+}
+
+void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct btree_update *as;
+
+       mutex_lock(&c->btree_interior_update_lock);
+       list_for_each_entry(as, &c->btree_interior_update_list, list)
+               pr_buf(out, "%p m %u w %u r %u j %llu\n",
+                      as,
+                      as->mode,
+                      as->nodes_written,
+                      atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK,
+                      as->journal.seq);
+       mutex_unlock(&c->btree_interior_update_lock);
+}
+
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)
+{
+       size_t ret = 0;
+       struct list_head *i;
+
+       mutex_lock(&c->btree_interior_update_lock);
+       list_for_each(i, &c->btree_interior_update_list)
+               ret++;
+       mutex_unlock(&c->btree_interior_update_lock);
+
+       return ret;
+}
+
+void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset)
+{
+       struct btree_root *r;
+       struct jset_entry *entry;
+
+       mutex_lock(&c->btree_root_lock);
+
+       vstruct_for_each(jset, entry)
+               if (entry->type == BCH_JSET_ENTRY_btree_root) {
+                       r = &c->btree_roots[entry->btree_id];
+                       r->level = entry->level;
+                       r->alive = true;
+                       bkey_copy(&r->key, &entry->start[0]);
+               }
+
+       mutex_unlock(&c->btree_root_lock);
+}
+
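+/*
+ * Append a journal entry for each live btree root that doesn't already have
+ * one in [start, end); returns the new end of the entry list:
+ */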
+struct jset_entry *
+bch2_btree_roots_to_journal_entries(struct bch_fs *c,
+                                   struct jset_entry *start,
+                                   struct jset_entry *end)
+{
+       struct jset_entry *entry;
+       unsigned long have = 0;
+       unsigned i;
+
+       for (entry = start; entry < end; entry = vstruct_next(entry))
+               if (entry->type == BCH_JSET_ENTRY_btree_root)
+                       __set_bit(entry->btree_id, &have);
+
+       mutex_lock(&c->btree_root_lock);
+
+       for (i = 0; i < BTREE_ID_NR; i++)
+               if (c->btree_roots[i].alive && !test_bit(i, &have)) {
+                       journal_entry_set(end,
+                                         BCH_JSET_ENTRY_btree_root,
+                                         i, c->btree_roots[i].level,
+                                         &c->btree_roots[i].key,
+                                         c->btree_roots[i].key.u64s);
+                       end = vstruct_next(end);
+               }
+
+       mutex_unlock(&c->btree_root_lock);
+
+       return end;
+}
+
+void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
+{
+       if (c->btree_interior_update_worker)
+               destroy_workqueue(c->btree_interior_update_worker);
+       mempool_exit(&c->btree_interior_update_pool);
+}
+
+int bch2_fs_btree_interior_update_init(struct bch_fs *c)
+{
+       mutex_init(&c->btree_reserve_cache_lock);
+       INIT_LIST_HEAD(&c->btree_interior_update_list);
+       INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
+       mutex_init(&c->btree_interior_update_lock);
+       INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
+
+       c->btree_interior_update_worker =
+               alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
+       if (!c->btree_interior_update_worker)
+               return -ENOMEM;
+
+       return mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
+                                        sizeof(struct btree_update));
+}
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
new file mode 100644 (file)
index 0000000..7668225
--- /dev/null
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
+#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H
+
+#include "btree_cache.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+
+void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
+                               struct bkey_format *);
+
+#define BTREE_UPDATE_NODES_MAX         ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
+
+#define BTREE_UPDATE_JOURNAL_RES       (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
+
+/*
+ * Tracks an in-progress split/rewrite of a btree node and the update to the
+ * parent node:
+ *
+ * When we split/rewrite a node, we do all the updates in memory without
+ * waiting for any writes to complete - we allocate the new node(s) and update
+ * the parent node, possibly recursively up to the root.
+ *
+ * The end result is that we have one or more new nodes being written -
+ * possibly several, if there were multiple splits - and then a write (updating
+ * an interior node) which will make all these new nodes visible.
+ *
+ * Additionally, as we split/rewrite nodes we free the old nodes - but the old
+ * nodes can't be freed (their space on disk can't be reclaimed) until the
+ * update to the interior node that makes the new node visible completes -
+ * until then, the old nodes are still reachable on disk.
+ */
+struct btree_update {
+       struct closure                  cl;
+       struct bch_fs                   *c;
+
+       struct list_head                list;
+       struct list_head                unwritten_list;
+
+       /* What kind of update are we doing? */
+       enum {
+               BTREE_INTERIOR_NO_UPDATE,
+               BTREE_INTERIOR_UPDATING_NODE,
+               BTREE_INTERIOR_UPDATING_ROOT,
+               BTREE_INTERIOR_UPDATING_AS,
+       } mode;
+
+       unsigned                        must_rewrite:1;
+       unsigned                        nodes_written:1;
+
+       enum btree_id                   btree_id;
+
+       struct disk_reservation         disk_res;
+       struct journal_preres           journal_preres;
+
+       /*
+        * BTREE_INTERIOR_UPDATING_NODE:
+        * The update that made the new nodes visible was a regular update to an
+        * existing interior node - @b. We can't write out the update to @b
+        * until the new nodes we created are finished writing, so we block @b
+        * from writing by putting this btree_interior update on the
+        * @b->write_blocked list with @write_blocked_list:
+        */
+       struct btree                    *b;
+       struct list_head                write_blocked_list;
+
+       /*
+        * We may be freeing nodes that were dirty, and thus had journal entries
+        * pinned: we need to transfer the oldest of those pins to the
+        * btree_update operation, and release it when the new node(s)
+        * are all persistent and reachable:
+        */
+       struct journal_entry_pin        journal;
+
+       /* Preallocated nodes we reserve when we start the update: */
+       struct btree                    *prealloc_nodes[BTREE_UPDATE_NODES_MAX];
+       unsigned                        nr_prealloc_nodes;
+
+       /* Nodes being freed: */
+       struct keylist                  old_keys;
+       u64                             _old_keys[BTREE_UPDATE_NODES_MAX *
+                                                 BKEY_BTREE_PTR_VAL_U64s_MAX];
+
+       /* Nodes being added: */
+       struct keylist                  new_keys;
+       u64                             _new_keys[BTREE_UPDATE_NODES_MAX *
+                                                 BKEY_BTREE_PTR_VAL_U64s_MAX];
+
+       /* New nodes, that will be made reachable by this update: */
+       struct btree                    *new_nodes[BTREE_UPDATE_NODES_MAX];
+       unsigned                        nr_new_nodes;
+
+       open_bucket_idx_t               open_buckets[BTREE_UPDATE_NODES_MAX *
+                                                    BCH_REPLICAS_MAX];
+       open_bucket_idx_t               nr_open_buckets;
+
+       unsigned                        journal_u64s;
+       u64                             journal_entries[BTREE_UPDATE_JOURNAL_RES];
+
+       /* Only here to reduce stack usage on recursive splits: */
+       struct keylist                  parent_keys;
+       /*
+        * Enough room for btree_split's keys without realloc - btree node
+        * pointers never have crc/compression info, so we only need to account
+        * for the pointers for three keys
+        */
+       u64                             inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
+};
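+
+/*
+ * Sketch of the lifecycle, as used by the split/rewrite paths in
+ * btree_update_interior.c (illustrative ordering; locking and error handling
+ * elided):
+ *
+ *     as = bch2_btree_update_start(trans, btree_id, nr_nodes, flags, &cl);
+ *     bch2_btree_interior_update_will_free_node(as, b);
+ *     n = bch2_btree_node_alloc_replacement(as, b);
+ *     bch2_btree_update_add_new_node(as, n);
+ *     bch2_btree_node_write(c, n, SIX_LOCK_intent);
+ *     bch2_keylist_add(&as->parent_keys, &n->key);
+ *     bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
+ *     bch2_btree_update_done(as);
+ */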
+
+void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *,
+                               struct btree_iter *);
+void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);
+
+void bch2_btree_update_get_open_buckets(struct btree_update *, struct btree *);
+
+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
+                                                 struct btree *,
+                                                 struct bkey_format);
+
+void bch2_btree_update_done(struct btree_update *);
+struct btree_update *
+bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
+                       unsigned, struct closure *);
+
+void bch2_btree_interior_update_will_free_node(struct btree_update *,
+                                              struct btree *);
+void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
+
+void bch2_btree_insert_node(struct btree_update *, struct btree *,
+                           struct btree_iter *, struct keylist *,
+                           unsigned);
+int bch2_btree_split_leaf(struct bch_fs *, struct btree_iter *, unsigned);
+
+void __bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
+                                  unsigned, unsigned, enum btree_node_sibling);
+
+static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c,
+                                       struct btree_iter *iter,
+                                       unsigned level, unsigned flags,
+                                       enum btree_node_sibling sib)
+{
+       struct btree *b;
+
+       if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
+               return;
+
+       if (!bch2_btree_node_relock(iter, level))
+               return;
+
+       b = iter->l[level].b;
+       if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
+               return;
+
+       __bch2_foreground_maybe_merge(c, iter, level, flags, sib);
+}
+
+static inline void bch2_foreground_maybe_merge(struct bch_fs *c,
+                                              struct btree_iter *iter,
+                                              unsigned level,
+                                              unsigned flags)
+{
+       bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
+                                           btree_prev_sib);
+       bch2_foreground_maybe_merge_sibling(c, iter, level, flags,
+                                           btree_next_sib);
+}
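+
+/*
+ * This is driven from the transaction commit path: after a successful insert,
+ * do_bch2_trans_commit() calls bch2_foreground_maybe_merge(c, iter, 0,
+ * trans->flags) on each touched leaf to opportunistically merge it with its
+ * neighbours.
+ */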
+
+void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
+
+static inline unsigned btree_update_reserve_required(struct bch_fs *c,
+                                                    struct btree *b)
+{
+       unsigned depth = btree_node_root(c, b)->c.level + 1;
+
+       /*
+        * Number of nodes we might have to allocate in a worst case btree
+        * split operation - we split all the way up to the root, then allocate
+        * a new root, unless we're already at max depth:
+        */
+       if (depth < BTREE_MAX_DEPTH)
+               return (depth - b->c.level) * 2 + 1;
+       else
+               return (depth - b->c.level) * 2 - 1;
+}
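+
+/*
+ * Worked example with made-up numbers: with the root at level 3 and @b a leaf
+ * at level 0, depth = 4, so we reserve (4 - 0) * 2 + 1 = 9 nodes - two per
+ * level for the splits, plus one for a possible new root.
+ */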
+
+static inline void btree_node_reset_sib_u64s(struct btree *b)
+{
+       b->sib_u64s[0] = b->nr.live_u64s;
+       b->sib_u64s[1] = b->nr.live_u64s;
+}
+
+static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
+{
+       return (void *) b->data + btree_bytes(c);
+}
+
+static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
+                                                           struct btree *b)
+{
+       return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
+}
+
+static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
+                                                         struct btree *b)
+{
+       return btree_data_end(c, b);
+}
+
+static inline void *write_block(struct btree *b)
+{
+       return (void *) b->data + (b->written << 9);
+}
+
+static inline bool __btree_addr_written(struct btree *b, void *p)
+{
+       return p < write_block(b);
+}
+
+static inline bool bset_written(struct btree *b, struct bset *i)
+{
+       return __btree_addr_written(b, i);
+}
+
+static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
+{
+       return __btree_addr_written(b, k);
+}
+
+static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
+                                                struct btree *b,
+                                                void *end)
+{
+       ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
+               b->whiteout_u64s;
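+       /* btree_node_size is in 512 byte sectors; 512 / 8 = 64 u64s per sector: */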
+       ssize_t total = c->opts.btree_node_size << 6;
+
+       return total - used;
+}
+
+static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
+                                                  struct btree *b)
+{
+       ssize_t remaining = __bch_btree_u64s_remaining(c, b,
+                               btree_bkey_last(b, bset_tree_last(b)));
+
+       BUG_ON(remaining < 0);
+
+       if (bset_written(b, btree_bset_last(b)))
+               return 0;
+
+       return remaining;
+}
+
+static inline unsigned btree_write_set_buffer(struct btree *b)
+{
+       /*
+        * Could buffer up larger amounts of keys for btrees with larger keys,
+        * pending benchmarking:
+        */
+       return 4 << 10;
+}
+
+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
+                                                    struct btree *b)
+{
+       struct bset_tree *t = bset_tree_last(b);
+       struct btree_node_entry *bne = max(write_block(b),
+                       (void *) btree_bkey_last(b, bset_tree_last(b)));
+       ssize_t remaining_space =
+               __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]);
+
+       if (unlikely(bset_written(b, bset(b, t)))) {
+               if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
+                       return bne;
+       } else {
+               if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
+                   remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3))
+                       return bne;
+       }
+
+       return NULL;
+}
+
+static inline void push_whiteout(struct bch_fs *c, struct btree *b,
+                                struct bpos pos)
+{
+       struct bkey_packed k;
+
+       BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
+
+       if (!bkey_pack_pos(&k, pos, b)) {
+               struct bkey *u = (void *) &k;
+
+               bkey_init(u);
+               u->p = pos;
+       }
+
+       k.needs_whiteout = true;
+
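+       /* Whiteouts accumulate at the end of the node's buffer, growing down: */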
+       b->whiteout_u64s += k.u64s;
+       bkey_copy(unwritten_whiteouts_start(c, b), &k);
+}
+
+/*
+ * write lock must be held on @b (else the dirty bset that we were going to
+ * insert into could be written out from under us)
+ */
+static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
+                                              struct btree *b, unsigned u64s)
+{
+       if (unlikely(btree_node_need_rewrite(b)))
+               return false;
+
+       return u64s <= bch_btree_keys_u64s_remaining(c, b);
+}
+
+void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
+
+size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
+
+void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
+                                       struct jset_entry *, struct jset_entry *);
+
+void bch2_fs_btree_interior_update_exit(struct bch_fs *);
+int bch2_fs_btree_interior_update_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
new file mode 100644 (file)
index 0000000..cd699c2
--- /dev/null
@@ -0,0 +1,1171 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_gc.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_key_cache.h"
+#include "btree_locking.h"
+#include "buckets.h"
+#include "debug.h"
+#include "error.h"
+#include "extent_update.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+#include "keylist.h"
+#include "replicas.h"
+
+#include <linux/prefetch.h>
+#include <linux/sort.h>
+#include <trace/events/bcachefs.h>
+
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+                                    struct btree_insert_entry *i)
+{
+       return i != trans->updates2 &&
+               iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b;
+}
+
+inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
+                                           struct btree_iter *iter)
+{
+       bch2_btree_node_lock_write(b, iter);
+
+       if (btree_iter_type(iter) == BTREE_ITER_CACHED)
+               return;
+
+       if (unlikely(btree_node_just_written(b)) &&
+           bch2_btree_post_write_cleanup(c, b))
+               bch2_btree_iter_reinit_node(iter, b);
+
+       /*
+        * If the last bset has been written, or if it's gotten too big - start
+        * a new bset to insert into:
+        */
+       if (want_new_bset(c, b))
+               bch2_btree_init_next(c, b, iter);
+}
+
+/* Inserting into a given leaf node (last stage of insert): */
+
+/* Handle overwrites and do insert, for non extents: */
+bool bch2_btree_bset_insert_key(struct btree_iter *iter,
+                               struct btree *b,
+                               struct btree_node_iter *node_iter,
+                               struct bkey_i *insert)
+{
+       struct bkey_packed *k;
+       unsigned clobber_u64s = 0, new_u64s = 0;
+
+       EBUG_ON(btree_node_just_written(b));
+       EBUG_ON(bset_written(b, btree_bset_last(b)));
+       EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
+       EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
+               bkey_cmp(bkey_start_pos(&insert->k),
+                        bkey_predecessor(b->data->min_key)) < 0);
+       EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0);
+       EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0);
+       EBUG_ON(insert->k.u64s >
+               bch_btree_keys_u64s_remaining(iter->trans->c, b));
+       EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
+
+       k = bch2_btree_node_iter_peek_all(node_iter, b);
+       if (k && bkey_cmp_packed(b, k, &insert->k))
+               k = NULL;
+
+       /* @k is the key being overwritten/deleted, if any: */
+       EBUG_ON(k && bkey_whiteout(k));
+
+       /* Deleting, but not found? nothing to do: */
+       if (bkey_whiteout(&insert->k) && !k)
+               return false;
+
+       if (bkey_whiteout(&insert->k)) {
+               /* Deleting: */
+               btree_account_key_drop(b, k);
+               k->type = KEY_TYPE_deleted;
+
+               if (k->needs_whiteout)
+                       push_whiteout(iter->trans->c, b, insert->k.p);
+               k->needs_whiteout = false;
+
+               if (k >= btree_bset_last(b)->start) {
+                       clobber_u64s = k->u64s;
+                       bch2_bset_delete(b, k, clobber_u64s);
+                       goto fix_iter;
+               } else {
+                       bch2_btree_iter_fix_key_modified(iter, b, k);
+               }
+
+               return true;
+       }
+
+       if (k) {
+               /* Overwriting: */
+               btree_account_key_drop(b, k);
+               k->type = KEY_TYPE_deleted;
+
+               insert->k.needs_whiteout = k->needs_whiteout;
+               k->needs_whiteout = false;
+
+               if (k >= btree_bset_last(b)->start) {
+                       clobber_u64s = k->u64s;
+                       goto overwrite;
+               } else {
+                       bch2_btree_iter_fix_key_modified(iter, b, k);
+               }
+       }
+
+       k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
+overwrite:
+       bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
+       new_u64s = k->u64s;
+fix_iter:
+       if (clobber_u64s != new_u64s)
+               bch2_btree_node_iter_fix(iter, b, node_iter, k,
+                                        clobber_u64s, new_u64s);
+       return true;
+}
+
+static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
+                              unsigned i, u64 seq)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct btree_write *w = container_of(pin, struct btree_write, journal);
+       struct btree *b = container_of(w, struct btree, writes[i]);
+
+       btree_node_lock_type(c, b, SIX_LOCK_read);
+       bch2_btree_node_write_cond(c, b,
+               (btree_current_write(b) == w && w->journal.seq == seq));
+       six_unlock_read(&b->c.lock);
+}
+
+static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+{
+       return __btree_node_flush(j, pin, 0, seq);
+}
+
+static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+{
+       return __btree_node_flush(j, pin, 1, seq);
+}
+
+inline void bch2_btree_add_journal_pin(struct bch_fs *c,
+                                      struct btree *b, u64 seq)
+{
+       struct btree_write *w = btree_current_write(b);
+
+       bch2_journal_pin_add(&c->journal, seq, &w->journal,
+                            btree_node_write_idx(b) == 0
+                            ? btree_node_flush0
+                            : btree_node_flush1);
+}
+
+/**
+ * btree_insert_key_leaf - insert one key into a leaf node
+ */
+static bool btree_insert_key_leaf(struct btree_trans *trans,
+                                 struct btree_iter *iter,
+                                 struct bkey_i *insert)
+{
+       struct bch_fs *c = trans->c;
+       struct btree *b = iter_l(iter)->b;
+       struct bset_tree *t = bset_tree_last(b);
+       struct bset *i = bset(b, t);
+       int old_u64s = bset_u64s(t);
+       int old_live_u64s = b->nr.live_u64s;
+       int live_u64s_added, u64s_added;
+
+       EBUG_ON(!iter->level &&
+               !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
+
+       if (unlikely(!bch2_btree_bset_insert_key(iter, b,
+                                       &iter_l(iter)->iter, insert)))
+               return false;
+
+       i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
+                                        le64_to_cpu(i->journal_seq)));
+
+       bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
+
+       if (unlikely(!btree_node_dirty(b)))
+               set_btree_node_dirty(b);
+
+       live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
+       u64s_added = (int) bset_u64s(t) - old_u64s;
+
+       if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
+               b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
+       if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
+               b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
+
+       if (u64s_added > live_u64s_added &&
+           bch2_maybe_compact_whiteouts(c, b))
+               bch2_btree_iter_reinit_node(iter, b);
+
+       trace_btree_insert_key(c, b, insert);
+       return true;
+}
+
+/* Cached btree updates: */
+
+/* Normal update interface: */
+
+static inline void btree_insert_entry_checks(struct btree_trans *trans,
+                                            struct btree_iter *iter,
+                                            struct bkey_i *insert)
+{
+       struct bch_fs *c = trans->c;
+
+       BUG_ON(bkey_cmp(insert->k.p, iter->pos));
+       BUG_ON(debug_check_bkeys(c) &&
+              bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
+                                __btree_node_type(iter->level, iter->btree_id)));
+}
+
+static noinline int
+bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s)
+{
+       struct bch_fs *c = trans->c;
+       int ret;
+
+       bch2_trans_unlock(trans);
+
+       ret = bch2_journal_preres_get(&c->journal,
+                       &trans->journal_preres, u64s, 0);
+       if (ret)
+               return ret;
+
+       if (!bch2_trans_relock(trans)) {
+               trace_trans_restart_journal_preres_get(trans->ip);
+               return -EINTR;
+       }
+
+       return 0;
+}
+
+static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
+                                            unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       int ret;
+
+       if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
+               flags |= JOURNAL_RES_GET_RESERVED;
+
+       ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
+                                  trans->journal_u64s, flags);
+
+       return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
+}
+
+static enum btree_insert_ret
+btree_key_can_insert(struct btree_trans *trans,
+                    struct btree_iter *iter,
+                    unsigned u64s)
+{
+       struct bch_fs *c = trans->c;
+       struct btree *b = iter_l(iter)->b;
+
+       if (!bch2_btree_node_insert_fits(c, b, u64s))
+               return BTREE_INSERT_BTREE_NODE_FULL;
+
+       return BTREE_INSERT_OK;
+}
+
+static enum btree_insert_ret
+btree_key_can_insert_cached(struct btree_trans *trans,
+                           struct btree_iter *iter,
+                           unsigned u64s)
+{
+       struct bkey_cached *ck = (void *) iter->l[0].b;
+       unsigned new_u64s;
+       struct bkey_i *new_k;
+
+       BUG_ON(iter->level);
+
+       if (u64s <= ck->u64s)
+               return BTREE_INSERT_OK;
+
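+       /* Grow in powers of two, so repeated growth doesn't realloc every time: */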
+       new_u64s        = roundup_pow_of_two(u64s);
+       new_k           = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
+       if (!new_k)
+               return -ENOMEM;
+
+       ck->u64s        = new_u64s;
+       ck->k           = new_k;
+       return BTREE_INSERT_OK;
+}
+
+static inline void do_btree_insert_one(struct btree_trans *trans,
+                                      struct btree_iter *iter,
+                                      struct bkey_i *insert)
+{
+       struct bch_fs *c = trans->c;
+       struct journal *j = &c->journal;
+       bool did_work;
+
+       EBUG_ON(trans->journal_res.ref !=
+               !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
+
+       insert->k.needs_whiteout = false;
+
+       did_work = (btree_iter_type(iter) != BTREE_ITER_CACHED)
+               ? btree_insert_key_leaf(trans, iter, insert)
+               : bch2_btree_insert_key_cached(trans, iter, insert);
+       if (!did_work)
+               return;
+
+       if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+               bch2_journal_add_keys(j, &trans->journal_res,
+                                     iter->btree_id, insert);
+
+               bch2_journal_set_has_inode(j, &trans->journal_res,
+                                          insert->k.p.inode);
+
+               if (trans->journal_seq)
+                       *trans->journal_seq = trans->journal_res.seq;
+       }
+}
+
+static inline bool iter_has_trans_triggers(struct btree_iter *iter)
+{
+       return BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << iter->btree_id);
+}
+
+static inline bool iter_has_nontrans_triggers(struct btree_iter *iter)
+{
+       return (BTREE_NODE_TYPE_HAS_TRIGGERS &
+               ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS) &
+               (1U << iter->btree_id);
+}
+
+static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
+{
+       __bch2_btree_iter_unlock(iter);
+}
+
+static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
+
+       trans_for_each_update(trans, i) {
+               /*
+                * XXX: synchronization of cached update triggers with gc
+                */
+               BUG_ON(btree_iter_type(i->iter) == BTREE_ITER_CACHED);
+
+               if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
+                       bch2_mark_update(trans, i->iter, i->k, NULL,
+                                        i->trigger_flags|BTREE_TRIGGER_GC);
+       }
+}
+
+static inline int
+bch2_trans_commit_write_locked(struct btree_trans *trans,
+                              struct btree_insert_entry **stopped_at)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_fs_usage *fs_usage = NULL;
+       struct btree_insert_entry *i;
+       unsigned u64s = 0;
+       bool marking = false;
+       int ret;
+
+       if (race_fault()) {
+               trace_trans_restart_fault_inject(trans->ip);
+               return -EINTR;
+       }
+
+       /*
+        * Check if the insert will fit in the leaf node with the write lock
+        * held, otherwise another thread could write the node changing the
+        * amount of space available:
+        */
+
+       prefetch(&trans->c->journal.flags);
+
+       trans_for_each_update2(trans, i) {
+               /* Multiple inserts might go to same leaf: */
+               if (!same_leaf_as_prev(trans, i))
+                       u64s = 0;
+
+               u64s += i->k->k.u64s;
+               ret = btree_iter_type(i->iter) != BTREE_ITER_CACHED
+                       ? btree_key_can_insert(trans, i->iter, u64s)
+                       : btree_key_can_insert_cached(trans, i->iter, u64s);
+               if (ret) {
+                       *stopped_at = i;
+                       return ret;
+               }
+
+               if (btree_node_type_needs_gc(i->iter->btree_id))
+                       marking = true;
+       }
+
+       if (marking) {
+               percpu_down_read(&c->mark_lock);
+               fs_usage = bch2_fs_usage_scratch_get(c);
+       }
+
+       /*
+        * Don't get journal reservation until after we know insert will
+        * succeed:
+        */
+       if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+               ret = bch2_trans_journal_res_get(trans,
+                               JOURNAL_RES_GET_NONBLOCK);
+               if (ret)
+                       goto err;
+       } else {
+               trans->journal_res.seq = c->journal.replay_journal_seq;
+       }
+
+       if (unlikely(trans->extra_journal_entry_u64s)) {
+               memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
+                                 trans->extra_journal_entries,
+                                 trans->extra_journal_entry_u64s);
+
+               trans->journal_res.offset       += trans->extra_journal_entry_u64s;
+               trans->journal_res.u64s         -= trans->extra_journal_entry_u64s;
+       }
+
+       /*
+        * Not allowed to fail after we've gotten our journal reservation - we
+        * have to use it:
+        */
+
+       if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
+               if (journal_seq_verify(c))
+                       trans_for_each_update2(trans, i)
+                               i->k->k.version.lo = trans->journal_res.seq;
+               else if (inject_invalid_keys(c))
+                       trans_for_each_update2(trans, i)
+                               i->k->k.version = MAX_VERSION;
+       }
+
+       /* Must be called under mark_lock: */
+       if (marking && trans->fs_usage_deltas &&
+           bch2_replicas_delta_list_apply(c, fs_usage,
+                                          trans->fs_usage_deltas)) {
+               ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+               goto err;
+       }
+
+       trans_for_each_update(trans, i)
+               if (iter_has_nontrans_triggers(i->iter))
+                       bch2_mark_update(trans, i->iter, i->k,
+                                        fs_usage, i->trigger_flags);
+
+       if (marking)
+               bch2_trans_fs_usage_apply(trans, fs_usage);
+
+       if (unlikely(c->gc_pos.phase))
+               bch2_trans_mark_gc(trans);
+
+       trans_for_each_update2(trans, i)
+               do_btree_insert_one(trans, i->iter, i->k);
+err:
+       if (marking) {
+               bch2_fs_usage_scratch_put(c, fs_usage);
+               percpu_up_read(&c->mark_lock);
+       }
+
+       return ret;
+}
+
+/*
+ * Get journal reservation, take write locks, and attempt to do btree update(s):
+ */
+static inline int do_bch2_trans_commit(struct btree_trans *trans,
+                                      struct btree_insert_entry **stopped_at)
+{
+       struct btree_insert_entry *i;
+       struct btree_iter *iter;
+       int ret;
+
+       trans_for_each_update2(trans, i)
+               BUG_ON(!btree_node_intent_locked(i->iter, i->iter->level));
+
+       ret = bch2_journal_preres_get(&trans->c->journal,
+                       &trans->journal_preres, trans->journal_preres_u64s,
+                       JOURNAL_RES_GET_NONBLOCK|
+                       ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)
+                        ? JOURNAL_RES_GET_RECLAIM : 0));
+       if (unlikely(ret == -EAGAIN))
+               ret = bch2_trans_journal_preres_get_cold(trans,
+                                               trans->journal_preres_u64s);
+       if (unlikely(ret))
+               return ret;
+
+       /*
+        * Can't be holding any read locks when we go to take write locks:
+        *
+        * note - this must be done after bch2_trans_journal_preres_get_cold()
+        * or anything else that might call bch2_trans_relock(), since that
+        * would just retake the read locks:
+        */
+       trans_for_each_iter(trans, iter) {
+               if (iter->nodes_locked != iter->nodes_intent_locked) {
+                       EBUG_ON(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT);
+                       EBUG_ON(trans->iters_live & (1ULL << iter->idx));
+                       bch2_btree_iter_unlock_noinline(iter);
+               }
+       }
+
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+               trans_for_each_update2(trans, i)
+                       btree_insert_entry_checks(trans, i->iter, i->k);
+       bch2_btree_trans_verify_locks(trans);
+
+       trans_for_each_update2(trans, i)
+               if (!same_leaf_as_prev(trans, i))
+                       bch2_btree_node_lock_for_insert(trans->c,
+                                       iter_l(i->iter)->b, i->iter);
+
+       ret = bch2_trans_commit_write_locked(trans, stopped_at);
+
+       trans_for_each_update2(trans, i)
+               if (!same_leaf_as_prev(trans, i))
+                       bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
+                                                            i->iter);
+
+       if (!ret && trans->journal_pin)
+               bch2_journal_pin_add(&trans->c->journal, trans->journal_res.seq,
+                                    trans->journal_pin, NULL);
+
+       /*
+        * Drop journal reservation after dropping write locks, since dropping
+        * the journal reservation may kick off a journal write:
+        */
+       bch2_journal_res_put(&trans->c->journal, &trans->journal_res);
+
+       if (unlikely(ret))
+               return ret;
+
+       if (trans->flags & BTREE_INSERT_NOUNLOCK)
+               trans->nounlock = true;
+
+       trans_for_each_update2(trans, i)
+               if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
+                   !same_leaf_as_prev(trans, i))
+                       bch2_foreground_maybe_merge(trans->c, i->iter,
+                                                   0, trans->flags);
+
+       trans->nounlock = false;
+
+       bch2_trans_downgrade(trans);
+
+       return 0;
+}
+
+static noinline
+int bch2_trans_commit_error(struct btree_trans *trans,
+                           struct btree_insert_entry *i,
+                           int ret)
+{
+       struct bch_fs *c = trans->c;
+       unsigned flags = trans->flags;
+
+       /*
+        * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
+        * update; if we haven't done anything yet it doesn't apply
+        */
+       flags &= ~BTREE_INSERT_NOUNLOCK;
+
+       switch (ret) {
+       case BTREE_INSERT_BTREE_NODE_FULL:
+               ret = bch2_btree_split_leaf(c, i->iter, flags);
+
+               /*
+                * if the split succeeded without dropping locks the insert will
+                * still be atomic (what the caller peeked() and is overwriting
+                * won't have changed)
+                */
+#if 0
+               /*
+                * XXX:
+                * split -> btree node merging (of parent node) might still drop
+                * locks when we're not passing it BTREE_INSERT_NOUNLOCK
+                *
+                * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that
+                * will inhibit merging - but we don't have a reliable way yet
+                * (do we?) of checking if we dropped locks in this path
+                */
+               if (!ret)
+                       goto retry;
+#endif
+
+               /*
+                * don't care if we got ENOSPC because we told split it
+                * couldn't block:
+                */
+               if (!ret ||
+                   ret == -EINTR ||
+                   (flags & BTREE_INSERT_NOUNLOCK)) {
+                       trace_trans_restart_btree_node_split(trans->ip);
+                       ret = -EINTR;
+               }
+               break;
+       case BTREE_INSERT_ENOSPC:
+               ret = -ENOSPC;
+               break;
+       case BTREE_INSERT_NEED_MARK_REPLICAS:
+               bch2_trans_unlock(trans);
+
+               trans_for_each_update(trans, i) {
+                       ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k));
+                       if (ret)
+                               return ret;
+               }
+
+               if (bch2_trans_relock(trans))
+                       return 0;
+
+               trace_trans_restart_mark_replicas(trans->ip);
+               ret = -EINTR;
+               break;
+       case BTREE_INSERT_NEED_JOURNAL_RES:
+               bch2_trans_unlock(trans);
+
+               ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
+               if (ret)
+                       return ret;
+
+               if (bch2_trans_relock(trans))
+                       return 0;
+
+               trace_trans_restart_journal_res_get(trans->ip);
+               ret = -EINTR;
+               break;
+       default:
+               BUG_ON(ret >= 0);
+               break;
+       }
+
+       if (ret == -EINTR) {
+               int ret2 = bch2_btree_iter_traverse_all(trans);
+
+               if (ret2) {
+                       trace_trans_restart_traverse(trans->ip);
+                       return ret2;
+               }
+
+               trace_trans_restart_atomic(trans->ip);
+       }
+
+       return ret;
+}
+
+static noinline int
+bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
+{
+       struct bch_fs *c = trans->c;
+       int ret;
+
+       if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
+               return -EROFS;
+
+       bch2_trans_unlock(trans);
+
+       ret = bch2_fs_read_write_early(c);
+       if (ret)
+               return ret;
+
+       percpu_ref_get(&c->writes);
+       return 0;
+}
+
+static void bch2_trans_update2(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              struct bkey_i *insert)
+{
+       struct btree_insert_entry *i, n = (struct btree_insert_entry) {
+               .iter = iter, .k = insert
+       };
+
+       btree_insert_entry_checks(trans, n.iter, n.k);
+
+       BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+
+       EBUG_ON(trans->nr_updates2 >= trans->nr_iters);
+
+       iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+
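+       /*
+        * updates2 is kept sorted by iterator position; an existing entry at
+        * the same position is replaced:
+        */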
+       trans_for_each_update2(trans, i) {
+               if (btree_iter_cmp(n.iter, i->iter) == 0) {
+                       *i = n;
+                       return;
+               }
+
+               if (btree_iter_cmp(n.iter, i->iter) <= 0)
+                       break;
+       }
+
+       array_insert_item(trans->updates2, trans->nr_updates2,
+                         i - trans->updates2, n);
+}
+
+static int extent_update_to_keys(struct btree_trans *trans,
+                                struct btree_iter *orig_iter,
+                                struct bkey_i *insert)
+{
+       struct btree_iter *iter;
+       int ret;
+
+       ret = bch2_extent_can_insert(trans, orig_iter, insert);
+       if (ret)
+               return ret;
+
+       if (bkey_deleted(&insert->k))
+               return 0;
+
+       iter = bch2_trans_copy_iter(trans, orig_iter);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       iter->flags |= BTREE_ITER_INTENT;
+       __bch2_btree_iter_set_pos(iter, insert->k.p, false);
+       bch2_trans_update2(trans, iter, insert);
+       bch2_trans_iter_put(trans, iter);
+       return 0;
+}
+
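+/*
+ * Worked example with made-up offsets: inserting over [30, 60) on top of an
+ * existing extent [0, 100), the loop below emits two updates - a copy cut
+ * back to 30 (keeping [0, 30)) and a copy cut front at 60 (keeping
+ * [60, 100)). An existing extent that ends inside [start, end) is instead
+ * overwritten with a deleted key.
+ */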
+static int extent_handle_overwrites(struct btree_trans *trans,
+                                   enum btree_id btree_id,
+                                   struct bpos start, struct bpos end)
+{
+       struct btree_iter *iter = NULL, *update_iter;
+       struct bkey_i *update;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       iter = bch2_trans_get_iter(trans, btree_id, start, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(iter);
+       if (ret)
+               return ret;
+
+       k = bch2_btree_iter_peek_with_updates(iter);
+
+       while (k.k && !(ret = bkey_err(k))) {
+               if (bkey_cmp(end, bkey_start_pos(k.k)) <= 0)
+                       break;
+
+               if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
+                       update_iter = bch2_trans_copy_iter(trans, iter);
+                       if ((ret = PTR_ERR_OR_ZERO(update_iter)))
+                               goto err;
+
+                       update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+
+                       bkey_reassemble(update, k);
+                       bch2_cut_back(start, update);
+
+                       __bch2_btree_iter_set_pos(update_iter, update->k.p, false);
+                       bch2_trans_update2(trans, update_iter, update);
+                       bch2_trans_iter_put(trans, update_iter);
+               }
+
+               if (bkey_cmp(k.k->p, end) > 0) {
+                       update_iter = bch2_trans_copy_iter(trans, iter);
+                       if ((ret = PTR_ERR_OR_ZERO(update_iter)))
+                               goto err;
+
+                       update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+
+                       bkey_reassemble(update, k);
+                       bch2_cut_front(end, update);
+
+                       __bch2_btree_iter_set_pos(update_iter, update->k.p, false);
+                       bch2_trans_update2(trans, update_iter, update);
+                       bch2_trans_iter_put(trans, update_iter);
+               } else {
+                       update_iter = bch2_trans_copy_iter(trans, iter);
+                       if ((ret = PTR_ERR_OR_ZERO(update_iter)))
+                               goto err;
+
+                       update = bch2_trans_kmalloc(trans, sizeof(struct bkey));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+
+                       update->k = *k.k;
+                       set_bkey_val_u64s(&update->k, 0);
+                       update->k.type = KEY_TYPE_deleted;
+                       update->k.size = 0;
+
+                       __bch2_btree_iter_set_pos(update_iter, update->k.p, false);
+                       bch2_trans_update2(trans, update_iter, update);
+                       bch2_trans_iter_put(trans, update_iter);
+               }
+
+               k = bch2_btree_iter_next_with_updates(iter);
+       }
+err:
+       if (!IS_ERR_OR_NULL(iter))
+               bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+int __bch2_trans_commit(struct btree_trans *trans)
+{
+       struct btree_insert_entry *i = NULL;
+       struct btree_iter *iter;
+       bool trans_trigger_run;
+       unsigned u64s;
+       int ret = 0;
+
+       BUG_ON(trans->need_reset);
+
+       if (!trans->nr_updates)
+               goto out_noupdates;
+
+       if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
+               lockdep_assert_held(&trans->c->gc_lock);
+
+       memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
+
+       trans->journal_u64s             = trans->extra_journal_entry_u64s;
+       trans->journal_preres_u64s      = 0;
+
+       if (!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+           unlikely(!percpu_ref_tryget(&trans->c->writes))) {
+               ret = bch2_trans_commit_get_rw_cold(trans);
+               if (ret)
+                       return ret;
+       }
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+       trans_for_each_update(trans, i)
+               if (btree_iter_type(i->iter) != BTREE_ITER_CACHED &&
+                   !(i->trigger_flags & BTREE_TRIGGER_NORUN))
+                       bch2_btree_key_cache_verify_clean(trans,
+                                       i->iter->btree_id, i->iter->pos);
+#endif
+
+       /*
+        * Running triggers will append more updates to the list of updates as
+        * we're walking it:
+        */
+       do {
+               trans_trigger_run = false;
+
+               trans_for_each_update(trans, i) {
+                       if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK &&
+                                    (ret = bch2_btree_iter_traverse(i->iter)))) {
+                               trace_trans_restart_traverse(trans->ip);
+                               goto out;
+                       }
+
+                       /*
+                        * We're not using bch2_btree_iter_upgrade here because
+                        * we know trans->nounlock can't be set:
+                        */
+                       if (unlikely(i->iter->locks_want < 1 &&
+                                    !__bch2_btree_iter_upgrade(i->iter, 1))) {
+                               trace_trans_restart_upgrade(trans->ip);
+                               ret = -EINTR;
+                               goto out;
+                       }
+
+                       if (iter_has_trans_triggers(i->iter) &&
+                           !i->trans_triggers_run) {
+                               i->trans_triggers_run = true;
+                               trans_trigger_run = true;
+
+                               ret = bch2_trans_mark_update(trans, i->iter, i->k,
+                                                            i->trigger_flags);
+                               if (unlikely(ret)) {
+                                       if (ret == -EINTR)
+                                               trace_trans_restart_mark(trans->ip);
+                                       goto out;
+                               }
+                       }
+               }
+       } while (trans_trigger_run);
+
+       /* Turn extents updates into keys: */
+       trans_for_each_update(trans, i)
+               if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
+                       struct bpos start = bkey_start_pos(&i->k->k);
+
+                       while (i + 1 < trans->updates + trans->nr_updates &&
+                              i[0].iter->btree_id == i[1].iter->btree_id &&
+                              !bkey_cmp(i[0].k->k.p, bkey_start_pos(&i[1].k->k)))
+                               i++;
+
+                       ret = extent_handle_overwrites(trans, i->iter->btree_id,
+                                                      start, i->k->k.p);
+                       if (ret)
+                               goto out;
+               }
+
+       trans_for_each_update(trans, i) {
+               if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
+                       ret = extent_update_to_keys(trans, i->iter, i->k);
+                       if (ret)
+                               goto out;
+               } else {
+                       bch2_trans_update2(trans, i->iter, i->k);
+               }
+       }
+
+       trans_for_each_update2(trans, i) {
+               BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
+               BUG_ON(i->iter->locks_want < 1);
+
+               u64s = jset_u64s(i->k->k.u64s);
+               if (btree_iter_type(i->iter) == BTREE_ITER_CACHED &&
+                   likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)))
+                       trans->journal_preres_u64s += u64s;
+               trans->journal_u64s += u64s;
+       }
+retry:
+       memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+
+       ret = do_bch2_trans_commit(trans, &i);
+
+       /* make sure we didn't drop or screw up locks: */
+       bch2_btree_trans_verify_locks(trans);
+
+       if (ret)
+               goto err;
+
+       trans_for_each_iter(trans, iter)
+               if ((trans->iters_live & (1ULL << iter->idx)) &&
+                   (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) {
+                       if (trans->flags & BTREE_INSERT_NOUNLOCK)
+                               bch2_btree_iter_set_pos_same_leaf(iter, iter->pos_after_commit);
+                       else
+                               bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
+               }
+out:
+       bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
+
+       if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+               percpu_ref_put(&trans->c->writes);
+out_noupdates:
+       bch2_trans_reset(trans, !ret ? TRANS_RESET_NOTRAVERSE : 0);
+
+       return ret;
+err:
+       ret = bch2_trans_commit_error(trans, i, ret);
+       if (ret)
+               goto out;
+
+       goto retry;
+}
+
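+/*
+ * Queue a key for insertion as part of the current transaction; overlapping
+ * extent updates already queued are trimmed or dropped below:
+ */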
+int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
+                     struct bkey_i *k, enum btree_trigger_flags flags)
+{
+       struct btree_insert_entry *i, n = (struct btree_insert_entry) {
+               .trigger_flags = flags, .iter = iter, .k = k
+       };
+
+       EBUG_ON(bkey_cmp(iter->pos,
+                        (iter->flags & BTREE_ITER_IS_EXTENTS)
+                        ? bkey_start_pos(&k->k)
+                        : k->k.p));
+
+       iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+
+       if (btree_node_type_is_extents(iter->btree_id)) {
+               iter->pos_after_commit = k->k.p;
+               iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
+       }
+
+       /*
+        * Pending updates are kept sorted: first, find position of new update:
+        */
+       trans_for_each_update(trans, i)
+               if (btree_iter_cmp(iter, i->iter) <= 0)
+                       break;
+
+       /*
+        * Now delete/trim any updates the new update overwrites:
+        */
+       if (i > trans->updates &&
+           i[-1].iter->btree_id == iter->btree_id &&
+           bkey_cmp(iter->pos, i[-1].k->k.p) < 0)
+               bch2_cut_back(n.iter->pos, i[-1].k);
+
+       while (i < trans->updates + trans->nr_updates &&
+              iter->btree_id == i->iter->btree_id &&
+              bkey_cmp(n.k->k.p, i->k->k.p) >= 0)
+               array_remove_item(trans->updates, trans->nr_updates,
+                                 i - trans->updates);
+
+       if (i < trans->updates + trans->nr_updates &&
+           iter->btree_id == i->iter->btree_id &&
+           bkey_cmp(n.k->k.p, i->iter->pos) > 0) {
+               /*
+                * When we have an extent that overwrites the start of another
+                * update, trimming that extent will mean the iterator's
+                * position has to change since the iterator position has to
+                * match the extent's start pos - but we don't want to change
+                * the iterator pos if some other code is using it, so we may
+                * need to clone it:
+                */
+               if (trans->iters_live & (1ULL << i->iter->idx)) {
+                       i->iter = bch2_trans_copy_iter(trans, i->iter);
+                       if (IS_ERR(i->iter)) {
+                               trans->need_reset = true;
+                               return PTR_ERR(i->iter);
+                       }
+
+                       i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+                       bch2_trans_iter_put(trans, i->iter);
+               }
+
+               bch2_cut_front(n.k->k.p, i->k);
+               bch2_btree_iter_set_pos(i->iter, n.k->k.p);
+       }
+
+       EBUG_ON(trans->nr_updates >= trans->nr_iters);
+
+       array_insert_item(trans->updates, trans->nr_updates,
+                         i - trans->updates, n);
+       return 0;
+}
+
+int __bch2_btree_insert(struct btree_trans *trans,
+                       enum btree_id id, struct bkey_i *k)
+{
+       struct btree_iter *iter;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k),
+                                  BTREE_ITER_INTENT);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       ret   = bch2_btree_iter_traverse(iter) ?:
+               bch2_trans_update(trans, iter, k, 0);
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+/**
+ * bch2_btree_insert - insert a key into a given btree
+ * @c:                 pointer to struct bch_fs
+ * @id:                btree to insert into
+ * @k:                 key to insert
+ * @disk_res:          disk reservation, or NULL
+ * @journal_seq:       if non NULL, set to the journal sequence number of the commit
+ * @flags:             BTREE_INSERT_* commit flags
+ */
+int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
+                     struct bkey_i *k,
+                     struct disk_reservation *disk_res,
+                     u64 *journal_seq, int flags)
+{
+       return bch2_trans_do(c, disk_res, journal_seq, flags,
+                            __bch2_btree_insert(&trans, id, k));
+}
+
+int bch2_btree_delete_at_range(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              struct bpos end,
+                              u64 *journal_seq)
+{
+       struct bkey_s_c k;
+       int ret = 0;
+retry:
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k)) &&
+              bkey_cmp(iter->pos, end) < 0) {
+               struct bkey_i delete;
+
+               bch2_trans_begin(trans);
+
+               bkey_init(&delete.k);
+
+               /*
+                * For extents, iter.pos won't necessarily be the same as
+                * bkey_start_pos(k.k) (for non extents they always will be the
+                * same). It's important that we delete starting from iter.pos
+                * because the range we want to delete could start in the middle
+                * of k.
+                *
+                * (bch2_btree_iter_peek() does guarantee that iter.pos >=
+                * bkey_start_pos(k.k)).
+                */
+               delete.k.p = iter->pos;
+
+               if (btree_node_type_is_extents(iter->btree_id)) {
+                       unsigned max_sectors =
+                               KEY_SIZE_MAX & (~0 << trans->c->block_bits);
+
+                       /* create the biggest key we can */
+                       bch2_key_resize(&delete.k, max_sectors);
+                       bch2_cut_back(end, &delete);
+
+                       ret = bch2_extent_trim_atomic(&delete, iter);
+                       if (ret)
+                               break;
+               }
+
+               bch2_trans_update(trans, iter, &delete, 0);
+               ret = bch2_trans_commit(trans, NULL, journal_seq,
+                                       BTREE_INSERT_NOFAIL);
+               if (ret)
+                       break;
+
+               bch2_trans_cond_resched(trans);
+       }
+
+       if (ret == -EINTR) {
+               ret = 0;
+               goto retry;
+       }
+
+       return ret;
+}
+
+int bch2_btree_delete_at(struct btree_trans *trans,
+                        struct btree_iter *iter, unsigned flags)
+{
+       struct bkey_i k;
+
+       bkey_init(&k.k);
+       k.k.p = iter->pos;
+
+       bch2_trans_update(trans, iter, &k, 0);
+       return bch2_trans_commit(trans, NULL, NULL,
+                                BTREE_INSERT_NOFAIL|
+                                BTREE_INSERT_USE_RESERVE|flags);
+}
+
+/*
+ * bch2_btree_delete_range - delete everything within a given range
+ *
+ * Range is a half open interval - [start, end)
+ */
+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
+                           struct bpos start, struct bpos end,
+                           u64 *journal_seq)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       int ret = 0;
+
+       /*
+        * XXX: whether we need mem/more iters depends on whether this btree id
+        * has triggers
+        */
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);
+
+       iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
+
+       ret = bch2_btree_delete_at_range(&trans, iter, end, journal_seq);
+       ret = bch2_trans_exit(&trans) ?: ret;
+
+       BUG_ON(ret == -EINTR);
+       return ret;
+}
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
new file mode 100644 (file)
index 0000000..c3fc3ab
--- /dev/null
+++ b/libbcachefs/buckets.c
@@ -0,0 +1,2147 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for manipulating bucket marks for garbage collection.
+ *
+ * Copyright 2014 Datera, Inc.
+ *
+ * Bucket states:
+ * - free bucket: mark == 0
+ *   The bucket contains no data and will not be read
+ *
+ * - allocator bucket: owned_by_allocator == 1
+ *   The bucket is on a free list, or it is an open bucket
+ *
+ * - cached bucket: owned_by_allocator == 0 &&
+ *                  dirty_sectors == 0 &&
+ *                  cached_sectors > 0
+ *   The bucket contains data but may be safely discarded as there are
+ *   enough replicas of the data on other cache devices, or it has been
+ *   written back to the backing device
+ *
+ * - dirty bucket: owned_by_allocator == 0 &&
+ *                 dirty_sectors > 0
+ *   The bucket contains data that we must not discard (either the only
+ *   copy, or one of the 'main copies' for data requiring multiple replicas)
+ *
+ * - metadata bucket: owned_by_allocator == 0 && is_metadata == 1
+ *   This is a btree node, journal or gen/prio bucket
+ *
+ * Lifecycle:
+ *
+ * bucket invalidated => bucket on freelist => open bucket =>
+ *     [dirty bucket =>] cached bucket => bucket invalidated => ...
+ *
+ * Note that cache promotion can skip the dirty bucket step, as data
+ * is copied from a deeper tier to a shallower tier, onto a cached
+ * bucket.
+ * Note also that a cached bucket can spontaneously become dirty --
+ * see below.
+ *
+ * Only a traversal of the key space can determine whether a bucket is
+ * truly dirty or cached.
+ *
+ * Transitions:
+ *
+ * - free => allocator: bucket was invalidated
+ * - cached => allocator: bucket was invalidated
+ *
+ * - allocator => dirty: open bucket was filled up
+ * - allocator => cached: open bucket was filled up
+ * - allocator => metadata: metadata was allocated
+ *
+ * - dirty => cached: dirty sectors were copied to a deeper tier
+ * - dirty => free: dirty sectors were overwritten or moved (copy gc)
+ * - cached => free: cached sectors were overwritten
+ *
+ * - metadata => free: metadata was freed
+ *
+ * Oddities:
+ * - cached => dirty: a device was removed so formerly replicated data
+ *                    is no longer sufficiently replicated
+ * - free => cached: cannot happen
+ * - free => dirty: cannot happen
+ * - free => metadata: cannot happen
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "bset.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "ec.h"
+#include "error.h"
+#include "movinggc.h"
+#include "replicas.h"
+
+#include <linux/preempt.h>
+#include <trace/events/bcachefs.h>
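+
+/*
+ * Illustrative helper, not used elsewhere in this file: classifying a
+ * bucket_mark into the states described in the comment at the top. Note
+ * that "is_metadata" above predates bucket_mark.data_type; here a metadata
+ * bucket is one whose data_type is a metadata type:
+ */
+static inline const char *bucket_state_str(struct bucket_mark m)
+{
+       if (m.owned_by_allocator)
+               return "allocator";
+       if (m.data_type == BCH_DATA_sb ||
+           m.data_type == BCH_DATA_journal ||
+           m.data_type == BCH_DATA_btree)
+               return "metadata";
+       if (m.dirty_sectors)
+               return "dirty";
+       if (m.cached_sectors)
+               return "cached";
+       return "free";
+}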
+
+/*
+ * Clear journal_seq_valid for buckets for which it's not needed, to prevent
+ * wraparound:
+ */
+void bch2_bucket_seq_cleanup(struct bch_fs *c)
+{
+       u64 journal_seq = atomic64_read(&c->journal.seq);
+       u16 last_seq_ondisk = c->journal.last_seq_ondisk;
+       struct bch_dev *ca;
+       struct bucket_array *buckets;
+       struct bucket *g;
+       struct bucket_mark m;
+       unsigned i;
+
+       if (journal_seq - c->last_bucket_seq_cleanup <
+           (1U << (BUCKET_JOURNAL_SEQ_BITS - 2)))
+               return;
+
+       c->last_bucket_seq_cleanup = journal_seq;
+
+       for_each_member_device(ca, c, i) {
+               down_read(&ca->bucket_lock);
+               buckets = bucket_array(ca);
+
+               for_each_bucket(g, buckets) {
+                       bucket_cmpxchg(g, m, ({
+                               if (!m.journal_seq_valid ||
+                                   bucket_needs_journal_commit(m, last_seq_ondisk))
+                                       break;
+
+                               m.journal_seq_valid = 0;
+                       }));
+               }
+               up_read(&ca->bucket_lock);
+       }
+}
+
+void bch2_fs_usage_initialize(struct bch_fs *c)
+{
+       struct bch_fs_usage *usage;
+       unsigned i;
+
+       percpu_down_write(&c->mark_lock);
+       usage = c->usage_base;
+
+       bch2_fs_usage_acc_to_base(c, 0);
+       bch2_fs_usage_acc_to_base(c, 1);
+
+       for (i = 0; i < BCH_REPLICAS_MAX; i++)
+               usage->reserved += usage->persistent_reserved[i];
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+
+               switch (e->data_type) {
+               case BCH_DATA_btree:
+                       usage->btree    += usage->replicas[i];
+                       break;
+               case BCH_DATA_user:
+                       usage->data     += usage->replicas[i];
+                       break;
+               case BCH_DATA_cached:
+                       usage->cached   += usage->replicas[i];
+                       break;
+               }
+       }
+
+       percpu_up_write(&c->mark_lock);
+}
+
+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
+{
+       if (fs_usage == c->usage_scratch)
+               mutex_unlock(&c->usage_scratch_lock);
+       else
+               kfree(fs_usage);
+}
+
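+/*
+ * Get a scratch bch_fs_usage: try a cheap GFP_NOWAIT allocation first, then
+ * try to grab the preallocated buffer (c->usage_scratch) without blocking,
+ * then fall back to a blocking GFP_NOFS allocation, and finally block on
+ * the scratch buffer's lock:
+ */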
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
+{
+       struct bch_fs_usage *ret;
+       unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
+
+       ret = kzalloc(bytes, GFP_NOWAIT|__GFP_NOWARN);
+       if (ret)
+               return ret;
+
+       if (mutex_trylock(&c->usage_scratch_lock))
+               goto out_pool;
+
+       ret = kzalloc(bytes, GFP_NOFS);
+       if (ret)
+               return ret;
+
+       mutex_lock(&c->usage_scratch_lock);
+out_pool:
+       ret = c->usage_scratch;
+       memset(ret, 0, bytes);
+       return ret;
+}
+
+struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+{
+       struct bch_dev_usage ret;
+
+       memset(&ret, 0, sizeof(ret));
+       acc_u64s_percpu((u64 *) &ret,
+                       (u64 __percpu *) ca->usage[0],
+                       sizeof(ret) / sizeof(u64));
+
+       return ret;
+}
+
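+/*
+ * Usage counters are kept as a base copy (c->usage_base) plus two percpu
+ * accumulators indexed by journal_seq & 1, so that updates attributed to
+ * different journal writes stay separate until bch2_fs_usage_acc_to_base()
+ * folds them back in; readers sum all three under c->usage_lock:
+ */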
+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
+                                               unsigned journal_seq,
+                                               bool gc)
+{
+       return this_cpu_ptr(gc
+                           ? c->usage_gc
+                           : c->usage[journal_seq & 1]);
+}
+
+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
+{
+       ssize_t offset = v - (u64 *) c->usage_base;
+       unsigned seq;
+       u64 ret;
+
+       BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
+       percpu_rwsem_assert_held(&c->mark_lock);
+
+       do {
+               seq = read_seqcount_begin(&c->usage_lock);
+               ret = *v +
+                       percpu_u64_get((u64 __percpu *) c->usage[0] + offset) +
+                       percpu_u64_get((u64 __percpu *) c->usage[1] + offset);
+       } while (read_seqcount_retry(&c->usage_lock, seq));
+
+       return ret;
+}
+
+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
+{
+       struct bch_fs_usage *ret;
+       unsigned seq, v, u64s = fs_usage_u64s(c);
+retry:
+       ret = kmalloc(u64s * sizeof(u64), GFP_NOFS);
+       if (unlikely(!ret))
+               return NULL;
+
+       percpu_down_read(&c->mark_lock);
+
+       v = fs_usage_u64s(c);
+       if (unlikely(u64s != v)) {
+               u64s = v;
+               percpu_up_read(&c->mark_lock);
+               kfree(ret);
+               goto retry;
+       }
+
+       do {
+               seq = read_seqcount_begin(&c->usage_lock);
+               memcpy(ret, c->usage_base, u64s * sizeof(u64));
+               acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[0], u64s);
+               acc_u64s_percpu((u64 *) ret, (u64 __percpu *) c->usage[1], u64s);
+       } while (read_seqcount_retry(&c->usage_lock, seq));
+
+       return ret;
+}
+
+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
+{
+       unsigned u64s = fs_usage_u64s(c);
+
+       BUG_ON(idx >= 2);
+
+       preempt_disable();
+       write_seqcount_begin(&c->usage_lock);
+
+       acc_u64s_percpu((u64 *) c->usage_base,
+                       (u64 __percpu *) c->usage[idx], u64s);
+       percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));
+
+       write_seqcount_end(&c->usage_lock);
+       preempt_enable();
+}
+
+void bch2_fs_usage_to_text(struct printbuf *out,
+                          struct bch_fs *c,
+                          struct bch_fs_usage *fs_usage)
+{
+       unsigned i;
+
+       pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
+
+       pr_buf(out, "hidden:\t\t\t\t%llu\n",
+              fs_usage->hidden);
+       pr_buf(out, "data:\t\t\t\t%llu\n",
+              fs_usage->data);
+       pr_buf(out, "cached:\t\t\t\t%llu\n",
+              fs_usage->cached);
+       pr_buf(out, "reserved:\t\t\t%llu\n",
+              fs_usage->reserved);
+       pr_buf(out, "nr_inodes:\t\t\t%llu\n",
+              fs_usage->nr_inodes);
+       pr_buf(out, "online reserved:\t\t%llu\n",
+              fs_usage->online_reserved);
+
+       for (i = 0;
+            i < ARRAY_SIZE(fs_usage->persistent_reserved);
+            i++) {
+               pr_buf(out, "%u replicas:\n", i + 1);
+               pr_buf(out, "\treserved:\t\t%llu\n",
+                      fs_usage->persistent_reserved[i]);
+       }
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+
+               pr_buf(out, "\t");
+               bch2_replicas_entry_to_text(out, e);
+               pr_buf(out, ":\t%llu\n", fs_usage->replicas[i]);
+       }
+}
+
+#define RESERVE_FACTOR 6
+
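+/*
+ * Inflate a reservation by ~1/2^RESERVE_FACTOR, rounded up: e.g. with
+ * RESERVE_FACTOR 6, reserve_factor(64) = 64 + (round_up(64, 64) >> 6) = 65,
+ * and avail_factor() is the approximate inverse:
+ * avail_factor(65) = (65 << 6) / ((1 << 6) + 1) = 64.
+ */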
+static u64 reserve_factor(u64 r)
+{
+       return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
+}
+
+static u64 avail_factor(u64 r)
+{
+       return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
+}
+
+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
+{
+       return min(fs_usage->hidden +
+                  fs_usage->btree +
+                  fs_usage->data +
+                  reserve_factor(fs_usage->reserved +
+                                 fs_usage->online_reserved),
+                  c->capacity);
+}
+
+static struct bch_fs_usage_short
+__bch2_fs_usage_read_short(struct bch_fs *c)
+{
+       struct bch_fs_usage_short ret;
+       u64 data, reserved;
+
+       ret.capacity = c->capacity -
+               bch2_fs_usage_read_one(c, &c->usage_base->hidden);
+
+       data            = bch2_fs_usage_read_one(c, &c->usage_base->data) +
+               bch2_fs_usage_read_one(c, &c->usage_base->btree);
+       reserved        = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
+               bch2_fs_usage_read_one(c, &c->usage_base->online_reserved);
+
+       ret.used        = min(ret.capacity, data + reserve_factor(reserved));
+       ret.free        = ret.capacity - ret.used;
+
+       ret.nr_inodes   = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes);
+
+       return ret;
+}
+
+struct bch_fs_usage_short
+bch2_fs_usage_read_short(struct bch_fs *c)
+{
+       struct bch_fs_usage_short ret;
+
+       percpu_down_read(&c->mark_lock);
+       ret = __bch2_fs_usage_read_short(c);
+       percpu_up_read(&c->mark_lock);
+
+       return ret;
+}
+
+static inline int is_unavailable_bucket(struct bucket_mark m)
+{
+       return !is_available_bucket(m);
+}
+
+static inline int is_fragmented_bucket(struct bucket_mark m,
+                                      struct bch_dev *ca)
+{
+       if (!m.owned_by_allocator &&
+           m.data_type == BCH_DATA_user &&
+           bucket_sectors_used(m))
+               return max_t(int, 0, (int) ca->mi.bucket_size -
+                            bucket_sectors_used(m));
+       return 0;
+}
+
+static inline int bucket_stripe_sectors(struct bucket_mark m)
+{
+       return m.stripe ? m.dirty_sectors : 0;
+}
+
+static inline enum bch_data_type bucket_type(struct bucket_mark m)
+{
+       return m.cached_sectors && !m.dirty_sectors
+               ? BCH_DATA_cached
+               : m.data_type;
+}
+
+static bool bucket_became_unavailable(struct bucket_mark old,
+                                     struct bucket_mark new)
+{
+       return is_available_bucket(old) &&
+              !is_available_bucket(new);
+}
+
+int bch2_fs_usage_apply(struct bch_fs *c,
+                       struct bch_fs_usage *fs_usage,
+                       struct disk_reservation *disk_res,
+                       unsigned journal_seq)
+{
+       s64 added = fs_usage->data + fs_usage->reserved;
+       s64 should_not_have_added;
+       int ret = 0;
+
+       percpu_rwsem_assert_held(&c->mark_lock);
+
+       /*
+        * Not allowed to reduce sectors_available except by getting a
+        * reservation:
+        */
+       should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
+       if (WARN_ONCE(should_not_have_added > 0,
+                     "disk usage increased by %lli without a reservation",
+                     should_not_have_added)) {
+               atomic64_sub(should_not_have_added, &c->sectors_available);
+               added -= should_not_have_added;
+               ret = -1;
+       }
+
+       if (added > 0) {
+               disk_res->sectors               -= added;
+               fs_usage->online_reserved       -= added;
+       }
+
+       preempt_disable();
+       acc_u64s((u64 *) fs_usage_ptr(c, journal_seq, false),
+                (u64 *) fs_usage, fs_usage_u64s(c));
+       preempt_enable();
+
+       return ret;
+}
+
+static inline void account_bucket(struct bch_fs_usage *fs_usage,
+                                 struct bch_dev_usage *dev_usage,
+                                 enum bch_data_type type,
+                                 int nr, s64 size)
+{
+       if (type == BCH_DATA_sb || type == BCH_DATA_journal)
+               fs_usage->hidden        += size;
+
+       dev_usage->buckets[type]        += nr;
+}
+
+static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
+                                 struct bch_fs_usage *fs_usage,
+                                 struct bucket_mark old, struct bucket_mark new,
+                                 bool gc)
+{
+       struct bch_dev_usage *u;
+
+       percpu_rwsem_assert_held(&c->mark_lock);
+
+       preempt_disable();
+       u = this_cpu_ptr(ca->usage[gc]);
+
+       if (bucket_type(old))
+               account_bucket(fs_usage, u, bucket_type(old),
+                              -1, -ca->mi.bucket_size);
+
+       if (bucket_type(new))
+               account_bucket(fs_usage, u, bucket_type(new),
+                              1, ca->mi.bucket_size);
+
+       u->buckets_alloc +=
+               (int) new.owned_by_allocator - (int) old.owned_by_allocator;
+       u->buckets_unavailable +=
+               is_unavailable_bucket(new) - is_unavailable_bucket(old);
+
+       u->buckets_ec += (int) new.stripe - (int) old.stripe;
+       u->sectors_ec += bucket_stripe_sectors(new) -
+                        bucket_stripe_sectors(old);
+
+       u->sectors[old.data_type] -= old.dirty_sectors;
+       u->sectors[new.data_type] += new.dirty_sectors;
+       u->sectors[BCH_DATA_cached] +=
+               (int) new.cached_sectors - (int) old.cached_sectors;
+       u->sectors_fragmented +=
+               is_fragmented_bucket(new, ca) - is_fragmented_bucket(old, ca);
+       preempt_enable();
+
+       if (!is_available_bucket(old) && is_available_bucket(new))
+               bch2_wake_allocator(ca);
+}
+
+__flatten
+void bch2_dev_usage_from_buckets(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       struct bucket_mark old = { .v.counter = 0 };
+       struct bucket_array *buckets;
+       struct bucket *g;
+       unsigned i;
+       int cpu;
+
+       c->usage_base->hidden = 0;
+
+       for_each_member_device(ca, c, i) {
+               for_each_possible_cpu(cpu)
+                       memset(per_cpu_ptr(ca->usage[0], cpu), 0,
+                              sizeof(*ca->usage[0]));
+
+               buckets = bucket_array(ca);
+
+               for_each_bucket(g, buckets)
+                       bch2_dev_usage_update(c, ca, c->usage_base,
+                                             old, g->mark, false);
+       }
+}
+
+static inline int update_replicas(struct bch_fs *c,
+                                 struct bch_fs_usage *fs_usage,
+                                 struct bch_replicas_entry *r,
+                                 s64 sectors)
+{
+       int idx = bch2_replicas_entry_idx(c, r);
+
+       if (idx < 0)
+               return -1;
+
+       if (!fs_usage)
+               return 0;
+
+       switch (r->data_type) {
+       case BCH_DATA_btree:
+               fs_usage->btree         += sectors;
+               break;
+       case BCH_DATA_user:
+               fs_usage->data          += sectors;
+               break;
+       case BCH_DATA_cached:
+               fs_usage->cached        += sectors;
+               break;
+       }
+       fs_usage->replicas[idx]         += sectors;
+       return 0;
+}
+
+static inline void update_cached_sectors(struct bch_fs *c,
+                                        struct bch_fs_usage *fs_usage,
+                                        unsigned dev, s64 sectors)
+{
+       struct bch_replicas_padded r;
+
+       bch2_replicas_entry_cached(&r.e, dev);
+
+       update_replicas(c, fs_usage, &r.e, sectors);
+}
+
+static struct replicas_delta_list *
+replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
+{
+       struct replicas_delta_list *d = trans->fs_usage_deltas;
+       unsigned new_size = d ? (d->size + more) * 2 : 128;
+
+       if (!d || d->used + more > d->size) {
+               d = krealloc(d, sizeof(*d) + new_size, GFP_NOIO|__GFP_ZERO);
+               BUG_ON(!d);
+
+               d->size = new_size;
+               trans->fs_usage_deltas = d;
+       }
+       return d;
+}
+
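+/*
+ * The delta list is a packed array of variable length entries: an 8 byte
+ * delta followed by a bch_replicas_entry, hence the "+ 8" when sizing
+ * entries here and when advancing in replicas_delta_next():
+ */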
+static inline void update_replicas_list(struct btree_trans *trans,
+                                       struct bch_replicas_entry *r,
+                                       s64 sectors)
+{
+       struct replicas_delta_list *d;
+       struct replicas_delta *n;
+       unsigned b;
+
+       if (!sectors)
+               return;
+
+       b = replicas_entry_bytes(r) + 8;
+       d = replicas_deltas_realloc(trans, b);
+
+       n = (void *) d->d + d->used;
+       n->delta = sectors;
+       memcpy(&n->r, r, replicas_entry_bytes(r));
+       d->used += b;
+}
+
+static inline void update_cached_sectors_list(struct btree_trans *trans,
+                                             unsigned dev, s64 sectors)
+{
+       struct bch_replicas_padded r;
+
+       bch2_replicas_entry_cached(&r.e, dev);
+
+       update_replicas_list(trans, &r.e, sectors);
+}
+
+static inline struct replicas_delta *
+replicas_delta_next(struct replicas_delta *d)
+{
+       return (void *) d + replicas_entry_bytes(&d->r) + 8;
+}
+
+int bch2_replicas_delta_list_apply(struct bch_fs *c,
+                                  struct bch_fs_usage *fs_usage,
+                                  struct replicas_delta_list *r)
+{
+       struct replicas_delta *d = r->d;
+       struct replicas_delta *top = (void *) r->d + r->used;
+       unsigned i;
+
+       for (d = r->d; d != top; d = replicas_delta_next(d))
+               if (update_replicas(c, fs_usage, &d->r, d->delta)) {
+                       top = d;
+                       goto unwind;
+               }
+
+       if (!fs_usage)
+               return 0;
+
+       fs_usage->nr_inodes += r->nr_inodes;
+
+       for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+               fs_usage->reserved += r->persistent_reserved[i];
+               fs_usage->persistent_reserved[i] += r->persistent_reserved[i];
+       }
+
+       return 0;
+unwind:
+       for (d = r->d; d != top; d = replicas_delta_next(d))
+               update_replicas(c, fs_usage, &d->r, -d->delta);
+       return -1;
+}
+
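+/*
+ * Run a mark function against the in-memory bucket state, the gc copy, or
+ * both: the normal copy is updated by non-gc triggers, the gc copy by gc
+ * triggers, and also by non-gc triggers once gc has visited @pos:
+ */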
+#define do_mark_fn(fn, c, pos, flags, ...)                             \
+({                                                                     \
+       int gc, ret = 0;                                                \
+                                                                       \
+       percpu_rwsem_assert_held(&c->mark_lock);                        \
+                                                                       \
+       for (gc = 0; gc < 2 && !ret; gc++)                              \
+               if (!gc == !(flags & BTREE_TRIGGER_GC) ||               \
+                   (gc && gc_visited(c, pos)))                         \
+                       ret = fn(c, __VA_ARGS__, gc);                   \
+       ret;                                                            \
+})
+
+static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
+                                   size_t b, struct bucket_mark *ret,
+                                   bool gc)
+{
+       struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
+       struct bucket *g = __bucket(ca, b, gc);
+       struct bucket_mark old, new;
+
+       old = bucket_cmpxchg(g, new, ({
+               BUG_ON(!is_available_bucket(new));
+
+               new.owned_by_allocator  = true;
+               new.data_type           = 0;
+               new.cached_sectors      = 0;
+               new.dirty_sectors       = 0;
+               new.gen++;
+       }));
+
+       bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+
+       if (old.cached_sectors)
+               update_cached_sectors(c, fs_usage, ca->dev_idx,
+                                     -((s64) old.cached_sectors));
+
+       if (!gc)
+               *ret = old;
+       return 0;
+}
+
+void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
+                           size_t b, struct bucket_mark *old)
+{
+       do_mark_fn(__bch2_invalidate_bucket, c, gc_phase(GC_PHASE_START), 0,
+                  ca, b, old);
+
+       if (!old->owned_by_allocator && old->cached_sectors)
+               trace_invalidate(ca, bucket_to_sector(ca, b),
+                                old->cached_sectors);
+}
+
+static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
+                                   size_t b, bool owned_by_allocator,
+                                   bool gc)
+{
+       struct bch_fs_usage *fs_usage = fs_usage_ptr(c, 0, gc);
+       struct bucket *g = __bucket(ca, b, gc);
+       struct bucket_mark old, new;
+
+       old = bucket_cmpxchg(g, new, ({
+               new.owned_by_allocator  = owned_by_allocator;
+       }));
+
+       bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+
+       BUG_ON(!gc &&
+              !owned_by_allocator && !old.owned_by_allocator);
+
+       return 0;
+}
+
+void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
+                           size_t b, bool owned_by_allocator,
+                           struct gc_pos pos, unsigned flags)
+{
+       preempt_disable();
+
+       do_mark_fn(__bch2_mark_alloc_bucket, c, pos, flags,
+                  ca, b, owned_by_allocator);
+
+       preempt_enable();
+}
+
+static int bch2_mark_alloc(struct bch_fs *c,
+                          struct bkey_s_c old, struct bkey_s_c new,
+                          struct bch_fs_usage *fs_usage,
+                          u64 journal_seq, unsigned flags)
+{
+       bool gc = flags & BTREE_TRIGGER_GC;
+       struct bkey_alloc_unpacked u;
+       struct bch_dev *ca;
+       struct bucket *g;
+       struct bucket_mark old_m, m;
+
+       /* We don't do anything for deletions - do we?: */
+       if (new.k->type != KEY_TYPE_alloc)
+               return 0;
+
+       /*
+        * alloc btree is read in by bch2_alloc_read, not gc:
+        */
+       if ((flags & BTREE_TRIGGER_GC) &&
+           !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
+               return 0;
+
+       ca = bch_dev_bkey_exists(c, new.k->p.inode);
+
+       if (new.k->p.offset >= ca->mi.nbuckets)
+               return 0;
+
+       g = __bucket(ca, new.k->p.offset, gc);
+       u = bch2_alloc_unpack(new);
+
+       old_m = bucket_cmpxchg(g, m, ({
+               m.gen                   = u.gen;
+               m.data_type             = u.data_type;
+               m.dirty_sectors         = u.dirty_sectors;
+               m.cached_sectors        = u.cached_sectors;
+
+               if (journal_seq) {
+                       m.journal_seq_valid     = 1;
+                       m.journal_seq           = journal_seq;
+               }
+       }));
+
+       bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc);
+
+       g->io_time[READ]        = u.read_time;
+       g->io_time[WRITE]       = u.write_time;
+       g->oldest_gen           = u.oldest_gen;
+       g->gen_valid            = 1;
+
+       /*
+        * need to know if we're getting called from the invalidate path or
+        * not:
+        */
+
+       if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
+           old_m.cached_sectors) {
+               update_cached_sectors(c, fs_usage, ca->dev_idx,
+                                     -old_m.cached_sectors);
+               trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
+                                old_m.cached_sectors);
+       }
+
+       return 0;
+}
+
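+/*
+ * Saturating 16 bit add; evaluates to true on overflow: e.g. with
+ * a == U16_MAX - 1, checked_add(a, 2) leaves a == U16_MAX and returns true.
+ */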
+#define checked_add(a, b)                                      \
+({                                                             \
+       unsigned _res = (unsigned) (a) + (b);                   \
+       bool overflow = _res > U16_MAX;                         \
+       if (overflow)                                           \
+               _res = U16_MAX;                                 \
+       (a) = _res;                                             \
+       overflow;                                               \
+})
+
+static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
+                                      size_t b, enum bch_data_type data_type,
+                                      unsigned sectors, bool gc)
+{
+       struct bucket *g = __bucket(ca, b, gc);
+       struct bucket_mark old, new;
+       bool overflow;
+
+       BUG_ON(data_type != BCH_DATA_sb &&
+              data_type != BCH_DATA_journal);
+
+       old = bucket_cmpxchg(g, new, ({
+               new.data_type   = data_type;
+               overflow = checked_add(new.dirty_sectors, sectors);
+       }));
+
+       bch2_fs_inconsistent_on(old.data_type &&
+                               old.data_type != data_type, c,
+               "different types of data in same bucket: %s, %s",
+               bch2_data_types[old.data_type],
+               bch2_data_types[data_type]);
+
+       bch2_fs_inconsistent_on(overflow, c,
+               "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX",
+               ca->dev_idx, b, new.gen,
+               bch2_data_types[old.data_type ?: data_type],
+               old.dirty_sectors, sectors);
+
+       if (c)
+               bch2_dev_usage_update(c, ca, fs_usage_ptr(c, 0, gc),
+                                     old, new, gc);
+
+       return 0;
+}
+
+void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
+                              size_t b, enum bch_data_type type,
+                              unsigned sectors, struct gc_pos pos,
+                              unsigned flags)
+{
+       BUG_ON(type != BCH_DATA_sb &&
+              type != BCH_DATA_journal);
+
+       preempt_disable();
+
+       if (likely(c)) {
+               do_mark_fn(__bch2_mark_metadata_bucket, c, pos, flags,
+                          ca, b, type, sectors);
+       } else {
+               __bch2_mark_metadata_bucket(c, ca, b, type, sectors, 0);
+       }
+
+       preempt_enable();
+}
+
+static s64 disk_sectors_scaled(unsigned n, unsigned d, unsigned sectors)
+{
+       return DIV_ROUND_UP(sectors * n, d);
+}
+
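+/*
+ * Worked example for the BTREE_TRIGGER_OVERWRITE case below: a 2:1
+ * compressed extent with old_size 8 (live sectors), n = 4 (compressed_size)
+ * and d = 8 (uncompressed_size) occupies DIV_ROUND_UP(8 * 4, 8) = 4 disk
+ * sectors; trimming two live sectors (delta = -2) leaves
+ * DIV_ROUND_UP(6 * 4, 8) = 3, a net change of -1 disk sectors:
+ */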
+static s64 __ptr_disk_sectors_delta(unsigned old_size,
+                                   unsigned offset, s64 delta,
+                                   unsigned flags,
+                                   unsigned n, unsigned d)
+{
+       BUG_ON(!n || !d);
+
+       if (flags & BTREE_TRIGGER_OVERWRITE_SPLIT) {
+               BUG_ON(offset + -delta > old_size);
+
+               return -disk_sectors_scaled(n, d, old_size) +
+                       disk_sectors_scaled(n, d, offset) +
+                       disk_sectors_scaled(n, d, old_size - offset + delta);
+       } else if (flags & BTREE_TRIGGER_OVERWRITE) {
+               BUG_ON(offset + -delta > old_size);
+
+               return -disk_sectors_scaled(n, d, old_size) +
+                       disk_sectors_scaled(n, d, old_size + delta);
+       } else {
+               return  disk_sectors_scaled(n, d, delta);
+       }
+}
+
+static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
+                                 unsigned offset, s64 delta,
+                                 unsigned flags)
+{
+       return __ptr_disk_sectors_delta(p.crc.live_size,
+                                       offset, delta, flags,
+                                       p.crc.compressed_size,
+                                       p.crc.uncompressed_size);
+}
+
+static void bucket_set_stripe(struct bch_fs *c,
+                             const struct bch_extent_ptr *ptr,
+                             struct bch_fs_usage *fs_usage,
+                             u64 journal_seq,
+                             unsigned flags,
+                             bool enabled)
+{
+       bool gc = flags & BTREE_TRIGGER_GC;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+       struct bucket *g = PTR_BUCKET(ca, ptr, gc);
+       struct bucket_mark new, old;
+
+       old = bucket_cmpxchg(g, new, ({
+               new.stripe                      = enabled;
+               if (journal_seq) {
+                       new.journal_seq_valid   = 1;
+                       new.journal_seq         = journal_seq;
+               }
+       }));
+
+       bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+
+       /*
+        * XXX write repair code for these, flag stripe as possibly bad
+        */
+       if (old.gen != ptr->gen)
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                             "stripe with stale pointer");
+#if 0
+       /*
+        * We'd like to check for these, but these checks don't work
+        * yet:
+        */
+       if (old.stripe && enabled)
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                             "multiple stripes using same bucket");
+
+       if (!old.stripe && !enabled)
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                             "deleting stripe but bucket not marked as stripe bucket");
+#endif
+}
+
+static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
+                         struct extent_ptr_decoded p,
+                         s64 sectors, enum bch_data_type ptr_data_type,
+                         u8 bucket_gen, u8 *bucket_data_type,
+                         u16 *dirty_sectors, u16 *cached_sectors)
+{
+       u16 *dst_sectors = !p.ptr.cached
+               ? dirty_sectors
+               : cached_sectors;
+       u16 orig_sectors = *dst_sectors;
+       char buf[200];
+
+       if (gen_after(p.ptr.gen, bucket_gen)) {
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                       "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
+                       "while marking %s",
+                       p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
+                       bucket_gen,
+                       bch2_data_types[*bucket_data_type ?: ptr_data_type],
+                       p.ptr.gen,
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+               return -EIO;
+       }
+
+       if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) {
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                       "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
+                       "while marking %s",
+                       p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
+                       bucket_gen,
+                       bch2_data_types[*bucket_data_type ?: ptr_data_type],
+                       p.ptr.gen,
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+               return -EIO;
+       }
+
+       if (bucket_gen != p.ptr.gen && !p.ptr.cached) {
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                       "bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
+                       "while marking %s",
+                       p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
+                       bucket_gen,
+                       bch2_data_types[*bucket_data_type ?: ptr_data_type],
+                       p.ptr.gen,
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+               return -EIO;
+       }
+
+       if (bucket_gen != p.ptr.gen)
+               return 1;
+
+       if (*bucket_data_type && *bucket_data_type != ptr_data_type) {
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                       "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
+                       "while marking %s",
+                       p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
+                       bucket_gen,
+                       bch2_data_types[*bucket_data_type],
+                       bch2_data_types[ptr_data_type],
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+               return -EIO;
+       }
+
+       if (checked_add(*dst_sectors, sectors)) {
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                       "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
+                       "while marking %s",
+                       p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
+                       bucket_gen,
+                       bch2_data_types[*bucket_data_type ?: ptr_data_type],
+                       orig_sectors, sectors,
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+               return -EIO;
+       }
+
+       *bucket_data_type = *dirty_sectors || *cached_sectors
+               ? ptr_data_type : 0;
+       return 0;
+}
+
+static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k,
+                            struct extent_ptr_decoded p,
+                            s64 sectors, enum bch_data_type data_type,
+                            struct bch_fs_usage *fs_usage,
+                            u64 journal_seq, unsigned flags)
+{
+       bool gc = flags & BTREE_TRIGGER_GC;
+       struct bucket_mark old, new;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+       struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
+       u8 bucket_data_type;
+       u64 v;
+       int ret;
+
+       v = atomic64_read(&g->_mark.v);
+       do {
+               new.v.counter = old.v.counter = v;
+               bucket_data_type = new.data_type;
+
+               ret = __mark_pointer(c, k, p, sectors, data_type, new.gen,
+                                    &bucket_data_type,
+                                    &new.dirty_sectors,
+                                    &new.cached_sectors);
+               if (ret)
+                       return ret;
+
+               new.data_type = bucket_data_type;
+
+               if (journal_seq) {
+                       new.journal_seq_valid = 1;
+                       new.journal_seq = journal_seq;
+               }
+
+               if (flags & BTREE_TRIGGER_NOATOMIC) {
+                       g->_mark = new;
+                       break;
+               }
+       } while ((v = atomic64_cmpxchg(&g->_mark.v,
+                             old.v.counter,
+                             new.v.counter)) != old.v.counter);
+
+       bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+
+       BUG_ON(!gc && bucket_became_unavailable(old, new));
+
+       return 0;
+}
+
+static int bch2_mark_stripe_ptr(struct bch_fs *c,
+                               struct bch_extent_stripe_ptr p,
+                               enum bch_data_type data_type,
+                               struct bch_fs_usage *fs_usage,
+                               s64 sectors, unsigned flags,
+                               struct bch_replicas_padded *r,
+                               unsigned *nr_data,
+                               unsigned *nr_parity)
+{
+       bool gc = flags & BTREE_TRIGGER_GC;
+       struct stripe *m;
+       unsigned i, blocks_nonempty = 0;
+
+       m = genradix_ptr(&c->stripes[gc], p.idx);
+
+       spin_lock(&c->ec_stripes_heap_lock);
+
+       if (!m || !m->alive) {
+               spin_unlock(&c->ec_stripes_heap_lock);
+               bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
+                                   (u64) p.idx);
+               return -EIO;
+       }
+
+       BUG_ON(m->r.e.data_type != data_type);
+
+       *nr_data        = m->nr_blocks - m->nr_redundant;
+       *nr_parity      = m->nr_redundant;
+       *r = m->r;
+
+       m->block_sectors[p.block] += sectors;
+
+       for (i = 0; i < m->nr_blocks; i++)
+               blocks_nonempty += m->block_sectors[i] != 0;
+
+       if (m->blocks_nonempty != blocks_nonempty) {
+               m->blocks_nonempty = blocks_nonempty;
+               if (!gc)
+                       bch2_stripes_heap_update(c, m, p.idx);
+       }
+
+       spin_unlock(&c->ec_stripes_heap_lock);
+
+       return 0;
+}
+
+static int bch2_mark_extent(struct bch_fs *c,
+                           struct bkey_s_c old, struct bkey_s_c new,
+                           unsigned offset, s64 sectors,
+                           enum bch_data_type data_type,
+                           struct bch_fs_usage *fs_usage,
+                           unsigned journal_seq, unsigned flags)
+{
+       struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct bch_replicas_padded r;
+       s64 dirty_sectors = 0;
+       bool stale;
+       int ret;
+
+       r.e.data_type   = data_type;
+       r.e.nr_devs     = 0;
+       r.e.nr_required = 1;
+
+       BUG_ON(!sectors);
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               s64 disk_sectors = data_type == BCH_DATA_btree
+                       ? sectors
+                       : ptr_disk_sectors_delta(p, offset, sectors, flags);
+
+               ret = bch2_mark_pointer(c, k, p, disk_sectors, data_type,
+                                       fs_usage, journal_seq, flags);
+               if (ret < 0)
+                       return ret;
+
+               stale = ret > 0;
+
+               if (p.ptr.cached) {
+                       if (!stale)
+                               update_cached_sectors(c, fs_usage, p.ptr.dev,
+                                                     disk_sectors);
+               } else if (!p.has_ec) {
+                       dirty_sectors          += disk_sectors;
+                       r.e.devs[r.e.nr_devs++] = p.ptr.dev;
+               } else {
+                       struct bch_replicas_padded ec_r;
+                       unsigned nr_data, nr_parity;
+                       s64 parity_sectors;
+
+                       ret = bch2_mark_stripe_ptr(c, p.ec, data_type,
+                                       fs_usage, disk_sectors, flags,
+                                       &ec_r, &nr_data, &nr_parity);
+                       if (ret)
+                               return ret;
+
+                       parity_sectors =
+                               __ptr_disk_sectors_delta(p.crc.live_size,
+                                       offset, sectors, flags,
+                                       p.crc.compressed_size * nr_parity,
+                                       p.crc.uncompressed_size * nr_data);
+
+                       update_replicas(c, fs_usage, &ec_r.e,
+                                       disk_sectors + parity_sectors);
+
+                       /*
+                        * There may be other dirty pointers in this extent, but
+                        * if so they're not required for mounting if we have an
+                        * erasure coded pointer in this extent:
+                        */
+                       r.e.nr_required = 0;
+               }
+       }
+
+       if (r.e.nr_devs)
+               update_replicas(c, fs_usage, &r.e, dirty_sectors);
+
+       return 0;
+}
+
+static int bch2_mark_stripe(struct bch_fs *c,
+                           struct bkey_s_c old, struct bkey_s_c new,
+                           struct bch_fs_usage *fs_usage,
+                           u64 journal_seq, unsigned flags)
+{
+       bool gc = flags & BTREE_TRIGGER_GC;
+       size_t idx = new.k->p.offset;
+       const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(old).v : NULL;
+       const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(new).v : NULL;
+       struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
+       unsigned i;
+
+       if (!m || (old_s && !m->alive)) {
+               bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
+                                   idx);
+               return -1;
+       }
+
+       if (!new_s) {
+               /* Deleting: */
+               for (i = 0; i < old_s->nr_blocks; i++)
+                       bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
+                                         journal_seq, flags, false);
+
+               if (!gc && m->on_heap) {
+                       spin_lock(&c->ec_stripes_heap_lock);
+                       bch2_stripes_heap_del(c, m, idx);
+                       spin_unlock(&c->ec_stripes_heap_lock);
+               }
+
+               memset(m, 0, sizeof(*m));
+       } else {
+               BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
+               BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant);
+
+               for (i = 0; i < new_s->nr_blocks; i++) {
+                       if (!old_s ||
+                           memcmp(new_s->ptrs + i,
+                                  old_s->ptrs + i,
+                                  sizeof(struct bch_extent_ptr))) {
+
+                               if (old_s)
+                                       bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
+                                                         journal_seq, flags, false);
+                               bucket_set_stripe(c, new_s->ptrs + i, fs_usage,
+                                                 journal_seq, flags, true);
+                       }
+               }
+
+               m->alive        = true;
+               m->sectors      = le16_to_cpu(new_s->sectors);
+               m->algorithm    = new_s->algorithm;
+               m->nr_blocks    = new_s->nr_blocks;
+               m->nr_redundant = new_s->nr_redundant;
+
+               bch2_bkey_to_replicas(&m->r.e, new);
+
+               /* gc recalculates these fields: */
+               if (!(flags & BTREE_TRIGGER_GC)) {
+                       m->blocks_nonempty = 0;
+
+                       for (i = 0; i < new_s->nr_blocks; i++) {
+                               m->block_sectors[i] =
+                                       stripe_blockcount_get(new_s, i);
+                               m->blocks_nonempty += !!m->block_sectors[i];
+                       }
+               }
+
+               if (!gc) {
+                       spin_lock(&c->ec_stripes_heap_lock);
+                       bch2_stripes_heap_update(c, m, idx);
+                       spin_unlock(&c->ec_stripes_heap_lock);
+               }
+       }
+
+       return 0;
+}
+
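+/*
+ * Apply the disk usage changes for a single key, dispatching on key type;
+ * the caller must hold c->mark_lock (bch2_mark_key() below is the locking
+ * wrapper).
+ */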
+static int bch2_mark_key_locked(struct bch_fs *c,
+                  struct bkey_s_c old,
+                  struct bkey_s_c new,
+                  unsigned offset, s64 sectors,
+                  struct bch_fs_usage *fs_usage,
+                  u64 journal_seq, unsigned flags)
+{
+       struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
+       int ret = 0;
+
+       BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)));
+
+       preempt_disable();
+
+       if (!fs_usage || (flags & BTREE_TRIGGER_GC))
+               fs_usage = fs_usage_ptr(c, journal_seq,
+                                       flags & BTREE_TRIGGER_GC);
+
+       switch (k.k->type) {
+       case KEY_TYPE_alloc:
+               ret = bch2_mark_alloc(c, old, new, fs_usage, journal_seq, flags);
+               break;
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
+               sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
+                       ?  c->opts.btree_node_size
+                       : -c->opts.btree_node_size;
+
+               ret = bch2_mark_extent(c, old, new, offset, sectors,
+                               BCH_DATA_btree, fs_usage, journal_seq, flags);
+               break;
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
+               ret = bch2_mark_extent(c, old, new, offset, sectors,
+                               BCH_DATA_user, fs_usage, journal_seq, flags);
+               break;
+       case KEY_TYPE_stripe:
+               ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags);
+               break;
+       case KEY_TYPE_inode:
+               if (!(flags & BTREE_TRIGGER_OVERWRITE))
+                       fs_usage->nr_inodes++;
+               else
+                       fs_usage->nr_inodes--;
+               break;
+       case KEY_TYPE_reservation: {
+               unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+               sectors *= replicas;
+               replicas = clamp_t(unsigned, replicas, 1,
+                                  ARRAY_SIZE(fs_usage->persistent_reserved));
+
+               fs_usage->reserved                              += sectors;
+               fs_usage->persistent_reserved[replicas - 1]     += sectors;
+               break;
+       }
+       }
+
+       preempt_enable();
+
+       return ret;
+}
+
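+/*
+ * Convenience wrapper that takes mark_lock and marks @new as a plain insert,
+ * with no old version of the key being overwritten:
+ */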
+int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new,
+                 unsigned offset, s64 sectors,
+                 struct bch_fs_usage *fs_usage,
+                 u64 journal_seq, unsigned flags)
+{
+       struct bkey deleted;
+       struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+       int ret;
+
+       bkey_init(&deleted);
+
+       percpu_down_read(&c->mark_lock);
+       ret = bch2_mark_key_locked(c, old, new, offset, sectors,
+                                  fs_usage, journal_seq,
+                                  BTREE_TRIGGER_INSERT|flags);
+       percpu_up_read(&c->mark_lock);
+
+       return ret;
+}
+
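+/*
+ * Mark a key being inserted by a transaction: for non extent btrees, mark
+ * the old and new versions of the key; for extent btrees, also walk every
+ * existing key the insert overlaps and mark the overwritten portion with a
+ * negative sector count.
+ */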
+int bch2_mark_update(struct btree_trans *trans,
+                    struct btree_iter *iter,
+                    struct bkey_i *new,
+                    struct bch_fs_usage *fs_usage,
+                    unsigned flags)
+{
+       struct bch_fs           *c = trans->c;
+       struct btree            *b = iter_l(iter)->b;
+       struct btree_node_iter  node_iter = iter_l(iter)->iter;
+       struct bkey_packed      *_old;
+       struct bkey_s_c         old;
+       struct bkey             unpacked;
+       int ret = 0;
+
+       if (unlikely(flags & BTREE_TRIGGER_NORUN))
+               return 0;
+
+       if (!btree_node_type_needs_gc(iter->btree_id))
+               return 0;
+
+       bkey_init(&unpacked);
+       old = (struct bkey_s_c) { &unpacked, NULL };
+
+       if (!btree_node_type_is_extents(iter->btree_id)) {
+               if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
+                       _old = bch2_btree_node_iter_peek(&node_iter, b);
+                       if (_old)
+                               old = bkey_disassemble(b, _old, &unpacked);
+               } else {
+                       struct bkey_cached *ck = (void *) iter->l[0].b;
+
+                       if (ck->valid)
+                               old = bkey_i_to_s_c(ck->k);
+               }
+
+               if (old.k->type == new->k.type) {
+                       bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
+                               fs_usage, trans->journal_res.seq,
+                               BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
+
+               } else {
+                       bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
+                               fs_usage, trans->journal_res.seq,
+                               BTREE_TRIGGER_INSERT|flags);
+                       bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, 0,
+                               fs_usage, trans->journal_res.seq,
+                               BTREE_TRIGGER_OVERWRITE|flags);
+               }
+       } else {
+               BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
+               bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
+                       0, new->k.size,
+                       fs_usage, trans->journal_res.seq,
+                       BTREE_TRIGGER_INSERT|flags);
+
+               while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
+                       unsigned offset = 0;
+                       s64 sectors;
+
+                       old = bkey_disassemble(b, _old, &unpacked);
+                       sectors = -((s64) old.k->size);
+
+                       flags |= BTREE_TRIGGER_OVERWRITE;
+
+                       if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
+                               return 0;
+
+                       switch (bch2_extent_overlap(&new->k, old.k)) {
+                       case BCH_EXTENT_OVERLAP_ALL:
+                               offset = 0;
+                               sectors = -((s64) old.k->size);
+                               break;
+                       case BCH_EXTENT_OVERLAP_BACK:
+                               offset = bkey_start_offset(&new->k) -
+                                       bkey_start_offset(old.k);
+                               sectors = bkey_start_offset(&new->k) -
+                                       old.k->p.offset;
+                               break;
+                       case BCH_EXTENT_OVERLAP_FRONT:
+                               offset = 0;
+                               sectors = bkey_start_offset(old.k) -
+                                       new->k.p.offset;
+                               break;
+                       case BCH_EXTENT_OVERLAP_MIDDLE:
+                               offset = bkey_start_offset(&new->k) -
+                                       bkey_start_offset(old.k);
+                               sectors = -((s64) new->k.size);
+                               flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
+                               break;
+                       }
+
+                       BUG_ON(sectors >= 0);
+
+                       ret = bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
+                                       offset, sectors, fs_usage,
+                                       trans->journal_res.seq, flags) ?: 1;
+                       if (ret <= 0)
+                               break;
+
+                       bch2_btree_node_iter_advance(&node_iter, b);
+               }
+       }
+
+       return ret;
+}
+
+void bch2_trans_fs_usage_apply(struct btree_trans *trans,
+                              struct bch_fs_usage *fs_usage)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
+       static int warned_disk_usage = 0;
+       u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
+       char buf[200];
+
+       if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res,
+                                trans->journal_res.seq) ||
+           warned_disk_usage ||
+           xchg(&warned_disk_usage, 1))
+               return;
+
+       bch_err(c, "disk usage increased more than %llu sectors reserved",
+               disk_res_sectors);
+
+       trans_for_each_update(trans, i) {
+               pr_err("while inserting");
+               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
+               pr_err("%s", buf);
+               pr_err("overlapping with");
+
+               if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) {
+                       struct btree            *b = iter_l(i->iter)->b;
+                       struct btree_node_iter  node_iter = iter_l(i->iter)->iter;
+                       struct bkey_packed      *_k;
+
+                       while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
+                               struct bkey             unpacked;
+                               struct bkey_s_c         k;
+
+                               pr_info("_k %px format %u", _k, _k->format);
+                               k = bkey_disassemble(b, _k, &unpacked);
+
+                               if (btree_node_is_extents(b)
+                                   ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
+                                   : bkey_cmp(i->k->k.p, k.k->p))
+                                       break;
+
+                               bch2_bkey_val_to_text(&PBUF(buf), c, k);
+                               pr_err("%s", buf);
+
+                               bch2_btree_node_iter_advance(&node_iter, b);
+                       }
+               } else {
+                       struct bkey_cached *ck = (void *) i->iter->l[0].b;
+
+                       if (ck->valid) {
+                               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k));
+                               pr_err("%s", buf);
+                       }
+               }
+       }
+}
+
+/* trans_mark: */
+
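+/*
+ * Check whether the transaction already has a pending update covering @pos
+ * in @btree_id, so that triggers see their own (not yet committed) updates:
+ */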
+static struct btree_iter *trans_get_update(struct btree_trans *trans,
+                           enum btree_id btree_id, struct bpos pos,
+                           struct bkey_s_c *k)
+{
+       struct btree_insert_entry *i;
+
+       trans_for_each_update(trans, i)
+               if (i->iter->btree_id == btree_id &&
+                   (btree_node_type_is_extents(btree_id)
+                    ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
+                      bkey_cmp(pos, i->k->k.p) < 0
+                    : !bkey_cmp(pos, i->iter->pos))) {
+                       *k = bkey_i_to_s_c(i->k);
+                       return i->iter;
+               }
+
+       return NULL;
+}
+
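+/*
+ * Look up the key at @pos, preferring the transaction's own pending updates:
+ * returns 1 if @k came from a pending update (and @iter is borrowed from
+ * it), 0 if a fresh iterator was used, or a negative error code.
+ */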
+static int trans_get_key(struct btree_trans *trans,
+                        enum btree_id btree_id, struct bpos pos,
+                        struct btree_iter **iter,
+                        struct bkey_s_c *k)
+{
+       unsigned flags = btree_id != BTREE_ID_ALLOC
+               ? BTREE_ITER_SLOTS
+               : BTREE_ITER_CACHED;
+       int ret;
+
+       *iter = trans_get_update(trans, btree_id, pos, k);
+       if (*iter)
+               return 1;
+
+       *iter = bch2_trans_get_iter(trans, btree_id, pos,
+                                   flags|BTREE_ITER_INTENT);
+       if (IS_ERR(*iter))
+               return PTR_ERR(*iter);
+
+       *k = __bch2_btree_iter_peek(*iter, flags);
+       ret = bkey_err(*k);
+       if (ret)
+               bch2_trans_iter_put(trans, *iter);
+       return ret;
+}
+
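+/*
+ * Transactional version of marking a pointer: update the alloc key for the
+ * bucket the pointer points into to reflect @sectors of @data_type being
+ * added or removed, so the change is journalled along with the rest of the
+ * transaction.
+ */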
+static int bch2_trans_mark_pointer(struct btree_trans *trans,
+                       struct bkey_s_c k, struct extent_ptr_decoded p,
+                       s64 sectors, enum bch_data_type data_type)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+       struct bpos pos = POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr));
+       struct btree_iter *iter;
+       struct bkey_s_c k_a;
+       struct bkey_alloc_unpacked u;
+       struct bkey_i_alloc *a;
+       struct bucket *g;
+       int ret;
+
+       iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k_a);
+       if (iter) {
+               u = bch2_alloc_unpack(k_a);
+       } else {
+               iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos,
+                                          BTREE_ITER_CACHED|
+                                          BTREE_ITER_CACHED_NOFILL|
+                                          BTREE_ITER_INTENT);
+               if (IS_ERR(iter))
+                       return PTR_ERR(iter);
+
+               ret = bch2_btree_iter_traverse(iter);
+               if (ret)
+                       goto out;
+
+               percpu_down_read(&c->mark_lock);
+               g = bucket(ca, pos.offset);
+               u = alloc_mem_to_key(g, READ_ONCE(g->mark));
+               percpu_up_read(&c->mark_lock);
+       }
+
+       ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type,
+                            &u.dirty_sectors, &u.cached_sectors);
+       if (ret)
+               goto out;
+
+       a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               goto out;
+
+       bkey_alloc_init(&a->k_i);
+       a->k.p = pos;
+       bch2_alloc_pack(a, u);
+       bch2_trans_update(trans, iter, &a->k_i, 0);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
+                       struct bch_extent_stripe_ptr p,
+                       s64 sectors, enum bch_data_type data_type,
+                       struct bch_replicas_padded *r,
+                       unsigned *nr_data,
+                       unsigned *nr_parity)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_i_stripe *s;
+       int ret = 0;
+
+       ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
+       if (ret < 0)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_stripe) {
+               bch2_fs_inconsistent(c,
+                       "pointer to nonexistent stripe %llu",
+                       (u64) p.idx);
+               ret = -EIO;
+               goto out;
+       }
+
+       s = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       ret = PTR_ERR_OR_ZERO(s);
+       if (ret)
+               goto out;
+
+       bkey_reassemble(&s->k_i, k);
+
+       stripe_blockcount_set(&s->v, p.block,
+               stripe_blockcount_get(&s->v, p.block) +
+               sectors);
+
+       *nr_data        = s->v.nr_blocks - s->v.nr_redundant;
+       *nr_parity      = s->v.nr_redundant;
+       bch2_bkey_to_replicas(&r->e, bkey_i_to_s_c(&s->k_i));
+       bch2_trans_update(trans, iter, &s->k_i, 0);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_trans_mark_extent(struct btree_trans *trans,
+                       struct bkey_s_c k, unsigned offset,
+                       s64 sectors, unsigned flags,
+                       enum bch_data_type data_type)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct bch_replicas_padded r;
+       s64 dirty_sectors = 0;
+       bool stale;
+       int ret;
+
+       r.e.data_type   = data_type;
+       r.e.nr_devs     = 0;
+       r.e.nr_required = 1;
+
+       BUG_ON(!sectors);
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               s64 disk_sectors = data_type == BCH_DATA_btree
+                       ? sectors
+                       : ptr_disk_sectors_delta(p, offset, sectors, flags);
+
+               ret = bch2_trans_mark_pointer(trans, k, p, disk_sectors,
+                                             data_type);
+               if (ret < 0)
+                       return ret;
+
+               stale = ret > 0;
+
+               if (p.ptr.cached) {
+                       if (!stale)
+                               update_cached_sectors_list(trans, p.ptr.dev,
+                                                          disk_sectors);
+               } else if (!p.has_ec) {
+                       dirty_sectors          += disk_sectors;
+                       r.e.devs[r.e.nr_devs++] = p.ptr.dev;
+               } else {
+                       struct bch_replicas_padded ec_r;
+                       unsigned nr_data, nr_parity;
+                       s64 parity_sectors;
+
+                       ret = bch2_trans_mark_stripe_ptr(trans, p.ec,
+                                       disk_sectors, data_type,
+                                       &ec_r, &nr_data, &nr_parity);
+                       if (ret)
+                               return ret;
+
+                       parity_sectors =
+                               __ptr_disk_sectors_delta(p.crc.live_size,
+                                       offset, sectors, flags,
+                                       p.crc.compressed_size * nr_parity,
+                                       p.crc.uncompressed_size * nr_data);
+
+                       update_replicas_list(trans, &ec_r.e,
+                                            disk_sectors + parity_sectors);
+
+                       r.e.nr_required = 0;
+               }
+       }
+
+       if (r.e.nr_devs)
+               update_replicas_list(trans, &r.e, dirty_sectors);
+
+       return 0;
+}
+
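+/*
+ * Adjust the refcount of the indirect extent at @idx by +1 (insert) or -1
+ * (overwrite), deleting it when the refcount hits zero. Returns the number
+ * of sectors processed, which may be less than @sectors if the indirect
+ * extent ends first - the caller loops until the whole range is done.
+ */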
+static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                       struct bkey_s_c_reflink_p p,
+                       u64 idx, unsigned sectors,
+                       unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_i_reflink_v *r_v;
+       s64 ret;
+
+       ret = trans_get_key(trans, BTREE_ID_REFLINK,
+                           POS(0, idx), &iter, &k);
+       if (ret < 0)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_reflink_v) {
+               bch2_fs_inconsistent(c,
+                       "%llu:%llu len %u points to nonexistent indirect extent %llu",
+                       p.k->p.inode, p.k->p.offset, p.k->size, idx);
+               ret = -EIO;
+               goto err;
+       }
+
+       if ((flags & BTREE_TRIGGER_OVERWRITE) &&
+           (bkey_start_offset(k.k) < idx ||
+            k.k->p.offset > idx + sectors))
+               goto out;
+
+       sectors = k.k->p.offset - idx;
+
+       r_v = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       ret = PTR_ERR_OR_ZERO(r_v);
+       if (ret)
+               goto err;
+
+       bkey_reassemble(&r_v->k_i, k);
+
+       le64_add_cpu(&r_v->v.refcount,
+                    !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1);
+
+       if (!r_v->v.refcount) {
+               r_v->k.type = KEY_TYPE_deleted;
+               set_bkey_val_u64s(&r_v->k, 0);
+       }
+
+       bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
+       BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+
+       bch2_trans_update(trans, iter, &r_v->k_i, 0);
+out:
+       ret = sectors;
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                       struct bkey_s_c_reflink_p p, unsigned offset,
+                       s64 sectors, unsigned flags)
+{
+       u64 idx = le64_to_cpu(p.v->idx) + offset;
+       s64 ret = 0;
+
+       sectors = abs(sectors);
+       BUG_ON(offset + sectors > p.k->size);
+
+       while (sectors) {
+               ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags);
+               if (ret < 0)
+                       break;
+
+               idx += ret;
+               sectors = max_t(s64, 0LL, sectors - ret);
+               ret = 0;
+       }
+
+       return ret;
+}
+
+int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
+                       unsigned offset, s64 sectors, unsigned flags)
+{
+       struct replicas_delta_list *d;
+       struct bch_fs *c = trans->c;
+
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
+               sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
+                       ?  c->opts.btree_node_size
+                       : -c->opts.btree_node_size;
+
+               return bch2_trans_mark_extent(trans, k, offset, sectors,
+                                             flags, BCH_DATA_btree);
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
+               return bch2_trans_mark_extent(trans, k, offset, sectors,
+                                             flags, BCH_DATA_user);
+       case KEY_TYPE_inode:
+               d = replicas_deltas_realloc(trans, 0);
+
+               if (!(flags & BTREE_TRIGGER_OVERWRITE))
+                       d->nr_inodes++;
+               else
+                       d->nr_inodes--;
+               return 0;
+       case KEY_TYPE_reservation: {
+               unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+               d = replicas_deltas_realloc(trans, 0);
+
+               sectors *= replicas;
+               replicas = clamp_t(unsigned, replicas, 1,
+                                  ARRAY_SIZE(d->persistent_reserved));
+
+               d->persistent_reserved[replicas - 1] += sectors;
+               return 0;
+       }
+       case KEY_TYPE_reflink_p:
+               return bch2_trans_mark_reflink_p(trans,
+                                       bkey_s_c_to_reflink_p(k),
+                                       offset, sectors, flags);
+       default:
+               return 0;
+       }
+}
+
+int bch2_trans_mark_update(struct btree_trans *trans,
+                          struct btree_iter *iter,
+                          struct bkey_i *insert,
+                          unsigned flags)
+{
+       struct btree            *b = iter_l(iter)->b;
+       struct btree_node_iter  node_iter = iter_l(iter)->iter;
+       struct bkey_packed      *_k;
+       int ret;
+
+       if (unlikely(flags & BTREE_TRIGGER_NORUN))
+               return 0;
+
+       if (!btree_node_type_needs_gc(iter->btree_id))
+               return 0;
+
+       ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert),
+                       0, insert->k.size, BTREE_TRIGGER_INSERT);
+       if (ret)
+               return ret;
+
+       if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
+               struct bkey_cached *ck = (void *) iter->l[0].b;
+
+               return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k),
+                                          0, 0, BTREE_TRIGGER_OVERWRITE);
+       }
+
+       while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
+               struct bkey             unpacked;
+               struct bkey_s_c         k;
+               unsigned                offset = 0;
+               s64                     sectors = 0;
+               unsigned                flags = BTREE_TRIGGER_OVERWRITE;
+
+               k = bkey_disassemble(b, _k, &unpacked);
+
+               if (btree_node_is_extents(b)
+                   ? bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0
+                   : bkey_cmp(insert->k.p, k.k->p))
+                       break;
+
+               if (btree_node_is_extents(b)) {
+                       switch (bch2_extent_overlap(&insert->k, k.k)) {
+                       case BCH_EXTENT_OVERLAP_ALL:
+                               offset = 0;
+                               sectors = -((s64) k.k->size);
+                               break;
+                       case BCH_EXTENT_OVERLAP_BACK:
+                               offset = bkey_start_offset(&insert->k) -
+                                       bkey_start_offset(k.k);
+                               sectors = bkey_start_offset(&insert->k) -
+                                       k.k->p.offset;
+                               break;
+                       case BCH_EXTENT_OVERLAP_FRONT:
+                               offset = 0;
+                               sectors = bkey_start_offset(k.k) -
+                                       insert->k.p.offset;
+                               break;
+                       case BCH_EXTENT_OVERLAP_MIDDLE:
+                               offset = bkey_start_offset(&insert->k) -
+                                       bkey_start_offset(k.k);
+                               sectors = -((s64) insert->k.size);
+                               flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
+                               break;
+                       }
+
+                       BUG_ON(sectors >= 0);
+               }
+
+               ret = bch2_trans_mark_key(trans, k, offset, sectors, flags);
+               if (ret)
+                       return ret;
+
+               bch2_btree_node_iter_advance(&node_iter, b);
+       }
+
+       return 0;
+}
+
+/* Disk reservations: */
+
+static u64 bch2_recalc_sectors_available(struct bch_fs *c)
+{
+       percpu_u64_set(&c->pcpu->sectors_available, 0);
+
+       return avail_factor(__bch2_fs_usage_read_short(c).free);
+}
+
+void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
+{
+       percpu_down_read(&c->mark_lock);
+       this_cpu_sub(c->usage[0]->online_reserved,
+                    res->sectors);
+       percpu_up_read(&c->mark_lock);
+
+       res->sectors = 0;
+}
+
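+/*
+ * To avoid contention on the global sectors_available counter, each CPU
+ * grabs SECTORS_CACHE sectors at a time from it and then allocates from the
+ * local cache:
+ */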
+#define SECTORS_CACHE  1024
+
+int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
+                             unsigned sectors, int flags)
+{
+       struct bch_fs_pcpu *pcpu;
+       u64 old, v, get;
+       s64 sectors_available;
+       int ret;
+
+       percpu_down_read(&c->mark_lock);
+       preempt_disable();
+       pcpu = this_cpu_ptr(c->pcpu);
+
+       if (sectors <= pcpu->sectors_available)
+               goto out;
+
+       v = atomic64_read(&c->sectors_available);
+       do {
+               old = v;
+               get = min((u64) sectors + SECTORS_CACHE, old);
+
+               if (get < sectors) {
+                       preempt_enable();
+                       percpu_up_read(&c->mark_lock);
+                       goto recalculate;
+               }
+       } while ((v = atomic64_cmpxchg(&c->sectors_available,
+                                      old, old - get)) != old);
+
+       pcpu->sectors_available         += get;
+
+out:
+       pcpu->sectors_available         -= sectors;
+       this_cpu_add(c->usage[0]->online_reserved, sectors);
+       res->sectors                    += sectors;
+
+       preempt_enable();
+       percpu_up_read(&c->mark_lock);
+       return 0;
+
+recalculate:
+       percpu_down_write(&c->mark_lock);
+
+       sectors_available = bch2_recalc_sectors_available(c);
+
+       if (sectors <= sectors_available ||
+           (flags & BCH_DISK_RESERVATION_NOFAIL)) {
+               atomic64_set(&c->sectors_available,
+                            max_t(s64, 0, sectors_available - sectors));
+               this_cpu_add(c->usage[0]->online_reserved, sectors);
+               res->sectors                    += sectors;
+               ret = 0;
+       } else {
+               atomic64_set(&c->sectors_available, sectors_available);
+               ret = -ENOSPC;
+       }
+
+       percpu_up_write(&c->mark_lock);
+
+       return ret;
+}
+
+/* Startup/shutdown: */
+
+static void buckets_free_rcu(struct rcu_head *rcu)
+{
+       struct bucket_array *buckets =
+               container_of(rcu, struct bucket_array, rcu);
+
+       kvpfree(buckets,
+               sizeof(struct bucket_array) +
+               buckets->nbuckets * sizeof(struct bucket));
+}
+
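+/*
+ * Allocate a new bucket array (and the freelists that go with it) sized for
+ * @nbuckets, copy over the old contents on a resize, publish the new array
+ * with RCU, and free the old one after a grace period.
+ */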
+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
+{
+       struct bucket_array *buckets = NULL, *old_buckets = NULL;
+       unsigned long *buckets_nouse = NULL;
+       alloc_fifo      free[RESERVE_NR];
+       alloc_fifo      free_inc;
+       alloc_heap      alloc_heap;
+
+       size_t btree_reserve    = DIV_ROUND_UP(BTREE_NODE_RESERVE,
+                            ca->mi.bucket_size / c->opts.btree_node_size);
+       /* XXX: these should be tunable */
+       size_t reserve_none     = max_t(size_t, 1, nbuckets >> 9);
+       size_t copygc_reserve   = max_t(size_t, 2, nbuckets >> 7);
+       size_t free_inc_nr      = max(max_t(size_t, 1, nbuckets >> 12),
+                                     btree_reserve * 2);
+       bool resize = ca->buckets[0] != NULL;
+       int ret = -ENOMEM;
+       unsigned i;
+
+       memset(&free,           0, sizeof(free));
+       memset(&free_inc,       0, sizeof(free_inc));
+       memset(&alloc_heap,     0, sizeof(alloc_heap));
+
+       if (!(buckets           = kvpmalloc(sizeof(struct bucket_array) +
+                                           nbuckets * sizeof(struct bucket),
+                                           GFP_KERNEL|__GFP_ZERO)) ||
+           !(buckets_nouse     = kvpmalloc(BITS_TO_LONGS(nbuckets) *
+                                           sizeof(unsigned long),
+                                           GFP_KERNEL|__GFP_ZERO)) ||
+           !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
+           !init_fifo(&free[RESERVE_MOVINGGC],
+                      copygc_reserve, GFP_KERNEL) ||
+           !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
+           !init_fifo(&free_inc,       free_inc_nr, GFP_KERNEL) ||
+           !init_heap(&alloc_heap,     ALLOC_SCAN_BATCH(ca) << 1, GFP_KERNEL))
+               goto err;
+
+       buckets->first_bucket   = ca->mi.first_bucket;
+       buckets->nbuckets       = nbuckets;
+
+       bch2_copygc_stop(c);
+
+       if (resize) {
+               down_write(&c->gc_lock);
+               down_write(&ca->bucket_lock);
+               percpu_down_write(&c->mark_lock);
+       }
+
+       old_buckets = bucket_array(ca);
+
+       if (resize) {
+               size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
+
+               memcpy(buckets->b,
+                      old_buckets->b,
+                      n * sizeof(struct bucket));
+               memcpy(buckets_nouse,
+                      ca->buckets_nouse,
+                      BITS_TO_LONGS(n) * sizeof(unsigned long));
+       }
+
+       rcu_assign_pointer(ca->buckets[0], buckets);
+       buckets = old_buckets;
+
+       swap(ca->buckets_nouse, buckets_nouse);
+
+       if (resize) {
+               percpu_up_write(&c->mark_lock);
+               up_write(&c->gc_lock);
+       }
+
+       spin_lock(&c->freelist_lock);
+       for (i = 0; i < RESERVE_NR; i++) {
+               fifo_move(&free[i], &ca->free[i]);
+               swap(ca->free[i], free[i]);
+       }
+       fifo_move(&free_inc, &ca->free_inc);
+       swap(ca->free_inc, free_inc);
+       spin_unlock(&c->freelist_lock);
+
+       /* with gc lock held, alloc_heap can't be in use: */
+       swap(ca->alloc_heap, alloc_heap);
+
+       nbuckets = ca->mi.nbuckets;
+
+       if (resize)
+               up_write(&ca->bucket_lock);
+
+       ret = 0;
+err:
+       free_heap(&alloc_heap);
+       free_fifo(&free_inc);
+       for (i = 0; i < RESERVE_NR; i++)
+               free_fifo(&free[i]);
+       kvpfree(buckets_nouse,
+               BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+       /*
+        * On success, @buckets was swapped with the old array above; on the
+        * error path @old_buckets is still NULL and must not be passed to
+        * call_rcu():
+        */
+       if (old_buckets)
+               call_rcu(&old_buckets->rcu, buckets_free_rcu);
+
+       return ret;
+}
+
+void bch2_dev_buckets_free(struct bch_dev *ca)
+{
+       unsigned i;
+
+       free_heap(&ca->alloc_heap);
+       free_fifo(&ca->free_inc);
+       for (i = 0; i < RESERVE_NR; i++)
+               free_fifo(&ca->free[i]);
+       kvpfree(ca->buckets_nouse,
+               BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
+       kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
+               sizeof(struct bucket_array) +
+               ca->mi.nbuckets * sizeof(struct bucket));
+
+       free_percpu(ca->usage[0]);
+}
+
+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
+{
+       if (!(ca->usage[0] = alloc_percpu(struct bch_dev_usage)))
+               return -ENOMEM;
+
+       return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
+}
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
new file mode 100644 (file)
index 0000000..a3873be
--- /dev/null
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Code for manipulating bucket marks for garbage collection.
+ *
+ * Copyright 2014 Datera, Inc.
+ */
+
+#ifndef _BUCKETS_H
+#define _BUCKETS_H
+
+#include "buckets_types.h"
+#include "super.h"
+
+#define for_each_bucket(_b, _buckets)                          \
+       for (_b = (_buckets)->b + (_buckets)->first_bucket;     \
+            _b < (_buckets)->b + (_buckets)->nbuckets; _b++)
+
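+/*
+ * Atomically update a bucket's mark: @expr modifies @new, and the cmpxchg
+ * loop retries if the mark changed underneath us. Callers use it along the
+ * lines of (sketch):
+ *
+ *     old = bucket_cmpxchg(g, new, new.dirty_sectors += sectors);
+ */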
+#define bucket_cmpxchg(g, new, expr)                           \
+({                                                             \
+       struct bucket *_g = g;                                  \
+       u64 _v = atomic64_read(&(g)->_mark.v);                  \
+       struct bucket_mark _old;                                \
+                                                               \
+       do {                                                    \
+               (new).v.counter = _old.v.counter = _v;          \
+               expr;                                           \
+       } while ((_v = atomic64_cmpxchg(&(_g)->_mark.v,         \
+                              _old.v.counter,                  \
+                              (new).v.counter)) != _old.v.counter);\
+       _old;                                                   \
+})
+
+static inline struct bucket_array *__bucket_array(struct bch_dev *ca,
+                                                 bool gc)
+{
+       return rcu_dereference_check(ca->buckets[gc],
+                                    !ca->fs ||
+                                    percpu_rwsem_is_held(&ca->fs->mark_lock) ||
+                                    lockdep_is_held(&ca->fs->gc_lock) ||
+                                    lockdep_is_held(&ca->bucket_lock));
+}
+
+static inline struct bucket_array *bucket_array(struct bch_dev *ca)
+{
+       return __bucket_array(ca, false);
+}
+
+static inline struct bucket *__bucket(struct bch_dev *ca, size_t b, bool gc)
+{
+       struct bucket_array *buckets = __bucket_array(ca, gc);
+
+       BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
+       return buckets->b + b;
+}
+
+static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
+{
+       return __bucket(ca, b, false);
+}
+
+static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw)
+{
+       return c->bucket_clock[rw].hand - g->io_time[rw];
+}
+
+/*
+ * bucket_gc_gen() returns the difference between the bucket's current gen and
+ * the oldest gen of any pointer into that bucket in the btree.
+ */
+
+static inline u8 bucket_gc_gen(struct bch_dev *ca, size_t b)
+{
+       struct bucket *g = bucket(ca, b);
+
+       return g->mark.gen - g->oldest_gen;
+}
+
+static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
+                                  const struct bch_extent_ptr *ptr)
+{
+       return sector_to_bucket(ca, ptr->offset);
+}
+
+static inline struct bucket *PTR_BUCKET(struct bch_dev *ca,
+                                       const struct bch_extent_ptr *ptr,
+                                       bool gc)
+{
+       return __bucket(ca, PTR_BUCKET_NR(ca, ptr), gc);
+}
+
+static inline enum bch_data_type ptr_data_type(const struct bkey *k,
+                                              const struct bch_extent_ptr *ptr)
+{
+       if (k->type == KEY_TYPE_btree_ptr ||
+           k->type == KEY_TYPE_btree_ptr_v2)
+               return BCH_DATA_btree;
+
+       return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
+}
+
+static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca,
+                                                const struct bch_extent_ptr *ptr)
+{
+       struct bucket_mark m;
+
+       rcu_read_lock();
+       m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark);
+       rcu_read_unlock();
+
+       return m;
+}
+
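+/*
+ * Bucket generation numbers are 8 bits and wrap around: compare them via
+ * signed difference, so that e.g. gen_cmp(1, 255) is positive (gen 1 is
+ * newer than gen 255):
+ */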
+static inline int gen_cmp(u8 a, u8 b)
+{
+       return (s8) (a - b);
+}
+
+static inline int gen_after(u8 a, u8 b)
+{
+       int r = gen_cmp(a, b);
+
+       return r > 0 ? r : 0;
+}
+
+/**
+ * ptr_stale() - check if a pointer points into a bucket that has been
+ * invalidated.
+ */
+static inline u8 ptr_stale(struct bch_dev *ca,
+                          const struct bch_extent_ptr *ptr)
+{
+       return gen_after(ptr_bucket_mark(ca, ptr).gen, ptr->gen);
+}
+
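+/*
+ * For compressed extents, scale the live (uncompressed) sectors by the
+ * extent's compression ratio to get sectors actually used on disk: e.g. 64
+ * live sectors of an extent compressed from 128 down to 32 sectors take up
+ * DIV_ROUND_UP(64 * 32, 128) = 16 sectors.
+ */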
+static inline s64 __ptr_disk_sectors(struct extent_ptr_decoded p,
+                                    unsigned live_size)
+{
+       return live_size && p.crc.compression_type
+               ? max(1U, DIV_ROUND_UP(live_size * p.crc.compressed_size,
+                                      p.crc.uncompressed_size))
+               : live_size;
+}
+
+static inline s64 ptr_disk_sectors(struct extent_ptr_decoded p)
+{
+       return __ptr_disk_sectors(p, p.crc.live_size);
+}
+
+/* bucket gc marks */
+
+static inline unsigned bucket_sectors_used(struct bucket_mark mark)
+{
+       return mark.dirty_sectors + mark.cached_sectors;
+}
+
+static inline bool bucket_unused(struct bucket_mark mark)
+{
+       return !mark.owned_by_allocator &&
+               !mark.data_type &&
+               !bucket_sectors_used(mark);
+}
+
+static inline bool is_available_bucket(struct bucket_mark mark)
+{
+       return (!mark.owned_by_allocator &&
+               !mark.dirty_sectors &&
+               !mark.stripe);
+}
+
+static inline bool bucket_needs_journal_commit(struct bucket_mark m,
+                                              u16 last_seq_ondisk)
+{
+       return m.journal_seq_valid &&
+               ((s16) m.journal_seq - (s16) last_seq_ondisk > 0);
+}
+
+/* Device usage: */
+
+struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
+
+void bch2_dev_usage_from_buckets(struct bch_fs *);
+
+static inline u64 __dev_buckets_available(struct bch_dev *ca,
+                                         struct bch_dev_usage stats)
+{
+       u64 total = ca->mi.nbuckets - ca->mi.first_bucket;
+
+       if (WARN_ONCE(stats.buckets_unavailable > total,
+                     "buckets_unavailable overflow (%llu > %llu)\n",
+                     stats.buckets_unavailable, total))
+               return 0;
+
+       return total - stats.buckets_unavailable;
+}
+
+/*
+ * Number of reclaimable buckets - only for use by the allocator thread:
+ */
+static inline u64 dev_buckets_available(struct bch_dev *ca)
+{
+       return __dev_buckets_available(ca, bch2_dev_usage_read(ca));
+}
+
+static inline u64 __dev_buckets_free(struct bch_dev *ca,
+                                    struct bch_dev_usage stats)
+{
+       return __dev_buckets_available(ca, stats) +
+               fifo_used(&ca->free[RESERVE_NONE]) +
+               fifo_used(&ca->free_inc);
+}
+
+static inline u64 dev_buckets_free(struct bch_dev *ca)
+{
+       return __dev_buckets_free(ca, bch2_dev_usage_read(ca));
+}
+
+/* Filesystem usage: */
+
+static inline unsigned fs_usage_u64s(struct bch_fs *c)
+{
+       return sizeof(struct bch_fs_usage) / sizeof(u64) +
+               READ_ONCE(c->replicas.nr);
+}
+
+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
+
+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *);
+
+struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
+
+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned);
+
+void bch2_fs_usage_to_text(struct printbuf *,
+                          struct bch_fs *, struct bch_fs_usage *);
+
+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
+
+struct bch_fs_usage_short
+bch2_fs_usage_read_short(struct bch_fs *);
+
+/* key/bucket marking: */
+
+void bch2_bucket_seq_cleanup(struct bch_fs *);
+void bch2_fs_usage_initialize(struct bch_fs *);
+
+void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *,
+                           size_t, struct bucket_mark *);
+void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *,
+                           size_t, bool, struct gc_pos, unsigned);
+void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
+                              size_t, enum bch_data_type, unsigned,
+                              struct gc_pos, unsigned);
+
+int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned,
+                 s64, struct bch_fs_usage *, u64, unsigned);
+int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
+                       struct disk_reservation *, unsigned);
+
+int bch2_mark_update(struct btree_trans *, struct btree_iter *,
+                    struct bkey_i *, struct bch_fs_usage *, unsigned);
+
+int bch2_replicas_delta_list_apply(struct bch_fs *,
+                                  struct bch_fs_usage *,
+                                  struct replicas_delta_list *);
+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
+                       unsigned, s64, unsigned);
+int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
+                          struct bkey_i *insert, unsigned);
+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
+
+/* disk reservations: */
+
+void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
+
+static inline void bch2_disk_reservation_put(struct bch_fs *c,
+                                            struct disk_reservation *res)
+{
+       if (res->sectors)
+               __bch2_disk_reservation_put(c, res);
+}
+
+#define BCH_DISK_RESERVATION_NOFAIL            (1 << 0)
+
+int bch2_disk_reservation_add(struct bch_fs *,
+                            struct disk_reservation *,
+                            unsigned, int);
+
+static inline struct disk_reservation
+bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas)
+{
+       return (struct disk_reservation) {
+               .sectors        = 0,
+#if 0
+               /* not used yet: */
+               .gen            = c->capacity_gen,
+#endif
+               .nr_replicas    = nr_replicas,
+       };
+}
+
+static inline int bch2_disk_reservation_get(struct bch_fs *c,
+                                           struct disk_reservation *res,
+                                           unsigned sectors,
+                                           unsigned nr_replicas,
+                                           int flags)
+{
+       *res = bch2_disk_reservation_init(c, nr_replicas);
+
+       return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags);
+}
+
+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64);
+void bch2_dev_buckets_free(struct bch_dev *);
+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *);
+
+#endif /* _BUCKETS_H */
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
new file mode 100644 (file)
index 0000000..d5215b1
--- /dev/null
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BUCKETS_TYPES_H
+#define _BUCKETS_TYPES_H
+
+#include "bcachefs_format.h"
+#include "util.h"
+
+#define BUCKET_JOURNAL_SEQ_BITS                16
+
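+/*
+ * All the per bucket state the allocator and GC care about, packed into a
+ * single u64 so it can be read and updated atomically (see bucket_cmpxchg()
+ * in buckets.h):
+ */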
+struct bucket_mark {
+       union {
+       atomic64_t      v;
+
+       struct {
+       u8              gen;
+       u8              data_type:3,
+                       owned_by_allocator:1,
+                       journal_seq_valid:1,
+                       stripe:1;
+       u16             dirty_sectors;
+       u16             cached_sectors;
+
+       /*
+        * low bits of journal sequence number when this bucket was most
+        * recently modified: if journal_seq_valid is set, this bucket can't be
+        * reused until the journal sequence number written to disk is >= the
+        * bucket's journal sequence number:
+        */
+       u16             journal_seq;
+       };
+       };
+};
+
+struct bucket {
+       union {
+               struct bucket_mark      _mark;
+               const struct bucket_mark mark;
+       };
+
+       u16                             io_time[2];
+       u8                              oldest_gen;
+       u8                              gc_gen;
+       unsigned                        gen_valid:1;
+};
+
+struct bucket_array {
+       struct rcu_head         rcu;
+       u16                     first_bucket;
+       size_t                  nbuckets;
+       struct bucket           b[];
+};
+
+struct bch_dev_usage {
+       u64                     buckets[BCH_DATA_NR];
+       u64                     buckets_alloc;
+       u64                     buckets_unavailable;
+
+       /* _compressed_ sectors: */
+       u64                     sectors[BCH_DATA_NR];
+       u64                     sectors_fragmented;
+
+       u64                     buckets_ec;
+       u64                     sectors_ec;
+};
+
+struct bch_fs_usage {
+       /* all fields are in units of 512 byte sectors: */
+
+       u64                     online_reserved;
+
+       /* fields after online_reserved are cleared/recalculated by gc: */
+       u64                     gc_start[0];
+
+       u64                     hidden;
+       u64                     btree;
+       u64                     data;
+       u64                     cached;
+       u64                     reserved;
+       u64                     nr_inodes;
+
+       /* XXX: add stats for compression ratio */
+#if 0
+       u64                     uncompressed;
+       u64                     compressed;
+#endif
+
+       /* broken out: */
+
+       u64                     persistent_reserved[BCH_REPLICAS_MAX];
+       u64                     replicas[];
+};
+
+struct bch_fs_usage_short {
+       u64                     capacity;
+       u64                     used;
+       u64                     free;
+       u64                     nr_inodes;
+};
+
+struct replicas_delta {
+       s64                     delta;
+       struct bch_replicas_entry r;
+} __packed;
+
+struct replicas_delta_list {
+       unsigned                size;
+       unsigned                used;
+
+       struct                  {} memset_start;
+       u64                     nr_inodes;
+       u64                     persistent_reserved[BCH_REPLICAS_MAX];
+       struct                  {} memset_end;
+       struct replicas_delta   d[0];
+};
+
+/*
+ * A reservation for space on disk:
+ */
+struct disk_reservation {
+       u64                     sectors;
+       u32                     gen;
+       unsigned                nr_replicas;
+};
+
+struct copygc_heap_entry {
+       u8                      dev;
+       u8                      gen;
+       u16                     fragmentation;
+       u32                     sectors;
+       u64                     offset;
+};
+
+typedef HEAP(struct copygc_heap_entry) copygc_heap;
+
+#endif /* _BUCKETS_TYPES_H */
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
new file mode 100644 (file)
index 0000000..0377f90
--- /dev/null
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_CHARDEV
+
+#include "bcachefs.h"
+#include "bcachefs_ioctl.h"
+#include "buckets.h"
+#include "chardev.h"
+#include "move.h"
+#include "replicas.h"
+#include "super.h"
+#include "super-io.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/kthread.h>
+#include <linux/major.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+/* returns with ref on ca->ref */
+static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
+                                         unsigned flags)
+{
+       struct bch_dev *ca;
+
+       if (flags & BCH_BY_INDEX) {
+               if (dev >= c->sb.nr_devices)
+                       return ERR_PTR(-EINVAL);
+
+               rcu_read_lock();
+               ca = rcu_dereference(c->devs[dev]);
+               if (ca)
+                       percpu_ref_get(&ca->ref);
+               rcu_read_unlock();
+
+               if (!ca)
+                       return ERR_PTR(-EINVAL);
+       } else {
+               char *path;
+
+               path = strndup_user((const char __user *)
+                                   (unsigned long) dev, PATH_MAX);
+               if (IS_ERR(path))
+                       return ERR_CAST(path);
+
+               ca = bch2_dev_lookup(c, path);
+               kfree(path);
+       }
+
+       return ca;
+}
+
+#if 0
+static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
+{
+       struct bch_ioctl_assemble arg;
+       struct bch_fs *c;
+       u64 *user_devs = NULL;
+       char **devs = NULL;
+       unsigned i;
+       int ret = -EFAULT;
+
+       if (copy_from_user(&arg, user_arg, sizeof(arg)))
+               return -EFAULT;
+
+       if (arg.flags || arg.pad)
+               return -EINVAL;
+
+       user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
+       if (!user_devs)
+               return -ENOMEM;
+
+       devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
+       if (!devs) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       if (copy_from_user(user_devs, user_arg->devs,
+                          sizeof(u64) * arg.nr_devs))
+               goto err;
+
+       for (i = 0; i < arg.nr_devs; i++) {
+               devs[i] = strndup_user((const char __user *)(unsigned long)
+                                      user_devs[i],
+                                      PATH_MAX);
+               if (!devs[i]) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+       }
+
+       c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
+       ret = PTR_ERR_OR_ZERO(c);
+       if (!ret)
+               closure_put(&c->cl);
+err:
+       if (devs)
+               for (i = 0; i < arg.nr_devs; i++)
+                       kfree(devs[i]);
+       kfree(devs);
+       kfree(user_devs);
+       return ret;
+}
+
+static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
+{
+       struct bch_ioctl_incremental arg;
+       const char *err;
+       char *path;
+
+       if (copy_from_user(&arg, user_arg, sizeof(arg)))
+               return -EFAULT;
+
+       if (arg.flags || arg.pad)
+               return -EINVAL;
+
+       path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
+       if (!path)
+               return -ENOMEM;
+
+       err = bch2_fs_open_incremental(path);
+       kfree(path);
+
+       if (err) {
+               pr_err("Could not register bcachefs devices: %s", err);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+#endif
+
+static long bch2_global_ioctl(unsigned cmd, void __user *arg)
+{
+       switch (cmd) {
+#if 0
+       case BCH_IOCTL_ASSEMBLE:
+               return bch2_ioctl_assemble(arg);
+       case BCH_IOCTL_INCREMENTAL:
+               return bch2_ioctl_incremental(arg);
+#endif
+       default:
+               return -ENOTTY;
+       }
+}
+
+static long bch2_ioctl_query_uuid(struct bch_fs *c,
+                       struct bch_ioctl_query_uuid __user *user_arg)
+{
+       /* copy_to_user() returns the number of bytes not copied, not an errno: */
+       return copy_to_user(&user_arg->uuid,
+                           &c->sb.user_uuid,
+                           sizeof(c->sb.user_uuid))
+               ? -EFAULT : 0;
+}
+
+#if 0
+static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
+{
+       if (arg.flags || arg.pad)
+               return -EINVAL;
+
+       return bch2_fs_start(c);
+}
+
+static long bch2_ioctl_stop(struct bch_fs *c)
+{
+       bch2_fs_stop(c);
+       return 0;
+}
+#endif
+
+static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+       char *path;
+       int ret;
+
+       if (arg.flags || arg.pad)
+               return -EINVAL;
+
+       path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
+       if (!path)
+               return -ENOMEM;
+
+       ret = bch2_dev_add(c, path);
+       kfree(path);
+
+       return ret;
+}
+
+static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+       struct bch_dev *ca;
+
+       if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
+                          BCH_FORCE_IF_METADATA_LOST|
+                          BCH_FORCE_IF_DEGRADED|
+                          BCH_BY_INDEX)) ||
+           arg.pad)
+               return -EINVAL;
+
+       ca = bch2_device_lookup(c, arg.dev, arg.flags);
+       if (IS_ERR(ca))
+               return PTR_ERR(ca);
+
+       return bch2_dev_remove(c, ca, arg.flags);
+}
+
+static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+       char *path;
+       int ret;
+
+       if (arg.flags || arg.pad)
+               return -EINVAL;
+
+       path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
+       if (!path)
+               return -ENOMEM;
+
+       ret = bch2_dev_online(c, path);
+       kfree(path);
+       return ret;
+}
+
+static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+       struct bch_dev *ca;
+       int ret;
+
+       if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
+                          BCH_FORCE_IF_METADATA_LOST|
+                          BCH_FORCE_IF_DEGRADED|
+                          BCH_BY_INDEX)) ||
+           arg.pad)
+               return -EINVAL;
+
+       ca = bch2_device_lookup(c, arg.dev, arg.flags);
+       if (IS_ERR(ca))
+               return PTR_ERR(ca);
+
+       ret = bch2_dev_offline(c, ca, arg.flags);
+       percpu_ref_put(&ca->ref);
+       return ret;
+}
+
+static long bch2_ioctl_disk_set_state(struct bch_fs *c,
+                       struct bch_ioctl_disk_set_state arg)
+{
+       struct bch_dev *ca;
+       int ret;
+
+       if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
+                          BCH_FORCE_IF_METADATA_LOST|
+                          BCH_FORCE_IF_DEGRADED|
+                          BCH_BY_INDEX)) ||
+           arg.pad[0] || arg.pad[1] || arg.pad[2])
+               return -EINVAL;
+
+       ca = bch2_device_lookup(c, arg.dev, arg.flags);
+       if (IS_ERR(ca))
+               return PTR_ERR(ca);
+
+       ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
+
+       percpu_ref_put(&ca->ref);
+       return ret;
+}
+
+struct bch_data_ctx {
+       struct bch_fs                   *c;
+       struct bch_ioctl_data           arg;
+       struct bch_move_stats           stats;
+
+       int                             ret;
+
+       struct task_struct              *thread;
+};
+
+static int bch2_data_thread(void *arg)
+{
+       struct bch_data_ctx *ctx = arg;
+
+       ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
+
+       ctx->stats.data_type = U8_MAX;
+       return 0;
+}
+
+static int bch2_data_job_release(struct inode *inode, struct file *file)
+{
+       struct bch_data_ctx *ctx = file->private_data;
+
+       kthread_stop(ctx->thread);
+       put_task_struct(ctx->thread);
+       kfree(ctx);
+       return 0;
+}
+
+static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
+                                 size_t len, loff_t *ppos)
+{
+       struct bch_data_ctx *ctx = file->private_data;
+       struct bch_fs *c = ctx->c;
+       struct bch_ioctl_data_event e = {
+               .type                   = BCH_DATA_EVENT_PROGRESS,
+               .p.data_type            = ctx->stats.data_type,
+               .p.btree_id             = ctx->stats.btree_id,
+               .p.pos                  = ctx->stats.pos,
+               .p.sectors_done         = atomic64_read(&ctx->stats.sectors_seen),
+               .p.sectors_total        = bch2_fs_usage_read_short(c).used,
+       };
+
+       if (len < sizeof(e))
+               return -EINVAL;
+
+       if (copy_to_user(buf, &e, sizeof(e)))
+               return -EFAULT;
+
+       return sizeof(e);
+}
+
+static const struct file_operations bcachefs_data_ops = {
+       .release        = bch2_data_job_release,
+       .read           = bch2_data_job_read,
+       .llseek         = no_llseek,
+};
+
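+/*
+ * Kick off a data job (e.g. rereplicate or migrate) on a kthread and return
+ * a file descriptor; reading from it reports the job's progress.
+ */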
+static long bch2_ioctl_data(struct bch_fs *c,
+                           struct bch_ioctl_data arg)
+{
+       struct bch_data_ctx *ctx = NULL;
+       struct file *file = NULL;
+       unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
+       int ret, fd = -1;
+
+       if (arg.op >= BCH_DATA_OP_NR || arg.flags)
+               return -EINVAL;
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->c = c;
+       ctx->arg = arg;
+
+       ctx->thread = kthread_create(bch2_data_thread, ctx, "[bcachefs]");
+       if (IS_ERR(ctx->thread)) {
+               ret = PTR_ERR(ctx->thread);
+               goto err;
+       }
+
+       ret = get_unused_fd_flags(flags);
+       if (ret < 0)
+               goto err;
+       fd = ret;
+
+       file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags);
+       if (IS_ERR(file)) {
+               ret = PTR_ERR(file);
+               goto err;
+       }
+
+       fd_install(fd, file);
+
+       get_task_struct(ctx->thread);
+       wake_up_process(ctx->thread);
+
+       return fd;
+err:
+       if (fd >= 0)
+               put_unused_fd(fd);
+       if (!IS_ERR_OR_NULL(ctx->thread))
+               kthread_stop(ctx->thread);
+       kfree(ctx);
+       return ret;
+}
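+
+/*
+ * Sketch of how userspace might drive a data job through this interface
+ * (names here are illustrative, not a guaranteed UAPI example): the ioctl
+ * returns a new fd whose read() yields progress events and whose release
+ * stops the kthread:
+ *
+ *	int job = ioctl(ctl_fd, BCH_IOCTL_DATA, &arg);
+ *	struct bch_ioctl_data_event e;
+ *
+ *	while (read(job, &e, sizeof(e)) == sizeof(e) &&
+ *	       e.p.data_type != U8_MAX)
+ *		report_progress(&e.p);
+ *	close(job);
+ */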
+
+static long bch2_ioctl_fs_usage(struct bch_fs *c,
+                               struct bch_ioctl_fs_usage __user *user_arg)
+{
+       struct bch_ioctl_fs_usage *arg = NULL;
+       struct bch_replicas_usage *dst_e, *dst_end;
+       struct bch_fs_usage *src;
+       u32 replica_entries_bytes;
+       unsigned i;
+       int ret = 0;
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
+               return -EINVAL;
+
+       if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
+               return -EFAULT;
+
+       arg = kzalloc(sizeof(*arg) + replica_entries_bytes, GFP_KERNEL);
+       if (!arg)
+               return -ENOMEM;
+
+       src = bch2_fs_usage_read(c);
+       if (!src) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       arg->capacity           = c->capacity;
+       arg->used               = bch2_fs_sectors_used(c, src);
+       arg->online_reserved    = src->online_reserved;
+
+       for (i = 0; i < BCH_REPLICAS_MAX; i++)
+               arg->persistent_reserved[i] = src->persistent_reserved[i];
+
+       dst_e   = arg->replicas;
+       dst_end = (void *) arg->replicas + replica_entries_bytes;
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *src_e =
+                       cpu_replicas_entry(&c->replicas, i);
+
+               if (replicas_usage_next(dst_e) > dst_end) {
+                       ret = -ERANGE;
+                       break;
+               }
+
+               dst_e->sectors          = src->replicas[i];
+               dst_e->r                = *src_e;
+
+               /* recheck after setting nr_devs: */
+               if (replicas_usage_next(dst_e) > dst_end) {
+                       ret = -ERANGE;
+                       break;
+               }
+
+               memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs);
+
+               dst_e = replicas_usage_next(dst_e);
+       }
+
+       arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas;
+
+       percpu_up_read(&c->mark_lock);
+       kfree(src);
+
+       if (!ret &&
+           copy_to_user(user_arg, arg,
+                        sizeof(*arg) + arg->replica_entries_bytes))
+               ret = -EFAULT;
+err:
+       kfree(arg);
+       return ret;
+}
+
+static long bch2_ioctl_dev_usage(struct bch_fs *c,
+                                struct bch_ioctl_dev_usage __user *user_arg)
+{
+       struct bch_ioctl_dev_usage arg;
+       struct bch_dev_usage src;
+       struct bch_dev *ca;
+       unsigned i;
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
+               return -EINVAL;
+
+       if (copy_from_user(&arg, user_arg, sizeof(arg)))
+               return -EFAULT;
+
+       if ((arg.flags & ~BCH_BY_INDEX) ||
+           arg.pad[0] ||
+           arg.pad[1] ||
+           arg.pad[2])
+               return -EINVAL;
+
+       ca = bch2_device_lookup(c, arg.dev, arg.flags);
+       if (IS_ERR(ca))
+               return PTR_ERR(ca);
+
+       src = bch2_dev_usage_read(ca);
+
+       arg.state               = ca->mi.state;
+       arg.bucket_size         = ca->mi.bucket_size;
+       arg.nr_buckets          = ca->mi.nbuckets - ca->mi.first_bucket;
+       arg.available_buckets   = arg.nr_buckets - src.buckets_unavailable;
+       arg.ec_buckets          = src.buckets_ec;
+       arg.ec_sectors          = src.sectors_ec;
+
+       for (i = 0; i < BCH_DATA_NR; i++) {
+               arg.buckets[i] = src.buckets[i];
+               arg.sectors[i] = src.sectors[i];
+       }
+
+       percpu_ref_put(&ca->ref);
+
+       return copy_to_user(user_arg, &arg, sizeof(arg)) ? -EFAULT : 0;
+}
+
+static long bch2_ioctl_read_super(struct bch_fs *c,
+                                 struct bch_ioctl_read_super arg)
+{
+       struct bch_dev *ca = NULL;
+       struct bch_sb *sb;
+       int ret = 0;
+
+       if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
+           arg.pad)
+               return -EINVAL;
+
+       mutex_lock(&c->sb_lock);
+
+       if (arg.flags & BCH_READ_DEV) {
+               ca = bch2_device_lookup(c, arg.dev, arg.flags);
+
+               if (IS_ERR(ca)) {
+                       ret = PTR_ERR(ca);
+                       goto err;
+               }
+
+               sb = ca->disk_sb.sb;
+       } else {
+               sb = c->disk_sb.sb;
+       }
+
+       if (vstruct_bytes(sb) > arg.size) {
+               ret = -ERANGE;
+               goto err;
+       }
+
+       if (copy_to_user((void __user *)(unsigned long)arg.sb,
+                        sb, vstruct_bytes(sb)))
+               ret = -EFAULT;
+err:
+       if (ca)
+               percpu_ref_put(&ca->ref);
+       mutex_unlock(&c->sb_lock);
+       return ret;
+}
+
+static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
+                                   struct bch_ioctl_disk_get_idx arg)
+{
+       dev_t dev = huge_decode_dev(arg.dev);
+       struct bch_dev *ca;
+       unsigned i;
+
+       for_each_online_member(ca, c, i)
+               if (ca->disk_sb.bdev->bd_dev == dev) {
+                       percpu_ref_put(&ca->io_ref);
+                       return i;
+               }
+
+       return -ENOENT;
+}
+
+static long bch2_ioctl_disk_resize(struct bch_fs *c,
+                                  struct bch_ioctl_disk_resize arg)
+{
+       struct bch_dev *ca;
+       int ret;
+
+       if ((arg.flags & ~BCH_BY_INDEX) ||
+           arg.pad)
+               return -EINVAL;
+
+       ca = bch2_device_lookup(c, arg.dev, arg.flags);
+       if (IS_ERR(ca))
+               return PTR_ERR(ca);
+
+       ret = bch2_dev_resize(c, ca, arg.nbuckets);
+
+       percpu_ref_put(&ca->ref);
+       return ret;
+}
+
+#define BCH_IOCTL(_name, _argtype)                                     \
+do {                                                                   \
+       _argtype i;                                                     \
+                                                                       \
+       if (copy_from_user(&i, arg, sizeof(i)))                         \
+               return -EFAULT;                                         \
+       return bch2_ioctl_##_name(c, i);                                \
+} while (0)
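+
+/*
+ * Expanded by hand, BCH_IOCTL(disk_add, struct bch_ioctl_disk) becomes:
+ *
+ *	struct bch_ioctl_disk i;
+ *
+ *	if (copy_from_user(&i, arg, sizeof(i)))
+ *		return -EFAULT;
+ *	return bch2_ioctl_disk_add(c, i);
+ */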
+
+long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
+{
+       /* ioctls that don't require admin cap: */
+       switch (cmd) {
+       case BCH_IOCTL_QUERY_UUID:
+               return bch2_ioctl_query_uuid(c, arg);
+       case BCH_IOCTL_FS_USAGE:
+               return bch2_ioctl_fs_usage(c, arg);
+       case BCH_IOCTL_DEV_USAGE:
+               return bch2_ioctl_dev_usage(c, arg);
+       }
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       switch (cmd) {
+#if 0
+       case BCH_IOCTL_START:
+               BCH_IOCTL(start, struct bch_ioctl_start);
+       case BCH_IOCTL_STOP:
+               return bch2_ioctl_stop(c);
+#endif
+       case BCH_IOCTL_READ_SUPER:
+               BCH_IOCTL(read_super, struct bch_ioctl_read_super);
+       case BCH_IOCTL_DISK_GET_IDX:
+               BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
+       }
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
+               return -EINVAL;
+
+       /* ioctls that do require admin cap: */
+       switch (cmd) {
+       case BCH_IOCTL_DISK_ADD:
+               BCH_IOCTL(disk_add, struct bch_ioctl_disk);
+       case BCH_IOCTL_DISK_REMOVE:
+               BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
+       case BCH_IOCTL_DISK_ONLINE:
+               BCH_IOCTL(disk_online, struct bch_ioctl_disk);
+       case BCH_IOCTL_DISK_OFFLINE:
+               BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
+       case BCH_IOCTL_DISK_SET_STATE:
+               BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
+       case BCH_IOCTL_DATA:
+               BCH_IOCTL(data, struct bch_ioctl_data);
+       case BCH_IOCTL_DISK_RESIZE:
+               BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
+
+       default:
+               return -ENOTTY;
+       }
+}
+
+static DEFINE_IDR(bch_chardev_minor);
+
+static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
+{
+       unsigned minor = iminor(file_inode(filp));
+       struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
+       void __user *arg = (void __user *) v;
+
+       return c
+               ? bch2_fs_ioctl(c, cmd, arg)
+               : bch2_global_ioctl(cmd, arg);
+}
+
+static const struct file_operations bch_chardev_fops = {
+       .owner          = THIS_MODULE,
+       .unlocked_ioctl = bch2_chardev_ioctl,
+       .open           = nonseekable_open,
+};
+
+static int bch_chardev_major;
+static struct class *bch_chardev_class;
+static struct device *bch_chardev;
+
+void bch2_fs_chardev_exit(struct bch_fs *c)
+{
+       if (!IS_ERR_OR_NULL(c->chardev))
+               device_unregister(c->chardev);
+       if (c->minor >= 0)
+               idr_remove(&bch_chardev_minor, c->minor);
+}
+
+int bch2_fs_chardev_init(struct bch_fs *c)
+{
+       c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
+       if (c->minor < 0)
+               return c->minor;
+
+       c->chardev = device_create(bch_chardev_class, NULL,
+                                  MKDEV(bch_chardev_major, c->minor), c,
+                                  "bcachefs%u-ctl", c->minor);
+       if (IS_ERR(c->chardev))
+               return PTR_ERR(c->chardev);
+
+       return 0;
+}
+
+void bch2_chardev_exit(void)
+{
+       if (!IS_ERR_OR_NULL(bch_chardev_class))
+               device_destroy(bch_chardev_class,
+                              MKDEV(bch_chardev_major, U8_MAX));
+       if (!IS_ERR_OR_NULL(bch_chardev_class))
+               class_destroy(bch_chardev_class);
+       if (bch_chardev_major > 0)
+               unregister_chrdev(bch_chardev_major, "bcachefs");
+}
+
+int __init bch2_chardev_init(void)
+{
+       bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
+       if (bch_chardev_major < 0)
+               return bch_chardev_major;
+
+       bch_chardev_class = class_create(THIS_MODULE, "bcachefs");
+       if (IS_ERR(bch_chardev_class))
+               return PTR_ERR(bch_chardev_class);
+
+       bch_chardev = device_create(bch_chardev_class, NULL,
+                                   MKDEV(bch_chardev_major, U8_MAX),
+                                   NULL, "bcachefs-ctl");
+       if (IS_ERR(bch_chardev))
+               return PTR_ERR(bch_chardev);
+
+       return 0;
+}
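+
+/*
+ * Net effect: one global /dev/bcachefs-ctl node (minor U8_MAX) for ioctls
+ * that don't need a filesystem, plus a per-filesystem /dev/bcachefs%u-ctl
+ * node; bch2_chardev_ioctl() routes between bch2_global_ioctl() and
+ * bch2_fs_ioctl() based on the minor. A rough userspace sketch (struct and
+ * ioctl names per the bcachefs ioctl headers; error handling omitted):
+ *
+ *	int fd = open("/dev/bcachefs0-ctl", O_RDONLY);
+ *	struct bch_ioctl_query_uuid u;
+ *
+ *	ioctl(fd, BCH_IOCTL_QUERY_UUID, &u);
+ */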
+
+#endif /* NO_BCACHEFS_CHARDEV */
diff --git a/libbcachefs/chardev.h b/libbcachefs/chardev.h
new file mode 100644 (file)
index 0000000..3a4890d
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CHARDEV_H
+#define _BCACHEFS_CHARDEV_H
+
+#ifndef NO_BCACHEFS_FS
+
+long bch2_fs_ioctl(struct bch_fs *, unsigned, void __user *);
+
+void bch2_fs_chardev_exit(struct bch_fs *);
+int bch2_fs_chardev_init(struct bch_fs *);
+
+void bch2_chardev_exit(void);
+int __init bch2_chardev_init(void);
+
+#else
+
+static inline long bch2_fs_ioctl(struct bch_fs *c,
+                                unsigned cmd, void __user *arg)
+{
+       return -ENOSYS;
+}
+
+static inline void bch2_fs_chardev_exit(struct bch_fs *c) {}
+static inline int bch2_fs_chardev_init(struct bch_fs *c) { return 0; }
+
+static inline void bch2_chardev_exit(void) {}
+static inline int __init bch2_chardev_init(void) { return 0; }
+
+#endif /* NO_BCACHEFS_FS */
+
+#endif /* _BCACHEFS_CHARDEV_H */
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
new file mode 100644 (file)
index 0000000..3d88719
--- /dev/null
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "checksum.h"
+#include "super.h"
+#include "super-io.h"
+
+#include <linux/crc32c.h>
+#include <linux/crypto.h>
+#include <linux/key.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/hash.h>
+#include <crypto/poly1305.h>
+#include <crypto/skcipher.h>
+#include <keys/user-type.h>
+
+static u64 bch2_checksum_init(unsigned type)
+{
+       switch (type) {
+       case BCH_CSUM_NONE:
+               return 0;
+       case BCH_CSUM_CRC32C_NONZERO:
+               return U32_MAX;
+       case BCH_CSUM_CRC64_NONZERO:
+               return U64_MAX;
+       case BCH_CSUM_CRC32C:
+               return 0;
+       case BCH_CSUM_CRC64:
+               return 0;
+       default:
+               BUG();
+       }
+}
+
+static u64 bch2_checksum_final(unsigned type, u64 crc)
+{
+       switch (type) {
+       case BCH_CSUM_NONE:
+               return 0;
+       case BCH_CSUM_CRC32C_NONZERO:
+               return crc ^ U32_MAX;
+       case BCH_CSUM_CRC64_NONZERO:
+               return crc ^ U64_MAX;
+       case BCH_CSUM_CRC32C:
+               return crc;
+       case BCH_CSUM_CRC64:
+               return crc;
+       default:
+               BUG();
+       }
+}
+
+static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t len)
+{
+       switch (type) {
+       case BCH_CSUM_NONE:
+               return 0;
+       case BCH_CSUM_CRC32C_NONZERO:
+       case BCH_CSUM_CRC32C:
+               return crc32c(crc, data, len);
+       case BCH_CSUM_CRC64_NONZERO:
+       case BCH_CSUM_CRC64:
+               return crc64_be(crc, data, len);
+       default:
+               BUG();
+       }
+}
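+
+/*
+ * Note on the variants above: CRC32C_NONZERO/CRC64_NONZERO are the
+ * conventional pre/post-inverted CRCs (seed of all ones, final inversion),
+ * while the plain CRC32C/CRC64 types use a zero seed and no final xor.
+ * Dropping the inversion keeps the CRC linear in its input, which is what
+ * lets bch2_checksum_merge() below combine the checksums of adjacent
+ * extents without rereading the data - only the non-inverted types are
+ * flagged mergeable by bch2_checksum_mergeable().
+ */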
+
+static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm,
+                                struct nonce nonce,
+                                struct scatterlist *sg, size_t len)
+{
+       SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
+       int ret;
+
+       skcipher_request_set_sync_tfm(req, tfm);
+       skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
+
+       ret = crypto_skcipher_encrypt(req);
+       BUG_ON(ret);
+}
+
+static inline void do_encrypt(struct crypto_sync_skcipher *tfm,
+                             struct nonce nonce,
+                             void *buf, size_t len)
+{
+       struct scatterlist sg;
+
+       sg_init_one(&sg, buf, len);
+       do_encrypt_sg(tfm, nonce, &sg, len);
+}
+
+int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
+                           void *buf, size_t len)
+{
+       struct crypto_sync_skcipher *chacha20 =
+               crypto_alloc_sync_skcipher("chacha20", 0, 0);
+       int ret;
+
+       if (IS_ERR(chacha20)) {
+               pr_err("error requesting chacha20 module: %li", PTR_ERR(chacha20));
+               return PTR_ERR(chacha20);
+       }
+
+       ret = crypto_skcipher_setkey(&chacha20->base,
+                                    (void *) key, sizeof(*key));
+       if (ret) {
+               pr_err("crypto_skcipher_setkey() error: %i", ret);
+               goto err;
+       }
+
+       do_encrypt(chacha20, nonce, buf, len);
+err:
+       crypto_free_sync_skcipher(chacha20);
+       return ret;
+}
+
+static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
+                        struct nonce nonce)
+{
+       u8 key[POLY1305_KEY_SIZE];
+
+       nonce.d[3] ^= BCH_NONCE_POLY;
+
+       memset(key, 0, sizeof(key));
+       do_encrypt(c->chacha20, nonce, key, sizeof(key));
+
+       desc->tfm = c->poly1305;
+       crypto_shash_init(desc);
+       crypto_shash_update(desc, key, sizeof(key));
+}
+
+struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
+                             struct nonce nonce, const void *data, size_t len)
+{
+       switch (type) {
+       case BCH_CSUM_NONE:
+       case BCH_CSUM_CRC32C_NONZERO:
+       case BCH_CSUM_CRC64_NONZERO:
+       case BCH_CSUM_CRC32C:
+       case BCH_CSUM_CRC64: {
+               u64 crc = bch2_checksum_init(type);
+
+               crc = bch2_checksum_update(type, crc, data, len);
+               crc = bch2_checksum_final(type, crc);
+
+               return (struct bch_csum) { .lo = cpu_to_le64(crc) };
+       }
+
+       case BCH_CSUM_CHACHA20_POLY1305_80:
+       case BCH_CSUM_CHACHA20_POLY1305_128: {
+               SHASH_DESC_ON_STACK(desc, c->poly1305);
+               u8 digest[POLY1305_DIGEST_SIZE];
+               struct bch_csum ret = { 0 };
+
+               gen_poly_key(c, desc, nonce);
+
+               crypto_shash_update(desc, data, len);
+               crypto_shash_final(desc, digest);
+
+               memcpy(&ret, digest, bch_crc_bytes[type]);
+               return ret;
+       }
+       default:
+               BUG();
+       }
+}
+
+void bch2_encrypt(struct bch_fs *c, unsigned type,
+                 struct nonce nonce, void *data, size_t len)
+{
+       if (!bch2_csum_type_is_encryption(type))
+               return;
+
+       do_encrypt(c->chacha20, nonce, data, len);
+}
+
+static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
+                                          struct nonce nonce, struct bio *bio,
+                                          struct bvec_iter *iter)
+{
+       struct bio_vec bv;
+
+       switch (type) {
+       case BCH_CSUM_NONE:
+               return (struct bch_csum) { 0 };
+       case BCH_CSUM_CRC32C_NONZERO:
+       case BCH_CSUM_CRC64_NONZERO:
+       case BCH_CSUM_CRC32C:
+       case BCH_CSUM_CRC64: {
+               u64 crc = bch2_checksum_init(type);
+
+#ifdef CONFIG_HIGHMEM
+               __bio_for_each_segment(bv, bio, *iter, *iter) {
+                       void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
+                       crc = bch2_checksum_update(type,
+                               crc, p, bv.bv_len);
+                       kunmap_atomic(p);
+               }
+#else
+               __bio_for_each_bvec(bv, bio, *iter, *iter)
+                       crc = bch2_checksum_update(type, crc,
+                               page_address(bv.bv_page) + bv.bv_offset,
+                               bv.bv_len);
+#endif
+               crc = bch2_checksum_final(type, crc);
+               return (struct bch_csum) { .lo = cpu_to_le64(crc) };
+       }
+
+       case BCH_CSUM_CHACHA20_POLY1305_80:
+       case BCH_CSUM_CHACHA20_POLY1305_128: {
+               SHASH_DESC_ON_STACK(desc, c->poly1305);
+               u8 digest[POLY1305_DIGEST_SIZE];
+               struct bch_csum ret = { 0 };
+
+               gen_poly_key(c, desc, nonce);
+
+#ifdef CONFIG_HIGHMEM
+               __bio_for_each_segment(bv, bio, *iter, *iter) {
+                       void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
+
+                       crypto_shash_update(desc, p, bv.bv_len);
+                       kunmap_atomic(p);
+               }
+#else
+               __bio_for_each_bvec(bv, bio, *iter, *iter)
+                       crypto_shash_update(desc,
+                               page_address(bv.bv_page) + bv.bv_offset,
+                               bv.bv_len);
+#endif
+               crypto_shash_final(desc, digest);
+
+               memcpy(&ret, digest, bch_crc_bytes[type]);
+               return ret;
+       }
+       default:
+               BUG();
+       }
+}
+
+struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type,
+                                 struct nonce nonce, struct bio *bio)
+{
+       struct bvec_iter iter = bio->bi_iter;
+
+       return __bch2_checksum_bio(c, type, nonce, bio, &iter);
+}
+
+void bch2_encrypt_bio(struct bch_fs *c, unsigned type,
+                     struct nonce nonce, struct bio *bio)
+{
+       struct bio_vec bv;
+       struct bvec_iter iter;
+       struct scatterlist sgl[16], *sg = sgl;
+       size_t bytes = 0;
+
+       if (!bch2_csum_type_is_encryption(type))
+               return;
+
+       sg_init_table(sgl, ARRAY_SIZE(sgl));
+
+       bio_for_each_segment(bv, bio, iter) {
+               if (sg == sgl + ARRAY_SIZE(sgl)) {
+                       sg_mark_end(sg - 1);
+                       do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
+
+                       nonce = nonce_add(nonce, bytes);
+                       bytes = 0;
+
+                       sg_init_table(sgl, ARRAY_SIZE(sgl));
+                       sg = sgl;
+               }
+
+               sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset);
+               bytes += bv.bv_len;
+       }
+
+       sg_mark_end(sg - 1);
+       do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
+}
+
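+/*
+ * Merging relies on CRC linearity: for the zero-seeded, non-inverted CRC
+ * types, crc(A + B) == crc(A + zeroes(len(B))) ^ crc(B), where + is
+ * concatenation. So we extend a's crc over b_len zero bytes (a page at a
+ * time, via ZERO_PAGE) and then xor in b's crc - e.g.:
+ *
+ *	crc("abcdefgh") == crc("abcd\0\0\0\0") ^ crc("efgh")
+ */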
+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
+                                   struct bch_csum b, size_t b_len)
+{
+       BUG_ON(!bch2_checksum_mergeable(type));
+
+       while (b_len) {
+               unsigned b = min_t(unsigned, b_len, PAGE_SIZE);
+
+               a.lo = bch2_checksum_update(type, a.lo,
+                               page_address(ZERO_PAGE(0)), b);
+               b_len -= b;
+       }
+
+       a.lo ^= b.lo;
+       a.hi ^= b.hi;
+       return a;
+}
+
+int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
+                       struct bversion version,
+                       struct bch_extent_crc_unpacked crc_old,
+                       struct bch_extent_crc_unpacked *crc_a,
+                       struct bch_extent_crc_unpacked *crc_b,
+                       unsigned len_a, unsigned len_b,
+                       unsigned new_csum_type)
+{
+       struct bvec_iter iter = bio->bi_iter;
+       struct nonce nonce = extent_nonce(version, crc_old);
+       struct bch_csum merged = { 0 };
+       struct crc_split {
+               struct bch_extent_crc_unpacked  *crc;
+               unsigned                        len;
+               unsigned                        csum_type;
+               struct bch_csum                 csum;
+       } splits[3] = {
+               { crc_a, len_a, new_csum_type },
+               { crc_b, len_b, new_csum_type },
+               { NULL,  bio_sectors(bio) - len_a - len_b, new_csum_type },
+       }, *i;
+       bool mergeable = crc_old.csum_type == new_csum_type &&
+               bch2_checksum_mergeable(new_csum_type);
+       unsigned crc_nonce = crc_old.nonce;
+
+       BUG_ON(len_a + len_b > bio_sectors(bio));
+       BUG_ON(crc_old.uncompressed_size != bio_sectors(bio));
+       BUG_ON(crc_is_compressed(crc_old));
+       BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) !=
+              bch2_csum_type_is_encryption(new_csum_type));
+
+       for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
+               iter.bi_size = i->len << 9;
+               if (mergeable || i->crc)
+                       i->csum = __bch2_checksum_bio(c, i->csum_type,
+                                                     nonce, bio, &iter);
+               else
+                       bio_advance_iter(bio, &iter, i->len << 9);
+               nonce = nonce_add(nonce, i->len << 9);
+       }
+
+       if (mergeable)
+               for (i = splits; i < splits + ARRAY_SIZE(splits); i++)
+                       merged = bch2_checksum_merge(new_csum_type, merged,
+                                                    i->csum, i->len << 9);
+       else
+               merged = bch2_checksum_bio(c, crc_old.csum_type,
+                               extent_nonce(version, crc_old), bio);
+
+       if (bch2_crc_cmp(merged, crc_old.csum))
+               return -EIO;
+
+       for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
+               if (i->crc)
+                       *i->crc = (struct bch_extent_crc_unpacked) {
+                               .csum_type              = i->csum_type,
+                               .compression_type       = crc_old.compression_type,
+                               .compressed_size        = i->len,
+                               .uncompressed_size      = i->len,
+                               .offset                 = 0,
+                               .live_size              = i->len,
+                               .nonce                  = crc_nonce,
+                               .csum                   = i->csum,
+                       };
+
+               if (bch2_csum_type_is_encryption(new_csum_type))
+                       crc_nonce += i->len;
+       }
+
+       return 0;
+}
+
+#ifdef __KERNEL__
+int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
+{
+       char key_description[60];
+       struct key *keyring_key;
+       const struct user_key_payload *ukp;
+       int ret;
+
+       snprintf(key_description, sizeof(key_description),
+                "bcachefs:%pUb", &sb->user_uuid);
+
+       keyring_key = request_key(&key_type_logon, key_description, NULL);
+       if (IS_ERR(keyring_key))
+               return PTR_ERR(keyring_key);
+
+       down_read(&keyring_key->sem);
+       ukp = dereference_key_locked(keyring_key);
+       if (ukp->datalen == sizeof(*key)) {
+               memcpy(key, ukp->data, ukp->datalen);
+               ret = 0;
+       } else {
+               ret = -EINVAL;
+       }
+       up_read(&keyring_key->sem);
+       key_put(keyring_key);
+
+       return ret;
+}
+#else
+#include <keyutils.h>
+#include <uuid/uuid.h>
+
+int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
+{
+       key_serial_t key_id;
+       char key_description[60];
+       char uuid[40];
+
+       uuid_unparse_lower(sb->user_uuid.b, uuid);
+       sprintf(key_description, "bcachefs:%s", uuid);
+
+       key_id = request_key("user", key_description, NULL,
+                            KEY_SPEC_USER_KEYRING);
+       if (key_id < 0)
+               return -errno;
+
+       if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key))
+               return -1;
+
+       return 0;
+}
+#endif
+
+int bch2_decrypt_sb_key(struct bch_fs *c,
+                       struct bch_sb_field_crypt *crypt,
+                       struct bch_key *key)
+{
+       struct bch_encrypted_key sb_key = crypt->key;
+       struct bch_key user_key;
+       int ret = 0;
+
+       /* is key encrypted? */
+       if (!bch2_key_is_encrypted(&sb_key))
+               goto out;
+
+       ret = bch2_request_key(c->disk_sb.sb, &user_key);
+       if (ret) {
+               bch_err(c, "error requesting encryption key: %i", ret);
+               goto err;
+       }
+
+       /* decrypt real key: */
+       ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
+                            &sb_key, sizeof(sb_key));
+       if (ret)
+               goto err;
+
+       if (bch2_key_is_encrypted(&sb_key)) {
+               bch_err(c, "incorrect encryption key");
+               ret = -EINVAL;
+               goto err;
+       }
+out:
+       *key = sb_key.key;
+err:
+       memzero_explicit(&sb_key, sizeof(sb_key));
+       memzero_explicit(&user_key, sizeof(user_key));
+       return ret;
+}
+
+static int bch2_alloc_ciphers(struct bch_fs *c)
+{
+       if (!c->chacha20)
+               c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
+       if (IS_ERR(c->chacha20)) {
+               bch_err(c, "error requesting chacha20 module: %li",
+                       PTR_ERR(c->chacha20));
+               return PTR_ERR(c->chacha20);
+       }
+
+       if (!c->poly1305)
+               c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
+       if (IS_ERR(c->poly1305)) {
+               bch_err(c, "error requesting poly1305 module: %li",
+                       PTR_ERR(c->poly1305));
+               return PTR_ERR(c->poly1305);
+       }
+
+       return 0;
+}
+
+int bch2_disable_encryption(struct bch_fs *c)
+{
+       struct bch_sb_field_crypt *crypt;
+       struct bch_key key;
+       int ret = -EINVAL;
+
+       mutex_lock(&c->sb_lock);
+
+       crypt = bch2_sb_get_crypt(c->disk_sb.sb);
+       if (!crypt)
+               goto out;
+
+       /* is key encrypted? */
+       ret = 0;
+       if (bch2_key_is_encrypted(&crypt->key))
+               goto out;
+
+       ret = bch2_decrypt_sb_key(c, crypt, &key);
+       if (ret)
+               goto out;
+
+       crypt->key.magic        = BCH_KEY_MAGIC;
+       crypt->key.key          = key;
+
+       SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0);
+       bch2_write_super(c);
+out:
+       mutex_unlock(&c->sb_lock);
+
+       return ret;
+}
+
+int bch2_enable_encryption(struct bch_fs *c, bool keyed)
+{
+       struct bch_encrypted_key key;
+       struct bch_key user_key;
+       struct bch_sb_field_crypt *crypt;
+       int ret = -EINVAL;
+
+       mutex_lock(&c->sb_lock);
+
+       /* Do we already have an encryption key? */
+       if (bch2_sb_get_crypt(c->disk_sb.sb))
+               goto err;
+
+       ret = bch2_alloc_ciphers(c);
+       if (ret)
+               goto err;
+
+       key.magic = BCH_KEY_MAGIC;
+       get_random_bytes(&key.key, sizeof(key.key));
+
+       if (keyed) {
+               ret = bch2_request_key(c->disk_sb.sb, &user_key);
+               if (ret) {
+                       bch_err(c, "error requesting encryption key: %i", ret);
+                       goto err;
+               }
+
+               ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
+                                             &key, sizeof(key));
+               if (ret)
+                       goto err;
+       }
+
+       ret = crypto_skcipher_setkey(&c->chacha20->base,
+                       (void *) &key.key, sizeof(key.key));
+       if (ret)
+               goto err;
+
+       crypt = bch2_sb_resize_crypt(&c->disk_sb, sizeof(*crypt) / sizeof(u64));
+       if (!crypt) {
+               ret = -ENOMEM; /* XXX this technically could be -ENOSPC */
+               goto err;
+       }
+
+       crypt->key = key;
+
+       /* write superblock */
+       SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1);
+       bch2_write_super(c);
+err:
+       mutex_unlock(&c->sb_lock);
+       memzero_explicit(&user_key, sizeof(user_key));
+       memzero_explicit(&key, sizeof(key));
+       return ret;
+}
+
+void bch2_fs_encryption_exit(struct bch_fs *c)
+{
+       if (!IS_ERR_OR_NULL(c->poly1305))
+               crypto_free_shash(c->poly1305);
+       if (!IS_ERR_OR_NULL(c->chacha20))
+               crypto_free_sync_skcipher(c->chacha20);
+       if (!IS_ERR_OR_NULL(c->sha256))
+               crypto_free_shash(c->sha256);
+}
+
+int bch2_fs_encryption_init(struct bch_fs *c)
+{
+       struct bch_sb_field_crypt *crypt;
+       struct bch_key key;
+       int ret = 0;
+
+       pr_verbose_init(c->opts, "");
+
+       c->sha256 = crypto_alloc_shash("sha256", 0, 0);
+       if (IS_ERR(c->sha256)) {
+               bch_err(c, "error requesting sha256 module");
+               ret = PTR_ERR(c->sha256);
+               goto out;
+       }
+
+       crypt = bch2_sb_get_crypt(c->disk_sb.sb);
+       if (!crypt)
+               goto out;
+
+       ret = bch2_alloc_ciphers(c);
+       if (ret)
+               goto out;
+
+       ret = bch2_decrypt_sb_key(c, crypt, &key);
+       if (ret)
+               goto out;
+
+       ret = crypto_skcipher_setkey(&c->chacha20->base,
+                       (void *) &key.key, sizeof(key.key));
+       if (ret)
+               goto out;
+out:
+       memzero_explicit(&key, sizeof(key));
+       pr_verbose_init(c->opts, "ret %i", ret);
+       return ret;
+}
diff --git a/libbcachefs/checksum.h b/libbcachefs/checksum.h
new file mode 100644 (file)
index 0000000..24dee80
--- /dev/null
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CHECKSUM_H
+#define _BCACHEFS_CHECKSUM_H
+
+#include "bcachefs.h"
+#include "extents_types.h"
+#include "super-io.h"
+
+#include <linux/crc64.h>
+#include <crypto/chacha.h>
+
+static inline bool bch2_checksum_mergeable(unsigned type)
+{
+       switch (type) {
+       case BCH_CSUM_NONE:
+       case BCH_CSUM_CRC32C:
+       case BCH_CSUM_CRC64:
+               return true;
+       default:
+               return false;
+       }
+}
+
+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum,
+                                   struct bch_csum, size_t);
+
+#define BCH_NONCE_EXTENT       cpu_to_le32(1 << 28)
+#define BCH_NONCE_BTREE                cpu_to_le32(2 << 28)
+#define BCH_NONCE_JOURNAL      cpu_to_le32(3 << 28)
+#define BCH_NONCE_PRIO         cpu_to_le32(4 << 28)
+#define BCH_NONCE_POLY         cpu_to_le32(1 << 31)
+
+struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
+                            const void *, size_t);
+
+/*
+ * This is used for various on-disk data structures - bch_sb, prio_set, bset,
+ * jset: the checksum is _always_ the first field of these structs
+ */
+#define csum_vstruct(_c, _type, _nonce, _i)                            \
+({                                                                     \
+       const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \
+       const void *end = vstruct_end(_i);                              \
+                                                                       \
+       bch2_checksum(_c, _type, _nonce, start, end - start);           \
+})
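+
+/*
+ * Typical use, e.g. checksumming a superblock before write (a sketch - the
+ * nonce choice here is illustrative; see super-io.c for the real call
+ * site):
+ *
+ *	sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), null_nonce(), sb);
+ */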
+
+int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
+int bch2_request_key(struct bch_sb *, struct bch_key *);
+
+void bch2_encrypt(struct bch_fs *, unsigned, struct nonce,
+                void *data, size_t);
+
+struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned,
+                                 struct nonce, struct bio *);
+
+int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion,
+                       struct bch_extent_crc_unpacked,
+                       struct bch_extent_crc_unpacked *,
+                       struct bch_extent_crc_unpacked *,
+                       unsigned, unsigned, unsigned);
+
+void bch2_encrypt_bio(struct bch_fs *, unsigned,
+                   struct nonce, struct bio *);
+
+int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *,
+                       struct bch_key *);
+
+int bch2_disable_encryption(struct bch_fs *);
+int bch2_enable_encryption(struct bch_fs *, bool);
+
+void bch2_fs_encryption_exit(struct bch_fs *);
+int bch2_fs_encryption_init(struct bch_fs *);
+
+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type,
+                                                      bool data)
+{
+       switch (type) {
+       case BCH_CSUM_OPT_NONE:
+               return BCH_CSUM_NONE;
+       case BCH_CSUM_OPT_CRC32C:
+               return data ? BCH_CSUM_CRC32C : BCH_CSUM_CRC32C_NONZERO;
+       case BCH_CSUM_OPT_CRC64:
+               return data ? BCH_CSUM_CRC64 : BCH_CSUM_CRC64_NONZERO;
+       default:
+               BUG();
+       }
+}
+
+static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
+                                                        unsigned opt)
+{
+       if (c->sb.encryption_type)
+               return c->opts.wide_macs
+                       ? BCH_CSUM_CHACHA20_POLY1305_128
+                       : BCH_CSUM_CHACHA20_POLY1305_80;
+
+       return bch2_csum_opt_to_type(opt, true);
+}
+
+static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
+{
+       if (c->sb.encryption_type)
+               return BCH_CSUM_CHACHA20_POLY1305_128;
+
+       return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
+}
+
+static const unsigned bch2_compression_opt_to_type[] = {
+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
+       BCH_COMPRESSION_OPTS()
+#undef x
+};
+
+static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
+                                          unsigned type)
+{
+       if (type >= BCH_CSUM_NR)
+               return false;
+
+       if (bch2_csum_type_is_encryption(type) && !c->chacha20)
+               return false;
+
+       return true;
+}
+
+/* returns true if not equal */
+static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
+{
+       /*
+        * XXX: need some way of preventing the compiler from optimizing this
+        * into a form that isn't constant time.
+        */
+       return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0;
+}
+
+/* for skipping ahead and encrypting/decrypting at an offset: */
+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
+{
+       EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1));
+
+       le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE);
+       return nonce;
+}
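+
+/*
+ * e.g. skipping ahead one 512-byte sector: nonce_add(nonce, 512) advances
+ * d[0] by 512 / CHACHA_BLOCK_SIZE == 512 / 64 == 8 ChaCha blocks.
+ */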
+
+static inline struct nonce null_nonce(void)
+{
+       struct nonce ret;
+
+       memset(&ret, 0, sizeof(ret));
+       return ret;
+}
+
+static inline struct nonce extent_nonce(struct bversion version,
+                                       struct bch_extent_crc_unpacked crc)
+{
+       unsigned compression_type = crc_is_compressed(crc)
+               ? crc.compression_type
+               : 0;
+       unsigned size = compression_type ? crc.uncompressed_size : 0;
+       struct nonce nonce = (struct nonce) {{
+               [0] = cpu_to_le32(size << 22),
+               [1] = cpu_to_le32(version.lo),
+               [2] = cpu_to_le32(version.lo >> 32),
+               [3] = cpu_to_le32(version.hi |
+                                 (compression_type << 24)) ^ BCH_NONCE_EXTENT,
+       }};
+
+       return nonce_add(nonce, crc.nonce << 9);
+}
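+
+/*
+ * Word [0] above packs the uncompressed size into the high bits
+ * (size << 22), leaving the low 22 bits of the word for nonce_add()'s
+ * block counter when encrypting or decrypting at an offset within the
+ * extent.
+ */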
+
+static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key)
+{
+       return le64_to_cpu(key->magic) != BCH_KEY_MAGIC;
+}
+
+static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb)
+{
+       __le64 magic = __bch2_sb_magic(sb);
+
+       return (struct nonce) {{
+               [0] = 0,
+               [1] = 0,
+               [2] = ((__le32 *) &magic)[0],
+               [3] = ((__le32 *) &magic)[1],
+       }};
+}
+
+static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c)
+{
+       __le64 magic = bch2_sb_magic(c);
+
+       return (struct nonce) {{
+               [0] = 0,
+               [1] = 0,
+               [2] = ((__le32 *) &magic)[0],
+               [3] = ((__le32 *) &magic)[1],
+       }};
+}
+
+#endif /* _BCACHEFS_CHECKSUM_H */
diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c
new file mode 100644 (file)
index 0000000..1d1590d
--- /dev/null
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "clock.h"
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/preempt.h>
+
+static inline long io_timer_cmp(io_timer_heap *h,
+                               struct io_timer *l,
+                               struct io_timer *r)
+{
+       return l->expire - r->expire;
+}
+
+void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
+{
+       size_t i;
+
+       spin_lock(&clock->timer_lock);
+
+       if (time_after_eq((unsigned long) atomic_long_read(&clock->now),
+                         timer->expire)) {
+               spin_unlock(&clock->timer_lock);
+               timer->fn(timer);
+               return;
+       }
+
+       for (i = 0; i < clock->timers.used; i++)
+               if (clock->timers.data[i] == timer)
+                       goto out;
+
+       BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
+out:
+       spin_unlock(&clock->timer_lock);
+}
+
+void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
+{
+       size_t i;
+
+       spin_lock(&clock->timer_lock);
+
+       for (i = 0; i < clock->timers.used; i++)
+               if (clock->timers.data[i] == timer) {
+                       heap_del(&clock->timers, i, io_timer_cmp, NULL);
+                       break;
+               }
+
+       spin_unlock(&clock->timer_lock);
+}
+
+struct io_clock_wait {
+       struct io_timer         io_timer;
+       struct timer_list       cpu_timer;
+       struct task_struct      *task;
+       int                     expired;
+};
+
+static void io_clock_wait_fn(struct io_timer *timer)
+{
+       struct io_clock_wait *wait = container_of(timer,
+                               struct io_clock_wait, io_timer);
+
+       wait->expired = 1;
+       wake_up_process(wait->task);
+}
+
+static void io_clock_cpu_timeout(struct timer_list *timer)
+{
+       struct io_clock_wait *wait = container_of(timer,
+                               struct io_clock_wait, cpu_timer);
+
+       wait->expired = 1;
+       wake_up_process(wait->task);
+}
+
+void bch2_io_clock_schedule_timeout(struct io_clock *clock, unsigned long until)
+{
+       struct io_clock_wait wait;
+
+       /* XXX: calculate sleep time rigorously */
+       wait.io_timer.expire    = until;
+       wait.io_timer.fn        = io_clock_wait_fn;
+       wait.task               = current;
+       wait.expired            = 0;
+       bch2_io_timer_add(clock, &wait.io_timer);
+
+       schedule();
+
+       bch2_io_timer_del(clock, &wait.io_timer);
+}
+
+void bch2_kthread_io_clock_wait(struct io_clock *clock,
+                               unsigned long io_until,
+                               unsigned long cpu_timeout)
+{
+       bool kthread = (current->flags & PF_KTHREAD) != 0;
+       struct io_clock_wait wait;
+
+       wait.io_timer.expire    = io_until;
+       wait.io_timer.fn        = io_clock_wait_fn;
+       wait.task               = current;
+       wait.expired            = 0;
+       bch2_io_timer_add(clock, &wait.io_timer);
+
+       timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0);
+
+       if (cpu_timeout != MAX_SCHEDULE_TIMEOUT)
+               mod_timer(&wait.cpu_timer, cpu_timeout + jiffies);
+
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (kthread && kthread_should_stop())
+                       break;
+
+               if (wait.expired)
+                       break;
+
+               schedule();
+               try_to_freeze();
+       }
+
+       __set_current_state(TASK_RUNNING);
+       del_singleshot_timer_sync(&wait.cpu_timer);
+       destroy_timer_on_stack(&wait.cpu_timer);
+       bch2_io_timer_del(clock, &wait.io_timer);
+}
+
+static struct io_timer *get_expired_timer(struct io_clock *clock,
+                                         unsigned long now)
+{
+       struct io_timer *ret = NULL;
+
+       spin_lock(&clock->timer_lock);
+
+       if (clock->timers.used &&
+           time_after_eq(now, clock->timers.data[0]->expire))
+               heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
+
+       spin_unlock(&clock->timer_lock);
+
+       return ret;
+}
+
+void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
+{
+       struct io_timer *timer;
+       unsigned long now = atomic_long_add_return(sectors, &clock->now);
+
+       while ((timer = get_expired_timer(clock, now)))
+               timer->fn(timer);
+}
+
+void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
+{
+       unsigned long now;
+       unsigned i;
+
+       spin_lock(&clock->timer_lock);
+       now = atomic_long_read(&clock->now);
+
+       for (i = 0; i < clock->timers.used; i++)
+               pr_buf(out, "%ps:\t%li\n",
+                      clock->timers.data[i]->fn,
+                      clock->timers.data[i]->expire - now);
+       spin_unlock(&clock->timer_lock);
+}
+
+void bch2_io_clock_exit(struct io_clock *clock)
+{
+       free_heap(&clock->timers);
+       free_percpu(clock->pcpu_buf);
+}
+
+int bch2_io_clock_init(struct io_clock *clock)
+{
+       atomic_long_set(&clock->now, 0);
+       spin_lock_init(&clock->timer_lock);
+
+       clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus();
+
+       clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf);
+       if (!clock->pcpu_buf)
+               return -ENOMEM;
+
+       if (!init_heap(&clock->timers, NR_IO_TIMERS, GFP_KERNEL))
+               return -ENOMEM;
+
+       return 0;
+}
diff --git a/libbcachefs/clock.h b/libbcachefs/clock.h
new file mode 100644 (file)
index 0000000..70a0f74
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CLOCK_H
+#define _BCACHEFS_CLOCK_H
+
+void bch2_io_timer_add(struct io_clock *, struct io_timer *);
+void bch2_io_timer_del(struct io_clock *, struct io_timer *);
+void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long,
+                               unsigned long);
+
+void __bch2_increment_clock(struct io_clock *, unsigned);
+
+static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors,
+                                       int rw)
+{
+       struct io_clock *clock = &c->io_clock[rw];
+
+       if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >=
+                  IO_CLOCK_PCPU_SECTORS))
+               __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0));
+}
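+
+/*
+ * Each CPU batches up to IO_CLOCK_PCPU_SECTORS (128) sectors locally
+ * before folding them into clock->now, so the clock can lag by at most
+ * 128 * num_possible_cpus() sectors (clock->max_slop) - e.g. on a 4-CPU
+ * machine, 512 sectors or 256KiB of IO.
+ */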
+
+void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long);
+
+#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout(condition))                           \
+               __ret = __wait_event_timeout(wq, condition, timeout);   \
+       __ret;                                                          \
+})
+
+void bch2_io_timers_to_text(struct printbuf *, struct io_clock *);
+
+void bch2_io_clock_exit(struct io_clock *);
+int bch2_io_clock_init(struct io_clock *);
+
+#endif /* _BCACHEFS_CLOCK_H */
diff --git a/libbcachefs/clock_types.h b/libbcachefs/clock_types.h
new file mode 100644 (file)
index 0000000..92c740a
--- /dev/null
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CLOCK_TYPES_H
+#define _BCACHEFS_CLOCK_TYPES_H
+
+#include "util.h"
+
+#define NR_IO_TIMERS           (BCH_SB_MEMBERS_MAX * 3)
+
+/*
+ * Clocks/timers in units of sectors of IO:
+ *
+ * Note - they use percpu batching, so they're only approximate.
+ */
+
+struct io_timer;
+typedef void (*io_timer_fn)(struct io_timer *);
+
+struct io_timer {
+       io_timer_fn             fn;
+       unsigned long           expire;
+};
+
+/* Amount to buffer up on a percpu counter */
+#define IO_CLOCK_PCPU_SECTORS  128
+
+typedef HEAP(struct io_timer *)        io_timer_heap;
+
+struct io_clock {
+       atomic_long_t           now;
+       u16 __percpu            *pcpu_buf;
+       unsigned                max_slop;
+
+       spinlock_t              timer_lock;
+       io_timer_heap           timers;
+};
+
+#endif /* _BCACHEFS_CLOCK_TYPES_H */
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
new file mode 100644 (file)
index 0000000..b50d2b0
--- /dev/null
@@ -0,0 +1,629 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "checksum.h"
+#include "compress.h"
+#include "extents.h"
+#include "io.h"
+#include "super-io.h"
+
+#include <linux/lz4.h>
+#include <linux/zlib.h>
+#include <linux/zstd.h>
+
+/* Bounce buffer: */
+struct bbuf {
+       void            *b;
+       enum {
+               BB_NONE,
+               BB_VMAP,
+               BB_KMALLOC,
+               BB_MEMPOOL,
+       }               type;
+       int             rw;
+};
+
+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
+{
+       void *b;
+
+       BUG_ON(size > c->sb.encoded_extent_max << 9);
+
+       b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
+       if (b)
+               return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
+
+       b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
+       if (b)
+               return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
+
+       BUG();
+}
+
+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
+{
+       struct bio_vec bv;
+       struct bvec_iter iter;
+       void *expected_start = NULL;
+
+       __bio_for_each_bvec(bv, bio, iter, start) {
+               if (expected_start &&
+                   expected_start != page_address(bv.bv_page) + bv.bv_offset)
+                       return false;
+
+               expected_start = page_address(bv.bv_page) +
+                       bv.bv_offset + bv.bv_len;
+       }
+
+       return true;
+}
+
+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
+                                      struct bvec_iter start, int rw)
+{
+       struct bbuf ret;
+       struct bio_vec bv;
+       struct bvec_iter iter;
+       unsigned nr_pages = 0;
+       struct page *stack_pages[16];
+       struct page **pages = NULL;
+       void *data;
+
+       BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
+
+       if (!IS_ENABLED(CONFIG_HIGHMEM) &&
+           bio_phys_contig(bio, start))
+               return (struct bbuf) {
+                       .b = page_address(bio_iter_page(bio, start)) +
+                               bio_iter_offset(bio, start),
+                       .type = BB_NONE, .rw = rw
+               };
+
+       /* check if we can map the pages contiguously: */
+       __bio_for_each_segment(bv, bio, iter, start) {
+               if (iter.bi_size != start.bi_size &&
+                   bv.bv_offset)
+                       goto bounce;
+
+               if (bv.bv_len < iter.bi_size &&
+                   bv.bv_offset + bv.bv_len < PAGE_SIZE)
+                       goto bounce;
+
+               nr_pages++;
+       }
+
+       BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
+
+       pages = nr_pages > ARRAY_SIZE(stack_pages)
+               ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
+               : stack_pages;
+       if (!pages)
+               goto bounce;
+
+       nr_pages = 0;
+       __bio_for_each_segment(bv, bio, iter, start)
+               pages[nr_pages++] = bv.bv_page;
+
+       data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+       if (pages != stack_pages)
+               kfree(pages);
+
+       if (data)
+               return (struct bbuf) {
+                       .b = data + bio_iter_offset(bio, start),
+                       .type = BB_VMAP, .rw = rw
+               };
+bounce:
+       ret = __bounce_alloc(c, start.bi_size, rw);
+
+       if (rw == READ)
+               memcpy_from_bio(ret.b, bio, start);
+
+       return ret;
+}
+
+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
+{
+       return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
+}
+
+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
+{
+       switch (buf.type) {
+       case BB_NONE:
+               break;
+       case BB_VMAP:
+               vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
+               break;
+       case BB_KMALLOC:
+               kfree(buf.b);
+               break;
+       case BB_MEMPOOL:
+               mempool_free(buf.b, &c->compression_bounce[buf.rw]);
+               break;
+       }
+}
+
+static inline void zlib_set_workspace(z_stream *strm, void *workspace)
+{
+#ifdef __KERNEL__
+       strm->workspace = workspace;
+#endif
+}
+
+static int __bio_uncompress(struct bch_fs *c, struct bio *src,
+                           void *dst_data, struct bch_extent_crc_unpacked crc)
+{
+       struct bbuf src_data = { NULL };
+       size_t src_len = src->bi_iter.bi_size;
+       size_t dst_len = crc.uncompressed_size << 9;
+       void *workspace;
+       int ret;
+
+       src_data = bio_map_or_bounce(c, src, READ);
+
+       switch (crc.compression_type) {
+       case BCH_COMPRESSION_TYPE_lz4_old:
+       case BCH_COMPRESSION_TYPE_lz4:
+               ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
+                                                 src_len, dst_len, dst_len);
+               if (ret != dst_len)
+                       goto err;
+               break;
+       case BCH_COMPRESSION_TYPE_gzip: {
+               z_stream strm = {
+                       .next_in        = src_data.b,
+                       .avail_in       = src_len,
+                       .next_out       = dst_data,
+                       .avail_out      = dst_len,
+               };
+
+               workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
+
+               zlib_set_workspace(&strm, workspace);
+               zlib_inflateInit2(&strm, -MAX_WBITS);
+               ret = zlib_inflate(&strm, Z_FINISH);
+
+               mempool_free(workspace, &c->decompress_workspace);
+
+               if (ret != Z_STREAM_END)
+                       goto err;
+               break;
+       }
+       case BCH_COMPRESSION_TYPE_zstd: {
+               ZSTD_DCtx *ctx;
+               size_t real_src_len = le32_to_cpup(src_data.b);
+
+               if (real_src_len > src_len - 4)
+                       goto err;
+
+               workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
+               ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
+
+               ret = ZSTD_decompressDCtx(ctx,
+                               dst_data,       dst_len,
+                               src_data.b + 4, real_src_len);
+
+               mempool_free(workspace, &c->decompress_workspace);
+
+               if (ret != dst_len)
+                       goto err;
+               break;
+       }
+       default:
+               BUG();
+       }
+       ret = 0;
+out:
+       bio_unmap_or_unbounce(c, src_data);
+       return ret;
+err:
+       ret = -EIO;
+       goto out;
+}
+
+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
+                               struct bch_extent_crc_unpacked *crc)
+{
+       struct bbuf data = { NULL };
+       size_t dst_len = crc->uncompressed_size << 9;
+
+       /* bio must own its pages: */
+       BUG_ON(!bio->bi_vcnt);
+       BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
+
+       if (crc->uncompressed_size      > c->sb.encoded_extent_max ||
+           crc->compressed_size        > c->sb.encoded_extent_max) {
+               bch_err(c, "error rewriting existing data: extent too big");
+               return -EIO;
+       }
+
+       data = __bounce_alloc(c, dst_len, WRITE);
+
+       if (__bio_uncompress(c, bio, data.b, *crc)) {
+               bch_err(c, "error rewriting existing data: decompression error");
+               bio_unmap_or_unbounce(c, data);
+               return -EIO;
+       }
+
+       /*
+        * XXX: don't have a good way to assert that the bio was allocated with
+        * enough space; we depend on bch2_move_extent doing the right thing
+        */
+       bio->bi_iter.bi_size = crc->live_size << 9;
+
+       memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
+
+       crc->csum_type          = 0;
+       crc->compression_type   = 0;
+       crc->compressed_size    = crc->live_size;
+       crc->uncompressed_size  = crc->live_size;
+       crc->offset             = 0;
+       crc->csum               = (struct bch_csum) { 0, 0 };
+
+       bio_unmap_or_unbounce(c, data);
+       return 0;
+}
+
+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
+                      struct bio *dst, struct bvec_iter dst_iter,
+                      struct bch_extent_crc_unpacked crc)
+{
+       struct bbuf dst_data = { NULL };
+       size_t dst_len = crc.uncompressed_size << 9;
+       int ret = -ENOMEM;
+
+       if (crc.uncompressed_size       > c->sb.encoded_extent_max ||
+           crc.compressed_size         > c->sb.encoded_extent_max)
+               return -EIO;
+
+       dst_data = dst_len == dst_iter.bi_size
+               ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
+               : __bounce_alloc(c, dst_len, WRITE);
+
+       ret = __bio_uncompress(c, src, dst_data.b, crc);
+       if (ret)
+               goto err;
+
+       if (dst_data.type != BB_NONE &&
+           dst_data.type != BB_VMAP)
+               memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
+err:
+       bio_unmap_or_unbounce(c, dst_data);
+       return ret;
+}
+
+static int attempt_compress(struct bch_fs *c,
+                           void *workspace,
+                           void *dst, size_t dst_len,
+                           void *src, size_t src_len,
+                           enum bch_compression_type compression_type)
+{
+       switch (compression_type) {
+       case BCH_COMPRESSION_TYPE_lz4: {
+               int len = src_len;
+               int ret = LZ4_compress_destSize(
+                               src,            dst,
+                               &len,           dst_len,
+                               workspace);
+
+               if (len < src_len)
+                       return -len;
+
+               return ret;
+       }
+       case BCH_COMPRESSION_TYPE_gzip: {
+               z_stream strm = {
+                       .next_in        = src,
+                       .avail_in       = src_len,
+                       .next_out       = dst,
+                       .avail_out      = dst_len,
+               };
+
+               zlib_set_workspace(&strm, workspace);
+               zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+                                 Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
+                                 Z_DEFAULT_STRATEGY);
+
+               if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
+                       return 0;
+
+               if (zlib_deflateEnd(&strm) != Z_OK)
+                       return 0;
+
+               return strm.total_out;
+       }
+       case BCH_COMPRESSION_TYPE_zstd: {
+               ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
+                       ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
+
+               size_t len = ZSTD_compressCCtx(ctx,
+                               dst + 4,        dst_len - 4,
+                               src,            src_len,
+                               c->zstd_params);
+               if (ZSTD_isError(len))
+                       return 0;
+
+               *((__le32 *) dst) = cpu_to_le32(len);
+               return len + 4;
+       }
+       default:
+               BUG();
+       }
+}
+
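+/*
+ * Compress @src into @dst, retrying with progressively less input until the
+ * result fits and actually saves space. Returns the compression type used,
+ * or BCH_COMPRESSION_TYPE_incompressible on failure; single-block bios are
+ * never compressed:
+ */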
+static unsigned __bio_compress(struct bch_fs *c,
+                              struct bio *dst, size_t *dst_len,
+                              struct bio *src, size_t *src_len,
+                              enum bch_compression_type compression_type)
+{
+       struct bbuf src_data = { NULL }, dst_data = { NULL };
+       void *workspace;
+       unsigned pad;
+       int ret = 0;
+
+       BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
+       BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
+
+       /* If it's only one block, don't bother trying to compress: */
+       if (bio_sectors(src) <= c->opts.block_size)
+               return 0;
+
+       dst_data = bio_map_or_bounce(c, dst, WRITE);
+       src_data = bio_map_or_bounce(c, src, READ);
+
+       workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);
+
+       *src_len = src->bi_iter.bi_size;
+       *dst_len = dst->bi_iter.bi_size;
+
+       /*
+        * XXX: this algorithm sucks when the compression code doesn't tell us
+        * how much input would fit, the way LZ4 does:
+        */
+       while (1) {
+               if (*src_len <= block_bytes(c)) {
+                       ret = -1;
+                       break;
+               }
+
+               ret = attempt_compress(c, workspace,
+                                      dst_data.b,      *dst_len,
+                                      src_data.b,      *src_len,
+                                      compression_type);
+               if (ret > 0) {
+                       *dst_len = ret;
+                       ret = 0;
+                       break;
+               }
+
+               /* Didn't fit: should we retry with a smaller amount? */
+               if (*src_len <= *dst_len) {
+                       ret = -1;
+                       break;
+               }
+
+               /*
+                * If ret is negative, it's a hint as to how much data would fit
+                */
+               BUG_ON(-ret >= *src_len);
+
+               if (ret < 0)
+                       *src_len = -ret;
+               else
+                       *src_len -= (*src_len - *dst_len) / 2;
+               *src_len = round_down(*src_len, block_bytes(c));
+       }
+
+       mempool_free(workspace, &c->compress_workspace[compression_type]);
+
+       if (ret)
+               goto err;
+
+       /* Didn't get smaller: */
+       if (round_up(*dst_len, block_bytes(c)) >= *src_len)
+               goto err;
+
+       pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
+
+       memset(dst_data.b + *dst_len, 0, pad);
+       *dst_len += pad;
+
+       if (dst_data.type != BB_NONE &&
+           dst_data.type != BB_VMAP)
+               memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
+
+       BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
+       BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
+       BUG_ON(*dst_len & (block_bytes(c) - 1));
+       BUG_ON(*src_len & (block_bytes(c) - 1));
+out:
+       bio_unmap_or_unbounce(c, src_data);
+       bio_unmap_or_unbounce(c, dst_data);
+       return compression_type;
+err:
+       compression_type = BCH_COMPRESSION_TYPE_incompressible;
+       goto out;
+}
+
+unsigned bch2_bio_compress(struct bch_fs *c,
+                          struct bio *dst, size_t *dst_len,
+                          struct bio *src, size_t *src_len,
+                          unsigned compression_type)
+{
+       unsigned orig_dst = dst->bi_iter.bi_size;
+       unsigned orig_src = src->bi_iter.bi_size;
+
+       /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
+       src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
+                                    c->sb.encoded_extent_max << 9);
+       /* Don't generate a bigger output than input: */
+       dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
+
+       if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
+               compression_type = BCH_COMPRESSION_TYPE_lz4;
+
+       compression_type =
+               __bio_compress(c, dst, dst_len, src, src_len, compression_type);
+
+       dst->bi_iter.bi_size = orig_dst;
+       src->bi_iter.bi_size = orig_src;
+       return compression_type;
+}
+
+static int __bch2_fs_compress_init(struct bch_fs *, u64);
+
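+/*
+ * Map compression options to superblock feature bits; BCH_FEATURE_none is
+ * defined temporarily so the x() expansion below has an entry for the "none"
+ * option (slot 0 is never used as a real feature bit):
+ */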
+#define BCH_FEATURE_none       0
+
+static const unsigned bch2_compression_opt_to_feature[] = {
+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
+       BCH_COMPRESSION_OPTS()
+#undef x
+};
+
+#undef BCH_FEATURE_none
+
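+/*
+ * Enable superblock feature bits @f, initializing the corresponding
+ * compression workspaces first; the feature check is repeated under sb_lock
+ * so concurrent callers only do the work once:
+ */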
+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
+{
+       int ret = 0;
+
+       if ((c->sb.features & f) == f)
+               return 0;
+
+       mutex_lock(&c->sb_lock);
+
+       if ((c->sb.features & f) == f) {
+               mutex_unlock(&c->sb_lock);
+               return 0;
+       }
+
+       ret = __bch2_fs_compress_init(c, c->sb.features|f);
+       if (ret) {
+               mutex_unlock(&c->sb_lock);
+               return ret;
+       }
+
+       c->disk_sb.sb->features[0] |= cpu_to_le64(f);
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       return 0;
+}
+
+int bch2_check_set_has_compressed_data(struct bch_fs *c,
+                                      unsigned compression_type)
+{
+       BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
+
+       return compression_type
+               ? __bch2_check_set_has_compressed_data(c,
+                               1ULL << bch2_compression_opt_to_feature[compression_type])
+               : 0;
+}
+
+void bch2_fs_compress_exit(struct bch_fs *c)
+{
+       unsigned i;
+
+       mempool_exit(&c->decompress_workspace);
+       for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
+               mempool_exit(&c->compress_workspace[i]);
+       mempool_exit(&c->compression_bounce[WRITE]);
+       mempool_exit(&c->compression_bounce[READ]);
+}
+
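+/*
+ * Allocate bounce buffers and per-algorithm workspace mempools for every
+ * compression type enabled in @features; already-initialized pools are left
+ * alone, so this may be called again as new feature bits are added:
+ */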
+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
+{
+       size_t max_extent = c->sb.encoded_extent_max << 9;
+       size_t decompress_workspace_size = 0;
+       bool decompress_workspace_needed;
+       ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
+       struct {
+               unsigned        feature;
+               unsigned        type;
+               size_t          compress_workspace;
+               size_t          decompress_workspace;
+       } compression_types[] = {
+               { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 },
+               { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
+                       zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
+                       zlib_inflate_workspacesize(), },
+               { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
+                       ZSTD_CCtxWorkspaceBound(params.cParams),
+                       ZSTD_DCtxWorkspaceBound() },
+       }, *i;
+       int ret = 0;
+
+       pr_verbose_init(c->opts, "");
+
+       c->zstd_params = params;
+
+       for (i = compression_types;
+            i < compression_types + ARRAY_SIZE(compression_types);
+            i++)
+               if (features & (1 << i->feature))
+                       goto have_compressed;
+
+       goto out;
+have_compressed:
+
+       if (!mempool_initialized(&c->compression_bounce[READ])) {
+               ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
+                                                 1, max_extent);
+               if (ret)
+                       goto out;
+       }
+
+       if (!mempool_initialized(&c->compression_bounce[WRITE])) {
+               ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
+                                                 1, max_extent);
+               if (ret)
+                       goto out;
+       }
+
+       for (i = compression_types;
+            i < compression_types + ARRAY_SIZE(compression_types);
+            i++) {
+               decompress_workspace_size =
+                       max(decompress_workspace_size, i->decompress_workspace);
+
+               if (!(features & (1 << i->feature)))
+                       continue;
+
+               if (i->decompress_workspace)
+                       decompress_workspace_needed = true;
+
+               if (mempool_initialized(&c->compress_workspace[i->type]))
+                       continue;
+
+               ret = mempool_init_kvpmalloc_pool(
+                               &c->compress_workspace[i->type],
+                               1, i->compress_workspace);
+               if (ret)
+                       goto out;
+       }
+
+       if (!mempool_initialized(&c->decompress_workspace)) {
+               ret = mempool_init_kvpmalloc_pool(
+                               &c->decompress_workspace,
+                               1, decompress_workspace_size);
+               if (ret)
+                       goto out;
+       }
+out:
+       pr_verbose_init(c->opts, "ret %i", ret);
+       return ret;
+}
+
+int bch2_fs_compress_init(struct bch_fs *c)
+{
+       u64 f = c->sb.features;
+
+       if (c->opts.compression)
+               f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];
+
+       if (c->opts.background_compression)
+               f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
+
+       return __bch2_fs_compress_init(c, f);
+}
diff --git a/libbcachefs/compress.h b/libbcachefs/compress.h
new file mode 100644 (file)
index 0000000..4bab1f6
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_COMPRESS_H
+#define _BCACHEFS_COMPRESS_H
+
+#include "extents_types.h"
+
+int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
+                               struct bch_extent_crc_unpacked *);
+int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
+                      struct bvec_iter, struct bch_extent_crc_unpacked);
+unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *,
+                          struct bio *, size_t *, unsigned);
+
+int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
+void bch2_fs_compress_exit(struct bch_fs *);
+int bch2_fs_compress_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_COMPRESS_H */
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
new file mode 100644 (file)
index 0000000..aa10591
--- /dev/null
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Assorted bcachefs debug code
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "debug.h"
+#include "error.h"
+#include "extents.h"
+#include "fsck.h"
+#include "inode.h"
+#include "io.h"
+#include "super.h"
+
+#include <linux/console.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+
+static struct dentry *bch_debug;
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
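+/*
+ * Read a btree node back from disk and compare it against the in-memory
+ * version; on any mismatch, dump both versions and panic:
+ */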
+void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
+{
+       struct btree *v = c->verify_data;
+       struct btree_node *n_ondisk, *n_sorted, *n_inmemory;
+       struct bset *sorted, *inmemory;
+       struct extent_ptr_decoded pick;
+       struct bch_dev *ca;
+       struct bio *bio;
+
+       if (c->opts.nochanges)
+               return;
+
+       btree_node_io_lock(b);
+       mutex_lock(&c->verify_lock);
+
+       n_ondisk = c->verify_ondisk;
+       n_sorted = c->verify_data->data;
+       n_inmemory = b->data;
+
+       bkey_copy(&v->key, &b->key);
+       v->written      = 0;
+       v->c.level      = b->c.level;
+       v->c.btree_id   = b->c.btree_id;
+       bch2_btree_keys_init(v, &c->expensive_debug_checks);
+
+       if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
+                                      NULL, &pick) <= 0)
+               goto out;
+
+       ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+       if (!bch2_dev_get_ioref(ca, READ))
+               goto out;
+
+       bio = bio_alloc_bioset(GFP_NOIO,
+                       buf_pages(n_sorted, btree_bytes(c)),
+                       &c->btree_bio);
+       bio_set_dev(bio, ca->disk_sb.bdev);
+       bio->bi_opf             = REQ_OP_READ|REQ_META;
+       bio->bi_iter.bi_sector  = pick.ptr.offset;
+       bch2_bio_map(bio, n_sorted, btree_bytes(c));
+
+       submit_bio_wait(bio);
+
+       bio_put(bio);
+       percpu_ref_put(&ca->io_ref);
+
+       memcpy(n_ondisk, n_sorted, btree_bytes(c));
+
+       if (bch2_btree_node_read_done(c, v, false))
+               goto out;
+
+       n_sorted = c->verify_data->data;
+       sorted = &n_sorted->keys;
+       inmemory = &n_inmemory->keys;
+
+       if (inmemory->u64s != sorted->u64s ||
+           memcmp(inmemory->start,
+                  sorted->start,
+                  vstruct_end(inmemory) - (void *) inmemory->start)) {
+               unsigned offset = 0, sectors;
+               struct bset *i;
+               unsigned j;
+
+               console_lock();
+
+               printk(KERN_ERR "*** in memory:\n");
+               bch2_dump_bset(c, b, inmemory, 0);
+
+               printk(KERN_ERR "*** read back in:\n");
+               bch2_dump_bset(c, v, sorted, 0);
+
+               while (offset < b->written) {
+                       if (!offset) {
+                               i = &n_ondisk->keys;
+                               sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
+                                       c->block_bits;
+                       } else {
+                               struct btree_node_entry *bne =
+                                       (void *) n_ondisk + (offset << 9);
+                               i = &bne->keys;
+
+                               sectors = vstruct_blocks(bne, c->block_bits) <<
+                                       c->block_bits;
+                       }
+
+                       printk(KERN_ERR "*** on disk block %u:\n", offset);
+                       bch2_dump_bset(c, b, i, offset);
+
+                       offset += sectors;
+               }
+
+               printk(KERN_ERR "*** block %u/%u not written\n",
+                      offset >> c->block_bits, btree_blocks(c));
+
+               for (j = 0; j < le16_to_cpu(inmemory->u64s); j++)
+                       if (inmemory->_data[j] != sorted->_data[j])
+                               break;
+
+               printk(KERN_ERR "b->written %u\n", b->written);
+
+               console_unlock();
+               panic("verify failed at %u\n", j);
+       }
+out:
+       mutex_unlock(&c->verify_lock);
+       btree_node_io_unlock(b);
+}
+
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+
+/* XXX: bch_fs refcounting */
+
+struct dump_iter {
+       struct bpos             from;
+       struct bch_fs   *c;
+       enum btree_id           id;
+
+       char                    buf[PAGE_SIZE];
+       size_t                  bytes;  /* what's currently in buf */
+
+       char __user             *ubuf;  /* destination user buffer */
+       size_t                  size;   /* size of requested read */
+       ssize_t                 ret;    /* bytes read so far */
+};
+
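+/* Copy as much buffered output to userspace as the current read allows: */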
+static int flush_buf(struct dump_iter *i)
+{
+       if (i->bytes) {
+               size_t bytes = min(i->bytes, i->size);
+               int err = copy_to_user(i->ubuf, i->buf, bytes);
+
+               if (err)
+                       return err;
+
+               i->ret   += bytes;
+               i->ubuf  += bytes;
+               i->size  -= bytes;
+               i->bytes -= bytes;
+               memmove(i->buf, i->buf + bytes, i->bytes);
+       }
+
+       return 0;
+}
+
+static int bch2_dump_open(struct inode *inode, struct file *file)
+{
+       struct btree_debug *bd = inode->i_private;
+       struct dump_iter *i;
+
+       i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
+       if (!i)
+               return -ENOMEM;
+
+       file->private_data = i;
+       i->from = POS_MIN;
+       i->c    = container_of(bd, struct bch_fs, btree_debug[bd->id]);
+       i->id   = bd->id;
+
+       return 0;
+}
+
+static int bch2_dump_release(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+       return 0;
+}
+
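+/*
+ * Debugfs read: walk the keys of a btree from wherever the previous read
+ * left off, formatting one key per line into the user buffer:
+ */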
+static ssize_t bch2_read_btree(struct file *file, char __user *buf,
+                              size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       err = flush_buf(i);
+       if (err)
+               return err;
+
+       if (!i->size)
+               return i->ret;
+
+       bch2_trans_init(&trans, i->c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
+       k = bch2_btree_iter_peek(iter);
+
+       while (k.k && !(err = bkey_err(k))) {
+               bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k);
+               i->bytes = strlen(i->buf);
+               BUG_ON(i->bytes >= PAGE_SIZE);
+               i->buf[i->bytes] = '\n';
+               i->bytes++;
+
+               k = bch2_btree_iter_next(iter);
+               i->from = iter->pos;
+
+               err = flush_buf(i);
+               if (err)
+                       break;
+
+               if (!i->size)
+                       break;
+       }
+       bch2_trans_exit(&trans);
+
+       return err < 0 ? err : i->ret;
+}
+
+static const struct file_operations btree_debug_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_read_btree,
+};
+
+static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
+                                      size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       err = flush_buf(i);
+       if (err)
+               return err;
+
+       if (!i->size || !bkey_cmp(POS_MAX, i->from))
+               return i->ret;
+
+       bch2_trans_init(&trans, i->c, 0, 0);
+
+       for_each_btree_node(&trans, iter, i->id, i->from, 0, b) {
+               bch2_btree_node_to_text(&PBUF(i->buf), i->c, b);
+               i->bytes = strlen(i->buf);
+               err = flush_buf(i);
+               if (err)
+                       break;
+
+               /*
+                * there's no easy way to correctly restart a btree node
+                * traversal across all nodes; resume from just after this
+                * node instead:
+                */
+               i->from = bkey_cmp(POS_MAX, b->key.k.p)
+                       ? bkey_successor(b->key.k.p)
+                       : b->key.k.p;
+
+               if (!i->size)
+                       break;
+       }
+       bch2_trans_exit(&trans);
+
+       return err < 0 ? err : i->ret;
+}
+
+static const struct file_operations btree_format_debug_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_read_btree_formats,
+};
+
+static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
+                                      size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct btree *prev_node = NULL;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       err = flush_buf(i);
+       if (err)
+               return err;
+
+       if (!i->size)
+               return i->ret;
+
+       bch2_trans_init(&trans, i->c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
+
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(err = bkey_err(k))) {
+               struct btree_iter_level *l = &iter->l[0];
+               struct bkey_packed *_k =
+                       bch2_btree_node_iter_peek(&l->iter, l->b);
+
+               if (l->b != prev_node) {
+                       bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b);
+                       i->bytes = strlen(i->buf);
+                       err = flush_buf(i);
+                       if (err)
+                               break;
+               }
+               prev_node = l->b;
+
+               bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k);
+               i->bytes = strlen(i->buf);
+               err = flush_buf(i);
+               if (err)
+                       break;
+
+               bch2_btree_iter_next(iter);
+               i->from = iter->pos;
+
+               err = flush_buf(i);
+               if (err)
+                       break;
+
+               if (!i->size)
+                       break;
+       }
+       bch2_trans_exit(&trans);
+
+       return err < 0 ? err : i->ret;
+}
+
+static const struct file_operations bfloat_failed_debug_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_read_bfloat_failed,
+};
+
+void bch2_fs_debug_exit(struct bch_fs *c)
+{
+       if (!IS_ERR_OR_NULL(c->debug))
+               debugfs_remove_recursive(c->debug);
+}
+
+void bch2_fs_debug_init(struct bch_fs *c)
+{
+       struct btree_debug *bd;
+       char name[100];
+
+       if (IS_ERR_OR_NULL(bch_debug))
+               return;
+
+       snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
+       c->debug = debugfs_create_dir(name, bch_debug);
+       if (IS_ERR_OR_NULL(c->debug))
+               return;
+
+       for (bd = c->btree_debug;
+            bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
+            bd++) {
+               bd->id = bd - c->btree_debug;
+               bd->btree = debugfs_create_file(bch2_btree_ids[bd->id],
+                                               0400, c->debug, bd,
+                                               &btree_debug_ops);
+
+               snprintf(name, sizeof(name), "%s-formats",
+                        bch2_btree_ids[bd->id]);
+
+               bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd,
+                                                      &btree_format_debug_ops);
+
+               snprintf(name, sizeof(name), "%s-bfloat-failed",
+                        bch2_btree_ids[bd->id]);
+
+               bd->failed = debugfs_create_file(name, 0400, c->debug, bd,
+                                                &bfloat_failed_debug_ops);
+       }
+}
+
+#endif
+
+void bch2_debug_exit(void)
+{
+       if (!IS_ERR_OR_NULL(bch_debug))
+               debugfs_remove_recursive(bch_debug);
+}
+
+int __init bch2_debug_init(void)
+{
+       int ret = 0;
+
+       bch_debug = debugfs_create_dir("bcachefs", NULL);
+       return ret;
+}
diff --git a/libbcachefs/debug.h b/libbcachefs/debug.h
new file mode 100644 (file)
index 0000000..56c2d1a
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DEBUG_H
+#define _BCACHEFS_DEBUG_H
+
+#include "bcachefs.h"
+
+struct bio;
+struct btree;
+struct bch_fs;
+
+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name;
+BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+#define BCH_DEBUG_PARAM(name, description)                             \
+       static inline bool name(struct bch_fs *c)                       \
+       { return bch2_##name || c->name;        }
+BCH_DEBUG_PARAMS_ALWAYS()
+#undef BCH_DEBUG_PARAM
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+#define BCH_DEBUG_PARAM(name, description)                             \
+       static inline bool name(struct bch_fs *c)                       \
+       { return bch2_##name || c->name;        }
+BCH_DEBUG_PARAMS_DEBUG()
+#undef BCH_DEBUG_PARAM
+
+void __bch2_btree_verify(struct bch_fs *, struct btree *);
+
+#define bypass_torture_test(d)         ((d)->bypass_torture_test)
+
+#else /* DEBUG */
+
+#define BCH_DEBUG_PARAM(name, description)                             \
+       static inline bool name(struct bch_fs *c) { return false; }
+BCH_DEBUG_PARAMS_DEBUG()
+#undef BCH_DEBUG_PARAM
+
+static inline void __bch2_btree_verify(struct bch_fs *c, struct btree *b) {}
+
+#define bypass_torture_test(d)         0
+
+#endif
+
+static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
+{
+       if (verify_btree_ondisk(c))
+               __bch2_btree_verify(c, b);
+}
+
+#ifdef CONFIG_DEBUG_FS
+void bch2_fs_debug_exit(struct bch_fs *);
+void bch2_fs_debug_init(struct bch_fs *);
+#else
+static inline void bch2_fs_debug_exit(struct bch_fs *c) {}
+static inline void bch2_fs_debug_init(struct bch_fs *c) {}
+#endif
+
+void bch2_debug_exit(void);
+int bch2_debug_init(void);
+
+#endif /* _BCACHEFS_DEBUG_H */
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
new file mode 100644 (file)
index 0000000..f34bfda
--- /dev/null
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_update.h"
+#include "extents.h"
+#include "dirent.h"
+#include "fs.h"
+#include "keylist.h"
+#include "str_hash.h"
+
+#include <linux/dcache.h>
+
+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
+{
+       unsigned len = bkey_val_bytes(d.k) -
+               offsetof(struct bch_dirent, d_name);
+
+       return strnlen(d.v->d_name, len);
+}
+
+static u64 bch2_dirent_hash(const struct bch_hash_info *info,
+                           const struct qstr *name)
+{
+       struct bch_str_hash_ctx ctx;
+
+       bch2_str_hash_init(&ctx, info);
+       bch2_str_hash_update(&ctx, info, name->name, name->len);
+
+       /* [0,2) reserved for dots */
+       return max_t(u64, bch2_str_hash_end(&ctx, info), 2);
+}
+
+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
+{
+       return bch2_dirent_hash(info, key);
+}
+
+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
+{
+       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+       struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+
+       return bch2_dirent_hash(info, &name);
+}
+
+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
+{
+       struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
+       int len = bch2_dirent_name_bytes(l);
+       const struct qstr *r = _r;
+
+       return len - r->len ?: memcmp(l.v->d_name, r->name, len);
+}
+
+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
+{
+       struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
+       struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
+       int l_len = bch2_dirent_name_bytes(l);
+       int r_len = bch2_dirent_name_bytes(r);
+
+       return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
+}
+
+const struct bch_hash_desc bch2_dirent_hash_desc = {
+       .btree_id       = BTREE_ID_DIRENTS,
+       .key_type       = KEY_TYPE_dirent,
+       .hash_key       = dirent_hash_key,
+       .hash_bkey      = dirent_hash_bkey,
+       .cmp_key        = dirent_cmp_key,
+       .cmp_bkey       = dirent_cmp_bkey,
+};
+
+const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+       unsigned len;
+
+       if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
+               return "value too small";
+
+       len = bch2_dirent_name_bytes(d);
+       if (!len)
+               return "empty name";
+
+       /*
+        * older versions of bcachefs were buggy and created dirent
+        * keys that were bigger than necessary:
+        */
+       if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7))
+               return "value too big";
+
+       if (len > BCH_NAME_MAX)
+               return "dirent name too big";
+
+       return NULL;
+}
+
+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
+                        struct bkey_s_c k)
+{
+       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+
+       bch_scnmemcpy(out, d.v->d_name,
+                     bch2_dirent_name_bytes(d));
+       pr_buf(out, " -> %llu type %u", d.v->d_inum, d.v->d_type);
+}
+
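+/*
+ * Allocate and initialize a dirent key pointing at inode @dst, zero-padding
+ * the name out to the end of the value:
+ */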
+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
+                               u8 type, const struct qstr *name, u64 dst)
+{
+       struct bkey_i_dirent *dirent;
+       unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
+
+       if (name->len > BCH_NAME_MAX)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       BUG_ON(u64s > U8_MAX);
+
+       dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
+       if (IS_ERR(dirent))
+               return dirent;
+
+       bkey_dirent_init(&dirent->k_i);
+       dirent->k.u64s = u64s;
+       dirent->v.d_inum = cpu_to_le64(dst);
+       dirent->v.d_type = type;
+
+       memcpy(dirent->v.d_name, name->name, name->len);
+       memset(dirent->v.d_name + name->len, 0,
+              bkey_val_bytes(&dirent->k) -
+              offsetof(struct bch_dirent, d_name) -
+              name->len);
+
+       EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+
+       return dirent;
+}
+
+int bch2_dirent_create(struct btree_trans *trans,
+                      u64 dir_inum, const struct bch_hash_info *hash_info,
+                      u8 type, const struct qstr *name, u64 dst_inum,
+                      int flags)
+{
+       struct bkey_i_dirent *dirent;
+       int ret;
+
+       dirent = dirent_create_key(trans, type, name, dst_inum);
+       ret = PTR_ERR_OR_ZERO(dirent);
+       if (ret)
+               return ret;
+
+       return bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
+                            dir_inum, &dirent->k_i, flags);
+}
+
+static void dirent_copy_target(struct bkey_i_dirent *dst,
+                              struct bkey_s_c_dirent src)
+{
+       dst->v.d_inum = src.v->d_inum;
+       dst->v.d_type = src.v->d_type;
+}
+
+int bch2_dirent_rename(struct btree_trans *trans,
+                      u64 src_dir, struct bch_hash_info *src_hash,
+                      u64 dst_dir, struct bch_hash_info *dst_hash,
+                      const struct qstr *src_name, u64 *src_inum,
+                      const struct qstr *dst_name, u64 *dst_inum,
+                      enum bch_rename_mode mode)
+{
+       struct btree_iter *src_iter = NULL, *dst_iter = NULL;
+       struct bkey_s_c old_src, old_dst;
+       struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
+       struct bpos dst_pos =
+               POS(dst_dir, bch2_dirent_hash(dst_hash, dst_name));
+       int ret = 0;
+
+       *src_inum = *dst_inum = 0;
+
+       /*
+        * Lookup dst:
+        *
+        * Note that in BCH_RENAME mode, we're _not_ checking if
+        * the target already exists - we're relying on the VFS
+        * to do that check for us for correctness:
+        */
+       dst_iter = mode == BCH_RENAME
+               ? bch2_hash_hole(trans, bch2_dirent_hash_desc,
+                                dst_hash, dst_dir, dst_name)
+               : bch2_hash_lookup(trans, bch2_dirent_hash_desc,
+                                  dst_hash, dst_dir, dst_name,
+                                  BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(dst_iter);
+       if (ret)
+               goto out;
+
+       old_dst = bch2_btree_iter_peek_slot(dst_iter);
+
+       if (mode != BCH_RENAME)
+               *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum);
+
+       /* Lookup src: */
+       src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc,
+                                   src_hash, src_dir, src_name,
+                                   BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(src_iter);
+       if (ret)
+               goto out;
+
+       old_src = bch2_btree_iter_peek_slot(src_iter);
+       *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum);
+
+       /* Create new dst key: */
+       new_dst = dirent_create_key(trans, 0, dst_name, 0);
+       ret = PTR_ERR_OR_ZERO(new_dst);
+       if (ret)
+               goto out;
+
+       dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
+       new_dst->k.p = dst_iter->pos;
+
+       /* Create new src key: */
+       if (mode == BCH_RENAME_EXCHANGE) {
+               new_src = dirent_create_key(trans, 0, src_name, 0);
+               ret = PTR_ERR_OR_ZERO(new_src);
+               if (ret)
+                       goto out;
+
+               dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
+               new_src->k.p = src_iter->pos;
+       } else {
+               new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+               ret = PTR_ERR_OR_ZERO(new_src);
+               if (ret)
+                       goto out;
+
+               bkey_init(&new_src->k);
+               new_src->k.p = src_iter->pos;
+
+               if (bkey_cmp(dst_pos, src_iter->pos) <= 0 &&
+                   bkey_cmp(src_iter->pos, dst_iter->pos) < 0) {
+                       /*
+                        * We have a hash collision for the new dst key,
+                        * and new_src - the key we're deleting - is between
+                        * new_dst's hashed slot and the slot we're going to be
+                        * inserting it into - oops.  This will break the hash
+                        * table if we don't deal with it:
+                        */
+                       if (mode == BCH_RENAME) {
+                               /*
+                                * If we're not overwriting, we can just insert
+                                * new_dst at the src position:
+                                */
+                               new_dst->k.p = src_iter->pos;
+                               bch2_trans_update(trans, src_iter,
+                                                 &new_dst->k_i, 0);
+                               goto out;
+                       } else {
+                               /* If we're overwriting, we can't insert new_dst
+                                * at a different slot because it has to
+                                * overwrite old_dst - just make sure to use a
+                                * whiteout when deleting src:
+                                */
+                               new_src->k.type = KEY_TYPE_whiteout;
+                       }
+               } else {
+                       /* Check if we need a whiteout to delete src: */
+                       ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
+                                                      src_hash, src_iter);
+                       if (ret < 0)
+                               goto out;
+
+                       if (ret)
+                               new_src->k.type = KEY_TYPE_whiteout;
+               }
+       }
+
+       bch2_trans_update(trans, src_iter, &new_src->k_i, 0);
+       bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0);
+out:
+       bch2_trans_iter_put(trans, src_iter);
+       bch2_trans_iter_put(trans, dst_iter);
+       return ret;
+}
+
+int bch2_dirent_delete_at(struct btree_trans *trans,
+                         const struct bch_hash_info *hash_info,
+                         struct btree_iter *iter)
+{
+       return bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+                                  hash_info, iter);
+}
+
+struct btree_iter *
+__bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum,
+                          const struct bch_hash_info *hash_info,
+                          const struct qstr *name, unsigned flags)
+{
+       return bch2_hash_lookup(trans, bch2_dirent_hash_desc,
+                               hash_info, dir_inum, name, flags);
+}
+
+u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
+                      const struct bch_hash_info *hash_info,
+                      const struct qstr *name)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 inum = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = __bch2_dirent_lookup_trans(&trans, dir_inum,
+                                         hash_info, name, 0);
+       if (IS_ERR(iter)) {
+               BUG_ON(PTR_ERR(iter) == -EINTR);
+               goto out;
+       }
+
+       k = bch2_btree_iter_peek_slot(iter);
+       inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
+out:
+       bch2_trans_exit(&trans);
+       return inum;
+}
+
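+/* Returns -ENOTEMPTY if the directory contains any dirents: */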
+int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       for_each_btree_key(trans, iter, BTREE_ID_DIRENTS,
+                          POS(dir_inum, 0), 0, k, ret) {
+               if (k.k->p.inode > dir_inum)
+                       break;
+
+               if (k.k->type == KEY_TYPE_dirent) {
+                       ret = -ENOTEMPTY;
+                       break;
+               }
+       }
+       bch2_trans_iter_put(trans, iter);
+
+       return ret;
+}
+
+int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_s_c_dirent dirent;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
+                          POS(inum, ctx->pos), 0, k, ret) {
+               if (k.k->p.inode > inum)
+                       break;
+
+               if (k.k->type != KEY_TYPE_dirent)
+                       continue;
+
+               dirent = bkey_s_c_to_dirent(k);
+
+               /*
+                * XXX: dir_emit() can fault and block, while we're holding
+                * locks
+                */
+               ctx->pos = dirent.k->p.offset;
+               if (!dir_emit(ctx, dirent.v->d_name,
+                             bch2_dirent_name_bytes(dirent),
+                             le64_to_cpu(dirent.v->d_inum),
+                             dirent.v->d_type))
+                       break;
+               ctx->pos = dirent.k->p.offset + 1;
+       }
+       ret = bch2_trans_exit(&trans) ?: ret;
+
+       return ret;
+}
diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h
new file mode 100644 (file)
index 0000000..3476937
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DIRENT_H
+#define _BCACHEFS_DIRENT_H
+
+#include "str_hash.h"
+
+extern const struct bch_hash_desc bch2_dirent_hash_desc;
+
+const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_dirent (struct bkey_ops) {       \
+       .key_invalid    = bch2_dirent_invalid,          \
+       .val_to_text    = bch2_dirent_to_text,          \
+}
+
+struct qstr;
+struct file;
+struct dir_context;
+struct bch_fs;
+struct bch_hash_info;
+struct bch_inode_info;
+
+unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent);
+
+static inline unsigned dirent_val_u64s(unsigned len)
+{
+       return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len,
+                           sizeof(u64));
+}
+
+int bch2_dirent_create(struct btree_trans *, u64,
+                      const struct bch_hash_info *, u8,
+                      const struct qstr *, u64, int);
+
+int bch2_dirent_delete_at(struct btree_trans *,
+                         const struct bch_hash_info *,
+                         struct btree_iter *);
+
+enum bch_rename_mode {
+       BCH_RENAME,
+       BCH_RENAME_OVERWRITE,
+       BCH_RENAME_EXCHANGE,
+};
+
+int bch2_dirent_rename(struct btree_trans *,
+                      u64, struct bch_hash_info *,
+                      u64, struct bch_hash_info *,
+                      const struct qstr *, u64 *,
+                      const struct qstr *, u64 *,
+                      enum bch_rename_mode);
+
+struct btree_iter *
+__bch2_dirent_lookup_trans(struct btree_trans *, u64,
+                          const struct bch_hash_info *,
+                          const struct qstr *, unsigned);
+u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *,
+                      const struct qstr *);
+
+int bch2_empty_dir_trans(struct btree_trans *, u64);
+int bch2_readdir(struct bch_fs *, u64, struct dir_context *);
+
+#endif /* _BCACHEFS_DIRENT_H */
diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c
new file mode 100644 (file)
index 0000000..c52b6fa
--- /dev/null
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "disk_groups.h"
+#include "super-io.h"
+
+#include <linux/sort.h>
+
+static int group_cmp(const void *_l, const void *_r)
+{
+       const struct bch_disk_group *l = _l;
+       const struct bch_disk_group *r = _r;
+
+       return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) -
+               (BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?:
+               ((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) -
+                (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?:
+               strncmp(l->label, r->label, sizeof(l->label));
+}
+
+static const char *bch2_sb_disk_groups_validate(struct bch_sb *sb,
+                                               struct bch_sb_field *f)
+{
+       struct bch_sb_field_disk_groups *groups =
+               field_to_type(f, disk_groups);
+       struct bch_disk_group *g, *sorted = NULL;
+       struct bch_sb_field_members *mi;
+       struct bch_member *m;
+       unsigned i, nr_groups, len;
+       const char *err = NULL;
+
+       mi              = bch2_sb_get_members(sb);
+       groups          = bch2_sb_get_disk_groups(sb);
+       nr_groups       = disk_groups_nr(groups);
+
+       for (m = mi->members;
+            m < mi->members + sb->nr_devices;
+            m++) {
+               unsigned g;
+
+               if (!BCH_MEMBER_GROUP(m))
+                       continue;
+
+               g = BCH_MEMBER_GROUP(m) - 1;
+
+               if (g >= nr_groups ||
+                   BCH_GROUP_DELETED(&groups->entries[g]))
+                       return "disk has invalid group";
+       }
+
+       if (!nr_groups)
+               return NULL;
+
+       for (g = groups->entries;
+            g < groups->entries + nr_groups;
+            g++) {
+               if (BCH_GROUP_DELETED(g))
+                       continue;
+
+               len = strnlen(g->label, sizeof(g->label));
+               if (!len) {
+                       err = "group with empty label";
+                       goto err;
+               }
+       }
+
+       sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL);
+       if (!sorted)
+               return "cannot allocate memory";
+
+       memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted));
+       sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL);
+
+       for (i = 0; i + 1 < nr_groups; i++)
+               if (!BCH_GROUP_DELETED(sorted + i) &&
+                   !group_cmp(sorted + i, sorted + i + 1)) {
+                       err = "duplicate groups";
+                       goto err;
+               }
+
+       err = NULL;
+err:
+       kfree(sorted);
+       return err;
+}
+
+static void bch2_sb_disk_groups_to_text(struct printbuf *out,
+                                       struct bch_sb *sb,
+                                       struct bch_sb_field *f)
+{
+       struct bch_sb_field_disk_groups *groups =
+               field_to_type(f, disk_groups);
+       struct bch_disk_group *g;
+       unsigned nr_groups = disk_groups_nr(groups);
+
+       for (g = groups->entries;
+            g < groups->entries + nr_groups;
+            g++) {
+               if (g != groups->entries)
+                       pr_buf(out, " ");
+
+               if (BCH_GROUP_DELETED(g))
+                       pr_buf(out, "[deleted]");
+               else
+                       pr_buf(out, "[parent %llu name %s]",
+                              BCH_GROUP_PARENT(g), g->label);
+       }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = {
+       .validate       = bch2_sb_disk_groups_validate,
+       .to_text        = bch2_sb_disk_groups_to_text
+};
+
+int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
+{
+       struct bch_sb_field_members *mi;
+       struct bch_sb_field_disk_groups *groups;
+       struct bch_disk_groups_cpu *cpu_g, *old_g;
+       unsigned i, g, nr_groups;
+
+       lockdep_assert_held(&c->sb_lock);
+
+       mi              = bch2_sb_get_members(c->disk_sb.sb);
+       groups          = bch2_sb_get_disk_groups(c->disk_sb.sb);
+       nr_groups       = disk_groups_nr(groups);
+
+       if (!groups)
+               return 0;
+
+       cpu_g = kzalloc(sizeof(*cpu_g) +
+                       sizeof(cpu_g->entries[0]) * nr_groups, GFP_KERNEL);
+       if (!cpu_g)
+               return -ENOMEM;
+
+       cpu_g->nr = nr_groups;
+
+       for (i = 0; i < nr_groups; i++) {
+               struct bch_disk_group *src      = &groups->entries[i];
+               struct bch_disk_group_cpu *dst  = &cpu_g->entries[i];
+
+               dst->deleted    = BCH_GROUP_DELETED(src);
+               dst->parent     = BCH_GROUP_PARENT(src);
+       }
+
+       for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+               struct bch_member *m = mi->members + i;
+               struct bch_disk_group_cpu *dst =
+                       &cpu_g->entries[BCH_MEMBER_GROUP(m)];
+
+               if (!bch2_member_exists(m))
+                       continue;
+
+               g = BCH_MEMBER_GROUP(m);
+               while (g) {
+                       dst = &cpu_g->entries[g - 1];
+                       __set_bit(i, dst->devs.d);
+                       g = dst->parent;
+               }
+       }
+
+       old_g = rcu_dereference_protected(c->disk_groups,
+                               lockdep_is_held(&c->sb_lock));
+       rcu_assign_pointer(c->disk_groups, cpu_g);
+       if (old_g)
+               kfree_rcu(old_g, rcu);
+
+       return 0;
+}
+
+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target)
+{
+       struct target t = target_decode(target);
+
+       switch (t.type) {
+       case TARGET_NULL:
+               return NULL;
+       case TARGET_DEV: {
+               struct bch_dev *ca = t.dev < c->sb.nr_devices
+                       ? rcu_dereference(c->devs[t.dev])
+                       : NULL;
+               return ca ? &ca->self : NULL;
+       }
+       case TARGET_GROUP: {
+               struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
+
+               return g && t.group < g->nr && !g->entries[t.group].deleted
+                       ? &g->entries[t.group].devs
+                       : NULL;
+       }
+       default:
+               BUG();
+       }
+}
+
+bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
+{
+       struct target t = target_decode(target);
+
+       switch (t.type) {
+       case TARGET_NULL:
+               return false;
+       case TARGET_DEV:
+               return dev == t.dev;
+       case TARGET_GROUP: {
+               struct bch_disk_groups_cpu *g;
+               const struct bch_devs_mask *m;
+               bool ret;
+
+               rcu_read_lock();
+               g = rcu_dereference(c->disk_groups);
+               m = g && t.group < g->nr && !g->entries[t.group].deleted
+                       ? &g->entries[t.group].devs
+                       : NULL;
+
+               ret = m ? test_bit(dev, m->d) : false;
+               rcu_read_unlock();
+
+               return ret;
+       }
+       default:
+               BUG();
+       }
+}
+
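+/*
+ * Find a live (non-deleted) group with the given parent and label; returns
+ * its index, or -1 if no such group exists:
+ */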
+static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups,
+                                 unsigned parent,
+                                 const char *name, unsigned namelen)
+{
+       unsigned i, nr_groups = disk_groups_nr(groups);
+
+       if (!namelen || namelen > BCH_SB_LABEL_SIZE)
+               return -EINVAL;
+
+       for (i = 0; i < nr_groups; i++) {
+               struct bch_disk_group *g = groups->entries + i;
+
+               if (BCH_GROUP_DELETED(g))
+                       continue;
+
+               if (BCH_GROUP_PARENT(g) == parent &&
+                   strnlen(g->label, sizeof(g->label)) == namelen &&
+                   !memcmp(name, g->label, namelen))
+                       return i;
+       }
+
+       return -1;
+}
+
+static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent,
+                                const char *name, unsigned namelen)
+{
+       struct bch_sb_field_disk_groups *groups =
+               bch2_sb_get_disk_groups(sb->sb);
+       unsigned i, nr_groups = disk_groups_nr(groups);
+       struct bch_disk_group *g;
+
+       if (!namelen || namelen > BCH_SB_LABEL_SIZE)
+               return -EINVAL;
+
+       for (i = 0;
+            i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]);
+            i++)
+               ;
+
+       if (i == nr_groups) {
+               unsigned u64s =
+                       (sizeof(struct bch_sb_field_disk_groups) +
+                        sizeof(struct bch_disk_group) * (nr_groups + 1)) /
+                       sizeof(u64);
+
+               groups = bch2_sb_resize_disk_groups(sb, u64s);
+               if (!groups)
+                       return -ENOSPC;
+
+               nr_groups = disk_groups_nr(groups);
+       }
+
+       BUG_ON(i >= nr_groups);
+
+       g = &groups->entries[i];
+
+       memcpy(g->label, name, namelen);
+       if (namelen < sizeof(g->label))
+               g->label[namelen] = '\0';
+       SET_BCH_GROUP_DELETED(g, 0);
+       SET_BCH_GROUP_PARENT(g, parent);
+       SET_BCH_GROUP_DATA_ALLOWED(g, ~0);
+
+       return i;
+}
+
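+/*
+ * Look up a group by its dot-separated path (e.g. "group.subgroup"),
+ * descending one path component at a time:
+ */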
+int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name)
+{
+       struct bch_sb_field_disk_groups *groups =
+               bch2_sb_get_disk_groups(sb->sb);
+       int v = -1;
+
+       do {
+               const char *next = strchrnul(name, '.');
+               unsigned len = next - name;
+
+               if (*next == '.')
+                       next++;
+
+               v = __bch2_disk_group_find(groups, v + 1, name, len);
+               name = next;
+       } while (*name && v >= 0);
+
+       return v;
+}
+
+int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
+{
+       struct bch_sb_field_disk_groups *groups;
+       unsigned parent = 0;
+       int v = -1;
+
+       do {
+               const char *next = strchrnul(name, '.');
+               unsigned len = next - name;
+
+               if (*next == '.')
+                       next++;
+
+               groups = bch2_sb_get_disk_groups(sb->sb);
+
+               v = __bch2_disk_group_find(groups, parent, name, len);
+               if (v < 0)
+                       v = __bch2_disk_group_add(sb, parent, name, len);
+               if (v < 0)
+                       return v;
+
+               parent = v + 1;
+               name = next;
+       } while (*name && v >= 0);
+
+       return v;
+}
+
+void bch2_disk_path_to_text(struct printbuf *out,
+                           struct bch_sb_handle *sb,
+                           unsigned v)
+{
+       struct bch_sb_field_disk_groups *groups =
+               bch2_sb_get_disk_groups(sb->sb);
+       struct bch_disk_group *g;
+       unsigned nr = 0;
+       u16 path[32];
+
+       while (1) {
+               if (nr == ARRAY_SIZE(path))
+                       goto inval;
+
+               if (v >= disk_groups_nr(groups))
+                       goto inval;
+
+               g = groups->entries + v;
+
+               if (BCH_GROUP_DELETED(g))
+                       goto inval;
+
+               path[nr++] = v;
+
+               if (!BCH_GROUP_PARENT(g))
+                       break;
+
+               v = BCH_GROUP_PARENT(g) - 1;
+       }
+
+       while (nr) {
+               v = path[--nr];
+               g = groups->entries + v;
+
+               bch_scnmemcpy(out, g->label,
+                             strnlen(g->label, sizeof(g->label)));
+
+               if (nr)
+                       pr_buf(out, ".");
+       }
+       return;
+inval:
+       pr_buf(out, "invalid group %u", v);
+}
+
+int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
+{
+       struct bch_member *mi;
+       int v = -1;
+       int ret = 0;
+
+       mutex_lock(&c->sb_lock);
+
+       if (!strlen(name) || !strcmp(name, "none"))
+               goto write_sb;
+
+       v = bch2_disk_path_find_or_create(&c->disk_sb, name);
+       if (v < 0) {
+               mutex_unlock(&c->sb_lock);
+               return v;
+       }
+
+       ret = bch2_sb_disk_groups_to_cpu(c);
+       if (ret)
+               goto unlock;
+write_sb:
+       mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+       SET_BCH_MEMBER_GROUP(mi, v + 1);
+
+       bch2_write_super(c);
+unlock:
+       mutex_unlock(&c->sb_lock);
+
+       return ret;
+}
+
+int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v)
+{
+       struct bch_dev *ca;
+       int g;
+
+       if (!strlen(buf) || !strcmp(buf, "none")) {
+               *v = 0;
+               return 0;
+       }
+
+       /* Is it a device? */
+       ca = bch2_dev_lookup(c, buf);
+       if (!IS_ERR(ca)) {
+               *v = dev_to_target(ca->dev_idx);
+               percpu_ref_put(&ca->ref);
+               return 0;
+       }
+
+       mutex_lock(&c->sb_lock);
+       g = bch2_disk_path_find(&c->disk_sb, buf);
+       mutex_unlock(&c->sb_lock);
+
+       if (g >= 0) {
+               *v = group_to_target(g);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+void bch2_opt_target_to_text(struct printbuf *out, struct bch_fs *c, u64 v)
+{
+       struct target t = target_decode(v);
+
+       switch (t.type) {
+       case TARGET_NULL:
+               pr_buf(out, "none");
+               break;
+       case TARGET_DEV: {
+               struct bch_dev *ca;
+
+               rcu_read_lock();
+               ca = t.dev < c->sb.nr_devices
+                       ? rcu_dereference(c->devs[t.dev])
+                       : NULL;
+
+               if (ca && percpu_ref_tryget(&ca->io_ref)) {
+                       char b[BDEVNAME_SIZE];
+
+                       pr_buf(out, "/dev/%s",
+                            bdevname(ca->disk_sb.bdev, b));
+                       percpu_ref_put(&ca->io_ref);
+               } else if (ca) {
+                       pr_buf(out, "offline device %u", t.dev);
+               } else {
+                       pr_buf(out, "invalid device %u", t.dev);
+               }
+
+               rcu_read_unlock();
+               break;
+       }
+       case TARGET_GROUP:
+               mutex_lock(&c->sb_lock);
+               bch2_disk_path_to_text(out, &c->disk_sb, t.group);
+               mutex_unlock(&c->sb_lock);
+               break;
+       default:
+               BUG();
+       }
+}
diff --git a/libbcachefs/disk_groups.h b/libbcachefs/disk_groups.h
new file mode 100644
index 0000000..3d84f23
--- /dev/null
+++ b/libbcachefs/disk_groups.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DISK_GROUPS_H
+#define _BCACHEFS_DISK_GROUPS_H
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups;
+
+static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups)
+{
+       return groups
+               ? (vstruct_end(&groups->field) -
+                  (void *) &groups->entries[0]) / sizeof(struct bch_disk_group)
+               : 0;
+}
+
+struct target {
+       enum {
+               TARGET_NULL,
+               TARGET_DEV,
+               TARGET_GROUP,
+       }                       type;
+       union {
+               unsigned        dev;
+               unsigned        group;
+       };
+};
+
+#define TARGET_DEV_START       1
+#define TARGET_GROUP_START     (256 + TARGET_DEV_START)
+
+static inline u16 dev_to_target(unsigned dev)
+{
+       return TARGET_DEV_START + dev;
+}
+
+static inline u16 group_to_target(unsigned group)
+{
+       return TARGET_GROUP_START + group;
+}
+
+static inline struct target target_decode(unsigned target)
+{
+       if (target >= TARGET_GROUP_START)
+               return (struct target) {
+                       .type   = TARGET_GROUP,
+                       .group  = target - TARGET_GROUP_START
+               };
+
+       if (target >= TARGET_DEV_START)
+               return (struct target) {
+                       .type   = TARGET_DEV,
+                       .dev    = target - TARGET_DEV_START
+               };
+
+       return (struct target) { .type = TARGET_NULL };
+}
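+
+/*
+ * Worked example of the encoding above: target 0 means "none", devices
+ * start at TARGET_DEV_START == 1, groups at TARGET_GROUP_START == 257:
+ *
+ *   dev_to_target(0)   == 1    target_decode(1)   == { TARGET_DEV,   .dev   = 0 }
+ *   group_to_target(0) == 257  target_decode(257) == { TARGET_GROUP, .group = 0 }
+ *   target_decode(0)   == { .type = TARGET_NULL }
+ */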
+
+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned);
+
+static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c,
+                                                 enum bch_data_type data_type,
+                                                 u16 target)
+{
+       struct bch_devs_mask devs = c->rw_devs[data_type];
+       const struct bch_devs_mask *t = bch2_target_to_mask(c, target);
+
+       if (t)
+               bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
+       return devs;
+}
+
+bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned);
+
+int bch2_disk_path_find(struct bch_sb_handle *, const char *);
+
+/* Exported for userspace bcachefs-tools: */
+int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
+
+void bch2_disk_path_to_text(struct printbuf *, struct bch_sb_handle *,
+                           unsigned);
+
+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *);
+void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, u64);
+
+int bch2_sb_disk_groups_to_cpu(struct bch_fs *);
+
+int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
+
+const char *bch2_sb_validate_disk_groups(struct bch_sb *,
+                                        struct bch_sb_field *);
+
+#endif /* _BCACHEFS_DISK_GROUPS_H */
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
new file mode 100644
index 0000000..eac750a
--- /dev/null
+++ b/libbcachefs/ec.c
@@ -0,0 +1,1636 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* erasure coding */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
+#include "bset.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "io.h"
+#include "keylist.h"
+#include "recovery.h"
+#include "super-io.h"
+#include "util.h"
+
+#include <linux/sort.h>
+
+#ifdef __KERNEL__
+
+#include <linux/raid/pq.h>
+#include <linux/raid/xor.h>
+
+static void raid5_recov(unsigned disks, unsigned failed_idx,
+                       size_t size, void **data)
+{
+       unsigned i = 2, nr;
+
+       BUG_ON(failed_idx >= disks);
+
+       swap(data[0], data[failed_idx]);
+       memcpy(data[0], data[1], size);
+
+       while (i < disks) {
+               nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS);
+               xor_blocks(nr, size, data[0], data + i);
+               i += nr;
+       }
+
+       swap(data[0], data[failed_idx]);
+}
+
+static void raid_gen(int nd, int np, size_t size, void **v)
+{
+       if (np >= 1)
+               raid5_recov(nd + np, nd, size, v);
+       if (np >= 2)
+               raid6_call.gen_syndrome(nd + np, size, v);
+       BUG_ON(np > 2);
+}
+
+static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
+{
+       switch (nr) {
+       case 0:
+               break;
+       case 1:
+               if (ir[0] < nd + 1)
+                       raid5_recov(nd + 1, ir[0], size, v);
+               else
+                       raid6_call.gen_syndrome(nd + np, size, v);
+               break;
+       case 2:
+               if (ir[1] < nd) {
+                       /* data+data failure. */
+                       raid6_2data_recov(nd + np, size, ir[0], ir[1], v);
+               } else if (ir[0] < nd) {
+                       /* data + p/q failure */
+
+                       if (ir[1] == nd) /* data + p failure */
+                               raid6_datap_recov(nd + np, size, ir[0], v);
+                       else { /* data + q failure */
+                               raid5_recov(nd + 1, ir[0], size, v);
+                               raid6_call.gen_syndrome(nd + np, size, v);
+                       }
+               } else {
+                       raid_gen(nd, np, size, v);
+               }
+               break;
+       default:
+               BUG();
+       }
+}
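+
+/*
+ * Worked example of the two-failure cases above, with nd = 4 data blocks
+ * and np = 2 parity blocks (p is block 4 == nd, q is block 5):
+ *
+ *   ir = {1, 2}: data + data -> raid6_2data_recov()
+ *   ir = {1, 4}: data + p    -> raid6_datap_recov()
+ *   ir = {1, 5}: data + q    -> rebuild the data block from p with
+ *                               raid5_recov(), then regenerate q with
+ *                               gen_syndrome()
+ */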
+
+#else
+
+#include <raid/raid.h>
+
+#endif
+
+struct ec_bio {
+       struct bch_dev          *ca;
+       struct ec_stripe_buf    *buf;
+       size_t                  idx;
+       struct bio              bio;
+};
+
+/* Stripes btree keys: */
+
+const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+
+       if (k.k->p.inode)
+               return "invalid stripe key";
+
+       if (bkey_val_bytes(k.k) < sizeof(*s) ||
+           bkey_val_u64s(k.k) < stripe_val_u64s(s))
+               return "incorrect value size";
+
+       return bch2_bkey_ptrs_invalid(c, k);
+}
+
+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
+                        struct bkey_s_c k)
+{
+       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+       unsigned i;
+
+       pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
+              s->algorithm,
+              le16_to_cpu(s->sectors),
+              s->nr_blocks - s->nr_redundant,
+              s->nr_redundant,
+              s->csum_type,
+              1U << s->csum_granularity_bits);
+
+       for (i = 0; i < s->nr_blocks; i++)
+               pr_buf(out, " %u:%llu:%u", s->ptrs[i].dev,
+                      (u64) s->ptrs[i].offset,
+                      stripe_blockcount_get(s, i));
+}
+
+static int ptr_matches_stripe(struct bch_fs *c,
+                             struct bch_stripe *v,
+                             const struct bch_extent_ptr *ptr)
+{
+       unsigned i;
+
+       for (i = 0; i < v->nr_blocks - v->nr_redundant; i++) {
+               const struct bch_extent_ptr *ptr2 = v->ptrs + i;
+
+               if (ptr->dev == ptr2->dev &&
+                   ptr->gen == ptr2->gen &&
+                   ptr->offset >= ptr2->offset &&
+                   ptr->offset <  ptr2->offset + le16_to_cpu(v->sectors))
+                       return i;
+       }
+
+       return -1;
+}
+
+static int extent_matches_stripe(struct bch_fs *c,
+                                struct bch_stripe *v,
+                                struct bkey_s_c k)
+{
+       switch (k.k->type) {
+       case KEY_TYPE_extent: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               const struct bch_extent_ptr *ptr;
+               int idx;
+
+               extent_for_each_ptr(e, ptr) {
+                       idx = ptr_matches_stripe(c, v, ptr);
+                       if (idx >= 0)
+                               return idx;
+               }
+               break;
+       }
+       }
+
+       return -1;
+}
+
+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
+{
+       switch (k.k->type) {
+       case KEY_TYPE_extent: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               const union bch_extent_entry *entry;
+
+               extent_for_each_entry(e, entry)
+                       if (extent_entry_type(entry) ==
+                           BCH_EXTENT_ENTRY_stripe_ptr &&
+                           entry->stripe_ptr.idx == idx)
+                               return true;
+
+               break;
+       }
+       }
+
+       return false;
+}
+
+/* Checksumming: */
+
+static void ec_generate_checksums(struct ec_stripe_buf *buf)
+{
+       struct bch_stripe *v = &buf->key.v;
+       unsigned csum_granularity = 1 << v->csum_granularity_bits;
+       unsigned csums_per_device = stripe_csums_per_device(v);
+       unsigned csum_bytes = bch_crc_bytes[v->csum_type];
+       unsigned i, j;
+
+       if (!csum_bytes)
+               return;
+
+       BUG_ON(buf->offset);
+       BUG_ON(buf->size != le16_to_cpu(v->sectors));
+
+       for (i = 0; i < v->nr_blocks; i++) {
+               for (j = 0; j < csums_per_device; j++) {
+                       unsigned offset = j << v->csum_granularity_bits;
+                       unsigned len = min(csum_granularity, buf->size - offset);
+
+                       struct bch_csum csum =
+                               bch2_checksum(NULL, v->csum_type,
+                                             null_nonce(),
+                                             buf->data[i] + (offset << 9),
+                                             len << 9);
+
+                       memcpy(stripe_csum(v, i, j), &csum, csum_bytes);
+               }
+       }
+}
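+
+/*
+ * Example (assumed parameters): with v->sectors == 128, crc32c checksums
+ * (4 bytes each) and csum_granularity_bits == 3, each block carries
+ * 128 >> 3 == 16 checksums, one per 8-sector (4KiB) span; the final span
+ * may be shorter, which the min() above accounts for.
+ */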
+
+static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
+{
+       struct bch_stripe *v = &buf->key.v;
+       unsigned csum_granularity = 1 << v->csum_granularity_bits;
+       unsigned csum_bytes = bch_crc_bytes[v->csum_type];
+       unsigned i;
+
+       if (!csum_bytes)
+               return;
+
+       for (i = 0; i < v->nr_blocks; i++) {
+               unsigned offset = buf->offset;
+               unsigned end = buf->offset + buf->size;
+
+               if (!test_bit(i, buf->valid))
+                       continue;
+
+               while (offset < end) {
+                       unsigned j = offset >> v->csum_granularity_bits;
+                       unsigned len = min(csum_granularity, end - offset);
+                       struct bch_csum csum;
+
+                       BUG_ON(offset & (csum_granularity - 1));
+                       BUG_ON(offset + len != le16_to_cpu(v->sectors) &&
+                              ((offset + len) & (csum_granularity - 1)));
+
+                       csum = bch2_checksum(NULL, v->csum_type,
+                                            null_nonce(),
+                                            buf->data[i] + ((offset - buf->offset) << 9),
+                                            len << 9);
+
+                       if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) {
+                               __bcache_io_error(c,
+                                       "checksum error while doing reconstruct read (%u:%u)",
+                                       i, j);
+                               clear_bit(i, buf->valid);
+                               break;
+                       }
+
+                       offset += len;
+               }
+       }
+}
+
+/* Erasure coding: */
+
+static void ec_generate_ec(struct ec_stripe_buf *buf)
+{
+       struct bch_stripe *v = &buf->key.v;
+       unsigned nr_data = v->nr_blocks - v->nr_redundant;
+       unsigned bytes = le16_to_cpu(v->sectors) << 9;
+
+       raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
+}
+
+static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr)
+{
+       return nr - bitmap_weight(buf->valid, nr);
+}
+
+static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
+{
+       return __ec_nr_failed(buf, buf->key.v.nr_blocks);
+}
+
+static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
+{
+       struct bch_stripe *v = &buf->key.v;
+       unsigned i, failed[EC_STRIPE_MAX], nr_failed = 0;
+       unsigned nr_data = v->nr_blocks - v->nr_redundant;
+       unsigned bytes = buf->size << 9;
+
+       if (ec_nr_failed(buf) > v->nr_redundant) {
+               __bcache_io_error(c,
+                       "error doing reconstruct read: unable to read enough blocks");
+               return -1;
+       }
+
+       for (i = 0; i < nr_data; i++)
+               if (!test_bit(i, buf->valid))
+                       failed[nr_failed++] = i;
+
+       raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data);
+       return 0;
+}
+
+/* IO: */
+
+static void ec_block_endio(struct bio *bio)
+{
+       struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio);
+       struct bch_dev *ca = ec_bio->ca;
+       struct closure *cl = bio->bi_private;
+
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s: %s",
+                              bio_data_dir(bio) ? "write" : "read",
+                              bch2_blk_status_to_str(bio->bi_status)))
+               clear_bit(ec_bio->idx, ec_bio->buf->valid);
+
+       bio_put(&ec_bio->bio);
+       percpu_ref_put(&ca->io_ref);
+       closure_put(cl);
+}
+
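+/*
+ * Read or write one block of a stripe. The buffer may be larger than a
+ * single bio can map (BIO_MAX_PAGES pages), so it is submitted as a
+ * series of bios; each takes its own io_ref and closure ref, dropped in
+ * ec_block_endio(), and the initial ref from bch2_dev_get_ioref() is
+ * dropped once everything has been submitted.
+ */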
+static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
+                       unsigned rw, unsigned idx, struct closure *cl)
+{
+       struct bch_stripe *v = &buf->key.v;
+       unsigned offset = 0, bytes = buf->size << 9;
+       struct bch_extent_ptr *ptr = &v->ptrs[idx];
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+       if (!bch2_dev_get_ioref(ca, rw)) {
+               clear_bit(idx, buf->valid);
+               return;
+       }
+
+       while (offset < bytes) {
+               unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES,
+                                          DIV_ROUND_UP(bytes, PAGE_SIZE));
+               unsigned b = min_t(size_t, bytes - offset,
+                                  nr_iovecs << PAGE_SHIFT);
+               struct ec_bio *ec_bio;
+
+               ec_bio = container_of(bio_alloc_bioset(GFP_KERNEL, nr_iovecs,
+                                                      &c->ec_bioset),
+                                     struct ec_bio, bio);
+
+               ec_bio->ca                      = ca;
+               ec_bio->buf                     = buf;
+               ec_bio->idx                     = idx;
+
+               bio_set_dev(&ec_bio->bio, ca->disk_sb.bdev);
+               bio_set_op_attrs(&ec_bio->bio, rw, 0);
+
+               ec_bio->bio.bi_iter.bi_sector   = ptr->offset + buf->offset + (offset >> 9);
+               ec_bio->bio.bi_end_io           = ec_block_endio;
+               ec_bio->bio.bi_private          = cl;
+
+               bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
+
+               closure_get(cl);
+               percpu_ref_get(&ca->io_ref);
+
+               submit_bio(&ec_bio->bio);
+
+               offset += b;
+       }
+
+       percpu_ref_put(&ca->io_ref);
+}
+
+/* recovery read path: */
+
+int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct ec_stripe_buf *buf;
+       struct closure cl;
+       struct bkey_s_c k;
+       struct bch_stripe *v;
+       unsigned stripe_idx;
+       unsigned offset, end;
+       unsigned i, nr_data, csum_granularity;
+       int ret = 0, idx;
+
+       closure_init_stack(&cl);
+
+       BUG_ON(!rbio->pick.has_ec);
+
+       stripe_idx = rbio->pick.ec.idx;
+
+       buf = kzalloc(sizeof(*buf), GFP_NOIO);
+       if (!buf)
+               return -ENOMEM;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EC,
+                                  POS(0, stripe_idx),
+                                  BTREE_ITER_SLOTS);
+       k = bch2_btree_iter_peek_slot(iter);
+       if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) {
+               __bcache_io_error(c,
+                       "error doing reconstruct read: stripe not found");
+               kfree(buf);
+               return bch2_trans_exit(&trans) ?: -EIO;
+       }
+
+       bkey_reassemble(&buf->key.k_i, k);
+       bch2_trans_exit(&trans);
+
+       v = &buf->key.v;
+
+       nr_data = v->nr_blocks - v->nr_redundant;
+
+       idx = ptr_matches_stripe(c, v, &rbio->pick.ptr);
+       BUG_ON(idx < 0);
+
+       csum_granularity = 1U << v->csum_granularity_bits;
+
+       offset  = rbio->bio.bi_iter.bi_sector - v->ptrs[idx].offset;
+       end     = offset + bio_sectors(&rbio->bio);
+
+       BUG_ON(end > le16_to_cpu(v->sectors));
+
+       buf->offset     = round_down(offset, csum_granularity);
+       buf->size       = min_t(unsigned, le16_to_cpu(v->sectors),
+                               round_up(end, csum_granularity)) - buf->offset;
+
+       for (i = 0; i < v->nr_blocks; i++) {
+               buf->data[i] = kmalloc(buf->size << 9, GFP_NOIO);
+               if (!buf->data[i]) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+       }
+
+       memset(buf->valid, 0xFF, sizeof(buf->valid));
+
+       for (i = 0; i < v->nr_blocks; i++) {
+               struct bch_extent_ptr *ptr = v->ptrs + i;
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+               if (ptr_stale(ca, ptr)) {
+                       __bcache_io_error(c,
+                                         "error doing reconstruct read: stale pointer");
+                       clear_bit(i, buf->valid);
+                       continue;
+               }
+
+               ec_block_io(c, buf, REQ_OP_READ, i, &cl);
+       }
+
+       closure_sync(&cl);
+
+       if (ec_nr_failed(buf) > v->nr_redundant) {
+               __bcache_io_error(c,
+                       "error doing reconstruct read: unable to read enough blocks");
+               ret = -EIO;
+               goto err;
+       }
+
+       ec_validate_checksums(c, buf);
+
+       ret = ec_do_recov(c, buf);
+       if (ret)
+               goto err;
+
+       memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
+                     buf->data[idx] + ((offset - buf->offset) << 9));
+err:
+       for (i = 0; i < v->nr_blocks; i++)
+               kfree(buf->data[i]);
+       kfree(buf);
+       return ret;
+}
+
+/* stripe bucket accounting: */
+
+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
+{
+       ec_stripes_heap n, *h = &c->ec_stripes_heap;
+
+       if (idx >= h->size) {
+               if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
+                       return -ENOMEM;
+
+               spin_lock(&c->ec_stripes_heap_lock);
+               if (n.size > h->size) {
+                       memcpy(n.data, h->data, h->used * sizeof(h->data[0]));
+                       n.used = h->used;
+                       swap(*h, n);
+               }
+               spin_unlock(&c->ec_stripes_heap_lock);
+
+               free_heap(&n);
+       }
+
+       if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp))
+               return -ENOMEM;
+
+       if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
+           !genradix_ptr_alloc(&c->stripes[1], idx, gfp))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static int ec_stripe_mem_alloc(struct bch_fs *c,
+                              struct btree_iter *iter)
+{
+       size_t idx = iter->pos.offset;
+       int ret = 0;
+
+       if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN))
+               return ret;
+
+       bch2_trans_unlock(iter->trans);
+       ret = -EINTR;
+
+       if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
+               return ret;
+
+       return -ENOMEM;
+}
+
+static ssize_t stripe_idx_to_delete(struct bch_fs *c)
+{
+       ec_stripes_heap *h = &c->ec_stripes_heap;
+
+       return h->used && h->data[0].blocks_nonempty == 0
+               ? h->data[0].idx : -1;
+}
+
+static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
+                                     struct ec_stripe_heap_entry l,
+                                     struct ec_stripe_heap_entry r)
+{
+       return ((l.blocks_nonempty > r.blocks_nonempty) -
+               (l.blocks_nonempty < r.blocks_nonempty));
+}
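+
+/*
+ * With this comparator the stripes heap is a min-heap on blocks_nonempty:
+ * a completely empty stripe sorts to the root, which is exactly what
+ * stripe_idx_to_delete() looks for.
+ */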
+
+static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
+                                                  size_t i)
+{
+       struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
+
+       genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i;
+}
+
+static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
+{
+       ec_stripes_heap *h = &c->ec_stripes_heap;
+       struct stripe *m = genradix_ptr(&c->stripes[0], idx);
+
+       BUG_ON(!m->alive);
+       BUG_ON(m->heap_idx >= h->used);
+       BUG_ON(h->data[m->heap_idx].idx != idx);
+}
+
+void bch2_stripes_heap_del(struct bch_fs *c,
+                          struct stripe *m, size_t idx)
+{
+       if (!m->on_heap)
+               return;
+
+       m->on_heap = false;
+
+       heap_verify_backpointer(c, idx);
+
+       heap_del(&c->ec_stripes_heap, m->heap_idx,
+                ec_stripes_heap_cmp,
+                ec_stripes_heap_set_backpointer);
+}
+
+void bch2_stripes_heap_insert(struct bch_fs *c,
+                             struct stripe *m, size_t idx)
+{
+       if (m->on_heap)
+               return;
+
+       BUG_ON(heap_full(&c->ec_stripes_heap));
+
+       m->on_heap = true;
+
+       heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
+                       .idx = idx,
+                       .blocks_nonempty = m->blocks_nonempty,
+               }),
+                ec_stripes_heap_cmp,
+                ec_stripes_heap_set_backpointer);
+
+       heap_verify_backpointer(c, idx);
+}
+
+void bch2_stripes_heap_update(struct bch_fs *c,
+                             struct stripe *m, size_t idx)
+{
+       ec_stripes_heap *h = &c->ec_stripes_heap;
+       size_t i;
+
+       if (!m->on_heap)
+               return;
+
+       heap_verify_backpointer(c, idx);
+
+       h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
+
+       i = m->heap_idx;
+       heap_sift_up(h,   i, ec_stripes_heap_cmp,
+                    ec_stripes_heap_set_backpointer);
+       heap_sift_down(h, i, ec_stripes_heap_cmp,
+                      ec_stripes_heap_set_backpointer);
+
+       heap_verify_backpointer(c, idx);
+
+       if (stripe_idx_to_delete(c) >= 0 &&
+           !percpu_ref_is_dying(&c->writes))
+               schedule_work(&c->ec_stripe_delete_work);
+}
+
+/* stripe deletion */
+
+static int ec_stripe_delete(struct bch_fs *c, size_t idx)
+{
+       //pr_info("deleting stripe %zu", idx);
+       return bch2_btree_delete_range(c, BTREE_ID_EC,
+                                      POS(0, idx),
+                                      POS(0, idx + 1),
+                                      NULL);
+}
+
+static void ec_stripe_delete_work(struct work_struct *work)
+{
+       struct bch_fs *c =
+               container_of(work, struct bch_fs, ec_stripe_delete_work);
+       ssize_t idx;
+
+       while (1) {
+               spin_lock(&c->ec_stripes_heap_lock);
+               idx = stripe_idx_to_delete(c);
+               if (idx < 0) {
+                       spin_unlock(&c->ec_stripes_heap_lock);
+                       break;
+               }
+
+               bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx);
+               spin_unlock(&c->ec_stripes_heap_lock);
+
+               if (ec_stripe_delete(c, idx))
+                       break;
+       }
+}
+
+/* stripe creation: */
+
+static int ec_stripe_bkey_insert(struct bch_fs *c,
+                                struct bkey_i_stripe *stripe)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bpos start_pos = POS(0, c->ec_stripe_hint);
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EC, start_pos,
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+               if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) {
+                       if (start_pos.offset) {
+                               start_pos = POS_MIN;
+                               bch2_btree_iter_set_pos(iter, start_pos);
+                               continue;
+                       }
+
+                       ret = -ENOSPC;
+                       break;
+               }
+
+               if (bkey_deleted(k.k))
+                       goto found_slot;
+       }
+
+       goto err;
+found_slot:
+       start_pos = iter->pos;
+
+       ret = ec_stripe_mem_alloc(c, iter);
+       if (ret)
+               goto err;
+
+       stripe->k.p = iter->pos;
+
+       bch2_trans_update(&trans, iter, &stripe->k_i, 0);
+
+       ret = bch2_trans_commit(&trans, NULL, NULL,
+                               BTREE_INSERT_NOFAIL);
+err:
+       bch2_trans_iter_put(&trans, iter);
+
+       if (ret == -EINTR)
+               goto retry;
+
+       c->ec_stripe_hint = ret ? start_pos.offset : start_pos.offset + 1;
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
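+
+/*
+ * Note on c->ec_stripe_hint, used above: it caches where the last free
+ * stripe slot was found. After a successful insert the next search starts
+ * one past the slot just used; on failure it restarts at the same offset.
+ * The search wraps to POS_MIN once it runs past U32_MAX, so a stale hint
+ * only skips ranges already known to be full.
+ */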
+
+static void extent_stripe_ptr_add(struct bkey_s_extent e,
+                                 struct ec_stripe_buf *s,
+                                 struct bch_extent_ptr *ptr,
+                                 unsigned block)
+{
+       struct bch_extent_stripe_ptr *dst = (void *) ptr;
+       union bch_extent_entry *end = extent_entry_last(e);
+
+       memmove_u64s_up(dst + 1, dst, (u64 *) end - (u64 *) dst);
+       e.k->u64s += sizeof(*dst) / sizeof(u64);
+
+       *dst = (struct bch_extent_stripe_ptr) {
+               .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
+               .block          = block,
+               .idx            = s->key.k.p.offset,
+       };
+}
+
+static int ec_stripe_update_ptrs(struct bch_fs *c,
+                                struct ec_stripe_buf *s,
+                                struct bkey *pos)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_s_extent e;
+       struct bkey_on_stack sk;
+       int ret = 0, dev, idx;
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       /* XXX this doesn't support the reflink btree */
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  bkey_start_pos(pos),
+                                  BTREE_ITER_INTENT);
+
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k)) &&
+              bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
+               struct bch_extent_ptr *ptr, *ec_ptr = NULL;
+
+               if (extent_has_stripe_ptr(k, s->key.k.p.offset)) {
+                       bch2_btree_iter_next(iter);
+                       continue;
+               }
+
+               idx = extent_matches_stripe(c, &s->key.v, k);
+               if (idx < 0) {
+                       bch2_btree_iter_next(iter);
+                       continue;
+               }
+
+               dev = s->key.v.ptrs[idx].dev;
+
+               bkey_on_stack_reassemble(&sk, c, k);
+               e = bkey_i_to_s_extent(sk.k);
+
+               bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev);
+               ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev);
+               BUG_ON(!ec_ptr);
+
+               extent_stripe_ptr_add(e, s, ec_ptr, idx);
+
+               bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
+               bch2_trans_update(&trans, iter, sk.k, 0);
+
+               ret = bch2_trans_commit(&trans, NULL, NULL,
+                                       BTREE_INSERT_NOFAIL|
+                                       BTREE_INSERT_USE_RESERVE);
+               if (ret == -EINTR)
+                       ret = 0;
+               if (ret)
+                       break;
+       }
+
+       bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
+
+       return ret;
+}
+
+/*
+ * data buckets of new stripe all written: create the stripe
+ */
+static void ec_stripe_create(struct ec_stripe_new *s)
+{
+       struct bch_fs *c = s->c;
+       struct open_bucket *ob;
+       struct bkey_i *k;
+       struct stripe *m;
+       struct bch_stripe *v = &s->stripe.key.v;
+       unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
+       struct closure cl;
+       int ret;
+
+       BUG_ON(s->h->s == s);
+
+       closure_init_stack(&cl);
+
+       if (s->err) {
+               if (s->err != -EROFS)
+                       bch_err(c, "error creating stripe: error writing data buckets");
+               goto err;
+       }
+
+       BUG_ON(!s->allocated);
+
+       if (!percpu_ref_tryget(&c->writes))
+               goto err;
+
+       BUG_ON(bitmap_weight(s->blocks_allocated,
+                            s->blocks.nr) != s->blocks.nr);
+
+       ec_generate_ec(&s->stripe);
+
+       ec_generate_checksums(&s->stripe);
+
+       /* write p/q: */
+       for (i = nr_data; i < v->nr_blocks; i++)
+               ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl);
+
+       closure_sync(&cl);
+
+       for (i = nr_data; i < v->nr_blocks; i++)
+               if (!test_bit(i, s->stripe.valid)) {
+                       bch_err(c, "error creating stripe: error writing redundancy buckets");
+                       goto err_put_writes;
+               }
+
+       ret = s->existing_stripe
+               ? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i,
+                                   NULL, NULL, BTREE_INSERT_NOFAIL)
+               : ec_stripe_bkey_insert(c, &s->stripe.key);
+       if (ret) {
+               bch_err(c, "error creating stripe: error creating stripe key");
+               goto err_put_writes;
+       }
+
+       for_each_keylist_key(&s->keys, k) {
+               ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k);
+               if (ret) {
+                       bch_err(c, "error creating stripe: error updating pointers");
+                       break;
+               }
+       }
+
+       spin_lock(&c->ec_stripes_heap_lock);
+       m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
+#if 0
+       pr_info("created a %s stripe %llu",
+               s->existing_stripe ? "existing" : "new",
+               s->stripe.key.k.p.offset);
+#endif
+       BUG_ON(m->on_heap);
+       bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
+       spin_unlock(&c->ec_stripes_heap_lock);
+err_put_writes:
+       percpu_ref_put(&c->writes);
+err:
+       open_bucket_for_each(c, &s->blocks, ob, i) {
+               ob->ec = NULL;
+               __bch2_open_bucket_put(c, ob);
+       }
+
+       bch2_open_buckets_put(c, &s->parity);
+
+       bch2_keylist_free(&s->keys, s->inline_keys);
+
+       for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
+               kvpfree(s->stripe.data[i], s->stripe.size << 9);
+       kfree(s);
+}
+
+static void ec_stripe_create_work(struct work_struct *work)
+{
+       struct bch_fs *c = container_of(work,
+               struct bch_fs, ec_stripe_create_work);
+       struct ec_stripe_new *s, *n;
+restart:
+       mutex_lock(&c->ec_stripe_new_lock);
+       list_for_each_entry_safe(s, n, &c->ec_stripe_new_list, list)
+               if (!atomic_read(&s->pin)) {
+                       list_del(&s->list);
+                       mutex_unlock(&c->ec_stripe_new_lock);
+                       ec_stripe_create(s);
+                       goto restart;
+               }
+       mutex_unlock(&c->ec_stripe_new_lock);
+}
+
+static void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s)
+{
+       BUG_ON(atomic_read(&s->pin) <= 0);
+
+       if (atomic_dec_and_test(&s->pin)) {
+               BUG_ON(!s->pending);
+               queue_work(system_long_wq, &c->ec_stripe_create_work);
+       }
+}
+
+static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
+{
+       struct ec_stripe_new *s = h->s;
+
+       BUG_ON(!s->allocated && !s->err);
+
+       h->s            = NULL;
+       s->pending      = true;
+
+       mutex_lock(&c->ec_stripe_new_lock);
+       list_add(&s->list, &c->ec_stripe_new_list);
+       mutex_unlock(&c->ec_stripe_new_lock);
+
+       ec_stripe_new_put(c, s);
+}
+
+/* have a full bucket - hand it off to be erasure coded: */
+void bch2_ec_bucket_written(struct bch_fs *c, struct open_bucket *ob)
+{
+       struct ec_stripe_new *s = ob->ec;
+
+       if (ob->sectors_free)
+               s->err = -1;
+
+       ec_stripe_new_put(c, s);
+}
+
+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
+{
+       struct ec_stripe_new *s = ob->ec;
+
+       s->err = -EIO;
+}
+
+void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
+{
+       struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs);
+       struct bch_dev *ca;
+       unsigned offset;
+
+       if (!ob)
+               return NULL;
+
+       ca      = bch_dev_bkey_exists(c, ob->ptr.dev);
+       offset  = ca->mi.bucket_size - ob->sectors_free;
+
+       return ob->ec->stripe.data[ob->ec_idx] + (offset << 9);
+}
+
+void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp,
+                            struct bpos pos, unsigned sectors)
+{
+       struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs);
+       struct ec_stripe_new *ec;
+
+       if (!ob)
+               return;
+
+       //pr_info("adding backpointer at %llu:%llu", pos.inode, pos.offset);
+
+       ec = ob->ec;
+       mutex_lock(&ec->lock);
+
+       if (bch2_keylist_realloc(&ec->keys, ec->inline_keys,
+                                ARRAY_SIZE(ec->inline_keys),
+                                BKEY_U64s)) {
+               BUG();
+       }
+
+       bkey_init(&ec->keys.top->k);
+       ec->keys.top->k.p       = pos;
+       bch2_key_resize(&ec->keys.top->k, sectors);
+       bch2_keylist_push(&ec->keys);
+
+       mutex_unlock(&ec->lock);
+}
+
+static int unsigned_cmp(const void *_l, const void *_r)
+{
+       unsigned l = *((const unsigned *) _l);
+       unsigned r = *((const unsigned *) _r);
+
+       return cmp_int(l, r);
+}
+
+/* pick most common bucket size: */
+static unsigned pick_blocksize(struct bch_fs *c,
+                              struct bch_devs_mask *devs)
+{
+       struct bch_dev *ca;
+       unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX];
+       struct {
+               unsigned nr, size;
+       } cur = { 0, 0 }, best = { 0, 0 };
+
+       for_each_member_device_rcu(ca, c, i, devs)
+               sizes[nr++] = ca->mi.bucket_size;
+
+       sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL);
+
+       for (i = 0; i < nr; i++) {
+               if (sizes[i] != cur.size) {
+                       if (cur.nr > best.nr)
+                               best = cur;
+
+                       cur.nr = 0;
+                       cur.size = sizes[i];
+               }
+
+               cur.nr++;
+       }
+
+       if (cur.nr > best.nr)
+               best = cur;
+
+       return best.size;
+}
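+
+/*
+ * Example: member bucket sizes { 1024, 512, 512, 2048 } sort to
+ * { 512, 512, 1024, 2048 }; the longest run is the two 512s, so 512 is
+ * chosen. On a tie the smaller size wins, since runs are scanned in
+ * ascending order and the comparison is strict.
+ */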
+
+static bool may_create_new_stripe(struct bch_fs *c)
+{
+       return false;
+}
+
+static void ec_stripe_key_init(struct bch_fs *c,
+                              struct bkey_i_stripe *s,
+                              unsigned nr_data,
+                              unsigned nr_parity,
+                              unsigned stripe_size)
+{
+       unsigned u64s;
+
+       bkey_stripe_init(&s->k_i);
+       s->v.sectors                    = cpu_to_le16(stripe_size);
+       s->v.algorithm                  = 0;
+       s->v.nr_blocks                  = nr_data + nr_parity;
+       s->v.nr_redundant               = nr_parity;
+       s->v.csum_granularity_bits      = ilog2(c->sb.encoded_extent_max);
+       s->v.csum_type                  = BCH_CSUM_CRC32C;
+       s->v.pad                        = 0;
+
+       while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
+               BUG_ON(1 << s->v.csum_granularity_bits >=
+                      le16_to_cpu(s->v.sectors) ||
+                      s->v.csum_granularity_bits == U8_MAX);
+               s->v.csum_granularity_bits++;
+       }
+
+       set_bkey_val_u64s(&s->k, u64s);
+}
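+
+/*
+ * The loop above keeps the key representable: each increment of
+ * csum_granularity_bits halves stripe_csums_per_device(), shrinking the
+ * checksum area until the value fits in BKEY_VAL_U64s_MAX u64s. As an
+ * example (assumed numbers), 16 blocks of 128 sectors at granularity 1
+ * would need 16 * 128 crc32c checksums; at a granularity of 8 sectors,
+ * only 16 * 16.
+ */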
+
+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
+{
+       struct ec_stripe_new *s;
+       unsigned i;
+
+       lockdep_assert_held(&h->lock);
+
+       s = kzalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM;
+
+       mutex_init(&s->lock);
+       atomic_set(&s->pin, 1);
+       s->c            = c;
+       s->h            = h;
+       s->nr_data      = min_t(unsigned, h->nr_active_devs,
+                               EC_STRIPE_MAX) - h->redundancy;
+       s->nr_parity    = h->redundancy;
+
+       bch2_keylist_init(&s->keys, s->inline_keys);
+
+       s->stripe.offset        = 0;
+       s->stripe.size          = h->blocksize;
+       memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
+
+       ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
+                          s->nr_parity, h->blocksize);
+
+       for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
+               s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
+               if (!s->stripe.data[i])
+                       goto err;
+       }
+
+       h->s = s;
+
+       return 0;
+err:
+       for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
+               kvpfree(s->stripe.data[i], s->stripe.size << 9);
+       kfree(s);
+       return -ENOMEM;
+}
+
+static struct ec_stripe_head *
+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
+                        unsigned algo, unsigned redundancy)
+{
+       struct ec_stripe_head *h;
+       struct bch_dev *ca;
+       unsigned i;
+
+       h = kzalloc(sizeof(*h), GFP_KERNEL);
+       if (!h)
+               return NULL;
+
+       mutex_init(&h->lock);
+       mutex_lock(&h->lock);
+
+       h->target       = target;
+       h->algo         = algo;
+       h->redundancy   = redundancy;
+
+       rcu_read_lock();
+       h->devs = target_rw_devs(c, BCH_DATA_user, target);
+
+       for_each_member_device_rcu(ca, c, i, &h->devs)
+               if (!ca->mi.durability)
+                       __clear_bit(i, h->devs.d);
+
+       h->blocksize = pick_blocksize(c, &h->devs);
+
+       for_each_member_device_rcu(ca, c, i, &h->devs)
+               if (ca->mi.bucket_size == h->blocksize)
+                       h->nr_active_devs++;
+
+       rcu_read_unlock();
+       list_add(&h->list, &c->ec_stripe_head_list);
+       return h;
+}
+
+void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
+{
+       if (h->s &&
+           h->s->allocated &&
+           bitmap_weight(h->s->blocks_allocated,
+                         h->s->blocks.nr) == h->s->blocks.nr)
+               ec_stripe_set_pending(c, h);
+
+       mutex_unlock(&h->lock);
+}
+
+struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
+                                              unsigned target,
+                                              unsigned algo,
+                                              unsigned redundancy)
+{
+       struct ec_stripe_head *h;
+
+       if (!redundancy)
+               return NULL;
+
+       mutex_lock(&c->ec_stripe_head_lock);
+       list_for_each_entry(h, &c->ec_stripe_head_list, list)
+               if (h->target           == target &&
+                   h->algo             == algo &&
+                   h->redundancy       == redundancy) {
+                       mutex_lock(&h->lock);
+                       goto found;
+               }
+
+       h = ec_new_stripe_head_alloc(c, target, algo, redundancy);
+found:
+       mutex_unlock(&c->ec_stripe_head_lock);
+       return h;
+}
+
+/*
+ * XXX: use a higher watermark for allocating open buckets here:
+ */
+static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
+{
+       struct bch_devs_mask devs;
+       struct open_bucket *ob;
+       unsigned i, nr_have, nr_data =
+               min_t(unsigned, h->nr_active_devs,
+                     EC_STRIPE_MAX) - h->redundancy;
+       bool have_cache = true;
+       int ret = 0;
+
+       devs = h->devs;
+
+       for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) {
+               __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
+               --nr_data;
+       }
+
+       BUG_ON(h->s->blocks.nr > nr_data);
+       BUG_ON(h->s->parity.nr > h->redundancy);
+
+       open_bucket_for_each(c, &h->s->parity, ob, i)
+               __clear_bit(ob->ptr.dev, devs.d);
+       open_bucket_for_each(c, &h->s->blocks, ob, i)
+               __clear_bit(ob->ptr.dev, devs.d);
+
+       percpu_down_read(&c->mark_lock);
+       rcu_read_lock();
+
+       if (h->s->parity.nr < h->redundancy) {
+               nr_have = h->s->parity.nr;
+
+               ret = bch2_bucket_alloc_set(c, &h->s->parity,
+                                           &h->parity_stripe,
+                                           &devs,
+                                           h->redundancy,
+                                           &nr_have,
+                                           &have_cache,
+                                           RESERVE_NONE,
+                                           0,
+                                           NULL);
+               if (ret)
+                       goto err;
+       }
+
+       if (h->s->blocks.nr < nr_data) {
+               nr_have = h->s->blocks.nr;
+
+               ret = bch2_bucket_alloc_set(c, &h->s->blocks,
+                                           &h->block_stripe,
+                                           &devs,
+                                           nr_data,
+                                           &nr_have,
+                                           &have_cache,
+                                           RESERVE_NONE,
+                                           0,
+                                           NULL);
+               if (ret)
+                       goto err;
+       }
+err:
+       rcu_read_unlock();
+       percpu_up_read(&c->mark_lock);
+       return ret;
+}
+
+/* XXX: doesn't obey target: */
+static s64 get_existing_stripe(struct bch_fs *c,
+                              unsigned target,
+                              unsigned algo,
+                              unsigned redundancy)
+{
+       ec_stripes_heap *h = &c->ec_stripes_heap;
+       struct stripe *m;
+       size_t heap_idx;
+       u64 stripe_idx;
+
+       if (may_create_new_stripe(c))
+               return -1;
+
+       spin_lock(&c->ec_stripes_heap_lock);
+       for (heap_idx = 0; heap_idx < h->used; heap_idx++) {
+               if (!h->data[heap_idx].blocks_nonempty)
+                       continue;
+
+               stripe_idx = h->data[heap_idx].idx;
+               m = genradix_ptr(&c->stripes[0], stripe_idx);
+
+               if (m->algorithm        == algo &&
+                   m->nr_redundant     == redundancy &&
+                   m->blocks_nonempty  < m->nr_blocks - m->nr_redundant) {
+                       bch2_stripes_heap_del(c, m, stripe_idx);
+                       spin_unlock(&c->ec_stripes_heap_lock);
+                       return stripe_idx;
+               }
+       }
+
+       spin_unlock(&c->ec_stripes_heap_lock);
+       return -1;
+}
+
+static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS);
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (!ret)
+               bkey_reassemble(&stripe->key.k_i, k);
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
+
+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
+                                              unsigned target,
+                                              unsigned algo,
+                                              unsigned redundancy)
+{
+       struct closure cl;
+       struct ec_stripe_head *h;
+       struct open_bucket *ob;
+       unsigned i, data_idx = 0;
+       s64 idx;
+
+       closure_init_stack(&cl);
+
+       h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
+       if (!h)
+               return NULL;
+
+       if (!h->s && ec_new_stripe_alloc(c, h)) {
+               bch2_ec_stripe_head_put(c, h);
+               return NULL;
+       }
+
+       if (!h->s->allocated) {
+               if (!h->s->existing_stripe &&
+                   (idx = get_existing_stripe(c, target, algo, redundancy)) >= 0) {
+                       //pr_info("got existing stripe %llu", idx);
+
+                       h->s->existing_stripe = true;
+                       h->s->existing_stripe_idx = idx;
+                       if (get_stripe_key(c, idx, &h->s->stripe)) {
+                               /* btree error */
+                               BUG();
+                       }
+
+                       for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++)
+                               if (stripe_blockcount_get(&h->s->stripe.key.v, i)) {
+                                       __set_bit(i, h->s->blocks_allocated);
+                                       ec_block_io(c, &h->s->stripe, READ, i, &cl);
+                               }
+               }
+
+               if (new_stripe_alloc_buckets(c, h)) {
+                       bch2_ec_stripe_head_put(c, h);
+                       h = NULL;
+                       goto out;
+               }
+
+               open_bucket_for_each(c, &h->s->blocks, ob, i) {
+                       data_idx = find_next_zero_bit(h->s->blocks_allocated,
+                                                     h->s->nr_data, data_idx);
+                       BUG_ON(data_idx >= h->s->nr_data);
+
+                       h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
+                       h->s->data_block_idx[i] = data_idx;
+                       data_idx++;
+               }
+
+               open_bucket_for_each(c, &h->s->parity, ob, i)
+                       h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
+
+               //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
+               h->s->allocated = true;
+       }
+out:
+       closure_sync(&cl);
+       return h;
+}
+
+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct ec_stripe_head *h;
+       struct open_bucket *ob;
+       unsigned i;
+
+       mutex_lock(&c->ec_stripe_head_lock);
+       list_for_each_entry(h, &c->ec_stripe_head_list, list) {
+               mutex_lock(&h->lock);
+               if (!h->s)
+                       goto unlock;
+
+               open_bucket_for_each(c, &h->s->blocks, ob, i)
+                       if (ob->ptr.dev == ca->dev_idx)
+                               goto found;
+               open_bucket_for_each(c, &h->s->parity, ob, i)
+                       if (ob->ptr.dev == ca->dev_idx)
+                               goto found;
+               goto unlock;
+found:
+               h->s->err = -EROFS;
+               ec_stripe_set_pending(c, h);
+unlock:
+               mutex_unlock(&h->lock);
+       }
+       mutex_unlock(&c->ec_stripe_head_lock);
+}
+
+static int __bch2_stripe_write_key(struct btree_trans *trans,
+                                  struct btree_iter *iter,
+                                  struct stripe *m,
+                                  size_t idx,
+                                  struct bkey_i_stripe *new_key)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c k;
+       unsigned i;
+       int ret;
+
+       bch2_btree_iter_set_pos(iter, POS(0, idx));
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_stripe)
+               return -EIO;
+
+       bkey_reassemble(&new_key->k_i, k);
+
+       spin_lock(&c->ec_stripes_heap_lock);
+
+       for (i = 0; i < new_key->v.nr_blocks; i++)
+               stripe_blockcount_set(&new_key->v, i,
+                                     m->block_sectors[i]);
+       m->dirty = false;
+
+       spin_unlock(&c->ec_stripes_heap_lock);
+
+       bch2_trans_update(trans, iter, &new_key->k_i, 0);
+       return 0;
+}
+
+int bch2_stripes_write(struct bch_fs *c, unsigned flags)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct genradix_iter giter;
+       struct bkey_i_stripe *new_key;
+       struct stripe *m;
+       int ret = 0;
+
+       new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL);
+       BUG_ON(!new_key);
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN,
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+       genradix_for_each(&c->stripes[0], giter, m) {
+               if (!m->dirty)
+                       continue;
+
+               ret = __bch2_trans_do(&trans, NULL, NULL,
+                                     BTREE_INSERT_NOFAIL|flags,
+                       __bch2_stripe_write_key(&trans, iter, m,
+                                       giter.pos, new_key));
+
+               if (ret)
+                       break;
+       }
+
+       bch2_trans_exit(&trans);
+
+       kfree(new_key);
+
+       return ret;
+}
+
+static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
+                             unsigned level, struct bkey_s_c k)
+{
+       int ret = 0;
+
+       if (k.k->type == KEY_TYPE_stripe) {
+               struct stripe *m;
+
+               ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
+                       bch2_mark_key(c, k, 0, 0, NULL, 0,
+                                     BTREE_TRIGGER_NOATOMIC);
+               if (ret)
+                       return ret;
+
+               spin_lock(&c->ec_stripes_heap_lock);
+               m = genradix_ptr(&c->stripes[0], k.k->p.offset);
+               bch2_stripes_heap_insert(c, m, k.k->p.offset);
+               spin_unlock(&c->ec_stripes_heap_lock);
+       }
+
+       return ret;
+}
+
+int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
+{
+       int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC,
+                                         NULL, bch2_stripes_read_fn);
+       if (ret)
+               bch_err(c, "error reading stripes: %i", ret);
+
+       return ret;
+}
+
+int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       size_t i, idx = 0;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0);
+
+       k = bch2_btree_iter_prev(iter);
+       if (!IS_ERR_OR_NULL(k.k))
+               idx = k.k->p.offset + 1;
+       ret = bch2_trans_exit(&trans);
+       if (ret)
+               return ret;
+
+       if (!idx)
+               return 0;
+
+       if (!gc &&
+           !init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx),
+                      GFP_KERNEL))
+               return -ENOMEM;
+#if 0
+       ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL);
+#else
+       for (i = 0; i < idx; i++)
+               if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL))
+                       return -ENOMEM;
+#endif
+       return 0;
+}
+
+void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       ec_stripes_heap *h = &c->ec_stripes_heap;
+       struct stripe *m;
+       size_t i;
+
+       spin_lock(&c->ec_stripes_heap_lock);
+       for (i = 0; i < min(h->used, 20UL); i++) {
+               m = genradix_ptr(&c->stripes[0], h->data[i].idx);
+
+               pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx,
+                      h->data[i].blocks_nonempty,
+                      m->nr_blocks - m->nr_redundant,
+                      m->nr_redundant);
+       }
+       spin_unlock(&c->ec_stripes_heap_lock);
+}
+
+void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct ec_stripe_head *h;
+       struct ec_stripe_new *s;
+
+       mutex_lock(&c->ec_stripe_head_lock);
+       list_for_each_entry(h, &c->ec_stripe_head_list, list) {
+               pr_buf(out, "target %u algo %u redundancy %u:\n",
+                      h->target, h->algo, h->redundancy);
+
+               if (h->s)
+                       pr_buf(out, "\tpending: blocks %u allocated %u\n",
+                              h->s->blocks.nr,
+                              bitmap_weight(h->s->blocks_allocated,
+                                            h->s->blocks.nr));
+       }
+       mutex_unlock(&c->ec_stripe_head_lock);
+
+       mutex_lock(&c->ec_stripe_new_lock);
+       list_for_each_entry(s, &c->ec_stripe_new_list, list) {
+               pr_buf(out, "\tin flight: blocks %u allocated %u pin %u\n",
+                      s->blocks.nr,
+                      bitmap_weight(s->blocks_allocated,
+                                    s->blocks.nr),
+                      atomic_read(&s->pin));
+       }
+       mutex_unlock(&c->ec_stripe_new_lock);
+}
+
+void bch2_fs_ec_exit(struct bch_fs *c)
+{
+       struct ec_stripe_head *h;
+
+       while (1) {
+               mutex_lock(&c->ec_stripe_head_lock);
+               h = list_first_entry_or_null(&c->ec_stripe_head_list,
+                                            struct ec_stripe_head, list);
+               if (h)
+                       list_del(&h->list);
+               mutex_unlock(&c->ec_stripe_head_lock);
+               if (!h)
+                       break;
+
+               BUG_ON(h->s);
+               kfree(h);
+       }
+
+       BUG_ON(!list_empty(&c->ec_stripe_new_list));
+
+       free_heap(&c->ec_stripes_heap);
+       genradix_free(&c->stripes[0]);
+       bioset_exit(&c->ec_bioset);
+}
+
+int bch2_fs_ec_init(struct bch_fs *c)
+{
+       INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
+       INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
+
+       return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
+                          BIOSET_NEED_BVECS);
+}
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
new file mode 100644
index 0000000..6db16cf
--- /dev/null
+++ b/libbcachefs/ec.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EC_H
+#define _BCACHEFS_EC_H
+
+#include "ec_types.h"
+#include "keylist_types.h"
+
+const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
+                        struct bkey_s_c);
+
+#define bch2_bkey_ops_stripe (struct bkey_ops) {       \
+       .key_invalid    = bch2_stripe_invalid,          \
+       .val_to_text    = bch2_stripe_to_text,          \
+       .swab           = bch2_ptr_swab,                \
+}
+
+static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
+{
+       return DIV_ROUND_UP(le16_to_cpu(s->sectors),
+                           1 << s->csum_granularity_bits);
+}
+
+static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
+                                         unsigned dev, unsigned csum_idx)
+{
+       unsigned csum_bytes = bch_crc_bytes[s->csum_type];
+
+       return sizeof(struct bch_stripe) +
+               sizeof(struct bch_extent_ptr) * s->nr_blocks +
+               (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
+}
+
+static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s,
+                                               unsigned idx)
+{
+       return stripe_csum_offset(s, s->nr_blocks, 0) +
+               sizeof(u16) * idx;
+}
+
+static inline unsigned stripe_blockcount_get(const struct bch_stripe *s,
+                                            unsigned idx)
+{
+       return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx));
+}
+
+static inline void stripe_blockcount_set(struct bch_stripe *s,
+                                        unsigned idx, unsigned v)
+{
+       __le16 *p = (void *) s + stripe_blockcount_offset(s, idx);
+
+       *p = cpu_to_le16(v);
+}
+
+static inline unsigned stripe_val_u64s(const struct bch_stripe *s)
+{
+       return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks),
+                           sizeof(u64));
+}
+
+static inline void *stripe_csum(struct bch_stripe *s,
+                               unsigned dev, unsigned csum_idx)
+{
+       return (void *) s + stripe_csum_offset(s, dev, csum_idx);
+}
+
+struct bch_read_bio;
+
+struct ec_stripe_buf {
+       /* might not be buffering the entire stripe: */
+       unsigned                offset;
+       unsigned                size;
+       unsigned long           valid[BITS_TO_LONGS(EC_STRIPE_MAX)];
+
+       void                    *data[EC_STRIPE_MAX];
+
+       union {
+               struct bkey_i_stripe    key;
+               u64                     pad[255];
+       };
+};
+
+struct ec_stripe_head;
+
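+/*
+ * A stripe in the process of being created: data writes targeting it take
+ * a ref on @pin, and the stripe key itself is only created once @pin has
+ * dropped to 0 (see bch2_new_stripes_to_text()).
+ */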
+struct ec_stripe_new {
+       struct bch_fs           *c;
+       struct ec_stripe_head   *h;
+       struct mutex            lock;
+       struct list_head        list;
+
+       /* counts in-flight writes; the stripe is created when pin == 0 */
+       atomic_t                pin;
+
+       int                     err;
+
+       u8                      nr_data;
+       u8                      nr_parity;
+       bool                    allocated;
+       bool                    pending;
+       bool                    existing_stripe;
+       u64                     existing_stripe_idx;
+
+       unsigned long           blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)];
+
+       struct open_buckets     blocks;
+       u8                      data_block_idx[EC_STRIPE_MAX];
+       struct open_buckets     parity;
+
+       struct keylist          keys;
+       u64                     inline_keys[BKEY_U64s * 8];
+
+       struct ec_stripe_buf    stripe;
+};
+
+struct ec_stripe_head {
+       struct list_head        list;
+       struct mutex            lock;
+
+       unsigned                target;
+       unsigned                algo;
+       unsigned                redundancy;
+
+       struct bch_devs_mask    devs;
+       unsigned                nr_active_devs;
+
+       unsigned                blocksize;
+
+       struct dev_stripe_state block_stripe;
+       struct dev_stripe_state parity_stripe;
+
+       struct ec_stripe_new    *s;
+};
+
+int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *);
+
+void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
+void bch2_ec_add_backpointer(struct bch_fs *, struct write_point *,
+                            struct bpos, unsigned);
+
+void bch2_ec_bucket_written(struct bch_fs *, struct open_bucket *);
+void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
+
+int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
+
+void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned,
+                                              unsigned, unsigned);
+
+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
+
+void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
+
+void bch2_ec_flush_new_stripes(struct bch_fs *);
+
+struct journal_keys;
+int bch2_stripes_read(struct bch_fs *, struct journal_keys *);
+int bch2_stripes_write(struct bch_fs *, unsigned);
+
+int bch2_ec_mem_alloc(struct bch_fs *, bool);
+
+void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
+void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_fs_ec_exit(struct bch_fs *);
+int bch2_fs_ec_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_EC_H */
diff --git a/libbcachefs/ec_types.h b/libbcachefs/ec_types.h
new file mode 100644 (file)
index 0000000..e4d633f
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EC_TYPES_H
+#define _BCACHEFS_EC_TYPES_H
+
+#include <linux/llist.h>
+
+#define EC_STRIPE_MAX  16
+
+struct bch_replicas_padded {
+       struct bch_replicas_entry       e;
+       u8                              pad[EC_STRIPE_MAX];
+};
+
+struct stripe {
+       size_t                  heap_idx;
+
+       u16                     sectors;
+       u8                      algorithm;
+
+       u8                      nr_blocks;
+       u8                      nr_redundant;
+
+       unsigned                alive:1;
+       unsigned                dirty:1;
+       unsigned                on_heap:1;
+       u8                      blocks_nonempty;
+       u16                     block_sectors[EC_STRIPE_MAX];
+
+       struct bch_replicas_padded r;
+};
+
+struct ec_stripe_heap_entry {
+       size_t                  idx;
+       unsigned                blocks_nonempty;
+};
+
+typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap;
+
+#endif /* _BCACHEFS_EC_TYPES_H */
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
new file mode 100644 (file)
index 0000000..cd46706
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "error.h"
+#include "io.h"
+#include "super.h"
+
+#define FSCK_ERR_RATELIMIT_NR  10
+
+bool bch2_inconsistent_error(struct bch_fs *c)
+{
+       set_bit(BCH_FS_ERROR, &c->flags);
+
+       switch (c->opts.errors) {
+       case BCH_ON_ERROR_CONTINUE:
+               return false;
+       case BCH_ON_ERROR_RO:
+               if (bch2_fs_emergency_read_only(c))
+                       bch_err(c, "emergency read only");
+               return true;
+       case BCH_ON_ERROR_PANIC:
+               panic(bch2_fmt(c, "panic after error"));
+               return true;
+       default:
+               BUG();
+       }
+}
+
+void bch2_fatal_error(struct bch_fs *c)
+{
+       if (bch2_fs_emergency_read_only(c))
+               bch_err(c, "emergency read only");
+}
+
+void bch2_io_error_work(struct work_struct *work)
+{
+       struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
+       struct bch_fs *c = ca->fs;
+       bool dev;
+
+       down_write(&c->state_lock);
+       dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
+                                   BCH_FORCE_IF_DEGRADED);
+       if (dev
+           ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
+                                 BCH_FORCE_IF_DEGRADED)
+           : bch2_fs_emergency_read_only(c))
+               bch_err(ca,
+                       "too many IO errors, setting %s RO",
+                       dev ? "device" : "filesystem");
+       up_write(&c->state_lock);
+}
+
+void bch2_io_error(struct bch_dev *ca)
+{
+       //queue_work(system_long_wq, &ca->io_error_work);
+}
+
+#ifdef __KERNEL__
+#define ask_yn()       false
+#else
+#include "tools-util.h"
+#endif
+
+enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
+                               const char *fmt, ...)
+{
+       struct fsck_err_state *s = NULL;
+       va_list args;
+       bool fix = false, print = true, suppressing = false;
+       char _buf[sizeof(s->buf)], *buf = _buf;
+
+       if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
+               va_start(args, fmt);
+               vprintk(fmt, args);
+               va_end(args);
+
+               return bch2_inconsistent_error(c)
+                       ? FSCK_ERR_EXIT
+                       : FSCK_ERR_FIX;
+       }
+
+       mutex_lock(&c->fsck_error_lock);
+
+       list_for_each_entry(s, &c->fsck_errors, list)
+               if (s->fmt == fmt)
+                       goto found;
+
+       s = kzalloc(sizeof(*s), GFP_NOFS);
+       if (!s) {
+               if (!c->fsck_alloc_err)
+                       bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
+               c->fsck_alloc_err = true;
+               buf = _buf;
+               goto print;
+       }
+
+       INIT_LIST_HEAD(&s->list);
+       s->fmt = fmt;
+found:
+       list_move(&s->list, &c->fsck_errors);
+       s->nr++;
+       if (c->opts.ratelimit_errors &&
+           s->nr >= FSCK_ERR_RATELIMIT_NR) {
+               if (s->nr == FSCK_ERR_RATELIMIT_NR)
+                       suppressing = true;
+               else
+                       print = false;
+       }
+       buf             = s->buf;
+print:
+       va_start(args, fmt);
+       vscnprintf(buf, sizeof(_buf), fmt, args);
+       va_end(args);
+
+       if (c->opts.fix_errors == FSCK_OPT_EXIT) {
+               bch_err(c, "%s, exiting", buf);
+       } else if (flags & FSCK_CAN_FIX) {
+               if (c->opts.fix_errors == FSCK_OPT_ASK) {
+                       printk(KERN_ERR "%s: fix?", buf);
+                       fix = ask_yn();
+               } else if (c->opts.fix_errors == FSCK_OPT_YES ||
+                          (c->opts.nochanges &&
+                           !(flags & FSCK_CAN_IGNORE))) {
+                       if (print)
+                               bch_err(c, "%s, fixing", buf);
+                       fix = true;
+               } else {
+                       if (print)
+                               bch_err(c, "%s, not fixing", buf);
+                       fix = false;
+               }
+       } else if (flags & FSCK_NEED_FSCK) {
+               if (print)
+                       bch_err(c, "%s (run fsck to correct)", buf);
+       } else {
+               if (print)
+                       bch_err(c, "%s (repair unimplemented)", buf);
+       }
+
+       if (suppressing)
+               bch_err(c, "Ratelimiting new instances of previous error");
+
+       /* record it, so bch2_flush_fsck_errs() can print a summary: */
+       if (s)
+               s->ratelimited |= suppressing;
+
+       mutex_unlock(&c->fsck_error_lock);
+
+       if (fix) {
+               set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
+               return FSCK_ERR_FIX;
+       } else {
+               set_bit(BCH_FS_ERROR, &c->flags);
+               return c->opts.fix_errors == FSCK_OPT_EXIT ||
+                       !(flags & FSCK_CAN_IGNORE)
+                       ? FSCK_ERR_EXIT
+                       : FSCK_ERR_IGNORE;
+       }
+}
+
+void bch2_flush_fsck_errs(struct bch_fs *c)
+{
+       struct fsck_err_state *s, *n;
+
+       mutex_lock(&c->fsck_error_lock);
+
+       list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
+               if (s->ratelimited)
+                       bch_err(c, "Saw %llu errors like:\n    %s", s->nr, s->buf);
+
+               list_del(&s->list);
+               kfree(s);
+       }
+
+       mutex_unlock(&c->fsck_error_lock);
+}
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
new file mode 100644 (file)
index 0000000..94b5331
--- /dev/null
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ERROR_H
+#define _BCACHEFS_ERROR_H
+
+#include <linux/list.h>
+#include <linux/printk.h>
+
+struct bch_dev;
+struct bch_fs;
+struct work_struct;
+
+/*
+ * XXX: separate out errors that indicate on-disk data is inconsistent, and
+ * flag the superblock as such
+ */
+
+/* Error messages: */
+
+/*
+ * Inconsistency errors: The on-disk data is inconsistent. If these occur during
+ * initial recovery, they don't indicate a bug in the running code - we walk all
+ * the metadata before modifying anything. If they occur at runtime, they
+ * indicate either a bug in the running code or (less likely) that data is being
+ * silently corrupted under us.
+ *
+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in
+ * BCH_ON_ERROR_CONTINUE mode
+ */
+
+bool bch2_inconsistent_error(struct bch_fs *);
+
+#define bch2_fs_inconsistent(c, ...)                                   \
+({                                                                     \
+       bch_err(c, __VA_ARGS__);                                        \
+       bch2_inconsistent_error(c);                                     \
+})
+
+#define bch2_fs_inconsistent_on(cond, c, ...)                          \
+({                                                                     \
+       int _ret = !!(cond);                                            \
+                                                                       \
+       if (_ret)                                                       \
+               bch2_fs_inconsistent(c, __VA_ARGS__);                   \
+       _ret;                                                           \
+})
+
+/*
+ * Later we might want to mark only the particular device inconsistent, not the
+ * entire filesystem:
+ */
+
+#define bch2_dev_inconsistent(ca, ...)                                 \
+do {                                                                   \
+       bch_err(ca, __VA_ARGS__);                                       \
+       bch2_inconsistent_error((ca)->fs);                              \
+} while (0)
+
+#define bch2_dev_inconsistent_on(cond, ca, ...)                                \
+({                                                                     \
+       int _ret = !!(cond);                                            \
+                                                                       \
+       if (_ret)                                                       \
+               bch2_dev_inconsistent(ca, __VA_ARGS__);                 \
+       _ret;                                                           \
+})
+
+/*
+ * Fsck errors: inconsistency errors we detect at mount time, which we should
+ * ideally be able to repair:
+ */
+
+enum {
+       BCH_FSCK_OK                     = 0,
+       BCH_FSCK_ERRORS_NOT_FIXED       = 1,
+       BCH_FSCK_REPAIR_UNIMPLEMENTED   = 2,
+       BCH_FSCK_REPAIR_IMPOSSIBLE      = 3,
+       BCH_FSCK_UNKNOWN_VERSION        = 4,
+};
+
+enum fsck_err_opts {
+       FSCK_OPT_EXIT,
+       FSCK_OPT_YES,
+       FSCK_OPT_NO,
+       FSCK_OPT_ASK,
+};
+
+enum fsck_err_ret {
+       FSCK_ERR_IGNORE = 0,
+       FSCK_ERR_FIX    = 1,
+       FSCK_ERR_EXIT   = 2,
+};
+
+struct fsck_err_state {
+       struct list_head        list;
+       const char              *fmt;
+       u64                     nr;
+       bool                    ratelimited;
+       char                    buf[512];
+};
+
+#define FSCK_CAN_FIX           (1 << 0)
+#define FSCK_CAN_IGNORE                (1 << 1)
+#define FSCK_NEED_FSCK         (1 << 2)
+
+__printf(3, 4) __cold
+enum fsck_err_ret bch2_fsck_err(struct bch_fs *,
+                               unsigned, const char *, ...);
+void bch2_flush_fsck_errs(struct bch_fs *);
+
+#define __fsck_err(c, _flags, msg, ...)                                        \
+({                                                                     \
+       int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\
+                                                                       \
+       if (_fix == FSCK_ERR_EXIT) {                                    \
+               bch_err(c, "Unable to continue, halting");              \
+               ret = BCH_FSCK_ERRORS_NOT_FIXED;                        \
+               goto fsck_err;                                          \
+       }                                                               \
+                                                                       \
+       _fix;                                                           \
+})
+
+/* These macros return true if error should be fixed: */
+
+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
+
+#define __fsck_err_on(cond, c, _flags, ...)                            \
+       ((cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false)
+
+#define need_fsck_err_on(cond, c, ...)                                 \
+       __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
+
+#define need_fsck_err(c, ...)                                          \
+       __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
+
+#define mustfix_fsck_err(c, ...)                                       \
+       __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__)
+
+#define mustfix_fsck_err_on(cond, c, ...)                              \
+       __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__)
+
+#define fsck_err(c, ...)                                               \
+       __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
+
+#define fsck_err_on(cond, c, ...)                                      \
+       __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
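+
+/*
+ * All of these expect an 'int ret' and a 'fsck_err:' label to be in scope
+ * in the calling function. A minimal usage sketch - check_key() and
+ * repair_key() are made-up names:
+ *
+ *	static int check_key(struct bch_fs *c, struct bkey_s_c k)
+ *	{
+ *		int ret = 0;
+ *
+ *		if (fsck_err_on(bkey_deleted(k.k), c, "unexpected deleted key"))
+ *			ret = repair_key(c, k);
+ *	fsck_err:
+ *		return ret;
+ *	}
+ */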
+
+/*
+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW
+ * mode - pretty much just due to metadata IO errors:
+ */
+
+void bch2_fatal_error(struct bch_fs *);
+
+#define bch2_fs_fatal_error(c, ...)                                    \
+do {                                                                   \
+       bch_err(c, __VA_ARGS__);                                        \
+       bch2_fatal_error(c);                                            \
+} while (0)
+
+#define bch2_fs_fatal_err_on(cond, c, ...)                             \
+({                                                                     \
+       int _ret = !!(cond);                                            \
+                                                                       \
+       if (_ret)                                                       \
+               bch2_fs_fatal_error(c, __VA_ARGS__);                    \
+       _ret;                                                           \
+})
+
+/*
+ * IO errors: either recoverable metadata IO (because we have replicas), or data
+ * IO - we need to log it and print out a message, but we don't (necessarily)
+ * want to shut down the fs:
+ */
+
+void bch2_io_error_work(struct work_struct *);
+
+/* Does the error handling without logging a message */
+void bch2_io_error(struct bch_dev *);
+
+/* Logs message and handles the error: */
+#define bch2_dev_io_error(ca, fmt, ...)                                        \
+do {                                                                   \
+       printk_ratelimited(KERN_ERR bch2_fmt((ca)->fs,                  \
+               "IO error on %s for " fmt),                             \
+               (ca)->name, ##__VA_ARGS__);                             \
+       bch2_io_error(ca);                                              \
+} while (0)
+
+#define bch2_dev_io_err_on(cond, ca, ...)                              \
+({                                                                     \
+       bool _ret = (cond);                                             \
+                                                                       \
+       if (_ret)                                                       \
+               bch2_dev_io_error(ca, __VA_ARGS__);                     \
+       _ret;                                                           \
+})
+
+/* kill? */
+
+#define __bcache_io_error(c, fmt, ...)                                 \
+       printk_ratelimited(KERN_ERR bch2_fmt(c,                         \
+                       "IO error: " fmt), ##__VA_ARGS__)
+
+#define bcache_io_error(c, bio, fmt, ...)                              \
+do {                                                                   \
+       __bcache_io_error(c, fmt, ##__VA_ARGS__);                       \
+       (bio)->bi_status = BLK_STS_IOERR;                               \
+} while (0)
+
+#endif /* _BCACHEFS_ERROR_H */
diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c
new file mode 100644 (file)
index 0000000..fd011df
--- /dev/null
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bkey_on_stack.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "debug.h"
+#include "extents.h"
+#include "extent_update.h"
+
+/*
+ * This counts the number of iterators to the alloc & ec btrees we'll need when
+ * inserting/removing this extent:
+ */
+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       unsigned ret = 0;
+
+       bkey_extent_entry_for_each(ptrs, entry) {
+               switch (__extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       ret++;
+               }
+       }
+
+       return ret;
+}
+
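+/*
+ * Counts the btree iterators that processing @k will require against
+ * *nr_iters; once @max_iters is reached, *end is trimmed back and 1 is
+ * returned so the caller stops here; a negative return is an error:
+ */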
+static int count_iters_for_insert(struct btree_trans *trans,
+                                 struct bkey_s_c k,
+                                 unsigned offset,
+                                 struct bpos *end,
+                                 unsigned *nr_iters,
+                                 unsigned max_iters)
+{
+       int ret = 0, ret2 = 0;
+
+       if (*nr_iters >= max_iters) {
+               *end = bpos_min(*end, k.k->p);
+               ret = 1;
+       }
+
+       switch (k.k->type) {
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
+               *nr_iters += bch2_bkey_nr_alloc_ptrs(k);
+
+               if (*nr_iters >= max_iters) {
+                       *end = bpos_min(*end, k.k->p);
+                       ret = 1;
+               }
+
+               break;
+       case KEY_TYPE_reflink_p: {
+               struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+               u64 idx = le64_to_cpu(p.v->idx);
+               unsigned sectors = bpos_min(*end, p.k->p).offset -
+                       bkey_start_offset(p.k);
+               struct btree_iter *iter;
+               struct bkey_s_c r_k;
+
+               for_each_btree_key(trans, iter,
+                                  BTREE_ID_REFLINK, POS(0, idx + offset),
+                                  BTREE_ITER_SLOTS, r_k, ret2) {
+                       if (bkey_cmp(bkey_start_pos(r_k.k),
+                                    POS(0, idx + sectors)) >= 0)
+                               break;
+
+                       /* extent_update_to_keys(), for the reflink_v update */
+                       *nr_iters += 1;
+
+                       *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
+
+                       if (*nr_iters >= max_iters) {
+                               struct bpos pos = bkey_start_pos(k.k);
+                               pos.offset += min_t(u64, k.k->size,
+                                                   r_k.k->p.offset - idx);
+
+                               *end = bpos_min(*end, pos);
+                               ret = 1;
+                               break;
+                       }
+               }
+
+               bch2_trans_iter_put(trans, iter);
+               break;
+       }
+       }
+
+       return ret2 ?: ret;
+}
+
+#define EXTENT_ITERS_MAX       (BTREE_ITER_MAX / 3)
+
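+/*
+ * Computes in *end the farthest position to which @insert can be applied
+ * as a single atomic btree update, bounded by the end of the leaf node
+ * and by the iterator budget for the keys being overwritten:
+ */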
+int bch2_extent_atomic_end(struct btree_iter *iter,
+                          struct bkey_i *insert,
+                          struct bpos *end)
+{
+       struct btree_trans *trans = iter->trans;
+       struct btree *b;
+       struct btree_node_iter  node_iter;
+       struct bkey_packed      *_k;
+       unsigned                nr_iters = 0;
+       int ret;
+
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               return ret;
+
+       b = iter->l[0].b;
+       node_iter = iter->l[0].iter;
+
+       BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
+              bkey_cmp(bkey_start_pos(&insert->k),
+                       bkey_predecessor(b->data->min_key)) < 0);
+
+       *end = bpos_min(insert->k.p, b->key.k.p);
+
+       /* extent_update_to_keys(): */
+       nr_iters += 1;
+
+       ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
+                                    &nr_iters, EXTENT_ITERS_MAX / 2);
+       if (ret < 0)
+               return ret;
+
+       while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
+               struct bkey     unpacked;
+               struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
+               unsigned offset = 0;
+
+               if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
+                       break;
+
+               if (bkey_cmp(bkey_start_pos(&insert->k),
+                            bkey_start_pos(k.k)) > 0)
+                       offset = bkey_start_offset(&insert->k) -
+                               bkey_start_offset(k.k);
+
+               /* extent_handle_overwrites(): */
+               switch (bch2_extent_overlap(&insert->k, k.k)) {
+               case BCH_EXTENT_OVERLAP_ALL:
+               case BCH_EXTENT_OVERLAP_FRONT:
+                       nr_iters += 1;
+                       break;
+               case BCH_EXTENT_OVERLAP_BACK:
+               case BCH_EXTENT_OVERLAP_MIDDLE:
+                       nr_iters += 2;
+                       break;
+               }
+
+               ret = count_iters_for_insert(trans, k, offset, end,
+                                       &nr_iters, EXTENT_ITERS_MAX);
+               if (ret)
+                       break;
+
+               bch2_btree_node_iter_advance(&node_iter, b);
+       }
+
+       return ret < 0 ? ret : 0;
+}
+
+int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
+{
+       struct bpos end;
+       int ret;
+
+       ret = bch2_extent_atomic_end(iter, k, &end);
+       if (ret)
+               return ret;
+
+       bch2_cut_back(end, k);
+       return 0;
+}
+
+int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
+{
+       struct bpos end;
+       int ret;
+
+       ret = bch2_extent_atomic_end(iter, k, &end);
+       if (ret)
+               return ret;
+
+       return !bkey_cmp(end, k->k.p);
+}
+
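+/*
+ * Splitting a compressed extent costs disk space, since both halves keep a
+ * reference to the entire compressed payload; reserve those extra sectors
+ * up front so the insert can't fail with ENOSPC partway through:
+ */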
+enum btree_insert_ret
+bch2_extent_can_insert(struct btree_trans *trans,
+                      struct btree_iter *iter,
+                      struct bkey_i *insert)
+{
+       struct btree_iter_level *l = &iter->l[0];
+       struct btree_node_iter node_iter = l->iter;
+       struct bkey_packed *_k;
+       struct bkey_s_c k;
+       struct bkey unpacked;
+       int sectors;
+
+       _k = bch2_btree_node_iter_peek(&node_iter, l->b);
+       if (!_k)
+               return BTREE_INSERT_OK;
+
+       k = bkey_disassemble(l->b, _k, &unpacked);
+
+       /* Check if we're splitting a compressed extent: */
+
+       if (bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k)) > 0 &&
+           bkey_cmp(insert->k.p, k.k->p) < 0 &&
+           (sectors = bch2_bkey_sectors_compressed(k))) {
+               int flags = trans->flags & BTREE_INSERT_NOFAIL
+                       ? BCH_DISK_RESERVATION_NOFAIL : 0;
+
+               switch (bch2_disk_reservation_add(trans->c, trans->disk_res,
+                                                 sectors, flags)) {
+               case 0:
+                       break;
+               case -ENOSPC:
+                       return BTREE_INSERT_ENOSPC;
+               default:
+                       BUG();
+               }
+       }
+
+       return BTREE_INSERT_OK;
+}
diff --git a/libbcachefs/extent_update.h b/libbcachefs/extent_update.h
new file mode 100644 (file)
index 0000000..38dc084
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENT_UPDATE_H
+#define _BCACHEFS_EXTENT_UPDATE_H
+
+#include "bcachefs.h"
+
+int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *,
+                          struct bpos *);
+int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
+int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
+
+enum btree_insert_ret
+bch2_extent_can_insert(struct btree_trans *, struct btree_iter *,
+                      struct bkey_i *);
+
+#endif /* _BCACHEFS_EXTENT_UPDATE_H */
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
new file mode 100644 (file)
index 0000000..568f039
--- /dev/null
@@ -0,0 +1,1258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * Code for managing the extent btree and dynamically updating the writeback
+ * dirty sector count.
+ */
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_gc.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "error.h"
+#include "extents.h"
+#include "inode.h"
+#include "journal.h"
+#include "replicas.h"
+#include "super.h"
+#include "super-io.h"
+#include "util.h"
+
+#include <trace/events/bcachefs.h>
+
+static unsigned bch2_crc_field_size_max[] = {
+       [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
+       [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
+       [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX,
+};
+
+static void bch2_extent_crc_pack(union bch_extent_crc *,
+                                struct bch_extent_crc_unpacked,
+                                enum bch_extent_entry_type);
+
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+                                                  unsigned dev)
+{
+       struct bch_dev_io_failures *i;
+
+       for (i = f->devs; i < f->devs + f->nr; i++)
+               if (i->dev == dev)
+                       return i;
+
+       return NULL;
+}
+
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+                         struct extent_ptr_decoded *p)
+{
+       struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+       if (!f) {
+               BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+               f = &failed->devs[failed->nr++];
+               f->dev          = p->ptr.dev;
+               f->idx          = p->idx;
+               f->nr_failed    = 1;
+               f->nr_retries   = 0;
+       } else if (p->idx != f->idx) {
+               f->idx          = p->idx;
+               f->nr_failed    = 1;
+               f->nr_retries   = 0;
+       } else {
+               f->nr_failed++;
+       }
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+                             const struct extent_ptr_decoded p1,
+                             const struct extent_ptr_decoded p2)
+{
+       if (likely(!p1.idx && !p2.idx)) {
+               struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+               struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
+               u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
+               u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
+
+               /* Pick at random, biased in favor of the faster device: */
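+               /* (p1 wins with probability ~ l2 / (l1 + l2)) */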
+
+               return bch2_rand_range(l1 + l2) > l1;
+       }
+
+       if (force_reconstruct_read(c))
+               return p1.idx > p2.idx;
+
+       return p1.idx < p2.idx;
+}
+
+/*
+ * This picks a non-stale pointer to read from: @failed, which may be NULL,
+ * tracks previous IO failures per device, and is used to skip replicas that
+ * have already failed - for erasure coded extents, a failed replica means we
+ * next try a reconstruct read.
+ */
+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
+                              struct bch_io_failures *failed,
+                              struct extent_ptr_decoded *pick)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct bch_dev_io_failures *f;
+       struct bch_dev *ca;
+       int ret = 0;
+
+       if (k.k->type == KEY_TYPE_error)
+               return -EIO;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               ca = bch_dev_bkey_exists(c, p.ptr.dev);
+
+               /*
+                * If there are any dirty pointers it's an error if we can't
+                * read:
+                */
+               if (!ret && !p.ptr.cached)
+                       ret = -EIO;
+
+               if (p.ptr.cached && ptr_stale(ca, &p.ptr))
+                       continue;
+
+               f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+               if (f)
+                       p.idx = f->nr_failed < f->nr_retries
+                               ? f->idx
+                               : f->idx + 1;
+
+               if (!p.idx &&
+                   !bch2_dev_is_readable(ca))
+                       p.idx++;
+
+               if (force_reconstruct_read(c) &&
+                   !p.idx && p.has_ec)
+                       p.idx++;
+
+               if (p.idx >= (unsigned) p.has_ec + 1)
+                       continue;
+
+               if (ret > 0 && !ptr_better(c, p, *pick))
+                       continue;
+
+               *pick = p;
+               ret = 1;
+       }
+
+       return ret;
+}
+
+/* KEY_TYPE_btree_ptr: */
+
+const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
+               return "value too big";
+
+       return bch2_bkey_ptrs_invalid(c, k);
+}
+
+void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+       const char *err;
+       char buf[160];
+       struct bucket_mark mark;
+       struct bch_dev *ca;
+
+       if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags))
+               return;
+
+       if (!percpu_down_read_trylock(&c->mark_lock))
+               return;
+
+       bkey_for_each_ptr(ptrs, ptr) {
+               ca = bch_dev_bkey_exists(c, ptr->dev);
+
+               mark = ptr_bucket_mark(ca, ptr);
+
+               err = "stale";
+               if (gen_after(mark.gen, ptr->gen))
+                       goto err;
+
+               err = "inconsistent";
+               if (mark.data_type != BCH_DATA_btree ||
+                   mark.dirty_sectors < c->opts.btree_node_size)
+                       goto err;
+       }
+out:
+       percpu_up_read(&c->mark_lock);
+       return;
+err:
+       bch2_fs_inconsistent(c, "%s btree pointer %s: bucket %zi gen %i mark %08x",
+               err, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
+               PTR_BUCKET_NR(ca, ptr),
+               mark.gen, (unsigned) mark.v.counter);
+       goto out;
+}
+
+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
+{
+       bch2_bkey_ptrs_to_text(out, c, k);
+}
+
+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
+{
+       struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
+
+       pr_buf(out, "seq %llx sectors %u written %u min_key ",
+              le64_to_cpu(bp.v->seq),
+              le16_to_cpu(bp.v->sectors),
+              le16_to_cpu(bp.v->sectors_written));
+
+       bch2_bpos_to_text(out, bp.v->min_key);
+       pr_buf(out, " ");
+       bch2_bkey_ptrs_to_text(out, c, k);
+}
+
+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
+                             unsigned big_endian, int write,
+                             struct bkey_s k)
+{
+       struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k);
+
+       compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key);
+
+       if (version < bcachefs_metadata_version_inode_btree_change &&
+           btree_node_type_is_extents(btree_id) &&
+           bkey_cmp(bp.v->min_key, POS_MIN))
+               bp.v->min_key = write
+                       ? bkey_predecessor(bp.v->min_key)
+                       : bkey_successor(bp.v->min_key);
+}
+
+/* KEY_TYPE_extent: */
+
+const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       return bch2_bkey_ptrs_invalid(c, k);
+}
+
+void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       char buf[160];
+
+       if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) ||
+           !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags))
+               return;
+
+       if (!percpu_down_read_trylock(&c->mark_lock))
+               return;
+
+       extent_for_each_ptr_decode(e, p, entry) {
+               struct bch_dev *ca      = bch_dev_bkey_exists(c, p.ptr.dev);
+               struct bucket_mark mark = ptr_bucket_mark(ca, &p.ptr);
+               unsigned stale          = gen_after(mark.gen, p.ptr.gen);
+               unsigned disk_sectors   = ptr_disk_sectors(p);
+               unsigned mark_sectors   = p.ptr.cached
+                       ? mark.cached_sectors
+                       : mark.dirty_sectors;
+
+               bch2_fs_inconsistent_on(stale && !p.ptr.cached, c,
+                       "stale dirty pointer (ptr gen %u bucket %u",
+                       p.ptr.gen, mark.gen);
+
+               bch2_fs_inconsistent_on(stale > 96, c,
+                       "key too stale: %i", stale);
+
+               bch2_fs_inconsistent_on(!stale &&
+                       (mark.data_type != BCH_DATA_user ||
+                        mark_sectors < disk_sectors), c,
+                       "extent pointer not marked: %s:\n"
+                       "type %u sectors %u < %u",
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf),
+                       mark.data_type,
+                       mark_sectors, disk_sectors);
+       }
+
+       percpu_up_read(&c->mark_lock);
+}
+
+void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
+                        struct bkey_s_c k)
+{
+       bch2_bkey_ptrs_to_text(out, c, k);
+}
+
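+/*
+ * Two adjacent extents can merge only if their values line up entry by
+ * entry: same entry types in the same order, pointers physically
+ * contiguous on the same device and bucket, matching crc parameters, and
+ * a checksum type that can be combined (see bch2_checksum_mergeable()):
+ */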
+enum merge_result bch2_extent_merge(struct bch_fs *c,
+                                   struct bkey_s _l, struct bkey_s _r)
+{
+       struct bkey_s_extent l = bkey_s_to_extent(_l);
+       struct bkey_s_extent r = bkey_s_to_extent(_r);
+       union bch_extent_entry *en_l = l.v->start;
+       union bch_extent_entry *en_r = r.v->start;
+       struct bch_extent_crc_unpacked crc_l, crc_r;
+
+       if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k))
+               return BCH_MERGE_NOMERGE;
+
+       crc_l = bch2_extent_crc_unpack(l.k, NULL);
+
+       extent_for_each_entry(l, en_l) {
+               en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
+
+               if (extent_entry_type(en_l) != extent_entry_type(en_r))
+                       return BCH_MERGE_NOMERGE;
+
+               switch (extent_entry_type(en_l)) {
+               case BCH_EXTENT_ENTRY_ptr: {
+                       const struct bch_extent_ptr *lp = &en_l->ptr;
+                       const struct bch_extent_ptr *rp = &en_r->ptr;
+                       struct bch_dev *ca;
+
+                       if (lp->offset + crc_l.compressed_size != rp->offset ||
+                           lp->dev                     != rp->dev ||
+                           lp->gen                     != rp->gen)
+                               return BCH_MERGE_NOMERGE;
+
+                       /* We don't allow extents to straddle buckets: */
+                       ca = bch_dev_bkey_exists(c, lp->dev);
+
+                       if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
+                               return BCH_MERGE_NOMERGE;
+
+                       break;
+               }
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       if (en_l->stripe_ptr.block      != en_r->stripe_ptr.block ||
+                           en_l->stripe_ptr.idx        != en_r->stripe_ptr.idx)
+                               return BCH_MERGE_NOMERGE;
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+               case BCH_EXTENT_ENTRY_crc64:
+               case BCH_EXTENT_ENTRY_crc128:
+                       crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+                       crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+
+                       if (crc_l.csum_type             != crc_r.csum_type ||
+                           crc_l.compression_type      != crc_r.compression_type ||
+                           crc_l.nonce                 != crc_r.nonce)
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_l.offset + crc_l.live_size != crc_l.compressed_size ||
+                           crc_r.offset)
+                               return BCH_MERGE_NOMERGE;
+
+                       if (!bch2_checksum_mergeable(crc_l.csum_type))
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_is_compressed(crc_l))
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_l.csum_type &&
+                           crc_l.uncompressed_size +
+                           crc_r.uncompressed_size > c->sb.encoded_extent_max)
+                               return BCH_MERGE_NOMERGE;
+
+                       if (crc_l.uncompressed_size + crc_r.uncompressed_size >
+                           bch2_crc_field_size_max[extent_entry_type(en_l)])
+                               return BCH_MERGE_NOMERGE;
+
+                       break;
+               default:
+                       return BCH_MERGE_NOMERGE;
+               }
+       }
+
+       extent_for_each_entry(l, en_l) {
+               struct bch_extent_crc_unpacked crc_l, crc_r;
+
+               en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
+
+               if (!extent_entry_is_crc(en_l))
+                       continue;
+
+               crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+               crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+
+               crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
+                                                crc_l.csum,
+                                                crc_r.csum,
+                                                crc_r.uncompressed_size << 9);
+
+               crc_l.uncompressed_size += crc_r.uncompressed_size;
+               crc_l.compressed_size   += crc_r.compressed_size;
+
+               bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
+                                    extent_entry_type(en_l));
+       }
+
+       bch2_key_resize(l.k, l.k->size + r.k->size);
+
+       return BCH_MERGE_MERGE;
+}
+
+/* KEY_TYPE_reservation: */
+
+const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
+               return "incorrect value size";
+
+       if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
+               return "invalid nr_replicas";
+
+       return NULL;
+}
+
+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
+                             struct bkey_s_c k)
+{
+       struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+       pr_buf(out, "generation %u replicas %u",
+              le32_to_cpu(r.v->generation),
+              r.v->nr_replicas);
+}
+
+enum merge_result bch2_reservation_merge(struct bch_fs *c,
+                                        struct bkey_s _l, struct bkey_s _r)
+{
+       struct bkey_s_reservation l = bkey_s_to_reservation(_l);
+       struct bkey_s_reservation r = bkey_s_to_reservation(_r);
+
+       if (l.v->generation != r.v->generation ||
+           l.v->nr_replicas != r.v->nr_replicas)
+               return BCH_MERGE_NOMERGE;
+
+       if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
+               bch2_key_resize(l.k, KEY_SIZE_MAX);
+               bch2_cut_front_s(l.k->p, r.s);
+               return BCH_MERGE_PARTIAL;
+       }
+
+       bch2_key_resize(l.k, l.k->size + r.k->size);
+
+       return BCH_MERGE_MERGE;
+}
+
+/* Extent checksum entries: */
+
+/* returns true if not equal */
+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
+                                        struct bch_extent_crc_unpacked r)
+{
+       return (l.csum_type             != r.csum_type ||
+               l.compression_type      != r.compression_type ||
+               l.compressed_size       != r.compressed_size ||
+               l.uncompressed_size     != r.uncompressed_size ||
+               l.offset                != r.offset ||
+               l.live_size             != r.live_size ||
+               l.nonce                 != r.nonce ||
+               bch2_crc_cmp(l.csum, r.csum));
+}
+
+static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
+                                 struct bch_extent_crc_unpacked n)
+{
+       return !crc_is_compressed(u) &&
+               u.csum_type &&
+               u.uncompressed_size > u.live_size &&
+               bch2_csum_type_is_encryption(u.csum_type) ==
+               bch2_csum_type_is_encryption(n.csum_type);
+}
+
+bool bch2_can_narrow_extent_crcs(struct bkey_s_c k,
+                                struct bch_extent_crc_unpacked n)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *i;
+
+       if (!n.csum_type)
+               return false;
+
+       bkey_for_each_crc(k.k, ptrs, crc, i)
+               if (can_narrow_crc(crc, n))
+                       return true;
+
+       return false;
+}
+
+/*
+ * We're writing another replica for this extent, so while we've got the data in
+ * memory we'll be computing a new checksum for the currently live data.
+ *
+ * If there are other replicas we aren't moving, and they are checksummed but
+ * not compressed, we can modify them to point to only the data that is
+ * currently live (so that readers won't have to bounce) while we've got the
+ * checksum we need:
+ */
+bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n)
+{
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+       struct bch_extent_crc_unpacked u;
+       struct extent_ptr_decoded p;
+       union bch_extent_entry *i;
+       bool ret = false;
+
+       /* Find a checksum entry that covers only live data: */
+       if (!n.csum_type) {
+               bkey_for_each_crc(&k->k, ptrs, u, i)
+                       if (!crc_is_compressed(u) &&
+                           u.csum_type &&
+                           u.live_size == u.uncompressed_size) {
+                               n = u;
+                               goto found;
+                       }
+               return false;
+       }
+found:
+       BUG_ON(crc_is_compressed(n));
+       BUG_ON(n.offset);
+       BUG_ON(n.live_size != k->k.size);
+
+restart_narrow_pointers:
+       ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+
+       bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
+               if (can_narrow_crc(p.crc, n)) {
+                       bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
+                       p.ptr.offset += p.crc.offset;
+                       p.crc = n;
+                       bch2_extent_ptr_decoded_append(k, &p);
+                       ret = true;
+                       goto restart_narrow_pointers;
+               }
+
+       return ret;
+}
+
+static void bch2_extent_crc_pack(union bch_extent_crc *dst,
+                                struct bch_extent_crc_unpacked src,
+                                enum bch_extent_entry_type type)
+{
+#define set_common_fields(_dst, _src)                                  \
+               _dst.type               = 1 << type;                    \
+               _dst.csum_type          = _src.csum_type,               \
+               _dst.compression_type   = _src.compression_type,        \
+               _dst._compressed_size   = _src.compressed_size - 1,     \
+               _dst._uncompressed_size = _src.uncompressed_size - 1,   \
+               _dst.offset             = _src.offset
+
+       switch (type) {
+       case BCH_EXTENT_ENTRY_crc32:
+               set_common_fields(dst->crc32, src);
+               dst->crc32.csum  = *((__le32 *) &src.csum.lo);
+               break;
+       case BCH_EXTENT_ENTRY_crc64:
+               set_common_fields(dst->crc64, src);
+               dst->crc64.nonce        = src.nonce;
+               dst->crc64.csum_lo      = src.csum.lo;
+               dst->crc64.csum_hi      = *((__le16 *) &src.csum.hi);
+               break;
+       case BCH_EXTENT_ENTRY_crc128:
+               set_common_fields(dst->crc128, src);
+               dst->crc128.nonce       = src.nonce;
+               dst->crc128.csum        = src.csum;
+               break;
+       default:
+               BUG();
+       }
+#undef set_common_fields
+}
+
+void bch2_extent_crc_append(struct bkey_i *k,
+                           struct bch_extent_crc_unpacked new)
+{
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+       union bch_extent_crc *crc = (void *) ptrs.end;
+       enum bch_extent_entry_type type;
+
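+       /* Use the smallest crc entry type that fits the csum, size and nonce: */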
+       if (bch_crc_bytes[new.csum_type]        <= 4 &&
+           new.uncompressed_size               <= CRC32_SIZE_MAX &&
+           new.nonce                           <= CRC32_NONCE_MAX)
+               type = BCH_EXTENT_ENTRY_crc32;
+       else if (bch_crc_bytes[new.csum_type]   <= 10 &&
+                  new.uncompressed_size        <= CRC64_SIZE_MAX &&
+                  new.nonce                    <= CRC64_NONCE_MAX)
+               type = BCH_EXTENT_ENTRY_crc64;
+       else if (bch_crc_bytes[new.csum_type]   <= 16 &&
+                  new.uncompressed_size        <= CRC128_SIZE_MAX &&
+                  new.nonce                    <= CRC128_NONCE_MAX)
+               type = BCH_EXTENT_ENTRY_crc128;
+       else
+               BUG();
+
+       bch2_extent_crc_pack(crc, new, type);
+
+       k->k.u64s += extent_entry_u64s(ptrs.end);
+
+       EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX);
+}
+
+/* Generic code for keys with pointers: */
+
+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k)
+{
+       return bch2_bkey_devs(k).nr;
+}
+
+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k)
+{
+       return k.k->type == KEY_TYPE_reservation
+               ? bkey_s_c_to_reservation(k).v->nr_replicas
+               : bch2_bkey_dirty_devs(k).nr;
+}
+
+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k)
+{
+       unsigned ret = 0;
+
+       if (k.k->type == KEY_TYPE_reservation) {
+               ret = bkey_s_c_to_reservation(k).v->nr_replicas;
+       } else {
+               struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
+
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+                       ret += !p.ptr.cached && !crc_is_compressed(p.crc);
+       }
+
+       return ret;
+}
+
+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       unsigned ret = 0;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+               if (!p.ptr.cached && crc_is_compressed(p.crc))
+                       ret += p.crc.compressed_size;
+
+       return ret;
+}
+
+bool bch2_bkey_is_incompressible(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct bch_extent_crc_unpacked crc;
+
+       bkey_for_each_crc(k.k, ptrs, crc, entry)
+               if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+                       return true;
+       return false;
+}
+
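+/*
+ * Returns true if every extent in [pos, pos + size) has at least
+ * @nr_replicas fully allocated replicas - i.e. non-cached, uncompressed
+ * pointers, or a reservation for that many replicas:
+ */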
+bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
+                               unsigned nr_replicas)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bpos end = pos;
+       struct bkey_s_c k;
+       bool ret = true;
+       int err;
+
+       end.offset += size;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos,
+                          BTREE_ITER_SLOTS, k, err) {
+               if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
+                       break;
+
+               if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) {
+                       ret = false;
+                       break;
+               }
+       }
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
+
+static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
+                                          struct extent_ptr_decoded p)
+{
+       unsigned durability = 0;
+       struct bch_dev *ca;
+
+       if (p.ptr.cached)
+               return 0;
+
+       ca = bch_dev_bkey_exists(c, p.ptr.dev);
+
+       if (ca->mi.state != BCH_MEMBER_STATE_FAILED)
+               durability = max_t(unsigned, durability, ca->mi.durability);
+
+       if (p.has_ec) {
+               struct stripe *s =
+                       genradix_ptr(&c->stripes[0], p.ec.idx);
+
+               if (WARN_ON(!s))
+                       goto out;
+
+               durability += s->nr_redundant;
+       }
+out:
+       return durability;
+}
+
+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       unsigned durability = 0;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+               durability += bch2_extent_ptr_durability(c, p);
+
+       return durability;
+}
+
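+/*
+ * If @k has more durability than @nr_desired_replicas, mark the extra
+ * replicas as cached, preferring to demote replicas outside @target first:
+ */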
+void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
+                                   unsigned target,
+                                   unsigned nr_desired_replicas)
+{
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+       union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
+
+       if (target && extra > 0)
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+                       int n = bch2_extent_ptr_durability(c, p);
+
+                       if (n && n <= extra &&
+                           !bch2_dev_in_target(c, p.ptr.dev, target)) {
+                               entry->ptr.cached = true;
+                               extra -= n;
+                       }
+               }
+
+       if (extra > 0)
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+                       int n = bch2_extent_ptr_durability(c, p);
+
+                       if (n && n <= extra) {
+                               entry->ptr.cached = true;
+                               extra -= n;
+                       }
+               }
+}
+
+void bch2_bkey_append_ptr(struct bkey_i *k,
+                         struct bch_extent_ptr ptr)
+{
+       EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev));
+
+       switch (k->k.type) {
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
+       case KEY_TYPE_extent:
+               EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
+
+               ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+
+               memcpy((void *) &k->v + bkey_val_bytes(&k->k),
+                      &ptr,
+                      sizeof(ptr));
+               k->u64s++;
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline void __extent_entry_insert(struct bkey_i *k,
+                                        union bch_extent_entry *dst,
+                                        union bch_extent_entry *new)
+{
+       union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
+
+       memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
+                             dst, (u64 *) end - (u64 *) dst);
+       k->k.u64s += extent_entry_u64s(new);
+       memcpy(dst, new, extent_entry_bytes(new));
+}
+
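+/*
+ * Appends @p, reusing an existing crc entry when one matches; note that a
+ * stripe_ptr entry is inserted ahead of the pointer it applies to, which
+ * is the order bkey_for_each_ptr_decode() expects:
+ */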
+void bch2_extent_ptr_decoded_append(struct bkey_i *k,
+                                   struct extent_ptr_decoded *p)
+{
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+       struct bch_extent_crc_unpacked crc =
+               bch2_extent_crc_unpack(&k->k, NULL);
+       union bch_extent_entry *pos;
+
+       if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
+               pos = ptrs.start;
+               goto found;
+       }
+
+       bkey_for_each_crc(&k->k, ptrs, crc, pos)
+               if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
+                       pos = extent_entry_next(pos);
+                       goto found;
+               }
+
+       bch2_extent_crc_append(k, p->crc);
+       pos = bkey_val_end(bkey_i_to_s(k));
+found:
+       p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+       __extent_entry_insert(k, pos, to_entry(&p->ptr));
+
+       if (p->has_ec) {
+               p->ec.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr;
+               __extent_entry_insert(k, pos, to_entry(&p->ec));
+       }
+}
+
+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
+                                         union bch_extent_entry *entry)
+{
+       union bch_extent_entry *i = ptrs.start;
+
+       if (i == entry)
+               return NULL;
+
+       while (extent_entry_next(i) != entry)
+               i = extent_entry_next(i);
+       return i;
+}
+
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
+                                          struct bch_extent_ptr *ptr)
+{
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+       union bch_extent_entry *dst, *src, *prev;
+       bool drop_crc = true;
+
+       EBUG_ON(ptr < &ptrs.start->ptr ||
+               ptr >= &ptrs.end->ptr);
+       EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
+
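+       /*
+        * A crc entry applies to the pointers that follow it, so we can only
+        * drop the preceding crc entry if @ptr was the last pointer using it:
+        */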
+       src = extent_entry_next(to_entry(ptr));
+       if (src != ptrs.end &&
+           !extent_entry_is_crc(src))
+               drop_crc = false;
+
+       dst = to_entry(ptr);
+       while ((prev = extent_entry_prev(ptrs, dst))) {
+               if (extent_entry_is_ptr(prev))
+                       break;
+
+               if (extent_entry_is_crc(prev)) {
+                       if (drop_crc)
+                               dst = prev;
+                       break;
+               }
+
+               dst = prev;
+       }
+
+       memmove_u64s_down(dst, src,
+                         (u64 *) ptrs.end - (u64 *) src);
+       k.k->u64s -= (u64 *) src - (u64 *) dst;
+
+       return dst;
+}
+
+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
+{
+       struct bch_extent_ptr *ptr;
+
+       bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
+}
+
+const struct bch_extent_ptr *
+bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(ptrs, ptr)
+               if (ptr->dev == dev)
+                       return ptr;
+
+       return NULL;
+}
+
+bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(ptrs, ptr)
+               if (bch2_dev_in_target(c, ptr->dev, target) &&
+                   (!ptr->cached ||
+                    !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
+                       return true;
+
+       return false;
+}
+
+bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
+                          struct bch_extent_ptr m, u64 offset)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+               if (p.ptr.dev   == m.dev &&
+                   p.ptr.gen   == m.gen &&
+                   (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) ==
+                   (s64) m.offset  - offset)
+                       return true;
+
+       return false;
+}
+
+/*
+ * bch2_extent_normalize - clean up an extent, dropping stale pointers etc.
+ *
+ * Returns true if @k should be dropped entirely
+ *
+ * For existing keys, only called when btree nodes are being rewritten, not when
+ * they're merely being compacted/resorted in memory.
+ */
+bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
+{
+       struct bch_extent_ptr *ptr;
+
+       bch2_bkey_drop_ptrs(k, ptr,
+               ptr->cached &&
+               ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
+
+       /* will only happen if all pointers were cached: */
+       if (!bch2_bkey_nr_ptrs(k.s_c))
+               k.k->type = KEY_TYPE_discard;
+
+       return bkey_whiteout(k.k);
+}
+
+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct bch_extent_crc_unpacked crc;
+       const struct bch_extent_ptr *ptr;
+       const struct bch_extent_stripe_ptr *ec;
+       struct bch_dev *ca;
+       bool first = true;
+
+       bkey_extent_entry_for_each(ptrs, entry) {
+               if (!first)
+                       pr_buf(out, " ");
+
+               switch (__extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+                       ptr = entry_to_ptr(entry);
+                       ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+                               ? bch_dev_bkey_exists(c, ptr->dev)
+                               : NULL;
+
+                       pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev,
+                              (u64) ptr->offset, ptr->gen,
+                              ptr->cached ? " cached" : "",
+                              ca && ptr_stale(ca, ptr)
+                              ? " stale" : "");
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+               case BCH_EXTENT_ENTRY_crc64:
+               case BCH_EXTENT_ENTRY_crc128:
+                       crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
+
+                       pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u",
+                              crc.compressed_size,
+                              crc.uncompressed_size,
+                              crc.offset, crc.nonce,
+                              crc.csum_type,
+                              crc.compression_type);
+                       break;
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       ec = &entry->stripe_ptr;
+
+                       pr_buf(out, "ec: idx %llu block %u",
+                              (u64) ec->idx, ec->block);
+                       break;
+               default:
+                       pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
+                       return;
+               }
+
+               first = false;
+       }
+}
+
+static const char *extent_ptr_invalid(const struct bch_fs *c,
+                                     struct bkey_s_c k,
+                                     const struct bch_extent_ptr *ptr,
+                                     unsigned size_ondisk,
+                                     bool metadata)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr2;
+       struct bch_dev *ca;
+
+       if (!bch2_dev_exists2(c, ptr->dev))
+               return "pointer to invalid device";
+
+       ca = bch_dev_bkey_exists(c, ptr->dev);
+       if (!ca)
+               return "pointer to invalid device";
+
+       bkey_for_each_ptr(ptrs, ptr2)
+               if (ptr != ptr2 && ptr->dev == ptr2->dev)
+                       return "multiple pointers to same device";
+
+       if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
+               return "offset past end of device";
+
+       if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
+               return "offset before first bucket";
+
+       if (bucket_remainder(ca, ptr->offset) +
+           size_ondisk > ca->mi.bucket_size)
+               return "spans multiple buckets";
+
+       return NULL;
+}
+
+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct bch_extent_crc_unpacked crc;
+       unsigned size_ondisk = k.k->size;
+       const char *reason;
+       unsigned nonce = UINT_MAX;
+
+       if (k.k->type == KEY_TYPE_btree_ptr)
+               size_ondisk = c->opts.btree_node_size;
+       if (k.k->type == KEY_TYPE_btree_ptr_v2)
+               size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors);
+
+       bkey_extent_entry_for_each(ptrs, entry) {
+               if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
+                       return "invalid extent entry type";
+
+               if (k.k->type == KEY_TYPE_btree_ptr &&
+                   !extent_entry_is_ptr(entry))
+                       return "has non ptr field";
+
+               switch (extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+                       reason = extent_ptr_invalid(c, k, &entry->ptr,
+                                                   size_ondisk, false);
+                       if (reason)
+                               return reason;
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+               case BCH_EXTENT_ENTRY_crc64:
+               case BCH_EXTENT_ENTRY_crc128:
+                       crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
+
+                       if (crc.offset + crc.live_size >
+                           crc.uncompressed_size)
+                               return "checksum offset + key size > uncompressed size";
+
+                       size_ondisk = crc.compressed_size;
+
+                       if (!bch2_checksum_type_valid(c, crc.csum_type))
+                               return "invalid checksum type";
+
+                       if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR)
+                               return "invalid compression type";
+
+                       if (bch2_csum_type_is_encryption(crc.csum_type)) {
+                               if (nonce == UINT_MAX)
+                                       nonce = crc.offset + crc.nonce;
+                               else if (nonce != crc.offset + crc.nonce)
+                                       return "incorrect nonce";
+                       }
+                       break;
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       break;
+               }
+       }
+
+       return NULL;
+}
+
+void bch2_ptr_swab(struct bkey_s k)
+{
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+       union bch_extent_entry *entry;
+       u64 *d;
+
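+       /* byteswap the value one u64 at a time: */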
+       for (d =  (u64 *) ptrs.start;
+            d != (u64 *) ptrs.end;
+            d++)
+               *d = swab64(*d);
+
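+       /* then fix up the csum fields, which aren't stored as whole u64s: */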
+       for (entry = ptrs.start;
+            entry < ptrs.end;
+            entry = extent_entry_next(entry)) {
+               switch (extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+                       break;
+               case BCH_EXTENT_ENTRY_crc32:
+                       entry->crc32.csum = swab32(entry->crc32.csum);
+                       break;
+               case BCH_EXTENT_ENTRY_crc64:
+                       entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
+                       entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
+                       break;
+               case BCH_EXTENT_ENTRY_crc128:
+                       entry->crc128.csum.hi = (__force __le64)
+                               swab64((__force u64) entry->crc128.csum.hi);
+                       entry->crc128.csum.lo = (__force __le64)
+                               swab64((__force u64) entry->crc128.csum.lo);
+                       break;
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       break;
+               }
+       }
+}
+
+/* Generic extent code: */
+
+int bch2_cut_front_s(struct bpos where, struct bkey_s k)
+{
+       unsigned new_val_u64s = bkey_val_u64s(k.k);
+       int val_u64s_delta;
+       u64 sub;
+
+       if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0)
+               return 0;
+
+       EBUG_ON(bkey_cmp(where, k.k->p) > 0);
+
+       sub = where.offset - bkey_start_offset(k.k);
+
+       k.k->size -= sub;
+
+       if (!k.k->size) {
+               k.k->type = KEY_TYPE_deleted;
+               new_val_u64s = 0;
+       }
+
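+       /* adjust the value for the sectors dropped off the front: */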
+       switch (k.k->type) {
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v: {
+               struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+               union bch_extent_entry *entry;
+               bool seen_crc = false;
+
+               bkey_extent_entry_for_each(ptrs, entry) {
+                       switch (extent_entry_type(entry)) {
+                       case BCH_EXTENT_ENTRY_ptr:
+                               if (!seen_crc)
+                                       entry->ptr.offset += sub;
+                               break;
+                       case BCH_EXTENT_ENTRY_crc32:
+                               entry->crc32.offset += sub;
+                               break;
+                       case BCH_EXTENT_ENTRY_crc64:
+                               entry->crc64.offset += sub;
+                               break;
+                       case BCH_EXTENT_ENTRY_crc128:
+                               entry->crc128.offset += sub;
+                               break;
+                       case BCH_EXTENT_ENTRY_stripe_ptr:
+                               break;
+                       }
+
+                       if (extent_entry_is_crc(entry))
+                               seen_crc = true;
+               }
+
+               break;
+       }
+       case KEY_TYPE_reflink_p: {
+               struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k);
+
+               le64_add_cpu(&p.v->idx, sub);
+               break;
+       }
+       case KEY_TYPE_inline_data: {
+               struct bkey_s_inline_data d = bkey_s_to_inline_data(k);
+
+               sub = min_t(u64, sub << 9, bkey_val_bytes(d.k));
+
+               memmove(d.v->data,
+                       d.v->data + sub,
+                       bkey_val_bytes(d.k) - sub);
+
+               new_val_u64s -= sub >> 3;
+               break;
+       }
+       }
+
+       val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
+       BUG_ON(val_u64s_delta < 0);
+
+       set_bkey_val_u64s(k.k, new_val_u64s);
+       memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
+       return -val_u64s_delta;
+}
+
+int bch2_cut_back_s(struct bpos where, struct bkey_s k)
+{
+       unsigned new_val_u64s = bkey_val_u64s(k.k);
+       int val_u64s_delta;
+       u64 len = 0;
+
+       if (bkey_cmp(where, k.k->p) >= 0)
+               return 0;
+
+       EBUG_ON(bkey_cmp(where, bkey_start_pos(k.k)) < 0);
+
+       len = where.offset - bkey_start_offset(k.k);
+
+       k.k->p = where;
+       k.k->size = len;
+
+       if (!len) {
+               k.k->type = KEY_TYPE_deleted;
+               new_val_u64s = 0;
+       }
+
+       switch (k.k->type) {
+       case KEY_TYPE_inline_data:
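+               /* the key's size is in 512 byte sectors: << 9 bytes, >> 3 u64s */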
+               new_val_u64s = min(new_val_u64s, k.k->size << 6);
+               break;
+       }
+
+       val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
+       BUG_ON(val_u64s_delta < 0);
+
+       set_bkey_val_u64s(k.k, new_val_u64s);
+       memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
+       return -val_u64s_delta;
+}
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
new file mode 100644 (file)
index 0000000..29b1536
--- /dev/null
@@ -0,0 +1,603 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENTS_H
+#define _BCACHEFS_EXTENTS_H
+
+#include "bcachefs.h"
+#include "bkey.h"
+#include "extents_types.h"
+
+struct bch_fs;
+struct btree_trans;
+
+/* extent entries: */
+
+#define extent_entry_last(_e)                                          \
+       ((typeof(&(_e).v->start[0])) bkey_val_end(_e))
+
+#define entry_to_ptr(_entry)                                           \
+({                                                                     \
+       EBUG_ON((_entry) && !extent_entry_is_ptr(_entry));              \
+                                                                       \
+       __builtin_choose_expr(                                          \
+               type_is_exact(_entry, const union bch_extent_entry *),  \
+               (const struct bch_extent_ptr *) (_entry),               \
+               (struct bch_extent_ptr *) (_entry));                    \
+})
+
+/* downcast, preserves const */
+#define to_entry(_entry)                                               \
+({                                                                     \
+       BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) &&        \
+                    !type_is(_entry, struct bch_extent_ptr *) &&       \
+                    !type_is(_entry, struct bch_extent_stripe_ptr *)); \
+                                                                       \
+       __builtin_choose_expr(                                          \
+               (type_is_exact(_entry, const union bch_extent_crc *) || \
+                type_is_exact(_entry, const struct bch_extent_ptr *) ||\
+                type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
+               (const union bch_extent_entry *) (_entry),              \
+               (union bch_extent_entry *) (_entry));                   \
+})
+
+#define extent_entry_next(_entry)                                      \
+       ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
+
+static inline unsigned
+__extent_entry_type(const union bch_extent_entry *e)
+{
+       return e->type ? __ffs(e->type) : BCH_EXTENT_ENTRY_MAX;
+}
+
+static inline enum bch_extent_entry_type
+extent_entry_type(const union bch_extent_entry *e)
+{
+       int ret = __ffs(e->type);
+
+       EBUG_ON(ret < 0 || ret >= BCH_EXTENT_ENTRY_MAX);
+
+       return ret;
+}
+
+static inline size_t extent_entry_bytes(const union bch_extent_entry *entry)
+{
+       switch (extent_entry_type(entry)) {
+#define x(f, n)                                                \
+       case BCH_EXTENT_ENTRY_##f:                      \
+               return sizeof(struct bch_extent_##f);
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
+       default:
+               BUG();
+       }
+}
+
+static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
+{
+       return extent_entry_bytes(entry) / sizeof(u64);
+}
+
+static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
+{
+       switch (extent_entry_type(e)) {
+       case BCH_EXTENT_ENTRY_ptr:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
+{
+       switch (extent_entry_type(e)) {
+       case BCH_EXTENT_ENTRY_crc32:
+       case BCH_EXTENT_ENTRY_crc64:
+       case BCH_EXTENT_ENTRY_crc128:
+               return true;
+       default:
+               return false;
+       }
+}
+
+union bch_extent_crc {
+       u8                              type;
+       struct bch_extent_crc32         crc32;
+       struct bch_extent_crc64         crc64;
+       struct bch_extent_crc128        crc128;
+};
+
+#define __entry_to_crc(_entry)                                         \
+       __builtin_choose_expr(                                          \
+               type_is_exact(_entry, const union bch_extent_entry *),  \
+               (const union bch_extent_crc *) (_entry),                \
+               (union bch_extent_crc *) (_entry))
+
+#define entry_to_crc(_entry)                                           \
+({                                                                     \
+       EBUG_ON((_entry) && !extent_entry_is_crc(_entry));              \
+                                                                       \
+       __entry_to_crc(_entry);                                         \
+})
+
+static inline struct bch_extent_crc_unpacked
+bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
+{
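+/* the on-disk size fields are stored biased by one, hence the "+ 1" below: */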
+#define common_fields(_crc)                                            \
+               .csum_type              = _crc.csum_type,               \
+               .compression_type       = _crc.compression_type,        \
+               .compressed_size        = _crc._compressed_size + 1,    \
+               .uncompressed_size      = _crc._uncompressed_size + 1,  \
+               .offset                 = _crc.offset,                  \
+               .live_size              = k->size
+
+       if (!crc)
+               return (struct bch_extent_crc_unpacked) {
+                       .compressed_size        = k->size,
+                       .uncompressed_size      = k->size,
+                       .live_size              = k->size,
+               };
+
+       switch (extent_entry_type(to_entry(crc))) {
+       case BCH_EXTENT_ENTRY_crc32: {
+               struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
+                       common_fields(crc->crc32),
+               };
+
+               memcpy(&ret.csum.lo, &crc->crc32.csum,
+                      sizeof(crc->crc32.csum));
+
+               return ret;
+       }
+       case BCH_EXTENT_ENTRY_crc64: {
+               struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
+                       common_fields(crc->crc64),
+                       .nonce                  = crc->crc64.nonce,
+                       .csum.lo                = (__force __le64) crc->crc64.csum_lo,
+               };
+
+               *((__le16 *) &ret.csum.hi) = crc->crc64.csum_hi;
+
+               return ret;
+       }
+       case BCH_EXTENT_ENTRY_crc128: {
+               struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
+                       common_fields(crc->crc128),
+                       .nonce                  = crc->crc128.nonce,
+                       .csum                   = crc->crc128.csum,
+               };
+
+               return ret;
+       }
+       default:
+               BUG();
+       }
+#undef common_fields
+}
+
+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc)
+{
+       return (crc.compression_type != BCH_COMPRESSION_TYPE_none &&
+               crc.compression_type != BCH_COMPRESSION_TYPE_incompressible);
+}
+
+/* bkey_ptrs: generically over any key type that has ptrs */
+
+struct bkey_ptrs_c {
+       const union bch_extent_entry    *start;
+       const union bch_extent_entry    *end;
+};
+
+struct bkey_ptrs {
+       union bch_extent_entry  *start;
+       union bch_extent_entry  *end;
+};
+
+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
+{
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr: {
+               struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k);
+               return (struct bkey_ptrs_c) {
+                       to_entry(&e.v->start[0]),
+                       to_entry(extent_entry_last(e))
+               };
+       }
+       case KEY_TYPE_extent: {
+               struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+               return (struct bkey_ptrs_c) {
+                       e.v->start,
+                       extent_entry_last(e)
+               };
+       }
+       case KEY_TYPE_stripe: {
+               struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+               return (struct bkey_ptrs_c) {
+                       to_entry(&s.v->ptrs[0]),
+                       to_entry(&s.v->ptrs[s.v->nr_blocks]),
+               };
+       }
+       case KEY_TYPE_reflink_v: {
+               struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+               return (struct bkey_ptrs_c) {
+                       r.v->start,
+                       bkey_val_end(r),
+               };
+       }
+       case KEY_TYPE_btree_ptr_v2: {
+               struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k);
+               return (struct bkey_ptrs_c) {
+                       to_entry(&e.v->start[0]),
+                       to_entry(extent_entry_last(e))
+               };
+       }
+       default:
+               return (struct bkey_ptrs_c) { NULL, NULL };
+       }
+}
+
+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
+{
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c);
+
+       return (struct bkey_ptrs) {
+               (void *) p.start,
+               (void *) p.end
+       };
+}
+
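+/*
+ * Usage sketch (not from the original header) for the pointer iterators
+ * defined below - compare bch2_bkey_devs() further down for an in-tree
+ * example:
+ *
+ *     struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ *     const struct bch_extent_ptr *ptr;
+ *
+ *     bkey_for_each_ptr(ptrs, ptr)
+ *             pr_debug("dev %u\n", ptr->dev);
+ */
+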
+#define __bkey_extent_entry_for_each_from(_start, _end, _entry)                \
+       for ((_entry) = (_start);                                       \
+            (_entry) < (_end);                                         \
+            (_entry) = extent_entry_next(_entry))
+
+#define __bkey_ptr_next(_ptr, _end)                                    \
+({                                                                     \
+       typeof(_end) _entry;                                            \
+                                                                       \
+       __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \
+               if (extent_entry_is_ptr(_entry))                        \
+                       break;                                          \
+                                                                       \
+       _entry < (_end) ? entry_to_ptr(_entry) : NULL;                  \
+})
+
+#define bkey_extent_entry_for_each_from(_p, _entry, _start)            \
+       __bkey_extent_entry_for_each_from(_start, (_p).end, _entry)
+
+#define bkey_extent_entry_for_each(_p, _entry)                         \
+       bkey_extent_entry_for_each_from(_p, _entry, _p.start)
+
+#define __bkey_for_each_ptr(_start, _end, _ptr)                                \
+       for ((_ptr) = (_start);                                         \
+            ((_ptr) = __bkey_ptr_next(_ptr, _end));                    \
+            (_ptr)++)
+
+#define bkey_ptr_next(_p, _ptr)                                                \
+       __bkey_ptr_next(_ptr, (_p).end)
+
+#define bkey_for_each_ptr(_p, _ptr)                                    \
+       __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr)
+
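+/*
+ * Decoded iteration: crc and stripe ptr entries apply to the pointers that
+ * follow them, so the iterator below accumulates that state and emits it
+ * along with each pointer:
+ */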
+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry)                 \
+({                                                                     \
+       __label__ out;                                                  \
+                                                                       \
+       (_ptr).idx      = 0;                                            \
+       (_ptr).has_ec   = false;                                        \
+                                                                       \
+       __bkey_extent_entry_for_each_from(_entry, _end, _entry)         \
+               switch (extent_entry_type(_entry)) {                    \
+               case BCH_EXTENT_ENTRY_ptr:                              \
+                       (_ptr).ptr              = _entry->ptr;          \
+                       goto out;                                       \
+               case BCH_EXTENT_ENTRY_crc32:                            \
+               case BCH_EXTENT_ENTRY_crc64:                            \
+               case BCH_EXTENT_ENTRY_crc128:                           \
+                       (_ptr).crc = bch2_extent_crc_unpack(_k,         \
+                                       entry_to_crc(_entry));          \
+                       break;                                          \
+               case BCH_EXTENT_ENTRY_stripe_ptr:                       \
+                       (_ptr).ec = _entry->stripe_ptr;                 \
+                       (_ptr).has_ec   = true;                         \
+                       break;                                          \
+               }                                                       \
+out:                                                                   \
+       _entry < (_end);                                                \
+})
+
+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry)     \
+       for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL),             \
+            (_entry) = _start;                                         \
+            __bkey_ptr_next_decode(_k, _end, _ptr, _entry);            \
+            (_entry) = extent_entry_next(_entry))
+
+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry)                 \
+       __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end,            \
+                                  _ptr, _entry)
+
+#define bkey_crc_next(_k, _start, _end, _crc, _iter)                   \
+({                                                                     \
+       __bkey_extent_entry_for_each_from(_iter, _end, _iter)           \
+               if (extent_entry_is_crc(_iter)) {                       \
+                       (_crc) = bch2_extent_crc_unpack(_k,             \
+                                               entry_to_crc(_iter));   \
+                       break;                                          \
+               }                                                       \
+                                                                       \
+       (_iter) < (_end);                                               \
+})
+
+#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter)             \
+       for ((_crc) = bch2_extent_crc_unpack(_k, NULL),                 \
+            (_iter) = (_start);                                        \
+            bkey_crc_next(_k, _start, _end, _crc, _iter);              \
+            (_iter) = extent_entry_next(_iter))
+
+#define bkey_for_each_crc(_k, _p, _crc, _iter)                         \
+       __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter)
+
+/* Iterate over pointers in KEY_TYPE_extent: */
+
+#define extent_for_each_entry_from(_e, _entry, _start)                 \
+       __bkey_extent_entry_for_each_from(_start,                       \
+                               extent_entry_last(_e), _entry)
+
+#define extent_for_each_entry(_e, _entry)                              \
+       extent_for_each_entry_from(_e, _entry, (_e).v->start)
+
+#define extent_ptr_next(_e, _ptr)                                      \
+       __bkey_ptr_next(_ptr, extent_entry_last(_e))
+
+#define extent_for_each_ptr(_e, _ptr)                                  \
+       __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr)
+
+#define extent_for_each_ptr_decode(_e, _ptr, _entry)                   \
+       __bkey_for_each_ptr_decode((_e).k, (_e).v->start,               \
+                                  extent_entry_last(_e), _ptr, _entry)
+
+/* utility code common to all keys with pointers: */
+
+void bch2_mark_io_failure(struct bch_io_failures *,
+                         struct extent_ptr_decoded *);
+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
+                              struct bch_io_failures *,
+                              struct extent_ptr_decoded *);
+
+/* KEY_TYPE_btree_ptr: */
+
+const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c);
+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+
+void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
+                             int, struct bkey_s);
+
+#define bch2_bkey_ops_btree_ptr (struct bkey_ops) {            \
+       .key_invalid    = bch2_btree_ptr_invalid,               \
+       .key_debugcheck = bch2_btree_ptr_debugcheck,            \
+       .val_to_text    = bch2_btree_ptr_to_text,               \
+       .swab           = bch2_ptr_swab,                        \
+}
+
+#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) {         \
+       .key_invalid    = bch2_btree_ptr_invalid,               \
+       .key_debugcheck = bch2_btree_ptr_debugcheck,            \
+       .val_to_text    = bch2_btree_ptr_v2_to_text,            \
+       .swab           = bch2_ptr_swab,                        \
+       .compat         = bch2_btree_ptr_v2_compat,             \
+}
+
+/* KEY_TYPE_extent: */
+
+const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c);
+void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+enum merge_result bch2_extent_merge(struct bch_fs *,
+                                   struct bkey_s, struct bkey_s);
+
+#define bch2_bkey_ops_extent (struct bkey_ops) {               \
+       .key_invalid    = bch2_extent_invalid,                  \
+       .key_debugcheck = bch2_extent_debugcheck,               \
+       .val_to_text    = bch2_extent_to_text,                  \
+       .swab           = bch2_ptr_swab,                        \
+       .key_normalize  = bch2_extent_normalize,                \
+       .key_merge      = bch2_extent_merge,                    \
+}
+
+/* KEY_TYPE_reservation: */
+
+const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+enum merge_result bch2_reservation_merge(struct bch_fs *,
+                                        struct bkey_s, struct bkey_s);
+
+#define bch2_bkey_ops_reservation (struct bkey_ops) {          \
+       .key_invalid    = bch2_reservation_invalid,             \
+       .val_to_text    = bch2_reservation_to_text,             \
+       .key_merge      = bch2_reservation_merge,               \
+}
+
+/* Extent checksum entries: */
+
+bool bch2_can_narrow_extent_crcs(struct bkey_s_c,
+                                struct bch_extent_crc_unpacked);
+bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked);
+void bch2_extent_crc_append(struct bkey_i *,
+                           struct bch_extent_crc_unpacked);
+
+/* Generic code for keys with pointers: */
+
+static inline bool bkey_extent_is_direct_data(const struct bkey *k)
+{
+       switch (k->type) {
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static inline bool bkey_extent_is_data(const struct bkey *k)
+{
+       return bkey_extent_is_direct_data(k) ||
+               k->type == KEY_TYPE_inline_data ||
+               k->type == KEY_TYPE_reflink_p;
+}
+
+/*
+ * Should extent be counted under inode->i_sectors?
+ */
+static inline bool bkey_extent_is_allocation(const struct bkey *k)
+{
+       switch (k->type) {
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reservation:
+       case KEY_TYPE_reflink_p:
+       case KEY_TYPE_reflink_v:
+       case KEY_TYPE_inline_data:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
+{
+       struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(p, ptr)
+               ret.devs[ret.nr++] = ptr->dev;
+
+       return ret;
+}
+
+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
+{
+       struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(p, ptr)
+               if (!ptr->cached)
+                       ret.devs[ret.nr++] = ptr->dev;
+
+       return ret;
+}
+
+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
+{
+       struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+       struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(p, ptr)
+               if (ptr->cached)
+                       ret.devs[ret.nr++] = ptr->dev;
+
+       return ret;
+}
+
+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
+bool bch2_bkey_is_incompressible(struct bkey_s_c);
+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
+bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned);
+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
+
+void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
+                                   unsigned, unsigned);
+
+void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
+void bch2_extent_ptr_decoded_append(struct bkey_i *,
+                                   struct extent_ptr_decoded *);
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
+                                          struct bch_extent_ptr *);
+
+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond)                           \
+do {                                                                   \
+       struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k);                    \
+                                                                       \
+       _ptr = &_ptrs.start->ptr;                                       \
+                                                                       \
+       while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) {                   \
+               if (_cond) {                                            \
+                       _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr);   \
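+                       /* the drop shrank the val; refetch the entries: */   \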
+                       _ptrs = bch2_bkey_ptrs(_k);                     \
+                       continue;                                       \
+               }                                                       \
+                                                                       \
+               (_ptr)++;                                               \
+       }                                                               \
+} while (0)
+
+void bch2_bkey_drop_device(struct bkey_s, unsigned);
+const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned);
+bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
+
+bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
+                          struct bch_extent_ptr, u64);
+
+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
+
+void bch2_ptr_swab(struct bkey_s);
+
+/* Generic extent code: */
+
+int bch2_cut_front_s(struct bpos, struct bkey_s);
+int bch2_cut_back_s(struct bpos, struct bkey_s);
+
+static inline void bch2_cut_front(struct bpos where, struct bkey_i *k)
+{
+       bch2_cut_front_s(where, bkey_i_to_s(k));
+}
+
+static inline void bch2_cut_back(struct bpos where, struct bkey_i *k)
+{
+       bch2_cut_back_s(where, bkey_i_to_s(k));
+}
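+
+/*
+ * For example (a sketch): given an extent covering sectors [0, 128),
+ * bch2_cut_front(POS(inode, 16), k) trims it to [16, 128), adjusting the
+ * pointer and crc offsets in the value, and bch2_cut_back(POS(inode, 64), k)
+ * then trims it to [16, 64).
+ */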
+
+/**
+ * bch2_key_resize - adjust the size of @k
+ *
+ * bkey_start_offset(k) is preserved; this modifies where the extent ends
+ */
+static inline void bch2_key_resize(struct bkey *k, unsigned new_size)
+{
+       k->p.offset -= k->size;
+       k->p.offset += new_size;
+       k->size = new_size;
+}
+
+/*
+ * In extent_sort_fix_overlapping(), insert_fixup_extent(),
+ * extent_merge_inline() - we're modifying packed keys in place. To do that we
+ * have to unpack the key, modify the unpacked copy - then this function
+ * copies/repacks the unpacked key back into the original as necessary.
+ */
+static inline void extent_save(struct btree *b, struct bkey_packed *dst,
+                              struct bkey *src)
+{
+       struct bkey_format *f = &b->format;
+       struct bkey_i *dst_unpacked;
+
+       if ((dst_unpacked = packed_to_bkey(dst)))
+               dst_unpacked->k = *src;
+       else
+               BUG_ON(!bch2_bkey_pack_key(dst, src, f));
+}
+
+#endif /* _BCACHEFS_EXTENTS_H */
diff --git a/libbcachefs/extents_types.h b/libbcachefs/extents_types.h
new file mode 100644 (file)
index 0000000..43d6c34
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENTS_TYPES_H
+#define _BCACHEFS_EXTENTS_TYPES_H
+
+#include "bcachefs_format.h"
+
+struct bch_extent_crc_unpacked {
+       u32                     compressed_size;
+       u32                     uncompressed_size;
+       u32                     live_size;
+
+       u8                      csum_type;
+       u8                      compression_type;
+
+       u16                     offset;
+
+       u16                     nonce;
+
+       struct bch_csum         csum;
+};
+
+struct extent_ptr_decoded {
+       unsigned                        idx;
+       bool                            has_ec;
+       struct bch_extent_crc_unpacked  crc;
+       struct bch_extent_ptr           ptr;
+       struct bch_extent_stripe_ptr    ec;
+};
+
+struct bch_io_failures {
+       u8                      nr;
+       struct bch_dev_io_failures {
+               u8              dev;
+               u8              idx;
+               u8              nr_failed;
+               u8              nr_retries;
+       }                       devs[BCH_REPLICAS_MAX];
+};
+
+#endif /* _BCACHEFS_EXTENTS_TYPES_H */
diff --git a/libbcachefs/eytzinger.h b/libbcachefs/eytzinger.h
new file mode 100644 (file)
index 0000000..26d5cad
--- /dev/null
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _EYTZINGER_H
+#define _EYTZINGER_H
+
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "util.h"
+
+/*
+ * Traversal for trees in eytzinger layout - a full binary tree laid out in an
+ * array
+ */
+
+/*
+ * One based indexing version:
+ *
+ * With one based indexing each level of the tree starts at a power of two -
+ * good for cacheline alignment:
+ *
+ * The size parameter is treated as if we were using 0 based indexing; however,
+ * valid nodes and inorder indices are in the range [1..size) - that is, there
+ * are actually size - 1 elements.
+ */
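+
+/*
+ * For example (a sketch): with size 8 there are 7 valid nodes; storing the
+ * sorted values 1..7 gives
+ *
+ *     index: 1 2 3 4 5 6 7
+ *     value: 4 2 6 1 3 5 7
+ *
+ * node i's children are at indices 2i and 2i + 1, so each level of the tree
+ * occupies a contiguous, power of two aligned range of the array.
+ */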
+
+static inline unsigned eytzinger1_child(unsigned i, unsigned child)
+{
+       EBUG_ON(child > 1);
+
+       return (i << 1) + child;
+}
+
+static inline unsigned eytzinger1_left_child(unsigned i)
+{
+       return eytzinger1_child(i, 0);
+}
+
+static inline unsigned eytzinger1_right_child(unsigned i)
+{
+       return eytzinger1_child(i, 1);
+}
+
+static inline unsigned eytzinger1_first(unsigned size)
+{
+       return rounddown_pow_of_two(size - 1);
+}
+
+static inline unsigned eytzinger1_last(unsigned size)
+{
+       return rounddown_pow_of_two(size) - 1;
+}
+
+/*
+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that
+ *
+ * eytzinger1_next(0) == eytzinger1_first()
+ * eytzinger1_prev(0) == eytzinger1_last()
+ *
+ * eytzinger1_prev(eytzinger1_first()) == 0
+ * eytzinger1_next(eytzinger1_last()) == 0
+ */
+
+static inline unsigned eytzinger1_next(unsigned i, unsigned size)
+{
+       EBUG_ON(i >= size);
+
+       if (eytzinger1_right_child(i) < size) {
+               i = eytzinger1_right_child(i);
+
+               i <<= __fls(size) - __fls(i);
+               i >>= i >= size;
+       } else {
+               i >>= ffz(i) + 1;
+       }
+
+       return i;
+}
+
+static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
+{
+       EBUG_ON(i >= size);
+
+       if (eytzinger1_left_child(i) < size) {
+               i = eytzinger1_left_child(i) + 1;
+
+               i <<= __fls(size) - __fls(i);
+               i -= 1;
+               i >>= i >= size;
+       } else {
+               i >>= __ffs(i) + 1;
+       }
+
+       return i;
+}
+
+static inline unsigned eytzinger1_extra(unsigned size)
+{
+       return (size - rounddown_pow_of_two(size - 1)) << 1;
+}
+
+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
+                                             unsigned extra)
+{
+       unsigned b = __fls(i);
+       unsigned shift = __fls(size - 1) - b;
+       int s;
+
+       EBUG_ON(!i || i >= size);
+
+       i  ^= 1U << b;
+       i <<= 1;
+       i  |= 1;
+       i <<= shift;
+
+       /*
+        * sign bit trick:
+        *
+        * if (i > extra)
+        *      i -= (i - extra) >> 1;
+        */
+       s = extra - i;
+       i += (s >> 1) & (s >> 31);
+
+       return i;
+}
+
+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
+                                              unsigned extra)
+{
+       unsigned shift;
+       int s;
+
+       EBUG_ON(!i || i >= size);
+
+       /*
+        * sign bit trick:
+        *
+        * if (i > extra)
+        *      i += i - extra;
+        */
+       s = extra - i;
+       i -= s & (s >> 31);
+
+       shift = __ffs(i);
+
+       i >>= shift + 1;
+       i  |= 1U << (__fls(size - 1) - shift);
+
+       return i;
+}
+
+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
+{
+       return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
+{
+       return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
+}
+
+#define eytzinger1_for_each(_i, _size)                 \
+       for ((_i) = eytzinger1_first((_size));          \
+            (_i) != 0;                                 \
+            (_i) = eytzinger1_next((_i), (_size)))
+
+/* Zero based indexing version: */
+
+static inline unsigned eytzinger0_child(unsigned i, unsigned child)
+{
+       EBUG_ON(child > 1);
+
+       return (i << 1) + 1 + child;
+}
+
+static inline unsigned eytzinger0_left_child(unsigned i)
+{
+       return eytzinger0_child(i, 0);
+}
+
+static inline unsigned eytzinger0_right_child(unsigned i)
+{
+       return eytzinger0_child(i, 1);
+}
+
+static inline unsigned eytzinger0_first(unsigned size)
+{
+       return eytzinger1_first(size + 1) - 1;
+}
+
+static inline unsigned eytzinger0_last(unsigned size)
+{
+       return eytzinger1_last(size + 1) - 1;
+}
+
+static inline unsigned eytzinger0_next(unsigned i, unsigned size)
+{
+       return eytzinger1_next(i + 1, size + 1) - 1;
+}
+
+static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
+{
+       return eytzinger1_prev(i + 1, size + 1) - 1;
+}
+
+static inline unsigned eytzinger0_extra(unsigned size)
+{
+       return eytzinger1_extra(size + 1);
+}
+
+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
+                                              unsigned extra)
+{
+       return __eytzinger1_to_inorder(i + 1, size + 1, extra) - 1;
+}
+
+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
+                                              unsigned extra)
+{
+       return __inorder_to_eytzinger1(i + 1, size + 1, extra) - 1;
+}
+
+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
+{
+       return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
+{
+       return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
+}
+
+#define eytzinger0_for_each(_i, _size)                 \
+       for ((_i) = eytzinger0_first((_size));          \
+            (_i) != -1;                                \
+            (_i) = eytzinger0_next((_i), (_size)))
+
+typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
+
+/* return greatest node <= @search, or -1 if not found */
+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
+                                        eytzinger_cmp_fn cmp, const void *search)
+{
+       unsigned i, n = 0;
+
+       if (!nr)
+               return -1;
+
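+       /* descend, taking the right child when @search >= the current node: */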
+       do {
+               i = n;
+               n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+       } while (n < nr);
+
+       if (n & 1) {
+               /* @i was greater than @search, return previous node: */
+
+               if (i == eytzinger0_first(nr))
+                       return -1;
+
+               return eytzinger0_prev(i, nr);
+       } else {
+               return i;
+       }
+}
+
+#define eytzinger0_find(base, nr, size, _cmp, search)                  \
+({                                                                     \
+       void *_base     = (base);                                       \
+       void *_search   = (search);                                     \
+       size_t _nr      = (nr);                                         \
+       size_t _size    = (size);                                       \
+       size_t _i       = 0;                                            \
+       int _res;                                                       \
+                                                                       \
+       while (_i < _nr &&                                              \
+              (_res = _cmp(_search, _base + _i * _size, _size)))       \
+               _i = eytzinger0_child(_i, _res > 0);                    \
+       _i;                                                             \
+})
+
+void eytzinger0_sort(void *, size_t, size_t,
+                   int (*cmp_func)(const void *, const void *, size_t),
+                   void (*swap_func)(void *, void *, size_t));
+
+#endif /* _EYTZINGER_H */
diff --git a/libbcachefs/fifo.h b/libbcachefs/fifo.h
new file mode 100644 (file)
index 0000000..cdb2727
--- /dev/null
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FIFO_H
+#define _BCACHEFS_FIFO_H
+
+#include "util.h"
+
+#define FIFO(type)                                                     \
+struct {                                                               \
+       size_t front, back, size, mask;                                 \
+       type *data;                                                     \
+}
+
+#define DECLARE_FIFO(type, name)       FIFO(type) name
+
+#define fifo_buf_size(fifo)                                            \
+       ((fifo)->size                                                   \
+        ? roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0])   \
+        : 0)
+
+#define init_fifo(fifo, _size, _gfp)                                   \
+({                                                                     \
+       (fifo)->front   = (fifo)->back = 0;                             \
+       (fifo)->size    = (_size);                                      \
+       (fifo)->mask    = (fifo)->size                                  \
+               ? roundup_pow_of_two((fifo)->size) - 1                  \
+               : 0;                                                    \
+       (fifo)->data    = kvpmalloc(fifo_buf_size(fifo), (_gfp));       \
+})
+
+#define free_fifo(fifo)                                                        \
+do {                                                                   \
+       kvpfree((fifo)->data, fifo_buf_size(fifo));                     \
+       (fifo)->data = NULL;                                            \
+} while (0)
+
+#define fifo_swap(l, r)                                                        \
+do {                                                                   \
+       swap((l)->front, (r)->front);                                   \
+       swap((l)->back, (r)->back);                                     \
+       swap((l)->size, (r)->size);                                     \
+       swap((l)->mask, (r)->mask);                                     \
+       swap((l)->data, (r)->data);                                     \
+} while (0)
+
+#define fifo_move(dest, src)                                           \
+do {                                                                   \
+       typeof(*((dest)->data)) _t;                                     \
+       while (!fifo_full(dest) &&                                      \
+              fifo_pop(src, _t))                                       \
+               fifo_push(dest, _t);                                    \
+} while (0)
+
+#define fifo_used(fifo)                (((fifo)->back - (fifo)->front))
+#define fifo_free(fifo)                ((fifo)->size - fifo_used(fifo))
+
+#define fifo_empty(fifo)       ((fifo)->front == (fifo)->back)
+#define fifo_full(fifo)                (fifo_used(fifo) == (fifo)->size)
+
+#define fifo_peek_front(fifo)  ((fifo)->data[(fifo)->front & (fifo)->mask])
+#define fifo_peek_back(fifo)   ((fifo)->data[((fifo)->back - 1) & (fifo)->mask])
+
+#define fifo_entry_idx_abs(fifo, p)                                    \
+       ((((p) >= &fifo_peek_front(fifo)                                \
+          ? (fifo)->front : (fifo)->back) & ~(fifo)->mask) +           \
+          (((p) - (fifo)->data)))
+
+#define fifo_entry_idx(fifo, p)        (((p) - &fifo_peek_front(fifo)) & (fifo)->mask)
+#define fifo_idx_entry(fifo, i)        (fifo)->data[((fifo)->front + (i)) & (fifo)->mask]
+
+#define fifo_push_back_ref(f)                                          \
+       (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask])
+
+#define fifo_push_front_ref(f)                                         \
+       (fifo_full((f)) ? NULL : &(f)->data[--(f)->front & (f)->mask])
+
+#define fifo_push_back(fifo, new)                                      \
+({                                                                     \
+       typeof((fifo)->data) _r = fifo_push_back_ref(fifo);             \
+       if (_r)                                                         \
+               *_r = (new);                                            \
+       _r != NULL;                                                     \
+})
+
+#define fifo_push_front(fifo, new)                                     \
+({                                                                     \
+       typeof((fifo)->data) _r = fifo_push_front_ref(fifo);            \
+       if (_r)                                                         \
+               *_r = (new);                                            \
+       _r != NULL;                                                     \
+})
+
+#define fifo_pop_front(fifo, i)                                                \
+({                                                                     \
+       bool _r = !fifo_empty((fifo));                                  \
+       if (_r)                                                         \
+               (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask];     \
+       _r;                                                             \
+})
+
+#define fifo_pop_back(fifo, i)                                         \
+({                                                                     \
+       bool _r = !fifo_empty((fifo));                                  \
+       if (_r)                                                         \
+               (i) = (fifo)->data[--(fifo)->back & (fifo)->mask];      \
+       _r;                                                             \
+})
+
+#define fifo_push_ref(fifo)    fifo_push_back_ref(fifo)
+#define fifo_push(fifo, i)     fifo_push_back(fifo, (i))
+#define fifo_pop(fifo, i)      fifo_pop_front(fifo, (i))
+#define fifo_peek(fifo)                fifo_peek_front(fifo)
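+
+/*
+ * Usage sketch (not from the original header):
+ *
+ *     DECLARE_FIFO(int, fifo);
+ *     int i;
+ *
+ *     if (!init_fifo(&fifo, 8, GFP_KERNEL))
+ *             return -ENOMEM;
+ *     fifo_push(&fifo, 42);
+ *     while (fifo_pop(&fifo, i))
+ *             pr_debug("%i\n", i);
+ *     free_fifo(&fifo);
+ */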
+
+#define fifo_for_each_entry(_entry, _fifo, _iter)                      \
+       for (typecheck(typeof((_fifo)->front), _iter),                  \
+            (_iter) = (_fifo)->front;                                  \
+            ((_iter != (_fifo)->back) &&                               \
+             (_entry = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \
+            (_iter)++)
+
+#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter)                    \
+       for (typecheck(typeof((_fifo)->front), _iter),                  \
+            (_iter) = (_fifo)->front;                                  \
+            ((_iter != (_fifo)->back) &&                               \
+             (_ptr = &(_fifo)->data[(_iter) & (_fifo)->mask], true));  \
+            (_iter)++)
+
+#endif /* _BCACHEFS_FIFO_H */
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
new file mode 100644 (file)
index 0000000..878419d
--- /dev/null
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "acl.h"
+#include "btree_update.h"
+#include "dirent.h"
+#include "fs-common.h"
+#include "inode.h"
+#include "xattr.h"
+
+#include <linux/posix_acl.h>
+
+int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
+                     struct bch_inode_unpacked *dir_u,
+                     struct bch_inode_unpacked *new_inode,
+                     const struct qstr *name,
+                     uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+                     struct posix_acl *default_acl,
+                     struct posix_acl *acl)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *dir_iter = NULL;
+       struct bch_hash_info hash = bch2_hash_info_init(c, new_inode);
+       u64 now = bch2_current_time(c);
+       int ret;
+
+       dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(dir_iter);
+       if (ret)
+               goto err;
+
+       bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
+
+       if (!name)
+               new_inode->bi_flags |= BCH_INODE_UNLINKED;
+
+       ret = bch2_inode_create(trans, new_inode,
+                               BLOCKDEV_INODE_MAX, 0,
+                               &c->unused_inode_hint);
+       if (ret)
+               goto err;
+
+       if (default_acl) {
+               ret = bch2_set_acl_trans(trans, new_inode, &hash,
+                                        default_acl, ACL_TYPE_DEFAULT);
+               if (ret)
+                       goto err;
+       }
+
+       if (acl) {
+               ret = bch2_set_acl_trans(trans, new_inode, &hash,
+                                        acl, ACL_TYPE_ACCESS);
+               if (ret)
+                       goto err;
+       }
+
+       if (name) {
+               struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
+               dir_u->bi_mtime = dir_u->bi_ctime = now;
+
+               if (S_ISDIR(new_inode->bi_mode))
+                       dir_u->bi_nlink++;
+
+               ret = bch2_inode_write(trans, dir_iter, dir_u);
+               if (ret)
+                       goto err;
+
+               ret = bch2_dirent_create(trans, dir_inum, &dir_hash,
+                                        mode_to_type(new_inode->bi_mode),
+                                        name, new_inode->bi_inum,
+                                        BCH_HASH_SET_MUST_CREATE);
+               if (ret)
+                       goto err;
+       }
+err:
+       bch2_trans_iter_put(trans, dir_iter);
+       return ret;
+}
+
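+/*
+ * Hard link inside @trans: bump @inum's link count and ctime, update the
+ * directory's mtime/ctime, and create the new dirent.
+ */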
+int bch2_link_trans(struct btree_trans *trans, u64 dir_inum,
+                   u64 inum, struct bch_inode_unpacked *dir_u,
+                   struct bch_inode_unpacked *inode_u, const struct qstr *name)
+{
+       struct btree_iter *dir_iter = NULL, *inode_iter = NULL;
+       struct bch_hash_info dir_hash;
+       u64 now = bch2_current_time(trans->c);
+       int ret;
+
+       inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(inode_iter);
+       if (ret)
+               goto err;
+
+       inode_u->bi_ctime = now;
+       bch2_inode_nlink_inc(inode_u);
+
+       dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0);
+       ret = PTR_ERR_OR_ZERO(dir_iter);
+       if (ret)
+               goto err;
+
+       dir_u->bi_mtime = dir_u->bi_ctime = now;
+
+       dir_hash = bch2_hash_info_init(trans->c, dir_u);
+
+       ret =   bch2_dirent_create(trans, dir_inum, &dir_hash,
+                                 mode_to_type(inode_u->bi_mode),
+                                 name, inum, BCH_HASH_SET_MUST_CREATE) ?:
+               bch2_inode_write(trans, dir_iter, dir_u) ?:
+               bch2_inode_write(trans, inode_iter, inode_u);
+err:
+       bch2_trans_iter_put(trans, dir_iter);
+       bch2_trans_iter_put(trans, inode_iter);
+       return ret;
+}
+
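+/*
+ * Unlink inside @trans: look up the dirent for @name to find the victim
+ * inode, refuse to remove a non-empty directory, then delete the dirent and
+ * drop the inode's link count (and the directory's, for a subdirectory).
+ */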
+int bch2_unlink_trans(struct btree_trans *trans,
+                     u64 dir_inum, struct bch_inode_unpacked *dir_u,
+                     struct bch_inode_unpacked *inode_u,
+                     const struct qstr *name)
+{
+       struct btree_iter *dir_iter = NULL, *dirent_iter = NULL,
+                         *inode_iter = NULL;
+       struct bch_hash_info dir_hash;
+       u64 inum, now = bch2_current_time(trans->c);
+       struct bkey_s_c k;
+       int ret;
+
+       dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(dir_iter);
+       if (ret)
+               goto err;
+
+       dir_hash = bch2_hash_info_init(trans->c, dir_u);
+
+       dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash,
+                                                name, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(dirent_iter);
+       if (ret)
+               goto err;
+
+       k = bch2_btree_iter_peek_slot(dirent_iter);
+       inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
+
+       inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(inode_iter);
+       if (ret)
+               goto err;
+
+       dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
+       dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode);
+       bch2_inode_nlink_dec(inode_u);
+
+       ret =   (S_ISDIR(inode_u->bi_mode)
+                ? bch2_empty_dir_trans(trans, inum)
+                : 0) ?:
+               bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?:
+               bch2_inode_write(trans, dir_iter, dir_u) ?:
+               bch2_inode_write(trans, inode_iter, inode_u);
+err:
+       bch2_trans_iter_put(trans, inode_iter);
+       bch2_trans_iter_put(trans, dirent_iter);
+       bch2_trans_iter_put(trans, dir_iter);
+       return ret;
+}
+
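+/*
+ * Copy inherited inode options from @src_u into @dst_u, skipping any option
+ * explicitly set on the destination; returns true if anything changed.
+ */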
+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
+                         struct bch_inode_unpacked *src_u)
+{
+       u64 src, dst;
+       unsigned id;
+       bool ret = false;
+
+       for (id = 0; id < Inode_opt_nr; id++) {
+               if (dst_u->bi_fields_set & (1 << id))
+                       continue;
+
+               src = bch2_inode_opt_get(src_u, id);
+               dst = bch2_inode_opt_get(dst_u, id);
+
+               if (src == dst)
+                       continue;
+
+               bch2_inode_opt_set(dst_u, id, src);
+               ret = true;
+       }
+
+       return ret;
+}
+
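+/*
+ * Rename or exchange inside @trans: move the dirent(s), enforce the usual
+ * invariants (-ENOTDIR/-ENOTEMPTY when overwriting, -EXDEV when a directory
+ * would need its inherited options changed), fix up link counts and
+ * timestamps, and write back every inode touched.
+ */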
+int bch2_rename_trans(struct btree_trans *trans,
+                     u64 src_dir, struct bch_inode_unpacked *src_dir_u,
+                     u64 dst_dir, struct bch_inode_unpacked *dst_dir_u,
+                     struct bch_inode_unpacked *src_inode_u,
+                     struct bch_inode_unpacked *dst_inode_u,
+                     const struct qstr *src_name,
+                     const struct qstr *dst_name,
+                     enum bch_rename_mode mode)
+{
+       struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL;
+       struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL;
+       struct bch_hash_info src_hash, dst_hash;
+       u64 src_inode, dst_inode, now = bch2_current_time(trans->c);
+       int ret;
+
+       src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir,
+                                      BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(src_dir_iter);
+       if (ret)
+               goto err;
+
+       src_hash = bch2_hash_info_init(trans->c, src_dir_u);
+
+       if (dst_dir != src_dir) {
+               dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir,
+                                              BTREE_ITER_INTENT);
+               ret = PTR_ERR_OR_ZERO(dst_dir_iter);
+               if (ret)
+                       goto err;
+
+               dst_hash = bch2_hash_info_init(trans->c, dst_dir_u);
+       } else {
+               dst_dir_u = src_dir_u;
+               dst_hash = src_hash;
+       }
+
+       ret = bch2_dirent_rename(trans,
+                                src_dir, &src_hash,
+                                dst_dir, &dst_hash,
+                                src_name, &src_inode,
+                                dst_name, &dst_inode,
+                                mode);
+       if (ret)
+               goto err;
+
+       src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode,
+                                        BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(src_inode_iter);
+       if (ret)
+               goto err;
+
+       if (dst_inode) {
+               dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode,
+                                                BTREE_ITER_INTENT);
+               ret = PTR_ERR_OR_ZERO(dst_inode_iter);
+               if (ret)
+                       goto err;
+       }
+
+       if (mode == BCH_RENAME_OVERWRITE) {
+               if (S_ISDIR(src_inode_u->bi_mode) !=
+                   S_ISDIR(dst_inode_u->bi_mode)) {
+                       ret = -ENOTDIR;
+                       goto err;
+               }
+
+               if (S_ISDIR(dst_inode_u->bi_mode) &&
+                   bch2_empty_dir_trans(trans, dst_inode)) {
+                       ret = -ENOTEMPTY;
+                       goto err;
+               }
+       }
+
+       if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
+           S_ISDIR(src_inode_u->bi_mode)) {
+               ret = -EXDEV;
+               goto err;
+       }
+
+       if (mode == BCH_RENAME_EXCHANGE &&
+           bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
+           S_ISDIR(dst_inode_u->bi_mode)) {
+               ret = -EXDEV;
+               goto err;
+       }
+
+       if (S_ISDIR(src_inode_u->bi_mode)) {
+               src_dir_u->bi_nlink--;
+               dst_dir_u->bi_nlink++;
+       }
+
+       if (dst_inode && S_ISDIR(dst_inode_u->bi_mode)) {
+               dst_dir_u->bi_nlink--;
+               src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
+       }
+
+       if (mode == BCH_RENAME_OVERWRITE)
+               bch2_inode_nlink_dec(dst_inode_u);
+
+       src_dir_u->bi_mtime             = now;
+       src_dir_u->bi_ctime             = now;
+
+       if (src_dir != dst_dir) {
+               dst_dir_u->bi_mtime     = now;
+               dst_dir_u->bi_ctime     = now;
+       }
+
+       src_inode_u->bi_ctime           = now;
+
+       if (dst_inode)
+               dst_inode_u->bi_ctime   = now;
+
+       ret =   bch2_inode_write(trans, src_dir_iter, src_dir_u) ?:
+               (src_dir != dst_dir
+                ? bch2_inode_write(trans, dst_dir_iter, dst_dir_u)
+                : 0 ) ?:
+               bch2_inode_write(trans, src_inode_iter, src_inode_u) ?:
+               (dst_inode
+                ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u)
+                : 0 );
+err:
+       bch2_trans_iter_put(trans, dst_inode_iter);
+       bch2_trans_iter_put(trans, src_inode_iter);
+       bch2_trans_iter_put(trans, dst_dir_iter);
+       bch2_trans_iter_put(trans, src_dir_iter);
+       return ret;
+}
diff --git a/libbcachefs/fs-common.h b/libbcachefs/fs-common.h
new file mode 100644 (file)
index 0000000..2273b79
--- /dev/null
+++ b/libbcachefs/fs-common.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_COMMON_H
+#define _BCACHEFS_FS_COMMON_H
+
+struct posix_acl;
+
+int bch2_create_trans(struct btree_trans *, u64,
+                     struct bch_inode_unpacked *,
+                     struct bch_inode_unpacked *,
+                     const struct qstr *,
+                     uid_t, gid_t, umode_t, dev_t,
+                     struct posix_acl *,
+                     struct posix_acl *);
+
+int bch2_link_trans(struct btree_trans *, u64,
+                   u64, struct bch_inode_unpacked *,
+                   struct bch_inode_unpacked *,
+                   const struct qstr *);
+
+int bch2_unlink_trans(struct btree_trans *,
+                     u64, struct bch_inode_unpacked *,
+                     struct bch_inode_unpacked *,
+                     const struct qstr *);
+
+int bch2_rename_trans(struct btree_trans *,
+                     u64, struct bch_inode_unpacked *,
+                     u64, struct bch_inode_unpacked *,
+                     struct bch_inode_unpacked *,
+                     struct bch_inode_unpacked *,
+                     const struct qstr *,
+                     const struct qstr *,
+                     enum bch_rename_mode);
+
+bool bch2_reinherit_attrs(struct bch_inode_unpacked *,
+                         struct bch_inode_unpacked *);
+
+#endif /* _BCACHEFS_FS_COMMON_H */
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
new file mode 100644 (file)
index 0000000..4ceeafc
--- /dev/null
+++ b/libbcachefs/fs-io.c
@@ -0,0 +1,3140 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "clock.h"
+#include "error.h"
+#include "extents.h"
+#include "extent_update.h"
+#include "fs.h"
+#include "fs-io.h"
+#include "fsck.h"
+#include "inode.h"
+#include "journal.h"
+#include "io.h"
+#include "keylist.h"
+#include "quota.h"
+#include "reflink.h"
+
+#include <linux/aio.h>
+#include <linux/backing-dev.h>
+#include <linux/falloc.h>
+#include <linux/migrate.h>
+#include <linux/mmu_context.h>
+#include <linux/pagevec.h>
+#include <linux/rmap.h>
+#include <linux/sched/signal.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+
+#include <trace/events/bcachefs.h>
+#include <trace/events/writeback.h>
+
+struct quota_res {
+       u64                             sectors;
+};
+
+struct bch_writepage_io {
+       struct closure                  cl;
+       struct bch_inode_info           *inode;
+
+       /* must be last: */
+       struct bch_write_op             op;
+};
+
+struct dio_write {
+       struct completion               done;
+       struct kiocb                    *req;
+       struct mm_struct                *mm;
+       unsigned                        loop:1,
+                                       sync:1,
+                                       free_iov:1;
+       struct quota_res                quota_res;
+       u64                             written;
+
+       struct iov_iter                 iter;
+       struct iovec                    inline_vecs[2];
+
+       /* must be last: */
+       struct bch_write_op             op;
+};
+
+struct dio_read {
+       struct closure                  cl;
+       struct kiocb                    *req;
+       long                            ret;
+       struct bch_read_bio             rbio;
+};
+
+/* pagecache_block must be held */
+static int write_invalidate_inode_pages_range(struct address_space *mapping,
+                                             loff_t start, loff_t end)
+{
+       int ret;
+
+       /*
+        * XXX: the way this is currently implemented, we can spin if a process
+        * is continually redirtying a specific page
+        */
+       do {
+               if (!mapping->nrpages &&
+                   !mapping->nrexceptional)
+                       return 0;
+
+               ret = filemap_write_and_wait_range(mapping, start, end);
+               if (ret)
+                       break;
+
+               if (!mapping->nrpages)
+                       return 0;
+
+               ret = invalidate_inode_pages2_range(mapping,
+                               start >> PAGE_SHIFT,
+                               end >> PAGE_SHIFT);
+       } while (ret == -EBUSY);
+
+       return ret;
+}
+
+/* quotas */
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+static void bch2_quota_reservation_put(struct bch_fs *c,
+                                      struct bch_inode_info *inode,
+                                      struct quota_res *res)
+{
+       if (!res->sectors)
+               return;
+
+       mutex_lock(&inode->ei_quota_lock);
+       BUG_ON(res->sectors > inode->ei_quota_reserved);
+
+       bch2_quota_acct(c, inode->ei_qid, Q_SPC,
+                       -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC);
+       inode->ei_quota_reserved -= res->sectors;
+       mutex_unlock(&inode->ei_quota_lock);
+
+       res->sectors = 0;
+}
+
+static int bch2_quota_reservation_add(struct bch_fs *c,
+                                     struct bch_inode_info *inode,
+                                     struct quota_res *res,
+                                     unsigned sectors,
+                                     bool check_enospc)
+{
+       int ret;
+
+       mutex_lock(&inode->ei_quota_lock);
+       ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
+                             check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK);
+       if (likely(!ret)) {
+               inode->ei_quota_reserved += sectors;
+               res->sectors += sectors;
+       }
+       mutex_unlock(&inode->ei_quota_lock);
+
+       return ret;
+}
+
+#else
+
+static void bch2_quota_reservation_put(struct bch_fs *c,
+                                      struct bch_inode_info *inode,
+                                      struct quota_res *res)
+{
+}
+
+static int bch2_quota_reservation_add(struct bch_fs *c,
+                                     struct bch_inode_info *inode,
+                                     struct quota_res *res,
+                                     unsigned sectors,
+                                     bool check_enospc)
+{
+       return 0;
+}
+
+#endif
+
+/* i_size updates: */
+
+struct inode_new_size {
+       loff_t          new_size;
+       u64             now;
+       unsigned        fields;
+};
+
+static int inode_set_size(struct bch_inode_info *inode,
+                         struct bch_inode_unpacked *bi,
+                         void *p)
+{
+       struct inode_new_size *s = p;
+
+       bi->bi_size = s->new_size;
+       if (s->fields & ATTR_ATIME)
+               bi->bi_atime = s->now;
+       if (s->fields & ATTR_MTIME)
+               bi->bi_mtime = s->now;
+       if (s->fields & ATTR_CTIME)
+               bi->bi_ctime = s->now;
+
+       return 0;
+}
+
+int __must_check bch2_write_inode_size(struct bch_fs *c,
+                                      struct bch_inode_info *inode,
+                                      loff_t new_size, unsigned fields)
+{
+       struct inode_new_size s = {
+               .new_size       = new_size,
+               .now            = bch2_current_time(c),
+               .fields         = fields,
+       };
+
+       return bch2_write_inode(c, inode, inode_set_size, &s, fields);
+}
+
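+/*
+ * Adjust the inode's block count by @sectors; a positive delta consumes the
+ * supplied quota reservation when there is one, anything else is accounted
+ * directly against quota with warning semantics.
+ */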
+static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
+                          struct quota_res *quota_res, s64 sectors)
+{
+       if (!sectors)
+               return;
+
+       mutex_lock(&inode->ei_quota_lock);
+#ifdef CONFIG_BCACHEFS_QUOTA
+       if (quota_res && sectors > 0) {
+               BUG_ON(sectors > quota_res->sectors);
+               BUG_ON(sectors > inode->ei_quota_reserved);
+
+               quota_res->sectors -= sectors;
+               inode->ei_quota_reserved -= sectors;
+       } else {
+               bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN);
+       }
+#endif
+       inode->v.i_blocks += sectors;
+       mutex_unlock(&inode->ei_quota_lock);
+}
+
+/* page state: */
+
+/* stored in page->private: */
+
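+/*
+ * Per-sector writeback state: how many fully allocated replicas back the
+ * sector, how much reservation it owns, and where it is in the
+ * UNALLOCATED -> RESERVED/DIRTY -> ALLOCATED lifecycle.
+ */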
+struct bch_page_sector {
+       /* Uncompressed, fully allocated replicas: */
+       unsigned                nr_replicas:3;
+
+       /* Owns PAGE_SECTORS * replicas_reserved sized reservation: */
+       unsigned                replicas_reserved:3;
+
+       /* i_sectors: */
+       enum {
+               SECTOR_UNALLOCATED,
+               SECTOR_RESERVED,
+               SECTOR_DIRTY,
+               SECTOR_ALLOCATED,
+       }                       state:2;
+};
+
+struct bch_page_state {
+       spinlock_t              lock;
+       atomic_t                write_count;
+       struct bch_page_sector  s[PAGE_SECTORS];
+};
+
+static inline struct bch_page_state *__bch2_page_state(struct page *page)
+{
+       return page_has_private(page)
+               ? (struct bch_page_state *) page_private(page)
+               : NULL;
+}
+
+static inline struct bch_page_state *bch2_page_state(struct page *page)
+{
+       EBUG_ON(!PageLocked(page));
+
+       return __bch2_page_state(page);
+}
+
+/* for newly allocated pages: */
+static void __bch2_page_state_release(struct page *page)
+{
+       struct bch_page_state *s = __bch2_page_state(page);
+
+       if (!s)
+               return;
+
+       ClearPagePrivate(page);
+       set_page_private(page, 0);
+       put_page(page);
+       kfree(s);
+}
+
+static void bch2_page_state_release(struct page *page)
+{
+       struct bch_page_state *s = bch2_page_state(page);
+
+       if (!s)
+               return;
+
+       ClearPagePrivate(page);
+       set_page_private(page, 0);
+       put_page(page);
+       kfree(s);
+}
+
+/* for newly allocated pages: */
+static struct bch_page_state *__bch2_page_state_create(struct page *page,
+                                                      gfp_t gfp)
+{
+       struct bch_page_state *s;
+
+       s = kzalloc(sizeof(*s), GFP_NOFS|gfp);
+       if (!s)
+               return NULL;
+
+       spin_lock_init(&s->lock);
+       /*
+        * migrate_page_move_mapping() assumes that pages with private data
+        * have their count elevated by 1.
+        */
+       get_page(page);
+       set_page_private(page, (unsigned long) s);
+       SetPagePrivate(page);
+       return s;
+}
+
+static struct bch_page_state *bch2_page_state_create(struct page *page,
+                                                    gfp_t gfp)
+{
+       return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
+}
+
+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
+{
+       /* XXX: this should not be open coded */
+       return inode->ei_inode.bi_data_replicas
+               ? inode->ei_inode.bi_data_replicas - 1
+               : c->opts.data_replicas;
+}
+
+static inline unsigned sectors_to_reserve(struct bch_page_sector *s,
+                                                 unsigned nr_replicas)
+{
+       return max(0, (int) nr_replicas -
+                  s->nr_replicas -
+                  s->replicas_reserved);
+}
+
+static int bch2_get_page_disk_reservation(struct bch_fs *c,
+                               struct bch_inode_info *inode,
+                               struct page *page, bool check_enospc)
+{
+       struct bch_page_state *s = bch2_page_state_create(page, 0);
+       unsigned nr_replicas = inode_nr_replicas(c, inode);
+       struct disk_reservation disk_res = { 0 };
+       unsigned i, disk_res_sectors = 0;
+       int ret;
+
+       if (!s)
+               return -ENOMEM;
+
+       for (i = 0; i < ARRAY_SIZE(s->s); i++)
+               disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
+
+       if (!disk_res_sectors)
+               return 0;
+
+       ret = bch2_disk_reservation_get(c, &disk_res,
+                                       disk_res_sectors, 1,
+                                       !check_enospc
+                                       ? BCH_DISK_RESERVATION_NOFAIL
+                                       : 0);
+       if (unlikely(ret))
+               return ret;
+
+       for (i = 0; i < ARRAY_SIZE(s->s); i++)
+               s->s[i].replicas_reserved +=
+                       sectors_to_reserve(&s->s[i], nr_replicas);
+
+       return 0;
+}
+
+struct bch2_page_reservation {
+       struct disk_reservation disk;
+       struct quota_res        quota;
+};
+
+static void bch2_page_reservation_init(struct bch_fs *c,
+                       struct bch_inode_info *inode,
+                       struct bch2_page_reservation *res)
+{
+       memset(res, 0, sizeof(*res));
+
+       res->disk.nr_replicas = inode_nr_replicas(c, inode);
+}
+
+static void bch2_page_reservation_put(struct bch_fs *c,
+                       struct bch_inode_info *inode,
+                       struct bch2_page_reservation *res)
+{
+       bch2_disk_reservation_put(c, &res->disk);
+       bch2_quota_reservation_put(c, inode, &res->quota);
+}
+
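+/*
+ * Reserve disk space and quota for writing @len bytes at @offset within
+ * @page: walk the affected sectors at filesystem block granularity and only
+ * reserve what each sector doesn't already own.
+ */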
+static int bch2_page_reservation_get(struct bch_fs *c,
+                       struct bch_inode_info *inode, struct page *page,
+                       struct bch2_page_reservation *res,
+                       unsigned offset, unsigned len, bool check_enospc)
+{
+       struct bch_page_state *s = bch2_page_state_create(page, 0);
+       unsigned i, disk_sectors = 0, quota_sectors = 0;
+       int ret;
+
+       if (!s)
+               return -ENOMEM;
+
+       for (i = round_down(offset, block_bytes(c)) >> 9;
+            i < round_up(offset + len, block_bytes(c)) >> 9;
+            i++) {
+               disk_sectors += sectors_to_reserve(&s->s[i],
+                                               res->disk.nr_replicas);
+               quota_sectors += s->s[i].state == SECTOR_UNALLOCATED;
+       }
+
+       if (disk_sectors) {
+               ret = bch2_disk_reservation_add(c, &res->disk,
+                                               disk_sectors,
+                                               !check_enospc
+                                               ? BCH_DISK_RESERVATION_NOFAIL
+                                               : 0);
+               if (unlikely(ret))
+                       return ret;
+       }
+
+       if (quota_sectors) {
+               ret = bch2_quota_reservation_add(c, inode, &res->quota,
+                                                quota_sectors,
+                                                check_enospc);
+               if (unlikely(ret)) {
+                       struct disk_reservation tmp = {
+                               .sectors = disk_sectors
+                       };
+
+                       bch2_disk_reservation_put(c, &tmp);
+                       res->disk.sectors -= disk_sectors;
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static void bch2_clear_page_bits(struct page *page)
+{
+       struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_page_state *s = bch2_page_state(page);
+       struct disk_reservation disk_res = { 0 };
+       int i, dirty_sectors = 0;
+
+       if (!s)
+               return;
+
+       EBUG_ON(!PageLocked(page));
+       EBUG_ON(PageWriteback(page));
+
+       for (i = 0; i < ARRAY_SIZE(s->s); i++) {
+               disk_res.sectors += s->s[i].replicas_reserved;
+               s->s[i].replicas_reserved = 0;
+
+               if (s->s[i].state == SECTOR_DIRTY) {
+                       dirty_sectors++;
+                       s->s[i].state = SECTOR_UNALLOCATED;
+               }
+       }
+
+       bch2_disk_reservation_put(c, &disk_res);
+
+       if (dirty_sectors)
+               i_sectors_acct(c, inode, NULL, -dirty_sectors);
+
+       bch2_page_state_release(page);
+}
+
+static void bch2_set_page_dirty(struct bch_fs *c,
+                       struct bch_inode_info *inode, struct page *page,
+                       struct bch2_page_reservation *res,
+                       unsigned offset, unsigned len)
+{
+       struct bch_page_state *s = bch2_page_state(page);
+       unsigned i, dirty_sectors = 0;
+
+       WARN_ON((u64) page_offset(page) + offset + len >
+               round_up((u64) i_size_read(&inode->v), block_bytes(c)));
+
+       spin_lock(&s->lock);
+
+       for (i = round_down(offset, block_bytes(c)) >> 9;
+            i < round_up(offset + len, block_bytes(c)) >> 9;
+            i++) {
+               unsigned sectors = sectors_to_reserve(&s->s[i],
+                                               res->disk.nr_replicas);
+
+               /*
+                * This can happen if we race with the error path in
+                * bch2_writepage_io_done():
+                */
+               sectors = min_t(unsigned, sectors, res->disk.sectors);
+
+               s->s[i].replicas_reserved += sectors;
+               res->disk.sectors -= sectors;
+
+               if (s->s[i].state == SECTOR_UNALLOCATED)
+                       dirty_sectors++;
+
+               s->s[i].state = max_t(unsigned, s->s[i].state, SECTOR_DIRTY);
+       }
+
+       spin_unlock(&s->lock);
+
+       if (dirty_sectors)
+               i_sectors_acct(c, inode, &res->quota, dirty_sectors);
+
+       if (!PageDirty(page))
+               __set_page_dirty_nobuffers(page);
+}
+
+vm_fault_t bch2_page_fault(struct vm_fault *vmf)
+{
+       struct file *file = vmf->vma->vm_file;
+       struct bch_inode_info *inode = file_bch_inode(file);
+       int ret;
+
+       bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+       ret = filemap_fault(vmf);
+       bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+       return ret;
+}
+
+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
+{
+       struct page *page = vmf->page;
+       struct file *file = vmf->vma->vm_file;
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct address_space *mapping = file->f_mapping;
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch2_page_reservation res;
+       unsigned len;
+       loff_t isize;
+       int ret = VM_FAULT_LOCKED;
+
+       bch2_page_reservation_init(c, inode, &res);
+
+       sb_start_pagefault(inode->v.i_sb);
+       file_update_time(file);
+
+       /*
+        * Not strictly necessary, but helps avoid dio writes livelocking in
+        * write_invalidate_inode_pages_range() - can drop this if/when we get
+        * a write_invalidate_inode_pages_range() that works without dropping
+        * page lock before invalidating page
+        */
+       bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+
+       lock_page(page);
+       isize = i_size_read(&inode->v);
+
+       if (page->mapping != mapping || page_offset(page) >= isize) {
+               unlock_page(page);
+               ret = VM_FAULT_NOPAGE;
+               goto out;
+       }
+
+       len = min_t(loff_t, PAGE_SIZE, isize - page_offset(page));
+
+       if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) {
+               unlock_page(page);
+               ret = VM_FAULT_SIGBUS;
+               goto out;
+       }
+
+       bch2_set_page_dirty(c, inode, page, &res, 0, len);
+       bch2_page_reservation_put(c, inode, &res);
+
+       wait_for_stable_page(page);
+out:
+       bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+       sb_end_pagefault(inode->v.i_sb);
+
+       return ret;
+}
+
+void bch2_invalidatepage(struct page *page, unsigned int offset,
+                        unsigned int length)
+{
+       if (offset || length < PAGE_SIZE)
+               return;
+
+       bch2_clear_page_bits(page);
+}
+
+int bch2_releasepage(struct page *page, gfp_t gfp_mask)
+{
+       if (PageDirty(page))
+               return 0;
+
+       bch2_clear_page_bits(page);
+       return 1;
+}
+
+#ifdef CONFIG_MIGRATION
+int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
+                     struct page *page, enum migrate_mode mode)
+{
+       int ret;
+
+       EBUG_ON(!PageLocked(page));
+       EBUG_ON(!PageLocked(newpage));
+
+       ret = migrate_page_move_mapping(mapping, newpage, page, 0);
+       if (ret != MIGRATEPAGE_SUCCESS)
+               return ret;
+
+       if (PagePrivate(page)) {
+               ClearPagePrivate(page);
+               get_page(newpage);
+               set_page_private(newpage, page_private(page));
+               set_page_private(page, 0);
+               put_page(page);
+               SetPagePrivate(newpage);
+       }
+
+       if (mode != MIGRATE_SYNC_NO_COPY)
+               migrate_page_copy(newpage, page);
+       else
+               migrate_page_states(newpage, page);
+       return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
+/* readpage(s): */
+
+static void bch2_readpages_end_io(struct bio *bio)
+{
+       struct bvec_iter_all iter;
+       struct bio_vec *bv;
+
+       bio_for_each_segment_all(bv, bio, iter) {
+               struct page *page = bv->bv_page;
+
+               if (!bio->bi_status) {
+                       SetPageUptodate(page);
+               } else {
+                       ClearPageUptodate(page);
+                       SetPageError(page);
+               }
+               unlock_page(page);
+       }
+
+       bio_put(bio);
+}
+
+static inline void page_state_init_for_read(struct page *page)
+{
+       SetPagePrivate(page);
+       page->private = 0;
+}
+
+struct readpages_iter {
+       struct address_space    *mapping;
+       struct page             **pages;
+       unsigned                nr_pages;
+       unsigned                nr_added;
+       unsigned                idx;
+       pgoff_t                 offset;
+};
+
+static int readpages_iter_init(struct readpages_iter *iter,
+                              struct address_space *mapping,
+                              struct list_head *pages, unsigned nr_pages)
+{
+       memset(iter, 0, sizeof(*iter));
+
+       iter->mapping   = mapping;
+       iter->offset    = list_last_entry(pages, struct page, lru)->index;
+
+       iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS);
+       if (!iter->pages)
+               return -ENOMEM;
+
+       while (!list_empty(pages)) {
+               struct page *page = list_last_entry(pages, struct page, lru);
+
+               __bch2_page_state_create(page, __GFP_NOFAIL);
+
+               iter->pages[iter->nr_pages++] = page;
+               list_del(&page->lru);
+       }
+
+       return 0;
+}
+
+static inline struct page *readpage_iter_next(struct readpages_iter *iter)
+{
+       struct page *page;
+       unsigned i;
+       int ret;
+
+       BUG_ON(iter->idx > iter->nr_added);
+       BUG_ON(iter->nr_added > iter->nr_pages);
+
+       if (iter->idx < iter->nr_added)
+               goto out;
+
+       while (1) {
+               if (iter->idx == iter->nr_pages)
+                       return NULL;
+
+               ret = add_to_page_cache_lru_vec(iter->mapping,
+                               iter->pages     + iter->nr_added,
+                               iter->nr_pages  - iter->nr_added,
+                               iter->offset    + iter->nr_added,
+                               GFP_NOFS);
+               if (ret > 0)
+                       break;
+
+               page = iter->pages[iter->nr_added];
+               iter->idx++;
+               iter->nr_added++;
+
+               __bch2_page_state_release(page);
+               put_page(page);
+       }
+
+       iter->nr_added += ret;
+
+       for (i = iter->idx; i < iter->nr_added; i++)
+               put_page(iter->pages[i]);
+out:
+       EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx);
+
+       return iter->pages[iter->idx];
+}
+
+static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
+{
+       struct bvec_iter iter;
+       struct bio_vec bv;
+       unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
+               ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
+       unsigned state = k.k->type == KEY_TYPE_reservation
+               ? SECTOR_RESERVED
+               : SECTOR_ALLOCATED;
+
+       bio_for_each_segment(bv, bio, iter) {
+               struct bch_page_state *s = bch2_page_state(bv.bv_page);
+               unsigned i;
+
+               for (i = bv.bv_offset >> 9;
+                    i < (bv.bv_offset + bv.bv_len) >> 9;
+                    i++) {
+                       s->s[i].nr_replicas = nr_ptrs;
+                       s->s[i].state = state;
+               }
+       }
+}
+
+static bool extent_partial_reads_expensive(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *i;
+
+       bkey_for_each_crc(k.k, ptrs, crc, i)
+               if (crc.csum_type || crc.compression_type)
+                       return true;
+       return false;
+}
+
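+/*
+ * Opportunistically grow a read bio to cover the whole extent: first with
+ * pages the caller already has, then - if @get_more, i.e. reading the extent
+ * partially would be expensive - with freshly allocated pages added to the
+ * page cache.
+ */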
+static void readpage_bio_extend(struct readpages_iter *iter,
+                               struct bio *bio,
+                               unsigned sectors_this_extent,
+                               bool get_more)
+{
+       while (bio_sectors(bio) < sectors_this_extent &&
+              bio->bi_vcnt < bio->bi_max_vecs) {
+               pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT;
+               struct page *page = readpage_iter_next(iter);
+               int ret;
+
+               if (page) {
+                       if (iter->offset + iter->idx != page_offset)
+                               break;
+
+                       iter->idx++;
+               } else {
+                       if (!get_more)
+                               break;
+
+                       page = xa_load(&iter->mapping->i_pages, page_offset);
+                       if (page && !xa_is_value(page))
+                               break;
+
+                       page = __page_cache_alloc(readahead_gfp_mask(iter->mapping));
+                       if (!page)
+                               break;
+
+                       if (!__bch2_page_state_create(page, 0)) {
+                               put_page(page);
+                               break;
+                       }
+
+                       ret = add_to_page_cache_lru(page, iter->mapping,
+                                                   page_offset, GFP_NOFS);
+                       if (ret) {
+                               __bch2_page_state_release(page);
+                               put_page(page);
+                               break;
+                       }
+
+                       put_page(page);
+               }
+
+               BUG_ON(!bio_add_page(bio, page, PAGE_SIZE, 0));
+       }
+}
+
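+/*
+ * Core buffered read loop: repeatedly peek the extent at the bio's current
+ * sector, resolve reflink indirection, optionally extend the bio for
+ * readahead, record the per-page sector states, and issue the read for each
+ * extent fragment until the bio is consumed.
+ */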
+static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
+                      struct bch_read_bio *rbio, u64 inum,
+                      struct readpages_iter *readpages_iter)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_on_stack sk;
+       int flags = BCH_READ_RETRY_IF_STALE|
+               BCH_READ_MAY_PROMOTE;
+       int ret = 0;
+
+       rbio->c = c;
+       rbio->start_time = local_clock();
+
+       bkey_on_stack_init(&sk);
+retry:
+       while (1) {
+               struct bkey_s_c k;
+               unsigned bytes, sectors, offset_into_extent;
+
+               bch2_btree_iter_set_pos(iter,
+                               POS(inum, rbio->bio.bi_iter.bi_sector));
+
+               k = bch2_btree_iter_peek_slot(iter);
+               ret = bkey_err(k);
+               if (ret)
+                       break;
+
+               bkey_on_stack_reassemble(&sk, c, k);
+               k = bkey_i_to_s_c(sk.k);
+
+               offset_into_extent = iter->pos.offset -
+                       bkey_start_offset(k.k);
+               sectors = k.k->size - offset_into_extent;
+
+               ret = bch2_read_indirect_extent(trans,
+                                       &offset_into_extent, &sk);
+               if (ret)
+                       break;
+
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               bch2_trans_unlock(trans);
+
+               if (readpages_iter)
+                       readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
+                                           extent_partial_reads_expensive(k));
+
+               bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
+               swap(rbio->bio.bi_iter.bi_size, bytes);
+
+               if (rbio->bio.bi_iter.bi_size == bytes)
+                       flags |= BCH_READ_LAST_FRAGMENT;
+
+               if (bkey_extent_is_allocation(k.k))
+                       bch2_add_page_sectors(&rbio->bio, k);
+
+               bch2_read_extent(trans, rbio, k, offset_into_extent, flags);
+
+               if (flags & BCH_READ_LAST_FRAGMENT)
+                       break;
+
+               swap(rbio->bio.bi_iter.bi_size, bytes);
+               bio_advance(&rbio->bio, bytes);
+       }
+
+       if (ret == -EINTR)
+               goto retry;
+
+       if (ret) {
+               bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+               bio_endio(&rbio->bio);
+       }
+
+       bkey_on_stack_exit(&sk, c);
+}
+
+int bch2_readpages(struct file *file, struct address_space *mapping,
+                  struct list_head *pages, unsigned nr_pages)
+{
+       struct bch_inode_info *inode = to_bch_ei(mapping->host);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct page *page;
+       struct readpages_iter readpages_iter;
+       int ret;
+
+       ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages);
+       BUG_ON(ret);
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
+                                  BTREE_ITER_SLOTS);
+
+       bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+
+       while ((page = readpage_iter_next(&readpages_iter))) {
+               pgoff_t index = readpages_iter.offset + readpages_iter.idx;
+               unsigned n = min_t(unsigned,
+                                  readpages_iter.nr_pages -
+                                  readpages_iter.idx,
+                                  BIO_MAX_PAGES);
+               struct bch_read_bio *rbio =
+                       rbio_init(bio_alloc_bioset(GFP_NOFS, n, &c->bio_read),
+                                 opts);
+
+               readpages_iter.idx++;
+
+               bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0);
+               rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT;
+               rbio->bio.bi_end_io = bch2_readpages_end_io;
+               BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
+
+               bchfs_read(&trans, iter, rbio, inode->v.i_ino,
+                          &readpages_iter);
+       }
+
+       bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+       bch2_trans_exit(&trans);
+       kfree(readpages_iter.pages);
+
+       return 0;
+}
+
+static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
+                            u64 inum, struct page *page)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+
+       bch2_page_state_create(page, __GFP_NOFAIL);
+
+       bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC);
+       rbio->bio.bi_iter.bi_sector =
+               (sector_t) page->index << PAGE_SECTOR_SHIFT;
+       BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0));
+
+       bch2_trans_init(&trans, c, 0, 0);
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
+                                  BTREE_ITER_SLOTS);
+
+       bchfs_read(&trans, iter, rbio, inum, NULL);
+
+       bch2_trans_exit(&trans);
+}
+
+int bch2_readpage(struct file *file, struct page *page)
+{
+       struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
+       struct bch_read_bio *rbio;
+
+       rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read), opts);
+       rbio->bio.bi_end_io = bch2_readpages_end_io;
+
+       __bchfs_readpage(c, rbio, inode->v.i_ino, page);
+       return 0;
+}
+
+static void bch2_read_single_page_end_io(struct bio *bio)
+{
+       complete(bio->bi_private);
+}
+
+static int bch2_read_single_page(struct page *page,
+                                struct address_space *mapping)
+{
+       struct bch_inode_info *inode = to_bch_ei(mapping->host);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_read_bio *rbio;
+       int ret;
+       DECLARE_COMPLETION_ONSTACK(done);
+
+       rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read),
+                        io_opts(c, &inode->ei_inode));
+       rbio->bio.bi_private = &done;
+       rbio->bio.bi_end_io = bch2_read_single_page_end_io;
+
+       __bchfs_readpage(c, rbio, inode->v.i_ino, page);
+       wait_for_completion(&done);
+
+       ret = blk_status_to_errno(rbio->bio.bi_status);
+       bio_put(&rbio->bio);
+
+       if (ret < 0)
+               return ret;
+
+       SetPageUptodate(page);
+       return 0;
+}
+
+/* writepages: */
+
+struct bch_writepage_state {
+       struct bch_writepage_io *io;
+       struct bch_io_opts      opts;
+};
+
+static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c,
+                                                                 struct bch_inode_info *inode)
+{
+       return (struct bch_writepage_state) {
+               .opts = io_opts(c, &inode->ei_inode)
+       };
+}
+
+static void bch2_writepage_io_free(struct closure *cl)
+{
+       struct bch_writepage_io *io = container_of(cl,
+                                       struct bch_writepage_io, cl);
+
+       bio_put(&io->op.wbio.bio);
+}
+
+static void bch2_writepage_io_done(struct closure *cl)
+{
+       struct bch_writepage_io *io = container_of(cl,
+                                       struct bch_writepage_io, cl);
+       struct bch_fs *c = io->op.c;
+       struct bio *bio = &io->op.wbio.bio;
+       struct bvec_iter_all iter;
+       struct bio_vec *bvec;
+       unsigned i;
+
+       if (io->op.error) {
+               bio_for_each_segment_all(bvec, bio, iter) {
+                       struct bch_page_state *s;
+
+                       SetPageError(bvec->bv_page);
+                       mapping_set_error(bvec->bv_page->mapping, -EIO);
+
+                       s = __bch2_page_state(bvec->bv_page);
+                       spin_lock(&s->lock);
+                       for (i = 0; i < PAGE_SECTORS; i++)
+                               s->s[i].nr_replicas = 0;
+                       spin_unlock(&s->lock);
+               }
+       }
+
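+       /*
+        * Data written inline in the btree isn't backed by extent pointers,
+        * so these pages hold no allocated replicas either:
+        */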
+       if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
+               bio_for_each_segment_all(bvec, bio, iter) {
+                       struct bch_page_state *s;
+
+                       s = __bch2_page_state(bvec->bv_page);
+                       spin_lock(&s->lock);
+                       for (i = 0; i < PAGE_SECTORS; i++)
+                               s->s[i].nr_replicas = 0;
+                       spin_unlock(&s->lock);
+               }
+       }
+
+       /*
+        * racing with fallocate can cause us to add fewer sectors than
+        * expected - but we shouldn't add more sectors than expected:
+        */
+       BUG_ON(io->op.i_sectors_delta > 0);
+
+       /*
+        * An error (e.g. due to going read-only) halfway through a page can
+        * throw i_sectors_delta off slightly, so this additional assertion
+        * stays disabled:
+        *
+        *      BUG_ON(io->op.i_sectors_delta >= PAGE_SECTORS);
+        */
+
+       /*
+        * PageWriteback is effectively our ref on the inode - fixup i_blocks
+        * before calling end_page_writeback:
+        */
+       i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
+
+       bio_for_each_segment_all(bvec, bio, iter) {
+               struct bch_page_state *s = __bch2_page_state(bvec->bv_page);
+
+               if (atomic_dec_and_test(&s->write_count))
+                       end_page_writeback(bvec->bv_page);
+       }
+
+       closure_return_with_destructor(&io->cl, bch2_writepage_io_free);
+}
+
+static void bch2_writepage_do_io(struct bch_writepage_state *w)
+{
+       struct bch_writepage_io *io = w->io;
+
+       w->io = NULL;
+       closure_call(&io->op.cl, bch2_write, NULL, &io->cl);
+       continue_at(&io->cl, bch2_writepage_io_done, NULL);
+}
+
+/*
+ * Get a bch_writepage_io and add @page to it - appending to an existing one if
+ * possible, else allocating a new one:
+ */
+static void bch2_writepage_io_alloc(struct bch_fs *c,
+                                   struct writeback_control *wbc,
+                                   struct bch_writepage_state *w,
+                                   struct bch_inode_info *inode,
+                                   u64 sector,
+                                   unsigned nr_replicas)
+{
+       struct bch_write_op *op;
+
+       w->io = container_of(bio_alloc_bioset(GFP_NOFS,
+                                             BIO_MAX_PAGES,
+                                             &c->writepage_bioset),
+                            struct bch_writepage_io, op.wbio.bio);
+
+       closure_init(&w->io->cl, NULL);
+       w->io->inode            = inode;
+
+       op                      = &w->io->op;
+       bch2_write_op_init(op, c, w->opts);
+       op->target              = w->opts.foreground_target;
+       op_journal_seq_set(op, &inode->ei_journal_seq);
+       op->nr_replicas         = nr_replicas;
+       op->res.nr_replicas     = nr_replicas;
+       op->write_point         = writepoint_hashed(inode->ei_last_dirtied);
+       op->pos                 = POS(inode->v.i_ino, sector);
+       op->wbio.bio.bi_iter.bi_sector = sector;
+       op->wbio.bio.bi_opf     = wbc_to_write_flags(wbc);
+}
+
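+/*
+ * Write out a single page: zero the portion past i_size if the page
+ * straddles it, snapshot the per-sector reservation state, then emit writes
+ * one contiguous run of dirty sectors at a time, appending to w->io while
+ * the run stays contiguous with it and the bio has room.
+ */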
+static int __bch2_writepage(struct page *page,
+                           struct writeback_control *wbc,
+                           void *data)
+{
+       struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_writepage_state *w = data;
+       struct bch_page_state *s, orig;
+       unsigned i, offset, nr_replicas_this_write = U32_MAX;
+       loff_t i_size = i_size_read(&inode->v);
+       pgoff_t end_index = i_size >> PAGE_SHIFT;
+       int ret;
+
+       EBUG_ON(!PageUptodate(page));
+
+       /* Is the page fully inside i_size? */
+       if (page->index < end_index)
+               goto do_io;
+
+       /* Is the page fully outside i_size? (truncate in progress) */
+       offset = i_size & (PAGE_SIZE - 1);
+       if (page->index > end_index || !offset) {
+               unlock_page(page);
+               return 0;
+       }
+
+       /*
+        * The page straddles i_size.  It must be zeroed out on each and every
+        * writepage invocation because it may be mmapped.  "A file is mapped
+        * in multiples of the page size.  For a file that is not a multiple of
+        * the  page size, the remaining memory is zeroed when mapped, and
+        * writes to that region are not written out to the file."
+        */
+       zero_user_segment(page, offset, PAGE_SIZE);
+do_io:
+       s = bch2_page_state_create(page, __GFP_NOFAIL);
+
+       ret = bch2_get_page_disk_reservation(c, inode, page, true);
+       if (ret) {
+               SetPageError(page);
+               mapping_set_error(page->mapping, ret);
+               unlock_page(page);
+               return 0;
+       }
+
+       /* Before unlocking the page, get copy of reservations: */
+       orig = *s;
+
+       for (i = 0; i < PAGE_SECTORS; i++) {
+               if (s->s[i].state < SECTOR_DIRTY)
+                       continue;
+
+               nr_replicas_this_write =
+                       min_t(unsigned, nr_replicas_this_write,
+                             s->s[i].nr_replicas +
+                             s->s[i].replicas_reserved);
+       }
+
+       for (i = 0; i < PAGE_SECTORS; i++) {
+               if (s->s[i].state < SECTOR_DIRTY)
+                       continue;
+
+               s->s[i].nr_replicas = w->opts.compression
+                       ? 0 : nr_replicas_this_write;
+
+               s->s[i].replicas_reserved = 0;
+               s->s[i].state = SECTOR_ALLOCATED;
+       }
+
+       BUG_ON(atomic_read(&s->write_count));
+       atomic_set(&s->write_count, 1);
+
+       BUG_ON(PageWriteback(page));
+       set_page_writeback(page);
+
+       unlock_page(page);
+
+       offset = 0;
+       while (1) {
+               unsigned sectors = 1, dirty_sectors = 0, reserved_sectors = 0;
+               u64 sector;
+
+               while (offset < PAGE_SECTORS &&
+                      orig.s[offset].state < SECTOR_DIRTY)
+                       offset++;
+
+               if (offset == PAGE_SECTORS)
+                       break;
+
+               sector = ((u64) page->index << PAGE_SECTOR_SHIFT) + offset;
+
+               while (offset + sectors < PAGE_SECTORS &&
+                      orig.s[offset + sectors].state >= SECTOR_DIRTY)
+                       sectors++;
+
+               for (i = offset; i < offset + sectors; i++) {
+                       reserved_sectors += orig.s[i].replicas_reserved;
+                       dirty_sectors += orig.s[i].state == SECTOR_DIRTY;
+               }
+
+               if (w->io &&
+                   (w->io->op.res.nr_replicas != nr_replicas_this_write ||
+                    bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
+                    w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
+                    (BIO_MAX_PAGES * PAGE_SIZE) ||
+                    bio_end_sector(&w->io->op.wbio.bio) != sector))
+                       bch2_writepage_do_io(w);
+
+               if (!w->io)
+                       bch2_writepage_io_alloc(c, wbc, w, inode, sector,
+                                               nr_replicas_this_write);
+
+               atomic_inc(&s->write_count);
+
+               BUG_ON(inode != w->io->inode);
+               BUG_ON(!bio_add_page(&w->io->op.wbio.bio, page,
+                                    sectors << 9, offset << 9));
+
+               /* Check for writing past i_size: */
+               WARN_ON((bio_end_sector(&w->io->op.wbio.bio) << 9) >
+                       round_up(i_size, block_bytes(c)));
+
+               w->io->op.res.sectors += reserved_sectors;
+               w->io->op.i_sectors_delta -= dirty_sectors;
+               w->io->op.new_i_size = i_size;
+
+               offset += sectors;
+       }
+
+       if (atomic_dec_and_test(&s->write_count))
+               end_page_writeback(page);
+
+       return 0;
+}
+
+int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+       struct bch_fs *c = mapping->host->i_sb->s_fs_info;
+       struct bch_writepage_state w =
+               bch_writepage_state_init(c, to_bch_ei(mapping->host));
+       struct blk_plug plug;
+       int ret;
+
+       blk_start_plug(&plug);
+       ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w);
+       if (w.io)
+               bch2_writepage_do_io(&w);
+       blk_finish_plug(&plug);
+       return ret;
+}
+
+int bch2_writepage(struct page *page, struct writeback_control *wbc)
+{
+       struct bch_fs *c = page->mapping->host->i_sb->s_fs_info;
+       struct bch_writepage_state w =
+               bch_writepage_state_init(c, to_bch_ei(page->mapping->host));
+       int ret;
+
+       ret = __bch2_writepage(page, wbc, &w);
+       if (w.io)
+               bch2_writepage_do_io(&w);
+
+       return ret;
+}
+
+/* buffered writes: */
+
+int bch2_write_begin(struct file *file, struct address_space *mapping,
+                    loff_t pos, unsigned len, unsigned flags,
+                    struct page **pagep, void **fsdata)
+{
+       struct bch_inode_info *inode = to_bch_ei(mapping->host);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch2_page_reservation *res;
+       pgoff_t index = pos >> PAGE_SHIFT;
+       unsigned offset = pos & (PAGE_SIZE - 1);
+       struct page *page;
+       int ret = -ENOMEM;
+
+       res = kmalloc(sizeof(*res), GFP_KERNEL);
+       if (!res)
+               return -ENOMEM;
+
+       bch2_page_reservation_init(c, inode, res);
+       *fsdata = res;
+
+       bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+
+       page = grab_cache_page_write_begin(mapping, index, flags);
+       if (!page)
+               goto err_unlock;
+
+       if (PageUptodate(page))
+               goto out;
+
+       /* If we're writing entire page, don't need to read it in first: */
+       if (len == PAGE_SIZE)
+               goto out;
+
+       if (!offset && pos + len >= inode->v.i_size) {
+               zero_user_segment(page, len, PAGE_SIZE);
+               flush_dcache_page(page);
+               goto out;
+       }
+
+       if (index > inode->v.i_size >> PAGE_SHIFT) {
+               zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
+               flush_dcache_page(page);
+               goto out;
+       }
+readpage:
+       ret = bch2_read_single_page(page, mapping);
+       if (ret)
+               goto err;
+out:
+       ret = bch2_page_reservation_get(c, inode, page, res,
+                                       offset, len, true);
+       if (ret) {
+               if (!PageUptodate(page)) {
+                       /*
+                        * If the page hasn't been read in, we won't know if we
+                        * actually need a reservation - we don't actually need
+                        * to read here, we just need to check if the page is
+                        * fully backed by uncompressed data:
+                        */
+                       goto readpage;
+               }
+
+               goto err;
+       }
+
+       *pagep = page;
+       return 0;
+err:
+       unlock_page(page);
+       put_page(page);
+       *pagep = NULL;
+err_unlock:
+       bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+       kfree(res);
+       *fsdata = NULL;
+       return ret;
+}
+
+int bch2_write_end(struct file *file, struct address_space *mapping,
+                  loff_t pos, unsigned len, unsigned copied,
+                  struct page *page, void *fsdata)
+{
+       struct bch_inode_info *inode = to_bch_ei(mapping->host);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch2_page_reservation *res = fsdata;
+       unsigned offset = pos & (PAGE_SIZE - 1);
+
+       lockdep_assert_held(&inode->v.i_rwsem);
+
+       if (unlikely(copied < len && !PageUptodate(page))) {
+               /*
+                * The page needs to be read in, but that would destroy
+                * our partial write - simplest thing is to just force
+                * userspace to redo the write:
+                */
+               zero_user(page, 0, PAGE_SIZE);
+               flush_dcache_page(page);
+               copied = 0;
+       }
+
+       spin_lock(&inode->v.i_lock);
+       if (pos + copied > inode->v.i_size)
+               i_size_write(&inode->v, pos + copied);
+       spin_unlock(&inode->v.i_lock);
+
+       if (copied) {
+               if (!PageUptodate(page))
+                       SetPageUptodate(page);
+
+               bch2_set_page_dirty(c, inode, page, res, offset, copied);
+
+               inode->ei_last_dirtied = (unsigned long) current;
+       }
+
+       unlock_page(page);
+       put_page(page);
+       bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+       bch2_page_reservation_put(c, inode, res);
+       kfree(res);
+
+       return copied;
+}
+
+#define WRITE_BATCH_PAGES      32
+
+static int __bch2_buffered_write(struct bch_inode_info *inode,
+                                struct address_space *mapping,
+                                struct iov_iter *iter,
+                                loff_t pos, unsigned len)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct page *pages[WRITE_BATCH_PAGES];
+       struct bch2_page_reservation res;
+       unsigned long index = pos >> PAGE_SHIFT;
+       unsigned offset = pos & (PAGE_SIZE - 1);
+       unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+       unsigned i, reserved = 0, set_dirty = 0;
+       unsigned copied = 0, nr_pages_copied = 0;
+       int ret = 0;
+
+       BUG_ON(!len);
+       BUG_ON(nr_pages > ARRAY_SIZE(pages));
+
+       bch2_page_reservation_init(c, inode, &res);
+
+       for (i = 0; i < nr_pages; i++) {
+               pages[i] = grab_cache_page_write_begin(mapping, index + i, 0);
+               if (!pages[i]) {
+                       nr_pages = i;
+                       if (!i) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+                       len = min_t(unsigned, len,
+                                   nr_pages * PAGE_SIZE - offset);
+                       break;
+               }
+       }
+
+       if (offset && !PageUptodate(pages[0])) {
+               ret = bch2_read_single_page(pages[0], mapping);
+               if (ret)
+                       goto out;
+       }
+
+       if ((pos + len) & (PAGE_SIZE - 1) &&
+           !PageUptodate(pages[nr_pages - 1])) {
+               if ((index + nr_pages - 1) << PAGE_SHIFT >= inode->v.i_size) {
+                       zero_user(pages[nr_pages - 1], 0, PAGE_SIZE);
+               } else {
+                       ret = bch2_read_single_page(pages[nr_pages - 1], mapping);
+                       if (ret)
+                               goto out;
+               }
+       }
+
+       while (reserved < len) {
+               struct page *page = pages[(offset + reserved) >> PAGE_SHIFT];
+               unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1);
+               unsigned pg_len = min_t(unsigned, len - reserved,
+                                       PAGE_SIZE - pg_offset);
+retry_reservation:
+               ret = bch2_page_reservation_get(c, inode, page, &res,
+                                               pg_offset, pg_len, true);
+
+               if (ret && !PageUptodate(page)) {
+                       ret = bch2_read_single_page(page, mapping);
+                       if (!ret)
+                               goto retry_reservation;
+               }
+
+               if (ret)
+                       goto out;
+
+               reserved += pg_len;
+       }
+
+       if (mapping_writably_mapped(mapping))
+               for (i = 0; i < nr_pages; i++)
+                       flush_dcache_page(pages[i]);
+
+       while (copied < len) {
+               struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
+               unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1);
+               unsigned pg_len = min_t(unsigned, len - copied,
+                                       PAGE_SIZE - pg_offset);
+               unsigned pg_copied = iov_iter_copy_from_user_atomic(page,
+                                               iter, pg_offset, pg_len);
+
+               if (!pg_copied)
+                       break;
+
+               if (!PageUptodate(page) &&
+                   pg_copied != PAGE_SIZE &&
+                   pos + copied + pg_copied < inode->v.i_size) {
+                       zero_user(page, 0, PAGE_SIZE);
+                       break;
+               }
+
+               flush_dcache_page(page);
+               iov_iter_advance(iter, pg_copied);
+               copied += pg_copied;
+
+               if (pg_copied != pg_len)
+                       break;
+       }
+
+       if (!copied)
+               goto out;
+
+       spin_lock(&inode->v.i_lock);
+       if (pos + copied > inode->v.i_size)
+               i_size_write(&inode->v, pos + copied);
+       spin_unlock(&inode->v.i_lock);
+
+       while (set_dirty < copied) {
+               struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT];
+               unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1);
+               unsigned pg_len = min_t(unsigned, copied - set_dirty,
+                                       PAGE_SIZE - pg_offset);
+
+               if (!PageUptodate(page))
+                       SetPageUptodate(page);
+
+               bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len);
+               unlock_page(page);
+               put_page(page);
+
+               set_dirty += pg_len;
+       }
+
+       nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE);
+       inode->ei_last_dirtied = (unsigned long) current;
+out:
+       for (i = nr_pages_copied; i < nr_pages; i++) {
+               unlock_page(pages[i]);
+               put_page(pages[i]);
+       }
+
+       bch2_page_reservation_put(c, inode, &res);
+
+       return copied ?: ret;
+}
+
+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       struct address_space *mapping = file->f_mapping;
+       struct bch_inode_info *inode = file_bch_inode(file);
+       loff_t pos = iocb->ki_pos;
+       ssize_t written = 0;
+       int ret = 0;
+
+       bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+
+       do {
+               unsigned offset = pos & (PAGE_SIZE - 1);
+               unsigned bytes = min_t(unsigned long, iov_iter_count(iter),
+                             PAGE_SIZE * WRITE_BATCH_PAGES - offset);
+again:
+               /*
+                * Bring in the user page that we will copy from _first_.
+                * Otherwise there's a nasty deadlock on copying from the
+                * same page as we're writing to, without it being marked
+                * up-to-date.
+                *
+                * Not only is this an optimisation; it's also required so
+                * that the address has been checked as actually valid before
+                * the atomic usercopies below.
+                */
+               if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
+                       bytes = min_t(unsigned long, iov_iter_count(iter),
+                                     PAGE_SIZE - offset);
+
+                       if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
+                               ret = -EFAULT;
+                               break;
+                       }
+               }
+
+               if (unlikely(fatal_signal_pending(current))) {
+                       ret = -EINTR;
+                       break;
+               }
+
+               ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
+               if (unlikely(ret < 0))
+                       break;
+
+               cond_resched();
+
+               if (unlikely(ret == 0)) {
+                       /*
+                        * If we were unable to copy any data at all, we must
+                        * fall back to a single segment length write.
+                        *
+                        * If we didn't fall back here, we could livelock
+                        * because not all segments in the iov can be copied at
+                        * once without a pagefault.
+                        */
+                       bytes = min_t(unsigned long, PAGE_SIZE - offset,
+                                     iov_iter_single_seg_count(iter));
+                       goto again;
+               }
+               pos += ret;
+               written += ret;
+               ret = 0;
+
+               balance_dirty_pages_ratelimited(mapping);
+       } while (iov_iter_count(iter));
+
+       bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+       return written ? written : ret;
+}
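+
+/*
+ * Illustrative userspace sketch - not part of this file: the fault-in dance
+ * above exists because the source buffer of a write() may be an mmap of the
+ * very page being written, which can't be faulted in while that page is
+ * locked. This is legal from userspace and must not deadlock or livelock.
+ * The path is hypothetical:
+ */
+#if 0
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+int main(void)
+{
+        int fd = open("/mnt/bcachefs/demo", O_RDWR);
+        char *map = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
+
+        /* write to the same page we're copying from: */
+        pwrite(fd, map, 512, 2048);
+
+        munmap(map, 4096);
+        close(fd);
+        return 0;
+}
+#endif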
+
+/* O_DIRECT reads */
+
+static void bch2_dio_read_complete(struct closure *cl)
+{
+       struct dio_read *dio = container_of(cl, struct dio_read, cl);
+
+       dio->req->ki_complete(dio->req, dio->ret, 0);
+       bio_check_pages_dirty(&dio->rbio.bio);  /* transfers ownership */
+}
+
+static void bch2_direct_IO_read_endio(struct bio *bio)
+{
+       struct dio_read *dio = bio->bi_private;
+
+       if (bio->bi_status)
+               dio->ret = blk_status_to_errno(bio->bi_status);
+
+       closure_put(&dio->cl);
+}
+
+static void bch2_direct_IO_read_split_endio(struct bio *bio)
+{
+       bch2_direct_IO_read_endio(bio);
+       bio_check_pages_dirty(bio);     /* transfers ownership */
+}
+
+static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
+{
+       struct file *file = req->ki_filp;
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
+       struct dio_read *dio;
+       struct bio *bio;
+       loff_t offset = req->ki_pos;
+       bool sync = is_sync_kiocb(req);
+       size_t shorten;
+       ssize_t ret;
+
+       if ((offset|iter->count) & (block_bytes(c) - 1))
+               return -EINVAL;
+
+       ret = min_t(loff_t, iter->count,
+                   max_t(loff_t, 0, i_size_read(&inode->v) - offset));
+
+       if (!ret)
+               return ret;
+
+       shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c));
+       iter->count -= shorten;
+
+       bio = bio_alloc_bioset(GFP_KERNEL,
+                              iov_iter_npages(iter, BIO_MAX_PAGES),
+                              &c->dio_read_bioset);
+
+       bio->bi_end_io = bch2_direct_IO_read_endio;
+
+       dio = container_of(bio, struct dio_read, rbio.bio);
+       closure_init(&dio->cl, NULL);
+
+       /*
+        * this is a _really_ horrible hack just to avoid an atomic sub at the
+        * end:
+        */
+       if (!sync) {
+               set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL);
+               atomic_set(&dio->cl.remaining,
+                          CLOSURE_REMAINING_INITIALIZER -
+                          CLOSURE_RUNNING +
+                          CLOSURE_DESTRUCTOR);
+       } else {
+               atomic_set(&dio->cl.remaining,
+                          CLOSURE_REMAINING_INITIALIZER + 1);
+       }
+
+       dio->req        = req;
+       dio->ret        = ret;
+
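+       /*
+        * The first bio was allocated from dio_read_bioset above and embeds
+        * struct dio_read; jump into the loop to submit it:
+        */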
+       goto start;
+       while (iter->count) {
+               bio = bio_alloc_bioset(GFP_KERNEL,
+                                      iov_iter_npages(iter, BIO_MAX_PAGES),
+                                      &c->bio_read);
+               bio->bi_end_io          = bch2_direct_IO_read_split_endio;
+start:
+               bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC);
+               bio->bi_iter.bi_sector  = offset >> 9;
+               bio->bi_private         = dio;
+
+               ret = bio_iov_iter_get_pages(bio, iter);
+               if (ret < 0) {
+                       /* XXX: fault inject this path */
+                       bio->bi_status = BLK_STS_RESOURCE;
+                       bio_endio(bio);
+                       break;
+               }
+
+               offset += bio->bi_iter.bi_size;
+               bio_set_pages_dirty(bio);
+
+               if (iter->count)
+                       closure_get(&dio->cl);
+
+               bch2_read(c, rbio_init(bio, opts), inode->v.i_ino);
+       }
+
+       iter->count += shorten;
+
+       if (sync) {
+               closure_sync(&dio->cl);
+               closure_debug_destroy(&dio->cl);
+               ret = dio->ret;
+               bio_check_pages_dirty(&dio->rbio.bio); /* transfers ownership */
+               return ret;
+       } else {
+               return -EIOCBQUEUED;
+       }
+}
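+
+/*
+ * Illustrative sketch - not part of this file - of the refcount bias used in
+ * bch2_direct_IO_read() above: the sync case starts the closure's remaining
+ * count one high, so the submitter's closure_sync() consumes the final ref
+ * and the completion side never needs an extra atomic op. A standalone
+ * userspace analogue:
+ */
+#if 0
+#include <stdatomic.h>
+#include <stdio.h>
+
+static atomic_int remaining;
+
+static void put_ref(void)
+{
+        if (atomic_fetch_sub(&remaining, 1) == 1)
+                printf("last ref dropped: complete\n");
+}
+
+int main(void)
+{
+        /* bias: one ref per in-flight bio, plus one held by the waiter */
+        atomic_store(&remaining, 3);
+        put_ref();      /* bio 1 completes */
+        put_ref();      /* bio 2 completes */
+        put_ref();      /* waiter drops its ref: completion fires */
+        return 0;
+}
+#endif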
+
+ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct address_space *mapping = file->f_mapping;
+       size_t count = iov_iter_count(iter);
+       ssize_t ret;
+
+       if (!count)
+               return 0; /* skip atime */
+
+       if (iocb->ki_flags & IOCB_DIRECT) {
+               struct blk_plug plug;
+
+               ret = filemap_write_and_wait_range(mapping,
+                                       iocb->ki_pos,
+                                       iocb->ki_pos + count - 1);
+               if (ret < 0)
+                       return ret;
+
+               file_accessed(file);
+
+               blk_start_plug(&plug);
+               ret = bch2_direct_IO_read(iocb, iter);
+               blk_finish_plug(&plug);
+
+               if (ret >= 0)
+                       iocb->ki_pos += ret;
+       } else {
+               bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+               ret = generic_file_read_iter(iocb, iter);
+               bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+       }
+
+       return ret;
+}
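+
+/*
+ * Illustrative userspace sketch - not part of this file: O_DIRECT reads must
+ * be block-aligned in offset and length (the -EINVAL check in
+ * bch2_direct_IO_read() above), and the buffer should be aligned as well.
+ * The path and 4096-byte block size are assumptions:
+ */
+#if 0
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(void)
+{
+        int fd = open("/mnt/bcachefs/demo", O_RDONLY | O_DIRECT);
+        void *buf;
+
+        /* 4096 covers the block size of a typical 4k-block filesystem: */
+        posix_memalign(&buf, 4096, 4096);
+
+        pread(fd, buf, 4096, 0);        /* aligned: ok */
+        pread(fd, buf, 1000, 0);        /* misaligned length: fails with EINVAL */
+
+        free(buf);
+        close(fd);
+        return 0;
+}
+#endif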
+
+/* O_DIRECT writes */
+
+static void bch2_dio_write_loop_async(struct bch_write_op *);
+
+static long bch2_dio_write_loop(struct dio_write *dio)
+{
+       bool kthread = (current->flags & PF_KTHREAD) != 0;
+       struct kiocb *req = dio->req;
+       struct address_space *mapping = req->ki_filp->f_mapping;
+       struct bch_inode_info *inode = file_bch_inode(req->ki_filp);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bio *bio = &dio->op.wbio.bio;
+       struct bvec_iter_all iter;
+       struct bio_vec *bv;
+       unsigned unaligned;
+       bool sync = dio->sync;
+       long ret;
+
+       if (dio->loop)
+               goto loop;
+
+       while (1) {
+               if (kthread)
+                       kthread_use_mm(dio->mm);
+               BUG_ON(current->faults_disabled_mapping);
+               current->faults_disabled_mapping = mapping;
+
+               ret = bio_iov_iter_get_pages(bio, &dio->iter);
+
+               current->faults_disabled_mapping = NULL;
+               if (kthread)
+                       kthread_unuse_mm(dio->mm);
+
+               if (unlikely(ret < 0))
+                       goto err;
+
+               unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
+               bio->bi_iter.bi_size -= unaligned;
+               iov_iter_revert(&dio->iter, unaligned);
+
+               if (!bio->bi_iter.bi_size) {
+                       /*
+                        * bio_iov_iter_get_pages was only able to get <
+                        * blocksize worth of pages:
+                        */
+                       bio_for_each_segment_all(bv, bio, iter)
+                               put_page(bv->bv_page);
+                       ret = -EFAULT;
+                       goto err;
+               }
+
+               bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode));
+               dio->op.end_io          = bch2_dio_write_loop_async;
+               dio->op.target          = dio->op.opts.foreground_target;
+               op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
+               dio->op.write_point     = writepoint_hashed((unsigned long) current);
+               dio->op.nr_replicas     = dio->op.opts.data_replicas;
+               dio->op.pos             = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
+
+               if ((req->ki_flags & IOCB_DSYNC) &&
+                   !c->opts.journal_flush_disabled)
+                       dio->op.flags |= BCH_WRITE_FLUSH;
+
+               ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
+                                               dio->op.opts.data_replicas, 0);
+               if (unlikely(ret) &&
+                   !bch2_check_range_allocated(c, dio->op.pos,
+                               bio_sectors(bio), dio->op.opts.data_replicas))
+                       goto err;
+
+               task_io_account_write(bio->bi_iter.bi_size);
+
+               if (!dio->sync && !dio->loop && dio->iter.count) {
+                       struct iovec *iov = dio->inline_vecs;
+
+                       if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
+                               iov = kmalloc(dio->iter.nr_segs * sizeof(*iov),
+                                             GFP_KERNEL);
+                               if (unlikely(!iov)) {
+                                       dio->sync = sync = true;
+                                       goto do_io;
+                               }
+
+                               dio->free_iov = true;
+                       }
+
+                       memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov));
+                       dio->iter.iov = iov;
+               }
+do_io:
+               dio->loop = true;
+               closure_call(&dio->op.cl, bch2_write, NULL, NULL);
+
+               if (sync)
+                       wait_for_completion(&dio->done);
+               else
+                       return -EIOCBQUEUED;
+loop:
+               i_sectors_acct(c, inode, &dio->quota_res,
+                              dio->op.i_sectors_delta);
+               req->ki_pos += (u64) dio->op.written << 9;
+               dio->written += dio->op.written;
+
+               spin_lock(&inode->v.i_lock);
+               if (req->ki_pos > inode->v.i_size)
+                       i_size_write(&inode->v, req->ki_pos);
+               spin_unlock(&inode->v.i_lock);
+
+               bio_for_each_segment_all(bv, bio, iter)
+                       put_page(bv->bv_page);
+               if (!dio->iter.count || dio->op.error)
+                       break;
+
+               bio_reset(bio);
+               reinit_completion(&dio->done);
+       }
+
+       ret = dio->op.error ?: ((long) dio->written << 9);
+err:
+       bch2_pagecache_block_put(&inode->ei_pagecache_lock);
+       bch2_quota_reservation_put(c, inode, &dio->quota_res);
+
+       if (dio->free_iov)
+               kfree(dio->iter.iov);
+
+       bio_put(bio);
+
+       /* inode->i_dio_count is our ref on inode and thus bch_fs */
+       inode_dio_end(&inode->v);
+
+       if (!sync) {
+               req->ki_complete(req, ret, 0);
+               ret = -EIOCBQUEUED;
+       }
+       return ret;
+}
+
+static void bch2_dio_write_loop_async(struct bch_write_op *op)
+{
+       struct dio_write *dio = container_of(op, struct dio_write, op);
+
+       if (dio->sync)
+               complete(&dio->done);
+       else
+               bch2_dio_write_loop(dio);
+}
+
+static noinline
+ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
+{
+       struct file *file = req->ki_filp;
+       struct address_space *mapping = file->f_mapping;
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct dio_write *dio;
+       struct bio *bio;
+       bool locked = true, extending;
+       ssize_t ret;
+
+       prefetch(&c->opts);
+       prefetch((void *) &c->opts + 64);
+       prefetch(&inode->ei_inode);
+       prefetch((void *) &inode->ei_inode + 64);
+
+       inode_lock(&inode->v);
+
+       ret = generic_write_checks(req, iter);
+       if (unlikely(ret <= 0))
+               goto err;
+
+       ret = file_remove_privs(file);
+       if (unlikely(ret))
+               goto err;
+
+       ret = file_update_time(file);
+       if (unlikely(ret))
+               goto err;
+
+       if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) {
+               /* misaligned O_DIRECT write: don't return 0 from write() */
+               ret = -EINVAL;
+               goto err;
+       }
+
+       inode_dio_begin(&inode->v);
+       bch2_pagecache_block_get(&inode->ei_pagecache_lock);
+
+       extending = req->ki_pos + iter->count > inode->v.i_size;
+       if (!extending) {
+               inode_unlock(&inode->v);
+               locked = false;
+       }
+
+       bio = bio_alloc_bioset(GFP_KERNEL,
+                              iov_iter_npages(iter, BIO_MAX_PAGES),
+                              &c->dio_write_bioset);
+       dio = container_of(bio, struct dio_write, op.wbio.bio);
+       init_completion(&dio->done);
+       dio->req                = req;
+       dio->mm                 = current->mm;
+       dio->loop               = false;
+       dio->sync               = is_sync_kiocb(req) || extending;
+       dio->free_iov           = false;
+       dio->quota_res.sectors  = 0;
+       dio->written            = 0;
+       dio->iter               = *iter;
+
+       ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
+                                        iter->count >> 9, true);
+       if (unlikely(ret))
+               goto err_put_bio;
+
+       ret = write_invalidate_inode_pages_range(mapping,
+                                       req->ki_pos,
+                                       req->ki_pos + iter->count - 1);
+       if (unlikely(ret))
+               goto err_put_bio;
+
+       ret = bch2_dio_write_loop(dio);
+err:
+       if (locked)
+               inode_unlock(&inode->v);
+       return ret;
+err_put_bio:
+       bch2_pagecache_block_put(&inode->ei_pagecache_lock);
+       bch2_quota_reservation_put(c, inode, &dio->quota_res);
+       bio_put(bio);
+       inode_dio_end(&inode->v);
+       goto err;
+}
+
+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct bch_inode_info *inode = file_bch_inode(file);
+       ssize_t ret;
+
+       if (iocb->ki_flags & IOCB_DIRECT)
+               return bch2_direct_write(iocb, from);
+
+       /* We can write back this queue in page reclaim */
+       current->backing_dev_info = inode_to_bdi(&inode->v);
+       inode_lock(&inode->v);
+
+       ret = generic_write_checks(iocb, from);
+       if (ret <= 0)
+               goto unlock;
+
+       ret = file_remove_privs(file);
+       if (ret)
+               goto unlock;
+
+       ret = file_update_time(file);
+       if (ret)
+               goto unlock;
+
+       ret = bch2_buffered_write(iocb, from);
+       if (likely(ret > 0))
+               iocb->ki_pos += ret;
+unlock:
+       inode_unlock(&inode->v);
+       current->backing_dev_info = NULL;
+
+       if (ret > 0)
+               ret = generic_write_sync(iocb, ret);
+
+       return ret;
+}
+
+/* fsync: */
+
+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       int ret, ret2;
+
+       ret = file_write_and_wait_range(file, start, end);
+       if (ret)
+               return ret;
+
+       if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC))
+               goto out;
+
+       ret = sync_inode_metadata(&inode->v, 1);
+       if (ret)
+               return ret;
+out:
+       if (!c->opts.journal_flush_disabled)
+               ret = bch2_journal_flush_seq(&c->journal,
+                                            inode->ei_journal_seq);
+       ret2 = file_check_and_advance_wb_err(file);
+
+       return ret ?: ret2;
+}
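+
+/*
+ * Illustrative userspace sketch - not part of this file: fdatasync() takes
+ * the datasync path above, which may skip the inode-metadata sync when only
+ * the data changed; both paths end with a journal flush unless
+ * journal_flush_disabled is set. The path is hypothetical:
+ */
+#if 0
+#include <fcntl.h>
+#include <unistd.h>
+
+int main(void)
+{
+        int fd = open("/mnt/bcachefs/demo", O_WRONLY);
+
+        write(fd, "data", 4);
+        fdatasync(fd);  /* data + journal; inode metadata only if required */
+        fsync(fd);      /* data + inode metadata + journal */
+
+        close(fd);
+        return 0;
+}
+#endif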
+
+/* truncate: */
+
+static inline int range_has_data(struct bch_fs *c,
+                                 struct bpos start,
+                                 struct bpos end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) {
+               if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
+                       break;
+
+               if (bkey_extent_is_data(k.k)) {
+                       ret = 1;
+                       break;
+               }
+       }
+
+       return bch2_trans_exit(&trans) ?: ret;
+}
+
+static int __bch2_truncate_page(struct bch_inode_info *inode,
+                               pgoff_t index, loff_t start, loff_t end)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct address_space *mapping = inode->v.i_mapping;
+       struct bch_page_state *s;
+       unsigned start_offset = start & (PAGE_SIZE - 1);
+       unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1;
+       unsigned i;
+       struct page *page;
+       int ret = 0;
+
+       /* Page boundary? Nothing to do */
+       if (!((index == start >> PAGE_SHIFT && start_offset) ||
+             (index == end >> PAGE_SHIFT && end_offset != PAGE_SIZE)))
+               return 0;
+
+       /* Above i_size? */
+       if (index << PAGE_SHIFT >= inode->v.i_size)
+               return 0;
+
+       page = find_lock_page(mapping, index);
+       if (!page) {
+               /*
+                * XXX: we're doing two index lookups when we end up reading the
+                * page
+                */
+               ret = range_has_data(c,
+                               POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT),
+                               POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT));
+               if (ret <= 0)
+                       return ret;
+
+               page = find_or_create_page(mapping, index, GFP_KERNEL);
+               if (unlikely(!page)) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       }
+
+       s = bch2_page_state_create(page, 0);
+       if (!s) {
+               ret = -ENOMEM;
+               goto unlock;
+       }
+
+       if (!PageUptodate(page)) {
+               ret = bch2_read_single_page(page, mapping);
+               if (ret)
+                       goto unlock;
+       }
+
+       if (index != start >> PAGE_SHIFT)
+               start_offset = 0;
+       if (index != end >> PAGE_SHIFT)
+               end_offset = PAGE_SIZE;
+
+       for (i = round_up(start_offset, block_bytes(c)) >> 9;
+            i < round_down(end_offset, block_bytes(c)) >> 9;
+            i++) {
+               s->s[i].nr_replicas     = 0;
+               s->s[i].state           = SECTOR_UNALLOCATED;
+       }
+
+       zero_user_segment(page, start_offset, end_offset);
+
+       /*
+        * Bit of a hack - we don't want truncate to fail due to -ENOSPC.
+        *
+        * XXX: because we aren't currently tracking whether the page has
+        * actual data in it (vs. just 0s, or only partially written) this is
+        * wrong. ick.
+        */
+       ret = bch2_get_page_disk_reservation(c, inode, page, false);
+       BUG_ON(ret);
+
+       /*
+        * This removes any writeable userspace mappings; we need to force
+        * .page_mkwrite to be called again before any mmapped writes, to
+        * redirty the full page:
+        */
+       page_mkclean(page);
+       __set_page_dirty_nobuffers(page);
+unlock:
+       unlock_page(page);
+       put_page(page);
+out:
+       return ret;
+}
+
+static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from)
+{
+       return __bch2_truncate_page(inode, from >> PAGE_SHIFT,
+                                   from, round_up(from, PAGE_SIZE));
+}
+
+static int bch2_extend(struct bch_inode_info *inode,
+                      struct bch_inode_unpacked *inode_u,
+                      struct iattr *iattr)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct address_space *mapping = inode->v.i_mapping;
+       int ret;
+
+       /*
+        * Sync appends: flush dirty data above the current on-disk i_size;
+        * this has to be done _before_ extending i_size:
+        */
+       ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX);
+       if (ret)
+               return ret;
+
+       truncate_setsize(&inode->v, iattr->ia_size);
+       setattr_copy(&inode->v, iattr);
+
+       mutex_lock(&inode->ei_update_lock);
+       ret = bch2_write_inode_size(c, inode, inode->v.i_size,
+                                   ATTR_MTIME|ATTR_CTIME);
+       mutex_unlock(&inode->ei_update_lock);
+
+       return ret;
+}
+
+static int bch2_truncate_finish_fn(struct bch_inode_info *inode,
+                                  struct bch_inode_unpacked *bi,
+                                  void *p)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+       bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
+       bi->bi_mtime = bi->bi_ctime = bch2_current_time(c);
+       return 0;
+}
+
+static int bch2_truncate_start_fn(struct bch_inode_info *inode,
+                                 struct bch_inode_unpacked *bi, void *p)
+{
+       u64 *new_i_size = p;
+
+       bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY;
+       bi->bi_size = *new_i_size;
+       return 0;
+}
+
+int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct address_space *mapping = inode->v.i_mapping;
+       struct bch_inode_unpacked inode_u;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       u64 new_i_size = iattr->ia_size;
+       s64 i_sectors_delta = 0;
+       int ret = 0;
+
+       inode_dio_wait(&inode->v);
+       bch2_pagecache_block_get(&inode->ei_pagecache_lock);
+
+       /*
+        * Fetch the current on-disk i_size: the inode is locked, so i_size
+        * can only increase underneath us:
+        */
+       bch2_trans_init(&trans, c, 0, 0);
+       iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0);
+       ret = PTR_ERR_OR_ZERO(iter);
+       bch2_trans_exit(&trans);
+
+       if (ret)
+               goto err;
+
+       /*
+        * check this before the next assertion; on filesystem error our normal
+        * invariants are a bit broken (truncate has to truncate the page cache
+        * before the inode).
+        */
+       ret = bch2_journal_error(&c->journal);
+       if (ret)
+               goto err;
+
+       BUG_ON(inode->v.i_size < inode_u.bi_size);
+
+       if (iattr->ia_size > inode->v.i_size) {
+               ret = bch2_extend(inode, &inode_u, iattr);
+               goto err;
+       }
+
+       ret = bch2_truncate_page(inode, iattr->ia_size);
+       if (unlikely(ret))
+               goto err;
+
+       /*
+        * When extending, we're going to write the new i_size to disk
+        * immediately, so we need to flush anything above the current on-disk
+        * i_size first:
+        *
+        * Also, when extending we need to flush the page that i_size currently
+        * straddles - if it's mapped to userspace, we need to ensure that
+        * userspace has to redirty it and call .mkwrite -> set_page_dirty
+        * again to allocate the part of the page that was extended.
+        */
+       if (iattr->ia_size > inode_u.bi_size)
+               ret = filemap_write_and_wait_range(mapping,
+                               inode_u.bi_size,
+                               iattr->ia_size - 1);
+       else if (iattr->ia_size & (PAGE_SIZE - 1))
+               ret = filemap_write_and_wait_range(mapping,
+                               round_down(iattr->ia_size, PAGE_SIZE),
+                               iattr->ia_size - 1);
+       if (ret)
+               goto err;
+
+       mutex_lock(&inode->ei_update_lock);
+       ret = bch2_write_inode(c, inode, bch2_truncate_start_fn,
+                              &new_i_size, 0);
+       mutex_unlock(&inode->ei_update_lock);
+
+       if (unlikely(ret))
+               goto err;
+
+       truncate_setsize(&inode->v, iattr->ia_size);
+
+       ret = bch2_fpunch(c, inode->v.i_ino,
+                       round_up(iattr->ia_size, block_bytes(c)) >> 9,
+                       U64_MAX, &inode->ei_journal_seq, &i_sectors_delta);
+       i_sectors_acct(c, inode, NULL, i_sectors_delta);
+
+       if (unlikely(ret))
+               goto err;
+
+       setattr_copy(&inode->v, iattr);
+
+       mutex_lock(&inode->ei_update_lock);
+       ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL,
+                              ATTR_MTIME|ATTR_CTIME);
+       mutex_unlock(&inode->ei_update_lock);
+err:
+       bch2_pagecache_block_put(&inode->ei_pagecache_lock);
+       return ret;
+}
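+
+/*
+ * Illustrative userspace sketch - not part of this file: both directions of
+ * ftruncate(2) land in bch2_truncate() above - growing takes the
+ * bch2_extend() path, shrinking punches the extents past the new size. The
+ * path is hypothetical:
+ */
+#if 0
+#include <fcntl.h>
+#include <unistd.h>
+
+int main(void)
+{
+        int fd = open("/mnt/bcachefs/demo", O_RDWR);
+
+        ftruncate(fd, 1 << 20); /* extend: new range reads back as zeroes */
+        ftruncate(fd, 4000);    /* shrink: tail of the straddled page is zeroed */
+
+        close(fd);
+        return 0;
+}
+#endif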
+
+/* fallocate: */
+
+static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       u64 discard_start = round_up(offset, block_bytes(c)) >> 9;
+       u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9;
+       int ret = 0;
+
+       inode_lock(&inode->v);
+       inode_dio_wait(&inode->v);
+       bch2_pagecache_block_get(&inode->ei_pagecache_lock);
+
+       ret = __bch2_truncate_page(inode,
+                                  offset >> PAGE_SHIFT,
+                                  offset, offset + len);
+       if (unlikely(ret))
+               goto err;
+
+       if (offset >> PAGE_SHIFT !=
+           (offset + len) >> PAGE_SHIFT) {
+               ret = __bch2_truncate_page(inode,
+                                          (offset + len) >> PAGE_SHIFT,
+                                          offset, offset + len);
+               if (unlikely(ret))
+                       goto err;
+       }
+
+       truncate_pagecache_range(&inode->v, offset, offset + len - 1);
+
+       if (discard_start < discard_end) {
+               s64 i_sectors_delta = 0;
+
+               ret = bch2_fpunch(c, inode->v.i_ino,
+                                 discard_start, discard_end,
+                                 &inode->ei_journal_seq,
+                                 &i_sectors_delta);
+               i_sectors_acct(c, inode, NULL, i_sectors_delta);
+       }
+err:
+       bch2_pagecache_block_put(&inode->ei_pagecache_lock);
+       inode_unlock(&inode->v);
+
+       return ret;
+}
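+
+/*
+ * Worked example of the rounding in bchfs_fpunch(), assuming 4096-byte
+ * blocks: for offset=1000, len=5000, discard_start = round_up(1000, 4096)
+ * >> 9 = 8 and discard_end = round_down(6000, 4096) >> 9 = 8, so no whole
+ * block is discarded and the punch is satisfied entirely by the partial-page
+ * zeroing in __bch2_truncate_page().
+ */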
+
+static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
+                                  loff_t offset, loff_t len,
+                                  bool insert)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct address_space *mapping = inode->v.i_mapping;
+       struct bkey_on_stack copy;
+       struct btree_trans trans;
+       struct btree_iter *src, *dst;
+       loff_t shift, new_size;
+       u64 src_start;
+       int ret;
+
+       if ((offset | len) & (block_bytes(c) - 1))
+               return -EINVAL;
+
+       bkey_on_stack_init(&copy);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
+
+       /*
+        * We need i_mutex to keep the page cache consistent with the extents
+        * btree, and the btree consistent with i_size - we don't need outside
+        * locking for the extents btree itself, because we're using linked
+        * iterators
+        */
+       inode_lock(&inode->v);
+       inode_dio_wait(&inode->v);
+       bch2_pagecache_block_get(&inode->ei_pagecache_lock);
+
+       if (insert) {
+               ret = -EFBIG;
+               if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
+                       goto err;
+
+               ret = -EINVAL;
+               if (offset >= inode->v.i_size)
+                       goto err;
+
+               src_start       = U64_MAX;
+               shift           = len;
+       } else {
+               ret = -EINVAL;
+               if (offset + len >= inode->v.i_size)
+                       goto err;
+
+               src_start       = offset + len;
+               shift           = -len;
+       }
+
+       new_size = inode->v.i_size + shift;
+
+       ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
+       if (ret)
+               goto err;
+
+       if (insert) {
+               i_size_write(&inode->v, new_size);
+               mutex_lock(&inode->ei_update_lock);
+               ret = bch2_write_inode_size(c, inode, new_size,
+                                           ATTR_MTIME|ATTR_CTIME);
+               mutex_unlock(&inode->ei_update_lock);
+       } else {
+               s64 i_sectors_delta = 0;
+
+               ret = bch2_fpunch(c, inode->v.i_ino,
+                                 offset >> 9, (offset + len) >> 9,
+                                 &inode->ei_journal_seq,
+                                 &i_sectors_delta);
+               i_sectors_acct(c, inode, NULL, i_sectors_delta);
+
+               if (ret)
+                       goto err;
+       }
+
+       src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                       POS(inode->v.i_ino, src_start >> 9),
+                       BTREE_ITER_INTENT);
+       BUG_ON(IS_ERR_OR_NULL(src));
+
+       dst = bch2_trans_copy_iter(&trans, src);
+       BUG_ON(IS_ERR_OR_NULL(dst));
+
+       while (1) {
+               struct disk_reservation disk_res =
+                       bch2_disk_reservation_init(c, 0);
+               struct bkey_i delete;
+               struct bkey_s_c k;
+               struct bpos next_pos;
+               struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
+               struct bpos atomic_end;
+               unsigned trigger_flags = 0;
+
+               k = insert
+                       ? bch2_btree_iter_peek_prev(src)
+                       : bch2_btree_iter_peek(src);
+               if ((ret = bkey_err(k)))
+                       goto bkey_err;
+
+               if (!k.k || k.k->p.inode != inode->v.i_ino)
+                       break;
+
+               BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k)));
+
+               if (insert &&
+                   bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
+                       break;
+reassemble:
+               bkey_on_stack_reassemble(&copy, c, k);
+
+               if (insert &&
+                   bkey_cmp(bkey_start_pos(k.k), move_pos) < 0)
+                       bch2_cut_front(move_pos, copy.k);
+
+               copy.k->k.p.offset += shift >> 9;
+               bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k->k));
+
+               ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end);
+               if (ret)
+                       goto bkey_err;
+
+               if (bkey_cmp(atomic_end, copy.k->k.p)) {
+                       if (insert) {
+                               move_pos = atomic_end;
+                               move_pos.offset -= shift >> 9;
+                               goto reassemble;
+                       } else {
+                               bch2_cut_back(atomic_end, copy.k);
+                       }
+               }
+
+               bkey_init(&delete.k);
+               delete.k.p = copy.k->k.p;
+               delete.k.size = copy.k->k.size;
+               delete.k.p.offset -= shift >> 9;
+
+               next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
+
+               if (copy.k->k.size == k.k->size) {
+                       /*
+                        * If we're moving the entire extent, we can skip
+                        * running triggers:
+                        */
+                       trigger_flags |= BTREE_TRIGGER_NORUN;
+               } else {
+                       /* We might end up splitting compressed extents: */
+                       unsigned nr_ptrs =
+                               bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k));
+
+                       ret = bch2_disk_reservation_get(c, &disk_res,
+                                       copy.k->k.size, nr_ptrs,
+                                       BCH_DISK_RESERVATION_NOFAIL);
+                       BUG_ON(ret);
+               }
+
+               bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k));
+
+               ret =   bch2_trans_update(&trans, src, &delete, trigger_flags) ?:
+                       bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?:
+                       bch2_trans_commit(&trans, &disk_res,
+                                         &inode->ei_journal_seq,
+                                         BTREE_INSERT_NOFAIL);
+               bch2_disk_reservation_put(c, &disk_res);
+bkey_err:
+               if (!ret)
+                       bch2_btree_iter_set_pos(src, next_pos);
+
+               if (ret == -EINTR)
+                       ret = 0;
+               if (ret)
+                       goto err;
+
+               bch2_trans_cond_resched(&trans);
+       }
+       bch2_trans_unlock(&trans);
+
+       if (!insert) {
+               i_size_write(&inode->v, new_size);
+               mutex_lock(&inode->ei_update_lock);
+               ret = bch2_write_inode_size(c, inode, new_size,
+                                           ATTR_MTIME|ATTR_CTIME);
+               mutex_unlock(&inode->ei_update_lock);
+       }
+err:
+       bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&copy, c);
+       bch2_pagecache_block_put(&inode->ei_pagecache_lock);
+       inode_unlock(&inode->v);
+       return ret;
+}
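+
+/*
+ * Worked example of the shift arithmetic in bchfs_fcollapse_finsert(), with
+ * illustrative values: collapsing [offset, offset+len) with len=1MB sets
+ * shift=-1MB and walks forward from src_start = offset+len, moving each
+ * extent left; inserting sets shift=+1MB and walks backwards from the end
+ * (src_start = U64_MAX, via bch2_btree_iter_peek_prev()) so extents are
+ * moved right without overwriting ones that haven't been copied yet.
+ */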
+
+static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
+                           loff_t offset, loff_t len)
+{
+       struct address_space *mapping = inode->v.i_mapping;
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bpos end_pos;
+       loff_t end              = offset + len;
+       loff_t block_start      = round_down(offset,    block_bytes(c));
+       loff_t block_end        = round_up(end,         block_bytes(c));
+       unsigned sectors;
+       unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas;
+       int ret;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       inode_lock(&inode->v);
+       inode_dio_wait(&inode->v);
+       bch2_pagecache_block_get(&inode->ei_pagecache_lock);
+
+       if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) {
+               ret = inode_newsize_ok(&inode->v, end);
+               if (ret)
+                       goto err;
+       }
+
+       if (mode & FALLOC_FL_ZERO_RANGE) {
+               ret = __bch2_truncate_page(inode,
+                                          offset >> PAGE_SHIFT,
+                                          offset, end);
+
+               if (!ret &&
+                   offset >> PAGE_SHIFT != end >> PAGE_SHIFT)
+                       ret = __bch2_truncate_page(inode,
+                                                  end >> PAGE_SHIFT,
+                                                  offset, end);
+
+               if (unlikely(ret))
+                       goto err;
+
+               truncate_pagecache_range(&inode->v, offset, end - 1);
+       }
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                       POS(inode->v.i_ino, block_start >> 9),
+                       BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       end_pos = POS(inode->v.i_ino, block_end >> 9);
+
+       while (bkey_cmp(iter->pos, end_pos) < 0) {
+               s64 i_sectors_delta = 0;
+               struct disk_reservation disk_res = { 0 };
+               struct quota_res quota_res = { 0 };
+               struct bkey_i_reservation reservation;
+               struct bkey_s_c k;
+
+               bch2_trans_begin(&trans);
+
+               k = bch2_btree_iter_peek_slot(iter);
+               if ((ret = bkey_err(k)))
+                       goto bkey_err;
+
+               /* already reserved */
+               if (k.k->type == KEY_TYPE_reservation &&
+                   bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) {
+                       bch2_btree_iter_next_slot(iter);
+                       continue;
+               }
+
+               if (bkey_extent_is_data(k.k) &&
+                   !(mode & FALLOC_FL_ZERO_RANGE)) {
+                       bch2_btree_iter_next_slot(iter);
+                       continue;
+               }
+
+               bkey_reservation_init(&reservation.k_i);
+               reservation.k.type      = KEY_TYPE_reservation;
+               reservation.k.p         = k.k->p;
+               reservation.k.size      = k.k->size;
+
+               bch2_cut_front(iter->pos,       &reservation.k_i);
+               bch2_cut_back(end_pos,          &reservation.k_i);
+
+               sectors = reservation.k.size;
+               reservation.v.nr_replicas = bch2_bkey_nr_ptrs_allocated(k);
+
+               if (!bkey_extent_is_allocation(k.k)) {
+                       ret = bch2_quota_reservation_add(c, inode,
+                                       &quota_res,
+                                       sectors, true);
+                       if (unlikely(ret))
+                               goto bkey_err;
+               }
+
+               if (reservation.v.nr_replicas < replicas ||
+                   bch2_bkey_sectors_compressed(k)) {
+                       ret = bch2_disk_reservation_get(c, &disk_res, sectors,
+                                                       replicas, 0);
+                       if (unlikely(ret))
+                               goto bkey_err;
+
+                       reservation.v.nr_replicas = disk_res.nr_replicas;
+               }
+
+               ret = bch2_extent_update(&trans, iter, &reservation.k_i,
+                               &disk_res, &inode->ei_journal_seq,
+                               0, &i_sectors_delta);
+               i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
+bkey_err:
+               bch2_quota_reservation_put(c, inode, &quota_res);
+               bch2_disk_reservation_put(c, &disk_res);
+               if (ret == -EINTR)
+                       ret = 0;
+               if (ret)
+                       goto err;
+       }
+
+       /*
+        * Do we need to extend the file?
+        *
+        * If we zeroed up to the end of the file, we dropped whatever writes
+        * were going to write out the current i_size, so we have to extend
+        * manually even if FALLOC_FL_KEEP_SIZE was set:
+        */
+       if (end >= inode->v.i_size &&
+           (!(mode & FALLOC_FL_KEEP_SIZE) ||
+            (mode & FALLOC_FL_ZERO_RANGE))) {
+               struct btree_iter *inode_iter;
+               struct bch_inode_unpacked inode_u;
+
+               do {
+                       bch2_trans_begin(&trans);
+                       inode_iter = bch2_inode_peek(&trans, &inode_u,
+                                                    inode->v.i_ino, 0);
+                       ret = PTR_ERR_OR_ZERO(inode_iter);
+               } while (ret == -EINTR);
+
+               bch2_trans_unlock(&trans);
+
+               if (ret)
+                       goto err;
+
+               /*
+                * Sync existing appends before extending i_size,
+                * as in bch2_extend():
+                */
+               ret = filemap_write_and_wait_range(mapping,
+                                       inode_u.bi_size, S64_MAX);
+               if (ret)
+                       goto err;
+
+               if (mode & FALLOC_FL_KEEP_SIZE)
+                       end = inode->v.i_size;
+               else
+                       i_size_write(&inode->v, end);
+
+               mutex_lock(&inode->ei_update_lock);
+               ret = bch2_write_inode_size(c, inode, end, 0);
+               mutex_unlock(&inode->ei_update_lock);
+       }
+err:
+       bch2_trans_exit(&trans);
+       bch2_pagecache_block_put(&inode->ei_pagecache_lock);
+       inode_unlock(&inode->v);
+       return ret;
+}
+
+long bch2_fallocate_dispatch(struct file *file, int mode,
+                            loff_t offset, loff_t len)
+{
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       long ret;
+
+       if (!percpu_ref_tryget(&c->writes))
+               return -EROFS;
+
+       if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
+               ret = bchfs_fallocate(inode, mode, offset, len);
+       else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
+               ret = bchfs_fpunch(inode, offset, len);
+       else if (mode == FALLOC_FL_INSERT_RANGE)
+               ret = bchfs_fcollapse_finsert(inode, offset, len, true);
+       else if (mode == FALLOC_FL_COLLAPSE_RANGE)
+               ret = bchfs_fcollapse_finsert(inode, offset, len, false);
+       else
+               ret = -EOPNOTSUPP;
+
+       percpu_ref_put(&c->writes);
+
+       return ret;
+}
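+
+/*
+ * Illustrative userspace sketch - not part of this file - exercising the
+ * mode dispatch above; offset/len must be block-aligned for the
+ * collapse/insert cases or they fail with EINVAL. The path is hypothetical:
+ */
+#if 0
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#include <unistd.h>
+
+int main(void)
+{
+        int fd = open("/mnt/bcachefs/demo", O_RDWR);
+
+        fallocate(fd, 0, 0, 1 << 20);                   /* reserve space */
+        fallocate(fd, FALLOC_FL_ZERO_RANGE, 4096, 8192);
+        fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                  4096, 8192);
+        fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, 4096);
+        fallocate(fd, FALLOC_FL_INSERT_RANGE, 0, 4096);
+
+        close(fd);
+        return 0;
+}
+#endif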
+
+static void mark_range_unallocated(struct bch_inode_info *inode,
+                                  loff_t start, loff_t end)
+{
+       pgoff_t index = start >> PAGE_SHIFT;
+       pgoff_t end_index = (end - 1) >> PAGE_SHIFT;
+       struct pagevec pvec;
+
+       pagevec_init(&pvec);
+
+       do {
+               unsigned nr_pages, i, j;
+
+               nr_pages = pagevec_lookup_range(&pvec, inode->v.i_mapping,
+                                               &index, end_index);
+               if (nr_pages == 0)
+                       break;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+                       struct bch_page_state *s;
+
+                       lock_page(page);
+                       s = bch2_page_state(page);
+
+                       if (s) {
+                               spin_lock(&s->lock);
+                               for (j = 0; j < PAGE_SECTORS; j++)
+                                       s->s[j].nr_replicas = 0;
+                               spin_unlock(&s->lock);
+                       }
+
+                       unlock_page(page);
+               }
+               pagevec_release(&pvec);
+       } while (index <= end_index);
+}
+
+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
+                            struct file *file_dst, loff_t pos_dst,
+                            loff_t len, unsigned remap_flags)
+{
+       struct bch_inode_info *src = file_bch_inode(file_src);
+       struct bch_inode_info *dst = file_bch_inode(file_dst);
+       struct bch_fs *c = src->v.i_sb->s_fs_info;
+       s64 i_sectors_delta = 0;
+       u64 aligned_len;
+       loff_t ret = 0;
+
+       if (!c->opts.reflink)
+               return -EOPNOTSUPP;
+
+       if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
+               return -EINVAL;
+
+       if (remap_flags & REMAP_FILE_DEDUP)
+               return -EOPNOTSUPP;
+
+       if ((pos_src & (block_bytes(c) - 1)) ||
+           (pos_dst & (block_bytes(c) - 1)))
+               return -EINVAL;
+
+       if (src == dst &&
+           abs(pos_src - pos_dst) < len)
+               return -EINVAL;
+
+       bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+
+       file_update_time(file_dst);
+
+       inode_dio_wait(&src->v);
+       inode_dio_wait(&dst->v);
+
+       ret = generic_remap_file_range_prep(file_src, pos_src,
+                                           file_dst, pos_dst,
+                                           &len, remap_flags);
+       if (ret < 0 || len == 0)
+               goto err;
+
+       aligned_len = round_up((u64) len, block_bytes(c));
+
+       ret = write_invalidate_inode_pages_range(dst->v.i_mapping,
+                               pos_dst, pos_dst + len - 1);
+       if (ret)
+               goto err;
+
+       mark_range_unallocated(src, pos_src, pos_src + aligned_len);
+
+       ret = bch2_remap_range(c,
+                              POS(dst->v.i_ino, pos_dst >> 9),
+                              POS(src->v.i_ino, pos_src >> 9),
+                              aligned_len >> 9,
+                              &dst->ei_journal_seq,
+                              pos_dst + len, &i_sectors_delta);
+       if (ret < 0)
+               goto err;
+
+       /*
+        * due to alignment, we might have remapped slightly more than requested
+        */
+       ret = min((u64) ret << 9, (u64) len);
+
+       /* XXX get a quota reservation */
+       i_sectors_acct(c, dst, NULL, i_sectors_delta);
+
+       spin_lock(&dst->v.i_lock);
+       if (pos_dst + ret > dst->v.i_size)
+               i_size_write(&dst->v, pos_dst + ret);
+       spin_unlock(&dst->v.i_lock);
+err:
+       bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+
+       return ret;
+}
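+
+/*
+ * Illustrative userspace sketch - not part of this file: the usual way into
+ * ->remap_file_range is the FICLONE/FICLONERANGE ioctls (what cp --reflink
+ * uses); offsets must be block-aligned. Paths are hypothetical:
+ */
+#if 0
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+int main(void)
+{
+        int src = open("/mnt/bcachefs/a", O_RDONLY);
+        int dst = open("/mnt/bcachefs/b", O_RDWR | O_CREAT, 0644);
+
+        /* share all of a's extents with b - no data is copied: */
+        ioctl(dst, FICLONE, src);
+
+        close(src);
+        close(dst);
+        return 0;
+}
+#endif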
+
+/* fseek: */
+
+static int page_data_offset(struct page *page, unsigned offset)
+{
+       struct bch_page_state *s = bch2_page_state(page);
+       unsigned i;
+
+       if (s)
+               for (i = offset >> 9; i < PAGE_SECTORS; i++)
+                       if (s->s[i].state >= SECTOR_DIRTY)
+                               return i << 9;
+
+       return -1;
+}
+
+static loff_t bch2_seek_pagecache_data(struct inode *vinode,
+                                      loff_t start_offset,
+                                      loff_t end_offset)
+{
+       struct address_space *mapping = vinode->i_mapping;
+       struct page *page;
+       pgoff_t start_index     = start_offset >> PAGE_SHIFT;
+       pgoff_t end_index       = end_offset >> PAGE_SHIFT;
+       pgoff_t index           = start_index;
+       loff_t ret;
+       int offset;
+
+       while (index <= end_index) {
+               if (find_get_pages_range(mapping, &index, end_index, 1, &page)) {
+                       lock_page(page);
+
+                       offset = page_data_offset(page,
+                                       page->index == start_index
+                                       ? start_offset & (PAGE_SIZE - 1)
+                                       : 0);
+                       if (offset >= 0) {
+                               ret = clamp(((loff_t) page->index << PAGE_SHIFT) +
+                                           offset,
+                                           start_offset, end_offset);
+                               unlock_page(page);
+                               put_page(page);
+                               return ret;
+                       }
+
+                       unlock_page(page);
+                       put_page(page);
+               } else {
+                       break;
+               }
+       }
+
+       return end_offset;
+}
+
+static loff_t bch2_seek_data(struct file *file, u64 offset)
+{
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 isize, next_data = MAX_LFS_FILESIZE;
+       int ret;
+
+       isize = i_size_read(&inode->v);
+       if (offset >= isize)
+               return -ENXIO;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
+                          POS(inode->v.i_ino, offset >> 9), 0, k, ret) {
+               if (k.k->p.inode != inode->v.i_ino) {
+                       break;
+               } else if (bkey_extent_is_data(k.k)) {
+                       next_data = max(offset, bkey_start_offset(k.k) << 9);
+                       break;
+               } else if (k.k->p.offset >> 9 > isize)
+                       break;
+       }
+
+       ret = bch2_trans_exit(&trans) ?: ret;
+       if (ret)
+               return ret;
+
+       if (next_data > offset)
+               next_data = bch2_seek_pagecache_data(&inode->v,
+                                                    offset, next_data);
+
+       if (next_data >= isize)
+               return -ENXIO;
+
+       return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
+}
+
+static int __page_hole_offset(struct page *page, unsigned offset)
+{
+       struct bch_page_state *s = bch2_page_state(page);
+       unsigned i;
+
+       if (!s)
+               return 0;
+
+       for (i = offset >> 9; i < PAGE_SECTORS; i++)
+               if (s->s[i].state < SECTOR_DIRTY)
+                       return i << 9;
+
+       return -1;
+}
+
+static loff_t page_hole_offset(struct address_space *mapping, loff_t offset)
+{
+       pgoff_t index = offset >> PAGE_SHIFT;
+       struct page *page;
+       int pg_offset;
+       loff_t ret = -1;
+
+       page = find_lock_entry(mapping, index);
+       if (!page || xa_is_value(page))
+               return offset;
+
+       pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1));
+       if (pg_offset >= 0)
+               ret = ((loff_t) index << PAGE_SHIFT) + pg_offset;
+
+       unlock_page(page);
+
+       return ret;
+}
+
+static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
+                                      loff_t start_offset,
+                                      loff_t end_offset)
+{
+       struct address_space *mapping = vinode->i_mapping;
+       loff_t offset = start_offset, hole;
+
+       while (offset < end_offset) {
+               hole = page_hole_offset(mapping, offset);
+               if (hole >= 0 && hole <= end_offset)
+                       return max(start_offset, hole);
+
+               offset += PAGE_SIZE;
+               offset &= PAGE_MASK;
+       }
+
+       return end_offset;
+}
+
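+/*
+ * SEEK_HOLE: iterate with BTREE_ITER_SLOTS so that holes in the extents btree
+ * show up as keys; each btree hole still has to be checked against the
+ * pagecache, since dirty pages may be filling it:
+ */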
+static loff_t bch2_seek_hole(struct file *file, u64 offset)
+{
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 isize, next_hole = MAX_LFS_FILESIZE;
+       int ret;
+
+       isize = i_size_read(&inode->v);
+       if (offset >= isize)
+               return -ENXIO;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
+                          POS(inode->v.i_ino, offset >> 9),
+                          BTREE_ITER_SLOTS, k, ret) {
+               if (k.k->p.inode != inode->v.i_ino) {
+                       next_hole = bch2_seek_pagecache_hole(&inode->v,
+                                       offset, MAX_LFS_FILESIZE);
+                       break;
+               } else if (!bkey_extent_is_data(k.k)) {
+                       next_hole = bch2_seek_pagecache_hole(&inode->v,
+                                       max(offset, bkey_start_offset(k.k) << 9),
+                                       k.k->p.offset << 9);
+
+                       if (next_hole < k.k->p.offset << 9)
+                               break;
+               } else {
+                       offset = max(offset, bkey_start_offset(k.k) << 9);
+               }
+       }
+
+       ret = bch2_trans_exit(&trans) ?: ret;
+       if (ret)
+               return ret;
+
+       if (next_hole > isize)
+               next_hole = isize;
+
+       return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE);
+}
+
+loff_t bch2_llseek(struct file *file, loff_t offset, int whence)
+{
+       switch (whence) {
+       case SEEK_SET:
+       case SEEK_CUR:
+       case SEEK_END:
+               return generic_file_llseek(file, offset, whence);
+       case SEEK_DATA:
+               return bch2_seek_data(file, offset);
+       case SEEK_HOLE:
+               return bch2_seek_hole(file, offset);
+       }
+
+       return -EINVAL;
+}
+
+void bch2_fs_fsio_exit(struct bch_fs *c)
+{
+       bioset_exit(&c->dio_write_bioset);
+       bioset_exit(&c->dio_read_bioset);
+       bioset_exit(&c->writepage_bioset);
+}
+
+int bch2_fs_fsio_init(struct bch_fs *c)
+{
+       int ret = 0;
+
+       pr_verbose_init(c->opts, "");
+
+       if (bioset_init(&c->writepage_bioset,
+                       4, offsetof(struct bch_writepage_io, op.wbio.bio),
+                       BIOSET_NEED_BVECS) ||
+           bioset_init(&c->dio_read_bioset,
+                       4, offsetof(struct dio_read, rbio.bio),
+                       BIOSET_NEED_BVECS) ||
+           bioset_init(&c->dio_write_bioset,
+                       4, offsetof(struct dio_write, op.wbio.bio),
+                       BIOSET_NEED_BVECS))
+               ret = -ENOMEM;
+
+       pr_verbose_init(c->opts, "ret %i", ret);
+       return ret;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h
new file mode 100644 (file)
index 0000000..7063556
--- /dev/null
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_IO_H
+#define _BCACHEFS_FS_IO_H
+
+#ifndef NO_BCACHEFS_FS
+
+#include "buckets.h"
+#include "io_types.h"
+
+#include <linux/uio.h>
+
+struct quota_res;
+
+int __must_check bch2_write_inode_size(struct bch_fs *,
+                                      struct bch_inode_info *,
+                                      loff_t, unsigned);
+
+int bch2_writepage(struct page *, struct writeback_control *);
+int bch2_readpage(struct file *, struct page *);
+
+int bch2_writepages(struct address_space *, struct writeback_control *);
+int bch2_readpages(struct file *, struct address_space *,
+                  struct list_head *, unsigned);
+
+int bch2_write_begin(struct file *, struct address_space *, loff_t,
+                    unsigned, unsigned, struct page **, void **);
+int bch2_write_end(struct file *, struct address_space *, loff_t,
+                  unsigned, unsigned, struct page *, void *);
+
+ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *);
+ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
+
+int bch2_fsync(struct file *, loff_t, loff_t, int);
+
+int bch2_truncate(struct bch_inode_info *, struct iattr *);
+long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
+
+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
+                            loff_t, loff_t, unsigned);
+
+loff_t bch2_llseek(struct file *, loff_t, int);
+
+vm_fault_t bch2_page_fault(struct vm_fault *);
+vm_fault_t bch2_page_mkwrite(struct vm_fault *);
+void bch2_invalidatepage(struct page *, unsigned int, unsigned int);
+int bch2_releasepage(struct page *, gfp_t);
+int bch2_migrate_page(struct address_space *, struct page *,
+                     struct page *, enum migrate_mode);
+
+void bch2_fs_fsio_exit(struct bch_fs *);
+int bch2_fs_fsio_init(struct bch_fs *);
+#else
+static inline void bch2_fs_fsio_exit(struct bch_fs *c) {}
+static inline int bch2_fs_fsio_init(struct bch_fs *c) { return 0; }
+#endif
+
+#endif /* _BCACHEFS_FS_IO_H */
diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c
new file mode 100644 (file)
index 0000000..0873d2f
--- /dev/null
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "chardev.h"
+#include "dirent.h"
+#include "fs.h"
+#include "fs-common.h"
+#include "fs-ioctl.h"
+#include "quota.h"
+
+#include <linux/compat.h>
+#include <linux/mount.h>
+
+#define FS_IOC_GOINGDOWN            _IOR('X', 125, __u32)
+
+struct flags_set {
+       unsigned                mask;
+       unsigned                flags;
+
+       unsigned                projid;
+};
+
+static int bch2_inode_flags_set(struct bch_inode_info *inode,
+                               struct bch_inode_unpacked *bi,
+                               void *p)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       /*
+        * We're relying on btree locking here for exclusion with other ioctl
+        * calls - use the flags in the btree (@bi), not inode->i_flags:
+        */
+       struct flags_set *s = p;
+       unsigned newflags = s->flags;
+       unsigned oldflags = bi->bi_flags & s->mask;
+
+       if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) &&
+           !capable(CAP_LINUX_IMMUTABLE))
+               return -EPERM;
+
+       if (!S_ISREG(bi->bi_mode) &&
+           !S_ISDIR(bi->bi_mode) &&
+           (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
+               return -EINVAL;
+
+       bi->bi_flags &= ~s->mask;
+       bi->bi_flags |= newflags;
+
+       bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
+       return 0;
+}
+
+static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
+{
+       unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
+
+       return put_user(flags, arg);
+}
+
+static int bch2_ioc_setflags(struct bch_fs *c,
+                            struct file *file,
+                            struct bch_inode_info *inode,
+                            void __user *arg)
+{
+       struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
+       unsigned uflags;
+       int ret;
+
+       if (get_user(uflags, (int __user *) arg))
+               return -EFAULT;
+
+       s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
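+       /* map_flags_rev() clears the bits it consumed; anything left is unsupported: */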
+       if (uflags)
+               return -EOPNOTSUPP;
+
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
+
+       inode_lock(&inode->v);
+       if (!inode_owner_or_capable(&inode->v)) {
+               ret = -EACCES;
+               goto setflags_out;
+       }
+
+       mutex_lock(&inode->ei_update_lock);
+       ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
+                              ATTR_CTIME);
+       mutex_unlock(&inode->ei_update_lock);
+
+setflags_out:
+       inode_unlock(&inode->v);
+       mnt_drop_write_file(file);
+       return ret;
+}
+
+static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
+                              struct fsxattr __user *arg)
+{
+       struct fsxattr fa = { 0 };
+
+       fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
+       fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
+
+       return copy_to_user(arg, &fa, sizeof(fa)) ? -EFAULT : 0;
+}
+
+static int fssetxattr_inode_update_fn(struct bch_inode_info *inode,
+                                     struct bch_inode_unpacked *bi,
+                                     void *p)
+{
+       struct flags_set *s = p;
+
+       if (s->projid != bi->bi_project) {
+               bi->bi_fields_set |= 1U << Inode_opt_project;
+               bi->bi_project = s->projid;
+       }
+
+       return bch2_inode_flags_set(inode, bi, p);
+}
+
+static int bch2_ioc_fssetxattr(struct bch_fs *c,
+                              struct file *file,
+                              struct bch_inode_info *inode,
+                              struct fsxattr __user *arg)
+{
+       struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
+       struct fsxattr fa;
+       int ret;
+
+       if (copy_from_user(&fa, arg, sizeof(fa)))
+               return -EFAULT;
+
+       s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
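+       /* any fsx_xflags bits map_flags_rev() didn't consume are unsupported: */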
+       if (fa.fsx_xflags)
+               return -EOPNOTSUPP;
+
+       if (fa.fsx_projid >= U32_MAX)
+               return -EINVAL;
+
+       /*
+        * inode fields accessible via the xattr interface are stored with a +1
+        * bias, so that 0 means unset:
+        */
+       s.projid = fa.fsx_projid + 1;
+
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
+
+       inode_lock(&inode->v);
+       if (!inode_owner_or_capable(&inode->v)) {
+               ret = -EACCES;
+               goto err;
+       }
+
+       mutex_lock(&inode->ei_update_lock);
+       ret = bch2_set_projid(c, inode, fa.fsx_projid);
+       if (ret)
+               goto err_unlock;
+
+       ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+                              ATTR_CTIME);
+err_unlock:
+       mutex_unlock(&inode->ei_update_lock);
+err:
+       inode_unlock(&inode->v);
+       mnt_drop_write_file(file);
+       return ret;
+}
+
+static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode,
+                                  struct bch_inode_unpacked *bi,
+                                  void *p)
+{
+       struct bch_inode_info *dir = p;
+
+       return !bch2_reinherit_attrs(bi, &dir->ei_inode);
+}
+
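+/*
+ * BCHFS_IOC_REINHERIT_ATTRS: look up @name in directory @src and reapply
+ * @src's inheritable options (e.g. the project ID) to that inode. Returns 1
+ * if attributes were changed, 0 if there was nothing to do:
+ */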
+static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
+                                   struct file *file,
+                                   struct bch_inode_info *src,
+                                   const char __user *name)
+{
+       struct bch_inode_info *dst;
+       struct inode *vinode = NULL;
+       char *kname = NULL;
+       struct qstr qstr;
+       int ret = 0;
+       u64 inum;
+
+       kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
+       if (!kname)
+               return -ENOMEM;
+
+       ret = strncpy_from_user(kname, name, BCH_NAME_MAX);
+       if (unlikely(ret < 0))
+               goto err1;
+
+       qstr.len        = ret;
+       qstr.name       = kname;
+
+       ret = -ENOENT;
+       inum = bch2_dirent_lookup(c, src->v.i_ino,
+                                 &src->ei_str_hash,
+                                 &qstr);
+       if (!inum)
+               goto err1;
+
+       vinode = bch2_vfs_inode_get(c, inum);
+       ret = PTR_ERR_OR_ZERO(vinode);
+       if (ret)
+               goto err1;
+
+       dst = to_bch_ei(vinode);
+
+       ret = mnt_want_write_file(file);
+       if (ret)
+               goto err2;
+
+       bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);
+
+       if (inode_attr_changing(src, dst, Inode_opt_project)) {
+               ret = bch2_fs_quota_transfer(c, dst,
+                                            src->ei_qid,
+                                            1 << QTYP_PRJ,
+                                            KEY_TYPE_QUOTA_PREALLOC);
+               if (ret)
+                       goto err3;
+       }
+
+       ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
+err3:
+       bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
+
+       /* return true if we did work */
+       if (ret >= 0)
+               ret = !ret;
+
+       mnt_drop_write_file(file);
+err2:
+       iput(vinode);
+err1:
+       kfree(kname);
+
+       return ret;
+}
+
+long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct super_block *sb = inode->v.i_sb;
+       struct bch_fs *c = sb->s_fs_info;
+
+       switch (cmd) {
+       case FS_IOC_GETFLAGS:
+               return bch2_ioc_getflags(inode, (int __user *) arg);
+
+       case FS_IOC_SETFLAGS:
+               return bch2_ioc_setflags(c, file, inode, (int __user *) arg);
+
+       case FS_IOC_FSGETXATTR:
+               return bch2_ioc_fsgetxattr(inode, (void __user *) arg);
+       case FS_IOC_FSSETXATTR:
+               return bch2_ioc_fssetxattr(c, file, inode,
+                                          (void __user *) arg);
+
+       case BCHFS_IOC_REINHERIT_ATTRS:
+               return bch2_ioc_reinherit_attrs(c, file, inode,
+                                               (void __user *) arg);
+
+       case FS_IOC_GETVERSION:
+               return -ENOTTY;
+       case FS_IOC_SETVERSION:
+               return -ENOTTY;
+
+       case FS_IOC_GOINGDOWN:
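+               /*
+                * Note: the XFS-style goingdown flags argument is currently
+                * ignored; we always go emergency read only:
+                */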
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               down_write(&sb->s_umount);
+               sb->s_flags |= SB_RDONLY;
+               if (bch2_fs_emergency_read_only(c))
+                       bch_err(c, "emergency read only due to ioctl");
+               up_write(&sb->s_umount);
+               return 0;
+
+       default:
+               return bch2_fs_ioctl(c, cmd, (void __user *) arg);
+       }
+}
+
+#ifdef CONFIG_COMPAT
+long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+       /* These are just misnamed; they actually get/put an int from/to userspace: */
+       switch (cmd) {
+       case FS_IOC32_GETFLAGS:
+               cmd = FS_IOC_GETFLAGS;
+               break;
+       case FS_IOC32_SETFLAGS:
+               cmd = FS_IOC_SETFLAGS;
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+       return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+}
+#endif
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/libbcachefs/fs-ioctl.h b/libbcachefs/fs-ioctl.h
new file mode 100644 (file)
index 0000000..f201980
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_IOCTL_H
+#define _BCACHEFS_FS_IOCTL_H
+
+/* Inode flags: */
+
+/* bcachefs inode flags -> vfs inode flags: */
+static const unsigned bch_flags_to_vfs[] = {
+       [__BCH_INODE_SYNC]      = S_SYNC,
+       [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
+       [__BCH_INODE_APPEND]    = S_APPEND,
+       [__BCH_INODE_NOATIME]   = S_NOATIME,
+};
+
+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
+static const unsigned bch_flags_to_uflags[] = {
+       [__BCH_INODE_SYNC]      = FS_SYNC_FL,
+       [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
+       [__BCH_INODE_APPEND]    = FS_APPEND_FL,
+       [__BCH_INODE_NODUMP]    = FS_NODUMP_FL,
+       [__BCH_INODE_NOATIME]   = FS_NOATIME_FL,
+};
+
+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
+static const unsigned bch_flags_to_xflags[] = {
+       [__BCH_INODE_SYNC]      = FS_XFLAG_SYNC,
+       [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
+       [__BCH_INODE_APPEND]    = FS_XFLAG_APPEND,
+       [__BCH_INODE_NODUMP]    = FS_XFLAG_NODUMP,
+       [__BCH_INODE_NOATIME]   = FS_XFLAG_NOATIME,
+       //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
+};
+
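+/*
+ * Translate through one of the mappings above: for each bcachefs inode flag
+ * bit set in @_in, set the corresponding mapped bits in @_out; for each bit
+ * that's clear, clear them:
+ */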
+#define set_flags(_map, _in, _out)                                     \
+do {                                                                   \
+       unsigned _i;                                                    \
+                                                                       \
+       for (_i = 0; _i < ARRAY_SIZE(_map); _i++)                       \
+               if ((_in) & (1 << _i))                                  \
+                       (_out) |= _map[_i];                             \
+               else                                                    \
+                       (_out) &= ~_map[_i];                            \
+} while (0)
+
+#define map_flags(_map, _in)                                           \
+({                                                                     \
+       unsigned _out = 0;                                              \
+                                                                       \
+       set_flags(_map, _in, _out);                                     \
+       _out;                                                           \
+})
+
+#define map_flags_rev(_map, _in)                                       \
+({                                                                     \
+       unsigned _i, _out = 0;                                          \
+                                                                       \
+       for (_i = 0; _i < ARRAY_SIZE(_map); _i++)                       \
+               if ((_in) & _map[_i]) {                                 \
+                       (_out) |= 1 << _i;                              \
+                       (_in) &= ~_map[_i];                             \
+               }                                                       \
+       (_out);                                                         \
+})
+
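+/* Mask of all the bcachefs inode flags that @_map can translate: */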
+#define map_defined(_map)                                              \
+({                                                                     \
+       unsigned _in = ~0;                                              \
+                                                                       \
+       map_flags_rev(_map, _in);                                       \
+})
+
+/* Set VFS inode flags from bcachefs inode: */
+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
+{
+       set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
+}
+
+long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
+long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);
+
+#endif /* _BCACHEFS_FS_IOCTL_H */
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
new file mode 100644 (file)
index 0000000..6a9820e
--- /dev/null
@@ -0,0 +1,1614 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "acl.h"
+#include "bkey_on_stack.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "chardev.h"
+#include "dirent.h"
+#include "extents.h"
+#include "fs.h"
+#include "fs-common.h"
+#include "fs-io.h"
+#include "fs-ioctl.h"
+#include "fsck.h"
+#include "inode.h"
+#include "io.h"
+#include "journal.h"
+#include "keylist.h"
+#include "quota.h"
+#include "super.h"
+#include "xattr.h"
+
+#include <linux/aio.h>
+#include <linux/backing-dev.h>
+#include <linux/exportfs.h>
+#include <linux/fiemap.h>
+#include <linux/module.h>
+#include <linux/posix_acl.h>
+#include <linux/random.h>
+#include <linux/statfs.h>
+#include <linux/xattr.h>
+
+static struct kmem_cache *bch2_inode_cache;
+
+static void bch2_vfs_inode_init(struct bch_fs *,
+                               struct bch_inode_info *,
+                               struct bch_inode_unpacked *);
+
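+/*
+ * Atomically ratchet @dst->ei_journal_seq up to @journal_seq, so that a
+ * subsequent fsync of @dst will also flush that journal entry:
+ */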
+static void journal_seq_copy(struct bch_fs *c,
+                            struct bch_inode_info *dst,
+                            u64 journal_seq)
+{
+       u64 old, v = READ_ONCE(dst->ei_journal_seq);
+
+       do {
+               old = v;
+
+               if (old >= journal_seq)
+                       break;
+       } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old);
+
+       bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq);
+}
+
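+/*
+ * The pagecache lock is shared/shared with two sides: "add" (anything
+ * inserting pages into the pagecache) takes it with a positive count,
+ * "block" (anything that needs to keep new pages out) with a negative count;
+ * each side can be held by multiple threads at once, but the two sides
+ * exclude each other:
+ */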
+static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
+{
+       BUG_ON(atomic_long_read(&lock->v) == 0);
+
+       if (atomic_long_sub_return_release(i, &lock->v) == 0)
+               wake_up_all(&lock->wait);
+}
+
+static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
+{
+       long v = atomic_long_read(&lock->v), old;
+
+       do {
+               old = v;
+
+               if (i > 0 ? v < 0 : v > 0)
+                       return false;
+       } while ((v = atomic_long_cmpxchg_acquire(&lock->v,
+                                       old, old + i)) != old);
+       return true;
+}
+
+static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
+{
+       wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
+}
+
+void bch2_pagecache_add_put(struct pagecache_lock *lock)
+{
+       __pagecache_lock_put(lock, 1);
+}
+
+void bch2_pagecache_add_get(struct pagecache_lock *lock)
+{
+       __pagecache_lock_get(lock, 1);
+}
+
+void bch2_pagecache_block_put(struct pagecache_lock *lock)
+{
+       __pagecache_lock_put(lock, -1);
+}
+
+void bch2_pagecache_block_get(struct pagecache_lock *lock)
+{
+       __pagecache_lock_get(lock, -1);
+}
+
+void bch2_inode_update_after_write(struct bch_fs *c,
+                                  struct bch_inode_info *inode,
+                                  struct bch_inode_unpacked *bi,
+                                  unsigned fields)
+{
+       set_nlink(&inode->v, bch2_inode_nlink_get(bi));
+       i_uid_write(&inode->v, bi->bi_uid);
+       i_gid_write(&inode->v, bi->bi_gid);
+       inode->v.i_mode = bi->bi_mode;
+
+       if (fields & ATTR_ATIME)
+               inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime);
+       if (fields & ATTR_MTIME)
+               inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime);
+       if (fields & ATTR_CTIME)
+               inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
+
+       inode->ei_inode         = *bi;
+
+       bch2_inode_flags_to_vfs(inode);
+}
+
+int __must_check bch2_write_inode(struct bch_fs *c,
+                                 struct bch_inode_info *inode,
+                                 inode_set_fn set,
+                                 void *p, unsigned fields)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bch_inode_unpacked inode_u;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+
+       iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
+                              BTREE_ITER_INTENT);
+       ret   = PTR_ERR_OR_ZERO(iter) ?:
+               (set ? set(inode, &inode_u, p) : 0) ?:
+               bch2_inode_write(&trans, iter, &inode_u) ?:
+               bch2_trans_commit(&trans, NULL,
+                                 &inode->ei_journal_seq,
+                                 BTREE_INSERT_NOUNLOCK|
+                                 BTREE_INSERT_NOFAIL);
+
+       /*
+        * the btree node lock protects inode->ei_inode, not ei_update_lock;
+        * this is important for inode updates via bchfs_write_index_update
+        */
+       if (!ret)
+               bch2_inode_update_after_write(c, inode, &inode_u, fields);
+
+       bch2_trans_iter_put(&trans, iter);
+
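+       /* -EINTR here means a btree transaction restart, not a signal: */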
+       if (ret == -EINTR)
+               goto retry;
+
+       bch2_trans_exit(&trans);
+       return ret < 0 ? ret : 0;
+}
+
+int bch2_fs_quota_transfer(struct bch_fs *c,
+                          struct bch_inode_info *inode,
+                          struct bch_qid new_qid,
+                          unsigned qtypes,
+                          enum quota_acct_mode mode)
+{
+       unsigned i;
+       int ret;
+
+       qtypes &= enabled_qtypes(c);
+
+       for (i = 0; i < QTYP_NR; i++)
+               if (new_qid.q[i] == inode->ei_qid.q[i])
+                       qtypes &= ~(1U << i);
+
+       if (!qtypes)
+               return 0;
+
+       mutex_lock(&inode->ei_quota_lock);
+
+       ret = bch2_quota_transfer(c, qtypes, new_qid,
+                                 inode->ei_qid,
+                                 inode->v.i_blocks +
+                                 inode->ei_quota_reserved,
+                                 mode);
+       if (!ret)
+               for (i = 0; i < QTYP_NR; i++)
+                       if (qtypes & (1 << i))
+                               inode->ei_qid.q[i] = new_qid.q[i];
+
+       mutex_unlock(&inode->ei_quota_lock);
+
+       return ret;
+}
+
+struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum)
+{
+       struct bch_inode_unpacked inode_u;
+       struct bch_inode_info *inode;
+       int ret;
+
+       inode = to_bch_ei(iget_locked(c->vfs_sb, inum));
+       if (unlikely(!inode))
+               return ERR_PTR(-ENOMEM);
+       if (!(inode->v.i_state & I_NEW))
+               return &inode->v;
+
+       ret = bch2_inode_find_by_inum(c, inum, &inode_u);
+       if (ret) {
+               iget_failed(&inode->v);
+               return ERR_PTR(ret);
+       }
+
+       bch2_vfs_inode_init(c, inode, &inode_u);
+
+       inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum);
+
+       unlock_new_inode(&inode->v);
+
+       return &inode->v;
+}
+
+static struct bch_inode_info *
+__bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
+             umode_t mode, dev_t rdev, bool tmpfile)
+{
+       struct bch_fs *c = dir->v.i_sb->s_fs_info;
+       struct user_namespace *ns = dir->v.i_sb->s_user_ns;
+       struct btree_trans trans;
+       struct bch_inode_unpacked dir_u;
+       struct bch_inode_info *inode, *old;
+       struct bch_inode_unpacked inode_u;
+       struct posix_acl *default_acl = NULL, *acl = NULL;
+       u64 journal_seq = 0;
+       int ret;
+
+       /*
+        * preallocate acls + vfs inode before btree transaction, so that
+        * nothing can fail after the transaction succeeds:
+        */
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+       ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
+       if (ret)
+               return ERR_PTR(ret);
+#endif
+       inode = to_bch_ei(new_inode(c->vfs_sb));
+       if (unlikely(!inode)) {
+               inode = ERR_PTR(-ENOMEM);
+               goto err;
+       }
+
+       bch2_inode_init_early(c, &inode_u);
+
+       if (!tmpfile)
+               mutex_lock(&dir->ei_update_lock);
+
+       bch2_trans_init(&trans, c, 8, 1024);
+retry:
+       bch2_trans_begin(&trans);
+
+       ret   = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u,
+                                 !tmpfile ? &dentry->d_name : NULL,
+                                 from_kuid(ns, current_fsuid()),
+                                 from_kgid(ns, current_fsgid()),
+                                 mode, rdev,
+                                 default_acl, acl) ?:
+               bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
+                               KEY_TYPE_QUOTA_PREALLOC);
+       if (unlikely(ret))
+               goto err_before_quota;
+
+       ret   = bch2_trans_commit(&trans, NULL, &journal_seq,
+                                 BTREE_INSERT_NOUNLOCK);
+       if (unlikely(ret)) {
+               bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
+                               KEY_TYPE_QUOTA_WARN);
+err_before_quota:
+               if (ret == -EINTR)
+                       goto retry;
+               goto err_trans;
+       }
+
+       if (!tmpfile) {
+               bch2_inode_update_after_write(c, dir, &dir_u,
+                                             ATTR_MTIME|ATTR_CTIME);
+               journal_seq_copy(c, dir, journal_seq);
+               mutex_unlock(&dir->ei_update_lock);
+       }
+
+       bch2_vfs_inode_init(c, inode, &inode_u);
+       journal_seq_copy(c, inode, journal_seq);
+
+       set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
+       set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
+
+       /*
+        * we must insert the new inode into the inode cache before calling
+        * bch2_trans_exit() and dropping locks, else we could race with another
+        * thread pulling the inode in and modifying it:
+        */
+
+       old = to_bch_ei(insert_inode_locked2(&inode->v));
+       if (unlikely(old)) {
+               /*
+                * We raced; another process pulled the new inode into cache
+                * before us:
+                */
+               journal_seq_copy(c, old, journal_seq);
+               make_bad_inode(&inode->v);
+               iput(&inode->v);
+
+               inode = old;
+       } else {
+               /*
+                * we really don't want insert_inode_locked2() to be setting
+                * I_NEW...
+                */
+               unlock_new_inode(&inode->v);
+       }
+
+       bch2_trans_exit(&trans);
+err:
+       posix_acl_release(default_acl);
+       posix_acl_release(acl);
+       return inode;
+err_trans:
+       if (!tmpfile)
+               mutex_unlock(&dir->ei_update_lock);
+
+       bch2_trans_exit(&trans);
+       make_bad_inode(&inode->v);
+       iput(&inode->v);
+       inode = ERR_PTR(ret);
+       goto err;
+}
+
+/* methods */
+
+static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
+                                 unsigned int flags)
+{
+       struct bch_fs *c = vdir->i_sb->s_fs_info;
+       struct bch_inode_info *dir = to_bch_ei(vdir);
+       struct inode *vinode = NULL;
+       u64 inum;
+
+       inum = bch2_dirent_lookup(c, dir->v.i_ino,
+                                 &dir->ei_str_hash,
+                                 &dentry->d_name);
+
+       if (inum)
+               vinode = bch2_vfs_inode_get(c, inum);
+
+       return d_splice_alias(vinode, dentry);
+}
+
+static int bch2_mknod(struct inode *vdir, struct dentry *dentry,
+                     umode_t mode, dev_t rdev)
+{
+       struct bch_inode_info *inode =
+               __bch2_create(to_bch_ei(vdir), dentry, mode, rdev, false);
+
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
+
+       d_instantiate(dentry, &inode->v);
+       return 0;
+}
+
+static int bch2_create(struct inode *vdir, struct dentry *dentry,
+                      umode_t mode, bool excl)
+{
+       return bch2_mknod(vdir, dentry, mode|S_IFREG, 0);
+}
+
+static int __bch2_link(struct bch_fs *c,
+                      struct bch_inode_info *inode,
+                      struct bch_inode_info *dir,
+                      struct dentry *dentry)
+{
+       struct btree_trans trans;
+       struct bch_inode_unpacked dir_u, inode_u;
+       int ret;
+
+       mutex_lock(&inode->ei_update_lock);
+       bch2_trans_init(&trans, c, 4, 1024);
+
+       do {
+               bch2_trans_begin(&trans);
+               ret   = bch2_link_trans(&trans,
+                                       dir->v.i_ino,
+                                       inode->v.i_ino, &dir_u, &inode_u,
+                                       &dentry->d_name) ?:
+                       bch2_trans_commit(&trans, NULL,
+                                       &inode->ei_journal_seq,
+                                       BTREE_INSERT_NOUNLOCK);
+       } while (ret == -EINTR);
+
+       if (likely(!ret)) {
+               BUG_ON(inode_u.bi_inum != inode->v.i_ino);
+
+               journal_seq_copy(c, inode, dir->ei_journal_seq);
+               bch2_inode_update_after_write(c, dir, &dir_u,
+                                             ATTR_MTIME|ATTR_CTIME);
+               bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME);
+       }
+
+       bch2_trans_exit(&trans);
+       mutex_unlock(&inode->ei_update_lock);
+       return ret;
+}
+
+static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
+                    struct dentry *dentry)
+{
+       struct bch_fs *c = vdir->i_sb->s_fs_info;
+       struct bch_inode_info *dir = to_bch_ei(vdir);
+       struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
+       int ret;
+
+       lockdep_assert_held(&inode->v.i_rwsem);
+
+       ret = __bch2_link(c, inode, dir, dentry);
+       if (unlikely(ret))
+               return ret;
+
+       ihold(&inode->v);
+       d_instantiate(dentry, &inode->v);
+       return 0;
+}
+
+static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
+{
+       struct bch_fs *c = vdir->i_sb->s_fs_info;
+       struct bch_inode_info *dir = to_bch_ei(vdir);
+       struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+       struct bch_inode_unpacked dir_u, inode_u;
+       struct btree_trans trans;
+       int ret;
+
+       bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
+       bch2_trans_init(&trans, c, 4, 1024);
+
+       do {
+               bch2_trans_begin(&trans);
+
+               ret   = bch2_unlink_trans(&trans,
+                                         dir->v.i_ino, &dir_u,
+                                         &inode_u, &dentry->d_name) ?:
+                       bch2_trans_commit(&trans, NULL,
+                                         &dir->ei_journal_seq,
+                                         BTREE_INSERT_NOUNLOCK|
+                                         BTREE_INSERT_NOFAIL);
+       } while (ret == -EINTR);
+
+       if (likely(!ret)) {
+               BUG_ON(inode_u.bi_inum != inode->v.i_ino);
+
+               journal_seq_copy(c, inode, dir->ei_journal_seq);
+               bch2_inode_update_after_write(c, dir, &dir_u,
+                                             ATTR_MTIME|ATTR_CTIME);
+               bch2_inode_update_after_write(c, inode, &inode_u,
+                                             ATTR_MTIME);
+       }
+
+       bch2_trans_exit(&trans);
+       bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
+
+       return ret;
+}
+
+static int bch2_symlink(struct inode *vdir, struct dentry *dentry,
+                       const char *symname)
+{
+       struct bch_fs *c = vdir->i_sb->s_fs_info;
+       struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
+       int ret;
+
+       inode = __bch2_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0, true);
+       if (unlikely(IS_ERR(inode)))
+               return PTR_ERR(inode);
+
+       inode_lock(&inode->v);
+       ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
+       inode_unlock(&inode->v);
+
+       if (unlikely(ret))
+               goto err;
+
+       ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
+       if (unlikely(ret))
+               goto err;
+
+       journal_seq_copy(c, dir, inode->ei_journal_seq);
+
+       ret = __bch2_link(c, inode, dir, dentry);
+       if (unlikely(ret))
+               goto err;
+
+       d_instantiate(dentry, &inode->v);
+       return 0;
+err:
+       iput(&inode->v);
+       return ret;
+}
+
+static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode)
+{
+       return bch2_mknod(vdir, dentry, mode|S_IFDIR, 0);
+}
+
+static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
+                       struct inode *dst_vdir, struct dentry *dst_dentry,
+                       unsigned flags)
+{
+       struct bch_fs *c = src_vdir->i_sb->s_fs_info;
+       struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
+       struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
+       struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
+       struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
+       struct bch_inode_unpacked dst_dir_u, src_dir_u;
+       struct bch_inode_unpacked src_inode_u, dst_inode_u;
+       struct btree_trans trans;
+       enum bch_rename_mode mode = flags & RENAME_EXCHANGE
+               ? BCH_RENAME_EXCHANGE
+               : dst_dentry->d_inode
+               ? BCH_RENAME_OVERWRITE : BCH_RENAME;
+       u64 journal_seq = 0;
+       int ret;
+
+       if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
+               return -EINVAL;
+
+       if (mode == BCH_RENAME_OVERWRITE) {
+               ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
+                                                  0, LLONG_MAX);
+               if (ret)
+                       return ret;
+       }
+
+       bch2_trans_init(&trans, c, 8, 2048);
+
+       bch2_lock_inodes(INODE_UPDATE_LOCK,
+                        src_dir,
+                        dst_dir,
+                        src_inode,
+                        dst_inode);
+
+       if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
+               ret = bch2_fs_quota_transfer(c, src_inode,
+                                            dst_dir->ei_qid,
+                                            1 << QTYP_PRJ,
+                                            KEY_TYPE_QUOTA_PREALLOC);
+               if (ret)
+                       goto err;
+       }
+
+       if (mode == BCH_RENAME_EXCHANGE &&
+           inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
+               ret = bch2_fs_quota_transfer(c, dst_inode,
+                                            src_dir->ei_qid,
+                                            1 << QTYP_PRJ,
+                                            KEY_TYPE_QUOTA_PREALLOC);
+               if (ret)
+                       goto err;
+       }
+
+retry:
+       bch2_trans_begin(&trans);
+       ret   = bch2_rename_trans(&trans,
+                                 src_dir->v.i_ino, &src_dir_u,
+                                 dst_dir->v.i_ino, &dst_dir_u,
+                                 &src_inode_u,
+                                 &dst_inode_u,
+                                 &src_dentry->d_name,
+                                 &dst_dentry->d_name,
+                                 mode) ?:
+               bch2_trans_commit(&trans, NULL,
+                                 &journal_seq,
+                                 BTREE_INSERT_NOUNLOCK);
+       if (ret == -EINTR)
+               goto retry;
+       if (unlikely(ret))
+               goto err;
+
+       BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
+       BUG_ON(dst_inode &&
+              dst_inode->v.i_ino != dst_inode_u.bi_inum);
+
+       bch2_inode_update_after_write(c, src_dir, &src_dir_u,
+                                     ATTR_MTIME|ATTR_CTIME);
+       journal_seq_copy(c, src_dir, journal_seq);
+
+       if (src_dir != dst_dir) {
+               bch2_inode_update_after_write(c, dst_dir, &dst_dir_u,
+                                             ATTR_MTIME|ATTR_CTIME);
+               journal_seq_copy(c, dst_dir, journal_seq);
+       }
+
+       bch2_inode_update_after_write(c, src_inode, &src_inode_u,
+                                     ATTR_CTIME);
+       journal_seq_copy(c, src_inode, journal_seq);
+
+       if (dst_inode) {
+               bch2_inode_update_after_write(c, dst_inode, &dst_inode_u,
+                                             ATTR_CTIME);
+               journal_seq_copy(c, dst_inode, journal_seq);
+       }
+err:
+       bch2_trans_exit(&trans);
+
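+       /*
+        * On success these transfers are no-ops; on error they put the quotas
+        * back the way they were:
+        */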
+       bch2_fs_quota_transfer(c, src_inode,
+                              bch_qid(&src_inode->ei_inode),
+                              1 << QTYP_PRJ,
+                              KEY_TYPE_QUOTA_NOCHECK);
+       if (dst_inode)
+               bch2_fs_quota_transfer(c, dst_inode,
+                                      bch_qid(&dst_inode->ei_inode),
+                                      1 << QTYP_PRJ,
+                                      KEY_TYPE_QUOTA_NOCHECK);
+
+       bch2_unlock_inodes(INODE_UPDATE_LOCK,
+                          src_dir,
+                          dst_dir,
+                          src_inode,
+                          dst_inode);
+
+       return ret;
+}
+
+void bch2_setattr_copy(struct bch_inode_info *inode,
+                      struct bch_inode_unpacked *bi,
+                      struct iattr *attr)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       unsigned int ia_valid = attr->ia_valid;
+
+       if (ia_valid & ATTR_UID)
+               bi->bi_uid = from_kuid(c->vfs_sb->s_user_ns, attr->ia_uid);
+       if (ia_valid & ATTR_GID)
+               bi->bi_gid = from_kgid(c->vfs_sb->s_user_ns, attr->ia_gid);
+
+       if (ia_valid & ATTR_ATIME)
+               bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
+       if (ia_valid & ATTR_MTIME)
+               bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
+       if (ia_valid & ATTR_CTIME)
+               bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
+
+       if (ia_valid & ATTR_MODE) {
+               umode_t mode = attr->ia_mode;
+               kgid_t gid = ia_valid & ATTR_GID
+                       ? attr->ia_gid
+                       : inode->v.i_gid;
+
+               if (!in_group_p(gid) &&
+                   !capable_wrt_inode_uidgid(&inode->v, CAP_FSETID))
+                       mode &= ~S_ISGID;
+               bi->bi_mode = mode;
+       }
+}
+
+static int bch2_setattr_nonsize(struct bch_inode_info *inode,
+                               struct iattr *attr)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_qid qid;
+       struct btree_trans trans;
+       struct btree_iter *inode_iter;
+       struct bch_inode_unpacked inode_u;
+       struct posix_acl *acl = NULL;
+       int ret;
+
+       mutex_lock(&inode->ei_update_lock);
+
+       qid = inode->ei_qid;
+
+       if (attr->ia_valid & ATTR_UID)
+               qid.q[QTYP_USR] = from_kuid(&init_user_ns, attr->ia_uid);
+
+       if (attr->ia_valid & ATTR_GID)
+               qid.q[QTYP_GRP] = from_kgid(&init_user_ns, attr->ia_gid);
+
+       ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
+                                    KEY_TYPE_QUOTA_PREALLOC);
+       if (ret)
+               goto err;
+
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+       kfree(acl);
+       acl = NULL;
+
+       inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
+                                    BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(inode_iter);
+       if (ret)
+               goto btree_err;
+
+       bch2_setattr_copy(inode, &inode_u, attr);
+
+       if (attr->ia_valid & ATTR_MODE) {
+               ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl);
+               if (ret)
+                       goto btree_err;
+       }
+
+       ret =   bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+               bch2_trans_commit(&trans, NULL,
+                                 &inode->ei_journal_seq,
+                                 BTREE_INSERT_NOUNLOCK|
+                                 BTREE_INSERT_NOFAIL);
+btree_err:
+       if (ret == -EINTR)
+               goto retry;
+       if (unlikely(ret))
+               goto err_trans;
+
+       bch2_inode_update_after_write(c, inode, &inode_u, attr->ia_valid);
+
+       if (acl)
+               set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
+err_trans:
+       bch2_trans_exit(&trans);
+err:
+       mutex_unlock(&inode->ei_update_lock);
+
+       return ret;
+}
+
+static int bch2_getattr(const struct path *path, struct kstat *stat,
+                       u32 request_mask, unsigned query_flags)
+{
+       struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+       stat->dev       = inode->v.i_sb->s_dev;
+       stat->ino       = inode->v.i_ino;
+       stat->mode      = inode->v.i_mode;
+       stat->nlink     = inode->v.i_nlink;
+       stat->uid       = inode->v.i_uid;
+       stat->gid       = inode->v.i_gid;
+       stat->rdev      = inode->v.i_rdev;
+       stat->size      = i_size_read(&inode->v);
+       stat->atime     = inode->v.i_atime;
+       stat->mtime     = inode->v.i_mtime;
+       stat->ctime     = inode->v.i_ctime;
+       stat->blksize   = block_bytes(c);
+       stat->blocks    = inode->v.i_blocks;
+
+       if (request_mask & STATX_BTIME) {
+               stat->result_mask |= STATX_BTIME;
+               stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
+       }
+
+       if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE)
+               stat->attributes |= STATX_ATTR_IMMUTABLE;
+       stat->attributes_mask    |= STATX_ATTR_IMMUTABLE;
+
+       if (inode->ei_inode.bi_flags & BCH_INODE_APPEND)
+               stat->attributes |= STATX_ATTR_APPEND;
+       stat->attributes_mask    |= STATX_ATTR_APPEND;
+
+       if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP)
+               stat->attributes |= STATX_ATTR_NODUMP;
+       stat->attributes_mask    |= STATX_ATTR_NODUMP;
+
+       return 0;
+}
+
+static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+       struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+       int ret;
+
+       lockdep_assert_held(&inode->v.i_rwsem);
+
+       ret = setattr_prepare(dentry, iattr);
+       if (ret)
+               return ret;
+
+       return iattr->ia_valid & ATTR_SIZE
+               ? bch2_truncate(inode, iattr)
+               : bch2_setattr_nonsize(inode, iattr);
+}
+
+static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
+{
+       struct bch_inode_info *inode =
+               __bch2_create(to_bch_ei(vdir), dentry, mode, 0, true);
+
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
+
+       d_mark_tmpfile(dentry, &inode->v);
+       d_instantiate(dentry, &inode->v);
+       return 0;
+}
+
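+/* Translate a single btree key into one or more fiemap extents: */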
+static int bch2_fill_extent(struct bch_fs *c,
+                           struct fiemap_extent_info *info,
+                           struct bkey_s_c k, unsigned flags)
+{
+       if (bkey_extent_is_data(k.k)) {
+               struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
+               int ret;
+
+               if (k.k->type == KEY_TYPE_reflink_v)
+                       flags |= FIEMAP_EXTENT_SHARED;
+
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+                       int flags2 = 0;
+                       u64 offset = p.ptr.offset;
+
+                       if (p.crc.compression_type)
+                               flags2 |= FIEMAP_EXTENT_ENCODED;
+                       else
+                               offset += p.crc.offset;
+
+                       if ((offset & (c->opts.block_size - 1)) ||
+                           (k.k->size & (c->opts.block_size - 1)))
+                               flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
+
+                       ret = fiemap_fill_next_extent(info,
+                                               bkey_start_offset(k.k) << 9,
+                                               offset << 9,
+                                               k.k->size << 9, flags|flags2);
+                       if (ret)
+                               return ret;
+               }
+
+               return 0;
+       } else if (k.k->type == KEY_TYPE_reservation) {
+               return fiemap_fill_next_extent(info,
+                                              bkey_start_offset(k.k) << 9,
+                                              0, k.k->size << 9,
+                                              flags|
+                                              FIEMAP_EXTENT_DELALLOC|
+                                              FIEMAP_EXTENT_UNWRITTEN);
+       } else {
+               BUG();
+       }
+}
+
+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
+                      u64 start, u64 len)
+{
+       struct bch_fs *c = vinode->i_sb->s_fs_info;
+       struct bch_inode_info *ei = to_bch_ei(vinode);
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_on_stack cur, prev;
+       struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
+       unsigned offset_into_extent, sectors;
+       bool have_extent = false;
+       int ret = 0;
+
+       ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
+       if (ret)
+               return ret;
+
+       if (start + len < start)
+               return -EINVAL;
+
+       bkey_on_stack_init(&cur);
+       bkey_on_stack_init(&prev);
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  POS(ei->v.i_ino, start >> 9), 0);
+retry:
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k)) &&
+              bkey_cmp(iter->pos, end) < 0) {
+               if (!bkey_extent_is_data(k.k) &&
+                   k.k->type != KEY_TYPE_reservation) {
+                       bch2_btree_iter_next(iter);
+                       continue;
+               }
+
+               bkey_on_stack_realloc(&cur, c, k.k->u64s);
+               bkey_on_stack_realloc(&prev, c, k.k->u64s);
+               bkey_reassemble(cur.k, k);
+               k = bkey_i_to_s_c(cur.k);
+
+               offset_into_extent      = iter->pos.offset -
+                       bkey_start_offset(k.k);
+               sectors                 = k.k->size - offset_into_extent;
+
+               ret = bch2_read_indirect_extent(&trans,
+                                       &offset_into_extent, &cur);
+               if (ret)
+                       break;
+
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               if (offset_into_extent)
+                       bch2_cut_front(POS(k.k->p.inode,
+                                          bkey_start_offset(k.k) +
+                                          offset_into_extent),
+                                      cur.k);
+               bch2_key_resize(&cur.k->k, sectors);
+               cur.k->k.p = iter->pos;
+               cur.k->k.p.offset += cur.k->k.size;
+
+               if (have_extent) {
+                       ret = bch2_fill_extent(c, info,
+                                       bkey_i_to_s_c(prev.k), 0);
+                       if (ret)
+                               break;
+               }
+
+               bkey_copy(prev.k, cur.k);
+               have_extent = true;
+
+               if (k.k->type == KEY_TYPE_reflink_v)
+                       bch2_btree_iter_set_pos(iter, k.k->p);
+               else
+                       bch2_btree_iter_next(iter);
+       }
+
+       if (ret == -EINTR)
+               goto retry;
+
+       if (!ret && have_extent)
+               ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
+                                      FIEMAP_EXTENT_LAST);
+
+       ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&cur, c);
+       bkey_on_stack_exit(&prev, c);
+       return ret < 0 ? ret : 0;
+}
+
+static const struct vm_operations_struct bch_vm_ops = {
+       .fault          = bch2_page_fault,
+       .map_pages      = filemap_map_pages,
+       .page_mkwrite   = bch2_page_mkwrite,
+};
+
+static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       file_accessed(file);
+
+       vma->vm_ops = &bch_vm_ops;
+       return 0;
+}
+
+/* Directories: */
+
+static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+       return generic_file_llseek_size(file, offset, whence,
+                                       S64_MAX, S64_MAX);
+}
+
+static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
+{
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+       if (!dir_emit_dots(file, ctx))
+               return 0;
+
+       return bch2_readdir(c, inode->v.i_ino, ctx);
+}
+
+static const struct file_operations bch_file_operations = {
+       .llseek         = bch2_llseek,
+       .read_iter      = bch2_read_iter,
+       .write_iter     = bch2_write_iter,
+       .mmap           = bch2_mmap,
+       .open           = generic_file_open,
+       .fsync          = bch2_fsync,
+       .splice_read    = generic_file_splice_read,
+       /*
+        * Broken on v5.3:
+       .splice_write   = iter_file_splice_write,
+       */
+       .fallocate      = bch2_fallocate_dispatch,
+       .unlocked_ioctl = bch2_fs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = bch2_compat_fs_ioctl,
+#endif
+       .remap_file_range = bch2_remap_file_range,
+};
+
+static const struct inode_operations bch_file_inode_operations = {
+       .getattr        = bch2_getattr,
+       .setattr        = bch2_setattr,
+       .fiemap         = bch2_fiemap,
+       .listxattr      = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+       .get_acl        = bch2_get_acl,
+       .set_acl        = bch2_set_acl,
+#endif
+};
+
+static const struct inode_operations bch_dir_inode_operations = {
+       .lookup         = bch2_lookup,
+       .create         = bch2_create,
+       .link           = bch2_link,
+       .unlink         = bch2_unlink,
+       .symlink        = bch2_symlink,
+       .mkdir          = bch2_mkdir,
+       .rmdir          = bch2_unlink,
+       .mknod          = bch2_mknod,
+       .rename         = bch2_rename2,
+       .getattr        = bch2_getattr,
+       .setattr        = bch2_setattr,
+       .tmpfile        = bch2_tmpfile,
+       .listxattr      = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+       .get_acl        = bch2_get_acl,
+       .set_acl        = bch2_set_acl,
+#endif
+};
+
+static const struct file_operations bch_dir_file_operations = {
+       .llseek         = bch2_dir_llseek,
+       .read           = generic_read_dir,
+       .iterate_shared = bch2_vfs_readdir,
+       .fsync          = bch2_fsync,
+       .unlocked_ioctl = bch2_fs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = bch2_compat_fs_ioctl,
+#endif
+};
+
+static const struct inode_operations bch_symlink_inode_operations = {
+       .get_link       = page_get_link,
+       .getattr        = bch2_getattr,
+       .setattr        = bch2_setattr,
+       .listxattr      = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+       .get_acl        = bch2_get_acl,
+       .set_acl        = bch2_set_acl,
+#endif
+};
+
+static const struct inode_operations bch_special_inode_operations = {
+       .getattr        = bch2_getattr,
+       .setattr        = bch2_setattr,
+       .listxattr      = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+       .get_acl        = bch2_get_acl,
+       .set_acl        = bch2_set_acl,
+#endif
+};
+
+static const struct address_space_operations bch_address_space_operations = {
+       .writepage      = bch2_writepage,
+       .readpage       = bch2_readpage,
+       .writepages     = bch2_writepages,
+       .readpages      = bch2_readpages,
+       .set_page_dirty = __set_page_dirty_nobuffers,
+       .write_begin    = bch2_write_begin,
+       .write_end      = bch2_write_end,
+       .invalidatepage = bch2_invalidatepage,
+       .releasepage    = bch2_releasepage,
+       .direct_IO      = noop_direct_IO,
+#ifdef CONFIG_MIGRATION
+       .migratepage    = bch2_migrate_page,
+#endif
+       .error_remove_page = generic_error_remove_page,
+};
+
+static struct inode *bch2_nfs_get_inode(struct super_block *sb,
+               u64 ino, u32 generation)
+{
+       struct bch_fs *c = sb->s_fs_info;
+       struct inode *vinode;
+
+       if (ino < BCACHEFS_ROOT_INO)
+               return ERR_PTR(-ESTALE);
+
+       vinode = bch2_vfs_inode_get(c, ino);
+       if (IS_ERR(vinode))
+               return ERR_CAST(vinode);
+       if (generation && vinode->i_generation != generation) {
+               /* we didn't find the right inode... */
+               iput(vinode);
+               return ERR_PTR(-ESTALE);
+       }
+       return vinode;
+}
+
+static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid,
+               int fh_len, int fh_type)
+{
+       return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+                                   bch2_nfs_get_inode);
+}
+
+static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid,
+               int fh_len, int fh_type)
+{
+       return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+                                   bch2_nfs_get_inode);
+}
+
+static const struct export_operations bch_export_ops = {
+       .fh_to_dentry   = bch2_fh_to_dentry,
+       .fh_to_parent   = bch2_fh_to_parent,
+       //.get_parent   = bch2_get_parent,
+};
+
+static void bch2_vfs_inode_init(struct bch_fs *c,
+                               struct bch_inode_info *inode,
+                               struct bch_inode_unpacked *bi)
+{
+       bch2_inode_update_after_write(c, inode, bi, ~0);
+
+       inode->v.i_blocks       = bi->bi_sectors;
+       inode->v.i_ino          = bi->bi_inum;
+       inode->v.i_rdev         = bi->bi_dev;
+       inode->v.i_generation   = bi->bi_generation;
+       inode->v.i_size         = bi->bi_size;
+
+       inode->ei_journal_seq   = 0;
+       inode->ei_quota_reserved = 0;
+       inode->ei_str_hash      = bch2_hash_info_init(c, bi);
+       inode->ei_qid           = bch_qid(bi);
+
+       inode->v.i_mapping->a_ops = &bch_address_space_operations;
+
+       switch (inode->v.i_mode & S_IFMT) {
+       case S_IFREG:
+               inode->v.i_op   = &bch_file_inode_operations;
+               inode->v.i_fop  = &bch_file_operations;
+               break;
+       case S_IFDIR:
+               inode->v.i_op   = &bch_dir_inode_operations;
+               inode->v.i_fop  = &bch_dir_file_operations;
+               break;
+       case S_IFLNK:
+               inode_nohighmem(&inode->v);
+               inode->v.i_op   = &bch_symlink_inode_operations;
+               break;
+       default:
+               init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
+               inode->v.i_op   = &bch_special_inode_operations;
+               break;
+       }
+}
+
+static struct inode *bch2_alloc_inode(struct super_block *sb)
+{
+       struct bch_inode_info *inode;
+
+       inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
+       if (!inode)
+               return NULL;
+
+       inode_init_once(&inode->v);
+       mutex_init(&inode->ei_update_lock);
+       pagecache_lock_init(&inode->ei_pagecache_lock);
+       mutex_init(&inode->ei_quota_lock);
+       inode->ei_journal_seq = 0;
+
+       return &inode->v;
+}
+
+static void bch2_i_callback(struct rcu_head *head)
+{
+       struct inode *vinode = container_of(head, struct inode, i_rcu);
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+
+       kmem_cache_free(bch2_inode_cache, inode);
+}
+
+static void bch2_destroy_inode(struct inode *vinode)
+{
+       call_rcu(&vinode->i_rcu, bch2_i_callback);
+}
+
+static int inode_update_times_fn(struct bch_inode_info *inode,
+                                struct bch_inode_unpacked *bi,
+                                void *p)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+       bi->bi_atime    = timespec_to_bch2_time(c, inode->v.i_atime);
+       bi->bi_mtime    = timespec_to_bch2_time(c, inode->v.i_mtime);
+       bi->bi_ctime    = timespec_to_bch2_time(c, inode->v.i_ctime);
+
+       return 0;
+}
+
+static int bch2_vfs_write_inode(struct inode *vinode,
+                               struct writeback_control *wbc)
+{
+       struct bch_fs *c = vinode->i_sb->s_fs_info;
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+       int ret;
+
+       mutex_lock(&inode->ei_update_lock);
+       ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
+                              ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
+       mutex_unlock(&inode->ei_update_lock);
+
+       return ret;
+}
+
+static void bch2_evict_inode(struct inode *vinode)
+{
+       struct bch_fs *c = vinode->i_sb->s_fs_info;
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+
+       truncate_inode_pages_final(&inode->v.i_data);
+
+       clear_inode(&inode->v);
+
+       BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
+
+       if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
+               bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
+                               KEY_TYPE_QUOTA_WARN);
+               bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
+                               KEY_TYPE_QUOTA_WARN);
+               bch2_inode_rm(c, inode->v.i_ino);
+       }
+}
+
+static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+       struct super_block *sb = dentry->d_sb;
+       struct bch_fs *c = sb->s_fs_info;
+       struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
+       unsigned shift = sb->s_blocksize_bits - 9;
+       u64 fsid;
+
+       buf->f_type     = BCACHEFS_STATFS_MAGIC;
+       buf->f_bsize    = sb->s_blocksize;
+       buf->f_blocks   = usage.capacity >> shift;
+       buf->f_bfree    = (usage.capacity - usage.used) >> shift;
+       buf->f_bavail   = buf->f_bfree;
+       buf->f_files    = 0;
+       buf->f_ffree    = 0;
+
+       fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
+              le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
+       buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+       buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+       buf->f_namelen  = BCH_NAME_MAX;
+
+       return 0;
+}
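+
+/*
+ * Editor's note: usage from bch2_fs_usage_read_short() is counted in
+ * 512-byte sectors, so shifting by (s_blocksize_bits - 9) converts sectors
+ * to filesystem blocks; e.g. with 4096-byte blocks the shift is 3 and
+ * f_blocks = sectors / 8.
+ */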
+
+static int bch2_sync_fs(struct super_block *sb, int wait)
+{
+       struct bch_fs *c = sb->s_fs_info;
+
+       if (c->opts.journal_flush_disabled)
+               return 0;
+
+       if (!wait) {
+               bch2_journal_flush_async(&c->journal, NULL);
+               return 0;
+       }
+
+       return bch2_journal_flush(&c->journal);
+}
+
+static struct bch_fs *bch2_path_to_fs(const char *dev)
+{
+       struct bch_fs *c;
+       struct block_device *bdev = lookup_bdev(dev);
+
+       if (IS_ERR(bdev))
+               return ERR_CAST(bdev);
+
+       c = bch2_bdev_to_fs(bdev);
+       bdput(bdev);
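+       /* bch2_bdev_to_fs() gave us a ref; caller only wants the pointer: */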
+       if (c)
+               closure_put(&c->cl);
+       return c ?: ERR_PTR(-ENOENT);
+}
+
+static char **split_devs(const char *_dev_name, unsigned *nr)
+{
+       char *dev_name = NULL, **devs = NULL, *s;
+       size_t i, nr_devs = 0;
+
+       dev_name = kstrdup(_dev_name, GFP_KERNEL);
+       if (!dev_name)
+               return NULL;
+
+       for (s = dev_name; s; s = strchr(s + 1, ':'))
+               nr_devs++;
+
+       devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL);
+       if (!devs) {
+               kfree(dev_name);
+               return NULL;
+       }
+
+       for (i = 0, s = dev_name;
+            s;
+            (s = strchr(s, ':')) && (*s++ = '\0'))
+               devs[i++] = s;
+
+       *nr = nr_devs;
+       return devs;
+}
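+
+/*
+ * Editor's note: e.g. mounting "/dev/sda:/dev/sdb" yields the NULL-terminated
+ * array { "/dev/sda", "/dev/sdb" } with *nr = 2; all strings share the one
+ * kstrdup() buffer, which is why callers free it with kfree(devs[0]).
+ */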
+
+static int bch2_remount(struct super_block *sb, int *flags, char *data)
+{
+       struct bch_fs *c = sb->s_fs_info;
+       struct bch_opts opts = bch2_opts_empty();
+       int ret;
+
+       opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
+
+       ret = bch2_parse_mount_opts(&opts, data);
+       if (ret)
+               return ret;
+
+       if (opts.read_only != c->opts.read_only) {
+               down_write(&c->state_lock);
+
+               if (opts.read_only) {
+                       bch2_fs_read_only(c);
+
+                       sb->s_flags |= SB_RDONLY;
+               } else {
+                       ret = bch2_fs_read_write(c);
+                       if (ret) {
+                               bch_err(c, "error going rw: %i", ret);
+                               up_write(&c->state_lock);
+                               return -EINVAL;
+                       }
+
+                       sb->s_flags &= ~SB_RDONLY;
+               }
+
+               c->opts.read_only = opts.read_only;
+
+               up_write(&c->state_lock);
+       }
+
+       if (opts.errors >= 0)
+               c->opts.errors = opts.errors;
+
+       return ret;
+}
+
+static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
+{
+       struct bch_fs *c = root->d_sb->s_fs_info;
+       struct bch_dev *ca;
+       unsigned i;
+       bool first = true;
+
+       for_each_online_member(ca, c, i) {
+               if (!first)
+                       seq_putc(seq, ':');
+               first = false;
+               seq_puts(seq, "/dev/");
+               seq_puts(seq, ca->name);
+       }
+
+       return 0;
+}
+
+static int bch2_show_options(struct seq_file *seq, struct dentry *root)
+{
+       struct bch_fs *c = root->d_sb->s_fs_info;
+       enum bch_opt_id i;
+       char buf[512];
+
+       for (i = 0; i < bch2_opts_nr; i++) {
+               const struct bch_option *opt = &bch2_opt_table[i];
+               u64 v = bch2_opt_get_by_id(&c->opts, i);
+
+               if (!(opt->mode & OPT_MOUNT))
+                       continue;
+
+               if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
+                       continue;
+
+               bch2_opt_to_text(&PBUF(buf), c, opt, v,
+                                OPT_SHOW_MOUNT_STYLE);
+               seq_putc(seq, ',');
+               seq_puts(seq, buf);
+       }
+
+       return 0;
+}
+
+static void bch2_put_super(struct super_block *sb)
+{
+       struct bch_fs *c = sb->s_fs_info;
+
+       __bch2_fs_stop(c);
+}
+
+static const struct super_operations bch_super_operations = {
+       .alloc_inode    = bch2_alloc_inode,
+       .destroy_inode  = bch2_destroy_inode,
+       .write_inode    = bch2_vfs_write_inode,
+       .evict_inode    = bch2_evict_inode,
+       .sync_fs        = bch2_sync_fs,
+       .statfs         = bch2_statfs,
+       .show_devname   = bch2_show_devname,
+       .show_options   = bch2_show_options,
+       .remount_fs     = bch2_remount,
+       .put_super      = bch2_put_super,
+#if 0
+       .freeze_fs      = bch2_freeze,
+       .unfreeze_fs    = bch2_unfreeze,
+#endif
+};
+
+static int bch2_set_super(struct super_block *s, void *data)
+{
+       s->s_fs_info = data;
+       return 0;
+}
+
+static int bch2_noset_super(struct super_block *s, void *data)
+{
+       return -EBUSY;
+}
+
+static int bch2_test_super(struct super_block *s, void *data)
+{
+       struct bch_fs *c = s->s_fs_info;
+       struct bch_fs **devs = data;
+       unsigned i;
+
+       if (!c)
+               return false;
+
+       for (i = 0; devs[i]; i++)
+               if (c != devs[i])
+                       return false;
+       return true;
+}
+
+static struct dentry *bch2_mount(struct file_system_type *fs_type,
+                                int flags, const char *dev_name, void *data)
+{
+       struct bch_fs *c;
+       struct bch_dev *ca;
+       struct super_block *sb;
+       struct inode *vinode;
+       struct bch_opts opts = bch2_opts_empty();
+       char **devs;
+       struct bch_fs **devs_to_fs = NULL;
+       unsigned i, nr_devs;
+       int ret;
+
+       opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
+
+       ret = bch2_parse_mount_opts(&opts, data);
+       if (ret)
+               return ERR_PTR(ret);
+
+       devs = split_devs(dev_name, &nr_devs);
+       if (!devs)
+               return ERR_PTR(-ENOMEM);
+
+       devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL);
+       if (!devs_to_fs) {
+               sb = ERR_PTR(-ENOMEM);
+               goto got_sb;
+       }
+
+       for (i = 0; i < nr_devs; i++)
+               devs_to_fs[i] = bch2_path_to_fs(devs[i]);
+
+       sb = sget(fs_type, bch2_test_super, bch2_noset_super,
+                 flags|SB_NOSEC, devs_to_fs);
+       if (!IS_ERR(sb))
+               goto got_sb;
+
+       c = bch2_fs_open(devs, nr_devs, opts);
+
+       if (!IS_ERR(c))
+               sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
+       else
+               sb = ERR_CAST(c);
+got_sb:
+       kfree(devs_to_fs);
+       kfree(devs[0]);
+       kfree(devs);
+
+       if (IS_ERR(sb))
+               return ERR_CAST(sb);
+
+       c = sb->s_fs_info;
+
+       if (sb->s_root) {
+               if ((flags ^ sb->s_flags) & SB_RDONLY) {
+                       ret = -EBUSY;
+                       goto err_put_super;
+               }
+               goto out;
+       }
+
+       sb->s_blocksize         = block_bytes(c);
+       sb->s_blocksize_bits    = ilog2(block_bytes(c));
+       sb->s_maxbytes          = MAX_LFS_FILESIZE;
+       sb->s_op                = &bch_super_operations;
+       sb->s_export_op         = &bch_export_ops;
+#ifdef CONFIG_BCACHEFS_QUOTA
+       sb->s_qcop              = &bch2_quotactl_operations;
+       sb->s_quota_types       = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
+#endif
+       sb->s_xattr             = bch2_xattr_handlers;
+       sb->s_magic             = BCACHEFS_STATFS_MAGIC;
+       sb->s_time_gran         = c->sb.time_precision;
+       c->vfs_sb               = sb;
+       strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
+
+       ret = super_setup_bdi(sb);
+       if (ret)
+               goto err_put_super;
+
+       sb->s_bdi->ra_pages             = VM_READAHEAD_PAGES;
+
+       for_each_online_member(ca, c, i) {
+               struct block_device *bdev = ca->disk_sb.bdev;
+
+               /* XXX: create an anonymous device for multi device filesystems */
+               sb->s_bdev      = bdev;
+               sb->s_dev       = bdev->bd_dev;
+               percpu_ref_put(&ca->io_ref);
+               break;
+       }
+
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+       if (c->opts.acl)
+               sb->s_flags     |= SB_POSIXACL;
+#endif
+
+       vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO);
+       if (IS_ERR(vinode)) {
+               bch_err(c, "error mounting: error getting root inode %i",
+                       (int) PTR_ERR(vinode));
+               ret = PTR_ERR(vinode);
+               goto err_put_super;
+       }
+
+       sb->s_root = d_make_root(vinode);
+       if (!sb->s_root) {
+               bch_err(c, "error mounting: error allocating root dentry");
+               ret = -ENOMEM;
+               goto err_put_super;
+       }
+
+       sb->s_flags |= SB_ACTIVE;
+out:
+       return dget(sb->s_root);
+
+err_put_super:
+       deactivate_locked_super(sb);
+       return ERR_PTR(ret);
+}
+
+static void bch2_kill_sb(struct super_block *sb)
+{
+       struct bch_fs *c = sb->s_fs_info;
+
+       generic_shutdown_super(sb);
+       bch2_fs_free(c);
+}
+
+static struct file_system_type bcache_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "bcachefs",
+       .mount          = bch2_mount,
+       .kill_sb        = bch2_kill_sb,
+       .fs_flags       = FS_REQUIRES_DEV,
+};
+
+MODULE_ALIAS_FS("bcachefs");
+
+void bch2_vfs_exit(void)
+{
+       unregister_filesystem(&bcache_fs_type);
+       if (bch2_inode_cache)
+               kmem_cache_destroy(bch2_inode_cache);
+}
+
+int __init bch2_vfs_init(void)
+{
+       int ret = -ENOMEM;
+
+       bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0);
+       if (!bch2_inode_cache)
+               goto err;
+
+       ret = register_filesystem(&bcache_fs_type);
+       if (ret)
+               goto err;
+
+       return 0;
+err:
+       bch2_vfs_exit();
+       return ret;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h
new file mode 100644 (file)
index 0000000..eda903a
--- /dev/null
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_H
+#define _BCACHEFS_FS_H
+
+#include "inode.h"
+#include "opts.h"
+#include "str_hash.h"
+#include "quota_types.h"
+
+#include <linux/seqlock.h>
+#include <linux/stat.h>
+
+/*
+ * Two-state lock - can be taken for add or block - both states are shared,
+ * like the read side of an rwsem, but each conflicts with the other state:
+ */
+struct pagecache_lock {
+       atomic_long_t           v;
+       wait_queue_head_t       wait;
+};
+
+static inline void pagecache_lock_init(struct pagecache_lock *lock)
+{
+       atomic_long_set(&lock->v, 0);
+       init_waitqueue_head(&lock->wait);
+}
+
+void bch2_pagecache_add_put(struct pagecache_lock *);
+void bch2_pagecache_add_get(struct pagecache_lock *);
+void bch2_pagecache_block_put(struct pagecache_lock *);
+void bch2_pagecache_block_get(struct pagecache_lock *);
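+
+/*
+ * Editor's sketch (not part of this patch): the helpers above are implemented
+ * in fs-io.c; one way to realize two shared-but-conflicting states in the
+ * single counter is a sign convention -- positive while held for "add",
+ * negative while held for "block" -- so same-state holders nest and
+ * opposite-state holders exclude each other:
+ *
+ *	static bool pagecache_trylock(struct pagecache_lock *lock, long i)
+ *	{
+ *		long old, v = atomic_long_read(&lock->v);
+ *
+ *		do {
+ *			old = v;
+ *			if (i > 0 ? v < 0 : v > 0)	/* other state held */
+ *				return false;
+ *		} while ((v = atomic_long_cmpxchg(&lock->v, old, old + i)) != old);
+ *		return true;
+ *	}
+ */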
+
+struct bch_inode_info {
+       struct inode            v;
+
+       struct mutex            ei_update_lock;
+       u64                     ei_journal_seq;
+       u64                     ei_quota_reserved;
+       unsigned long           ei_last_dirtied;
+
+       struct pagecache_lock   ei_pagecache_lock;
+
+       struct mutex            ei_quota_lock;
+       struct bch_qid          ei_qid;
+
+       struct bch_hash_info    ei_str_hash;
+
+       /* copy of inode in btree: */
+       struct bch_inode_unpacked ei_inode;
+};
+
+#define to_bch_ei(_inode)                                      \
+       container_of_or_null(_inode, struct bch_inode_info, v)
+
+static inline int ptrcmp(void *l, void *r)
+{
+       return cmp_int(l, r);
+}
+
+enum bch_inode_lock_op {
+       INODE_LOCK              = (1U << 0),
+       INODE_PAGECACHE_BLOCK   = (1U << 1),
+       INODE_UPDATE_LOCK       = (1U << 2),
+};
+
+#define bch2_lock_inodes(_locks, ...)                                  \
+do {                                                                   \
+       struct bch_inode_info *a[] = { NULL, __VA_ARGS__ };             \
+       unsigned i;                                                     \
+                                                                       \
+       bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp);                  \
+                                                                       \
+       for (i = 1; i < ARRAY_SIZE(a); i++)                             \
+               if (a[i] != a[i - 1]) {                                 \
+                       if ((_locks) & INODE_LOCK)                      \
+                               down_write_nested(&a[i]->v.i_rwsem, i); \
+                       if ((_locks) & INODE_PAGECACHE_BLOCK)           \
+                               bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\
+                       if ((_locks) & INODE_UPDATE_LOCK)                       \
+                               mutex_lock_nested(&a[i]->ei_update_lock, i);\
+               }                                                       \
+} while (0)
+
+#define bch2_unlock_inodes(_locks, ...)                                        \
+do {                                                                   \
+       struct bch_inode_info *a[] = { NULL, __VA_ARGS__ };             \
+       unsigned i;                                                     \
+                                                                       \
+       bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp);                  \
+                                                                       \
+       for (i = 1; i < ARRAY_SIZE(a); i++)                             \
+               if (a[i] != a[i - 1]) {                                 \
+                       if ((_locks) & INODE_LOCK)                      \
+                               up_write(&a[i]->v.i_rwsem);             \
+                       if ((_locks) & INODE_PAGECACHE_BLOCK)           \
+                               bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\
+                       if ((_locks) & INODE_UPDATE_LOCK)                       \
+                               mutex_unlock(&a[i]->ei_update_lock);    \
+               }                                                       \
+} while (0)
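+
+/*
+ * Editor's note: a hypothetical caller taking the same lock on two inodes
+ * (e.g. the two directories of a rename) can pass them in any order; the
+ * macros sort by pointer and skip duplicates, giving a consistent lock order:
+ *
+ *	bch2_lock_inodes(INODE_UPDATE_LOCK, src_dir, dst_dir);
+ *	...
+ *	bch2_unlock_inodes(INODE_UPDATE_LOCK, src_dir, dst_dir);
+ */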
+
+static inline struct bch_inode_info *file_bch_inode(struct file *file)
+{
+       return to_bch_ei(file_inode(file));
+}
+
+static inline bool inode_attr_changing(struct bch_inode_info *dir,
+                               struct bch_inode_info *inode,
+                               enum inode_opt_id id)
+{
+       return !(inode->ei_inode.bi_fields_set & (1 << id)) &&
+               bch2_inode_opt_get(&dir->ei_inode, id) !=
+               bch2_inode_opt_get(&inode->ei_inode, id);
+}
+
+static inline bool inode_attrs_changing(struct bch_inode_info *dir,
+                                struct bch_inode_info *inode)
+{
+       unsigned id;
+
+       for (id = 0; id < Inode_opt_nr; id++)
+               if (inode_attr_changing(dir, inode, id))
+                       return true;
+
+       return false;
+}
+
+struct bch_inode_unpacked;
+
+#ifndef NO_BCACHEFS_FS
+
+int bch2_fs_quota_transfer(struct bch_fs *,
+                          struct bch_inode_info *,
+                          struct bch_qid,
+                          unsigned,
+                          enum quota_acct_mode);
+
+static inline int bch2_set_projid(struct bch_fs *c,
+                                 struct bch_inode_info *inode,
+                                 u32 projid)
+{
+       struct bch_qid qid = inode->ei_qid;
+
+       qid.q[QTYP_PRJ] = projid;
+
+       return bch2_fs_quota_transfer(c, inode, qid,
+                                     1 << QTYP_PRJ,
+                                     KEY_TYPE_QUOTA_PREALLOC);
+}
+
+struct inode *bch2_vfs_inode_get(struct bch_fs *, u64);
+
+/* returns 0 if we want to do the update; otherwise the error is passed up */
+typedef int (*inode_set_fn)(struct bch_inode_info *,
+                           struct bch_inode_unpacked *, void *);
+
+void bch2_inode_update_after_write(struct bch_fs *,
+                                  struct bch_inode_info *,
+                                  struct bch_inode_unpacked *,
+                                  unsigned);
+int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
+                                 inode_set_fn, void *, unsigned);
+
+void bch2_vfs_exit(void);
+int bch2_vfs_init(void);
+
+#else
+
+static inline void bch2_vfs_exit(void) {}
+static inline int bch2_vfs_init(void) { return 0; }
+
+#endif /* NO_BCACHEFS_FS */
+
+#endif /* _BCACHEFS_FS_H */
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
new file mode 100644 (file)
index 0000000..5a6df3d
--- /dev/null
@@ -0,0 +1,1502 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_on_stack.h"
+#include "btree_update.h"
+#include "dirent.h"
+#include "error.h"
+#include "fs-common.h"
+#include "fsck.h"
+#include "inode.h"
+#include "keylist.h"
+#include "super.h"
+#include "xattr.h"
+
+#include <linux/dcache.h> /* struct qstr */
+#include <linux/generic-radix-tree.h>
+
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 sectors = 0;
+       int ret;
+
+       for_each_btree_key(trans, iter, BTREE_ID_EXTENTS,
+                          POS(inum, 0), 0, k, ret) {
+               if (k.k->p.inode != inum)
+                       break;
+
+               if (bkey_extent_is_allocation(k.k))
+                       sectors += k.k->size;
+       }
+
+       bch2_trans_iter_free(trans, iter);
+
+       return ret ?: sectors;
+}
+
+static int __remove_dirent(struct btree_trans *trans,
+                          struct bkey_s_c_dirent dirent)
+{
+       struct bch_fs *c = trans->c;
+       struct qstr name;
+       struct bch_inode_unpacked dir_inode;
+       struct bch_hash_info dir_hash_info;
+       u64 dir_inum = dirent.k->p.inode;
+       int ret;
+       char *buf;
+
+       name.len = bch2_dirent_name_bytes(dirent);
+       buf = bch2_trans_kmalloc(trans, name.len + 1);
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
+
+       memcpy(buf, dirent.v->d_name, name.len);
+       buf[name.len] = '\0';
+       name.name = buf;
+
+       ret = bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode);
+       if (ret && ret != -EINTR)
+               bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
+       if (ret)
+               return ret;
+
+       dir_hash_info = bch2_hash_info_init(c, &dir_inode);
+
+       ret = bch2_hash_delete(trans, bch2_dirent_hash_desc,
+                              &dir_hash_info, dir_inum, &name);
+       if (ret && ret != -EINTR)
+               bch_err(c, "remove_dirent: err %i deleting dirent", ret);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int remove_dirent(struct btree_trans *trans,
+                        struct bkey_s_c_dirent dirent)
+{
+       return __bch2_trans_do(trans, NULL, NULL,
+                              BTREE_INSERT_NOFAIL|
+                              BTREE_INSERT_LAZY_RW,
+                              __remove_dirent(trans, dirent));
+}
+
+static int reattach_inode(struct bch_fs *c,
+                         struct bch_inode_unpacked *lostfound_inode,
+                         u64 inum)
+{
+       struct bch_inode_unpacked dir_u, inode_u;
+       char name_buf[20];
+       struct qstr name;
+       int ret;
+
+       snprintf(name_buf, sizeof(name_buf), "%llu", inum);
+       name = (struct qstr) QSTR(name_buf);
+
+       ret = bch2_trans_do(c, NULL, NULL,
+                           BTREE_INSERT_LAZY_RW,
+               bch2_link_trans(&trans, lostfound_inode->bi_inum,
+                               inum, &dir_u, &inode_u, &name));
+       if (ret)
+               bch_err(c, "error %i reattaching inode %llu", ret, inum);
+
+       return ret;
+}
+
+struct inode_walker {
+       bool                    first_this_inode;
+       bool                    have_inode;
+       u64                     cur_inum;
+       struct bch_inode_unpacked inode;
+};
+
+static struct inode_walker inode_walker_init(void)
+{
+       return (struct inode_walker) {
+               .cur_inum       = -1,
+               .have_inode     = false,
+       };
+}
+
+static int walk_inode(struct btree_trans *trans,
+                     struct inode_walker *w, u64 inum)
+{
+       if (inum != w->cur_inum) {
+               int ret = bch2_inode_find_by_inum_trans(trans, inum,
+                                                       &w->inode);
+
+               if (ret && ret != -ENOENT)
+                       return ret;
+
+               w->have_inode   = !ret;
+               w->cur_inum     = inum;
+               w->first_this_inode = true;
+       } else {
+               w->first_this_inode = false;
+       }
+
+       return 0;
+}
+
+struct hash_check {
+       struct bch_hash_info    info;
+
+       /* start of current chain of hash collisions: */
+       struct btree_iter       *chain;
+
+       /* next offset in current chain of hash collisions: */
+       u64                     chain_end;
+};
+
+static void hash_check_init(struct hash_check *h)
+{
+       h->chain = NULL;
+       h->chain_end = 0;
+}
+
+static void hash_stop_chain(struct btree_trans *trans,
+                           struct hash_check *h)
+{
+       if (h->chain)
+               bch2_trans_iter_free(trans, h->chain);
+       h->chain = NULL;
+}
+
+static void hash_check_set_inode(struct btree_trans *trans,
+                                struct hash_check *h,
+                                const struct bch_inode_unpacked *bi)
+{
+       h->info = bch2_hash_info_init(trans->c, bi);
+       hash_stop_chain(trans, h);
+}
+
+static int hash_redo_key(const struct bch_hash_desc desc,
+                        struct btree_trans *trans, struct hash_check *h,
+                        struct btree_iter *k_iter, struct bkey_s_c k,
+                        u64 hashed)
+{
+       struct bkey_i delete;
+       struct bkey_i *tmp;
+
+       tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
+
+       bkey_reassemble(tmp, k);
+
+       bkey_init(&delete.k);
+       delete.k.p = k_iter->pos;
+       bch2_trans_update(trans, k_iter, &delete, 0);
+
+       return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
+                            tmp, BCH_HASH_SET_MUST_CREATE);
+}
+
+static int fsck_hash_delete_at(struct btree_trans *trans,
+                              const struct bch_hash_desc desc,
+                              struct bch_hash_info *info,
+                              struct btree_iter *iter)
+{
+       int ret;
+retry:
+       ret   = bch2_hash_delete_at(trans, desc, info, iter) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BTREE_INSERT_NOFAIL|
+                                 BTREE_INSERT_LAZY_RW);
+       if (ret == -EINTR) {
+               ret = bch2_btree_iter_traverse(iter);
+               if (!ret)
+                       goto retry;
+       }
+
+       return ret;
+}
+
+static int hash_check_duplicates(struct btree_trans *trans,
+                       const struct bch_hash_desc desc, struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *iter;
+       struct bkey_s_c k2;
+       char buf[200];
+       int ret = 0;
+
+       if (!bkey_cmp(h->chain->pos, k_iter->pos))
+               return 0;
+
+       iter = bch2_trans_copy_iter(trans, h->chain);
+       BUG_ON(IS_ERR(iter));
+
+       for_each_btree_key_continue(iter, 0, k2, ret) {
+               if (bkey_cmp(k2.k->p, k.k->p) >= 0)
+                       break;
+
+               if (fsck_err_on(k2.k->type == desc.key_type &&
+                               !desc.cmp_bkey(k, k2), c,
+                               "duplicate hash table keys:\n%s",
+                               (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                                      k), buf))) {
+                       ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter);
+                       if (ret)
+                               return ret;
+                       ret = 1;
+                       break;
+               }
+       }
+fsck_err:
+       bch2_trans_iter_free(trans, iter);
+       return ret;
+}
+
+static void hash_set_chain_start(struct btree_trans *trans,
+                       const struct bch_hash_desc desc,
+                       struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
+{
+       bool hole = (k.k->type != KEY_TYPE_whiteout &&
+                    k.k->type != desc.key_type);
+
+       if (hole || k.k->p.offset > h->chain_end + 1)
+               hash_stop_chain(trans, h);
+
+       if (!hole) {
+               if (!h->chain) {
+                       h->chain = bch2_trans_copy_iter(trans, k_iter);
+                       BUG_ON(IS_ERR(h->chain));
+               }
+
+               h->chain_end = k.k->p.offset;
+       }
+}
+
+static bool key_has_correct_hash(struct btree_trans *trans,
+                       const struct bch_hash_desc desc,
+                       struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
+{
+       u64 hash;
+
+       hash_set_chain_start(trans, desc, h, k_iter, k);
+
+       if (k.k->type != desc.key_type)
+               return true;
+
+       hash = desc.hash_bkey(&h->info, k);
+
+       return hash >= h->chain->pos.offset &&
+               hash <= k.k->p.offset;
+}
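+
+/*
+ * Editor's note: this is the linear-probing invariant -- a key stored at
+ * offset p must hash into [chain_start, p], since lookups resolve collisions
+ * by scanning forward from the hashed position. E.g. with a chain starting
+ * at offset 10, a key found at offset 12 must hash to 10, 11 or 12; anything
+ * else would be invisible to lookup and needs rehashing.
+ */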
+
+static int hash_check_key(struct btree_trans *trans,
+                       const struct bch_hash_desc desc, struct hash_check *h,
+                       struct btree_iter *k_iter, struct bkey_s_c k)
+{
+       struct bch_fs *c = trans->c;
+       char buf[200];
+       u64 hashed;
+       int ret = 0;
+
+       hash_set_chain_start(trans, desc, h, k_iter, k);
+
+       if (k.k->type != desc.key_type)
+               return 0;
+
+       hashed = desc.hash_bkey(&h->info, k);
+
+       if (fsck_err_on(hashed < h->chain->pos.offset ||
+                       hashed > k.k->p.offset, c,
+                       "hash table key at wrong offset: btree %u, %llu, "
+                       "hashed to %llu chain starts at %llu\n%s",
+                       desc.btree_id, k.k->p.offset,
+                       hashed, h->chain->pos.offset,
+                       (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) {
+               ret = __bch2_trans_do(trans, NULL, NULL,
+                                     BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+                       hash_redo_key(desc, trans, h, k_iter, k, hashed));
+               if (ret) {
+                       bch_err(c, "hash_redo_key err %i", ret);
+                       return ret;
+               }
+               return 1;
+       }
+
+       ret = hash_check_duplicates(trans, desc, h, k_iter, k);
+fsck_err:
+       return ret;
+}
+
+static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
+                            struct btree_iter *iter, struct bkey_s_c *k)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_i_dirent *d = NULL;
+       int ret = -EINVAL;
+       char buf[200];
+       unsigned len;
+       u64 hash;
+
+       if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k))
+               return 0;
+
+       len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
+       BUG_ON(!len);
+
+       memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len);
+       buf[len] = '\0';
+
+       d = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+       if (!d) {
+               bch_err(c, "memory allocation failure");
+               return -ENOMEM;
+       }
+
+       bkey_reassemble(&d->k_i, *k);
+
+       do {
+               --len;
+               if (!len)
+                       goto err_redo;
+
+               d->k.u64s = BKEY_U64s + dirent_val_u64s(len);
+
+               BUG_ON(bkey_val_bytes(&d->k) <
+                      offsetof(struct bch_dirent, d_name) + len);
+
+               memset(d->v.d_name + len, 0,
+                      bkey_val_bytes(&d->k) -
+                      offsetof(struct bch_dirent, d_name) - len);
+
+               hash = bch2_dirent_hash_desc.hash_bkey(&h->info,
+                                               bkey_i_to_s_c(&d->k_i));
+       } while (hash < h->chain->pos.offset ||
+                hash > k->k->p.offset);
+
+       if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
+                    buf, strlen(buf), d->v.d_name, len)) {
+               ret = __bch2_trans_do(trans, NULL, NULL,
+                                     BTREE_INSERT_NOFAIL|
+                                     BTREE_INSERT_LAZY_RW,
+                       (bch2_trans_update(trans, iter, &d->k_i, 0), 0));
+               if (ret)
+                       goto err;
+
+               *k = bch2_btree_iter_peek(iter);
+
+               BUG_ON(k->k->type != KEY_TYPE_dirent);
+       }
+err:
+fsck_err:
+       kfree(d);
+       return ret;
+err_redo:
+       hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k);
+
+       if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n"
+                    "hash table key at wrong offset: btree %u, offset %llu, "
+                    "hashed to %llu chain starts at %llu\n%s",
+                    buf, strlen(buf), BTREE_ID_DIRENTS,
+                    k->k->p.offset, hash, h->chain->pos.offset,
+                    (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                           *k), buf))) {
+               ret = __bch2_trans_do(trans, NULL, NULL,
+                                     BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+                       hash_redo_key(bch2_dirent_hash_desc, trans,
+                                     h, iter, *k, hash));
+               if (ret)
+                       bch_err(c, "hash_redo_key err %i", ret);
+               else
+                       ret = 1;
+       }
+
+       goto err;
+}
+
+static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size)
+{
+       return bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
+                       POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9),
+                       POS(inode_nr + 1, 0), NULL);
+}
+
+static int bch2_fix_overlapping_extent(struct btree_trans *trans,
+                                      struct btree_iter *iter,
+                                      struct bkey_s_c k, struct bpos cut_at)
+{
+       struct btree_iter *u_iter;
+       struct bkey_i *u;
+       int ret;
+
+       u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       ret = PTR_ERR_OR_ZERO(u);
+       if (ret)
+               return ret;
+
+       bkey_reassemble(u, k);
+       bch2_cut_front(cut_at, u);
+
+       u_iter = bch2_trans_copy_iter(trans, iter);
+       ret = PTR_ERR_OR_ZERO(u_iter);
+       if (ret)
+               return ret;
+
+       /*
+        * We don't want to go through the
+        * extent_handle_overwrites path:
+        */
+       __bch2_btree_iter_set_pos(u_iter, u->k.p, false);
+
+       /*
+        * XXX: this is going to leave disk space
+        * accounting slightly wrong
+        */
+       ret = bch2_trans_update(trans, u_iter, u, 0);
+       bch2_trans_iter_put(trans, u_iter);
+       return ret;
+}
+
+/*
+ * Walk extents: verify that extents have a corresponding S_ISREG or S_ISLNK
+ * inode, and that i_size and i_sectors are consistent
+ */
+noinline_for_stack
+static int check_extents(struct bch_fs *c)
+{
+       struct inode_walker w = inode_walker_init();
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_on_stack prev;
+       u64 i_sectors;
+       int ret = 0;
+
+       bkey_on_stack_init(&prev);
+       prev.k->k = KEY(0, 0, 0);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       bch_verbose(c, "checking extents");
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  POS(BCACHEFS_ROOT_INO, 0),
+                                  BTREE_ITER_INTENT);
+retry:
+       for_each_btree_key_continue(iter, 0, k, ret) {
+               if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
+                       char buf1[200];
+                       char buf2[200];
+
+                       bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
+                       bch2_bkey_val_to_text(&PBUF(buf2), c, k);
+
+                       if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
+                               ret = __bch2_trans_do(&trans, NULL, NULL,
+                                                     BTREE_INSERT_NOFAIL|
+                                                     BTREE_INSERT_LAZY_RW,
+                                               bch2_fix_overlapping_extent(&trans,
+                                                               iter, k, prev.k->k.p));
+                               if (ret)
+                                       goto err;
+                       }
+               }
+               bkey_on_stack_reassemble(&prev, c, k);
+
+               ret = walk_inode(&trans, &w, k.k->p.inode);
+               if (ret)
+                       break;
+
+               if (fsck_err_on(!w.have_inode, c,
+                       "extent type %u for missing inode %llu",
+                       k.k->type, k.k->p.inode) ||
+                   fsck_err_on(w.have_inode &&
+                       !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
+                       "extent type %u for non regular file, inode %llu mode %o",
+                       k.k->type, k.k->p.inode, w.inode.bi_mode)) {
+                       bch2_trans_unlock(&trans);
+
+                       ret = bch2_inode_truncate(c, k.k->p.inode, 0);
+                       if (ret)
+                               goto err;
+                       continue;
+               }
+
+               if (fsck_err_on(w.first_this_inode &&
+                       w.have_inode &&
+                       !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
+                       w.inode.bi_sectors !=
+                       (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)),
+                       c, "inode %llu has incorrect i_sectors: got %llu, should be %llu",
+                       w.inode.bi_inum,
+                       w.inode.bi_sectors, i_sectors)) {
+                       struct bkey_inode_buf p;
+
+                       w.inode.bi_sectors = i_sectors;
+
+                       bch2_trans_unlock(&trans);
+
+                       bch2_inode_pack(&p, &w.inode);
+
+                       ret = bch2_btree_insert(c, BTREE_ID_INODES,
+                                               &p.inode.k_i, NULL, NULL,
+                                               BTREE_INSERT_NOFAIL|
+                                               BTREE_INSERT_LAZY_RW);
+                       if (ret) {
+                               bch_err(c, "error in fsck: error %i updating inode", ret);
+                               goto err;
+                       }
+
+                       /* revalidate iterator: */
+                       k = bch2_btree_iter_peek(iter);
+               }
+
+               if (fsck_err_on(w.have_inode &&
+                       !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+                       k.k->type != KEY_TYPE_reservation &&
+                       k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
+                       "extent type %u offset %llu past end of inode %llu, i_size %llu",
+                       k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
+                       bch2_trans_unlock(&trans);
+
+                       ret = bch2_inode_truncate(c, k.k->p.inode,
+                                                 w.inode.bi_size);
+                       if (ret)
+                               goto err;
+                       continue;
+               }
+       }
+err:
+fsck_err:
+       if (ret == -EINTR)
+               goto retry;
+       bkey_on_stack_exit(&prev, c);
+       return bch2_trans_exit(&trans) ?: ret;
+}
+
+/*
+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
+ * validate d_type
+ */
+noinline_for_stack
+static int check_dirents(struct bch_fs *c)
+{
+       struct inode_walker w = inode_walker_init();
+       struct hash_check h;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       unsigned name_len;
+       char buf[200];
+       int ret = 0;
+
+       bch_verbose(c, "checking dirents");
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       hash_check_init(&h);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
+                                  POS(BCACHEFS_ROOT_INO, 0), 0);
+retry:
+       for_each_btree_key_continue(iter, 0, k, ret) {
+               struct bkey_s_c_dirent d;
+               struct bch_inode_unpacked target;
+               bool have_target;
+               u64 d_inum;
+
+               ret = walk_inode(&trans, &w, k.k->p.inode);
+               if (ret)
+                       break;
+
+               if (fsck_err_on(!w.have_inode, c,
+                               "dirent in nonexisting directory:\n%s",
+                               (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                                      k), buf)) ||
+                   fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
+                               "dirent in non directory inode type %u:\n%s",
+                               mode_to_type(w.inode.bi_mode),
+                               (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                                      k), buf))) {
+                       ret = bch2_btree_delete_at(&trans, iter, 0);
+                       if (ret)
+                               goto err;
+                       continue;
+               }
+
+               if (w.first_this_inode && w.have_inode)
+                       hash_check_set_inode(&trans, &h, &w.inode);
+
+               ret = check_dirent_hash(&trans, &h, iter, &k);
+               if (ret > 0) {
+                       ret = 0;
+                       continue;
+               }
+               if (ret)
+                       goto fsck_err;
+
+               if (k.k->type != KEY_TYPE_dirent)
+                       continue;
+
+               d = bkey_s_c_to_dirent(k);
+               d_inum = le64_to_cpu(d.v->d_inum);
+
+               name_len = bch2_dirent_name_bytes(d);
+
+               if (fsck_err_on(!name_len, c, "empty dirent") ||
+                   fsck_err_on(name_len == 1 &&
+                               !memcmp(d.v->d_name, ".", 1), c,
+                               ". dirent") ||
+                   fsck_err_on(name_len == 2 &&
+                               !memcmp(d.v->d_name, "..", 2), c,
+                               ".. dirent") ||
+                   fsck_err_on(name_len == 2 &&
+                               !memcmp(d.v->d_name, "..", 2), c,
+                               ".. dirent") ||
+                   fsck_err_on(memchr(d.v->d_name, '/', name_len), c,
+                               "dirent name has invalid chars")) {
+                       ret = remove_dirent(&trans, d);
+                       if (ret)
+                               goto err;
+                       continue;
+               }
+
+               if (fsck_err_on(d_inum == d.k->p.inode, c,
+                               "dirent points to own directory:\n%s",
+                               (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                                      k), buf))) {
+                       ret = remove_dirent(&trans, d);
+                       if (ret)
+                               goto err;
+                       continue;
+               }
+
+               ret = bch2_inode_find_by_inum_trans(&trans, d_inum, &target);
+               if (ret && ret != -ENOENT)
+                       break;
+
+               have_target = !ret;
+               ret = 0;
+
+               if (fsck_err_on(!have_target, c,
+                               "dirent points to missing inode:\n%s",
+                               (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                                      k), buf))) {
+                       ret = remove_dirent(&trans, d);
+                       if (ret)
+                               goto err;
+                       continue;
+               }
+
+               if (fsck_err_on(have_target &&
+                               d.v->d_type !=
+                               mode_to_type(target.bi_mode), c,
+                               "incorrect d_type: should be %u:\n%s",
+                               mode_to_type(target.bi_mode),
+                               (bch2_bkey_val_to_text(&PBUF(buf), c,
+                                                      k), buf))) {
+                       struct bkey_i_dirent *n;
+
+                       n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
+                       if (!n) {
+                               ret = -ENOMEM;
+                               goto err;
+                       }
+
+                       bkey_reassemble(&n->k_i, d.s_c);
+                       n->v.d_type = mode_to_type(target.bi_mode);
+
+                       ret = __bch2_trans_do(&trans, NULL, NULL,
+                                             BTREE_INSERT_NOFAIL|
+                                             BTREE_INSERT_LAZY_RW,
+                               (bch2_trans_update(&trans, iter, &n->k_i, 0), 0));
+                       kfree(n);
+                       if (ret)
+                               goto err;
+
+               }
+       }
+
+       hash_stop_chain(&trans, &h);
+err:
+fsck_err:
+       if (ret == -EINTR)
+               goto retry;
+
+       return bch2_trans_exit(&trans) ?: ret;
+}
+
+/*
+ * Walk xattrs: verify that they all have a corresponding inode
+ */
+noinline_for_stack
+static int check_xattrs(struct bch_fs *c)
+{
+       struct inode_walker w = inode_walker_init();
+       struct hash_check h;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch_verbose(c, "checking xattrs");
+
+       hash_check_init(&h);
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
+                                  POS(BCACHEFS_ROOT_INO, 0), 0);
+retry:
+       for_each_btree_key_continue(iter, 0, k, ret) {
+               ret = walk_inode(&trans, &w, k.k->p.inode);
+               if (ret)
+                       break;
+
+               if (fsck_err_on(!w.have_inode, c,
+                               "xattr for missing inode %llu",
+                               k.k->p.inode)) {
+                       ret = bch2_btree_delete_at(&trans, iter, 0);
+                       if (ret)
+                               goto err;
+                       continue;
+               }
+
+               if (w.first_this_inode && w.have_inode)
+                       hash_check_set_inode(&trans, &h, &w.inode);
+
+               ret = hash_check_key(&trans, bch2_xattr_hash_desc,
+                                    &h, iter, k);
+               if (ret)
+                       goto fsck_err;
+       }
+err:
+fsck_err:
+       if (ret == -EINTR)
+               goto retry;
+       return bch2_trans_exit(&trans) ?: ret;
+}
+
+/* Get root directory, create if it doesn't exist: */
+static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
+{
+       struct bkey_inode_buf packed;
+       int ret;
+
+       bch_verbose(c, "checking root directory");
+
+       ret = bch2_inode_find_by_inum(c, BCACHEFS_ROOT_INO, root_inode);
+       if (ret && ret != -ENOENT)
+               return ret;
+
+       if (fsck_err_on(ret, c, "root directory missing"))
+               goto create_root;
+
+       if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
+                       "root inode not a directory"))
+               goto create_root;
+
+       return 0;
+fsck_err:
+       return ret;
+create_root:
+       bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755,
+                       0, NULL);
+       root_inode->bi_inum = BCACHEFS_ROOT_INO;
+
+       bch2_inode_pack(&packed, root_inode);
+
+       return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
+                                NULL, NULL,
+                                BTREE_INSERT_NOFAIL|
+                                BTREE_INSERT_LAZY_RW);
+}
+
+/* Get lost+found, create if it doesn't exist: */
+static int check_lostfound(struct bch_fs *c,
+                          struct bch_inode_unpacked *root_inode,
+                          struct bch_inode_unpacked *lostfound_inode)
+{
+       struct qstr lostfound = QSTR("lost+found");
+       struct bch_hash_info root_hash_info =
+               bch2_hash_info_init(c, root_inode);
+       u64 inum;
+       int ret;
+
+       bch_verbose(c, "checking lost+found");
+
+       inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
+                                &lostfound);
+       if (!inum) {
+               bch_notice(c, "creating lost+found");
+               goto create_lostfound;
+       }
+
+       ret = bch2_inode_find_by_inum(c, inum, lostfound_inode);
+       if (ret && ret != -ENOENT)
+               return ret;
+
+       if (fsck_err_on(ret, c, "lost+found missing"))
+               goto create_lostfound;
+
+       if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
+                       "lost+found inode not a directory"))
+               goto create_lostfound;
+
+       return 0;
+fsck_err:
+       return ret;
+create_lostfound:
+       bch2_inode_init_early(c, lostfound_inode);
+
+       ret = bch2_trans_do(c, NULL, NULL,
+                           BTREE_INSERT_NOFAIL|
+                           BTREE_INSERT_LAZY_RW,
+               bch2_create_trans(&trans,
+                                 BCACHEFS_ROOT_INO, root_inode,
+                                 lostfound_inode, &lostfound,
+                                 0, 0, S_IFDIR|0700, 0, NULL, NULL));
+       if (ret)
+               bch_err(c, "error creating lost+found: %i", ret);
+
+       return ret;
+}
+
+struct inode_bitmap {
+       unsigned long   *bits;
+       size_t          size;
+};
+
+static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
+{
+       return nr < b->size ? test_bit(nr, b->bits) : false;
+}
+
+static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
+{
+       if (nr >= b->size) {
+               size_t new_size = max_t(size_t, max_t(size_t,
+                                       PAGE_SIZE * 8,
+                                       b->size * 2),
+                                       nr + 1);
+               void *n;
+
+               new_size = roundup_pow_of_two(new_size);
+               n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
+               if (!n)
+                       return -ENOMEM;
+
+               b->bits = n;
+               b->size = new_size;
+       }
+
+       __set_bit(nr, b->bits);
+       return 0;
+}
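+
+/*
+ * Editor's note: the bitmap grows on demand -- at least a page of bits, at
+ * least doubling, rounded up to a power of two -- so e.g.:
+ *
+ *	struct inode_bitmap seen = { NULL, 0 };
+ *
+ *	if (!inode_bitmap_set(&seen, 4096) &&
+ *	    inode_bitmap_test(&seen, 4096))
+ *		...
+ *	kfree(seen.bits);
+ */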
+
+struct pathbuf {
+       size_t          nr;
+       size_t          size;
+
+       struct pathbuf_entry {
+               u64     inum;
+               u64     offset;
+       }               *entries;
+};
+
+static int path_down(struct pathbuf *p, u64 inum)
+{
+       if (p->nr == p->size) {
+               size_t new_size = max_t(size_t, 256UL, p->size * 2);
+               void *n = krealloc(p->entries,
+                                  new_size * sizeof(p->entries[0]),
+                                  GFP_KERNEL);
+               if (!n)
+                       return -ENOMEM;
+
+               p->entries = n;
+               p->size = new_size;
+       }
+
+       p->entries[p->nr++] = (struct pathbuf_entry) {
+               .inum = inum,
+               .offset = 0,
+       };
+       return 0;
+}
+
+noinline_for_stack
+static int check_directory_structure(struct bch_fs *c,
+                                    struct bch_inode_unpacked *lostfound_inode)
+{
+       struct inode_bitmap dirs_done = { NULL, 0 };
+       struct pathbuf path = { 0, 0, NULL };
+       struct pathbuf_entry *e;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_s_c_dirent dirent;
+       bool had_unreachable;
+       u64 d_inum;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       bch_verbose(c, "checking directory structure");
+
+       /* DFS: */
+restart_dfs:
+       had_unreachable = false;
+
+       ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
+       if (ret) {
+               bch_err(c, "memory allocation failure in inode_bitmap_set()");
+               goto err;
+       }
+
+       ret = path_down(&path, BCACHEFS_ROOT_INO);
+       if (ret)
+               goto err;
+
+       while (path.nr) {
+next:
+               e = &path.entries[path.nr - 1];
+
+               if (e->offset == U64_MAX)
+                       goto up;
+
+               for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS,
+                                  POS(e->inum, e->offset + 1), 0, k, ret) {
+                       if (k.k->p.inode != e->inum)
+                               break;
+
+                       e->offset = k.k->p.offset;
+
+                       if (k.k->type != KEY_TYPE_dirent)
+                               continue;
+
+                       dirent = bkey_s_c_to_dirent(k);
+
+                       if (dirent.v->d_type != DT_DIR)
+                               continue;
+
+                       d_inum = le64_to_cpu(dirent.v->d_inum);
+
+                       if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
+                                       "directory %llu has multiple hardlinks",
+                                       d_inum)) {
+                               ret = remove_dirent(&trans, dirent);
+                               if (ret)
+                                       goto err;
+                               continue;
+                       }
+
+                       ret = inode_bitmap_set(&dirs_done, d_inum);
+                       if (ret) {
+                               bch_err(c, "memory allocation failure in inode_bitmap_set()");
+                               goto err;
+                       }
+
+                       ret = path_down(&path, d_inum);
+                       if (ret)
+                               goto err;
+
+                       ret = bch2_trans_iter_free(&trans, iter);
+                       if (ret) {
+                               bch_err(c, "btree error %i in fsck", ret);
+                               goto err;
+                       }
+                       goto next;
+               }
+               ret = bch2_trans_iter_free(&trans, iter) ?: ret;
+               if (ret) {
+                       bch_err(c, "btree error %i in fsck", ret);
+                       goto err;
+               }
+up:
+               path.nr--;
+       }
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0);
+retry:
+       for_each_btree_key_continue(iter, 0, k, ret) {
+               if (k.k->type != KEY_TYPE_inode)
+                       continue;
+
+               if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
+                       continue;
+
+               ret = bch2_empty_dir_trans(&trans, k.k->p.inode);
+               if (ret == -EINTR)
+                       goto retry;
+               if (!ret)
+                       continue;
+
+               if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c,
+                               "unreachable directory found (inum %llu)",
+                               k.k->p.offset)) {
+                       bch2_trans_unlock(&trans);
+
+                       ret = reattach_inode(c, lostfound_inode, k.k->p.offset);
+                       if (ret)
+                               goto err;
+
+                       had_unreachable = true;
+               }
+       }
+       bch2_trans_iter_free(&trans, iter);
+       if (ret)
+               goto err;
+
+       if (had_unreachable) {
+               bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
+               kfree(dirs_done.bits);
+               kfree(path.entries);
+               memset(&dirs_done, 0, sizeof(dirs_done));
+               memset(&path, 0, sizeof(path));
+               goto restart_dfs;
+       }
+err:
+fsck_err:
+       ret = bch2_trans_exit(&trans) ?: ret;
+       kfree(dirs_done.bits);
+       kfree(path.entries);
+       return ret;
+}
+
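+/*
+ * For each inode, count (from the dirents) the ordinary links pointing at it
+ * and, separately, the number of subdirectory entries it contains - each
+ * subdirectory's ".." contributes an extra link to a directory's i_nlink:
+ */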
+struct nlink {
+       u32     count;
+       u32     dir_count;
+};
+
+typedef GENRADIX(struct nlink) nlink_table;
+
+static void inc_link(struct bch_fs *c, nlink_table *links,
+                    u64 range_start, u64 *range_end,
+                    u64 inum, bool dir)
+{
+       struct nlink *link;
+
+       if (inum < range_start || inum >= *range_end)
+               return;
+
+       link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
+       if (!link) {
+               bch_verbose(c, "allocation failed during fsck - will need another pass");
+               *range_end = inum;
+               return;
+       }
+
+       if (dir)
+               link->dir_count++;
+       else
+               link->count++;
+}
+
+noinline_for_stack
+static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
+                              u64 range_start, u64 *range_end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_s_c_dirent d;
+       u64 d_inum;
+       int ret;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) {
+               switch (k.k->type) {
+               case KEY_TYPE_dirent:
+                       d = bkey_s_c_to_dirent(k);
+                       d_inum = le64_to_cpu(d.v->d_inum);
+
+                       if (d.v->d_type == DT_DIR)
+                               inc_link(c, links, range_start, range_end,
+                                        d.k->p.inode, true);
+
+                       inc_link(c, links, range_start, range_end,
+                                d_inum, false);
+
+                       break;
+               }
+
+               bch2_trans_cond_resched(&trans);
+       }
+       ret = bch2_trans_exit(&trans) ?: ret;
+       if (ret)
+               bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
+
+       return ret;
+}
+
+static int check_inode_nlink(struct bch_fs *c,
+                            struct bch_inode_unpacked *lostfound_inode,
+                            struct bch_inode_unpacked *u,
+                            struct nlink *link,
+                            bool *do_update)
+{
+       u32 i_nlink = bch2_inode_nlink_get(u);
+       u32 real_i_nlink =
+               link->count * nlink_bias(u->bi_mode) +
+               link->dir_count;
+       int ret = 0;
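+
+       /*
+        * Expected link count: regular files get one link per dirent
+        * (nlink_bias() == 1); directories get a base of 2 - "." plus the
+        * dirent pointing at them - and one more for each subdirectory's
+        * "..", which is what link->dir_count tracks.
+        */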
+
+       /*
+        * These should have been caught/fixed by earlier passes, we don't
+        * repair them here:
+        */
+       if (S_ISDIR(u->bi_mode) && link->count > 1) {
+               need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
+                             u->bi_inum, link->count);
+               return 0;
+       }
+
+       if (S_ISDIR(u->bi_mode) && !link->count) {
+               need_fsck_err(c, "unreachable directory found (inum %llu)",
+                             u->bi_inum);
+               return 0;
+       }
+
+       if (!S_ISDIR(u->bi_mode) && link->dir_count) {
+               need_fsck_err(c, "non directory with subdirectories (inum %llu)",
+                             u->bi_inum);
+               return 0;
+       }
+
+       if (!link->count &&
+           !(u->bi_flags & BCH_INODE_UNLINKED) &&
+           (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) {
+               if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)",
+                            u->bi_inum, mode_to_type(u->bi_mode)) ==
+                   FSCK_ERR_IGNORE)
+                       return 0;
+
+               ret = reattach_inode(c, lostfound_inode, u->bi_inum);
+               if (ret)
+                       return ret;
+
+               link->count = 1;
+               real_i_nlink = nlink_bias(u->bi_mode) + link->dir_count;
+               goto set_i_nlink;
+       }
+
+       if (i_nlink < link->count) {
+               if (fsck_err(c, "inode %llu i_nlink too small (%u < %u, type %i)",
+                            u->bi_inum, i_nlink, link->count,
+                            mode_to_type(u->bi_mode)) == FSCK_ERR_IGNORE)
+                       return 0;
+               goto set_i_nlink;
+       }
+
+       if (i_nlink != real_i_nlink &&
+           c->sb.clean) {
+               if (fsck_err(c, "filesystem marked clean, "
+                            "but inode %llu has wrong i_nlink "
+                            "(type %u i_nlink %u, should be %u)",
+                            u->bi_inum, mode_to_type(u->bi_mode),
+                            i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
+                       return 0;
+               goto set_i_nlink;
+       }
+
+       if (i_nlink != real_i_nlink &&
+           (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) {
+               if (fsck_err(c, "inode %llu has wrong i_nlink "
+                            "(type %u i_nlink %u, should be %u)",
+                            u->bi_inum, mode_to_type(u->bi_mode),
+                            i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
+                       return 0;
+               goto set_i_nlink;
+       }
+
+       if (real_i_nlink && i_nlink != real_i_nlink)
+               bch_verbose(c, "setting inode %llu nlink from %u to %u",
+                           u->bi_inum, i_nlink, real_i_nlink);
+set_i_nlink:
+       if (i_nlink != real_i_nlink) {
+               bch2_inode_nlink_set(u, real_i_nlink);
+               *do_update = true;
+       }
+fsck_err:
+       return ret;
+}
+
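+/*
+ * Check/repair a single inode: fix its link count (when the caller computed
+ * one), finish deleting it if it was left unlinked, and redo interrupted
+ * truncates and sector counts, as indicated by the I_SIZE_DIRTY and
+ * I_SECTORS_DIRTY flags:
+ */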
+static int check_inode(struct btree_trans *trans,
+                      struct bch_inode_unpacked *lostfound_inode,
+                      struct btree_iter *iter,
+                      struct bkey_s_c_inode inode,
+                      struct nlink *link)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_inode_unpacked u;
+       bool do_update = false;
+       int ret = 0;
+
+       ret = bch2_inode_unpack(inode, &u);
+
+       bch2_trans_unlock(trans);
+
+       if (bch2_fs_inconsistent_on(ret, c,
+                        "error unpacking inode %llu in fsck",
+                        inode.k->p.inode))
+               return ret;
+
+       if (link) {
+               ret = check_inode_nlink(c, lostfound_inode, &u, link,
+                                       &do_update);
+               if (ret)
+                       return ret;
+       }
+
+       if (u.bi_flags & BCH_INODE_UNLINKED &&
+           (!c->sb.clean ||
+            fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
+                     u.bi_inum))) {
+               bch_verbose(c, "deleting inode %llu", u.bi_inum);
+
+               bch2_fs_lazy_rw(c);
+
+               ret = bch2_inode_rm(c, u.bi_inum);
+               if (ret)
+                       bch_err(c, "error in fsck: error %i while deleting inode", ret);
+               return ret;
+       }
+
+       if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
+           (!c->sb.clean ||
+            fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
+                     u.bi_inum))) {
+               bch_verbose(c, "truncating inode %llu", u.bi_inum);
+
+               bch2_fs_lazy_rw(c);
+
+               /*
+                * XXX: need to truncate partial blocks too here - or ideally
+                * just switch units to bytes and that issue goes away
+                */
+
+               ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size);
+               if (ret) {
+                       bch_err(c, "error in fsck: error %i truncating inode", ret);
+                       return ret;
+               }
+
+               /*
+                * We truncated without our normal sector accounting hook, just
+                * make sure we recalculate it:
+                */
+               u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
+
+               u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
+               do_update = true;
+       }
+
+       if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
+           (!c->sb.clean ||
+            fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
+                     u.bi_inum))) {
+               s64 sectors;
+
+               bch_verbose(c, "recounting sectors for inode %llu",
+                           u.bi_inum);
+
+               sectors = bch2_count_inode_sectors(trans, u.bi_inum);
+               if (sectors < 0) {
+                       bch_err(c, "error in fsck: error %i recounting inode sectors",
+                               (int) sectors);
+                       return sectors;
+               }
+
+               u.bi_sectors = sectors;
+               u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
+               do_update = true;
+       }
+
+       if (do_update) {
+               struct bkey_inode_buf p;
+
+               bch2_inode_pack(&p, &u);
+
+               ret = __bch2_trans_do(trans, NULL, NULL,
+                                     BTREE_INSERT_NOFAIL|
+                                     BTREE_INSERT_LAZY_RW,
+                       (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0));
+               if (ret)
+                       bch_err(c, "error in fsck: error %i "
+                               "updating inode", ret);
+       }
+fsck_err:
+       return ret;
+}
+
+noinline_for_stack
+static int bch2_gc_walk_inodes(struct bch_fs *c,
+                              struct bch_inode_unpacked *lostfound_inode,
+                              nlink_table *links,
+                              u64 range_start, u64 range_end)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct nlink *link, zero_links = { 0, 0 };
+       struct genradix_iter nlinks_iter;
+       int ret = 0, ret2 = 0;
+       u64 nlinks_pos;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
+                                  POS(0, range_start), 0);
+       nlinks_iter = genradix_iter_init(links, 0);
+
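+       /*
+        * Walk the inode btree and the nlink table in lockstep, like a merge
+        * join: an nlink entry with no matching inode means dirents point at a
+        * missing inode; an inode past the current nlink entry is checked
+        * against zero links:
+        */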
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret2 = bkey_err(k))) {
+peek_nlinks:   link = genradix_iter_peek(&nlinks_iter, links);
+
+               if (!link && (!k.k || iter->pos.offset >= range_end))
+                       break;
+
+               nlinks_pos = range_start + nlinks_iter.pos;
+               if (iter->pos.offset > nlinks_pos) {
+                       /* Should have been caught by dirents pass: */
+                       need_fsck_err_on(link && link->count, c,
+                               "missing inode %llu (nlink %u)",
+                               nlinks_pos, link->count);
+                       genradix_iter_advance(&nlinks_iter, links);
+                       goto peek_nlinks;
+               }
+
+               if (iter->pos.offset < nlinks_pos || !link)
+                       link = &zero_links;
+
+               if (k.k && k.k->type == KEY_TYPE_inode) {
+                       ret = check_inode(&trans, lostfound_inode, iter,
+                                         bkey_s_c_to_inode(k), link);
+                       BUG_ON(ret == -EINTR);
+                       if (ret)
+                               break;
+               } else {
+                       /* Should have been caught by dirents pass: */
+                       need_fsck_err_on(link->count, c,
+                               "missing inode %llu (nlink %u)",
+                               nlinks_pos, link->count);
+               }
+
+               if (nlinks_pos == iter->pos.offset)
+                       genradix_iter_advance(&nlinks_iter, links);
+
+               bch2_btree_iter_next(iter);
+               bch2_trans_cond_resched(&trans);
+       }
+fsck_err:
+       bch2_trans_exit(&trans);
+
+       if (ret2)
+               bch_err(c, "error in fsck: btree error %i while walking inodes", ret2);
+
+       return ret ?: ret2;
+}
+
+noinline_for_stack
+static int check_inode_nlinks(struct bch_fs *c,
+                             struct bch_inode_unpacked *lostfound_inode)
+{
+       nlink_table links;
+       u64 this_iter_range_start, next_iter_range_start = 0;
+       int ret = 0;
+
+       bch_verbose(c, "checking inode nlinks");
+
+       genradix_init(&links);
+
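+       /*
+        * The nlink table may not fit in memory all at once: if
+        * genradix_ptr_alloc() fails, inc_link() shrinks *range_end and we
+        * process the inodes in several passes over smaller ranges:
+        */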
+       do {
+               this_iter_range_start = next_iter_range_start;
+               next_iter_range_start = U64_MAX;
+
+               ret = bch2_gc_walk_dirents(c, &links,
+                                         this_iter_range_start,
+                                         &next_iter_range_start);
+               if (ret)
+                       break;
+
+               ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
+                                        this_iter_range_start,
+                                        next_iter_range_start);
+               if (ret)
+                       break;
+
+               genradix_free(&links);
+       } while (next_iter_range_start != U64_MAX);
+
+       genradix_free(&links);
+
+       return ret;
+}
+
+/*
+ * Checks for inconsistencies that shouldn't happen, unless we have a bug.
+ * Doesn't fix them yet, mainly because they haven't yet been observed:
+ */
+int bch2_fsck_full(struct bch_fs *c)
+{
+       struct bch_inode_unpacked root_inode, lostfound_inode;
+
+       return  check_extents(c) ?:
+               check_dirents(c) ?:
+               check_xattrs(c) ?:
+               check_root(c, &root_inode) ?:
+               check_lostfound(c, &root_inode, &lostfound_inode) ?:
+               check_directory_structure(c, &lostfound_inode) ?:
+               check_inode_nlinks(c, &lostfound_inode);
+}
+
+int bch2_fsck_inode_nlink(struct bch_fs *c)
+{
+       struct bch_inode_unpacked root_inode, lostfound_inode;
+
+       return  check_root(c, &root_inode) ?:
+               check_lostfound(c, &root_inode, &lostfound_inode) ?:
+               check_inode_nlinks(c, &lostfound_inode);
+}
+
+int bch2_fsck_walk_inodes_only(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_s_c_inode inode;
+       int ret;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) {
+               if (k.k->type != KEY_TYPE_inode)
+                       continue;
+
+               inode = bkey_s_c_to_inode(k);
+
+               if (inode.v->bi_flags &
+                   (BCH_INODE_I_SIZE_DIRTY|
+                    BCH_INODE_I_SECTORS_DIRTY|
+                    BCH_INODE_UNLINKED)) {
+                       ret = check_inode(&trans, NULL, iter, inode, NULL);
+                       BUG_ON(ret == -EINTR);
+                       if (ret)
+                               break;
+               }
+       }
+       BUG_ON(ret == -EINTR);
+
+       return bch2_trans_exit(&trans) ?: ret;
+}
diff --git a/libbcachefs/fsck.h b/libbcachefs/fsck.h
new file mode 100644 (file)
index 0000000..9e4af02
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FSCK_H
+#define _BCACHEFS_FSCK_H
+
+int bch2_fsck_full(struct bch_fs *);
+int bch2_fsck_inode_nlink(struct bch_fs *);
+int bch2_fsck_walk_inodes_only(struct bch_fs *);
+
+#endif /* _BCACHEFS_FSCK_H */
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
new file mode 100644 (file)
index 0000000..7d20f08
--- /dev/null
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_update.h"
+#include "error.h"
+#include "extents.h"
+#include "inode.h"
+#include "str_hash.h"
+
+#include <linux/random.h>
+
+#include <asm/unaligned.h>
+
+const char * const bch2_inode_opts[] = {
+#define x(name, ...)   #name,
+       BCH_INODE_OPTS()
+#undef  x
+       NULL,
+};
+
+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
+static const u8 bits_table[8] = {
+       1  * 8 - 1,
+       2  * 8 - 2,
+       3  * 8 - 3,
+       4  * 8 - 4,
+       6  * 8 - 5,
+       8  * 8 - 6,
+       10 * 8 - 7,
+       13 * 8 - 8,
+};
+
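+/*
+ * Inode fields are stored as variable-length integers: the position of the
+ * first set bit in the first byte encodes the total length, with byte_table[]
+ * giving the size in bytes (1-13) and bits_table[] the payload bits available
+ * at each size.  The value is stored big-endian, and the marker bit is XORed
+ * back out on decode:
+ */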
+static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo)
+{
+       __be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), };
+       unsigned shift, bytes, bits = likely(!hi)
+               ? fls64(lo)
+               : fls64(hi) + 64;
+
+       for (shift = 1; shift <= 8; shift++)
+               if (bits < bits_table[shift - 1])
+                       goto got_shift;
+
+       BUG();
+got_shift:
+       bytes = byte_table[shift - 1];
+
+       BUG_ON(out + bytes > end);
+
+       memcpy(out, (u8 *) in + 16 - bytes, bytes);
+       *out |= (1 << 8) >> shift;
+
+       return bytes;
+}
+
+static int inode_decode_field(const u8 *in, const u8 *end,
+                             u64 out[2], unsigned *out_bits)
+{
+       __be64 be[2] = { 0, 0 };
+       unsigned bytes, shift;
+       u8 *p;
+
+       if (in >= end)
+               return -1;
+
+       if (!*in)
+               return -1;
+
+       /*
+        * position of highest set bit indicates number of bytes:
+        * shift = number of bits to remove in high byte:
+        */
+       shift   = 8 - __fls(*in); /* 1 <= shift <= 8 */
+       bytes   = byte_table[shift - 1];
+
+       if (in + bytes > end)
+               return -1;
+
+       p = (u8 *) be + 16 - bytes;
+       memcpy(p, in, bytes);
+       *p ^= (1 << 8) >> shift;
+
+       out[0] = be64_to_cpu(be[0]);
+       out[1] = be64_to_cpu(be[1]);
+       *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);
+
+       return bytes;
+}
+
+void bch2_inode_pack(struct bkey_inode_buf *packed,
+                    const struct bch_inode_unpacked *inode)
+{
+       u8 *out = packed->inode.v.fields;
+       u8 *end = (void *) &packed[1];
+       u8 *last_nonzero_field = out;
+       unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
+       unsigned bytes;
+
+       bkey_inode_init(&packed->inode.k_i);
+       packed->inode.k.p.offset        = inode->bi_inum;
+       packed->inode.v.bi_hash_seed    = inode->bi_hash_seed;
+       packed->inode.v.bi_flags        = cpu_to_le32(inode->bi_flags);
+       packed->inode.v.bi_mode         = cpu_to_le16(inode->bi_mode);
+
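+       /*
+        * Encode the fields in definition order; trailing all-zero fields are
+        * then dropped, which is why the position and number of the last
+        * nonzero field is tracked:
+        */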
+#define x(_name, _bits)                                        \
+       out += inode_encode_field(out, end, 0, inode->_name);           \
+       nr_fields++;                                                    \
+                                                                       \
+       if (inode->_name) {                                             \
+               last_nonzero_field = out;                               \
+               last_nonzero_fieldnr = nr_fields;                       \
+       }
+
+       BCH_INODE_FIELDS()
+#undef  x
+
+       out = last_nonzero_field;
+       nr_fields = last_nonzero_fieldnr;
+
+       bytes = out - (u8 *) &packed->inode.v;
+       set_bkey_val_bytes(&packed->inode.k, bytes);
+       memset_u64s_tail(&packed->inode.v, 0, bytes);
+
+       SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields);
+
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+               struct bch_inode_unpacked unpacked;
+
+               int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode),
+                                          &unpacked);
+               BUG_ON(ret);
+               BUG_ON(unpacked.bi_inum         != inode->bi_inum);
+               BUG_ON(unpacked.bi_hash_seed    != inode->bi_hash_seed);
+               BUG_ON(unpacked.bi_mode         != inode->bi_mode);
+
+#define x(_name, _bits)        BUG_ON(unpacked._name != inode->_name);
+               BCH_INODE_FIELDS()
+#undef  x
+       }
+}
+
+int bch2_inode_unpack(struct bkey_s_c_inode inode,
+                     struct bch_inode_unpacked *unpacked)
+{
+       const u8 *in = inode.v->fields;
+       const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k);
+       u64 field[2];
+       unsigned fieldnr = 0, field_bits;
+       int ret;
+
+       unpacked->bi_inum       = inode.k->p.offset;
+       unpacked->bi_hash_seed  = inode.v->bi_hash_seed;
+       unpacked->bi_flags      = le32_to_cpu(inode.v->bi_flags);
+       unpacked->bi_mode       = le16_to_cpu(inode.v->bi_mode);
+
+#define x(_name, _bits)                                        \
+       if (fieldnr++ == INODE_NR_FIELDS(inode.v)) {                    \
+               memset(&unpacked->_name, 0,                             \
+                      sizeof(*unpacked) -                              \
+                      offsetof(struct bch_inode_unpacked, _name));     \
+               return 0;                                               \
+       }                                                               \
+                                                                       \
+       ret = inode_decode_field(in, end, field, &field_bits);          \
+       if (ret < 0)                                                    \
+               return ret;                                             \
+                                                                       \
+       if (field_bits > sizeof(unpacked->_name) * 8)                   \
+               return -1;                                              \
+                                                                       \
+       unpacked->_name = field[1];                                     \
+       in += ret;
+
+       BCH_INODE_FIELDS()
+#undef  x
+
+       /* XXX: signal if there were more fields than expected? */
+
+       return 0;
+}
+
+struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
+                                  struct bch_inode_unpacked *inode,
+                                  u64 inum, unsigned flags)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum),
+                                  BTREE_ITER_SLOTS|flags);
+       if (IS_ERR(iter))
+               return iter;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
+       if (ret)
+               goto err;
+
+       ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
+       if (ret)
+               goto err;
+
+       return iter;
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ERR_PTR(ret);
+}
+
+int bch2_inode_write(struct btree_trans *trans,
+                    struct btree_iter *iter,
+                    struct bch_inode_unpacked *inode)
+{
+       struct bkey_inode_buf *inode_p;
+
+       inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+       if (IS_ERR(inode_p))
+               return PTR_ERR(inode_p);
+
+       bch2_inode_pack(inode_p, inode);
+       bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
+       return 0;
+}
+
+const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
+       struct bch_inode_unpacked unpacked;
+
+       if (k.k->p.inode)
+               return "nonzero k.p.inode";
+
+       if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
+               return "incorrect value size";
+
+       if (k.k->p.offset < BLOCKDEV_INODE_MAX)
+               return "fs inode in blockdev range";
+
+       if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
+               return "invalid str hash type";
+
+       if (bch2_inode_unpack(inode, &unpacked))
+               return "invalid variable length fields";
+
+       if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1)
+               return "invalid data checksum type";
+
+       if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1)
+               return "invalid compression type";
+
+       if ((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
+           unpacked.bi_nlink != 0)
+               return "flagged as unlinked but bi_nlink != 0";
+
+       return NULL;
+}
+
+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c,
+                      struct bkey_s_c k)
+{
+       struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
+       struct bch_inode_unpacked unpacked;
+
+       if (bch2_inode_unpack(inode, &unpacked)) {
+               pr_buf(out, "(unpack error)");
+               return;
+       }
+
+#define x(_name, _bits)                                                \
+       pr_buf(out, #_name ": %llu ", (u64) unpacked._name);
+       BCH_INODE_FIELDS()
+#undef  x
+}
+
+const char *bch2_inode_generation_invalid(const struct bch_fs *c,
+                                         struct bkey_s_c k)
+{
+       if (k.k->p.inode)
+               return "nonzero k.p.inode";
+
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation))
+               return "incorrect value size";
+
+       return NULL;
+}
+
+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
+                                  struct bkey_s_c k)
+{
+       struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);
+
+       pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
+}
+
+void bch2_inode_init_early(struct bch_fs *c,
+                          struct bch_inode_unpacked *inode_u)
+{
+       enum bch_str_hash_type str_hash =
+               bch2_str_hash_opt_to_type(c, c->opts.str_hash);
+
+       memset(inode_u, 0, sizeof(*inode_u));
+
+       /* ick */
+       inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET;
+       get_random_bytes(&inode_u->bi_hash_seed,
+                        sizeof(inode_u->bi_hash_seed));
+}
+
+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
+                         uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+                         struct bch_inode_unpacked *parent)
+{
+       inode_u->bi_mode        = mode;
+       inode_u->bi_uid         = uid;
+       inode_u->bi_gid         = gid;
+       inode_u->bi_dev         = rdev;
+       inode_u->bi_atime       = now;
+       inode_u->bi_mtime       = now;
+       inode_u->bi_ctime       = now;
+       inode_u->bi_otime       = now;
+
+       if (parent && parent->bi_mode & S_ISGID) {
+               inode_u->bi_gid = parent->bi_gid;
+               if (S_ISDIR(mode))
+                       inode_u->bi_mode |= S_ISGID;
+       }
+
+       if (parent) {
+#define x(_name, ...)  inode_u->bi_##_name = parent->bi_##_name;
+               BCH_INODE_OPTS()
+#undef x
+       }
+}
+
+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
+                    uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+                    struct bch_inode_unpacked *parent)
+{
+       bch2_inode_init_early(c, inode_u);
+       bch2_inode_init_late(inode_u, bch2_current_time(c),
+                            uid, gid, mode, rdev, parent);
+}
+
+static inline u32 bkey_generation(struct bkey_s_c k)
+{
+       switch (k.k->type) {
+       case KEY_TYPE_inode:
+               BUG();
+       case KEY_TYPE_inode_generation:
+               return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
+       default:
+               return 0;
+       }
+}
+
+int bch2_inode_create(struct btree_trans *trans,
+                     struct bch_inode_unpacked *inode_u,
+                     u64 min, u64 max, u64 *hint)
+{
+       struct bkey_inode_buf *inode_p;
+       struct btree_iter *iter = NULL;
+       struct bkey_s_c k;
+       u64 start;
+       int ret;
+
+       if (!max)
+               max = ULLONG_MAX;
+
+       if (trans->c->opts.inodes_32bit)
+               max = min_t(u64, max, U32_MAX);
+
+       start = READ_ONCE(*hint);
+
+       if (start >= max || start < min)
+               start = min;
+
+       inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+       if (IS_ERR(inode_p))
+               return PTR_ERR(inode_p);
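+
+       /*
+        * Scan forward from the hint for a slot not occupied by a live inode;
+        * if we reach max without finding one, retry once from min before
+        * giving up with -ENOSPC:
+        */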
+again:
+       for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start),
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+               if (bkey_cmp(iter->pos, POS(0, max)) > 0)
+                       break;
+
+               if (k.k->type != KEY_TYPE_inode)
+                       goto found_slot;
+       }
+
+       bch2_trans_iter_put(trans, iter);
+
+       if (ret)
+               return ret;
+
+       if (start != min) {
+               /* Retry from start */
+               start = min;
+               goto again;
+       }
+
+       return -ENOSPC;
+found_slot:
+       *hint                   = k.k->p.offset;
+       inode_u->bi_inum        = k.k->p.offset;
+       inode_u->bi_generation  = bkey_generation(k);
+
+       bch2_inode_pack(inode_p, inode_u);
+       bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
+       bch2_trans_iter_put(trans, iter);
+       return 0;
+}
+
+int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_i_inode_generation delete;
+       struct bpos start = POS(inode_nr, 0);
+       struct bpos end = POS(inode_nr + 1, 0);
+       int ret;
+
+       /*
+        * If this was a directory, there shouldn't be any real dirents left -
+        * but there could be whiteouts (from hash collisions) that we should
+        * delete:
+        *
+        * XXX: the dirent code could ideally delete whiteouts when they're no
+        * longer needed
+        */
+       ret   = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
+                                       start, end, NULL) ?:
+               bch2_btree_delete_range(c, BTREE_ID_XATTRS,
+                                       start, end, NULL) ?:
+               bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
+                                       start, end, NULL);
+       if (ret)
+               return ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
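+       /*
+        * If the inode carried a nonzero generation (or a generation key
+        * already exists here), leave behind a KEY_TYPE_inode_generation key
+        * so that a future inode created at this number starts from a fresh
+        * generation; otherwise a plain whiteout suffices:
+        */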
+       do {
+               struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+               u32 bi_generation = 0;
+
+               ret = bkey_err(k);
+               if (ret)
+                       break;
+
+               bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
+                                       "inode %llu not found when deleting",
+                                       inode_nr);
+
+               switch (k.k->type) {
+               case KEY_TYPE_inode: {
+                       struct bch_inode_unpacked inode_u;
+
+                       if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
+                               bi_generation = inode_u.bi_generation + 1;
+                       break;
+               }
+               case KEY_TYPE_inode_generation: {
+                       struct bkey_s_c_inode_generation g =
+                               bkey_s_c_to_inode_generation(k);
+                       bi_generation = le32_to_cpu(g.v->bi_generation);
+                       break;
+               }
+               }
+
+               if (!bi_generation) {
+                       bkey_init(&delete.k);
+                       delete.k.p.offset = inode_nr;
+               } else {
+                       bkey_inode_generation_init(&delete.k_i);
+                       delete.k.p.offset = inode_nr;
+                       delete.v.bi_generation = cpu_to_le32(bi_generation);
+               }
+
+               bch2_trans_update(&trans, iter, &delete.k_i, 0);
+
+               ret = bch2_trans_commit(&trans, NULL, NULL,
+                                       BTREE_INSERT_NOFAIL);
+       } while (ret == -EINTR);
+
+       bch2_trans_exit(&trans);
+       return ret;
+}
+
+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
+                                 struct bch_inode_unpacked *inode)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
+                       POS(0, inode_nr), BTREE_ITER_SLOTS);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       ret = k.k->type == KEY_TYPE_inode
+               ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
+               : -ENOENT;
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
+                           struct bch_inode_unpacked *inode)
+{
+       return bch2_trans_do(c, NULL, NULL, 0,
+               bch2_inode_find_by_inum_trans(&trans, inode_nr, inode));
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_inode_pack_test(void)
+{
+       struct bch_inode_unpacked *u, test_inodes[] = {
+               {
+                       .bi_atime       = U64_MAX,
+                       .bi_ctime       = U64_MAX,
+                       .bi_mtime       = U64_MAX,
+                       .bi_otime       = U64_MAX,
+                       .bi_size        = U64_MAX,
+                       .bi_sectors     = U64_MAX,
+                       .bi_uid         = U32_MAX,
+                       .bi_gid         = U32_MAX,
+                       .bi_nlink       = U32_MAX,
+                       .bi_generation  = U32_MAX,
+                       .bi_dev         = U32_MAX,
+               },
+       };
+
+       for (u = test_inodes;
+            u < test_inodes + ARRAY_SIZE(test_inodes);
+            u++) {
+               struct bkey_inode_buf p;
+
+               bch2_inode_pack(&p, u);
+       }
+}
+#endif
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
new file mode 100644 (file)
index 0000000..bb759a4
--- /dev/null
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_INODE_H
+#define _BCACHEFS_INODE_H
+
+#include "opts.h"
+
+extern const char * const bch2_inode_opts[];
+
+const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_inode (struct bkey_ops) {                \
+       .key_invalid    = bch2_inode_invalid,           \
+       .val_to_text    = bch2_inode_to_text,           \
+}
+
+const char *bch2_inode_generation_invalid(const struct bch_fs *,
+                                         struct bkey_s_c);
+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *,
+                                  struct bkey_s_c);
+
+#define bch2_bkey_ops_inode_generation (struct bkey_ops) {     \
+       .key_invalid    = bch2_inode_generation_invalid,        \
+       .val_to_text    = bch2_inode_generation_to_text,        \
+}
+
+struct bch_inode_unpacked {
+       u64                     bi_inum;
+       __le64                  bi_hash_seed;
+       u32                     bi_flags;
+       u16                     bi_mode;
+
+#define x(_name, _bits)        u##_bits _name;
+       BCH_INODE_FIELDS()
+#undef  x
+};
+
+struct bkey_inode_buf {
+       struct bkey_i_inode     inode;
+
+#define x(_name, _bits)                + 8 + _bits / 8
+       u8              _pad[0 + BCH_INODE_FIELDS()];
+#undef  x
+} __attribute__((packed, aligned(8)));
+
+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
+int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
+
+struct btree_iter *bch2_inode_peek(struct btree_trans *,
+                       struct bch_inode_unpacked *, u64, unsigned);
+int bch2_inode_write(struct btree_trans *, struct btree_iter *,
+                    struct bch_inode_unpacked *);
+
+void bch2_inode_init_early(struct bch_fs *,
+                          struct bch_inode_unpacked *);
+void bch2_inode_init_late(struct bch_inode_unpacked *, u64,
+                         uid_t, gid_t, umode_t, dev_t,
+                         struct bch_inode_unpacked *);
+void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
+                    uid_t, gid_t, umode_t, dev_t,
+                    struct bch_inode_unpacked *);
+
+int bch2_inode_create(struct btree_trans *,
+                     struct bch_inode_unpacked *,
+                     u64, u64, u64 *);
+
+int bch2_inode_rm(struct bch_fs *, u64);
+
+int bch2_inode_find_by_inum_trans(struct btree_trans *, u64,
+                                 struct bch_inode_unpacked *);
+int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *);
+
+static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode)
+{
+       struct bch_io_opts ret = { 0 };
+
+#define x(_name, _bits)                                        \
+       if (inode->bi_##_name)                                          \
+               opt_set(ret, _name, inode->bi_##_name - 1);
+       BCH_INODE_OPTS()
+#undef x
+       return ret;
+}
+
+static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode,
+                                     enum inode_opt_id id, u64 v)
+{
+       switch (id) {
+#define x(_name, ...)                                                  \
+       case Inode_opt_##_name:                                         \
+               inode->bi_##_name = v;                                  \
+               break;
+       BCH_INODE_OPTS()
+#undef x
+       default:
+               BUG();
+       }
+}
+
+static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode,
+                                    enum inode_opt_id id)
+{
+       switch (id) {
+#define x(_name, ...)                                                  \
+       case Inode_opt_##_name:                                         \
+               return inode->bi_##_name;
+       BCH_INODE_OPTS()
+#undef x
+       default:
+               BUG();
+       }
+}
+
+static inline struct bch_io_opts
+io_opts(struct bch_fs *c, struct bch_inode_unpacked *inode)
+{
+       struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts);
+
+       bch2_io_opts_apply(&opts, bch2_inode_opts_get(inode));
+       return opts;
+}
+
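+/* The DT_* file type is stored in the high four bits of i_mode: */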
+static inline u8 mode_to_type(umode_t mode)
+{
+       return (mode >> 12) & 15;
+}
+
+/* i_nlink: */
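+/*
+ * bi_nlink stores the link count with nlink_bias() subtracted, so a freshly
+ * created file or directory encodes as 0; a count of zero is represented by
+ * the BCH_INODE_UNLINKED flag instead:
+ */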
+
+static inline unsigned nlink_bias(umode_t mode)
+{
+       return S_ISDIR(mode) ? 2 : 1;
+}
+
+static inline void bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
+{
+       if (bi->bi_flags & BCH_INODE_UNLINKED)
+               bi->bi_flags &= ~BCH_INODE_UNLINKED;
+       else
+               bi->bi_nlink++;
+}
+
+static inline void bch2_inode_nlink_dec(struct bch_inode_unpacked *bi)
+{
+       BUG_ON(bi->bi_flags & BCH_INODE_UNLINKED);
+       if (bi->bi_nlink)
+               bi->bi_nlink--;
+       else
+               bi->bi_flags |= BCH_INODE_UNLINKED;
+}
+
+static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi)
+{
+       return bi->bi_flags & BCH_INODE_UNLINKED
+                 ? 0
+                 : bi->bi_nlink + nlink_bias(bi->bi_mode);
+}
+
+static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
+                                       unsigned nlink)
+{
+       if (nlink) {
+               bi->bi_nlink = nlink - nlink_bias(bi->bi_mode);
+               bi->bi_flags &= ~BCH_INODE_UNLINKED;
+       } else {
+               bi->bi_nlink = 0;
+               bi->bi_flags |= BCH_INODE_UNLINKED;
+       }
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_inode_pack_test(void);
+#else
+static inline void bch2_inode_pack_test(void) {}
+#endif
+
+#endif /* _BCACHEFS_INODE_H */
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
new file mode 100644 (file)
index 0000000..0a4b4ee
--- /dev/null
@@ -0,0 +1,2389 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Some low level IO code, and hacks for various block layer limitations
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
+#include "bset.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "compress.h"
+#include "clock.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "extent_update.h"
+#include "inode.h"
+#include "io.h"
+#include "journal.h"
+#include "keylist.h"
+#include "move.h"
+#include "rebalance.h"
+#include "super.h"
+#include "super-io.h"
+
+#include <linux/blkdev.h>
+#include <linux/random.h>
+#include <linux/sched/mm.h>
+
+#include <trace/events/bcachefs.h>
+
+const char *bch2_blk_status_to_str(blk_status_t status)
+{
+       if (status == BLK_STS_REMOVED)
+               return "device removed";
+       return blk_status_to_str(status);
+}
+
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+       const struct bch_devs_mask *devs;
+       unsigned d, nr = 0, total = 0;
+       u64 now = local_clock(), last;
+       s64 congested;
+       struct bch_dev *ca;
+
+       if (!target)
+               return false;
+
+       rcu_read_lock();
+       devs = bch2_target_to_mask(c, target) ?:
+               &c->rw_devs[BCH_DATA_user];
+
+       for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
+               ca = rcu_dereference(c->devs[d]);
+               if (!ca)
+                       continue;
+
+               congested = atomic_read(&ca->congested);
+               last = READ_ONCE(ca->congested_last);
+               if (time_after64(now, last))
+                       congested -= (now - last) >> 12;
+
+               total += max(congested, 0LL);
+               nr++;
+       }
+       rcu_read_unlock();
+
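+       /*
+        * Decide probabilistically: the more congested the target's devices
+        * are on average, relative to CONGESTED_MAX, the more likely we are
+        * to report the target as congested:
+        */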
+       return bch2_rand_range(nr * CONGESTED_MAX) < total;
+}
+
+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency,
+                                      u64 now, int rw)
+{
+       u64 latency_capable =
+               ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m;
+       /* ideally we'd be taking into account the device's variance here: */
+       u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3);
+       s64 latency_over = io_latency - latency_threshold;
+
+       if (latency_threshold && latency_over > 0) {
+               /*
+                * bump up congested by approximately latency_over * 4 /
+                * latency_threshold - we don't need much accuracy here so don't
+                * bother with the divide:
+                */
+               if (atomic_read(&ca->congested) < CONGESTED_MAX)
+                       atomic_add(latency_over >>
+                                  max_t(int, ilog2(latency_threshold) - 2, 0),
+                                  &ca->congested);
+
+               ca->congested_last = now;
+       } else if (atomic_read(&ca->congested) > 0) {
+               atomic_dec(&ca->congested);
+       }
+}
+
+void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
+{
+       atomic64_t *latency = &ca->cur_latency[rw];
+       u64 now = local_clock();
+       u64 io_latency = time_after64(now, submit_time)
+               ? now - submit_time
+               : 0;
+       u64 old, new, v = atomic64_read(latency);
+
+       do {
+               old = v;
+
+               /*
+                * If the io latency was reasonably close to the current
+                * latency, skip doing the update and atomic operation - most of
+                * the time:
+                */
+               if (abs((int) (old - io_latency)) < (old >> 1) &&
+                   now & ~(~0 << 5))
+                       break;
+
+               new = ewma_add(old, io_latency, 5);
+       } while ((v = atomic64_cmpxchg(latency, old, new)) != old);
+
+       bch2_congested_acct(ca, io_latency, now, rw);
+
+       __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
+}
+
+/* Allocate, free from mempool: */
+
+void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
+{
+       struct bvec_iter_all iter;
+       struct bio_vec *bv;
+
+       bio_for_each_segment_all(bv, bio, iter)
+               if (bv->bv_page != ZERO_PAGE(0))
+                       mempool_free(bv->bv_page, &c->bio_bounce_pages);
+       bio->bi_vcnt = 0;
+}
+
+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
+{
+       struct page *page;
+
+       if (likely(!*using_mempool)) {
+               page = alloc_page(GFP_NOIO);
+               if (unlikely(!page)) {
+                       mutex_lock(&c->bio_bounce_pages_lock);
+                       *using_mempool = true;
+                       goto pool_alloc;
+               }
+       } else {
+pool_alloc:
+               page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO);
+       }
+
+       return page;
+}
+
+void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
+                              size_t size)
+{
+       bool using_mempool = false;
+
+       while (size) {
+               struct page *page = __bio_alloc_page_pool(c, &using_mempool);
+               unsigned len = min(PAGE_SIZE, size);
+
+               BUG_ON(!bio_add_page(bio, page, len, 0));
+               size -= len;
+       }
+
+       if (using_mempool)
+               mutex_unlock(&c->bio_bounce_pages_lock);
+}
+
+/* Extent update path: */
+
+static int sum_sector_overwrites(struct btree_trans *trans,
+                                struct btree_iter *extent_iter,
+                                struct bkey_i *new,
+                                bool may_allocate,
+                                bool *maybe_extending,
+                                s64 *delta)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c old;
+       int ret = 0;
+
+       *maybe_extending = true;
+       *delta = 0;
+
+       iter = bch2_trans_copy_iter(trans, extent_iter);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
+               if (!may_allocate &&
+                   bch2_bkey_nr_ptrs_fully_allocated(old) <
+                   bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) {
+                       ret = -ENOSPC;
+                       break;
+               }
+
+               *delta += (min(new->k.p.offset,
+                             old.k->p.offset) -
+                         max(bkey_start_offset(&new->k),
+                             bkey_start_offset(old.k))) *
+                       (bkey_extent_is_allocation(&new->k) -
+                        bkey_extent_is_allocation(old.k));
+
+               if (bkey_cmp(old.k->p, new->k.p) >= 0) {
+                       /*
+                        * Check if there's already data above where we're
+                        * going to be writing to - this means we're definitely
+                        * not extending the file:
+                        *
+                        * Note that it's not sufficient to check if there's
+                        * data up to the sector offset we're going to be
+                        * writing to, because i_size could be up to one block
+                        * less:
+                        */
+                       if (!bkey_cmp(old.k->p, new->k.p))
+                               old = bch2_btree_iter_next(iter);
+
+                       if (old.k && !bkey_err(old) &&
+                           old.k->p.inode == extent_iter->pos.inode &&
+                           bkey_extent_is_data(old.k))
+                               *maybe_extending = false;
+
+                       break;
+               }
+       }
+
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+int bch2_extent_update(struct btree_trans *trans,
+                      struct btree_iter *iter,
+                      struct bkey_i *k,
+                      struct disk_reservation *disk_res,
+                      u64 *journal_seq,
+                      u64 new_i_size,
+                      s64 *i_sectors_delta)
+{
+       /* this must live until after bch2_trans_commit(): */
+       struct bkey_inode_buf inode_p;
+       bool extending = false;
+       s64 delta = 0;
+       int ret;
+
+       ret = bch2_extent_trim_atomic(k, iter);
+       if (ret)
+               return ret;
+
+       ret = sum_sector_overwrites(trans, iter, k,
+                       disk_res && disk_res->sectors != 0,
+                       &extending, &delta);
+       if (ret)
+               return ret;
+
+       new_i_size = extending
+               ? min(k->k.p.offset << 9, new_i_size)
+               : 0;
+
+       if (delta || new_i_size) {
+               struct btree_iter *inode_iter;
+               struct bch_inode_unpacked inode_u;
+
+               inode_iter = bch2_inode_peek(trans, &inode_u,
+                               k->k.p.inode, BTREE_ITER_INTENT);
+               if (IS_ERR(inode_iter))
+                       return PTR_ERR(inode_iter);
+
+               /*
+                * XXX:
+                * writeback can race a bit with truncate, because truncate
+                * first updates the inode then truncates the pagecache. This is
+                * ugly, but lets us preserve the invariant that the in memory
+                * i_size is always >= the on disk i_size.
+                *
+               BUG_ON(new_i_size > inode_u.bi_size &&
+                      (inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY));
+                */
+               BUG_ON(new_i_size > inode_u.bi_size && !extending);
+
+               if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+                   new_i_size > inode_u.bi_size)
+                       inode_u.bi_size = new_i_size;
+               else
+                       new_i_size = 0;
+
+               inode_u.bi_sectors += delta;
+
+               if (delta || new_i_size) {
+                       bch2_inode_pack(&inode_p, &inode_u);
+                       bch2_trans_update(trans, inode_iter,
+                                         &inode_p.inode.k_i, 0);
+               }
+
+               bch2_trans_iter_put(trans, inode_iter);
+       }
+
+       bch2_trans_update(trans, iter, k, 0);
+
+       ret = bch2_trans_commit(trans, disk_res, journal_seq,
+                               BTREE_INSERT_NOCHECK_RW|
+                               BTREE_INSERT_NOFAIL|
+                               BTREE_INSERT_USE_RESERVE);
+       if (!ret && i_sectors_delta)
+               *i_sectors_delta += delta;
+
+       return ret;
+}
+
+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
+                  struct bpos end, u64 *journal_seq,
+                  s64 *i_sectors_delta)
+{
+       struct bch_fs *c        = trans->c;
+       unsigned max_sectors    = KEY_SIZE_MAX & (~0 << c->block_bits);
+       struct bkey_s_c k;
+       int ret = 0, ret2 = 0;
+
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              bkey_cmp(iter->pos, end) < 0) {
+               struct disk_reservation disk_res =
+                       bch2_disk_reservation_init(c, 0);
+               struct bkey_i delete;
+
+               bch2_trans_begin(trans);
+
+               ret = bkey_err(k);
+               if (ret)
+                       goto btree_err;
+
+               bkey_init(&delete.k);
+               delete.k.p = iter->pos;
+
+               /* create the biggest key we can */
+               bch2_key_resize(&delete.k, max_sectors);
+               bch2_cut_back(end, &delete);
+
+               ret = bch2_extent_update(trans, iter, &delete,
+                               &disk_res, journal_seq,
+                               0, i_sectors_delta);
+               bch2_disk_reservation_put(c, &disk_res);
+btree_err:
+               if (ret == -EINTR) {
+                       ret2 = ret;
+                       ret = 0;
+               }
+               if (ret)
+                       break;
+       }
+
+       if (bkey_cmp(iter->pos, end) > 0) {
+               bch2_btree_iter_set_pos(iter, end);
+               ret = bch2_btree_iter_traverse(iter);
+       }
+
+       return ret ?: ret2;
+}
+
+int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
+               u64 *journal_seq, s64 *i_sectors_delta)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  POS(inum, start),
+                                  BTREE_ITER_INTENT);
+
+       ret = bch2_fpunch_at(&trans, iter, POS(inum, end),
+                            journal_seq, i_sectors_delta);
+       bch2_trans_exit(&trans);
+
+       if (ret == -EINTR)
+               ret = 0;
+
+       return ret;
+}
+
+int bch2_write_index_default(struct bch_write_op *op)
+{
+       struct bch_fs *c = op->c;
+       struct bkey_on_stack sk;
+       struct keylist *keys = &op->insert_keys;
+       struct bkey_i *k = bch2_keylist_front(keys);
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       int ret;
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  bkey_start_pos(&k->k),
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+       do {
+               bch2_trans_begin(&trans);
+
+               k = bch2_keylist_front(keys);
+
+               bkey_on_stack_realloc(&sk, c, k->k.u64s);
+               bkey_copy(sk.k, k);
+               bch2_cut_front(iter->pos, sk.k);
+
+               ret = bch2_extent_update(&trans, iter, sk.k,
+                                        &op->res, op_journal_seq(op),
+                                        op->new_i_size, &op->i_sectors_delta);
+               if (ret == -EINTR)
+                       continue;
+               if (ret)
+                       break;
+
+               if (bkey_cmp(iter->pos, k->k.p) >= 0)
+                       bch2_keylist_pop_front(keys);
+       } while (!bch2_keylist_empty(keys));
+
+       bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
+
+       return ret;
+}
+
+/* Writes */
+
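+/*
+ * Submit one bio per replica pointer in the key: each pointer but the last
+ * gets a clone of the original bio, the last reuses it, and pointers to
+ * missing devices complete immediately with BLK_STS_REMOVED:
+ */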
+void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
+                              enum bch_data_type type,
+                              const struct bkey_i *k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
+       const struct bch_extent_ptr *ptr;
+       struct bch_write_bio *n;
+       struct bch_dev *ca;
+
+       BUG_ON(c->opts.nochanges);
+
+       bkey_for_each_ptr(ptrs, ptr) {
+               BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX ||
+                      !c->devs[ptr->dev]);
+
+               ca = bch_dev_bkey_exists(c, ptr->dev);
+
+               if (to_entry(ptr + 1) < ptrs.end) {
+                       n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO,
+                                                  &ca->replica_set));
+
+                       n->bio.bi_end_io        = wbio->bio.bi_end_io;
+                       n->bio.bi_private       = wbio->bio.bi_private;
+                       n->parent               = wbio;
+                       n->split                = true;
+                       n->bounce               = false;
+                       n->put_bio              = true;
+                       n->bio.bi_opf           = wbio->bio.bi_opf;
+                       bio_inc_remaining(&wbio->bio);
+               } else {
+                       n = wbio;
+                       n->split                = false;
+               }
+
+               n->c                    = c;
+               n->dev                  = ptr->dev;
+               n->have_ioref           = bch2_dev_get_ioref(ca,
+                                       type == BCH_DATA_btree ? READ : WRITE);
+               n->submit_time          = local_clock();
+               n->bio.bi_iter.bi_sector = ptr->offset;
+
+               if (!journal_flushes_device(ca))
+                       n->bio.bi_opf |= REQ_FUA;
+
+               if (likely(n->have_ioref)) {
+                       this_cpu_add(ca->io_done->sectors[WRITE][type],
+                                    bio_sectors(&n->bio));
+
+                       bio_set_dev(&n->bio, ca->disk_sb.bdev);
+                       submit_bio(&n->bio);
+               } else {
+                       n->bio.bi_status        = BLK_STS_REMOVED;
+                       bio_endio(&n->bio);
+               }
+       }
+}
+
+static void __bch2_write(struct closure *);
+
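+/*
+ * Final completion for a write op: surface a journal error for FLUSH
+ * writes, release the disk reservation, c->writes ref and keylist (and
+ * the io_in_flight slot, for non-internal writes), then hand off via
+ * op->end_io or closure_return().
+ */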
+static void bch2_write_done(struct closure *cl)
+{
+       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+       struct bch_fs *c = op->c;
+
+       if (!op->error && (op->flags & BCH_WRITE_FLUSH))
+               op->error = bch2_journal_error(&c->journal);
+
+       bch2_disk_reservation_put(c, &op->res);
+       percpu_ref_put(&c->writes);
+       bch2_keylist_free(&op->insert_keys, op->inline_keys);
+
+       bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
+
+       if (!(op->flags & BCH_WRITE_FROM_INTERNAL))
+               up(&c->io_in_flight);
+
+       if (op->end_io) {
+               EBUG_ON(cl->parent);
+               closure_debug_destroy(cl);
+               op->end_io(op);
+       } else {
+               closure_return(cl);
+       }
+}
+
+/**
+ * __bch2_write_index - after a write, update the index to point to the new data
+ */
+static void __bch2_write_index(struct bch_write_op *op)
+{
+       struct bch_fs *c = op->c;
+       struct keylist *keys = &op->insert_keys;
+       struct bch_extent_ptr *ptr;
+       struct bkey_i *src, *dst = keys->keys, *n, *k;
+       unsigned dev;
+       int ret;
+
+       for (src = keys->keys; src != keys->top; src = n) {
+               n = bkey_next(src);
+
+               if (bkey_extent_is_direct_data(&src->k)) {
+                       bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr,
+                                           test_bit(ptr->dev, op->failed.d));
+
+                       if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) {
+                               ret = -EIO;
+                               goto err;
+                       }
+               }
+
+               if (dst != src)
+                       memmove_u64s_down(dst, src, src->u64s);
+               dst = bkey_next(dst);
+       }
+
+       keys->top = dst;
+
+       /*
+        * probably not the ideal place to hook this in, but I don't
+        * particularly want to plumb io_opts all the way through the btree
+        * update stack right now
+        */
+       for_each_keylist_key(keys, k) {
+               bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts);
+
+               if (bch2_bkey_is_incompressible(bkey_i_to_s_c(k)))
+                       bch2_check_set_feature(op->c, BCH_FEATURE_incompressible);
+       }
+
+       if (!bch2_keylist_empty(keys)) {
+               u64 sectors_start = keylist_sectors(keys);
+               int ret = op->index_update_fn(op);
+
+               BUG_ON(ret == -EINTR);
+               BUG_ON(keylist_sectors(keys) && !ret);
+
+               op->written += sectors_start - keylist_sectors(keys);
+
+               if (ret) {
+                       __bcache_io_error(c, "btree IO error: %i", ret);
+                       op->error = ret;
+               }
+       }
+out:
+       /* If a bucket wasn't written, we can't erasure code it: */
+       for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX)
+               bch2_open_bucket_write_error(c, &op->open_buckets, dev);
+
+       bch2_open_buckets_put(c, &op->open_buckets);
+       return;
+err:
+       keys->top = keys->keys;
+       op->error = ret;
+       goto out;
+}
+
+static void bch2_write_index(struct closure *cl)
+{
+       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+       struct bch_fs *c = op->c;
+
+       __bch2_write_index(op);
+
+       if (!(op->flags & BCH_WRITE_DONE)) {
+               continue_at(cl, __bch2_write, index_update_wq(op));
+       } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
+               bch2_journal_flush_seq_async(&c->journal,
+                                            *op_journal_seq(op),
+                                            cl);
+               continue_at(cl, bch2_write_done, index_update_wq(op));
+       } else {
+               continue_at_nobarrier(cl, bch2_write_done, NULL);
+       }
+}
+
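+/*
+ * Per-replica write completion: record an IO error in op->failed, release
+ * the device ioref and any bounce pages, then complete the parent bio for
+ * a split - or, for the last completion, drop or continue the write op's
+ * closure.
+ */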
+static void bch2_write_endio(struct bio *bio)
+{
+       struct closure *cl              = bio->bi_private;
+       struct bch_write_op *op         = container_of(cl, struct bch_write_op, cl);
+       struct bch_write_bio *wbio      = to_wbio(bio);
+       struct bch_write_bio *parent    = wbio->split ? wbio->parent : NULL;
+       struct bch_fs *c                = wbio->c;
+       struct bch_dev *ca              = bch_dev_bkey_exists(c, wbio->dev);
+
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "data write: %s",
+                              bch2_blk_status_to_str(bio->bi_status)))
+               set_bit(wbio->dev, op->failed.d);
+
+       if (wbio->have_ioref) {
+               bch2_latency_acct(ca, wbio->submit_time, WRITE);
+               percpu_ref_put(&ca->io_ref);
+       }
+
+       if (wbio->bounce)
+               bch2_bio_free_pages_pool(c, bio);
+
+       if (wbio->put_bio)
+               bio_put(bio);
+
+       if (parent)
+               bio_endio(&parent->bio);
+       else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT))
+               closure_put(cl);
+       else
+               continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op));
+}
+
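+/*
+ * Append a new extent key to op->insert_keys for the sectors just taken
+ * from @wp: one pointer per open bucket, marked cached if the write is
+ * cached or the device has no durability, plus a crc entry if the data
+ * is checksummed, compressed or encrypted.
+ */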
+static void init_append_extent(struct bch_write_op *op,
+                              struct write_point *wp,
+                              struct bversion version,
+                              struct bch_extent_crc_unpacked crc)
+{
+       struct bch_fs *c = op->c;
+       struct bkey_i_extent *e;
+       struct open_bucket *ob;
+       unsigned i;
+
+       BUG_ON(crc.compressed_size > wp->sectors_free);
+       wp->sectors_free -= crc.compressed_size;
+       op->pos.offset += crc.uncompressed_size;
+
+       e = bkey_extent_init(op->insert_keys.top);
+       e->k.p          = op->pos;
+       e->k.size       = crc.uncompressed_size;
+       e->k.version    = version;
+
+       if (crc.csum_type ||
+           crc.compression_type ||
+           crc.nonce)
+               bch2_extent_crc_append(&e->k_i, crc);
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+               union bch_extent_entry *end =
+                       bkey_val_end(bkey_i_to_s(&e->k_i));
+
+               end->ptr = ob->ptr;
+               end->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+               end->ptr.cached = !ca->mi.durability ||
+                       (op->flags & BCH_WRITE_CACHED) != 0;
+               end->ptr.offset += ca->mi.bucket_size - ob->sectors_free;
+
+               e->k.u64s++;
+
+               BUG_ON(crc.compressed_size > ob->sectors_free);
+               ob->sectors_free -= crc.compressed_size;
+       }
+
+       bch2_keylist_push(&op->insert_keys);
+}
+
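+/*
+ * Allocate the (possibly bounce) bio for encoded output: if @buf is given
+ * (the erasure coding buffer) the bio maps it directly; otherwise pages
+ * come from the mempool, capped at encoded_extent_max, with any shortfall
+ * allocated normally and noted in *page_alloc_failed.
+ */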
+static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
+                                       struct write_point *wp,
+                                       struct bio *src,
+                                       bool *page_alloc_failed,
+                                       void *buf)
+{
+       struct bch_write_bio *wbio;
+       struct bio *bio;
+       unsigned output_available =
+               min(wp->sectors_free << 9, src->bi_iter.bi_size);
+       unsigned pages = DIV_ROUND_UP(output_available +
+                                     (buf
+                                      ? ((unsigned long) buf & (PAGE_SIZE - 1))
+                                      : 0), PAGE_SIZE);
+
+       bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write);
+       wbio                    = wbio_init(bio);
+       wbio->put_bio           = true;
+       /* copy WRITE_SYNC flag */
+       wbio->bio.bi_opf        = src->bi_opf;
+
+       if (buf) {
+               bch2_bio_map(bio, buf, output_available);
+               return bio;
+       }
+
+       wbio->bounce            = true;
+
+       /*
+        * We can't use mempool for more than c->sb.encoded_extent_max
+        * worth of pages, but we'd like to allocate more if we can:
+        */
+       bch2_bio_alloc_pages_pool(c, bio,
+                                 min_t(unsigned, output_available,
+                                       c->sb.encoded_extent_max << 9));
+
+       if (bio->bi_iter.bi_size < output_available)
+               *page_alloc_failed =
+                       bch2_bio_alloc_pages(bio,
+                                            output_available -
+                                            bio->bi_iter.bi_size,
+                                            GFP_NOFS) != 0;
+
+       return bio;
+}
+
+static int bch2_write_rechecksum(struct bch_fs *c,
+                                struct bch_write_op *op,
+                                unsigned new_csum_type)
+{
+       struct bio *bio = &op->wbio.bio;
+       struct bch_extent_crc_unpacked new_crc;
+       int ret;
+
+       /* bch2_rechecksum_bio() can't encrypt or decrypt data: */
+
+       if (bch2_csum_type_is_encryption(op->crc.csum_type) !=
+           bch2_csum_type_is_encryption(new_csum_type))
+               new_csum_type = op->crc.csum_type;
+
+       ret = bch2_rechecksum_bio(c, bio, op->version, op->crc,
+                                 NULL, &new_crc,
+                                 op->crc.offset, op->crc.live_size,
+                                 new_csum_type);
+       if (ret)
+               return ret;
+
+       bio_advance(bio, op->crc.offset << 9);
+       bio->bi_iter.bi_size = op->crc.live_size << 9;
+       op->crc = new_crc;
+       return 0;
+}
+
+static int bch2_write_decrypt(struct bch_write_op *op)
+{
+       struct bch_fs *c = op->c;
+       struct nonce nonce = extent_nonce(op->version, op->crc);
+       struct bch_csum csum;
+
+       if (!bch2_csum_type_is_encryption(op->crc.csum_type))
+               return 0;
+
+       /*
+        * If we need to decrypt data in the write path, we'll no longer be able
+        * to verify the existing checksum (poly1305 mac, in this case) after
+        * it's decrypted - this is the last point we'll be able to reverify the
+        * checksum:
+        */
+       csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
+       if (bch2_crc_cmp(op->crc.csum, csum))
+               return -EIO;
+
+       bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
+       op->crc.csum_type = 0;
+       op->crc.csum = (struct bch_csum) { 0, 0 };
+       return 0;
+}
+
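+/*
+ * For writes of data that is already encoded (BCH_WRITE_DATA_ENCODED),
+ * decide whether the extent can be written out unchanged; if not, decrypt,
+ * decompress and/or rechecksum it so the normal write path below can
+ * re-encode it.
+ */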
+static enum prep_encoded_ret {
+       PREP_ENCODED_OK,
+       PREP_ENCODED_ERR,
+       PREP_ENCODED_CHECKSUM_ERR,
+       PREP_ENCODED_DO_WRITE,
+} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp)
+{
+       struct bch_fs *c = op->c;
+       struct bio *bio = &op->wbio.bio;
+
+       if (!(op->flags & BCH_WRITE_DATA_ENCODED))
+               return PREP_ENCODED_OK;
+
+       BUG_ON(bio_sectors(bio) != op->crc.compressed_size);
+
+       /* Can we just write the entire extent as is? */
+       if (op->crc.uncompressed_size == op->crc.live_size &&
+           op->crc.compressed_size <= wp->sectors_free &&
+           (op->crc.compression_type == op->compression_type ||
+            op->incompressible)) {
+               if (!crc_is_compressed(op->crc) &&
+                   op->csum_type != op->crc.csum_type &&
+                   bch2_write_rechecksum(c, op, op->csum_type))
+                       return PREP_ENCODED_CHECKSUM_ERR;
+
+               return PREP_ENCODED_DO_WRITE;
+       }
+
+       /*
+        * If the data is compressed and we couldn't write the entire extent as
+        * is, we have to decompress it:
+        */
+       if (crc_is_compressed(op->crc)) {
+               struct bch_csum csum;
+
+               if (bch2_write_decrypt(op))
+                       return PREP_ENCODED_CHECKSUM_ERR;
+
+               /* Last point we can still verify checksum: */
+               csum = bch2_checksum_bio(c, op->crc.csum_type,
+                                        extent_nonce(op->version, op->crc),
+                                        bio);
+               if (bch2_crc_cmp(op->crc.csum, csum))
+                       return PREP_ENCODED_CHECKSUM_ERR;
+
+               if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
+                       return PREP_ENCODED_ERR;
+       }
+
+       /*
+        * No longer have compressed data after this point - data might be
+        * encrypted:
+        */
+
+       /*
+        * If the data is checksummed and we're only writing a subset,
+        * rechecksum and adjust bio to point to currently live data:
+        */
+       if ((op->crc.live_size != op->crc.uncompressed_size ||
+            op->crc.csum_type != op->csum_type) &&
+           bch2_write_rechecksum(c, op, op->csum_type))
+               return PREP_ENCODED_CHECKSUM_ERR;
+
+       /*
+        * If we want to compress the data, it has to be decrypted:
+        */
+       if ((op->compression_type ||
+            bch2_csum_type_is_encryption(op->crc.csum_type) !=
+            bch2_csum_type_is_encryption(op->csum_type)) &&
+           bch2_write_decrypt(op))
+               return PREP_ENCODED_CHECKSUM_ERR;
+
+       return PREP_ENCODED_OK;
+}
+
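+/*
+ * Carve off as much of the write as fits in @wp and encode it: compress,
+ * encrypt and checksum into (possibly bounced) *_dst, appending one extent
+ * key per chunk. Returns nonzero if input remains to be written, negative
+ * on error.
+ */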
+static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
+                            struct bio **_dst)
+{
+       struct bch_fs *c = op->c;
+       struct bio *src = &op->wbio.bio, *dst = src;
+       struct bvec_iter saved_iter;
+       void *ec_buf;
+       struct bpos ec_pos = op->pos;
+       unsigned total_output = 0, total_input = 0;
+       bool bounce = false;
+       bool page_alloc_failed = false;
+       int ret, more = 0;
+
+       BUG_ON(!bio_sectors(src));
+
+       ec_buf = bch2_writepoint_ec_buf(c, wp);
+
+       switch (bch2_write_prep_encoded_data(op, wp)) {
+       case PREP_ENCODED_OK:
+               break;
+       case PREP_ENCODED_ERR:
+               ret = -EIO;
+               goto err;
+       case PREP_ENCODED_CHECKSUM_ERR:
+               BUG();
+               goto csum_err;
+       case PREP_ENCODED_DO_WRITE:
+               /* XXX look for bug here */
+               if (ec_buf) {
+                       dst = bch2_write_bio_alloc(c, wp, src,
+                                                  &page_alloc_failed,
+                                                  ec_buf);
+                       bio_copy_data(dst, src);
+                       bounce = true;
+               }
+               init_append_extent(op, wp, op->version, op->crc);
+               goto do_write;
+       }
+
+       if (ec_buf ||
+           op->compression_type ||
+           (op->csum_type &&
+            !(op->flags & BCH_WRITE_PAGES_STABLE)) ||
+           (bch2_csum_type_is_encryption(op->csum_type) &&
+            !(op->flags & BCH_WRITE_PAGES_OWNED))) {
+               dst = bch2_write_bio_alloc(c, wp, src,
+                                          &page_alloc_failed,
+                                          ec_buf);
+               bounce = true;
+       }
+
+       saved_iter = dst->bi_iter;
+
+       do {
+               struct bch_extent_crc_unpacked crc =
+                       (struct bch_extent_crc_unpacked) { 0 };
+               struct bversion version = op->version;
+               size_t dst_len, src_len;
+
+               if (page_alloc_failed &&
+                   bio_sectors(dst) < wp->sectors_free &&
+                   bio_sectors(dst) < c->sb.encoded_extent_max)
+                       break;
+
+               BUG_ON(op->compression_type &&
+                      (op->flags & BCH_WRITE_DATA_ENCODED) &&
+                      bch2_csum_type_is_encryption(op->crc.csum_type));
+               BUG_ON(op->compression_type && !bounce);
+
+               crc.compression_type = op->incompressible
+                       ? BCH_COMPRESSION_TYPE_incompressible
+                       : op->compression_type
+                       ? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
+                                           op->compression_type)
+                       : 0;
+               if (!crc_is_compressed(crc)) {
+                       dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
+                       dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9);
+
+                       if (op->csum_type)
+                               dst_len = min_t(unsigned, dst_len,
+                                               c->sb.encoded_extent_max << 9);
+
+                       if (bounce) {
+                               swap(dst->bi_iter.bi_size, dst_len);
+                               bio_copy_data(dst, src);
+                               swap(dst->bi_iter.bi_size, dst_len);
+                       }
+
+                       src_len = dst_len;
+               }
+
+               BUG_ON(!src_len || !dst_len);
+
+               if (bch2_csum_type_is_encryption(op->csum_type)) {
+                       if (bversion_zero(version)) {
+                               version.lo = atomic64_inc_return(&c->key_version);
+                       } else {
+                               crc.nonce = op->nonce;
+                               op->nonce += src_len >> 9;
+                       }
+               }
+
+               if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
+                   !crc_is_compressed(crc) &&
+                   bch2_csum_type_is_encryption(op->crc.csum_type) ==
+                   bch2_csum_type_is_encryption(op->csum_type)) {
+                       /*
+                        * Note: when we're using rechecksum(), we need to be
+                        * checksumming @src because it has all the data our
+                        * existing checksum covers - if we bounced (because we
+                        * were trying to compress), @dst will only have the
+                        * part of the data the new checksum will cover.
+                        *
+                        * But normally we want to be checksumming post bounce,
+                        * because part of the reason for bouncing is so the
+                        * data can't be modified (by userspace) while it's in
+                        * flight.
+                        */
+                       if (bch2_rechecksum_bio(c, src, version, op->crc,
+                                       &crc, &op->crc,
+                                       src_len >> 9,
+                                       bio_sectors(src) - (src_len >> 9),
+                                       op->csum_type))
+                               goto csum_err;
+               } else {
+                       if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
+                           bch2_rechecksum_bio(c, src, version, op->crc,
+                                       NULL, &op->crc,
+                                       src_len >> 9,
+                                       bio_sectors(src) - (src_len >> 9),
+                                       op->crc.csum_type))
+                               goto csum_err;
+
+                       crc.compressed_size     = dst_len >> 9;
+                       crc.uncompressed_size   = src_len >> 9;
+                       crc.live_size           = src_len >> 9;
+
+                       swap(dst->bi_iter.bi_size, dst_len);
+                       bch2_encrypt_bio(c, op->csum_type,
+                                        extent_nonce(version, crc), dst);
+                       crc.csum = bch2_checksum_bio(c, op->csum_type,
+                                        extent_nonce(version, crc), dst);
+                       crc.csum_type = op->csum_type;
+                       swap(dst->bi_iter.bi_size, dst_len);
+               }
+
+               init_append_extent(op, wp, version, crc);
+
+               if (dst != src)
+                       bio_advance(dst, dst_len);
+               bio_advance(src, src_len);
+               total_output    += dst_len;
+               total_input     += src_len;
+       } while (dst->bi_iter.bi_size &&
+                src->bi_iter.bi_size &&
+                wp->sectors_free &&
+                !bch2_keylist_realloc(&op->insert_keys,
+                                     op->inline_keys,
+                                     ARRAY_SIZE(op->inline_keys),
+                                     BKEY_EXTENT_U64s_MAX));
+
+       more = src->bi_iter.bi_size != 0;
+
+       dst->bi_iter = saved_iter;
+
+       if (dst == src && more) {
+               BUG_ON(total_output != total_input);
+
+               dst = bio_split(src, total_input >> 9,
+                               GFP_NOIO, &c->bio_write);
+               wbio_init(dst)->put_bio = true;
+               /* copy WRITE_SYNC flag */
+               dst->bi_opf             = src->bi_opf;
+       }
+
+       dst->bi_iter.bi_size = total_output;
+do_write:
+       /* might have done a realloc... */
+       bch2_ec_add_backpointer(c, wp, ec_pos, total_input >> 9);
+
+       *_dst = dst;
+       return more;
+csum_err:
+       bch_err(c, "error verifying existing checksum while "
+               "rewriting existing data (memory corruption?)");
+       ret = -EIO;
+err:
+       if (to_wbio(dst)->bounce)
+               bch2_bio_free_pages_pool(c, dst);
+       if (to_wbio(dst)->put_bio)
+               bio_put(dst);
+
+       return ret;
+}
+
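+/*
+ * Main write loop: repeatedly allocate sectors from a write point, encode
+ * as much of the write as fits (bch2_write_extent) and submit it to every
+ * replica. When allocation would block we either punt to the index update
+ * workqueue (if we're running out of a workqueue ourselves) or flush the
+ * keys written so far and retry.
+ */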
+static void __bch2_write(struct closure *cl)
+{
+       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+       struct bch_fs *c = op->c;
+       struct write_point *wp;
+       struct bio *bio;
+       bool skip_put = true;
+       unsigned nofs_flags;
+       int ret;
+
+       nofs_flags = memalloc_nofs_save();
+again:
+       memset(&op->failed, 0, sizeof(op->failed));
+
+       do {
+               struct bkey_i *key_to_write;
+               unsigned key_to_write_offset = op->insert_keys.top_p -
+                       op->insert_keys.keys_p;
+
+               /* +1 for possible cache device: */
+               if (op->open_buckets.nr + op->nr_replicas + 1 >
+                   ARRAY_SIZE(op->open_buckets.v))
+                       goto flush_io;
+
+               if (bch2_keylist_realloc(&op->insert_keys,
+                                       op->inline_keys,
+                                       ARRAY_SIZE(op->inline_keys),
+                                       BKEY_EXTENT_U64s_MAX))
+                       goto flush_io;
+
+               if ((op->flags & BCH_WRITE_FROM_INTERNAL) &&
+                   percpu_ref_is_dying(&c->writes)) {
+                       ret = -EROFS;
+                       goto err;
+               }
+
+               /*
+                * The copygc thread is now global, which means it's no longer
+                * freeing up space on specific disks, which means that
+                * allocations for specific disks may hang arbitrarily long:
+                */
+               wp = bch2_alloc_sectors_start(c,
+                       op->target,
+                       op->opts.erasure_code,
+                       op->write_point,
+                       &op->devs_have,
+                       op->nr_replicas,
+                       op->nr_replicas_required,
+                       op->alloc_reserve,
+                       op->flags,
+                       (op->flags & (BCH_WRITE_ALLOC_NOWAIT|
+                                     BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl);
+               EBUG_ON(!wp);
+
+               if (unlikely(IS_ERR(wp))) {
+                       if (unlikely(PTR_ERR(wp) != -EAGAIN)) {
+                               ret = PTR_ERR(wp);
+                               goto err;
+                       }
+
+                       goto flush_io;
+               }
+
+               /*
+                * It's possible for the allocator to fail, put us on the
+                * freelist waitlist, and then succeed in one of various retry
+                * paths: if that happens, we need to disable the skip_put
+                * optimization because otherwise there won't necessarily be a
+                * barrier before we free the bch_write_op:
+                */
+               if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
+                       skip_put = false;
+
+               bch2_open_bucket_get(c, wp, &op->open_buckets);
+               ret = bch2_write_extent(op, wp, &bio);
+               bch2_alloc_sectors_done(c, wp);
+
+               if (ret < 0)
+                       goto err;
+
+               if (ret) {
+                       skip_put = false;
+               } else {
+                       /*
+                        * for the skip_put optimization this has to be set
+                        * before we submit the bio:
+                        */
+                       op->flags |= BCH_WRITE_DONE;
+               }
+
+               bio->bi_end_io  = bch2_write_endio;
+               bio->bi_private = &op->cl;
+               bio->bi_opf |= REQ_OP_WRITE;
+
+               if (!skip_put)
+                       closure_get(bio->bi_private);
+               else
+                       op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT;
+
+               key_to_write = (void *) (op->insert_keys.keys_p +
+                                        key_to_write_offset);
+
+               bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
+                                         key_to_write);
+       } while (ret);
+
+       if (!skip_put)
+               continue_at(cl, bch2_write_index, index_update_wq(op));
+out:
+       memalloc_nofs_restore(nofs_flags);
+       return;
+err:
+       op->error = ret;
+       op->flags |= BCH_WRITE_DONE;
+
+       continue_at(cl, bch2_write_index, index_update_wq(op));
+       goto out;
+flush_io:
+       /*
+        * If the write can't all be submitted at once, we generally want to
+        * block synchronously as that signals backpressure to the caller.
+        *
+        * However, if we're running out of a workqueue, we can't block here
+        * because we'll be blocking other work items from completing:
+        */
+       if (current->flags & PF_WQ_WORKER) {
+               continue_at(cl, bch2_write_index, index_update_wq(op));
+               goto out;
+       }
+
+       closure_sync(cl);
+
+       if (!bch2_keylist_empty(&op->insert_keys)) {
+               __bch2_write_index(op);
+
+               if (op->error) {
+                       op->flags |= BCH_WRITE_DONE;
+                       continue_at_nobarrier(cl, bch2_write_done, NULL);
+                       goto out;
+               }
+       }
+
+       goto again;
+}
+
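+/*
+ * Small writes are stored in the btree key itself: copy the data into a
+ * KEY_TYPE_inline_data key, pad it to a multiple of 8 bytes, and go
+ * straight to the index update path.
+ */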
+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
+{
+       struct closure *cl = &op->cl;
+       struct bio *bio = &op->wbio.bio;
+       struct bvec_iter iter;
+       struct bkey_i_inline_data *id;
+       unsigned sectors;
+       int ret;
+
+       bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
+
+       ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
+                                  ARRAY_SIZE(op->inline_keys),
+                                  BKEY_U64s + DIV_ROUND_UP(data_len, 8));
+       if (ret) {
+               op->error = ret;
+               goto err;
+       }
+
+       sectors = bio_sectors(bio);
+       op->pos.offset += sectors;
+
+       id = bkey_inline_data_init(op->insert_keys.top);
+       id->k.p         = op->pos;
+       id->k.version   = op->version;
+       id->k.size      = sectors;
+
+       iter = bio->bi_iter;
+       iter.bi_size = data_len;
+       memcpy_from_bio(id->v.data, bio, iter);
+
+       while (data_len & 7)
+               id->v.data[data_len++] = '\0';
+       set_bkey_val_bytes(&id->k, data_len);
+       bch2_keylist_push(&op->insert_keys);
+
+       op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+       op->flags |= BCH_WRITE_DONE;
+
+       continue_at_nobarrier(cl, bch2_write_index, NULL);
+       return;
+err:
+       bch2_write_done(&op->cl);
+}
+
+/**
+ * bch2_write - handle a write to a cache device or flash only volume
+ *
+ * This is the starting point for any data to end up in a cache device; it could
+ * be from a normal write, or a writeback write, or a write to a flash only
+ * volume - it's also used by the moving garbage collector to compact data in
+ * mostly empty buckets.
+ *
+ * It first writes the data to the cache, creating a list of keys to be inserted
+ * (if the data won't fit in a single open bucket, there will be multiple keys);
+ * after the data is written it calls bch_journal, and after the keys have been
+ * added to the next journal write they're inserted into the btree.
+ *
+ * If op->discard is true, instead of inserting the data it invalidates the
+ * region of the cache represented by op->bio and op->inode.
+ */
+void bch2_write(struct closure *cl)
+{
+       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+       struct bio *bio = &op->wbio.bio;
+       struct bch_fs *c = op->c;
+       unsigned data_len;
+
+       BUG_ON(!op->nr_replicas);
+       BUG_ON(!op->write_point.v);
+       BUG_ON(!bkey_cmp(op->pos, POS_MAX));
+
+       op->start_time = local_clock();
+       bch2_keylist_init(&op->insert_keys, op->inline_keys);
+       wbio_init(bio)->put_bio = false;
+
+       if (bio_sectors(bio) & (c->opts.block_size - 1)) {
+               __bcache_io_error(c, "misaligned write");
+               op->error = -EIO;
+               goto err;
+       }
+
+       if (c->opts.nochanges ||
+           !percpu_ref_tryget(&c->writes)) {
+               if (!(op->flags & BCH_WRITE_FROM_INTERNAL))
+                       __bcache_io_error(c, "read only");
+               op->error = -EROFS;
+               goto err;
+       }
+
+       /*
+        * Can't ratelimit copygc - we'd deadlock:
+        */
+       if (!(op->flags & BCH_WRITE_FROM_INTERNAL))
+               down(&c->io_in_flight);
+
+       bch2_increment_clock(c, bio_sectors(bio), WRITE);
+
+       data_len = min_t(u64, bio->bi_iter.bi_size,
+                        op->new_i_size - (op->pos.offset << 9));
+
+       if (c->opts.inline_data &&
+           data_len <= min(block_bytes(c) / 2, 1024U)) {
+               bch2_write_data_inline(op, data_len);
+               return;
+       }
+
+       continue_at_nobarrier(cl, __bch2_write, NULL);
+       return;
+err:
+       bch2_disk_reservation_put(c, &op->res);
+
+       if (op->end_io) {
+               EBUG_ON(cl->parent);
+               closure_debug_destroy(cl);
+               op->end_io(op);
+       } else {
+               closure_return(cl);
+       }
+}
+
+/* Cache promotion on read */
+
+struct promote_op {
+       struct closure          cl;
+       struct rcu_head         rcu;
+       u64                     start_time;
+
+       struct rhash_head       hash;
+       struct bpos             pos;
+
+       struct migrate_write    write;
+       struct bio_vec          bi_inline_vecs[0]; /* must be last */
+};
+
+static const struct rhashtable_params bch_promote_params = {
+       .head_offset    = offsetof(struct promote_op, hash),
+       .key_offset     = offsetof(struct promote_op, pos),
+       .key_len        = sizeof(struct bpos),
+};
+
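+/*
+ * A read is only promoted (rewritten to the promote target) if the caller
+ * allows it, a promote target is configured, the extent isn't already on
+ * that target, the target isn't congested, and no promote for this pos is
+ * already in flight.
+ */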
+static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
+                                 struct bpos pos,
+                                 struct bch_io_opts opts,
+                                 unsigned flags)
+{
+       if (!(flags & BCH_READ_MAY_PROMOTE))
+               return false;
+
+       if (!opts.promote_target)
+               return false;
+
+       if (bch2_bkey_has_target(c, k, opts.promote_target))
+               return false;
+
+       if (bch2_target_congested(c, opts.promote_target)) {
+               /* XXX trace this */
+               return false;
+       }
+
+       if (rhashtable_lookup_fast(&c->promote_table, &pos,
+                                  bch_promote_params))
+               return false;
+
+       return true;
+}
+
+static void promote_free(struct bch_fs *c, struct promote_op *op)
+{
+       int ret;
+
+       ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+                                    bch_promote_params);
+       BUG_ON(ret);
+       percpu_ref_put(&c->writes);
+       kfree_rcu(op, rcu);
+}
+
+static void promote_done(struct closure *cl)
+{
+       struct promote_op *op =
+               container_of(cl, struct promote_op, cl);
+       struct bch_fs *c = op->write.op.c;
+
+       bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
+                              op->start_time);
+
+       bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
+       promote_free(c, op);
+}
+
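+/*
+ * Kick off the promote write: steal the bounce pages from the completed
+ * read bio, hand them to the migrate write op, and run the write under
+ * the promote op's closure.
+ */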
+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
+{
+       struct bch_fs *c = rbio->c;
+       struct closure *cl = &op->cl;
+       struct bio *bio = &op->write.op.wbio.bio;
+
+       trace_promote(&rbio->bio);
+
+       /* we now own pages: */
+       BUG_ON(!rbio->bounce);
+       BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
+
+       memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
+              sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
+       swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
+
+       bch2_migrate_read_done(&op->write, rbio);
+
+       closure_init(cl, NULL);
+       closure_call(&op->write.op.cl, bch2_write, c->wq, cl);
+       closure_return_with_destructor(cl, promote_done);
+}
+
+static struct promote_op *__promote_alloc(struct bch_fs *c,
+                                         enum btree_id btree_id,
+                                         struct bkey_s_c k,
+                                         struct bpos pos,
+                                         struct extent_ptr_decoded *pick,
+                                         struct bch_io_opts opts,
+                                         unsigned sectors,
+                                         struct bch_read_bio **rbio)
+{
+       struct promote_op *op = NULL;
+       struct bio *bio;
+       unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
+       int ret;
+
+       if (!percpu_ref_tryget(&c->writes))
+               return NULL;
+
+       op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
+       if (!op)
+               goto err;
+
+       op->start_time = local_clock();
+       op->pos = pos;
+
+       /*
+        * We don't use the mempool here because extents that aren't
+        * checksummed or compressed can be too big for the mempool:
+        */
+       *rbio = kzalloc(sizeof(struct bch_read_bio) +
+                       sizeof(struct bio_vec) * pages,
+                       GFP_NOIO);
+       if (!*rbio)
+               goto err;
+
+       rbio_init(&(*rbio)->bio, opts);
+       bio_init(&(*rbio)->bio, (*rbio)->bio.bi_inline_vecs, pages);
+
+       if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
+                                GFP_NOIO))
+               goto err;
+
+       (*rbio)->bounce         = true;
+       (*rbio)->split          = true;
+       (*rbio)->kmalloc        = true;
+
+       if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
+                                         bch_promote_params))
+               goto err;
+
+       bio = &op->write.op.wbio.bio;
+       bio_init(bio, bio->bi_inline_vecs, pages);
+
+       ret = bch2_migrate_write_init(c, &op->write,
+                       writepoint_hashed((unsigned long) current),
+                       opts,
+                       DATA_PROMOTE,
+                       (struct data_opts) {
+                               .target = opts.promote_target
+                       },
+                       btree_id, k);
+       BUG_ON(ret);
+
+       return op;
+err:
+       if (*rbio)
+               bio_free_pages(&(*rbio)->bio);
+       kfree(*rbio);
+       *rbio = NULL;
+       kfree(op);
+       percpu_ref_put(&c->writes);
+       return NULL;
+}
+
+noinline
+static struct promote_op *promote_alloc(struct bch_fs *c,
+                                              struct bvec_iter iter,
+                                              struct bkey_s_c k,
+                                              struct extent_ptr_decoded *pick,
+                                              struct bch_io_opts opts,
+                                              unsigned flags,
+                                              struct bch_read_bio **rbio,
+                                              bool *bounce,
+                                              bool *read_full)
+{
+       bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+       /* data might have to be decompressed in the write path: */
+       unsigned sectors = promote_full
+               ? max(pick->crc.compressed_size, pick->crc.live_size)
+               : bvec_iter_sectors(iter);
+       struct bpos pos = promote_full
+               ? bkey_start_pos(k.k)
+               : POS(k.k->p.inode, iter.bi_sector);
+       struct promote_op *promote;
+
+       if (!should_promote(c, k, pos, opts, flags))
+               return NULL;
+
+       promote = __promote_alloc(c,
+                                 k.k->type == KEY_TYPE_reflink_v
+                                 ? BTREE_ID_REFLINK
+                                 : BTREE_ID_EXTENTS,
+                                 k, pos, pick, opts, sectors, rbio);
+       if (!promote)
+               return NULL;
+
+       *bounce         = true;
+       *read_full      = promote_full;
+       return promote;
+}
+
+/* Read */
+
+#define READ_RETRY_AVOID       1
+#define READ_RETRY             2
+#define READ_ERR               3
+
+enum rbio_context {
+       RBIO_CONTEXT_NULL,
+       RBIO_CONTEXT_HIGHPRI,
+       RBIO_CONTEXT_UNBOUND,
+};
+
+static inline struct bch_read_bio *
+bch2_rbio_parent(struct bch_read_bio *rbio)
+{
+       return rbio->split ? rbio->parent : rbio;
+}
+
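+/*
+ * Run @fn immediately if we're already in a context at least as unbound
+ * as @context, otherwise punt it to @wq; rbio->context records how far
+ * this rbio has already been punted, so work is never queued twice.
+ */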
+__always_inline
+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
+                          enum rbio_context context,
+                          struct workqueue_struct *wq)
+{
+       if (context <= rbio->context) {
+               fn(&rbio->work);
+       } else {
+               rbio->work.func         = fn;
+               rbio->context           = context;
+               queue_work(wq, &rbio->work);
+       }
+}
+
+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
+{
+       BUG_ON(rbio->bounce && !rbio->split);
+
+       if (rbio->promote)
+               promote_free(rbio->c, rbio->promote);
+       rbio->promote = NULL;
+
+       if (rbio->bounce)
+               bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
+
+       if (rbio->split) {
+               struct bch_read_bio *parent = rbio->parent;
+
+               if (rbio->kmalloc)
+                       kfree(rbio);
+               else
+                       bio_put(&rbio->bio);
+
+               rbio = parent;
+       }
+
+       return rbio;
+}
+
+/*
+ * Only called on a top level bch_read_bio to complete an entire read request,
+ * not a split:
+ */
+static void bch2_rbio_done(struct bch_read_bio *rbio)
+{
+       if (rbio->start_time)
+               bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
+                                      rbio->start_time);
+       bio_endio(&rbio->bio);
+}
+
+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
+                                    struct bvec_iter bvec_iter, u64 inode,
+                                    struct bch_io_failures *failed,
+                                    unsigned flags)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_on_stack sk;
+       struct bkey_s_c k;
+       int ret;
+
+       flags &= ~BCH_READ_LAST_FRAGMENT;
+       flags |= BCH_READ_MUST_CLONE;
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  rbio->pos, BTREE_ITER_SLOTS);
+retry:
+       rbio->bio.bi_status = 0;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       if (bkey_err(k))
+               goto err;
+
+       bkey_on_stack_reassemble(&sk, c, k);
+       k = bkey_i_to_s_c(sk.k);
+       bch2_trans_unlock(&trans);
+
+       if (!bch2_bkey_matches_ptr(c, k,
+                                  rbio->pick.ptr,
+                                  rbio->pos.offset -
+                                  rbio->pick.crc.offset)) {
+               /* extent we wanted to read no longer exists: */
+               rbio->hole = true;
+               goto out;
+       }
+
+       ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags);
+       if (ret == READ_RETRY)
+               goto retry;
+       if (ret)
+               goto err;
+out:
+       bch2_rbio_done(rbio);
+       bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
+       return;
+err:
+       rbio->bio.bi_status = BLK_STS_IOERR;
+       goto out;
+}
+
+static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
+                           struct bvec_iter bvec_iter, u64 inode,
+                           struct bch_io_failures *failed, unsigned flags)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_on_stack sk;
+       struct bkey_s_c k;
+       int ret;
+
+       flags &= ~BCH_READ_LAST_FRAGMENT;
+       flags |= BCH_READ_MUST_CLONE;
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
+                          POS(inode, bvec_iter.bi_sector),
+                          BTREE_ITER_SLOTS, k, ret) {
+               unsigned bytes, sectors, offset_into_extent;
+
+               bkey_on_stack_reassemble(&sk, c, k);
+               k = bkey_i_to_s_c(sk.k);
+
+               offset_into_extent = iter->pos.offset -
+                       bkey_start_offset(k.k);
+               sectors = k.k->size - offset_into_extent;
+
+               ret = bch2_read_indirect_extent(&trans,
+                                       &offset_into_extent, &sk);
+               if (ret)
+                       break;
+
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               bch2_trans_unlock(&trans);
+
+               bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
+               swap(bvec_iter.bi_size, bytes);
+
+               ret = __bch2_read_extent(&trans, rbio, bvec_iter, k,
+                               offset_into_extent, failed, flags);
+               switch (ret) {
+               case READ_RETRY:
+                       goto retry;
+               case READ_ERR:
+                       goto err;
+               }
+
+               if (bytes == bvec_iter.bi_size)
+                       goto out;
+
+               swap(bvec_iter.bi_size, bytes);
+               bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
+       }
+
+       if (ret == -EINTR)
+               goto retry;
+       /*
+        * If we get here, it better have been because there was an error
+        * reading a btree node
+        */
+       BUG_ON(!ret);
+       __bcache_io_error(c, "btree IO error: %i", ret);
+err:
+       rbio->bio.bi_status = BLK_STS_IOERR;
+out:
+       bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
+       bch2_rbio_done(rbio);
+}
+
+static void bch2_rbio_retry(struct work_struct *work)
+{
+       struct bch_read_bio *rbio =
+               container_of(work, struct bch_read_bio, work);
+       struct bch_fs *c        = rbio->c;
+       struct bvec_iter iter   = rbio->bvec_iter;
+       unsigned flags          = rbio->flags;
+       u64 inode               = rbio->pos.inode;
+       struct bch_io_failures failed = { .nr = 0 };
+
+       trace_read_retry(&rbio->bio);
+
+       if (rbio->retry == READ_RETRY_AVOID)
+               bch2_mark_io_failure(&failed, &rbio->pick);
+
+       rbio->bio.bi_status = 0;
+
+       rbio = bch2_rbio_free(rbio);
+
+       flags |= BCH_READ_IN_RETRY;
+       flags &= ~BCH_READ_MAY_PROMOTE;
+
+       if (flags & BCH_READ_NODECODE)
+               bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
+       else
+               bch2_read_retry(c, rbio, iter, inode, &failed, flags);
+}
+
+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
+                           blk_status_t error)
+{
+       rbio->retry = retry;
+
+       if (rbio->flags & BCH_READ_IN_RETRY)
+               return;
+
+       if (retry == READ_ERR) {
+               rbio = bch2_rbio_free(rbio);
+
+               rbio->bio.bi_status = error;
+               bch2_rbio_done(rbio);
+       } else {
+               bch2_rbio_punt(rbio, bch2_rbio_retry,
+                              RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+       }
+}
+
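+/*
+ * Narrow the extent's crc after a successful read: if the existing
+ * checksum covers more data than is live, replace it with one computed
+ * over just the live range. Bails out harmlessly if the extent was
+ * changed or merged under us.
+ */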
+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
+                                  struct bch_read_bio *rbio)
+{
+       struct bch_fs *c = rbio->c;
+       u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
+       struct bch_extent_crc_unpacked new_crc;
+       struct btree_iter *iter = NULL;
+       struct bkey_i *new;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       if (crc_is_compressed(rbio->pick.crc))
+               return 0;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_EXTENTS, rbio->pos,
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       if ((ret = PTR_ERR_OR_ZERO(iter)))
+               goto out;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       if ((ret = bkey_err(k)))
+               goto out;
+
+       /*
+        * We may be temporarily appending another checksum entry, so
+        * allocate room for it:
+        */
+       new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
+                                BKEY_EXTENT_U64s_MAX * 8);
+       if ((ret = PTR_ERR_OR_ZERO(new)))
+               goto out;
+
+       bkey_reassemble(new, k);
+       k = bkey_i_to_s_c(new);
+
+       if (bversion_cmp(k.k->version, rbio->version) ||
+           !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
+               goto out;
+
+       /* Extent was merged? */
+       if (bkey_start_offset(k.k) < data_offset ||
+           k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
+               goto out;
+
+       if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
+                       rbio->pick.crc, NULL, &new_crc,
+                       bkey_start_offset(k.k) - data_offset, k.k->size,
+                       rbio->pick.crc.csum_type)) {
+               bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
+               ret = 0;
+               goto out;
+       }
+
+       if (!bch2_bkey_narrow_crcs(new, new_crc))
+               goto out;
+
+       bch2_trans_update(trans, iter, new, 0);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
+{
+       bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL,
+                     __bch2_rbio_narrow_crcs(&trans, rbio));
+}
+
+/* Inner part that may run in process context */
+static void __bch2_read_endio(struct work_struct *work)
+{
+       struct bch_read_bio *rbio =
+               container_of(work, struct bch_read_bio, work);
+       struct bch_fs *c        = rbio->c;
+       struct bch_dev *ca      = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
+       struct bio *src         = &rbio->bio;
+       struct bio *dst         = &bch2_rbio_parent(rbio)->bio;
+       struct bvec_iter dst_iter = rbio->bvec_iter;
+       struct bch_extent_crc_unpacked crc = rbio->pick.crc;
+       struct nonce nonce = extent_nonce(rbio->version, crc);
+       struct bch_csum csum;
+
+       /* Reset iterator for checksumming and copying bounced data: */
+       if (rbio->bounce) {
+               src->bi_iter.bi_size            = crc.compressed_size << 9;
+               src->bi_iter.bi_idx             = 0;
+               src->bi_iter.bi_bvec_done       = 0;
+       } else {
+               src->bi_iter                    = rbio->bvec_iter;
+       }
+
+       csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
+       if (bch2_crc_cmp(csum, rbio->pick.crc.csum))
+               goto csum_err;
+
+       if (unlikely(rbio->narrow_crcs))
+               bch2_rbio_narrow_crcs(rbio);
+
+       if (rbio->flags & BCH_READ_NODECODE)
+               goto nodecode;
+
+       /* Adjust crc to point to subset of data we want: */
+       crc.offset     += rbio->offset_into_extent;
+       crc.live_size   = bvec_iter_sectors(rbio->bvec_iter);
+
+       if (crc_is_compressed(crc)) {
+               bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+               if (bch2_bio_uncompress(c, src, dst, dst_iter, crc))
+                       goto decompression_err;
+       } else {
+               /* don't need to decrypt the entire bio: */
+               nonce = nonce_add(nonce, crc.offset << 9);
+               bio_advance(src, crc.offset << 9);
+
+               BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+               src->bi_iter.bi_size = dst_iter.bi_size;
+
+               bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+
+               if (rbio->bounce) {
+                       struct bvec_iter src_iter = src->bi_iter;
+                       bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+               }
+       }
+
+       if (rbio->promote) {
+               /*
+                * Re-encrypt data we decrypted, so it's consistent with
+                * rbio->crc:
+                */
+               bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+               promote_start(rbio->promote, rbio);
+               rbio->promote = NULL;
+       }
+nodecode:
+       if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
+               rbio = bch2_rbio_free(rbio);
+               bch2_rbio_done(rbio);
+       }
+       return;
+csum_err:
+       /*
+        * Checksum error: if the bio wasn't bounced, we may have been
+        * reading into buffers owned by userspace (that userspace can
+        * scribble over) - retry the read, bouncing it this time:
+        */
+       if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
+               rbio->flags |= BCH_READ_MUST_BOUNCE;
+               bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
+               return;
+       }
+
+       bch2_dev_io_error(ca,
+               "data checksum error, inode %llu offset %llu: expected %0llx:%0llx got %0llx:%0llx (type %u)",
+               rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
+               rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
+               csum.hi, csum.lo, crc.csum_type);
+       bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+       return;
+decompression_err:
+       __bcache_io_error(c, "decompression error, inode %llu offset %llu",
+                         rbio->pos.inode,
+                         (u64) rbio->bvec_iter.bi_sector);
+       bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+       return;
+}
+
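+/*
+ * Top-level read completion: account latency, retry reads of stale cached
+ * pointers, then punt decryption/decompression/checksum work to a context
+ * that's allowed to do it.
+ */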
+static void bch2_read_endio(struct bio *bio)
+{
+       struct bch_read_bio *rbio =
+               container_of(bio, struct bch_read_bio, bio);
+       struct bch_fs *c        = rbio->c;
+       struct bch_dev *ca      = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
+       struct workqueue_struct *wq = NULL;
+       enum rbio_context context = RBIO_CONTEXT_NULL;
+
+       if (rbio->have_ioref) {
+               bch2_latency_acct(ca, rbio->submit_time, READ);
+               percpu_ref_put(&ca->io_ref);
+       }
+
+       if (!rbio->split)
+               rbio->bio.bi_end_io = rbio->end_io;
+
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "data read: %s",
+                              bch2_blk_status_to_str(bio->bi_status))) {
+               bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
+               return;
+       }
+
+       if (rbio->pick.ptr.cached &&
+           (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+            ptr_stale(ca, &rbio->pick.ptr))) {
+               atomic_long_inc(&c->read_realloc_races);
+
+               if (rbio->flags & BCH_READ_RETRY_IF_STALE)
+                       bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
+               else
+                       bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
+               return;
+       }
+
+       if (rbio->narrow_crcs ||
+           crc_is_compressed(rbio->pick.crc) ||
+           bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
+               context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
+       else if (rbio->pick.crc.csum_type)
+               context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;
+
+       bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
+}
+
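+/*
+ * Resolve a reflink pointer: look up the KEY_TYPE_reflink_v extent it
+ * points to in the reflink btree and swap it into @orig_k, adjusting
+ * *offset_into_extent to be relative to the indirect extent.
+ */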
+int __bch2_read_indirect_extent(struct btree_trans *trans,
+                               unsigned *offset_into_extent,
+                               struct bkey_on_stack *orig_k)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 reflink_offset;
+       int ret;
+
+       reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
+               *offset_into_extent;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
+                                  POS(0, reflink_offset),
+                                  BTREE_ITER_SLOTS);
+       ret = PTR_ERR_OR_ZERO(iter);
+       if (ret)
+               return ret;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       if (k.k->type != KEY_TYPE_reflink_v) {
+               __bcache_io_error(trans->c,
+                               "pointer to nonexistent indirect extent");
+               ret = -EIO;
+               goto err;
+       }
+
+       *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
+       bkey_on_stack_reassemble(orig_k, trans->c, k);
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
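+/*
+ * Read from a single (possibly indirect) extent: inline data is copied
+ * straight out of the key; otherwise pick a replica, decide whether the
+ * read must be bounced and/or read the full extent (compressed, encrypted
+ * or narrowable crcs), optionally set up a promote, and issue the bio.
+ */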
+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
+                      struct bvec_iter iter, struct bkey_s_c k,
+                      unsigned offset_into_extent,
+                      struct bch_io_failures *failed, unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct extent_ptr_decoded pick;
+       struct bch_read_bio *rbio = NULL;
+       struct bch_dev *ca;
+       struct promote_op *promote = NULL;
+       bool bounce = false, read_full = false, narrow_crcs = false;
+       struct bpos pos = bkey_start_pos(k.k);
+       int pick_ret;
+
+       if (k.k->type == KEY_TYPE_inline_data) {
+               struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
+               unsigned bytes = min_t(unsigned, iter.bi_size,
+                                      bkey_val_bytes(d.k));
+
+               swap(iter.bi_size, bytes);
+               memcpy_to_bio(&orig->bio, iter, d.v->data);
+               swap(iter.bi_size, bytes);
+               bio_advance_iter(&orig->bio, &iter, bytes);
+               zero_fill_bio_iter(&orig->bio, iter);
+               goto out_read_done;
+       }
+
+       pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
+
+       /* hole or reservation - just zero fill: */
+       if (!pick_ret)
+               goto hole;
+
+       if (pick_ret < 0) {
+               __bcache_io_error(c, "no device to read from");
+               goto err;
+       }
+
+       if (pick_ret > 0)
+               ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+       if (flags & BCH_READ_NODECODE) {
+               /*
+                * can happen if we retry, and the extent we were going to read
+                * has been merged in the meantime:
+                */
+               if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
+                       goto hole;
+
+               iter.bi_size    = pick.crc.compressed_size << 9;
+               goto get_bio;
+       }
+
+       if (!(flags & BCH_READ_LAST_FRAGMENT) ||
+           bio_flagged(&orig->bio, BIO_CHAIN))
+               flags |= BCH_READ_MUST_CLONE;
+
+       narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
+               bch2_can_narrow_extent_crcs(k, pick.crc);
+
+       if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
+               flags |= BCH_READ_MUST_BOUNCE;
+
+       EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+       if (crc_is_compressed(pick.crc) ||
+           (pick.crc.csum_type != BCH_CSUM_NONE &&
+            (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+             (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+              (flags & BCH_READ_USER_MAPPED)) ||
+             (flags & BCH_READ_MUST_BOUNCE)))) {
+               read_full = true;
+               bounce = true;
+       }
+
+       if (orig->opts.promote_target)
+               promote = promote_alloc(c, iter, k, &pick, orig->opts, flags,
+                                       &rbio, &bounce, &read_full);
+
+       if (!read_full) {
+               EBUG_ON(crc_is_compressed(pick.crc));
+               EBUG_ON(pick.crc.csum_type &&
+                       (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+                        bvec_iter_sectors(iter) != pick.crc.live_size ||
+                        pick.crc.offset ||
+                        offset_into_extent));
+
+               pos.offset += offset_into_extent;
+               pick.ptr.offset += pick.crc.offset +
+                       offset_into_extent;
+               offset_into_extent              = 0;
+               pick.crc.compressed_size        = bvec_iter_sectors(iter);
+               pick.crc.uncompressed_size      = bvec_iter_sectors(iter);
+               pick.crc.offset                 = 0;
+               pick.crc.live_size              = bvec_iter_sectors(iter);
+       }
+get_bio:
+       if (rbio) {
+               /*
+                * promote already allocated bounce rbio:
+                * promote needs to allocate a bio big enough for uncompressing
+                * data in the write path, but we're not going to use it all
+                * here:
+                */
+               EBUG_ON(rbio->bio.bi_iter.bi_size <
+                      pick.crc.compressed_size << 9);
+               rbio->bio.bi_iter.bi_size =
+                       pick.crc.compressed_size << 9;
+       } else if (bounce) {
+               unsigned sectors = pick.crc.compressed_size;
+
+               rbio = rbio_init(bio_alloc_bioset(GFP_NOIO,
+                                                 DIV_ROUND_UP(sectors, PAGE_SECTORS),
+                                                 &c->bio_read_split),
+                                orig->opts);
+
+               bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
+               rbio->bounce    = true;
+               rbio->split     = true;
+       } else if (flags & BCH_READ_MUST_CLONE) {
+               /*
+                * Have to clone if there were any splits, due to error
+                * reporting issues (if a split errored, and retrying didn't
+                * work, when it reports the error to its parent (us) we don't
+                * know if the error was from our bio, and we should retry, or
+                * from the whole bio, in which case we don't want to retry and
+                * lose the error)
+                */
+               rbio = rbio_init(bio_clone_fast(&orig->bio, GFP_NOIO,
+                                               &c->bio_read_split),
+                                orig->opts);
+               rbio->bio.bi_iter = iter;
+               rbio->split     = true;
+       } else {
+               rbio = orig;
+               rbio->bio.bi_iter = iter;
+               EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
+       }
+
+       EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
+
+       rbio->c                 = c;
+       rbio->submit_time       = local_clock();
+       if (rbio->split)
+               rbio->parent    = orig;
+       else
+               rbio->end_io    = orig->bio.bi_end_io;
+       rbio->bvec_iter         = iter;
+       rbio->offset_into_extent= offset_into_extent;
+       rbio->flags             = flags;
+       rbio->have_ioref        = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
+       rbio->narrow_crcs       = narrow_crcs;
+       rbio->hole              = 0;
+       rbio->retry             = 0;
+       rbio->context           = 0;
+       /* XXX: only initialize this if needed */
+       rbio->devs_have         = bch2_bkey_devs(k);
+       rbio->pick              = pick;
+       rbio->pos               = pos;
+       rbio->version           = k.k->version;
+       rbio->promote           = promote;
+       INIT_WORK(&rbio->work, NULL);
+
+       rbio->bio.bi_opf        = orig->bio.bi_opf;
+       rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
+       rbio->bio.bi_end_io     = bch2_read_endio;
+
+       if (rbio->bounce)
+               trace_read_bounce(&rbio->bio);
+
+       bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
+
+       if (pick.ptr.cached)
+               bch2_bucket_io_time_reset(trans, pick.ptr.dev,
+                       PTR_BUCKET_NR(ca, &pick.ptr), READ);
+
+       if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
+               bio_inc_remaining(&orig->bio);
+               trace_read_split(&orig->bio);
+       }
+
+       if (!rbio->pick.idx) {
+               if (!rbio->have_ioref) {
+                       __bcache_io_error(c, "no device to read from");
+                       bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+                       goto out;
+               }
+
+               this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
+                            bio_sectors(&rbio->bio));
+               bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
+
+               if (likely(!(flags & BCH_READ_IN_RETRY)))
+                       submit_bio(&rbio->bio);
+               else
+                       submit_bio_wait(&rbio->bio);
+       } else {
+               /* Attempting reconstruct read: */
+               if (bch2_ec_read_extent(c, rbio)) {
+                       bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+                       goto out;
+               }
+
+               if (likely(!(flags & BCH_READ_IN_RETRY)))
+                       bio_endio(&rbio->bio);
+       }
+out:
+       if (likely(!(flags & BCH_READ_IN_RETRY))) {
+               return 0;
+       } else {
+               int ret;
+
+               rbio->context = RBIO_CONTEXT_UNBOUND;
+               bch2_read_endio(&rbio->bio);
+
+               ret = rbio->retry;
+               rbio = bch2_rbio_free(rbio);
+
+               if (ret == READ_RETRY_AVOID) {
+                       bch2_mark_io_failure(failed, &pick);
+                       ret = READ_RETRY;
+               }
+
+               return ret;
+       }
+
+err:
+       if (flags & BCH_READ_IN_RETRY)
+               return READ_ERR;
+
+       orig->bio.bi_status = BLK_STS_IOERR;
+       goto out_read_done;
+
+hole:
+       /*
+        * won't normally happen in the BCH_READ_NODECODE
+        * (bch2_move_extent()) path, but if we retry and the extent we wanted
+        * to read no longer exists we have to signal that:
+        */
+       if (flags & BCH_READ_NODECODE)
+               orig->hole = true;
+
+       zero_fill_bio_iter(&orig->bio, iter);
+out_read_done:
+       if (flags & BCH_READ_LAST_FRAGMENT)
+               bch2_rbio_done(orig);
+       return 0;
+}
+
+void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_on_stack sk;
+       struct bkey_s_c k;
+       unsigned flags = BCH_READ_RETRY_IF_STALE|
+               BCH_READ_MAY_PROMOTE|
+               BCH_READ_USER_MAPPED;
+       int ret;
+
+       BUG_ON(rbio->_state);
+       BUG_ON(flags & BCH_READ_NODECODE);
+       BUG_ON(flags & BCH_READ_IN_RETRY);
+
+       rbio->c = c;
+       rbio->start_time = local_clock();
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, 0, 0);
+retry:
+       bch2_trans_begin(&trans);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  POS(inode, rbio->bio.bi_iter.bi_sector),
+                                  BTREE_ITER_SLOTS);
+       while (1) {
+               unsigned bytes, sectors, offset_into_extent;
+
+               bch2_btree_iter_set_pos(iter,
+                               POS(inode, rbio->bio.bi_iter.bi_sector));
+
+               k = bch2_btree_iter_peek_slot(iter);
+               ret = bkey_err(k);
+               if (ret)
+                       goto err;
+
+               offset_into_extent = iter->pos.offset -
+                       bkey_start_offset(k.k);
+               sectors = k.k->size - offset_into_extent;
+
+               bkey_on_stack_reassemble(&sk, c, k);
+               k = bkey_i_to_s_c(sk.k);
+
+               ret = bch2_read_indirect_extent(&trans,
+                                       &offset_into_extent, &sk);
+               if (ret)
+                       goto err;
+
+               /*
+                * With indirect extents, the amount of data to read is the min
+                * of the original extent and the indirect extent:
+                */
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               /*
+                * Unlock the iterator while the btree node's lock is still in
+                * cache, before doing the IO:
+                */
+               bch2_trans_unlock(&trans);
+
+               bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
+               swap(rbio->bio.bi_iter.bi_size, bytes);
+
+               if (rbio->bio.bi_iter.bi_size == bytes)
+                       flags |= BCH_READ_LAST_FRAGMENT;
+
+               bch2_read_extent(&trans, rbio, k, offset_into_extent, flags);
+
+               if (flags & BCH_READ_LAST_FRAGMENT)
+                       break;
+
+               swap(rbio->bio.bi_iter.bi_size, bytes);
+               bio_advance(&rbio->bio, bytes);
+       }
+out:
+       bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
+       return;
+err:
+       if (ret == -EINTR)
+               goto retry;
+
+       bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
+       bch2_rbio_done(rbio);
+       goto out;
+}
+
+void bch2_fs_io_exit(struct bch_fs *c)
+{
+       if (c->promote_table.tbl)
+               rhashtable_destroy(&c->promote_table);
+       mempool_exit(&c->bio_bounce_pages);
+       bioset_exit(&c->bio_write);
+       bioset_exit(&c->bio_read_split);
+       bioset_exit(&c->bio_read);
+}
+
+int bch2_fs_io_init(struct bch_fs *c)
+{
+       if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
+                       BIOSET_NEED_BVECS) ||
+           bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
+                       BIOSET_NEED_BVECS) ||
+           bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
+                       BIOSET_NEED_BVECS) ||
+           mempool_init_page_pool(&c->bio_bounce_pages,
+                                  max_t(unsigned,
+                                        c->opts.btree_node_size,
+                                        c->sb.encoded_extent_max) /
+                                  PAGE_SECTORS, 0) ||
+           rhashtable_init(&c->promote_table, &bch_promote_params))
+               return -ENOMEM;
+
+       return 0;
+}
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
new file mode 100644 (file)
index 0000000..e6aac59
--- /dev/null
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_H
+#define _BCACHEFS_IO_H
+
+#include "checksum.h"
+#include "bkey_on_stack.h"
+#include "io_types.h"
+
+#define to_wbio(_bio)                  \
+       container_of((_bio), struct bch_write_bio, bio)
+
+#define to_rbio(_bio)                  \
+       container_of((_bio), struct bch_read_bio, bio)
+
+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
+
+void bch2_latency_acct(struct bch_dev *, u64, int);
+
+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
+                              enum bch_data_type, const struct bkey_i *);
+
+#define BLK_STS_REMOVED                ((__force blk_status_t)128)
+
+const char *bch2_blk_status_to_str(blk_status_t);
+
+enum bch_write_flags {
+       BCH_WRITE_ALLOC_NOWAIT          = (1 << 0),
+       BCH_WRITE_CACHED                = (1 << 1),
+       BCH_WRITE_FLUSH                 = (1 << 2),
+       BCH_WRITE_DATA_ENCODED          = (1 << 3),
+       BCH_WRITE_PAGES_STABLE          = (1 << 4),
+       BCH_WRITE_PAGES_OWNED           = (1 << 5),
+       BCH_WRITE_ONLY_SPECIFIED_DEVS   = (1 << 6),
+       BCH_WRITE_WROTE_DATA_INLINE     = (1 << 7),
+       BCH_WRITE_FROM_INTERNAL         = (1 << 8),
+
+       /* Internal: */
+       BCH_WRITE_JOURNAL_SEQ_PTR       = (1 << 9),
+       BCH_WRITE_SKIP_CLOSURE_PUT      = (1 << 10),
+       BCH_WRITE_DONE                  = (1 << 11),
+};
+
+static inline u64 *op_journal_seq(struct bch_write_op *op)
+{
+       return (op->flags & BCH_WRITE_JOURNAL_SEQ_PTR)
+               ? op->journal_seq_p : &op->journal_seq;
+}
+
+static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq)
+{
+       op->journal_seq_p = journal_seq;
+       op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR;
+}
+
+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
+{
+       return op->alloc_reserve == RESERVE_MOVINGGC
+               ? op->c->copygc_wq
+               : op->c->wq;
+}
+
+int bch2_extent_update(struct btree_trans *, struct btree_iter *,
+                      struct bkey_i *, struct disk_reservation *,
+                      u64 *, u64, s64 *);
+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
+                  struct bpos, u64 *, s64 *);
+int bch2_fpunch(struct bch_fs *c, u64, u64, u64, u64 *, s64 *);
+
+int bch2_write_index_default(struct bch_write_op *);
+
+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
+                                     struct bch_io_opts opts)
+{
+       op->c                   = c;
+       op->end_io              = NULL;
+       op->flags               = 0;
+       op->written             = 0;
+       op->error               = 0;
+       op->csum_type           = bch2_data_checksum_type(c, opts.data_checksum);
+       op->compression_type    = bch2_compression_opt_to_type[opts.compression];
+       op->nr_replicas         = 0;
+       op->nr_replicas_required = c->opts.data_replicas_required;
+       op->alloc_reserve       = RESERVE_NONE;
+       op->incompressible      = 0;
+       op->open_buckets.nr     = 0;
+       op->devs_have.nr        = 0;
+       op->target              = 0;
+       op->opts                = opts;
+       op->pos                 = POS_MAX;
+       op->version             = ZERO_VERSION;
+       op->write_point         = (struct write_point_specifier) { 0 };
+       op->res                 = (struct disk_reservation) { 0 };
+       op->journal_seq         = 0;
+       op->new_i_size          = U64_MAX;
+       op->i_sectors_delta     = 0;
+       op->index_update_fn     = bch2_write_index_default;
+}
+
+void bch2_write(struct closure *);
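+
+/*
+ * Illustrative sketch (editor's example, not new API): a caller fills in a
+ * bch_write_op and kicks the write off via the embedded closure:
+ *
+ *        bch2_write_op_init(&op, c, opts);
+ *        op.pos = POS(inum, sector);
+ *        ...point op.wbio.bio at the data to be written...
+ *        closure_call(&op.cl, bch2_write, NULL, &parent_cl);
+ */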
+
+static inline struct bch_write_bio *wbio_init(struct bio *bio)
+{
+       struct bch_write_bio *wbio = to_wbio(bio);
+
+       memset(wbio, 0, offsetof(struct bch_write_bio, bio));
+       return wbio;
+}
+
+struct bch_devs_mask;
+struct cache_promote_op;
+struct extent_ptr_decoded;
+
+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
+                               struct bkey_on_stack *);
+
+static inline int bch2_read_indirect_extent(struct btree_trans *trans,
+                                           unsigned *offset_into_extent,
+                                           struct bkey_on_stack *k)
+{
+       return k->k->k.type == KEY_TYPE_reflink_p
+               ? __bch2_read_indirect_extent(trans, offset_into_extent, k)
+               : 0;
+}
+
+enum bch_read_flags {
+       BCH_READ_RETRY_IF_STALE         = 1 << 0,
+       BCH_READ_MAY_PROMOTE            = 1 << 1,
+       BCH_READ_USER_MAPPED            = 1 << 2,
+       BCH_READ_NODECODE               = 1 << 3,
+       BCH_READ_LAST_FRAGMENT          = 1 << 4,
+
+       /* internal: */
+       BCH_READ_MUST_BOUNCE            = 1 << 5,
+       BCH_READ_MUST_CLONE             = 1 << 6,
+       BCH_READ_IN_RETRY               = 1 << 7,
+};
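+
+/*
+ * Usage note (editor's, derived from bch2_read() in io.c): the normal
+ * external read path passes
+ *
+ *        BCH_READ_RETRY_IF_STALE|BCH_READ_MAY_PROMOTE|BCH_READ_USER_MAPPED;
+ *
+ * the internal flags are set by the read path itself as it splits, bounces
+ * and retries.
+ */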
+
+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
+                      struct bvec_iter, struct bkey_s_c, unsigned,
+                      struct bch_io_failures *, unsigned);
+
+static inline void bch2_read_extent(struct btree_trans *trans,
+                                   struct bch_read_bio *rbio,
+                                   struct bkey_s_c k,
+                                   unsigned offset_into_extent,
+                                   unsigned flags)
+{
+       __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k,
+                          offset_into_extent, NULL, flags);
+}
+
+void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
+
+static inline struct bch_read_bio *rbio_init(struct bio *bio,
+                                            struct bch_io_opts opts)
+{
+       struct bch_read_bio *rbio = to_rbio(bio);
+
+       rbio->_state    = 0;
+       rbio->promote   = NULL;
+       rbio->opts      = opts;
+       return rbio;
+}
+
+void bch2_fs_io_exit(struct bch_fs *);
+int bch2_fs_io_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_IO_H */
diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h
new file mode 100644 (file)
index 0000000..b23727d
--- /dev/null
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_TYPES_H
+#define _BCACHEFS_IO_TYPES_H
+
+#include "alloc_types.h"
+#include "btree_types.h"
+#include "buckets_types.h"
+#include "extents_types.h"
+#include "keylist_types.h"
+#include "opts.h"
+#include "super_types.h"
+
+#include <linux/llist.h>
+#include <linux/workqueue.h>
+
+struct bch_read_bio {
+       struct bch_fs           *c;
+       u64                     start_time;
+       u64                     submit_time;
+
+       /*
+        * Reads will often have to be split, and if the extent being read from
+        * was checksummed or compressed we'll also have to allocate bounce
+        * buffers and copy the data back into the original bio.
+        *
+        * If we didn't have to split, we have to save and restore the original
+        * bi_end_io - @split below indicates which:
+        */
+       union {
+       struct bch_read_bio     *parent;
+       bio_end_io_t            *end_io;
+       };
+
+       /*
+        * Saved copy of bio->bi_iter, from submission time - allows us to
+        * resubmit on IO error, and also to copy data back to the original bio
+        * when we're bouncing:
+        */
+       struct bvec_iter        bvec_iter;
+
+       unsigned                offset_into_extent;
+
+       u16                     flags;
+       union {
+       struct {
+       u16                     bounce:1,
+                               split:1,
+                               kmalloc:1,
+                               have_ioref:1,
+                               narrow_crcs:1,
+                               hole:1,
+                               retry:2,
+                               context:2;
+       };
+       u16                     _state;
+       };
+
+       struct bch_devs_list    devs_have;
+
+       struct extent_ptr_decoded pick;
+       /* start pos of data we read (may not be pos of data we want) */
+       struct bpos             pos;
+       struct bversion         version;
+
+       struct promote_op       *promote;
+
+       struct bch_io_opts      opts;
+
+       struct work_struct      work;
+
+       struct bio              bio;
+};
+
+struct bch_write_bio {
+       struct bch_fs           *c;
+       struct bch_write_bio    *parent;
+
+       u64                     submit_time;
+
+       struct bch_devs_list    failed;
+       u8                      dev;
+
+       unsigned                split:1,
+                               bounce:1,
+                               put_bio:1,
+                               have_ioref:1,
+                               used_mempool:1;
+
+       struct bio              bio;
+};
+
+struct bch_write_op {
+       struct closure          cl;
+       struct bch_fs           *c;
+       void                    (*end_io)(struct bch_write_op *);
+       u64                     start_time;
+
+       unsigned                written; /* sectors */
+       u16                     flags;
+       s16                     error; /* dio write path expects it to hold -ERESTARTSYS... */
+
+       unsigned                csum_type:4;
+       unsigned                compression_type:4;
+       unsigned                nr_replicas:4;
+       unsigned                nr_replicas_required:4;
+       unsigned                alloc_reserve:3;
+       unsigned                incompressible:1;
+
+       struct bch_devs_list    devs_have;
+       u16                     target;
+       u16                     nonce;
+       struct bch_io_opts      opts;
+
+       struct bpos             pos;
+       struct bversion         version;
+
+       /* For BCH_WRITE_DATA_ENCODED: */
+       struct bch_extent_crc_unpacked crc;
+
+       struct write_point_specifier write_point;
+
+       struct disk_reservation res;
+
+       struct open_buckets     open_buckets;
+
+       /*
+        * If caller wants to flush but hasn't passed us a journal_seq ptr, we
+        * still need to stash the journal_seq somewhere:
+        */
+       union {
+               u64                     *journal_seq_p;
+               u64                     journal_seq;
+       };
+       u64                     new_i_size;
+       s64                     i_sectors_delta;
+
+       int                     (*index_update_fn)(struct bch_write_op *);
+
+       struct bch_devs_mask    failed;
+
+       struct keylist          insert_keys;
+       u64                     inline_keys[BKEY_EXTENT_U64s_MAX * 2];
+
+       /* Must be last: */
+       struct bch_write_bio    wbio;
+};
+
+#endif /* _BCACHEFS_IO_TYPES_H */
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
new file mode 100644 (file)
index 0000000..b8b7199
--- /dev/null
@@ -0,0 +1,1263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bcachefs journalling code, for btree insertions
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_methods.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "super-io.h"
+
+#include <trace/events/bcachefs.h>
+
+static inline struct journal_buf *journal_seq_to_buf(struct journal *, u64);
+
+static bool __journal_entry_is_open(union journal_res_state state)
+{
+       return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
+}
+
+static bool journal_entry_is_open(struct journal *j)
+{
+       return __journal_entry_is_open(j->reservations);
+}
+
+static void journal_pin_new_entry(struct journal *j, int count)
+{
+       struct journal_entry_pin_list *p;
+
+       /*
+        * The fifo_push() needs to happen at the same time as j->seq is
+        * incremented for journal_last_seq() to be calculated correctly
+        */
+       atomic64_inc(&j->seq);
+       p = fifo_push_ref(&j->pin);
+
+       INIT_LIST_HEAD(&p->list);
+       INIT_LIST_HEAD(&p->flushed);
+       atomic_set(&p->count, count);
+       p->devs.nr = 0;
+}
+
+static void bch2_journal_buf_init(struct journal *j)
+{
+       struct journal_buf *buf = journal_cur_buf(j);
+
+       memset(buf->has_inode, 0, sizeof(buf->has_inode));
+
+       memset(buf->data, 0, sizeof(*buf->data));
+       buf->data->seq  = cpu_to_le64(journal_cur_seq(j));
+       buf->data->u64s = 0;
+}
+
+void bch2_journal_halt(struct journal *j)
+{
+       union journal_res_state old, new;
+       u64 v = atomic64_read(&j->reservations.counter);
+
+       do {
+               old.v = new.v = v;
+               if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
+                       return;
+
+               new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
+       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+                                      old.v, new.v)) != old.v);
+
+       journal_wake(j);
+       closure_wake_up(&journal_cur_buf(j)->wait);
+}
+
+/* journal entry close/open: */
+
+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
+{
+       if (!need_write_just_set &&
+           test_bit(JOURNAL_NEED_WRITE, &j->flags))
+               bch2_time_stats_update(j->delay_time,
+                                      j->need_write_time);
+
+       clear_bit(JOURNAL_NEED_WRITE, &j->flags);
+
+       closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+}
+
+/*
+ * Returns true if journal entry is now closed:
+ */
+static bool __journal_entry_close(struct journal *j)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_buf *buf = journal_cur_buf(j);
+       union journal_res_state old, new;
+       u64 v = atomic64_read(&j->reservations.counter);
+       bool set_need_write = false;
+       unsigned sectors;
+
+       lockdep_assert_held(&j->lock);
+
+       do {
+               old.v = new.v = v;
+               if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
+                       return true;
+
+               if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
+                       /* this entry will never be written: */
+                       closure_wake_up(&buf->wait);
+                       return true;
+               }
+
+               if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
+                       set_bit(JOURNAL_NEED_WRITE, &j->flags);
+                       j->need_write_time = local_clock();
+                       set_need_write = true;
+               }
+
+               if (new.prev_buf_unwritten)
+                       return false;
+
+               new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
+               new.idx++;
+               new.prev_buf_unwritten = 1;
+
+               BUG_ON(journal_state_count(new, new.idx));
+       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+                                      old.v, new.v)) != old.v);
+
+       buf->data->u64s         = cpu_to_le32(old.cur_entry_offset);
+
+       sectors = vstruct_blocks_plus(buf->data, c->block_bits,
+                                     buf->u64s_reserved) << c->block_bits;
+       BUG_ON(sectors > buf->sectors);
+       buf->sectors = sectors;
+
+       bkey_extent_init(&buf->key);
+
+       /*
+        * We have to set last_seq here, _before_ opening a new journal entry:
+        *
+        * A thread may replace an old pin with a new pin on its current
+        * journal reservation - the expectation being that the journal will
+        * contain either what the old pin protected or what the new pin
+        * protects.
+        *
+        * After the old pin is dropped journal_last_seq() won't include the old
+        * pin, so we can only write the updated last_seq on the entry that
+        * contains whatever the new pin protects.
+        *
+        * Restated, we can _not_ update last_seq for a given entry if there
+        * could be a newer entry open with reservations/pins that have been
+        * taken against it.
+        *
+        * Hence, we want to update/set last_seq on the current journal entry right
+        * before we open a new one:
+        */
+       buf->data->last_seq     = cpu_to_le64(journal_last_seq(j));
+
+       if (journal_entry_empty(buf->data))
+               clear_bit(JOURNAL_NOT_EMPTY, &j->flags);
+       else
+               set_bit(JOURNAL_NOT_EMPTY, &j->flags);
+
+       journal_pin_new_entry(j, 1);
+
+       bch2_journal_buf_init(j);
+
+       cancel_delayed_work(&j->write_work);
+
+       bch2_journal_space_available(j);
+
+       bch2_journal_buf_put(j, old.idx, set_need_write);
+       return true;
+}
+
+static bool journal_entry_close(struct journal *j)
+{
+       bool ret;
+
+       spin_lock(&j->lock);
+       ret = __journal_entry_close(j);
+       spin_unlock(&j->lock);
+
+       return ret;
+}
+
+/*
+ * should _only_ be called from journal_res_get() - when we actually want a
+ * journal reservation - an open journal entry means the journal is dirty:
+ *
+ * returns:
+ * 0:          success
+ * -ENOSPC:    journal currently full, must invoke reclaim
+ * -EAGAIN:    journal blocked, must wait
+ * -EROFS:     insufficient rw devices or journal error
+ */
+static int journal_entry_open(struct journal *j)
+{
+       struct journal_buf *buf = journal_cur_buf(j);
+       union journal_res_state old, new;
+       int u64s;
+       u64 v;
+
+       lockdep_assert_held(&j->lock);
+       BUG_ON(journal_entry_is_open(j));
+
+       if (j->blocked)
+               return -EAGAIN;
+
+       if (j->cur_entry_error)
+               return j->cur_entry_error;
+
+       BUG_ON(!j->cur_entry_sectors);
+
+       buf->u64s_reserved      = j->entry_u64s_reserved;
+       buf->disk_sectors       = j->cur_entry_sectors;
+       buf->sectors            = min(buf->disk_sectors, buf->buf_size >> 9);
+
+       u64s = (int) (buf->sectors << 9) / sizeof(u64) -
+               journal_entry_overhead(j);
+       u64s  = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
+
+       if (u64s <= le32_to_cpu(buf->data->u64s))
+               return -ENOSPC;
+
+       /*
+        * Must be set before marking the journal entry as open:
+        */
+       j->cur_entry_u64s = u64s;
+
+       v = atomic64_read(&j->reservations.counter);
+       do {
+               old.v = new.v = v;
+
+               if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
+                       return -EROFS;
+
+               /* Handle any already added entries */
+               new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+
+               EBUG_ON(journal_state_count(new, new.idx));
+               journal_state_inc(&new);
+       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+                                      old.v, new.v)) != old.v);
+
+       if (j->res_get_blocked_start)
+               bch2_time_stats_update(j->blocked_time,
+                                      j->res_get_blocked_start);
+       j->res_get_blocked_start = 0;
+
+       mod_delayed_work(system_freezable_wq,
+                        &j->write_work,
+                        msecs_to_jiffies(j->write_delay_ms));
+       journal_wake(j);
+       return 0;
+}
+
+static bool journal_quiesced(struct journal *j)
+{
+       union journal_res_state state = READ_ONCE(j->reservations);
+       bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
+
+       if (!ret)
+               journal_entry_close(j);
+       return ret;
+}
+
+static void journal_quiesce(struct journal *j)
+{
+       wait_event(j->wait, journal_quiesced(j));
+}
+
+static void journal_write_work(struct work_struct *work)
+{
+       struct journal *j = container_of(work, struct journal, write_work.work);
+
+       journal_entry_close(j);
+}
+
+/*
+ * Given an inode number, if that inode number has data in the journal that
+ * hasn't yet been flushed, return the journal sequence number that needs to be
+ * flushed:
+ */
+u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
+{
+       size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
+       u64 seq = 0;
+
+       if (!test_bit(h, j->buf[0].has_inode) &&
+           !test_bit(h, j->buf[1].has_inode))
+               return 0;
+
+       spin_lock(&j->lock);
+       if (test_bit(h, journal_cur_buf(j)->has_inode))
+               seq = journal_cur_seq(j);
+       else if (test_bit(h, journal_prev_buf(j)->has_inode))
+               seq = journal_cur_seq(j) - 1;
+       spin_unlock(&j->lock);
+
+       return seq;
+}
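+
+/*
+ * Hypothetical fsync-style caller (editor's illustration only): flush the
+ * journal up to the last entry that has data for @inode:
+ *
+ *        u64 seq = bch2_inode_journal_seq(j, inode);
+ *
+ *        if (seq)
+ *                ret = bch2_journal_flush_seq(j, seq);
+ */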
+
+void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq)
+{
+       size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
+       struct journal_buf *buf;
+
+       spin_lock(&j->lock);
+
+       if ((buf = journal_seq_to_buf(j, seq)))
+               set_bit(h, buf->has_inode);
+
+       spin_unlock(&j->lock);
+}
+
+static int __journal_res_get(struct journal *j, struct journal_res *res,
+                            unsigned flags)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_buf *buf;
+       bool can_discard;
+       int ret;
+retry:
+       if (journal_res_get_fast(j, res, flags))
+               return 0;
+
+       if (bch2_journal_error(j))
+               return -EROFS;
+
+       spin_lock(&j->lock);
+
+       /*
+        * Recheck after taking the lock, so we don't race with another thread
+        * that just did journal_entry_open(), and end up calling
+        * journal_entry_close() unnecessarily
+        */
+       if (journal_res_get_fast(j, res, flags)) {
+               spin_unlock(&j->lock);
+               return 0;
+       }
+
+       if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+           !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+               /*
+                * Don't want to close current journal entry, just need to
+                * invoke reclaim:
+                */
+               ret = -ENOSPC;
+               goto unlock;
+       }
+
+       /*
+        * If we couldn't get a reservation because the current buf filled up,
+        * and we had room for a bigger entry on disk, signal that we want to
+        * realloc the journal bufs:
+        */
+       buf = journal_cur_buf(j);
+       if (journal_entry_is_open(j) &&
+           buf->buf_size >> 9 < buf->disk_sectors &&
+           buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
+               j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
+
+       if (journal_entry_is_open(j) &&
+           !__journal_entry_close(j)) {
+               /*
+                * We failed to get a reservation on the current open journal
+                * entry because it's full, and we can't close it because
+                * there's still a previous one in flight:
+                */
+               trace_journal_entry_full(c);
+               ret = -EAGAIN;
+       } else {
+               ret = journal_entry_open(j);
+       }
+unlock:
+       if ((ret == -EAGAIN || ret == -ENOSPC) &&
+           !j->res_get_blocked_start)
+               j->res_get_blocked_start = local_clock() ?: 1;
+
+       can_discard = j->can_discard;
+       spin_unlock(&j->lock);
+
+       if (!ret)
+               goto retry;
+
+       if (ret == -ENOSPC) {
+               WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED),
+                         "JOURNAL_RES_GET_RESERVED set but journal full");
+
+               /*
+                * Journal is full - can't rely on reclaim from work item due to
+                * freezing:
+                */
+               trace_journal_full(c);
+
+               if (!(flags & JOURNAL_RES_GET_NONBLOCK)) {
+                       if (can_discard) {
+                               bch2_journal_do_discards(j);
+                               goto retry;
+                       }
+
+                       if (mutex_trylock(&j->reclaim_lock)) {
+                               bch2_journal_reclaim(j);
+                               mutex_unlock(&j->reclaim_lock);
+                       }
+               }
+
+               ret = -EAGAIN;
+       }
+
+       return ret;
+}
+
+/*
+ * Essentially the entry function to the journaling code. When bcachefs is doing
+ * a btree insert, it calls this function to get the current journal write.
+ * Journal write is the structure used to set up journal writes. The calling
+ * function will then add its keys to the structure, queuing them for the next
+ * write.
+ *
+ * To ensure forward progress, the current task must not be holding any
+ * btree node write locks.
+ */
+int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
+                                 unsigned flags)
+{
+       int ret;
+
+       closure_wait_event(&j->async_wait,
+                  (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
+                  (flags & JOURNAL_RES_GET_NONBLOCK));
+       return ret;
+}
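+
+/*
+ * Illustrative sketch (editor's example, mirroring bch2_journal_meta() below;
+ * not new API): a minimal reservation cycle looks like
+ *
+ *        struct journal_res res;
+ *
+ *        memset(&res, 0, sizeof(res));
+ *        ret = bch2_journal_res_get(j, &res, jset_u64s(u64s), 0);
+ *        if (ret)
+ *                return ret;
+ *        ...copy keys into the space reserved in the current journal buffer...
+ *        bch2_journal_res_put(j, &res);
+ */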
+
+/* journal_preres: */
+
+static bool journal_preres_available(struct journal *j,
+                                    struct journal_preres *res,
+                                    unsigned new_u64s,
+                                    unsigned flags)
+{
+       bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags);
+
+       if (!ret)
+               bch2_journal_reclaim_work(&j->reclaim_work.work);
+
+       return ret;
+}
+
+int __bch2_journal_preres_get(struct journal *j,
+                             struct journal_preres *res,
+                             unsigned new_u64s,
+                             unsigned flags)
+{
+       int ret;
+
+       closure_wait_event(&j->preres_wait,
+                  (ret = bch2_journal_error(j)) ||
+                  journal_preres_available(j, res, new_u64s, flags));
+       return ret;
+}
+
+/* journal_entry_res: */
+
+void bch2_journal_entry_res_resize(struct journal *j,
+                                  struct journal_entry_res *res,
+                                  unsigned new_u64s)
+{
+       union journal_res_state state;
+       int d = new_u64s - res->u64s;
+
+       spin_lock(&j->lock);
+
+       j->entry_u64s_reserved += d;
+       if (d <= 0)
+               goto out;
+
+       j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
+       smp_mb();
+       state = READ_ONCE(j->reservations);
+
+       if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL &&
+           state.cur_entry_offset > j->cur_entry_u64s) {
+               j->cur_entry_u64s += d;
+               /*
+                * Not enough room in current journal entry, have to flush it:
+                */
+               __journal_entry_close(j);
+       } else {
+               journal_cur_buf(j)->u64s_reserved += d;
+       }
+out:
+       spin_unlock(&j->lock);
+       res->u64s += d;
+}
+
+/* journal flushing: */
+
+u64 bch2_journal_last_unwritten_seq(struct journal *j)
+{
+       u64 seq;
+
+       spin_lock(&j->lock);
+       seq = journal_cur_seq(j);
+       if (j->reservations.prev_buf_unwritten)
+               seq--;
+       spin_unlock(&j->lock);
+
+       return seq;
+}
+
+/**
+ * bch2_journal_open_seq_async - try to open a new journal entry if @seq isn't
+ * open yet, or wait if we cannot
+ *
+ * used by the btree interior update machinery, when it needs to write a new
+ * btree root - every journal entry contains the roots of all the btrees, so it
+ * doesn't need to bother with getting a journal reservation
+ */
+int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       int ret;
+
+       spin_lock(&j->lock);
+
+       /*
+        * Can't try to open more than one sequence number ahead:
+        */
+       BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
+
+       if (journal_cur_seq(j) > seq ||
+           journal_entry_is_open(j)) {
+               spin_unlock(&j->lock);
+               return 0;
+       }
+
+       if (journal_cur_seq(j) < seq &&
+           !__journal_entry_close(j)) {
+               /* haven't finished writing out the previous one: */
+               trace_journal_entry_full(c);
+               ret = -EAGAIN;
+       } else {
+               BUG_ON(journal_cur_seq(j) != seq);
+
+               ret = journal_entry_open(j);
+       }
+
+       if ((ret == -EAGAIN || ret == -ENOSPC) &&
+           !j->res_get_blocked_start)
+               j->res_get_blocked_start = local_clock() ?: 1;
+
+       if (ret == -EAGAIN || ret == -ENOSPC)
+               closure_wait(&j->async_wait, cl);
+
+       spin_unlock(&j->lock);
+
+       if (ret == -ENOSPC) {
+               trace_journal_full(c);
+               bch2_journal_reclaim_work(&j->reclaim_work.work);
+               ret = -EAGAIN;
+       }
+
+       return ret;
+}
+
+static int journal_seq_error(struct journal *j, u64 seq)
+{
+       union journal_res_state state = READ_ONCE(j->reservations);
+
+       if (seq == journal_cur_seq(j))
+               return bch2_journal_error(j);
+
+       if (seq + 1 == journal_cur_seq(j) &&
+           !state.prev_buf_unwritten &&
+           seq > j->seq_ondisk)
+               return -EIO;
+
+       return 0;
+}
+
+static inline struct journal_buf *
+journal_seq_to_buf(struct journal *j, u64 seq)
+{
+       /* seq should be for a journal entry that has been opened: */
+       BUG_ON(seq > journal_cur_seq(j));
+       BUG_ON(seq == journal_cur_seq(j) &&
+              j->reservations.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL);
+
+       if (seq == journal_cur_seq(j))
+               return journal_cur_buf(j);
+       if (seq + 1 == journal_cur_seq(j) &&
+           j->reservations.prev_buf_unwritten)
+               return journal_prev_buf(j);
+       return NULL;
+}
+
+/**
+ * bch2_journal_wait_on_seq - wait for a journal entry to be written
+ *
+ * does _not_ cause @seq to be written immediately - if there is no other
+ * activity to cause the relevant journal entry to be filled up or flushed it
+ * can wait for an arbitrary amount of time (up to @j->write_delay_ms, which is
+ * configurable).
+ */
+void bch2_journal_wait_on_seq(struct journal *j, u64 seq,
+                             struct closure *parent)
+{
+       struct journal_buf *buf;
+
+       spin_lock(&j->lock);
+
+       if ((buf = journal_seq_to_buf(j, seq))) {
+               if (!closure_wait(&buf->wait, parent))
+                       BUG();
+
+               if (seq == journal_cur_seq(j)) {
+                       smp_mb();
+                       if (bch2_journal_error(j))
+                               closure_wake_up(&buf->wait);
+               }
+       }
+
+       spin_unlock(&j->lock);
+}
+
+/**
+ * bch2_journal_flush_seq_async - wait for a journal entry to be written
+ *
+ * like bch2_journal_wait_on_seq, except that it triggers a write immediately if
+ * necessary
+ */
+void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
+                                 struct closure *parent)
+{
+       struct journal_buf *buf;
+
+       spin_lock(&j->lock);
+
+       if (parent &&
+           (buf = journal_seq_to_buf(j, seq)))
+               if (!closure_wait(&buf->wait, parent))
+                       BUG();
+
+       if (seq == journal_cur_seq(j))
+               __journal_entry_close(j);
+       spin_unlock(&j->lock);
+}
+
+static int journal_seq_flushed(struct journal *j, u64 seq)
+{
+       int ret;
+
+       spin_lock(&j->lock);
+       ret = seq <= j->seq_ondisk ? 1 : journal_seq_error(j, seq);
+
+       if (seq == journal_cur_seq(j))
+               __journal_entry_close(j);
+       spin_unlock(&j->lock);
+
+       return ret;
+}
+
+int bch2_journal_flush_seq(struct journal *j, u64 seq)
+{
+       u64 start_time = local_clock();
+       int ret, ret2;
+
+       ret = wait_event_killable(j->wait, (ret2 = journal_seq_flushed(j, seq)));
+
+       bch2_time_stats_update(j->flush_seq_time, start_time);
+
+       return ret ?: ret2 < 0 ? ret2 : 0;
+}
+
+/**
+ * bch2_journal_meta_async - force a journal entry to be written
+ */
+void bch2_journal_meta_async(struct journal *j, struct closure *parent)
+{
+       struct journal_res res;
+
+       memset(&res, 0, sizeof(res));
+
+       bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+       bch2_journal_res_put(j, &res);
+
+       bch2_journal_flush_seq_async(j, res.seq, parent);
+}
+
+int bch2_journal_meta(struct journal *j)
+{
+       struct journal_res res;
+       int ret;
+
+       memset(&res, 0, sizeof(res));
+
+       ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+       if (ret)
+               return ret;
+
+       bch2_journal_res_put(j, &res);
+
+       return bch2_journal_flush_seq(j, res.seq);
+}
+
+/*
+ * bch2_journal_flush_async - if there is an open journal entry, or a journal
+ * entry still being written, write it and wait for the write to complete
+ */
+void bch2_journal_flush_async(struct journal *j, struct closure *parent)
+{
+       u64 seq, journal_seq;
+
+       spin_lock(&j->lock);
+       journal_seq = journal_cur_seq(j);
+
+       if (journal_entry_is_open(j)) {
+               seq = journal_seq;
+       } else if (journal_seq) {
+               seq = journal_seq - 1;
+       } else {
+               spin_unlock(&j->lock);
+               return;
+       }
+       spin_unlock(&j->lock);
+
+       bch2_journal_flush_seq_async(j, seq, parent);
+}
+
+int bch2_journal_flush(struct journal *j)
+{
+       u64 seq, journal_seq;
+
+       spin_lock(&j->lock);
+       journal_seq = journal_cur_seq(j);
+
+       if (journal_entry_is_open(j)) {
+               seq = journal_seq;
+       } else if (journal_seq) {
+               seq = journal_seq - 1;
+       } else {
+               spin_unlock(&j->lock);
+               return 0;
+       }
+       spin_unlock(&j->lock);
+
+       return bch2_journal_flush_seq(j, seq);
+}
+
+/* block/unblock the journal: */
+
+void bch2_journal_unblock(struct journal *j)
+{
+       spin_lock(&j->lock);
+       j->blocked--;
+       spin_unlock(&j->lock);
+
+       journal_wake(j);
+}
+
+void bch2_journal_block(struct journal *j)
+{
+       spin_lock(&j->lock);
+       j->blocked++;
+       spin_unlock(&j->lock);
+
+       journal_quiesce(j);
+}
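+
+/*
+ * Illustrative pairing (editor's note): these bracket sections that must not
+ * race with a new journal entry being opened:
+ *
+ *        bch2_journal_block(j);
+ *        ...journal is quiesced, no new entries can be opened...
+ *        bch2_journal_unblock(j);
+ */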
+
+/* allocate journal on a device: */
+
+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
+                                        bool new_fs, struct closure *cl)
+{
+       struct bch_fs *c = ca->fs;
+       struct journal_device *ja = &ca->journal;
+       struct bch_sb_field_journal *journal_buckets;
+       u64 *new_bucket_seq = NULL, *new_buckets = NULL;
+       int ret = 0;
+
+       /* don't handle reducing nr of buckets yet: */
+       if (nr <= ja->nr)
+               return 0;
+
+       ret = -ENOMEM;
+       new_buckets     = kzalloc(nr * sizeof(u64), GFP_KERNEL);
+       new_bucket_seq  = kzalloc(nr * sizeof(u64), GFP_KERNEL);
+       if (!new_buckets || !new_bucket_seq)
+               goto err;
+
+       journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
+                                                nr + sizeof(*journal_buckets) / sizeof(u64));
+       if (!journal_buckets)
+               goto err;
+
+       /*
+        * We may be called from the device add path, before the new device has
+        * actually been added to the running filesystem:
+        */
+       if (c)
+               spin_lock(&c->journal.lock);
+
+       memcpy(new_buckets,     ja->buckets,    ja->nr * sizeof(u64));
+       memcpy(new_bucket_seq,  ja->bucket_seq, ja->nr * sizeof(u64));
+       swap(new_buckets,       ja->buckets);
+       swap(new_bucket_seq,    ja->bucket_seq);
+
+       if (c)
+               spin_unlock(&c->journal.lock);
+
+       while (ja->nr < nr) {
+               struct open_bucket *ob = NULL;
+               unsigned pos;
+               long bucket;
+
+               if (new_fs) {
+                       bucket = bch2_bucket_alloc_new_fs(ca);
+                       if (bucket < 0) {
+                               ret = -ENOSPC;
+                               goto err;
+                       }
+               } else {
+                       ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC,
+                                              false, cl);
+                       if (IS_ERR(ob)) {
+                               ret = cl ? -EAGAIN : -ENOSPC;
+                               goto err;
+                       }
+
+                       bucket = sector_to_bucket(ca, ob->ptr.offset);
+               }
+
+               if (c) {
+                       percpu_down_read(&c->mark_lock);
+                       spin_lock(&c->journal.lock);
+               }
+
+               pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+               __array_insert_item(ja->buckets,                ja->nr, pos);
+               __array_insert_item(ja->bucket_seq,             ja->nr, pos);
+               __array_insert_item(journal_buckets->buckets,   ja->nr, pos);
+               ja->nr++;
+
+               ja->buckets[pos] = bucket;
+               ja->bucket_seq[pos] = 0;
+               journal_buckets->buckets[pos] = cpu_to_le64(bucket);
+
+               if (pos <= ja->discard_idx)
+                       ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+               if (pos <= ja->dirty_idx_ondisk)
+                       ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+               if (pos <= ja->dirty_idx)
+                       ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+               if (pos <= ja->cur_idx)
+                       ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
+
+               bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
+                                         ca->mi.bucket_size,
+                                         gc_phase(GC_PHASE_SB),
+                                         0);
+
+               if (c) {
+                       spin_unlock(&c->journal.lock);
+                       percpu_up_read(&c->mark_lock);
+               }
+
+               if (!new_fs)
+                       bch2_open_bucket_put(c, ob);
+       }
+
+       ret = 0;
+err:
+       kfree(new_bucket_seq);
+       kfree(new_buckets);
+
+       return ret;
+}
+
+/*
+ * Allocate more journal space at runtime - not currently making use of it, but
+ * the code works:
+ */
+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+                               unsigned nr)
+{
+       struct journal_device *ja = &ca->journal;
+       struct closure cl;
+       unsigned current_nr;
+       int ret;
+
+       closure_init_stack(&cl);
+
+       do {
+               struct disk_reservation disk_res = { 0, 0 };
+
+               closure_sync(&cl);
+
+               mutex_lock(&c->sb_lock);
+               current_nr = ja->nr;
+
+               /*
+                * note: journal buckets aren't really counted as _sectors_ used yet, so
+                * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
+                * when space used goes up without a reservation - but we do need the
+                * reservation to ensure we'll actually be able to allocate:
+                */
+
+               if (bch2_disk_reservation_get(c, &disk_res,
+                                             bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+                       mutex_unlock(&c->sb_lock);
+                       return -ENOSPC;
+               }
+
+               ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
+
+               bch2_disk_reservation_put(c, &disk_res);
+
+               if (ja->nr != current_nr)
+                       bch2_write_super(c);
+               mutex_unlock(&c->sb_lock);
+       } while (ret == -EAGAIN);
+
+       return ret;
+}
+
+int bch2_dev_journal_alloc(struct bch_dev *ca)
+{
+       unsigned nr;
+
+       if (dynamic_fault("bcachefs:add:journal_alloc"))
+               return -ENOMEM;
+
+       /*
+        * clamp journal size to 1024 buckets or 512MB (in sectors), whichever
+        * is smaller:
+        */
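+       /*
+        * For example (editor's arithmetic): with 1MB buckets (2048 sectors)
+        * the second limit works out to (1 << 20) / 2048 = 512 buckets, i.e.
+        * 512MB of journal; with 256KB buckets (512 sectors) the 1024-bucket
+        * limit kicks in first, giving 256MB.
+        */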
+       nr = clamp_t(unsigned, ca->mi.nbuckets >> 8,
+                    BCH_JOURNAL_BUCKETS_MIN,
+                    min(1 << 10,
+                        (1 << 20) / ca->mi.bucket_size));
+
+       return __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
+}
+
+/* startup/shutdown: */
+
+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
+{
+       union journal_res_state state;
+       struct journal_buf *w;
+       bool ret;
+
+       spin_lock(&j->lock);
+       state = READ_ONCE(j->reservations);
+       w = j->buf + !state.idx;
+
+       ret = state.prev_buf_unwritten &&
+               bch2_bkey_has_device(bkey_i_to_s_c(&w->key), dev_idx);
+       spin_unlock(&j->lock);
+
+       return ret;
+}
+
+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
+{
+       wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx));
+}
+
+void bch2_fs_journal_stop(struct journal *j)
+{
+       bch2_journal_flush_all_pins(j);
+
+       wait_event(j->wait, journal_entry_close(j));
+
+       /* do we need to write another journal entry? */
+       if (test_bit(JOURNAL_NOT_EMPTY, &j->flags))
+               bch2_journal_meta(j);
+
+       journal_quiesce(j);
+
+       BUG_ON(!bch2_journal_error(j) &&
+              test_bit(JOURNAL_NOT_EMPTY, &j->flags));
+
+       cancel_delayed_work_sync(&j->write_work);
+       cancel_delayed_work_sync(&j->reclaim_work);
+}
+
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
+                         struct list_head *journal_entries)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_entry_pin_list *p;
+       struct journal_replay *i;
+       u64 last_seq = cur_seq, nr, seq;
+
+       if (!list_empty(journal_entries))
+               last_seq = le64_to_cpu(list_last_entry(journal_entries,
+                               struct journal_replay, list)->j.last_seq);
+
+       nr = cur_seq - last_seq;
+
+       if (nr + 1 > j->pin.size) {
+               free_fifo(&j->pin);
+               init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
+               if (!j->pin.data) {
+                       bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+                       return -ENOMEM;
+               }
+       }
+
+       j->replay_journal_seq   = last_seq;
+       j->replay_journal_seq_end = cur_seq;
+       j->last_seq_ondisk      = last_seq;
+       j->pin.front            = last_seq;
+       j->pin.back             = cur_seq;
+       atomic64_set(&j->seq, cur_seq - 1);
+
+       fifo_for_each_entry_ptr(p, &j->pin, seq) {
+               INIT_LIST_HEAD(&p->list);
+               INIT_LIST_HEAD(&p->flushed);
+               atomic_set(&p->count, 1);
+               p->devs.nr = 0;
+       }
+
+       list_for_each_entry(i, journal_entries, list) {
+               seq = le64_to_cpu(i->j.seq);
+               BUG_ON(seq >= cur_seq);
+
+               if (seq < last_seq)
+                       continue;
+
+               journal_seq_pin(j, seq)->devs = i->devs;
+       }
+
+       spin_lock(&j->lock);
+
+       set_bit(JOURNAL_STARTED, &j->flags);
+
+       journal_pin_new_entry(j, 1);
+       bch2_journal_buf_init(j);
+
+       c->last_bucket_seq_cleanup = journal_cur_seq(j);
+
+       bch2_journal_space_available(j);
+       spin_unlock(&j->lock);
+
+       return 0;
+}
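+
+/*
+ * Illustrative walk-through (editorial): if the newest entry read from disk
+ * had last_seq 90 and we start at cur_seq 100, the fifo above is seeded with
+ * pin.front = 90 and pin.back = 100 - one ref per entry in [90, 100) - before
+ * journal_pin_new_entry() opens the new entry at seq 100.
+ */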
+
+/* init/exit: */
+
+void bch2_dev_journal_exit(struct bch_dev *ca)
+{
+       kfree(ca->journal.bio);
+       kfree(ca->journal.buckets);
+       kfree(ca->journal.bucket_seq);
+
+       ca->journal.bio         = NULL;
+       ca->journal.buckets     = NULL;
+       ca->journal.bucket_seq  = NULL;
+}
+
+int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
+{
+       struct journal_device *ja = &ca->journal;
+       struct bch_sb_field_journal *journal_buckets =
+               bch2_sb_get_journal(sb);
+       unsigned i;
+
+       ja->nr = bch2_nr_journal_buckets(journal_buckets);
+
+       ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
+       if (!ja->bucket_seq)
+               return -ENOMEM;
+
+       ca->journal.bio = bio_kmalloc(GFP_KERNEL,
+                       DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE));
+       if (!ca->journal.bio)
+               return -ENOMEM;
+
+       ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
+       if (!ja->buckets)
+               return -ENOMEM;
+
+       for (i = 0; i < ja->nr; i++)
+               ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
+
+       return 0;
+}
+
+void bch2_fs_journal_exit(struct journal *j)
+{
+       kvpfree(j->buf[1].data, j->buf[1].buf_size);
+       kvpfree(j->buf[0].data, j->buf[0].buf_size);
+       free_fifo(&j->pin);
+}
+
+int bch2_fs_journal_init(struct journal *j)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       static struct lock_class_key res_key;
+       int ret = 0;
+
+       pr_verbose_init(c->opts, "");
+
+       spin_lock_init(&j->lock);
+       spin_lock_init(&j->err_lock);
+       init_waitqueue_head(&j->wait);
+       INIT_DELAYED_WORK(&j->write_work, journal_write_work);
+       INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
+       init_waitqueue_head(&j->pin_flush_wait);
+       mutex_init(&j->reclaim_lock);
+       mutex_init(&j->discard_lock);
+
+       lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
+
+       j->buf[0].buf_size      = JOURNAL_ENTRY_SIZE_MIN;
+       j->buf[1].buf_size      = JOURNAL_ENTRY_SIZE_MIN;
+       j->write_delay_ms       = 1000;
+       j->reclaim_delay_ms     = 100;
+
+       /* Btree roots: */
+       j->entry_u64s_reserved +=
+               BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
+
+       atomic64_set(&j->reservations.counter,
+               ((union journal_res_state)
+                { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
+
+       if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
+           !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
+           !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       j->pin.front = j->pin.back = 1;
+out:
+       pr_verbose_init(c->opts, "ret %i", ret);
+       return ret;
+}
+
+/* debug: */
+
+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       union journal_res_state s;
+       struct bch_dev *ca;
+       unsigned iter;
+
+       rcu_read_lock();
+       spin_lock(&j->lock);
+       s = READ_ONCE(j->reservations);
+
+       pr_buf(out,
+              "active journal entries:\t%llu\n"
+              "seq:\t\t\t%llu\n"
+              "last_seq:\t\t%llu\n"
+              "last_seq_ondisk:\t%llu\n"
+              "prereserved:\t\t%u/%u\n"
+              "current entry sectors:\t%u\n"
+              "current entry:\t\t",
+              fifo_used(&j->pin),
+              journal_cur_seq(j),
+              journal_last_seq(j),
+              j->last_seq_ondisk,
+              j->prereserved.reserved,
+              j->prereserved.remaining,
+              j->cur_entry_sectors);
+
+       switch (s.cur_entry_offset) {
+       case JOURNAL_ENTRY_ERROR_VAL:
+               pr_buf(out, "error\n");
+               break;
+       case JOURNAL_ENTRY_CLOSED_VAL:
+               pr_buf(out, "closed\n");
+               break;
+       default:
+               pr_buf(out, "%u/%u\n",
+                      s.cur_entry_offset,
+                      j->cur_entry_u64s);
+               break;
+       }
+
+       pr_buf(out,
+              "current entry refs:\t%u\n"
+              "prev entry unwritten:\t",
+              journal_state_count(s, s.idx));
+
+       if (s.prev_buf_unwritten)
+               pr_buf(out, "yes, ref %u sectors %u\n",
+                      journal_state_count(s, !s.idx),
+                      journal_prev_buf(j)->sectors);
+       else
+               pr_buf(out, "no\n");
+
+       pr_buf(out,
+              "need write:\t\t%i\n"
+              "replay done:\t\t%i\n",
+              test_bit(JOURNAL_NEED_WRITE,     &j->flags),
+              test_bit(JOURNAL_REPLAY_DONE,    &j->flags));
+
+       for_each_member_device_rcu(ca, c, iter,
+                                  &c->rw_devs[BCH_DATA_journal]) {
+               struct journal_device *ja = &ca->journal;
+
+               if (!ja->nr)
+                       continue;
+
+               pr_buf(out,
+                      "dev %u:\n"
+                      "\tnr\t\t%u\n"
+                      "\tavailable\t%u:%u\n"
+                      "\tdiscard_idx\t\t%u\n"
+                      "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+                      "\tdirty_idx\t\t%u (seq %llu)\n"
+                      "\tcur_idx\t\t%u (seq %llu)\n",
+                      iter, ja->nr,
+                      bch2_journal_dev_buckets_available(j, ja, journal_space_discarded),
+                      ja->sectors_free,
+                      ja->discard_idx,
+                      ja->dirty_idx_ondisk,    ja->bucket_seq[ja->dirty_idx_ondisk],
+                      ja->dirty_idx,           ja->bucket_seq[ja->dirty_idx],
+                      ja->cur_idx,             ja->bucket_seq[ja->cur_idx]);
+       }
+
+       spin_unlock(&j->lock);
+       rcu_read_unlock();
+}
+
+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
+{
+       struct journal_entry_pin_list *pin_list;
+       struct journal_entry_pin *pin;
+       u64 i;
+
+       spin_lock(&j->lock);
+       fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
+               pr_buf(out, "%llu: count %u\n",
+                      i, atomic_read(&pin_list->count));
+
+               list_for_each_entry(pin, &pin_list->list, list)
+                       pr_buf(out, "\t%px %ps\n",
+                              pin, pin->flush);
+
+               if (!list_empty(&pin_list->flushed))
+                       pr_buf(out, "flushed:\n");
+
+               list_for_each_entry(pin, &pin_list->flushed, list)
+                       pr_buf(out, "\t%px %ps\n",
+                              pin, pin->flush);
+       }
+       spin_unlock(&j->lock);
+}
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
new file mode 100644 (file)
index 0000000..f60bc96
--- /dev/null
@@ -0,0 +1,520 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_H
+#define _BCACHEFS_JOURNAL_H
+
+/*
+ * THE JOURNAL:
+ *
+ * The primary purpose of the journal is to log updates (insertions) to the
+ * b-tree, to avoid having to do synchronous updates to the b-tree on disk.
+ *
+ * Even without the journal, the b-tree is always internally consistent on
+ * disk - and in fact, in the earliest incarnations bcache didn't have a journal
+ * but did handle unclean shutdowns by doing all index updates synchronously
+ * (with coalescing).
+ *
+ * Updates to interior nodes still happen synchronously and without the journal
+ * (for simplicity) - this may change eventually but updates to interior nodes
+ * are rare enough it's not a huge priority.
+ *
+ * This means the journal is relatively separate from the b-tree; it consists of
+ * just a list of keys and journal replay consists of just redoing those
+ * insertions in the same order that they appear in the journal.
+ *
+ * PERSISTENCE:
+ *
+ * For synchronous updates (where we're waiting on the index update to hit
+ * disk), the journal entry will be written out immediately (or as soon as
+ * possible, if the write for the previous journal entry was still in flight).
+ *
+ * Synchronous updates are specified by passing a closure (@flush_cl) to
+ * bch2_btree_insert() or bch2_btree_insert_node(), which then passes that
+ * parameter down to the journalling code. That closure will wait on the
+ * journal write to complete (via closure_wait()).
+ *
+ * If the index update wasn't synchronous, the journal entry will be
+ * written out once the write delay has elapsed (the write_delay_ms field
+ * in struct journal; 1000 ms by default).
+ *
+ * JOURNAL ENTRIES:
+ *
+ * A journal entry is variable size (struct jset), it's got a fixed length
+ * header and then a variable number of struct jset_entry entries.
+ *
+ * Journal entries are identified by monotonically increasing 64 bit sequence
+ * numbers - jset->seq; other places in the code refer to this sequence number.
+ *
+ * A jset_entry entry contains one or more bkeys (which is what gets inserted
+ * into the b-tree). We need a container to indicate which b-tree the key is
+ * for; also, the roots of the various b-trees are stored in jset_entry entries
+ * (one for each b-tree) - this lets us add new b-tree types without changing
+ * the on disk format.
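+ *
+ * Roughly (an editorial sketch; fields abridged, this is not the full
+ * on-disk layout):
+ *
+ *      struct jset {                        // one journal entry
+ *              ... magic, seq, last_seq, u64s ...
+ *              struct jset_entry entries[];
+ *      };
+ *      struct jset_entry {
+ *              ... u64s, btree_id, level, type ...
+ *              u64 _data[];                 // the bkeys themselves
+ *      };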
+ *
+ * We also keep some things in the journal header that are logically part of the
+ * superblock - all the things that are frequently updated. This is for future
+ * bcache on raw flash support; the superblock (which will become another
+ * journal) can't be moved or wear leveled, so it contains just enough
+ * information to find the main journal, and the superblock only has to be
+ * rewritten when we want to move/wear level the main journal.
+ *
+ * JOURNAL LAYOUT ON DISK:
+ *
+ * The journal is written to a ringbuffer of buckets (which is kept in the
+ * superblock); the individual buckets are not necessarily contiguous on disk
+ * which means that journal entries are not allowed to span buckets, but also
+ * that we can resize the journal at runtime if desired (unimplemented).
+ *
+ * The journal buckets exist in the same pool as all the other buckets that are
+ * managed by the allocator and garbage collection - garbage collection marks
+ * the journal buckets as metadata buckets.
+ *
+ * OPEN/DIRTY JOURNAL ENTRIES:
+ *
+ * Open/dirty journal entries are journal entries that contain b-tree updates
+ * that have not yet been written out to the b-tree on disk. We have to track
+ * which journal entries are dirty, and we also have to avoid wrapping around
+ * the journal and overwriting old but still dirty journal entries with new
+ * journal entries.
+ *
+ * On disk, this is represented with the "last_seq" field of struct jset;
+ * last_seq is the first sequence number that journal replay has to replay.
+ *
+ * To avoid overwriting dirty journal entries on disk, we keep a mapping (in
+ * journal_device->bucket_seq) from each journal bucket to the highest
+ * sequence number of any journal entry it contains. Then, by comparing that
+ * against last_seq, we can determine whether a given journal bucket contains
+ * dirty journal entries or not.
+ *
+ * To track which journal entries are dirty, we maintain a fifo of refcounts
+ * (where each entry corresponds to a specific sequence number) - when a ref
+ * goes to 0, that journal entry is no longer dirty.
+ *
+ * Journalling of index updates is done at the same time as the b-tree itself is
+ * being modified (see btree_insert_key()); when we add the key to the journal
+ * the pending b-tree write takes a ref on the journal entry the key was added
+ * to. If a pending b-tree write would need to take refs on multiple dirty
+ * journal entries, it only keeps the ref on the oldest one (since a newer
+ * journal entry will still be replayed if an older entry was dirty).
+ *
+ * JOURNAL FILLING UP:
+ *
+ * There are two ways the journal could fill up: either we could run out of
+ * space to write to, or we could have too many open journal entries and run
+ * out of room in the fifo of refcounts. Since those refcounts are decremented
+ * without any locking we can't safely resize that fifo, so we handle running
+ * out of fifo room the same way as running out of space.
+ *
+ * If the journal fills up, we start flushing dirty btree nodes until we can
+ * allocate space for a journal write again - preferentially flushing btree
+ * nodes that are pinning the oldest journal entries first.
+ */
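+
+/*
+ * A minimal sketch (editorial) of the refcount fifo described above: a
+ * b-tree update that lands in journal entry @seq effectively does
+ *
+ *      atomic_inc(&journal_seq_pin(j, seq)->count);
+ *
+ * and completion of the corresponding b-tree write drops that ref; when the
+ * count of the oldest entry hits 0, last_seq can advance. The real code wraps
+ * this in the journal pin helpers in journal_reclaim.c, which also maintain
+ * the per-entry pin lists.
+ */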
+
+#include <linux/hash.h>
+
+#include "journal_types.h"
+
+struct bch_fs;
+
+static inline void journal_wake(struct journal *j)
+{
+       wake_up(&j->wait);
+       closure_wake_up(&j->async_wait);
+       closure_wake_up(&j->preres_wait);
+}
+
+static inline struct journal_buf *journal_cur_buf(struct journal *j)
+{
+       return j->buf + j->reservations.idx;
+}
+
+static inline struct journal_buf *journal_prev_buf(struct journal *j)
+{
+       return j->buf + !j->reservations.idx;
+}
+
+/* Sequence number of oldest dirty journal entry */
+
+static inline u64 journal_last_seq(struct journal *j)
+{
+       return j->pin.front;
+}
+
+static inline u64 journal_cur_seq(struct journal *j)
+{
+       BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
+
+       return j->pin.back - 1;
+}
+
+u64 bch2_inode_journal_seq(struct journal *, u64);
+void bch2_journal_set_has_inum(struct journal *, u64, u64);
+
+static inline int journal_state_count(union journal_res_state s, int idx)
+{
+       return idx == 0 ? s.buf0_count : s.buf1_count;
+}
+
+static inline void journal_state_inc(union journal_res_state *s)
+{
+       s->buf0_count += s->idx == 0;
+       s->buf1_count += s->idx == 1;
+}
+
+static inline void bch2_journal_set_has_inode(struct journal *j,
+                                             struct journal_res *res,
+                                             u64 inum)
+{
+       struct journal_buf *buf = &j->buf[res->idx];
+       unsigned long bit = hash_64(inum, ilog2(sizeof(buf->has_inode) * 8));
+
+       /* avoid atomic op if possible */
+       if (unlikely(!test_bit(bit, buf->has_inode)))
+               set_bit(bit, buf->has_inode);
+}
+
+/*
+ * Amount of space that will be taken up by some keys in the journal (i.e.
+ * including the jset_entry header)
+ */
+static inline unsigned jset_u64s(unsigned u64s)
+{
+       return u64s + sizeof(struct jset_entry) / sizeof(u64);
+}
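+
+/*
+ * Editorial example: assuming the 8-byte (one u64) jset_entry header, a bkey
+ * of 3 u64s costs jset_u64s(3) == 4 u64s of journal space.
+ */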
+
+static inline int journal_entry_overhead(struct journal *j)
+{
+       return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
+}
+
+static inline struct jset_entry *
+bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
+{
+       struct jset *jset = buf->data;
+       struct jset_entry *entry = vstruct_idx(jset, le32_to_cpu(jset->u64s));
+
+       memset(entry, 0, sizeof(*entry));
+       entry->u64s = cpu_to_le16(u64s);
+
+       le32_add_cpu(&jset->u64s, jset_u64s(u64s));
+
+       return entry;
+}
+
+static inline struct jset_entry *
+journal_res_entry(struct journal *j, struct journal_res *res)
+{
+       return vstruct_idx(j->buf[res->idx].data, res->offset);
+}
+
+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
+                                         enum btree_id id, unsigned level,
+                                         const void *data, unsigned u64s)
+{
+       memset(entry, 0, sizeof(*entry));
+       entry->u64s     = cpu_to_le16(u64s);
+       entry->type     = type;
+       entry->btree_id = id;
+       entry->level    = level;
+       memcpy_u64s_small(entry->_data, data, u64s);
+
+       return jset_u64s(u64s);
+}
+
+static inline void bch2_journal_add_entry(struct journal *j, struct journal_res *res,
+                                         unsigned type, enum btree_id id,
+                                         unsigned level,
+                                         const void *data, unsigned u64s)
+{
+       unsigned actual = journal_entry_set(journal_res_entry(j, res),
+                              type, id, level, data, u64s);
+
+       EBUG_ON(!res->ref);
+       EBUG_ON(actual > res->u64s);
+
+       res->offset     += actual;
+       res->u64s       -= actual;
+}
+
+static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *res,
+                                       enum btree_id id, const struct bkey_i *k)
+{
+       bch2_journal_add_entry(j, res, BCH_JSET_ENTRY_btree_keys,
+                              id, 0, k, k->k.u64s);
+}
+
+static inline bool journal_entry_empty(struct jset *j)
+{
+       struct jset_entry *i;
+
+       if (j->seq != j->last_seq)
+               return false;
+
+       vstruct_for_each(j, i)
+               if (i->type == BCH_JSET_ENTRY_btree_keys && i->u64s)
+                       return false;
+       return true;
+}
+
+void __bch2_journal_buf_put(struct journal *, bool);
+
+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
+                                      bool need_write_just_set)
+{
+       union journal_res_state s;
+
+       s.v = atomic64_sub_return(((union journal_res_state) {
+                                   .buf0_count = idx == 0,
+                                   .buf1_count = idx == 1,
+                                   }).v, &j->reservations.counter);
+       if (!journal_state_count(s, idx)) {
+               EBUG_ON(s.idx == idx || !s.prev_buf_unwritten);
+               __bch2_journal_buf_put(j, need_write_just_set);
+       }
+}
+
+/*
+ * This function releases the journal reservation; the journal write for this
+ * entry can proceed once every thread holding a reservation on it has
+ * released it.
+ */
+static inline void bch2_journal_res_put(struct journal *j,
+                                      struct journal_res *res)
+{
+       if (!res->ref)
+               return;
+
+       lock_release(&j->res_map, _THIS_IP_);
+
+       while (res->u64s)
+               bch2_journal_add_entry(j, res,
+                                      BCH_JSET_ENTRY_btree_keys,
+                                      0, 0, NULL, 0);
+
+       bch2_journal_buf_put(j, res->idx, false);
+
+       res->ref = 0;
+}
+
+int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
+                                 unsigned);
+
+#define JOURNAL_RES_GET_NONBLOCK       (1 << 0)
+#define JOURNAL_RES_GET_CHECK          (1 << 1)
+#define JOURNAL_RES_GET_RESERVED       (1 << 2)
+#define JOURNAL_RES_GET_RECLAIM                (1 << 3)
+
+static inline int journal_res_get_fast(struct journal *j,
+                                      struct journal_res *res,
+                                      unsigned flags)
+{
+       union journal_res_state old, new;
+       u64 v = atomic64_read(&j->reservations.counter);
+
+       do {
+               old.v = new.v = v;
+
+               /*
+                * Check if there is still room in the current journal
+                * entry:
+                */
+               if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
+                       return 0;
+
+               EBUG_ON(!journal_state_count(new, new.idx));
+
+               if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+                   !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
+                       return 0;
+
+               if (flags & JOURNAL_RES_GET_CHECK)
+                       return 1;
+
+               new.cur_entry_offset += res->u64s;
+               journal_state_inc(&new);
+       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+                                      old.v, new.v)) != old.v);
+
+       res->ref        = true;
+       res->idx        = old.idx;
+       res->offset     = old.cur_entry_offset;
+       res->seq        = le64_to_cpu(j->buf[old.idx].data->seq);
+       return 1;
+}
+
+static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
+                                      unsigned u64s, unsigned flags)
+{
+       int ret;
+
+       EBUG_ON(res->ref);
+       EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
+
+       res->u64s = u64s;
+
+       if (journal_res_get_fast(j, res, flags))
+               goto out;
+
+       ret = bch2_journal_res_get_slowpath(j, res, flags);
+       if (ret)
+               return ret;
+out:
+       if (!(flags & JOURNAL_RES_GET_CHECK)) {
+               lock_acquire_shared(&j->res_map, 0,
+                                   (flags & JOURNAL_RES_GET_NONBLOCK) != 0,
+                                   NULL, _THIS_IP_);
+               EBUG_ON(!res->ref);
+       }
+       return 0;
+}
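+
+/*
+ * Sketch of the reservation lifecycle (editorial; error handling elided, and
+ * @id, @k stand in for a caller's btree ID and key):
+ *
+ *      struct journal_res res = { 0 };
+ *
+ *      ret = bch2_journal_res_get(j, &res, jset_u64s(k->k.u64s), 0);
+ *      if (!ret) {
+ *              bch2_journal_add_keys(j, &res, id, k);
+ *              bch2_journal_res_put(j, &res);
+ *      }
+ */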
+
+/* journal_preres: */
+
+static inline bool journal_check_may_get_unreserved(struct journal *j)
+{
+       union journal_preres_state s = READ_ONCE(j->prereserved);
+       bool ret = s.reserved <= s.remaining &&
+               fifo_free(&j->pin) > 8;
+
+       lockdep_assert_held(&j->lock);
+
+       if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+               if (ret) {
+                       set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
+                       journal_wake(j);
+               } else {
+                       clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
+               }
+       }
+       return ret;
+}
+
+static inline void bch2_journal_preres_put(struct journal *j,
+                                          struct journal_preres *res)
+{
+       union journal_preres_state s = { .reserved = res->u64s };
+
+       if (!res->u64s)
+               return;
+
+       s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
+       res->u64s = 0;
+       closure_wake_up(&j->preres_wait);
+
+       if (s.reserved <= s.remaining &&
+           !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+               spin_lock(&j->lock);
+               journal_check_may_get_unreserved(j);
+               spin_unlock(&j->lock);
+       }
+}
+
+int __bch2_journal_preres_get(struct journal *,
+                       struct journal_preres *, unsigned, unsigned);
+
+static inline int bch2_journal_preres_get_fast(struct journal *j,
+                                              struct journal_preres *res,
+                                              unsigned new_u64s,
+                                              unsigned flags)
+{
+       int d = new_u64s - res->u64s;
+       union journal_preres_state old, new;
+       u64 v = atomic64_read(&j->prereserved.counter);
+
+       do {
+               old.v = new.v = v;
+
+               new.reserved += d;
+
+               /*
+                * If we're being called from the journal reclaim path, we have
+                * to unconditionally give out the pre-reservation, there's
+                * nothing else sensible we can do - otherwise we'd recurse back
+                * into the reclaim path and deadlock:
+                */
+
+               if (!(flags & JOURNAL_RES_GET_RECLAIM) &&
+                   new.reserved > new.remaining)
+                       return 0;
+       } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
+                                      old.v, new.v)) != old.v);
+
+       res->u64s += d;
+       return 1;
+}
+
+static inline int bch2_journal_preres_get(struct journal *j,
+                                         struct journal_preres *res,
+                                         unsigned new_u64s,
+                                         unsigned flags)
+{
+       if (new_u64s <= res->u64s)
+               return 0;
+
+       if (bch2_journal_preres_get_fast(j, res, new_u64s, flags))
+               return 0;
+
+       if (flags & JOURNAL_RES_GET_NONBLOCK)
+               return -EAGAIN;
+
+       return __bch2_journal_preres_get(j, res, new_u64s, flags);
+}
+
+/* journal_entry_res: */
+
+void bch2_journal_entry_res_resize(struct journal *,
+                                  struct journal_entry_res *,
+                                  unsigned);
+
+u64 bch2_journal_last_unwritten_seq(struct journal *);
+int bch2_journal_open_seq_async(struct journal *, u64, struct closure *);
+
+void bch2_journal_wait_on_seq(struct journal *, u64, struct closure *);
+void bch2_journal_flush_seq_async(struct journal *, u64, struct closure *);
+void bch2_journal_flush_async(struct journal *, struct closure *);
+void bch2_journal_meta_async(struct journal *, struct closure *);
+
+int bch2_journal_flush_seq(struct journal *, u64);
+int bch2_journal_flush(struct journal *);
+int bch2_journal_meta(struct journal *);
+
+void bch2_journal_halt(struct journal *);
+
+static inline int bch2_journal_error(struct journal *j)
+{
+       return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL
+               ? -EIO : 0;
+}
+
+struct bch_dev;
+
+static inline bool journal_flushes_device(struct bch_dev *ca)
+{
+       return true;
+}
+
+static inline void bch2_journal_set_replay_done(struct journal *j)
+{
+       BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
+       set_bit(JOURNAL_REPLAY_DONE, &j->flags);
+}
+
+void bch2_journal_unblock(struct journal *);
+void bch2_journal_block(struct journal *);
+
+void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
+void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
+
+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
+                               unsigned nr);
+int bch2_dev_journal_alloc(struct bch_dev *);
+
+void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
+
+void bch2_fs_journal_stop(struct journal *);
+int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
+
+void bch2_dev_journal_exit(struct bch_dev *);
+int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
+void bch2_fs_journal_exit(struct journal *);
+int bch2_fs_journal_init(struct journal *);
+
+#endif /* _BCACHEFS_JOURNAL_H */
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
new file mode 100644 (file)
index 0000000..bd0e6b3
--- /dev/null
@@ -0,0 +1,1183 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "btree_io.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "error.h"
+#include "io.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "replicas.h"
+
+#include <trace/events/bcachefs.h>
+
+struct journal_list {
+       struct closure          cl;
+       struct mutex            lock;
+       struct list_head        *head;
+       int                     ret;
+};
+
+#define JOURNAL_ENTRY_ADD_OK           0
+#define JOURNAL_ENTRY_ADD_OUT_OF_RANGE 5
+
+/*
+ * Given a journal entry we just read, add it to the list of journal entries to
+ * be replayed:
+ */
+static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
+                            struct journal_list *jlist, struct jset *j,
+                            bool bad)
+{
+       struct journal_replay *i, *pos;
+       struct bch_devs_list devs = { .nr = 0 };
+       struct list_head *where;
+       size_t bytes = vstruct_bytes(j);
+       __le64 last_seq;
+       int ret;
+
+       last_seq = !list_empty(jlist->head)
+               ? list_last_entry(jlist->head, struct journal_replay,
+                                 list)->j.last_seq
+               : 0;
+
+       if (!c->opts.read_entire_journal) {
+               /* Is this entry older than the range we need? */
+               if (le64_to_cpu(j->seq) < le64_to_cpu(last_seq)) {
+                       ret = JOURNAL_ENTRY_ADD_OUT_OF_RANGE;
+                       goto out;
+               }
+
+               /* Drop entries we don't need anymore */
+               list_for_each_entry_safe(i, pos, jlist->head, list) {
+                       if (le64_to_cpu(i->j.seq) >= le64_to_cpu(j->last_seq))
+                               break;
+                       list_del(&i->list);
+                       kvpfree(i, offsetof(struct journal_replay, j) +
+                               vstruct_bytes(&i->j));
+               }
+       }
+
+       list_for_each_entry_reverse(i, jlist->head, list) {
+               if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) {
+                       where = &i->list;
+                       goto add;
+               }
+       }
+
+       where = jlist->head;
+add:
+       i = where->next != jlist->head
+               ? container_of(where->next, struct journal_replay, list)
+               : NULL;
+
+       /*
+        * Duplicate journal entries? If so, we want the one that didn't have a
+        * checksum error:
+        */
+       if (i && le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
+               if (i->bad) {
+                       devs = i->devs;
+                       list_del(&i->list);
+                       kvpfree(i, offsetof(struct journal_replay, j) +
+                               vstruct_bytes(&i->j));
+               } else if (bad) {
+                       goto found;
+               } else {
+                       fsck_err_on(bytes != vstruct_bytes(&i->j) ||
+                                   memcmp(j, &i->j, bytes), c,
+                                   "found duplicate but non identical journal entries (seq %llu)",
+                                   le64_to_cpu(j->seq));
+                       goto found;
+               }
+       }
+
+       i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
+       if (!i) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       list_add(&i->list, where);
+       i->devs = devs;
+       i->bad  = bad;
+       memcpy(&i->j, j, bytes);
+found:
+       if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx))
+               bch2_dev_list_add_dev(&i->devs, ca->dev_idx);
+       else
+               fsck_err_on(1, c, "duplicate journal entries on same device");
+       ret = JOURNAL_ENTRY_ADD_OK;
+out:
+fsck_err:
+       return ret;
+}
+
+static struct nonce journal_nonce(const struct jset *jset)
+{
+       return (struct nonce) {{
+               [0] = 0,
+               [1] = ((__le32 *) &jset->seq)[0],
+               [2] = ((__le32 *) &jset->seq)[1],
+               [3] = BCH_NONCE_JOURNAL,
+       }};
+}
+
+/* this fills in a range with empty jset_entries: */
+static void journal_entry_null_range(void *start, void *end)
+{
+       struct jset_entry *entry;
+
+       for (entry = start; entry != end; entry = vstruct_next(entry))
+               memset(entry, 0, sizeof(*entry));
+}
+
+#define JOURNAL_ENTRY_REREAD   5
+#define JOURNAL_ENTRY_NONE     6
+#define JOURNAL_ENTRY_BAD      7
+
+#define journal_entry_err(c, msg, ...)                                 \
+({                                                                     \
+       switch (write) {                                                \
+       case READ:                                                      \
+               mustfix_fsck_err(c, msg, ##__VA_ARGS__);                \
+               break;                                                  \
+       case WRITE:                                                     \
+               bch_err(c, "corrupt metadata before write:\n"           \
+                       msg, ##__VA_ARGS__);                            \
+               if (bch2_fs_inconsistent(c)) {                          \
+                       ret = BCH_FSCK_ERRORS_NOT_FIXED;                \
+                       goto fsck_err;                                  \
+               }                                                       \
+               break;                                                  \
+       }                                                               \
+       true;                                                           \
+})
+
+#define journal_entry_err_on(cond, c, msg, ...)                                \
+       ((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)
+
+static int journal_validate_key(struct bch_fs *c, struct jset *jset,
+                               struct jset_entry *entry,
+                               unsigned level, enum btree_id btree_id,
+                               struct bkey_i *k,
+                               const char *type, int write)
+{
+       void *next = vstruct_next(entry);
+       const char *invalid;
+       unsigned version = le32_to_cpu(jset->version);
+       int ret = 0;
+
+       if (journal_entry_err_on(!k->k.u64s, c,
+                       "invalid %s in journal: k->u64s 0", type)) {
+               entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
+               journal_entry_null_range(vstruct_next(entry), next);
+               return 0;
+       }
+
+       if (journal_entry_err_on((void *) bkey_next(k) >
+                               (void *) vstruct_next(entry), c,
+                       "invalid %s in journal: extends past end of journal entry",
+                       type)) {
+               entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
+               journal_entry_null_range(vstruct_next(entry), next);
+               return 0;
+       }
+
+       if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
+                       "invalid %s in journal: bad format %u",
+                       type, k->k.format)) {
+               le16_add_cpu(&entry->u64s, -k->k.u64s);
+               memmove(k, bkey_next(k), next - (void *) bkey_next(k));
+               journal_entry_null_range(vstruct_next(entry), next);
+               return 0;
+       }
+
+       if (!write)
+               bch2_bkey_compat(level, btree_id, version,
+                           JSET_BIG_ENDIAN(jset), write,
+                           NULL, bkey_to_packed(k));
+
+       invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k),
+                                   __btree_node_type(level, btree_id));
+       if (invalid) {
+               char buf[160];
+
+               bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k));
+               mustfix_fsck_err(c, "invalid %s in journal: %s\n%s",
+                                type, invalid, buf);
+
+               le16_add_cpu(&entry->u64s, -k->k.u64s);
+               memmove(k, bkey_next(k), next - (void *) bkey_next(k));
+               journal_entry_null_range(vstruct_next(entry), next);
+               return 0;
+       }
+
+       if (write)
+               bch2_bkey_compat(level, btree_id, version,
+                           JSET_BIG_ENDIAN(jset), write,
+                           NULL, bkey_to_packed(k));
+fsck_err:
+       return ret;
+}
+
+static int journal_entry_validate_btree_keys(struct bch_fs *c,
+                                            struct jset *jset,
+                                            struct jset_entry *entry,
+                                            int write)
+{
+       struct bkey_i *k;
+
+       vstruct_for_each(entry, k) {
+               int ret = journal_validate_key(c, jset, entry,
+                                              entry->level,
+                                              entry->btree_id,
+                                              k, "key", write);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int journal_entry_validate_btree_root(struct bch_fs *c,
+                                            struct jset *jset,
+                                            struct jset_entry *entry,
+                                            int write)
+{
+       struct bkey_i *k = entry->start;
+       int ret = 0;
+
+       if (journal_entry_err_on(!entry->u64s ||
+                                le16_to_cpu(entry->u64s) != k->k.u64s, c,
+                                "invalid btree root journal entry: wrong number of keys")) {
+               void *next = vstruct_next(entry);
+               /*
+                * we don't want to null out this jset_entry,
+                * just the contents, so that later we can tell
+                * we were _supposed_ to have a btree root
+                */
+               entry->u64s = 0;
+               journal_entry_null_range(vstruct_next(entry), next);
+               return 0;
+       }
+
+       return journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
+                                   "btree root", write);
+fsck_err:
+       return ret;
+}
+
+static int journal_entry_validate_prio_ptrs(struct bch_fs *c,
+                                           struct jset *jset,
+                                           struct jset_entry *entry,
+                                           int write)
+{
+       /* obsolete, don't care: */
+       return 0;
+}
+
+static int journal_entry_validate_blacklist(struct bch_fs *c,
+                                           struct jset *jset,
+                                           struct jset_entry *entry,
+                                           int write)
+{
+       int ret = 0;
+
+       if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c,
+               "invalid journal seq blacklist entry: bad size")) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+       }
+fsck_err:
+       return ret;
+}
+
+static int journal_entry_validate_blacklist_v2(struct bch_fs *c,
+                                              struct jset *jset,
+                                              struct jset_entry *entry,
+                                              int write)
+{
+       struct jset_entry_blacklist_v2 *bl_entry;
+       int ret = 0;
+
+       if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c,
+               "invalid journal seq blacklist entry: bad size")) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               goto out;
+       }
+
+       bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+       if (journal_entry_err_on(le64_to_cpu(bl_entry->start) >
+                                le64_to_cpu(bl_entry->end), c,
+               "invalid journal seq blacklist entry: start > end")) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+       }
+out:
+fsck_err:
+       return ret;
+}
+
+static int journal_entry_validate_usage(struct bch_fs *c,
+                                       struct jset *jset,
+                                       struct jset_entry *entry,
+                                       int write)
+{
+       struct jset_entry_usage *u =
+               container_of(entry, struct jset_entry_usage, entry);
+       unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+       int ret = 0;
+
+       if (journal_entry_err_on(bytes < sizeof(*u),
+                                c,
+                                "invalid journal entry usage: bad size")) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               return ret;
+       }
+
+fsck_err:
+       return ret;
+}
+
+static int journal_entry_validate_data_usage(struct bch_fs *c,
+                                       struct jset *jset,
+                                       struct jset_entry *entry,
+                                       int write)
+{
+       struct jset_entry_data_usage *u =
+               container_of(entry, struct jset_entry_data_usage, entry);
+       unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+       int ret = 0;
+
+       if (journal_entry_err_on(bytes < sizeof(*u) ||
+                                bytes < sizeof(*u) + u->r.nr_devs,
+                                c,
+                                "invalid journal entry usage: bad size")) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               return ret;
+       }
+
+fsck_err:
+       return ret;
+}
+
+struct jset_entry_ops {
+       int (*validate)(struct bch_fs *, struct jset *,
+                       struct jset_entry *, int);
+};
+
+static const struct jset_entry_ops bch2_jset_entry_ops[] = {
+#define x(f, nr)                                               \
+       [BCH_JSET_ENTRY_##f]    = (struct jset_entry_ops) {     \
+               .validate       = journal_entry_validate_##f,   \
+       },
+       BCH_JSET_ENTRY_TYPES()
+#undef x
+};
+
+static int journal_entry_validate(struct bch_fs *c, struct jset *jset,
+                                 struct jset_entry *entry, int write)
+{
+       return entry->type < BCH_JSET_ENTRY_NR
+               ? bch2_jset_entry_ops[entry->type].validate(c, jset,
+                                                           entry, write)
+               : 0;
+}
+
+static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
+                                int write)
+{
+       struct jset_entry *entry;
+       int ret = 0;
+
+       vstruct_for_each(jset, entry) {
+               if (journal_entry_err_on(vstruct_next(entry) >
+                                        vstruct_last(jset), c,
+                               "journal entry extends past end of jset")) {
+                       jset->u64s = cpu_to_le32((u64 *) entry - jset->_data);
+                       break;
+               }
+
+               ret = journal_entry_validate(c, jset, entry, write);
+               if (ret)
+                       break;
+       }
+fsck_err:
+       return ret;
+}
+
+static int jset_validate(struct bch_fs *c,
+                        struct bch_dev *ca,
+                        struct jset *jset, u64 sector,
+                        unsigned bucket_sectors_left,
+                        unsigned sectors_read,
+                        int write)
+{
+       size_t bytes = vstruct_bytes(jset);
+       struct bch_csum csum;
+       unsigned version;
+       int ret = 0;
+
+       if (le64_to_cpu(jset->magic) != jset_magic(c))
+               return JOURNAL_ENTRY_NONE;
+
+       version = le32_to_cpu(jset->version);
+       if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD &&
+                                 version < bcachefs_metadata_version_min) ||
+                                version >= bcachefs_metadata_version_max, c,
+                       "%s sector %llu seq %llu: unknown journal entry version %u",
+                       ca->name, sector, le64_to_cpu(jset->seq),
+                       version)) {
+               /* XXX: note we might have missing journal entries */
+               return JOURNAL_ENTRY_BAD;
+       }
+
+       if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
+                       "%s sector %llu seq %llu: journal entry too big (%zu bytes)",
+                       ca->name, sector, le64_to_cpu(jset->seq), bytes)) {
+               /* XXX: note we might have missing journal entries */
+               return JOURNAL_ENTRY_BAD;
+       }
+
+       if (bytes > sectors_read << 9)
+               return JOURNAL_ENTRY_REREAD;
+
+       if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
+                       "%s sector %llu seq %llu: journal entry with unknown csum type %llu",
+                       ca->name, sector, le64_to_cpu(jset->seq),
+                       JSET_CSUM_TYPE(jset)))
+               return JOURNAL_ENTRY_BAD;
+
+       csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
+       if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
+                                "%s sector %llu seq %llu: journal checksum bad",
+                                ca->name, sector, le64_to_cpu(jset->seq))) {
+               /* XXX: retry IO, when we start retrying checksum errors */
+               /* XXX: note we might have missing journal entries */
+               return JOURNAL_ENTRY_BAD;
+       }
+
+       bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
+                    jset->encrypted_start,
+                    vstruct_end(jset) - (void *) jset->encrypted_start);
+
+       if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
+                                "invalid journal entry: last_seq > seq")) {
+               jset->last_seq = jset->seq;
+               return JOURNAL_ENTRY_BAD;
+       }
+
+       return 0;
+fsck_err:
+       return ret;
+}
+
+struct journal_read_buf {
+       void            *data;
+       size_t          size;
+};
+
+static int journal_read_buf_realloc(struct journal_read_buf *b,
+                                   size_t new_size)
+{
+       void *n;
+
+       /* the bios are sized for this many pages, max: */
+       if (new_size > JOURNAL_ENTRY_SIZE_MAX)
+               return -ENOMEM;
+
+       new_size = roundup_pow_of_two(new_size);
+       n = kvpmalloc(new_size, GFP_KERNEL);
+       if (!n)
+               return -ENOMEM;
+
+       kvpfree(b->data, b->size);
+       b->data = n;
+       b->size = new_size;
+       return 0;
+}
+
+static int journal_read_bucket(struct bch_dev *ca,
+                              struct journal_read_buf *buf,
+                              struct journal_list *jlist,
+                              unsigned bucket)
+{
+       struct bch_fs *c = ca->fs;
+       struct journal_device *ja = &ca->journal;
+       struct jset *j = NULL;
+       unsigned sectors, sectors_read = 0;
+       u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
+           end = offset + ca->mi.bucket_size;
+       bool saw_bad = false;
+       int ret = 0;
+
+       pr_debug("reading %u", bucket);
+
+       while (offset < end) {
+               if (!sectors_read) {
+                       struct bio *bio;
+reread:
+                       sectors_read = min_t(unsigned,
+                               end - offset, buf->size >> 9);
+
+                       bio = bio_kmalloc(GFP_KERNEL,
+                                         buf_pages(buf->data,
+                                                   sectors_read << 9));
+                       bio_set_dev(bio, ca->disk_sb.bdev);
+                       bio->bi_iter.bi_sector  = offset;
+                       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+                       bch2_bio_map(bio, buf->data, sectors_read << 9);
+
+                       ret = submit_bio_wait(bio);
+                       bio_put(bio);
+
+                       if (bch2_dev_io_err_on(ret, ca,
+                                              "journal read from sector %llu",
+                                              offset) ||
+                           bch2_meta_read_fault("journal"))
+                               return -EIO;
+
+                       j = buf->data;
+               }
+
+               ret = jset_validate(c, ca, j, offset,
+                                   end - offset, sectors_read,
+                                   READ);
+               switch (ret) {
+               case BCH_FSCK_OK:
+                       sectors = vstruct_sectors(j, c->block_bits);
+                       break;
+               case JOURNAL_ENTRY_REREAD:
+                       if (vstruct_bytes(j) > buf->size) {
+                               ret = journal_read_buf_realloc(buf,
+                                                       vstruct_bytes(j));
+                               if (ret)
+                                       return ret;
+                       }
+                       goto reread;
+               case JOURNAL_ENTRY_NONE:
+                       if (!saw_bad)
+                               return 0;
+                       sectors = c->opts.block_size;
+                       goto next_block;
+               case JOURNAL_ENTRY_BAD:
+                       saw_bad = true;
+                       /*
+                        * On checksum error we don't really trust the size
+                        * field of the journal entry we read, so try reading
+                        * again at next block boundary:
+                        */
+                       sectors = c->opts.block_size;
+                       break;
+               default:
+                       return ret;
+               }
+
+               /*
+                * This happens sometimes if we don't have discards on -
+                * when we've partially overwritten a bucket with new
+                * journal entries. We don't need the rest of the
+                * bucket:
+                */
+               if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
+                       return 0;
+
+               ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
+
+               mutex_lock(&jlist->lock);
+               ret = journal_entry_add(c, ca, jlist, j, ret != 0);
+               mutex_unlock(&jlist->lock);
+
+               switch (ret) {
+               case JOURNAL_ENTRY_ADD_OK:
+                       break;
+               case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
+                       break;
+               default:
+                       return ret;
+               }
+next_block:
+               pr_debug("next");
+               offset          += sectors;
+               sectors_read    -= sectors;
+               j = ((void *) j) + (sectors << 9);
+       }
+
+       return 0;
+}
+
+static void bch2_journal_read_device(struct closure *cl)
+{
+       struct journal_device *ja =
+               container_of(cl, struct journal_device, read);
+       struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
+       struct journal_list *jlist =
+               container_of(cl->parent, struct journal_list, cl);
+       struct journal_read_buf buf = { NULL, 0 };
+       u64 min_seq = U64_MAX;
+       unsigned i;
+       int ret;
+
+       if (!ja->nr)
+               goto out;
+
+       ret = journal_read_buf_realloc(&buf, PAGE_SIZE);
+       if (ret)
+               goto err;
+
+       pr_debug("%u journal buckets", ja->nr);
+
+       for (i = 0; i < ja->nr; i++) {
+               ret = journal_read_bucket(ca, &buf, jlist, i);
+               if (ret)
+                       goto err;
+       }
+
+       /* Find the journal bucket with the highest sequence number: */
+       for (i = 0; i < ja->nr; i++) {
+               if (ja->bucket_seq[i] > ja->bucket_seq[ja->cur_idx])
+                       ja->cur_idx = i;
+
+               min_seq = min(ja->bucket_seq[i], min_seq);
+       }
+
+       /*
+        * If there are duplicate journal entries in multiple buckets (which
+        * definitely isn't supposed to happen, but...) - make sure to start
+        * cur_idx at the last of those buckets, so we don't deadlock trying to
+        * allocate
+        */
+       while (ja->bucket_seq[ja->cur_idx] > min_seq &&
+              ja->bucket_seq[ja->cur_idx] >
+              ja->bucket_seq[(ja->cur_idx + 1) % ja->nr])
+               ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
+
+       ja->sectors_free = 0;
+
+       /*
+        * Set dirty_idx to indicate the entire journal is full and needs to be
+        * reclaimed - journal reclaim will immediately reclaim whatever isn't
+        * pinned when it first runs:
+        */
+       ja->discard_idx = ja->dirty_idx_ondisk =
+               ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
+out:
+       kvpfree(buf.data, buf.size);
+       percpu_ref_put(&ca->io_ref);
+       closure_return(cl);
+       return;
+err:
+       mutex_lock(&jlist->lock);
+       jlist->ret = ret;
+       mutex_unlock(&jlist->lock);
+       goto out;
+}
+
+int bch2_journal_read(struct bch_fs *c, struct list_head *list)
+{
+       struct journal_list jlist;
+       struct journal_replay *i;
+       struct bch_dev *ca;
+       unsigned iter;
+       size_t keys = 0, entries = 0;
+       bool degraded = false;
+       int ret = 0;
+
+       closure_init_stack(&jlist.cl);
+       mutex_init(&jlist.lock);
+       jlist.head = list;
+       jlist.ret = 0;
+
+       for_each_member_device(ca, c, iter) {
+               if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
+                   !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal)))
+                       continue;
+
+               if ((ca->mi.state == BCH_MEMBER_STATE_RW ||
+                    ca->mi.state == BCH_MEMBER_STATE_RO) &&
+                   percpu_ref_tryget(&ca->io_ref))
+                       closure_call(&ca->journal.read,
+                                    bch2_journal_read_device,
+                                    system_unbound_wq,
+                                    &jlist.cl);
+               else
+                       degraded = true;
+       }
+
+       closure_sync(&jlist.cl);
+
+       if (jlist.ret)
+               return jlist.ret;
+
+       list_for_each_entry(i, list, list) {
+               struct jset_entry *entry;
+               struct bkey_i *k, *_n;
+               struct bch_replicas_padded replicas;
+               char buf[80];
+
+               ret = jset_validate_entries(c, &i->j, READ);
+               if (ret)
+                       goto fsck_err;
+
+               /*
+                * If we're mounting in degraded mode - if we didn't read all
+                * the devices - the device list for this entry may be
+                * incomplete, so the replicas check below can't be trusted:
+                */
+
+               bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, i->devs);
+
+               if (!degraded &&
+                   (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
+                    fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c,
+                                "superblock not marked as containing replicas %s",
+                                (bch2_replicas_entry_to_text(&PBUF(buf),
+                                                             &replicas.e), buf)))) {
+                       ret = bch2_mark_replicas(c, &replicas.e);
+                       if (ret)
+                               return ret;
+               }
+
+               for_each_jset_key(k, _n, entry, &i->j)
+                       keys++;
+               entries++;
+       }
+
+       if (!list_empty(list)) {
+               i = list_last_entry(list, struct journal_replay, list);
+
+               bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
+                        keys, entries, le64_to_cpu(i->j.seq));
+       }
+fsck_err:
+       return ret;
+}
+
+/* journal write: */
+
+static void __journal_write_alloc(struct journal *j,
+                                 struct journal_buf *w,
+                                 struct dev_alloc_list *devs_sorted,
+                                 unsigned sectors,
+                                 unsigned *replicas,
+                                 unsigned replicas_want)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_device *ja;
+       struct bch_dev *ca;
+       unsigned i;
+
+       if (*replicas >= replicas_want)
+               return;
+
+       for (i = 0; i < devs_sorted->nr; i++) {
+               ca = rcu_dereference(c->devs[devs_sorted->devs[i]]);
+               if (!ca)
+                       continue;
+
+               ja = &ca->journal;
+
+               /*
+                * Check that we can use this device, and aren't already using
+                * it:
+                */
+               if (!ca->mi.durability ||
+                   ca->mi.state != BCH_MEMBER_STATE_RW ||
+                   !ja->nr ||
+                   bch2_bkey_has_device(bkey_i_to_s_c(&w->key),
+                                        ca->dev_idx) ||
+                   sectors > ja->sectors_free)
+                       continue;
+
+               bch2_dev_stripe_increment(ca, &j->wp.stripe);
+
+               bch2_bkey_append_ptr(&w->key,
+                       (struct bch_extent_ptr) {
+                                 .offset = bucket_to_sector(ca,
+                                       ja->buckets[ja->cur_idx]) +
+                                       ca->mi.bucket_size -
+                                       ja->sectors_free,
+                                 .dev = ca->dev_idx,
+               });
+
+               ja->sectors_free -= sectors;
+               ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
+
+               *replicas += ca->mi.durability;
+
+               if (*replicas >= replicas_want)
+                       break;
+       }
+}
+
+/**
+ * journal_write_alloc - decide where to write next journal entry
+ */
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+                              unsigned sectors)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_device *ja;
+       struct bch_dev *ca;
+       struct dev_alloc_list devs_sorted;
+       unsigned i, replicas = 0, replicas_want =
+               READ_ONCE(c->opts.metadata_replicas);
+
+       rcu_read_lock();
+
+       devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe,
+                                         &c->rw_devs[BCH_DATA_journal]);
+
+       __journal_write_alloc(j, w, &devs_sorted,
+                             sectors, &replicas, replicas_want);
+
+       if (replicas >= replicas_want)
+               goto done;
+
+       for (i = 0; i < devs_sorted.nr; i++) {
+               ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
+               if (!ca)
+                       continue;
+
+               ja = &ca->journal;
+
+               if (sectors > ja->sectors_free &&
+                   sectors <= ca->mi.bucket_size &&
+                   bch2_journal_dev_buckets_available(j, ja,
+                                       journal_space_discarded)) {
+                       ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
+                       ja->sectors_free = ca->mi.bucket_size;
+
+                       /*
+                        * ja->bucket_seq[ja->cur_idx] must always have
+                        * something sensible:
+                        */
+                       ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
+               }
+       }
+
+       __journal_write_alloc(j, w, &devs_sorted,
+                             sectors, &replicas, replicas_want);
+done:
+       rcu_read_unlock();
+
+       return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
+}
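+
+/*
+ * A sketch of the allocation strategy above (illustrative, simplified): the
+ * first __journal_write_alloc() pass only uses space already available in
+ * each device's current journal bucket; if that doesn't reach
+ * metadata_replicas, we advance cur_idx on devices that still have discarded
+ * buckets available and retry:
+ *
+ *	pass 1: use ja->sectors_free in ja->buckets[ja->cur_idx]
+ *	pass 2: ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
+ *		ja->sectors_free = ca->mi.bucket_size;
+ *		then __journal_write_alloc() again
+ *
+ * If even that leaves us under metadata_replicas_required, the caller gets
+ * -EROFS.
+ */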
+
+static void journal_write_compact(struct jset *jset)
+{
+       struct jset_entry *i, *next, *prev = NULL;
+
+       /*
+        * Simple compaction, dropping empty jset_entries (from journal
+        * reservations that weren't fully used) and merging jset_entries that
+        * can be.
+        *
+        * If we wanted to be really fancy here, we could sort all the keys in
+        * the jset and drop keys that were overwritten - probably not worth it:
+        */
+       vstruct_for_each_safe(jset, i, next) {
+               unsigned u64s = le16_to_cpu(i->u64s);
+
+               /* Empty entry: */
+               if (!u64s)
+                       continue;
+
+               /* Can we merge with previous entry? */
+               if (prev &&
+                   i->btree_id == prev->btree_id &&
+                   i->level    == prev->level &&
+                   i->type     == prev->type &&
+                   i->type     == BCH_JSET_ENTRY_btree_keys &&
+                   le16_to_cpu(prev->u64s) + u64s <= U16_MAX) {
+                       memmove_u64s_down(vstruct_next(prev),
+                                         i->_data,
+                                         u64s);
+                       le16_add_cpu(&prev->u64s, u64s);
+                       continue;
+               }
+
+               /* Couldn't merge, move i into new position (after prev): */
+               prev = prev ? vstruct_next(prev) : jset->start;
+               if (i != prev)
+                       memmove_u64s_down(prev, i, jset_u64s(u64s));
+       }
+
+       prev = prev ? vstruct_next(prev) : jset->start;
+       jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
+}
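+
+/*
+ * For example (hypothetical jset contents), compaction turns
+ *
+ *	[btree_keys id=0 u64s=3] [empty u64s=0] [btree_keys id=0 u64s=2]
+ *
+ * into the single entry
+ *
+ *	[btree_keys id=0 u64s=5]
+ *
+ * - the empty reservation is dropped, and adjacent btree_keys entries for the
+ * same btree and level are merged as long as the combined size stays below
+ * U16_MAX u64s.
+ */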
+
+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
+{
+       /* we aren't holding j->lock: */
+       unsigned new_size = READ_ONCE(j->buf_size_want);
+       void *new_buf;
+
+       if (buf->buf_size >= new_size)
+               return;
+
+       new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
+       if (!new_buf)
+               return;
+
+       memcpy(new_buf, buf->data, buf->buf_size);
+       kvpfree(buf->data, buf->buf_size);
+       buf->data       = new_buf;
+       buf->buf_size   = new_size;
+}
+
+static void journal_write_done(struct closure *cl)
+{
+       struct journal *j = container_of(cl, struct journal, io);
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_buf *w = journal_prev_buf(j);
+       struct bch_devs_list devs =
+               bch2_bkey_devs(bkey_i_to_s_c(&w->key));
+       struct bch_replicas_padded replicas;
+       u64 seq = le64_to_cpu(w->data->seq);
+       u64 last_seq = le64_to_cpu(w->data->last_seq);
+
+       bch2_time_stats_update(j->write_time, j->write_start_time);
+
+       if (!devs.nr) {
+               bch_err(c, "unable to write journal to sufficient devices");
+               goto err;
+       }
+
+       bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, devs);
+
+       if (bch2_mark_replicas(c, &replicas.e))
+               goto err;
+
+       spin_lock(&j->lock);
+       if (seq >= j->pin.front)
+               journal_seq_pin(j, seq)->devs = devs;
+
+       j->seq_ondisk           = seq;
+       j->last_seq_ondisk      = last_seq;
+       bch2_journal_space_available(j);
+
+       /*
+        * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
+        * more buckets:
+        *
+        * Must come before signaling write completion, for
+        * bch2_fs_journal_stop():
+        */
+       mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
+out:
+       /* also must come before signaling write completion: */
+       closure_debug_destroy(cl);
+
+       BUG_ON(!j->reservations.prev_buf_unwritten);
+       atomic64_sub(((union journal_res_state) { .prev_buf_unwritten = 1 }).v,
+                    &j->reservations.counter);
+
+       closure_wake_up(&w->wait);
+       journal_wake(j);
+
+       if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
+               mod_delayed_work(system_freezable_wq, &j->write_work, 0);
+       spin_unlock(&j->lock);
+       return;
+err:
+       bch2_fatal_error(c);
+       spin_lock(&j->lock);
+       goto out;
+}
+
+static void journal_write_endio(struct bio *bio)
+{
+       struct bch_dev *ca = bio->bi_private;
+       struct journal *j = &ca->fs->journal;
+
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write: %s",
+                              bch2_blk_status_to_str(bio->bi_status)) ||
+           bch2_meta_write_fault("journal")) {
+               struct journal_buf *w = journal_prev_buf(j);
+               unsigned long flags;
+
+               spin_lock_irqsave(&j->err_lock, flags);
+               bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx);
+               spin_unlock_irqrestore(&j->err_lock, flags);
+       }
+
+       closure_put(&j->io);
+       percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_journal_write(struct closure *cl)
+{
+       struct journal *j = container_of(cl, struct journal, io);
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct bch_dev *ca;
+       struct journal_buf *w = journal_prev_buf(j);
+       struct jset_entry *start, *end;
+       struct jset *jset;
+       struct bio *bio;
+       struct bch_extent_ptr *ptr;
+       bool validate_before_checksum = false;
+       unsigned i, sectors, bytes, u64s;
+       int ret;
+
+       bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));
+
+       journal_buf_realloc(j, w);
+       jset = w->data;
+
+       j->write_start_time = local_clock();
+
+       /*
+        * New btree roots are set by journalling them; when the journal entry
+        * gets written we have to propagate them to c->btree_roots
+        *
+        * But, every journal entry we write has to contain all the btree roots
+        * (at least for now); so after we copy btree roots to c->btree_roots we
+        * have to get any missing btree roots and add them to this journal
+        * entry:
+        */
+
+       bch2_journal_entries_to_btree_roots(c, jset);
+
+       start = end = vstruct_last(jset);
+
+       end     = bch2_btree_roots_to_journal_entries(c, jset->start, end);
+
+       end     = bch2_journal_super_entries_add_common(c, end,
+                                               le64_to_cpu(jset->seq));
+       u64s    = (u64 *) end - (u64 *) start;
+       BUG_ON(u64s > j->entry_u64s_reserved);
+
+       le32_add_cpu(&jset->u64s, u64s);
+       BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
+
+       journal_write_compact(jset);
+
+       jset->read_clock        = cpu_to_le16(c->bucket_clock[READ].hand);
+       jset->write_clock       = cpu_to_le16(c->bucket_clock[WRITE].hand);
+       jset->magic             = cpu_to_le64(jset_magic(c));
+
+       jset->version           = c->sb.version < bcachefs_metadata_version_new_versioning
+               ? cpu_to_le32(BCH_JSET_VERSION_OLD)
+               : cpu_to_le32(c->sb.version);
+
+       SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
+       SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
+
+       if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
+               validate_before_checksum = true;
+
+       if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max)
+               validate_before_checksum = true;
+
+       if (validate_before_checksum &&
+           jset_validate_entries(c, jset, WRITE))
+               goto err;
+
+       bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
+                   jset->encrypted_start,
+                   vstruct_end(jset) - (void *) jset->encrypted_start);
+
+       jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
+                                 journal_nonce(jset), jset);
+
+       if (!validate_before_checksum &&
+           jset_validate_entries(c, jset, WRITE))
+               goto err;
+
+       sectors = vstruct_sectors(jset, c->block_bits);
+       BUG_ON(sectors > w->sectors);
+
+       bytes = vstruct_bytes(jset);
+       memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
+
+retry_alloc:
+       spin_lock(&j->lock);
+       ret = journal_write_alloc(j, w, sectors);
+
+       if (ret && j->can_discard) {
+               spin_unlock(&j->lock);
+               bch2_journal_do_discards(j);
+               goto retry_alloc;
+       }
+
+       /*
+        * write is allocated, no longer need to account for it in
+        * bch2_journal_space_available():
+        */
+       w->sectors = 0;
+
+       /*
+        * journal entry has been compacted and allocated, recalculate space
+        * available:
+        */
+       bch2_journal_space_available(j);
+       spin_unlock(&j->lock);
+
+       if (ret) {
+               bch_err(c, "Unable to allocate journal write");
+               bch2_fatal_error(c);
+               continue_at(cl, journal_write_done, system_highpri_wq);
+               return;
+       }
+
+       /*
+        * XXX: we really should just disable the entire journal in nochanges
+        * mode
+        */
+       if (c->opts.nochanges)
+               goto no_io;
+
+       extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+               ca = bch_dev_bkey_exists(c, ptr->dev);
+               if (!percpu_ref_tryget(&ca->io_ref)) {
+                       /* XXX: fix this */
+                       bch_err(c, "missing device for journal write");
+                       continue;
+               }
+
+               this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
+                            sectors);
+
+               bio = ca->journal.bio;
+               bio_reset(bio);
+               bio_set_dev(bio, ca->disk_sb.bdev);
+               bio->bi_iter.bi_sector  = ptr->offset;
+               bio->bi_end_io          = journal_write_endio;
+               bio->bi_private         = ca;
+               bio_set_op_attrs(bio, REQ_OP_WRITE,
+                                REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
+               bch2_bio_map(bio, jset, sectors << 9);
+
+               trace_journal_write(bio);
+               closure_bio_submit(bio, cl);
+
+               ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq);
+       }
+
+       for_each_rw_member(ca, c, i)
+               if (journal_flushes_device(ca) &&
+                   !bch2_bkey_has_device(bkey_i_to_s_c(&w->key), i)) {
+                       percpu_ref_get(&ca->io_ref);
+
+                       bio = ca->journal.bio;
+                       bio_reset(bio);
+                       bio_set_dev(bio, ca->disk_sb.bdev);
+                       bio->bi_opf             = REQ_OP_FLUSH;
+                       bio->bi_end_io          = journal_write_endio;
+                       bio->bi_private         = ca;
+                       closure_bio_submit(bio, cl);
+               }
+
+no_io:
+       bch2_bucket_seq_cleanup(c);
+
+       continue_at(cl, journal_write_done, system_highpri_wq);
+       return;
+err:
+       bch2_inconsistent_error(c);
+       continue_at(cl, journal_write_done, system_highpri_wq);
+}
diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h
new file mode 100644 (file)
index 0000000..6958ee0
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_IO_H
+#define _BCACHEFS_JOURNAL_IO_H
+
+/*
+ * Only used for holding the journal entries we read in bch2_journal_read()
+ * at startup
+ */
+struct journal_replay {
+       struct list_head        list;
+       struct bch_devs_list    devs;
+       /* checksum error, but we may want to try using it anyways: */
+       bool                    bad;
+       /* must be last: */
+       struct jset             j;
+};
+
+static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
+                                       struct jset_entry *entry, unsigned type)
+{
+       while (entry < vstruct_last(jset)) {
+               if (entry->type == type)
+                       return entry;
+
+               entry = vstruct_next(entry);
+       }
+
+       return NULL;
+}
+
+#define for_each_jset_entry_type(entry, jset, type)                    \
+       for (entry = (jset)->start;                                     \
+            (entry = __jset_entry_type_next(jset, entry, type));       \
+            entry = vstruct_next(entry))
+
+#define for_each_jset_key(k, _n, entry, jset)                          \
+       for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys)        \
+               vstruct_for_each_safe(entry, k, _n)
+
+int bch2_journal_read(struct bch_fs *, struct list_head *);
+
+void bch2_journal_write(struct closure *);
+
+#endif /* _BCACHEFS_JOURNAL_IO_H */
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
new file mode 100644 (file)
index 0000000..5759198
--- /dev/null
@@ -0,0 +1,644 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "replicas.h"
+#include "super.h"
+
+/* Free space calculations: */
+
+static unsigned journal_space_from(struct journal_device *ja,
+                                  enum journal_space_from from)
+{
+       switch (from) {
+       case journal_space_discarded:
+               return ja->discard_idx;
+       case journal_space_clean_ondisk:
+               return ja->dirty_idx_ondisk;
+       case journal_space_clean:
+               return ja->dirty_idx;
+       default:
+               BUG();
+       }
+}
+
+unsigned bch2_journal_dev_buckets_available(struct journal *j,
+                                           struct journal_device *ja,
+                                           enum journal_space_from from)
+{
+       unsigned available = (journal_space_from(ja, from) -
+                             ja->cur_idx - 1 + ja->nr) % ja->nr;
+
+       /*
+        * Don't use the last bucket unless writing the new last_seq
+        * will make another bucket available:
+        */
+       if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
+               --available;
+
+       return available;
+}
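+
+/*
+ * A worked example of the ring arithmetic above (hypothetical numbers): with
+ * ja->nr = 8, ja->cur_idx = 3 and ja->discard_idx = 1,
+ * journal_space_discarded gives (1 - 3 - 1 + 8) % 8 = 5 available buckets -
+ * minus one more if everything dirty is already on disk
+ * (dirty_idx_ondisk == dirty_idx), per the last-bucket rule above.
+ */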
+
+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+{
+       union journal_preres_state old, new;
+       u64 v = atomic64_read(&j->prereserved.counter);
+
+       do {
+               old.v = new.v = v;
+               new.remaining = u64s_remaining;
+       } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
+                                      old.v, new.v)) != old.v);
+}
+
+static struct journal_space {
+       unsigned        next_entry;
+       unsigned        remaining;
+} __journal_space_available(struct journal *j, unsigned nr_devs_want,
+                           enum journal_space_from from)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct bch_dev *ca;
+       unsigned sectors_next_entry     = UINT_MAX;
+       unsigned sectors_total          = UINT_MAX;
+       unsigned i, nr_devs = 0;
+       unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
+               ? journal_prev_buf(j)->sectors
+               : 0;
+
+       rcu_read_lock();
+       for_each_member_device_rcu(ca, c, i,
+                                  &c->rw_devs[BCH_DATA_journal]) {
+               struct journal_device *ja = &ca->journal;
+               unsigned buckets_this_device, sectors_this_device;
+
+               if (!ja->nr)
+                       continue;
+
+               buckets_this_device = bch2_journal_dev_buckets_available(j, ja, from);
+               sectors_this_device = ja->sectors_free;
+
+               /*
+                * Note that we don't allocate the space for a journal entry
+                * until we write it out - thus, account for it here:
+                */
+               if (unwritten_sectors >= sectors_this_device) {
+                       if (!buckets_this_device)
+                               continue;
+
+                       buckets_this_device--;
+                       sectors_this_device = ca->mi.bucket_size;
+               }
+
+               sectors_this_device -= unwritten_sectors;
+
+               if (sectors_this_device < ca->mi.bucket_size &&
+                   buckets_this_device) {
+                       buckets_this_device--;
+                       sectors_this_device = ca->mi.bucket_size;
+               }
+
+               if (!sectors_this_device)
+                       continue;
+
+               sectors_next_entry = min(sectors_next_entry,
+                                        sectors_this_device);
+
+               sectors_total = min(sectors_total,
+                       buckets_this_device * ca->mi.bucket_size +
+                       sectors_this_device);
+
+               nr_devs++;
+       }
+       rcu_read_unlock();
+
+       if (nr_devs < nr_devs_want)
+               return (struct journal_space) { 0, 0 };
+
+       return (struct journal_space) {
+               .next_entry     = sectors_next_entry,
+               .remaining      = max_t(int, 0, sectors_total - sectors_next_entry),
+       };
+}
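+
+/*
+ * Worked example (hypothetical numbers, assuming no unwritten journal buf):
+ * two devices with 256 sector buckets, one with 3 free buckets and 100
+ * sectors free in its current bucket, the other with 1 free bucket and a
+ * full 256 sectors free. With nr_devs_want = 2:
+ *
+ *	next_entry = min(100, 256)                       = 100
+ *	remaining  = min(3*256 + 100, 1*256 + 256) - 100 = 412 sectors
+ */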
+
+void bch2_journal_space_available(struct journal *j)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct bch_dev *ca;
+       struct journal_space discarded, clean_ondisk, clean;
+       unsigned overhead, u64s_remaining = 0;
+       unsigned max_entry_size  = min(j->buf[0].buf_size >> 9,
+                                      j->buf[1].buf_size >> 9);
+       unsigned i, nr_online = 0, nr_devs_want;
+       bool can_discard = false;
+       int ret = 0;
+
+       lockdep_assert_held(&j->lock);
+
+       rcu_read_lock();
+       for_each_member_device_rcu(ca, c, i,
+                                  &c->rw_devs[BCH_DATA_journal]) {
+               struct journal_device *ja = &ca->journal;
+
+               if (!ja->nr)
+                       continue;
+
+               while (ja->dirty_idx != ja->cur_idx &&
+                      ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
+                       ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+
+               while (ja->dirty_idx_ondisk != ja->dirty_idx &&
+                      ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
+                       ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+
+               if (ja->discard_idx != ja->dirty_idx_ondisk)
+                       can_discard = true;
+
+               max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size);
+               nr_online++;
+       }
+       rcu_read_unlock();
+
+       j->can_discard = can_discard;
+
+       if (nr_online < c->opts.metadata_replicas_required) {
+               ret = -EROFS;
+               goto out;
+       }
+
+       if (!fifo_free(&j->pin)) {
+               ret = -ENOSPC;
+               goto out;
+       }
+
+       nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas);
+
+       discarded       = __journal_space_available(j, nr_devs_want, journal_space_discarded);
+       clean_ondisk    = __journal_space_available(j, nr_devs_want, journal_space_clean_ondisk);
+       clean           = __journal_space_available(j, nr_devs_want, journal_space_clean);
+
+       if (!discarded.next_entry)
+               ret = -ENOSPC;
+
+       overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) *
+               journal_entry_overhead(j);
+       u64s_remaining = clean.remaining << 6; /* 512 byte sectors -> u64s */
+       u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
+       u64s_remaining /= 4; /* only hand out a quarter as prereservations */
+out:
+       j->cur_entry_sectors    = !ret ? discarded.next_entry : 0;
+       j->cur_entry_error      = ret;
+       journal_set_remaining(j, u64s_remaining);
+       journal_check_may_get_unreserved(j);
+
+       if (!ret)
+               journal_wake(j);
+}
+
+/* Discards - last part of journal reclaim: */
+
+static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
+{
+       bool ret;
+
+       spin_lock(&j->lock);
+       ret = ja->discard_idx != ja->dirty_idx_ondisk;
+       spin_unlock(&j->lock);
+
+       return ret;
+}
+
+/*
+ * Advance ja->discard_idx as long as it points to buckets that are no longer
+ * dirty, issuing discards if necessary:
+ */
+void bch2_journal_do_discards(struct journal *j)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct bch_dev *ca;
+       unsigned iter;
+
+       mutex_lock(&j->discard_lock);
+
+       for_each_rw_member(ca, c, iter) {
+               struct journal_device *ja = &ca->journal;
+
+               while (should_discard_bucket(j, ja)) {
+                       if (ca->mi.discard &&
+                           blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
+                               blkdev_issue_discard(ca->disk_sb.bdev,
+                                       bucket_to_sector(ca,
+                                               ja->buckets[ja->discard_idx]),
+                                       ca->mi.bucket_size, GFP_NOIO, 0);
+
+                       spin_lock(&j->lock);
+                       ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+
+                       bch2_journal_space_available(j);
+                       spin_unlock(&j->lock);
+               }
+       }
+
+       mutex_unlock(&j->discard_lock);
+}
+
+/*
+ * Journal entry pinning - machinery for holding a reference on a given journal
+ * entry, holding it open to ensure it gets replayed during recovery:
+ */
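+
+/*
+ * A minimal usage sketch (hypothetical caller): code that makes journal
+ * entry @seq dirty registers a pin with a flush callback, and drops it once
+ * the pinned state has been written out:
+ *
+ *	static void example_flush(struct journal *j,
+ *				  struct journal_entry_pin *pin, u64 seq)
+ *	{
+ *		... write out whatever was keeping this entry dirty ...
+ *	}
+ *
+ *	bch2_journal_pin_add(j, seq, &my_pin, example_flush);
+ *	...
+ *	bch2_journal_pin_drop(j, &my_pin);
+ */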
+
+static void bch2_journal_reclaim_fast(struct journal *j)
+{
+       struct journal_entry_pin_list temp;
+       bool popped = false;
+
+       lockdep_assert_held(&j->lock);
+
+       /*
+        * Unpin journal entries whose reference counts reached zero, meaning
+        * all btree nodes got written out
+        */
+       while (!fifo_empty(&j->pin) &&
+              !atomic_read(&fifo_peek_front(&j->pin).count)) {
+               BUG_ON(!list_empty(&fifo_peek_front(&j->pin).list));
+               BUG_ON(!fifo_pop(&j->pin, temp));
+               popped = true;
+       }
+
+       if (popped)
+               bch2_journal_space_available(j);
+}
+
+void bch2_journal_pin_put(struct journal *j, u64 seq)
+{
+       struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
+
+       if (atomic_dec_and_test(&pin_list->count)) {
+               spin_lock(&j->lock);
+               bch2_journal_reclaim_fast(j);
+               spin_unlock(&j->lock);
+       }
+}
+
+static inline void __journal_pin_drop(struct journal *j,
+                                     struct journal_entry_pin *pin)
+{
+       struct journal_entry_pin_list *pin_list;
+
+       if (!journal_pin_active(pin))
+               return;
+
+       pin_list = journal_seq_pin(j, pin->seq);
+       pin->seq = 0;
+       list_del_init(&pin->list);
+
+       /*
+        * Unpinning a journal entry may make journal_write_alloc() succeed, if
+        * writing a new last_seq will now make another bucket available:
+        */
+       if (atomic_dec_and_test(&pin_list->count) &&
+           pin_list == &fifo_peek_front(&j->pin))
+               bch2_journal_reclaim_fast(j);
+       else if (fifo_used(&j->pin) == 1 &&
+                atomic_read(&pin_list->count) == 1)
+               journal_wake(j);
+}
+
+void bch2_journal_pin_drop(struct journal *j,
+                          struct journal_entry_pin *pin)
+{
+       spin_lock(&j->lock);
+       __journal_pin_drop(j, pin);
+       spin_unlock(&j->lock);
+}
+
+static void bch2_journal_pin_add_locked(struct journal *j, u64 seq,
+                           struct journal_entry_pin *pin,
+                           journal_pin_flush_fn flush_fn)
+{
+       struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
+
+       __journal_pin_drop(j, pin);
+
+       BUG_ON(!atomic_read(&pin_list->count) && seq == journal_last_seq(j));
+
+       atomic_inc(&pin_list->count);
+       pin->seq        = seq;
+       pin->flush      = flush_fn;
+
+       list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);
+}
+
+void __bch2_journal_pin_add(struct journal *j, u64 seq,
+                           struct journal_entry_pin *pin,
+                           journal_pin_flush_fn flush_fn)
+{
+       spin_lock(&j->lock);
+       bch2_journal_pin_add_locked(j, seq, pin, flush_fn);
+       spin_unlock(&j->lock);
+
+       /*
+        * If the journal is currently full, we might want to call flush_fn
+        * immediately:
+        */
+       journal_wake(j);
+}
+
+void bch2_journal_pin_update(struct journal *j, u64 seq,
+                            struct journal_entry_pin *pin,
+                            journal_pin_flush_fn flush_fn)
+{
+       if (journal_pin_active(pin) && pin->seq < seq)
+               return;
+
+       spin_lock(&j->lock);
+
+       if (pin->seq != seq) {
+               bch2_journal_pin_add_locked(j, seq, pin, flush_fn);
+       } else {
+               struct journal_entry_pin_list *pin_list =
+                       journal_seq_pin(j, seq);
+
+               /*
+                * If the pin is already pinning the right sequence number, it
+                * still might've already been flushed:
+                */
+               list_move(&pin->list, &pin_list->list);
+       }
+
+       spin_unlock(&j->lock);
+
+       /*
+        * If the journal is currently full, we might want to call flush_fn
+        * immediately:
+        */
+       journal_wake(j);
+}
+
+void bch2_journal_pin_copy(struct journal *j,
+                          struct journal_entry_pin *dst,
+                          struct journal_entry_pin *src,
+                          journal_pin_flush_fn flush_fn)
+{
+       spin_lock(&j->lock);
+
+       if (journal_pin_active(src) &&
+           (!journal_pin_active(dst) || src->seq < dst->seq))
+               bch2_journal_pin_add_locked(j, src->seq, dst, flush_fn);
+
+       spin_unlock(&j->lock);
+}
+
+/**
+ * bch2_journal_pin_flush - ensure journal pin callback is no longer running
+ */
+void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
+{
+       BUG_ON(journal_pin_active(pin));
+
+       wait_event(j->pin_flush_wait, j->flush_in_progress != pin);
+}
+
+/*
+ * Journal reclaim: flush references to open journal entries to reclaim space in
+ * the journal
+ *
+ * May be done by the journal code in the background as needed to free up space
+ * for more journal entries, or as part of doing a clean shutdown, or to migrate
+ * data off of a specific device:
+ */
+
+static struct journal_entry_pin *
+journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
+{
+       struct journal_entry_pin_list *pin_list;
+       struct journal_entry_pin *ret = NULL;
+
+       if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
+               return NULL;
+
+       spin_lock(&j->lock);
+
+       fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
+               if (*seq > max_seq ||
+                   (ret = list_first_entry_or_null(&pin_list->list,
+                               struct journal_entry_pin, list)))
+                       break;
+
+       if (ret) {
+               list_move(&ret->list, &pin_list->flushed);
+               BUG_ON(j->flush_in_progress);
+               j->flush_in_progress = ret;
+               j->last_flushed = jiffies;
+       }
+
+       spin_unlock(&j->lock);
+
+       return ret;
+}
+
+/* returns true if we did work */
+static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
+                              unsigned min_nr)
+{
+       struct journal_entry_pin *pin;
+       bool ret = false;
+       u64 seq;
+
+       lockdep_assert_held(&j->reclaim_lock);
+
+       while ((pin = journal_get_next_pin(j, min_nr
+                               ? U64_MAX : seq_to_flush, &seq))) {
+               if (min_nr)
+                       min_nr--;
+
+               pin->flush(j, pin, seq);
+
+               BUG_ON(j->flush_in_progress != pin);
+               j->flush_in_progress = NULL;
+               wake_up(&j->pin_flush_wait);
+               ret = true;
+       }
+
+       return ret;
+}
+
+/**
+ * bch2_journal_reclaim - free up journal buckets
+ *
+ * Background journal reclaim writes out btree nodes. It should be run
+ * early enough so that we never completely run out of journal buckets.
+ *
+ * High watermarks for triggering background reclaim:
+ * - FIFO has fewer than 512 entries left
+ * - fewer than 25% journal buckets free
+ *
+ * Background reclaim runs until low watermarks are reached:
+ * - FIFO has more than 1024 entries left
+ * - more than 50% journal buckets free
+ *
+ * As long as a reclaim can complete in the time it takes to fill up
+ * 512 journal entries or 25% of all journal buckets, then
+ * journal_write_alloc() should not stall.
+ */
+void bch2_journal_reclaim(struct journal *j)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct bch_dev *ca;
+       unsigned iter, min_nr = 0;
+       u64 seq_to_flush = 0;
+
+       lockdep_assert_held(&j->reclaim_lock);
+
+       bch2_journal_do_discards(j);
+
+       spin_lock(&j->lock);
+
+       for_each_rw_member(ca, c, iter) {
+               struct journal_device *ja = &ca->journal;
+               unsigned nr_buckets, bucket_to_flush;
+
+               if (!ja->nr)
+                       continue;
+
+               /* Try to keep the journal at most half full: */
+               nr_buckets = ja->nr / 2;
+
+               /* And include pre-reservations: */
+               nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
+                                          (ca->mi.bucket_size << 6) -
+                                          journal_entry_overhead(j));
+
+               nr_buckets = min(nr_buckets, ja->nr);
+
+               bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
+               seq_to_flush = max(seq_to_flush,
+                                  ja->bucket_seq[bucket_to_flush]);
+       }
+
+       /* Also flush if the pin fifo is more than half full */
+       seq_to_flush = max_t(s64, seq_to_flush,
+                            (s64) journal_cur_seq(j) -
+                            (j->pin.size >> 1));
+       spin_unlock(&j->lock);
+
+       /*
+        * If it's been longer than j->reclaim_delay_ms since we last flushed,
+        * make sure to flush at least one journal pin:
+        */
+       if (time_after(jiffies, j->last_flushed +
+                      msecs_to_jiffies(j->reclaim_delay_ms)))
+               min_nr = 1;
+
+       if (j->prereserved.reserved * 2 > j->prereserved.remaining) {
+               seq_to_flush = max(seq_to_flush, journal_last_seq(j));
+               min_nr = 1;
+       }
+
+       journal_flush_pins(j, seq_to_flush, min_nr);
+
+       if (!bch2_journal_error(j))
+               queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work,
+                                  msecs_to_jiffies(j->reclaim_delay_ms));
+}
+
+void bch2_journal_reclaim_work(struct work_struct *work)
+{
+       struct journal *j = container_of(to_delayed_work(work),
+                               struct journal, reclaim_work);
+
+       mutex_lock(&j->reclaim_lock);
+       bch2_journal_reclaim(j);
+       mutex_unlock(&j->reclaim_lock);
+}
+
+static int journal_flush_done(struct journal *j, u64 seq_to_flush,
+                             bool *did_work)
+{
+       int ret;
+
+       ret = bch2_journal_error(j);
+       if (ret)
+               return ret;
+
+       mutex_lock(&j->reclaim_lock);
+
+       *did_work = journal_flush_pins(j, seq_to_flush, 0);
+
+       spin_lock(&j->lock);
+       /*
+        * If journal replay hasn't completed, the unreplayed journal entries
+        * hold refs on their corresponding sequence numbers
+        */
+       ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
+               journal_last_seq(j) > seq_to_flush ||
+               (fifo_used(&j->pin) == 1 &&
+                atomic_read(&fifo_peek_front(&j->pin).count) == 1);
+
+       spin_unlock(&j->lock);
+       mutex_unlock(&j->reclaim_lock);
+
+       return ret;
+}
+
+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
+{
+       bool did_work = false;
+
+       if (!test_bit(JOURNAL_STARTED, &j->flags))
+               return false;
+
+       closure_wait_event(&j->async_wait,
+               journal_flush_done(j, seq_to_flush, &did_work));
+
+       return did_work;
+}
+
+int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_entry_pin_list *p;
+       u64 iter, seq = 0;
+       int ret = 0;
+
+       spin_lock(&j->lock);
+       fifo_for_each_entry_ptr(p, &j->pin, iter)
+               if (dev_idx >= 0
+                   ? bch2_dev_list_has_dev(p->devs, dev_idx)
+                   : p->devs.nr < c->opts.metadata_replicas)
+                       seq = iter;
+       spin_unlock(&j->lock);
+
+       bch2_journal_flush_pins(j, seq);
+
+       ret = bch2_journal_error(j);
+       if (ret)
+               return ret;
+
+       mutex_lock(&c->replicas_gc_lock);
+       bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
+
+       seq = 0;
+
+       spin_lock(&j->lock);
+       while (!ret && seq < j->pin.back) {
+               struct bch_replicas_padded replicas;
+
+               seq = max(seq, journal_last_seq(j));
+               bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+                                        journal_seq_pin(j, seq)->devs);
+               seq++;
+
+               spin_unlock(&j->lock);
+               ret = bch2_mark_replicas(c, &replicas.e);
+               spin_lock(&j->lock);
+       }
+       spin_unlock(&j->lock);
+
+       ret = bch2_replicas_gc_end(c, ret);
+       mutex_unlock(&c->replicas_gc_lock);
+
+       return ret;
+}
diff --git a/libbcachefs/journal_reclaim.h b/libbcachefs/journal_reclaim.h
new file mode 100644 (file)
index 0000000..8128907
--- /dev/null
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_RECLAIM_H
+#define _BCACHEFS_JOURNAL_RECLAIM_H
+
+#define JOURNAL_PIN    (32 * 1024)
+
+enum journal_space_from {
+       journal_space_discarded,
+       journal_space_clean_ondisk,
+       journal_space_clean,
+};
+
+unsigned bch2_journal_dev_buckets_available(struct journal *,
+                                           struct journal_device *,
+                                           enum journal_space_from);
+void bch2_journal_space_available(struct journal *);
+
+static inline bool journal_pin_active(struct journal_entry_pin *pin)
+{
+       return pin->seq != 0;
+}
+
+static inline struct journal_entry_pin_list *
+journal_seq_pin(struct journal *j, u64 seq)
+{
+       EBUG_ON(seq < j->pin.front || seq >= j->pin.back);
+
+       return &j->pin.data[seq & j->pin.mask];
+}
+
+void bch2_journal_pin_put(struct journal *, u64);
+void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
+
+void __bch2_journal_pin_add(struct journal *, u64, struct journal_entry_pin *,
+                           journal_pin_flush_fn);
+
+static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
+                                       struct journal_entry_pin *pin,
+                                       journal_pin_flush_fn flush_fn)
+{
+       if (unlikely(!journal_pin_active(pin) || pin->seq > seq))
+               __bch2_journal_pin_add(j, seq, pin, flush_fn);
+}
+
+void bch2_journal_pin_update(struct journal *, u64,
+                            struct journal_entry_pin *,
+                            journal_pin_flush_fn);
+
+void bch2_journal_pin_copy(struct journal *,
+                          struct journal_entry_pin *,
+                          struct journal_entry_pin *,
+                          journal_pin_flush_fn);
+
+void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);
+
+void bch2_journal_do_discards(struct journal *);
+void bch2_journal_reclaim(struct journal *);
+void bch2_journal_reclaim_work(struct work_struct *);
+
+bool bch2_journal_flush_pins(struct journal *, u64);
+
+static inline bool bch2_journal_flush_all_pins(struct journal *j)
+{
+       return bch2_journal_flush_pins(j, U64_MAX);
+}
+
+int bch2_journal_flush_device_pins(struct journal *, int);
+
+#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */
diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c
new file mode 100644 (file)
index 0000000..d0f1bbf
--- /dev/null
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_iter.h"
+#include "eytzinger.h"
+#include "journal_seq_blacklist.h"
+#include "super-io.h"
+
+/*
+ * journal_seq_blacklist machinery:
+ *
+ * To guarantee order of btree updates after a crash, we need to detect when a
+ * btree node entry (bset) is newer than the newest journal entry that was
+ * successfully written, and ignore it - effectively ignoring any btree updates
+ * that didn't make it into the journal.
+ *
+ * If we didn't do this, we might have two btree nodes, a and b, both with
+ * updates that weren't written to the journal yet: if b was updated after a,
+ * but b was flushed and not a - oops; on recovery we'll find that the updates
+ * to b happened, but not the updates to a that happened before it.
+ *
+ * Ignoring bsets that are newer than the newest journal entry is always safe,
+ * because everything they contain will also have been journalled - and must
+ * still be present in the journal on disk until a journal entry has been
+ * written _after_ that bset was written.
+ *
+ * To accomplish this, bsets record the newest journal sequence number they
+ * contain updates for; then, on startup, the btree code queries the journal
+ * code to ask "Is this sequence number newer than the newest journal entry? If
+ * so, ignore it."
+ *
+ * When this happens, we must blacklist that journal sequence number: the
+ * journal must not write any entries with that sequence number, and it must
+ * record that it was blacklisted so that a) on recovery we don't think we have
+ * missing journal entries and b) so that the btree code continues to ignore
+ * that bset, until that btree node is rewritten.
+ */
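+
+/*
+ * Concretely (hypothetical sequence numbers): if the newest journal entry
+ * that made it to disk is seq 100, but on recovery we find a bset claiming
+ * updates up to seq 102, those updates never hit the journal - so we
+ * blacklist [101, 103): the journal skips those sequence numbers for new
+ * entries, recovery doesn't report them as missing, and the bset keeps
+ * being ignored until its btree node is rewritten.
+ */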
+
+static unsigned sb_blacklist_u64s(unsigned nr)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl;
+
+       return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
+}
+
+static struct bch_sb_field_journal_seq_blacklist *
+blacklist_entry_try_merge(struct bch_fs *c,
+                         struct bch_sb_field_journal_seq_blacklist *bl,
+                         unsigned i)
+{
+       unsigned nr = blacklist_nr_entries(bl);
+
+       if (le64_to_cpu(bl->start[i].end) >=
+           le64_to_cpu(bl->start[i + 1].start)) {
+               bl->start[i].end = bl->start[i + 1].end;
+               --nr;
+               memmove(&bl->start[i],
+                       &bl->start[i + 1],
+                       sizeof(bl->start[0]) * (nr - i));
+
+               bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                                                       sb_blacklist_u64s(nr));
+               BUG_ON(!bl);
+       }
+
+       return bl;
+}
+
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl;
+       unsigned i, nr;
+       int ret = 0;
+
+       mutex_lock(&c->sb_lock);
+       bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+       nr = blacklist_nr_entries(bl);
+
+       if (bl) {
+               for (i = 0; i < nr; i++) {
+                       struct journal_seq_blacklist_entry *e =
+                               bl->start + i;
+
+                       if (start == le64_to_cpu(e->start) &&
+                           end   == le64_to_cpu(e->end))
+                               goto out;
+
+                       if (start <= le64_to_cpu(e->start) &&
+                           end   >= le64_to_cpu(e->end)) {
+                               e->start = cpu_to_le64(start);
+                               e->end  = cpu_to_le64(end);
+
+                               if (i + 1 < nr)
+                                       bl = blacklist_entry_try_merge(c,
+                                                               bl, i);
+                               if (i)
+                                       bl = blacklist_entry_try_merge(c,
+                                                               bl, i - 1);
+                               goto out_write_sb;
+                       }
+               }
+       }
+
+       bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                                       sb_blacklist_u64s(nr + 1));
+       if (!bl) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       bl->start[nr].start     = cpu_to_le64(start);
+       bl->start[nr].end       = cpu_to_le64(end);
+out_write_sb:
+       c->disk_sb.sb->features[0] |=
+               1ULL << BCH_FEATURE_journal_seq_blacklist_v3;
+
+       ret = bch2_write_super(c);
+out:
+       mutex_unlock(&c->sb_lock);
+
+       return ret;
+}
+
+static int journal_seq_blacklist_table_cmp(const void *_l,
+                                          const void *_r, size_t size)
+{
+       const struct journal_seq_blacklist_table_entry *l = _l;
+       const struct journal_seq_blacklist_table_entry *r = _r;
+
+       return cmp_int(l->start, r->start);
+}
+
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
+                                    bool dirty)
+{
+       struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+       struct journal_seq_blacklist_table_entry search = { .start = seq };
+       int idx;
+
+       if (!t)
+               return false;
+
+       idx = eytzinger0_find_le(t->entries, t->nr,
+                                sizeof(t->entries[0]),
+                                journal_seq_blacklist_table_cmp,
+                                &search);
+       if (idx < 0)
+               return false;
+
+       BUG_ON(t->entries[idx].start > seq);
+
+       if (seq >= t->entries[idx].end)
+               return false;
+
+       if (dirty)
+               t->entries[idx].dirty = true;
+       return true;
+}
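+
+/*
+ * Lookup sketch: the table is kept in eytzinger (cache-friendly BFS) order,
+ * and eytzinger0_find_le() returns the entry with the greatest start <= seq.
+ * E.g. with (hypothetical) entries [10,20) and [50,60), seq = 55 lands on
+ * [50,60) and is blacklisted, while seq = 30 lands on [10,20), fails the
+ * seq < end check, and is not.
+ */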
+
+int bch2_blacklist_table_initialize(struct bch_fs *c)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl =
+               bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+       struct journal_seq_blacklist_table *t;
+       unsigned i, nr = blacklist_nr_entries(bl);
+
+       BUG_ON(c->journal_seq_blacklist_table);
+
+       if (!bl)
+               return 0;
+
+       t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
+                   GFP_KERNEL);
+       if (!t)
+               return -ENOMEM;
+
+       t->nr = nr;
+
+       for (i = 0; i < nr; i++) {
+               t->entries[i].start     = le64_to_cpu(bl->start[i].start);
+               t->entries[i].end       = le64_to_cpu(bl->start[i].end);
+       }
+
+       eytzinger0_sort(t->entries,
+                       t->nr,
+                       sizeof(t->entries[0]),
+                       journal_seq_blacklist_table_cmp,
+                       NULL);
+
+       c->journal_seq_blacklist_table = t;
+       return 0;
+}
+
+static const char *
+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
+                                      struct bch_sb_field *f)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl =
+               field_to_type(f, journal_seq_blacklist);
+       struct journal_seq_blacklist_entry *i;
+       unsigned nr = blacklist_nr_entries(bl);
+
+       for (i = bl->start; i < bl->start + nr; i++) {
+               if (le64_to_cpu(i->start) >=
+                   le64_to_cpu(i->end))
+                       return "entry start >= end";
+
+               if (i + 1 < bl->start + nr &&
+                   le64_to_cpu(i[0].end) >
+                   le64_to_cpu(i[1].start))
+                       return "entries out of order";
+       }
+
+       return NULL;
+}
+
+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
+                                                 struct bch_sb *sb,
+                                                 struct bch_sb_field *f)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl =
+               field_to_type(f, journal_seq_blacklist);
+       struct journal_seq_blacklist_entry *i;
+       unsigned nr = blacklist_nr_entries(bl);
+
+       for (i = bl->start; i < bl->start + nr; i++) {
+               if (i != bl->start)
+                       pr_buf(out, " ");
+
+               pr_buf(out, "%llu-%llu",
+                      le64_to_cpu(i->start),
+                      le64_to_cpu(i->end));
+       }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
+       .validate       = bch2_sb_journal_seq_blacklist_validate,
+       .to_text        = bch2_sb_journal_seq_blacklist_to_text
+};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
+{
+       struct bch_fs *c = container_of(work, struct bch_fs,
+                                       journal_seq_blacklist_gc_work);
+       struct journal_seq_blacklist_table *t;
+       struct bch_sb_field_journal_seq_blacklist *bl;
+       struct journal_seq_blacklist_entry *src, *dst;
+       struct btree_trans trans;
+       unsigned i, nr, new_nr;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               struct btree_iter *iter;
+               struct btree *b;
+
+               for_each_btree_node(&trans, iter, i, POS_MIN,
+                                   BTREE_ITER_PREFETCH, b)
+                       if (test_bit(BCH_FS_STOPPING, &c->flags)) {
+                               bch2_trans_exit(&trans);
+                               return;
+                       }
+               bch2_trans_iter_free(&trans, iter);
+       }
+
+       ret = bch2_trans_exit(&trans);
+       if (ret)
+               return;
+
+       mutex_lock(&c->sb_lock);
+       bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+       if (!bl)
+               goto out;
+
+       nr = blacklist_nr_entries(bl);
+       dst = bl->start;
+
+       t = c->journal_seq_blacklist_table;
+       BUG_ON(nr != t->nr);
+
+       for (src = bl->start, i = eytzinger0_first(t->nr);
+            src < bl->start + nr;
+            src++, i = eytzinger0_next(i, nr)) {
+               BUG_ON(t->entries[i].start      != le64_to_cpu(src->start));
+               BUG_ON(t->entries[i].end        != le64_to_cpu(src->end));
+
+               if (t->entries[i].dirty)
+                       *dst++ = *src;
+       }
+
+       new_nr = dst - bl->start;
+
+       bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+       if (new_nr != nr) {
+               bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                               new_nr ? sb_blacklist_u64s(new_nr) : 0);
+               BUG_ON(new_nr && !bl);
+
+               if (!new_nr)
+                       c->disk_sb.sb->features[0] &=
+                               ~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
+
+               bch2_write_super(c);
+       }
+out:
+       mutex_unlock(&c->sb_lock);
+}
diff --git a/libbcachefs/journal_seq_blacklist.h b/libbcachefs/journal_seq_blacklist.h
new file mode 100644 (file)
index 0000000..afb886e
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
+
+static inline unsigned
+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
+{
+       return bl
+               ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
+                  sizeof(struct journal_seq_blacklist_entry))
+               : 0;
+}
+
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
+int bch2_blacklist_table_initialize(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+
+void bch2_blacklist_entries_gc(struct work_struct *);
+
+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h
new file mode 100644 (file)
index 0000000..154b51b
--- /dev/null
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_TYPES_H
+#define _BCACHEFS_JOURNAL_TYPES_H
+
+#include <linux/cache.h>
+#include <linux/workqueue.h>
+
+#include "alloc_types.h"
+#include "super_types.h"
+#include "fifo.h"
+
+struct journal_res;
+
+/*
+ * We put two of these in struct journal; we use them for writes to the
+ * journal that are being staged or in flight.
+ */
+struct journal_buf {
+       struct jset             *data;
+
+       BKEY_PADDED(key);
+
+       struct closure_waitlist wait;
+
+       unsigned                buf_size;       /* size in bytes of @data */
+       unsigned                sectors;        /* maximum size for current entry */
+       unsigned                disk_sectors;   /* maximum size entry could have been, if
+                                                  buf_size was bigger */
+       unsigned                u64s_reserved;
+       /* bloom filter: */
+       unsigned long           has_inode[1024 / sizeof(unsigned long)];
+};
+
+/*
+ * Something that makes a journal entry dirty - i.e. a btree node that has to be
+ * flushed:
+ */
+
+struct journal_entry_pin_list {
+       struct list_head                list;
+       struct list_head                flushed;
+       atomic_t                        count;
+       struct bch_devs_list            devs;
+};
+
+struct journal;
+struct journal_entry_pin;
+typedef void (*journal_pin_flush_fn)(struct journal *j,
+                               struct journal_entry_pin *, u64);
+
+struct journal_entry_pin {
+       struct list_head                list;
+       journal_pin_flush_fn            flush;
+       u64                             seq;
+};
+
+struct journal_res {
+       bool                    ref;
+       u8                      idx;
+       u16                     u64s;
+       u32                     offset;
+       u64                     seq;
+};
+
+/*
+ * For reserving space in the journal prior to getting a reservation on a
+ * particular journal entry:
+ */
+struct journal_preres {
+       unsigned                u64s;
+};
+
+union journal_res_state {
+       struct {
+               atomic64_t      counter;
+       };
+
+       struct {
+               u64             v;
+       };
+
+       struct {
+               u64             cur_entry_offset:20,
+                               idx:1,
+                               prev_buf_unwritten:1,
+                               buf0_count:21,
+                               buf1_count:21;
+       };
+};
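+
+/*
+ * Note the bitfields above pack into exactly one u64
+ * (20 + 1 + 1 + 21 + 21 = 64 bits), overlaying @counter and @v - which is
+ * what lets journal reservation state be read and updated with single
+ * atomic operations rather than under a lock.
+ */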
+
+union journal_preres_state {
+       struct {
+               atomic64_t      counter;
+       };
+
+       struct {
+               u64             v;
+       };
+
+       struct {
+               u32             reserved;
+               u32             remaining;
+       };
+};
+
+/* bytes: */
+#define JOURNAL_ENTRY_SIZE_MIN         (64U << 10) /* 64k */
+#define JOURNAL_ENTRY_SIZE_MAX         (4U  << 20) /* 4M */
+
+/*
+ * We stash some journal state as sentinel values in cur_entry_offset:
+ * note - cur_entry_offset is in units of u64s
+ */
+#define JOURNAL_ENTRY_OFFSET_MAX       ((1U << 20) - 1)
+
+#define JOURNAL_ENTRY_CLOSED_VAL       (JOURNAL_ENTRY_OFFSET_MAX - 1)
+#define JOURNAL_ENTRY_ERROR_VAL                (JOURNAL_ENTRY_OFFSET_MAX)
+
+/*
+ * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
+ * either because something's waiting on the write to complete or because it's
+ * been dirty too long and the timer's expired.
+ */
+
+enum {
+       JOURNAL_REPLAY_DONE,
+       JOURNAL_STARTED,
+       JOURNAL_RECLAIM_STARTED,
+       JOURNAL_NEED_WRITE,
+       JOURNAL_NOT_EMPTY,
+       JOURNAL_MAY_GET_UNRESERVED,
+};
+
+/* Embedded in struct bch_fs */
+struct journal {
+       /* Fastpath stuff up front: */
+
+       unsigned long           flags;
+
+       union journal_res_state reservations;
+
+       /* Max size of current journal entry */
+       unsigned                cur_entry_u64s;
+       unsigned                cur_entry_sectors;
+
+       /*
+        * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
+        * insufficient devices:
+        */
+       int                     cur_entry_error;
+
+       union journal_preres_state prereserved;
+
+       /* Reserved space in journal entry to be used just prior to write */
+       unsigned                entry_u64s_reserved;
+
+       unsigned                buf_size_want;
+
+       /*
+        * Two journal entries -- one is currently open for new entries, the
+        * other is possibly being written out.
+        */
+       struct journal_buf      buf[2];
+
+       spinlock_t              lock;
+
+       /* if nonzero, we may not open a new journal entry: */
+       unsigned                blocked;
+
+       /* Used when waiting because the journal was full */
+       wait_queue_head_t       wait;
+       struct closure_waitlist async_wait;
+       struct closure_waitlist preres_wait;
+
+       struct closure          io;
+       struct delayed_work     write_work;
+
+       /* Sequence number of most recent journal entry (last entry in @pin) */
+       atomic64_t              seq;
+
+       /* seq, last_seq from the most recent journal entry successfully written */
+       u64                     seq_ondisk;
+       u64                     last_seq_ondisk;
+
+       /*
+        * FIFO of journal entries whose btree updates have not yet been
+        * written out.
+        *
+        * Each entry is a reference count. The position in the FIFO is the
+        * entry's sequence number relative to @seq.
+        *
+        * The journal entry itself holds a reference count, put when the
+        * journal entry is written out. Each btree node modified by the journal
+        * entry also holds a reference count, put when the btree node is
+        * written.
+        *
+        * When a reference count reaches zero, the journal entry is no longer
+        * needed. When all journal entries in the oldest journal bucket are no
+        * longer needed, the bucket can be discarded and reused.
+        */
+       struct {
+               u64 front, back, size, mask;
+               struct journal_entry_pin_list *data;
+       }                       pin;
+
+       u64                     replay_journal_seq;
+       u64                     replay_journal_seq_end;
+
+       struct write_point      wp;
+       spinlock_t              err_lock;
+
+       struct delayed_work     reclaim_work;
+       struct mutex            reclaim_lock;
+       unsigned long           last_flushed;
+       struct journal_entry_pin *flush_in_progress;
+       wait_queue_head_t       pin_flush_wait;
+
+       /* protects advancing ja->discard_idx: */
+       struct mutex            discard_lock;
+       bool                    can_discard;
+
+       unsigned                write_delay_ms;
+       unsigned                reclaim_delay_ms;
+
+       u64                     res_get_blocked_start;
+       u64                     need_write_time;
+       u64                     write_start_time;
+
+       struct time_stats       *write_time;
+       struct time_stats       *delay_time;
+       struct time_stats       *blocked_time;
+       struct time_stats       *flush_seq_time;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lockdep_map      res_map;
+#endif
+};
+
+/*
+ * Embedded in struct bch_dev. First three fields refer to the array of journal
+ * buckets, in bch_sb.
+ */
+struct journal_device {
+       /*
+        * For each journal bucket, the max sequence number of the journal
+        * writes it contains - so we know when a bucket can be reused:
+        */
+       u64                     *bucket_seq;
+
+       unsigned                sectors_free;
+
+       /*
+        * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
+        */
+       unsigned                discard_idx;            /* Next bucket to discard */
+       unsigned                dirty_idx_ondisk;
+       unsigned                dirty_idx;
+       unsigned                cur_idx;                /* Journal bucket we're currently writing to */
+       unsigned                nr;
+
+       u64                     *buckets;
+
+       /* Bio for journal reads/writes to this device */
+       struct bio              *bio;
+
+       /* for bch_journal_read_device */
+       struct closure          read;
+};
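+
+/*
+ * e.g. (illustrative): if bucket_seq[i] is older than the oldest sequence
+ * number the journal still needs (last_seq), every entry in bucket i has
+ * been superseded and the bucket can be discarded and rewritten.
+ */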
+
+/*
+ * journal_entry_res - reserve space in every journal entry:
+ */
+struct journal_entry_res {
+       unsigned                u64s;
+};
+
+#endif /* _BCACHEFS_JOURNAL_TYPES_H */
diff --git a/libbcachefs/keylist.c b/libbcachefs/keylist.c
new file mode 100644 (file)
index 0000000..864dfaa
--- /dev/null
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "keylist.h"
+
+int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
+                       size_t nr_inline_u64s, size_t new_u64s)
+{
+       size_t oldsize = bch2_keylist_u64s(l);
+       size_t newsize = oldsize + new_u64s;
+       u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
+       u64 *new_keys;
+
+       newsize = roundup_pow_of_two(newsize);
+
+       if (newsize <= nr_inline_u64s ||
+           (old_buf && roundup_pow_of_two(oldsize) == newsize))
+               return 0;
+
+       new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOIO);
+       if (!new_keys)
+               return -ENOMEM;
+
+       if (!old_buf)
+               memcpy_u64s(new_keys, inline_u64s, oldsize);
+
+       l->keys_p = new_keys;
+       l->top_p = new_keys + oldsize;
+
+       return 0;
+}
+
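+/* Insert @insert into @l, keeping the keylist sorted by key position: */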
+void bch2_keylist_add_in_order(struct keylist *l, struct bkey_i *insert)
+{
+       struct bkey_i *where;
+
+       for_each_keylist_key(l, where)
+               if (bkey_cmp(insert->k.p, where->k.p) < 0)
+                       break;
+
+       memmove_u64s_up((u64 *) where + insert->k.u64s,
+                       where,
+                       ((u64 *) l->top) - ((u64 *) where));
+
+       l->top_p += insert->k.u64s;
+       bkey_copy(where, insert);
+}
+
+void bch2_keylist_pop_front(struct keylist *l)
+{
+       l->top_p -= bch2_keylist_front(l)->k.u64s;
+
+       memmove_u64s_down(l->keys,
+                         bkey_next(l->keys),
+                         bch2_keylist_u64s(l));
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_verify_keylist_sorted(struct keylist *l)
+{
+       struct bkey_i *k;
+
+       for_each_keylist_key(l, k)
+               BUG_ON(bkey_next(k) != l->top &&
+                      bkey_cmp(k->k.p, bkey_next(k)->k.p) >= 0);
+}
+#endif
diff --git a/libbcachefs/keylist.h b/libbcachefs/keylist.h
new file mode 100644 (file)
index 0000000..195799b
--- /dev/null
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_KEYLIST_H
+#define _BCACHEFS_KEYLIST_H
+
+#include "keylist_types.h"
+
+int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t);
+void bch2_keylist_add_in_order(struct keylist *, struct bkey_i *);
+void bch2_keylist_pop_front(struct keylist *);
+
+static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys)
+{
+       l->top_p = l->keys_p = inline_keys;
+}
+
+static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys)
+{
+       if (l->keys_p != inline_keys)
+               kfree(l->keys_p);
+       bch2_keylist_init(l, inline_keys);
+}
+
+static inline void bch2_keylist_push(struct keylist *l)
+{
+       l->top = bkey_next(l->top);
+}
+
+static inline void bch2_keylist_add(struct keylist *l, const struct bkey_i *k)
+{
+       bkey_copy(l->top, k);
+       bch2_keylist_push(l);
+}
+
+static inline bool bch2_keylist_empty(struct keylist *l)
+{
+       return l->top == l->keys;
+}
+
+static inline size_t bch2_keylist_u64s(struct keylist *l)
+{
+       return l->top_p - l->keys_p;
+}
+
+static inline size_t bch2_keylist_bytes(struct keylist *l)
+{
+       return bch2_keylist_u64s(l) * sizeof(u64);
+}
+
+static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
+{
+       return l->keys;
+}
+
+#define for_each_keylist_key(_keylist, _k)                     \
+       for (_k = (_keylist)->keys;                             \
+            _k != (_keylist)->top;                             \
+            _k = bkey_next(_k))
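+
+/*
+ * Typical usage sketch (illustrative - the inline buffer size and the key
+ * @k are hypothetical):
+ *
+ *     u64 inline_keys[BKEY_U64s * 4];
+ *     struct keylist keys;
+ *
+ *     bch2_keylist_init(&keys, inline_keys);
+ *     if (!bch2_keylist_realloc(&keys, inline_keys,
+ *                               ARRAY_SIZE(inline_keys), k->k.u64s))
+ *             bch2_keylist_add(&keys, k);
+ *     ...
+ *     bch2_keylist_free(&keys, inline_keys);
+ */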
+
+static inline u64 keylist_sectors(struct keylist *keys)
+{
+       struct bkey_i *k;
+       u64 ret = 0;
+
+       for_each_keylist_key(keys, k)
+               ret += k->k.size;
+
+       return ret;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_verify_keylist_sorted(struct keylist *);
+#else
+static inline void bch2_verify_keylist_sorted(struct keylist *l) {}
+#endif
+
+#endif /* _BCACHEFS_KEYLIST_H */
diff --git a/libbcachefs/keylist_types.h b/libbcachefs/keylist_types.h
new file mode 100644 (file)
index 0000000..4b3ff7d
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_KEYLIST_TYPES_H
+#define _BCACHEFS_KEYLIST_TYPES_H
+
+struct keylist {
+       union {
+               struct bkey_i           *keys;
+               u64                     *keys_p;
+       };
+       union {
+               struct bkey_i           *top;
+               u64                     *top_p;
+       };
+};
+
+#endif /* _BCACHEFS_KEYLIST_TYPES_H */
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
new file mode 100644 (file)
index 0000000..96c8690
--- /dev/null
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for moving data off a device.
+ */
+
+#include "bcachefs.h"
+#include "bkey_on_stack.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "extents.h"
+#include "io.h"
+#include "journal.h"
+#include "keylist.h"
+#include "migrate.h"
+#include "move.h"
+#include "replicas.h"
+#include "super-io.h"
+
+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
+                        unsigned dev_idx, int flags, bool metadata)
+{
+       unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
+       unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
+       unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED;
+       unsigned nr_good;
+
+       bch2_bkey_drop_device(k, dev_idx);
+
+       nr_good = bch2_bkey_durability(c, k.s_c);
+       if ((!nr_good && !(flags & lost)) ||
+           (nr_good < replicas && !(flags & degraded)))
+               return -EINVAL;
+
+       return 0;
+}
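+
+/*
+ * e.g. with data_replicas = 2: dropping one of two pointers leaves
+ * nr_good = 1, which is only allowed with BCH_FORCE_IF_DATA_DEGRADED;
+ * dropping the last pointer (nr_good = 0) requires BCH_FORCE_IF_DATA_LOST
+ * as well.
+ */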
+
+static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags,
+                                  enum btree_id btree_id)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_on_stack sk;
+       int ret = 0;
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
+                                  BTREE_ITER_PREFETCH);
+
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k))) {
+               if (!bch2_bkey_has_device(k, dev_idx)) {
+                       bch2_btree_iter_next(iter);
+                       continue;
+               }
+
+               bkey_on_stack_reassemble(&sk, c, k);
+
+               ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
+                                   dev_idx, flags, false);
+               if (ret)
+                       break;
+
+               /*
+                * If the new extent no longer has any pointers,
+                * bch2_extent_normalize() will do the appropriate thing with
+                * it (turning it into a KEY_TYPE_error key, or just a discard
+                * if it was a cached extent)
+                */
+               bch2_extent_normalize(c, bkey_i_to_s(sk.k));
+
+               bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
+
+               bch2_trans_update(&trans, iter, sk.k, 0);
+
+               ret = bch2_trans_commit(&trans, NULL, NULL,
+                                       BTREE_INSERT_NOFAIL);
+
+               /*
+                * don't want to leave ret == -EINTR, since if we raced and
+                * something else overwrote the key we could spuriously return
+                * -EINTR below:
+                */
+               if (ret == -EINTR)
+                       ret = 0;
+               if (ret)
+                       break;
+       }
+
+       ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&sk, c);
+
+       BUG_ON(ret == -EINTR);
+
+       return ret;
+}
+
+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+{
+       return  __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?:
+               __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK);
+}
+
+static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct closure cl;
+       struct btree *b;
+       unsigned id;
+       int ret;
+
+       /* don't handle this yet: */
+       if (flags & BCH_FORCE_IF_METADATA_LOST)
+               return -EINVAL;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       closure_init_stack(&cl);
+
+       for (id = 0; id < BTREE_ID_NR; id++) {
+               for_each_btree_node(&trans, iter, id, POS_MIN,
+                                   BTREE_ITER_PREFETCH, b) {
+                       __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+retry:
+                       if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
+                                                 dev_idx))
+                               continue;
+
+                       bkey_copy(&tmp.k, &b->key);
+
+                       ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k),
+                                           dev_idx, flags, true);
+                       if (ret) {
+                               bch_err(c, "Cannot drop device without losing data");
+                               goto err;
+                       }
+
+                       ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
+                       if (ret == -EINTR) {
+                               b = bch2_btree_iter_peek_node(iter);
+                               goto retry;
+                       }
+                       if (ret) {
+                               bch_err(c, "Error updating btree node key: %i", ret);
+                               goto err;
+                       }
+               }
+               bch2_trans_iter_free(&trans, iter);
+       }
+
+       /* flush relevant btree updates */
+       closure_wait_event(&c->btree_interior_update_wait,
+                          !bch2_btree_interior_updates_nr_pending(c));
+
+       ret = 0;
+err:
+       ret = bch2_trans_exit(&trans) ?: ret;
+
+       BUG_ON(ret == -EINTR);
+
+       return ret;
+}
+
+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+{
+       return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
+               bch2_dev_metadata_drop(c, dev_idx, flags);
+}
diff --git a/libbcachefs/migrate.h b/libbcachefs/migrate.h
new file mode 100644 (file)
index 0000000..027efaa
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MIGRATE_H
+#define _BCACHEFS_MIGRATE_H
+
+int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
+
+#endif /* _BCACHEFS_MIGRATE_H */
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
new file mode 100644 (file)
index 0000000..62dcac7
--- /dev/null
@@ -0,0 +1,826 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "disk_groups.h"
+#include "inode.h"
+#include "io.h"
+#include "journal_reclaim.h"
+#include "move.h"
+#include "replicas.h"
+#include "super-io.h"
+#include "keylist.h"
+
+#include <linux/ioprio.h>
+#include <linux/kthread.h>
+
+#include <trace/events/bcachefs.h>
+
+#define SECTORS_IN_FLIGHT_PER_DEVICE   2048
+
+struct moving_io {
+       struct list_head        list;
+       struct closure          cl;
+       bool                    read_completed;
+
+       unsigned                read_sectors;
+       unsigned                write_sectors;
+
+       struct bch_read_bio     rbio;
+
+       struct migrate_write    write;
+       /* Must be last since it is variable size */
+       struct bio_vec          bi_inline_vecs[0];
+};
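+
+/*
+ * Lifecycle (as implemented below): bch2_move_extent() allocates a moving_io
+ * and issues the read; move_read_endio() marks the read completed;
+ * do_pending_writes() then kicks off move_write(), and move_write_done() /
+ * move_free() tear the io down.
+ */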
+
+struct moving_context {
+       /* Closure for waiting on all reads and writes to complete */
+       struct closure          cl;
+
+       struct bch_move_stats   *stats;
+
+       struct list_head        reads;
+
+       /* in flight sectors: */
+       atomic_t                read_sectors;
+       atomic_t                write_sectors;
+
+       wait_queue_head_t       wait;
+};
+
+static int bch2_migrate_index_update(struct bch_write_op *op)
+{
+       struct bch_fs *c = op->c;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct migrate_write *m =
+               container_of(op, struct migrate_write, op);
+       struct keylist *keys = &op->insert_keys;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+
+       iter = bch2_trans_get_iter(&trans, m->btree_id,
+                                  bkey_start_pos(&bch2_keylist_front(keys)->k),
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+       while (1) {
+               struct bkey_s_c k;
+               struct bkey_i *insert;
+               struct bkey_i_extent *new;
+               BKEY_PADDED(k) _new, _insert;
+               const union bch_extent_entry *entry;
+               struct extent_ptr_decoded p;
+               bool did_work = false;
+               int nr;
+
+               bch2_trans_reset(&trans, 0);
+
+               k = bch2_btree_iter_peek_slot(iter);
+               ret = bkey_err(k);
+               if (ret) {
+                       if (ret == -EINTR)
+                               continue;
+                       break;
+               }
+
+               new = bkey_i_to_extent(bch2_keylist_front(keys));
+
+               if (bversion_cmp(k.k->version, new->k.version) ||
+                   !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
+                       goto nomatch;
+
+               bkey_reassemble(&_insert.k, k);
+               insert = &_insert.k;
+
+               bkey_copy(&_new.k, bch2_keylist_front(keys));
+               new = bkey_i_to_extent(&_new.k);
+               bch2_cut_front(iter->pos, &new->k_i);
+
+               bch2_cut_front(iter->pos,       insert);
+               bch2_cut_back(new->k.p,         insert);
+               bch2_cut_back(insert->k.p,      &new->k_i);
+
+               if (m->data_cmd == DATA_REWRITE) {
+                       struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
+                               bch2_bkey_has_device(bkey_i_to_s_c(insert),
+                                                    m->data_opts.rewrite_dev);
+                       if (!old_ptr)
+                               goto nomatch;
+
+                       if (old_ptr->cached)
+                               extent_for_each_ptr(extent_i_to_s(new), new_ptr)
+                                       new_ptr->cached = true;
+
+                       bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+               }
+
+               extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
+                       if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
+                               /*
+                                * raced with another move op? extent already
+                                * has a pointer to the device we just wrote
+                                * data to
+                                */
+                               continue;
+                       }
+
+                       bch2_extent_ptr_decoded_append(insert, &p);
+                       did_work = true;
+               }
+
+               if (!did_work)
+                       goto nomatch;
+
+               bch2_bkey_narrow_crcs(insert,
+                               (struct bch_extent_crc_unpacked) { 0 });
+               bch2_extent_normalize(c, bkey_i_to_s(insert));
+               bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
+                                              op->opts.background_target,
+                                              op->opts.data_replicas);
+
+               /*
+                * If we're not fully overwriting @k, and it's compressed, we
+                * need a reservation for all the pointers in @insert
+                */
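+               /*
+                * (Illustrative numbers: if @k is 128 sectors stored
+                * compressed and @insert only overwrites 64 of them, the
+                * remainder of @k keeps referencing the compressed extent on
+                * disk, so the newly added pointers need their own
+                * reservation.)
+                */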
+               nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) -
+                        m->nr_ptrs_reserved;
+
+               if (insert->k.size < k.k->size &&
+                   bch2_bkey_sectors_compressed(k) &&
+                   nr > 0) {
+                       ret = bch2_disk_reservation_add(c, &op->res,
+                                       keylist_sectors(keys) * nr, 0);
+                       if (ret)
+                               goto out;
+
+                       m->nr_ptrs_reserved += nr;
+                       goto next;
+               }
+
+               bch2_trans_update(&trans, iter, insert, 0);
+
+               ret = bch2_trans_commit(&trans, &op->res,
+                               op_journal_seq(op),
+                               BTREE_INSERT_NOFAIL|
+                               BTREE_INSERT_USE_RESERVE|
+                               m->data_opts.btree_insert_flags);
+               if (!ret)
+                       atomic_long_inc(&c->extent_migrate_done);
+               if (ret == -EINTR)
+                       ret = 0;
+               if (ret)
+                       break;
+next:
+               while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
+                       bch2_keylist_pop_front(keys);
+                       if (bch2_keylist_empty(keys))
+                               goto out;
+               }
+               continue;
+nomatch:
+               if (m->ctxt) {
+                       BUG_ON(k.k->p.offset <= iter->pos.offset);
+                       atomic64_inc(&m->ctxt->stats->keys_raced);
+                       atomic64_add(k.k->p.offset - iter->pos.offset,
+                                    &m->ctxt->stats->sectors_raced);
+               }
+               atomic_long_inc(&c->extent_migrate_raced);
+               trace_move_race(&new->k);
+               bch2_btree_iter_next_slot(iter);
+               goto next;
+       }
+out:
+       bch2_trans_exit(&trans);
+       BUG_ON(ret == -EINTR);
+       return ret;
+}
+
+void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
+{
+       /* write bio must own pages: */
+       BUG_ON(!m->op.wbio.bio.bi_vcnt);
+
+       m->ptr          = rbio->pick.ptr;
+       m->offset       = rbio->pos.offset - rbio->pick.crc.offset;
+       m->op.devs_have = rbio->devs_have;
+       m->op.pos       = rbio->pos;
+       m->op.version   = rbio->version;
+       m->op.crc       = rbio->pick.crc;
+       m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
+
+       if (bch2_csum_type_is_encryption(m->op.crc.csum_type)) {
+               m->op.nonce     = m->op.crc.nonce + m->op.crc.offset;
+               m->op.csum_type = m->op.crc.csum_type;
+       }
+
+       if (m->data_cmd == DATA_REWRITE)
+               bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev);
+}
+
+int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
+                           struct write_point_specifier wp,
+                           struct bch_io_opts io_opts,
+                           enum data_cmd data_cmd,
+                           struct data_opts data_opts,
+                           enum btree_id btree_id,
+                           struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       int ret;
+
+       m->btree_id     = btree_id;
+       m->data_cmd     = data_cmd;
+       m->data_opts    = data_opts;
+       m->nr_ptrs_reserved = 0;
+
+       bch2_write_op_init(&m->op, c, io_opts);
+
+       if (!bch2_bkey_is_incompressible(k))
+               m->op.compression_type =
+                       bch2_compression_opt_to_type[io_opts.background_compression ?:
+                                                    io_opts.compression];
+       else
+               m->op.incompressible = true;
+
+       m->op.target    = data_opts.target;
+       m->op.write_point = wp;
+
+       if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) {
+               m->op.alloc_reserve = RESERVE_MOVINGGC;
+               m->op.flags |= BCH_WRITE_ALLOC_NOWAIT;
+       } else {
+               /* XXX: this should probably be passed in */
+               m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS;
+       }
+
+       m->op.flags |= BCH_WRITE_PAGES_STABLE|
+               BCH_WRITE_PAGES_OWNED|
+               BCH_WRITE_DATA_ENCODED|
+               BCH_WRITE_FROM_INTERNAL;
+
+       m->op.nr_replicas       = 1;
+       m->op.nr_replicas_required = 1;
+       m->op.index_update_fn   = bch2_migrate_index_update;
+
+       switch (data_cmd) {
+       case DATA_ADD_REPLICAS: {
+               /*
+                * DATA_ADD_REPLICAS is used for moving data to a different
+                * device in the background, and due to compression the new copy
+                * might take up more space than the old copy:
+                */
+#if 0
+               int nr = (int) io_opts.data_replicas -
+                       bch2_bkey_nr_ptrs_allocated(k);
+#endif
+               int nr = (int) io_opts.data_replicas;
+
+               if (nr > 0) {
+                       m->op.nr_replicas = m->nr_ptrs_reserved = nr;
+
+                       ret = bch2_disk_reservation_get(c, &m->op.res,
+                                       k.k->size, m->op.nr_replicas, 0);
+                       if (ret)
+                               return ret;
+               }
+               break;
+       }
+       case DATA_REWRITE: {
+               unsigned compressed_sectors = 0;
+
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+                       if (p.ptr.dev == data_opts.rewrite_dev &&
+                           !p.ptr.cached &&
+                           crc_is_compressed(p.crc))
+                               compressed_sectors += p.crc.compressed_size;
+
+               if (compressed_sectors) {
+                       ret = bch2_disk_reservation_add(c, &m->op.res,
+                                       k.k->size * m->op.nr_replicas,
+                                       BCH_DISK_RESERVATION_NOFAIL);
+                       if (ret)
+                               return ret;
+               }
+               break;
+       }
+       case DATA_PROMOTE:
+               m->op.flags     |= BCH_WRITE_ALLOC_NOWAIT;
+               m->op.flags     |= BCH_WRITE_CACHED;
+               break;
+       default:
+               BUG();
+       }
+
+       return 0;
+}
+
+static void move_free(struct closure *cl)
+{
+       struct moving_io *io = container_of(cl, struct moving_io, cl);
+       struct moving_context *ctxt = io->write.ctxt;
+       struct bvec_iter_all iter;
+       struct bio_vec *bv;
+
+       bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
+
+       bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
+               if (bv->bv_page)
+                       __free_page(bv->bv_page);
+
+       wake_up(&ctxt->wait);
+
+       kfree(io);
+}
+
+static void move_write_done(struct closure *cl)
+{
+       struct moving_io *io = container_of(cl, struct moving_io, cl);
+
+       atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
+       closure_return_with_destructor(cl, move_free);
+}
+
+static void move_write(struct closure *cl)
+{
+       struct moving_io *io = container_of(cl, struct moving_io, cl);
+
+       if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
+               closure_return_with_destructor(cl, move_free);
+               return;
+       }
+
+       bch2_migrate_read_done(&io->write, &io->rbio);
+
+       atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
+       closure_call(&io->write.op.cl, bch2_write, NULL, cl);
+       continue_at(cl, move_write_done, NULL);
+}
+
+static inline struct moving_io *next_pending_write(struct moving_context *ctxt)
+{
+       struct moving_io *io =
+               list_first_entry_or_null(&ctxt->reads, struct moving_io, list);
+
+       return io && io->read_completed ? io : NULL;
+}
+
+static void move_read_endio(struct bio *bio)
+{
+       struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
+       struct moving_context *ctxt = io->write.ctxt;
+
+       atomic_sub(io->read_sectors, &ctxt->read_sectors);
+       io->read_completed = true;
+
+       if (next_pending_write(ctxt))
+               wake_up(&ctxt->wait);
+
+       closure_put(&ctxt->cl);
+}
+
+static void do_pending_writes(struct moving_context *ctxt)
+{
+       struct moving_io *io;
+
+       while ((io = next_pending_write(ctxt))) {
+               list_del(&io->list);
+               closure_call(&io->cl, move_write, NULL, &ctxt->cl);
+       }
+}
+
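+/*
+ * Flush reads that have completed, then wait until either @_cond becomes
+ * true or another read completes:
+ */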
+#define move_ctxt_wait_event(_ctxt, _cond)                     \
+do {                                                           \
+       do_pending_writes(_ctxt);                               \
+                                                               \
+       if (_cond)                                              \
+               break;                                          \
+       __wait_event((_ctxt)->wait,                             \
+                    next_pending_write(_ctxt) || (_cond));     \
+} while (1)
+
+static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
+{
+       unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
+
+       move_ctxt_wait_event(ctxt,
+               !atomic_read(&ctxt->write_sectors) ||
+               atomic_read(&ctxt->write_sectors) != sectors_pending);
+}
+
+static int bch2_move_extent(struct btree_trans *trans,
+                           struct moving_context *ctxt,
+                           struct write_point_specifier wp,
+                           struct bch_io_opts io_opts,
+                           enum btree_id btree_id,
+                           struct bkey_s_c k,
+                           enum data_cmd data_cmd,
+                           struct data_opts data_opts)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       struct moving_io *io;
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       unsigned sectors = k.k->size, pages;
+       int ret = -ENOMEM;
+
+       move_ctxt_wait_event(ctxt,
+               atomic_read(&ctxt->write_sectors) <
+               SECTORS_IN_FLIGHT_PER_DEVICE);
+
+       move_ctxt_wait_event(ctxt,
+               atomic_read(&ctxt->read_sectors) <
+               SECTORS_IN_FLIGHT_PER_DEVICE);
+
+       /* write path might have to decompress data: */
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+               sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
+
+       pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
+       io = kzalloc(sizeof(struct moving_io) +
+                    sizeof(struct bio_vec) * pages, GFP_KERNEL);
+       if (!io)
+               goto err;
+
+       io->write.ctxt          = ctxt;
+       io->read_sectors        = k.k->size;
+       io->write_sectors       = k.k->size;
+
+       bio_init(&io->write.op.wbio.bio, io->bi_inline_vecs, pages);
+       bio_set_prio(&io->write.op.wbio.bio,
+                    IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+
+       if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
+                                GFP_KERNEL))
+               goto err_free;
+
+       io->rbio.c              = c;
+       io->rbio.opts           = io_opts;
+       bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
+       io->rbio.bio.bi_vcnt = pages;
+       bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+       io->rbio.bio.bi_iter.bi_size = sectors << 9;
+
+       bio_set_op_attrs(&io->rbio.bio, REQ_OP_READ, 0);
+       io->rbio.bio.bi_iter.bi_sector  = bkey_start_offset(k.k);
+       io->rbio.bio.bi_end_io          = move_read_endio;
+
+       ret = bch2_migrate_write_init(c, &io->write, wp, io_opts,
+                                     data_cmd, data_opts, btree_id, k);
+       if (ret)
+               goto err_free_pages;
+
+       atomic64_inc(&ctxt->stats->keys_moved);
+       atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
+
+       trace_move_extent(k.k);
+
+       atomic_add(io->read_sectors, &ctxt->read_sectors);
+       list_add_tail(&io->list, &ctxt->reads);
+
+       /*
+        * dropped by move_read_endio() - guards against use after free of
+        * ctxt when doing wakeup
+        */
+       closure_get(&ctxt->cl);
+       bch2_read_extent(trans, &io->rbio, k, 0,
+                        BCH_READ_NODECODE|
+                        BCH_READ_LAST_FRAGMENT);
+       return 0;
+err_free_pages:
+       bio_free_pages(&io->write.op.wbio.bio);
+err_free:
+       kfree(io);
+err:
+       trace_move_alloc_fail(k.k);
+       return ret;
+}
+
+static int __bch2_move_data(struct bch_fs *c,
+               struct moving_context *ctxt,
+               struct bch_ratelimit *rate,
+               struct write_point_specifier wp,
+               struct bpos start,
+               struct bpos end,
+               move_pred_fn pred, void *arg,
+               struct bch_move_stats *stats,
+               enum btree_id btree_id)
+{
+       bool kthread = (current->flags & PF_KTHREAD) != 0;
+       struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+       struct bkey_on_stack sk;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct data_opts data_opts;
+       enum data_cmd data_cmd;
+       u64 delay, cur_inum = U64_MAX;
+       int ret = 0, ret2;
+
+       bkey_on_stack_init(&sk);
+       bch2_trans_init(&trans, c, 0, 0);
+
+       stats->data_type = BCH_DATA_user;
+       stats->btree_id = btree_id;
+       stats->pos      = POS_MIN;
+
+       iter = bch2_trans_get_iter(&trans, btree_id, start,
+                                  BTREE_ITER_PREFETCH);
+
+       if (rate)
+               bch2_ratelimit_reset(rate);
+
+       while (1) {
+               do {
+                       delay = rate ? bch2_ratelimit_delay(rate) : 0;
+
+                       if (delay) {
+                               bch2_trans_unlock(&trans);
+                               set_current_state(TASK_INTERRUPTIBLE);
+                       }
+
+                       if (kthread && (ret = kthread_should_stop())) {
+                               __set_current_state(TASK_RUNNING);
+                               goto out;
+                       }
+
+                       if (delay)
+                               schedule_timeout(delay);
+
+                       if (unlikely(freezing(current))) {
+                               bch2_trans_unlock(&trans);
+                               move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+                               try_to_freeze();
+                       }
+               } while (delay);
+peek:
+               k = bch2_btree_iter_peek(iter);
+
+               stats->pos = iter->pos;
+
+               if (!k.k)
+                       break;
+               ret = bkey_err(k);
+               if (ret)
+                       break;
+               if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
+                       break;
+
+               if (!bkey_extent_is_direct_data(k.k))
+                       goto next_nondata;
+
+               if (btree_id == BTREE_ID_EXTENTS &&
+                   cur_inum != k.k->p.inode) {
+                       struct bch_inode_unpacked inode;
+
+                       /* don't hold btree locks while looking up inode: */
+                       bch2_trans_unlock(&trans);
+
+                       io_opts = bch2_opts_to_inode_opts(c->opts);
+                       if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
+                               bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
+                       cur_inum = k.k->p.inode;
+                       goto peek;
+               }
+
+               switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {
+               case DATA_SKIP:
+                       goto next;
+               case DATA_SCRUB:
+                       BUG();
+               case DATA_ADD_REPLICAS:
+               case DATA_REWRITE:
+               case DATA_PROMOTE:
+                       break;
+               default:
+                       BUG();
+               }
+
+               /* unlock before doing IO: */
+               bkey_on_stack_reassemble(&sk, c, k);
+               k = bkey_i_to_s_c(sk.k);
+               bch2_trans_unlock(&trans);
+
+               ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
+                                       data_cmd, data_opts);
+               if (ret2) {
+                       if (ret2 == -ENOMEM) {
+                               /* memory allocation failure; wait for some IO to finish */
+                               bch2_move_ctxt_wait_for_io(ctxt);
+                               continue;
+                       }
+
+                       /* XXX signal failure */
+                       goto next;
+               }
+
+               if (rate)
+                       bch2_ratelimit_increment(rate, k.k->size);
+next:
+               atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k),
+                            &stats->sectors_seen);
+next_nondata:
+               bch2_btree_iter_next(iter);
+               bch2_trans_cond_resched(&trans);
+       }
+out:
+       ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&sk, c);
+
+       return ret;
+}
+
+int bch2_move_data(struct bch_fs *c,
+                  struct bch_ratelimit *rate,
+                  struct write_point_specifier wp,
+                  struct bpos start,
+                  struct bpos end,
+                  move_pred_fn pred, void *arg,
+                  struct bch_move_stats *stats)
+{
+       struct moving_context ctxt = { .stats = stats };
+       int ret;
+
+       closure_init_stack(&ctxt.cl);
+       INIT_LIST_HEAD(&ctxt.reads);
+       init_waitqueue_head(&ctxt.wait);
+
+       stats->data_type = BCH_DATA_user;
+
+       ret =   __bch2_move_data(c, &ctxt, rate, wp, start, end,
+                                pred, arg, stats, BTREE_ID_EXTENTS) ?:
+               __bch2_move_data(c, &ctxt, rate, wp, start, end,
+                                pred, arg, stats, BTREE_ID_REFLINK);
+
+       move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
+       closure_sync(&ctxt.cl);
+
+       EBUG_ON(atomic_read(&ctxt.write_sectors));
+
+       trace_move_data(c,
+                       atomic64_read(&stats->sectors_moved),
+                       atomic64_read(&stats->keys_moved));
+
+       return ret;
+}
+
+static int bch2_move_btree(struct bch_fs *c,
+                          move_pred_fn pred,
+                          void *arg,
+                          struct bch_move_stats *stats)
+{
+       struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct btree *b;
+       unsigned id;
+       struct data_opts data_opts;
+       enum data_cmd cmd;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       stats->data_type = BCH_DATA_btree;
+
+       for (id = 0; id < BTREE_ID_NR; id++) {
+               stats->btree_id = id;
+
+               for_each_btree_node(&trans, iter, id, POS_MIN,
+                                   BTREE_ITER_PREFETCH, b) {
+                       stats->pos = iter->pos;
+
+                       switch ((cmd = pred(c, arg,
+                                           bkey_i_to_s_c(&b->key),
+                                           &io_opts, &data_opts))) {
+                       case DATA_SKIP:
+                               goto next;
+                       case DATA_SCRUB:
+                               BUG();
+                       case DATA_ADD_REPLICAS:
+                       case DATA_REWRITE:
+                               break;
+                       default:
+                               BUG();
+                       }
+
+                       ret = bch2_btree_node_rewrite(c, iter,
+                                       b->data->keys.seq, 0) ?: ret;
+next:
+                       bch2_trans_cond_resched(&trans);
+               }
+
+               ret = bch2_trans_iter_free(&trans, iter) ?: ret;
+       }
+
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
+
+#if 0
+static enum data_cmd scrub_pred(struct bch_fs *c, void *arg,
+                               struct bkey_s_c k,
+                               struct bch_io_opts *io_opts,
+                               struct data_opts *data_opts)
+{
+       return DATA_SCRUB;
+}
+#endif
+
+static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
+                                     struct bkey_s_c k,
+                                     struct bch_io_opts *io_opts,
+                                     struct data_opts *data_opts)
+{
+       unsigned nr_good = bch2_bkey_durability(c, k);
+       unsigned replicas = 0;
+
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
+               replicas = c->opts.metadata_replicas;
+               break;
+       case KEY_TYPE_extent:
+               replicas = io_opts->data_replicas;
+               break;
+       }
+
+       if (!nr_good || nr_good >= replicas)
+               return DATA_SKIP;
+
+       data_opts->target               = 0;
+       data_opts->btree_insert_flags   = 0;
+       return DATA_ADD_REPLICAS;
+}
+
+static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
+                                 struct bkey_s_c k,
+                                 struct bch_io_opts *io_opts,
+                                 struct data_opts *data_opts)
+{
+       struct bch_ioctl_data *op = arg;
+
+       if (!bch2_bkey_has_device(k, op->migrate.dev))
+               return DATA_SKIP;
+
+       data_opts->target               = 0;
+       data_opts->btree_insert_flags   = 0;
+       data_opts->rewrite_dev          = op->migrate.dev;
+       return DATA_REWRITE;
+}
+
+int bch2_data_job(struct bch_fs *c,
+                 struct bch_move_stats *stats,
+                 struct bch_ioctl_data op)
+{
+       int ret = 0;
+
+       switch (op.op) {
+       case BCH_DATA_OP_REREPLICATE:
+               stats->data_type = BCH_DATA_journal;
+               ret = bch2_journal_flush_device_pins(&c->journal, -1);
+
+               ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
+
+               closure_wait_event(&c->btree_interior_update_wait,
+                                  !bch2_btree_interior_updates_nr_pending(c));
+
+               ret = bch2_replicas_gc2(c) ?: ret;
+
+               ret = bch2_move_data(c, NULL,
+                                    writepoint_hashed((unsigned long) current),
+                                    op.start,
+                                    op.end,
+                                    rereplicate_pred, c, stats) ?: ret;
+               ret = bch2_replicas_gc2(c) ?: ret;
+               break;
+       case BCH_DATA_OP_MIGRATE:
+               if (op.migrate.dev >= c->sb.nr_devices)
+                       return -EINVAL;
+
+               stats->data_type = BCH_DATA_journal;
+               ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
+
+               ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
+               ret = bch2_replicas_gc2(c) ?: ret;
+
+               ret = bch2_move_data(c, NULL,
+                                    writepoint_hashed((unsigned long) current),
+                                    op.start,
+                                    op.end,
+                                    migrate_pred, &op, stats) ?: ret;
+               ret = bch2_replicas_gc2(c) ?: ret;
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
new file mode 100644 (file)
index 0000000..0acd172
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MOVE_H
+#define _BCACHEFS_MOVE_H
+
+#include "btree_iter.h"
+#include "buckets.h"
+#include "io_types.h"
+#include "move_types.h"
+
+struct bch_read_bio;
+struct moving_context;
+
+enum data_cmd {
+       DATA_SKIP,
+       DATA_SCRUB,
+       DATA_ADD_REPLICAS,
+       DATA_REWRITE,
+       DATA_PROMOTE,
+};
+
+struct data_opts {
+       u16             target;
+       unsigned        rewrite_dev;
+       int             btree_insert_flags;
+};
+
+struct migrate_write {
+       enum btree_id           btree_id;
+       enum data_cmd           data_cmd;
+       struct data_opts        data_opts;
+
+       unsigned                nr_ptrs_reserved;
+
+       struct moving_context   *ctxt;
+
+       /* what we read: */
+       struct bch_extent_ptr   ptr;
+       u64                     offset;
+
+       struct bch_write_op     op;
+};
+
+void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *);
+int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
+                           struct write_point_specifier,
+                           struct bch_io_opts,
+                           enum data_cmd, struct data_opts,
+                           enum btree_id, struct bkey_s_c);
+
+typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
+                               struct bkey_s_c,
+                               struct bch_io_opts *, struct data_opts *);
+
+int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
+                  struct write_point_specifier,
+                  struct bpos, struct bpos,
+                  move_pred_fn, void *,
+                  struct bch_move_stats *);
+
+int bch2_data_job(struct bch_fs *,
+                 struct bch_move_stats *,
+                 struct bch_ioctl_data);
+
+#endif /* _BCACHEFS_MOVE_H */
diff --git a/libbcachefs/move_types.h b/libbcachefs/move_types.h
new file mode 100644 (file)
index 0000000..fc0de16
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MOVE_TYPES_H
+#define _BCACHEFS_MOVE_TYPES_H
+
+struct bch_move_stats {
+       enum bch_data_type      data_type;
+       enum btree_id           btree_id;
+       struct bpos             pos;
+
+       atomic64_t              keys_moved;
+       atomic64_t              keys_raced;
+       atomic64_t              sectors_moved;
+       atomic64_t              sectors_seen;
+       atomic64_t              sectors_raced;
+};
+
+#endif /* _BCACHEFS_MOVE_TYPES_H */
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
new file mode 100644 (file)
index 0000000..de0a797
--- /dev/null
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Moving/copying garbage collector
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "clock.h"
+#include "disk_groups.h"
+#include "error.h"
+#include "extents.h"
+#include "eytzinger.h"
+#include "io.h"
+#include "keylist.h"
+#include "move.h"
+#include "movinggc.h"
+#include "super-io.h"
+
+#include <trace/events/bcachefs.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/math64.h>
+#include <linux/sched/task.h>
+#include <linux/sort.h>
+#include <linux/wait.h>
+
+/*
+ * We can't use the entire copygc reserve in one iteration of copygc: we may
+ * need the buckets we're freeing up to go back into the copygc reserve to make
+ * forward progress, but if the copygc reserve is full they'll be available for
+ * any allocation - and it's possible that in a given iteration, we free up most
+ * of the buckets we're going to free before we allocate most of the buckets
+ * we're going to allocate.
+ *
+ * If we only use half of the reserve per iteration, then in steady state we'll
+ * always have room in the reserve for the buckets we're going to need in the
+ * next iteration:
+ */
+#define COPYGC_BUCKETS_PER_ITER(ca)                                    \
+       ((ca)->free[RESERVE_MOVINGGC].size / 2)
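+
+/*
+ * e.g. with a 16 bucket movinggc reserve, at most 8 buckets' worth of data
+ * is evacuated per iteration, leaving 8 buckets in reserve for the next
+ * pass.
+ */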
+
+static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
+{
+       const struct copygc_heap_entry *l = _l;
+       const struct copygc_heap_entry *r = _r;
+
+       return  cmp_int(l->dev,    r->dev) ?:
+               cmp_int(l->offset, r->offset);
+}
+
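+/*
+ * Returns the index of the first device whose pointer in @k still points
+ * into a bucket selected for copygc (matching bucket and generation), or -1
+ * if there is no such pointer:
+ */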
+static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k)
+{
+       copygc_heap *h = &c->copygc_heap;
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(ptrs, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct copygc_heap_entry search = {
+                       .dev = ptr->dev,
+                       .offset = ptr->offset
+               };
+
+               ssize_t i = eytzinger0_find_le(h->data, h->used,
+                                              sizeof(h->data[0]),
+                                              bucket_offset_cmp, &search);
+#if 0
+               /* eytzinger search verify code: */
+               ssize_t j = -1, k;
+
+               for (k = 0; k < h->used; k++)
+                       if (h->data[k].offset <= ptr->offset &&
+                           (j < 0 || h->data[k].offset > h->data[j].offset))
+                               j = k;
+
+               BUG_ON(i != j);
+#endif
+               if (i >= 0 &&
+                   ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
+                   ptr->gen == h->data[i].gen)
+                       return ptr->dev;
+       }
+
+       return -1;
+}
+
+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
+                                struct bkey_s_c k,
+                                struct bch_io_opts *io_opts,
+                                struct data_opts *data_opts)
+{
+       int dev_idx = __copygc_pred(c, k);
+       if (dev_idx < 0)
+               return DATA_SKIP;
+
+       data_opts->target               = io_opts->background_target;
+       data_opts->btree_insert_flags   = BTREE_INSERT_USE_RESERVE;
+       data_opts->rewrite_dev          = dev_idx;
+       return DATA_REWRITE;
+}
+
+static bool have_copygc_reserve(struct bch_dev *ca)
+{
+       bool ret;
+
+       spin_lock(&ca->fs->freelist_lock);
+       ret = fifo_full(&ca->free[RESERVE_MOVINGGC]) ||
+               ca->allocator_state != ALLOCATOR_RUNNING;
+       spin_unlock(&ca->fs->freelist_lock);
+
+       return ret;
+}
+
+static inline int fragmentation_cmp(copygc_heap *heap,
+                                  struct copygc_heap_entry l,
+                                  struct copygc_heap_entry r)
+{
+       return cmp_int(l.fragmentation, r.fragmentation);
+}
+
+static int bch2_copygc(struct bch_fs *c)
+{
+       copygc_heap *h = &c->copygc_heap;
+       struct copygc_heap_entry e, *i;
+       struct bucket_array *buckets;
+       struct bch_move_stats move_stats;
+       u64 sectors_to_move = 0, sectors_not_moved = 0;
+       u64 sectors_reserved = 0;
+       u64 buckets_to_move, buckets_not_moved = 0;
+       struct bch_dev *ca;
+       unsigned dev_idx;
+       size_t b, heap_size = 0;
+       int ret;
+
+       memset(&move_stats, 0, sizeof(move_stats));
+       /*
+        * Find buckets with lowest sector counts, skipping completely
+        * empty buckets, by building a maxheap sorted by sector count,
+        * and repeatedly replacing the maximum element until all
+        * buckets have been visited.
+        */
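+       /*
+        * (e.g. with a heap of size two and buckets using 10, 50, 3 and 20
+        * sectors, the heap ends up holding the 3 and 10 sector buckets -
+        * the emptiest ones, which are the cheapest to evacuate.)
+        */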
+       h->used = 0;
+
+       for_each_rw_member(ca, c, dev_idx)
+               heap_size += ca->mi.nbuckets >> 7;
+
+       if (h->size < heap_size) {
+               free_heap(&c->copygc_heap);
+               if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) {
+                       bch_err(c, "error allocating copygc heap");
+                       return 0;
+               }
+       }
+
+       for_each_rw_member(ca, c, dev_idx) {
+               closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca));
+
+               spin_lock(&ca->fs->freelist_lock);
+               sectors_reserved += fifo_used(&ca->free[RESERVE_MOVINGGC]) * ca->mi.bucket_size;
+               spin_unlock(&ca->fs->freelist_lock);
+
+               down_read(&ca->bucket_lock);
+               buckets = bucket_array(ca);
+
+               for (b = buckets->first_bucket; b < buckets->nbuckets; b++) {
+                       struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+                       struct copygc_heap_entry e;
+
+                       if (m.owned_by_allocator ||
+                           m.data_type != BCH_DATA_user ||
+                           !bucket_sectors_used(m) ||
+                           bucket_sectors_used(m) >= ca->mi.bucket_size)
+                               continue;
+
+                       e = (struct copygc_heap_entry) {
+                               .dev            = dev_idx,
+                               .gen            = m.gen,
+                               .fragmentation  = bucket_sectors_used(m) * (1U << 15)
+                                       / ca->mi.bucket_size,
+                               .sectors        = bucket_sectors_used(m),
+                               .offset         = bucket_to_sector(ca, b),
+                       };
+                       heap_add_or_replace(h, e, -fragmentation_cmp, NULL);
+               }
+               up_read(&ca->bucket_lock);
+       }
+
+       if (!sectors_reserved) {
+               bch2_fs_fatal_error(c, "stuck, ran out of copygc reserve!");
+               return -1;
+       }
+
+       for (i = h->data; i < h->data + h->used; i++)
+               sectors_to_move += i->sectors;
+
+       while (sectors_to_move > sectors_reserved) {
+               BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
+               sectors_to_move -= e.sectors;
+       }
+
+       buckets_to_move = h->used;
+
+       if (!buckets_to_move)
+               return 0;
+
+       eytzinger0_sort(h->data, h->used,
+                       sizeof(h->data[0]),
+                       bucket_offset_cmp, NULL);
+
+       ret = bch2_move_data(c, &c->copygc_pd.rate,
+                            writepoint_ptr(&c->copygc_write_point),
+                            POS_MIN, POS_MAX,
+                            copygc_pred, NULL,
+                            &move_stats);
+
+       for_each_rw_member(ca, c, dev_idx) {
+               down_read(&ca->bucket_lock);
+               buckets = bucket_array(ca);
+               for (i = h->data; i < h->data + h->used; i++) {
+                       struct bucket_mark m;
+                       size_t b;
+
+                       if (i->dev != dev_idx)
+                               continue;
+
+                       b = sector_to_bucket(ca, i->offset);
+                       m = READ_ONCE(buckets->b[b].mark);
+
+                       if (i->gen == m.gen &&
+                           bucket_sectors_used(m)) {
+                               sectors_not_moved += bucket_sectors_used(m);
+                               buckets_not_moved++;
+                       }
+               }
+               up_read(&ca->bucket_lock);
+       }
+
+       if (sectors_not_moved && !ret)
+               bch_warn_ratelimited(c,
+                       "copygc finished but %llu/%llu sectors, %llu/%llu buckets not moved (move stats: moved %llu sectors, raced %llu keys, %llu sectors)",
+                        sectors_not_moved, sectors_to_move,
+                        buckets_not_moved, buckets_to_move,
+                        atomic64_read(&move_stats.sectors_moved),
+                        atomic64_read(&move_stats.keys_raced),
+                        atomic64_read(&move_stats.sectors_raced));
+
+       trace_copygc(c,
+                    atomic64_read(&move_stats.sectors_moved), sectors_not_moved,
+                    buckets_to_move, buckets_not_moved);
+       return 0;
+}
+
+/*
+ * Copygc runs when the amount of fragmented data is above some arbitrary
+ * threshold:
+ *
+ * The threshold at the limit - when the device is full - is the amount of space
+ * we reserved in bch2_recalc_capacity; we can't have more than that amount of
+ * disk space stranded due to fragmentation and still store everything we
+ * have promised to store.
+ *
+ * But we don't want to be running copygc unnecessarily when the device still
+ * has plenty of free space - rather, we want copygc to smoothly run every so
+ * often and continually reduce the amount of fragmented space as the device
+ * fills up. So, we increase the threshold by half the current free space.
+ */
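+/*
+ * e.g. (illustrative numbers): with a copygc_threshold of 10GB and 100GB of
+ * free space, copygc only starts running once more than 10GB + 50GB = 60GB
+ * worth of sectors are stranded in fragmented buckets.
+ */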
+unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned dev_idx;
+       u64 fragmented_allowed = c->copygc_threshold;
+       u64 fragmented = 0;
+
+       for_each_rw_member(ca, c, dev_idx) {
+               struct bch_dev_usage usage = bch2_dev_usage_read(ca);
+
+               fragmented_allowed += ((__dev_buckets_available(ca, usage) *
+                                       ca->mi.bucket_size) >> 1);
+               fragmented += usage.sectors_fragmented;
+       }
+
+       return max_t(s64, 0, fragmented_allowed - fragmented);
+}
+
+static int bch2_copygc_thread(void *arg)
+{
+       struct bch_fs *c = arg;
+       struct io_clock *clock = &c->io_clock[WRITE];
+       unsigned long last, wait;
+
+       set_freezable();
+
+       while (!kthread_should_stop()) {
+               if (kthread_wait_freezable(c->copy_gc_enabled))
+                       break;
+
+               last = atomic_long_read(&clock->now);
+               wait = bch2_copygc_wait_amount(c);
+
+               if (wait > clock->max_slop) {
+                       bch2_kthread_io_clock_wait(clock, last + wait,
+                                       MAX_SCHEDULE_TIMEOUT);
+                       continue;
+               }
+
+               if (bch2_copygc(c))
+                       break;
+       }
+
+       return 0;
+}
+
+void bch2_copygc_stop(struct bch_fs *c)
+{
+       c->copygc_pd.rate.rate = UINT_MAX;
+       bch2_ratelimit_reset(&c->copygc_pd.rate);
+
+       if (c->copygc_thread) {
+               kthread_stop(c->copygc_thread);
+               put_task_struct(c->copygc_thread);
+       }
+       c->copygc_thread = NULL;
+}
+
+int bch2_copygc_start(struct bch_fs *c)
+{
+       struct task_struct *t;
+
+       if (c->copygc_thread)
+               return 0;
+
+       if (c->opts.nochanges)
+               return 0;
+
+       if (bch2_fs_init_fault("copygc_start"))
+               return -ENOMEM;
+
+       t = kthread_create(bch2_copygc_thread, c, "bch_copygc");
+       if (IS_ERR(t))
+               return PTR_ERR(t);
+
+       get_task_struct(t);
+
+       c->copygc_thread = t;
+       wake_up_process(c->copygc_thread);
+
+       return 0;
+}
+
+void bch2_fs_copygc_init(struct bch_fs *c)
+{
+       bch2_pd_controller_init(&c->copygc_pd);
+       c->copygc_pd.d_term = 0;
+}
diff --git a/libbcachefs/movinggc.h b/libbcachefs/movinggc.h
new file mode 100644 (file)
index 0000000..9227382
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MOVINGGC_H
+#define _BCACHEFS_MOVINGGC_H
+
+void bch2_copygc_stop(struct bch_fs *);
+int bch2_copygc_start(struct bch_fs *);
+void bch2_fs_copygc_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_MOVINGGC_H */
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
new file mode 100644 (file)
index 0000000..afe25cd
--- /dev/null
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+
+#include "bcachefs.h"
+#include "compress.h"
+#include "disk_groups.h"
+#include "opts.h"
+#include "super-io.h"
+#include "util.h"
+
+const char * const bch2_error_actions[] = {
+       "continue",
+       "remount-ro",
+       "panic",
+       NULL
+};
+
+const char * const bch2_sb_features[] = {
+#define x(f, n) #f,
+       BCH_SB_FEATURES()
+#undef x
+       NULL
+};
+
+const char * const bch2_csum_opts[] = {
+       "none",
+       "crc32c",
+       "crc64",
+       NULL
+};
+
+const char * const bch2_compression_opts[] = {
+#define x(t, n) #t,
+       BCH_COMPRESSION_OPTS()
+#undef x
+       NULL
+};
+
+const char * const bch2_str_hash_types[] = {
+       "crc32c",
+       "crc64",
+       "siphash",
+       NULL
+};
+
+const char * const bch2_data_types[] = {
+#define x(t, n) #t,
+       BCH_DATA_TYPES()
+#undef x
+       NULL
+};
+
+const char * const bch2_cache_replacement_policies[] = {
+       "lru",
+       "fifo",
+       "random",
+       NULL
+};
+
+/* Default is -1; we skip past it for struct cached_dev's cache mode */
+const char * const bch2_cache_modes[] = {
+       "default",
+       "writethrough",
+       "writeback",
+       "writearound",
+       "none",
+       NULL
+};
+
+const char * const bch2_dev_state[] = {
+       "readwrite",
+       "readonly",
+       "failed",
+       "spare",
+       NULL
+};
+
+void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
+{
+#define x(_name, ...)                                          \
+       if (opt_defined(src, _name))                                    \
+               opt_set(*dst, _name, src._name);
+
+       BCH_OPTS()
+#undef x
+}
+
+bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id)
+{
+       switch (id) {
+#define x(_name, ...)                                          \
+       case Opt_##_name:                                               \
+               return opt_defined(*opts, _name);
+       BCH_OPTS()
+#undef x
+       default:
+               BUG();
+       }
+}
+
+u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id)
+{
+       switch (id) {
+#define x(_name, ...)                                          \
+       case Opt_##_name:                                               \
+               return opts->_name;
+       BCH_OPTS()
+#undef x
+       default:
+               BUG();
+       }
+}
+
+void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v)
+{
+       switch (id) {
+#define x(_name, ...)                                          \
+       case Opt_##_name:                                               \
+               opt_set(*opts, _name, v);                               \
+               break;
+       BCH_OPTS()
+#undef x
+       default:
+               BUG();
+       }
+}
+
+/*
+ * Initial options from superblock - here we don't want any options undefined,
+ * any options the superblock doesn't specify are set to 0:
+ */
+struct bch_opts bch2_opts_from_sb(struct bch_sb *sb)
+{
+       struct bch_opts opts = bch2_opts_empty();
+
+#define x(_name, _bits, _mode, _type, _sb_opt, ...)                    \
+       if (_sb_opt != NO_SB_OPT)                                       \
+               opt_set(opts, _name, _sb_opt(sb));
+       BCH_OPTS()
+#undef x
+
+       return opts;
+}
+
+const struct bch_option bch2_opt_table[] = {
+#define OPT_BOOL()             .type = BCH_OPT_BOOL
+#define OPT_UINT(_min, _max)   .type = BCH_OPT_UINT, .min = _min, .max = _max
+#define OPT_SECTORS(_min, _max)        .type = BCH_OPT_SECTORS, .min = _min, .max = _max
+#define OPT_STR(_choices)      .type = BCH_OPT_STR, .choices = _choices
+#define OPT_FN(_fn)            .type = BCH_OPT_FN,                     \
+                               .parse = _fn##_parse,                   \
+                               .to_text = _fn##_to_text
+
+#define x(_name, _bits, _mode, _type, _sb_opt, _default, _hint, _help) \
+       [Opt_##_name] = {                                               \
+               .attr   = {                                             \
+                       .name   = #_name,                               \
+                       .mode = (_mode) & OPT_RUNTIME ? 0644 : 0444,    \
+               },                                                      \
+               .mode   = _mode,                                        \
+               .hint   = _hint,                                        \
+               .help   = _help,                                        \
+               .set_sb = SET_##_sb_opt,                                \
+               _type                                                   \
+       },
+
+       BCH_OPTS()
+#undef x
+};
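+
+/*
+ * For reference, a sketch of what one entry of the x() expansion above
+ * looks like, using the errors option (whitespace condensed):
+ *
+ *   [Opt_errors] = {
+ *           .attr   = { .name = "errors", .mode = 0644 },
+ *           .mode   = OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,
+ *           .hint   = NULL,
+ *           .help   = "Action to take on filesystem error",
+ *           .set_sb = SET_BCH_SB_ERROR_ACTION,
+ *           .type   = BCH_OPT_STR, .choices = bch2_error_actions,
+ *   },
+ */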
+
+int bch2_opt_lookup(const char *name)
+{
+       const struct bch_option *i;
+
+       for (i = bch2_opt_table;
+            i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table);
+            i++)
+               if (!strcmp(name, i->attr.name))
+                       return i - bch2_opt_table;
+
+       return -1;
+}
+
+struct synonym {
+       const char      *s1, *s2;
+};
+
+static const struct synonym bch_opt_synonyms[] = {
+       { "quota",      "usrquota" },
+};
+
+static int bch2_mount_opt_lookup(const char *name)
+{
+       const struct synonym *i;
+
+       for (i = bch_opt_synonyms;
+            i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms);
+            i++)
+               if (!strcmp(name, i->s1))
+                       name = i->s2;
+
+       return bch2_opt_lookup(name);
+}
+
+int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt,
+                  const char *val, u64 *res)
+{
+       ssize_t ret;
+
+       switch (opt->type) {
+       case BCH_OPT_BOOL:
+               ret = kstrtou64(val, 10, res);
+               if (ret < 0)
+                       return ret;
+
+               if (*res > 1)
+                       return -ERANGE;
+               break;
+       case BCH_OPT_UINT:
+               ret = kstrtou64(val, 10, res);
+               if (ret < 0)
+                       return ret;
+
+               if (*res < opt->min || *res >= opt->max)
+                       return -ERANGE;
+               break;
+       case BCH_OPT_SECTORS:
+               ret = bch2_strtou64_h(val, res);
+               if (ret < 0)
+                       return ret;
+
+               if (*res & 511)
+                       return -EINVAL;
+
+               *res >>= 9;
+
+               if (*res < opt->min || *res >= opt->max)
+                       return -ERANGE;
+               break;
+       case BCH_OPT_STR:
+               ret = match_string(opt->choices, -1, val);
+               if (ret < 0)
+                       return ret;
+
+               *res = ret;
+               break;
+       case BCH_OPT_FN:
+               if (!c)
+                       return -EINVAL;
+
+               return opt->parse(c, val, res);
+       }
+
+       return 0;
+}
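+
+/*
+ * A sketch of the BCH_OPT_SECTORS path above, assuming bch2_strtou64_h()
+ * accepts human-readable size suffixes: parsing "1M" yields 1048576 bytes,
+ * which is 512-byte aligned, so it becomes 2048 sectors and is then range
+ * checked against opt->min/opt->max; an unaligned value such as "1000"
+ * fails with -EINVAL before the shift.
+ */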
+
+void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c,
+                     const struct bch_option *opt, u64 v,
+                     unsigned flags)
+{
+       if (flags & OPT_SHOW_MOUNT_STYLE) {
+               if (opt->type == BCH_OPT_BOOL) {
+                       pr_buf(out, "%s%s",
+                              v ? "" : "no",
+                              opt->attr.name);
+                       return;
+               }
+
+               pr_buf(out, "%s=", opt->attr.name);
+       }
+
+       switch (opt->type) {
+       case BCH_OPT_BOOL:
+       case BCH_OPT_UINT:
+               pr_buf(out, "%llu", v);
+               break;
+       case BCH_OPT_SECTORS:
+               bch2_hprint(out, v);
+               break;
+       case BCH_OPT_STR:
+               if (flags & OPT_SHOW_FULL_LIST)
+                       bch2_string_opt_to_text(out, opt->choices, v);
+               else
+                       pr_buf(out, "%s", opt->choices[v]);
+               break;
+       case BCH_OPT_FN:
+               opt->to_text(out, c, v);
+               break;
+       default:
+               BUG();
+       }
+}
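+
+/*
+ * Inferred output of the function above (a sketch, not captured output):
+ * with OPT_SHOW_MOUNT_STYLE, a bool option renders as "acl" or "noacl" and
+ * a string option as e.g. "errors=remount-ro"; without the flag, only the
+ * bare value is printed.
+ */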
+
+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
+{
+       int ret = 0;
+
+       switch (id) {
+       case Opt_compression:
+       case Opt_background_compression:
+               ret = bch2_check_set_has_compressed_data(c, v);
+               break;
+       case Opt_erasure_code:
+               if (v)
+                       bch2_check_set_feature(c, BCH_FEATURE_ec);
+               break;
+       }
+
+       return ret;
+}
+
+int bch2_opts_check_may_set(struct bch_fs *c)
+{
+       unsigned i;
+       int ret;
+
+       for (i = 0; i < bch2_opts_nr; i++) {
+               ret = bch2_opt_check_may_set(c, i,
+                               bch2_opt_get_by_id(&c->opts, i));
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
+{
+       char *opt, *name, *val;
+       int ret, id;
+       u64 v;
+
+       while ((opt = strsep(&options, ",")) != NULL) {
+               name    = strsep(&opt, "=");
+               val     = opt;
+
+               if (val) {
+                       id = bch2_mount_opt_lookup(name);
+                       if (id < 0)
+                               goto bad_opt;
+
+                       ret = bch2_opt_parse(NULL, &bch2_opt_table[id], val, &v);
+                       if (ret < 0)
+                               goto bad_val;
+               } else {
+                       id = bch2_mount_opt_lookup(name);
+                       v = 1;
+
+                       if (id < 0 &&
+                           !strncmp("no", name, 2)) {
+                               id = bch2_mount_opt_lookup(name + 2);
+                               v = 0;
+                       }
+
+                       if (id < 0)
+                               goto bad_opt;
+
+                       if (bch2_opt_table[id].type != BCH_OPT_BOOL)
+                               goto no_val;
+               }
+
+               if (!(bch2_opt_table[id].mode & OPT_MOUNT))
+                       goto bad_opt;
+
+               if (id == Opt_acl &&
+                   !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL))
+                       goto bad_opt;
+
+               if ((id == Opt_usrquota ||
+                    id == Opt_grpquota) &&
+                   !IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
+                       goto bad_opt;
+
+               bch2_opt_set_by_id(opts, id, v);
+       }
+
+       return 0;
+bad_opt:
+       pr_err("Bad mount option %s", name);
+       return -1;
+bad_val:
+       pr_err("Invalid value %s for mount option %s", val, name);
+       return -1;
+no_val:
+       pr_err("Mount option %s requires a value", name);
+       return -1;
+}
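+
+/*
+ * A sketch of how the parser above handles a typical (hypothetical) option
+ * string "metadata_replicas=2,noacl,degraded":
+ *
+ *   - "metadata_replicas=2" carries a value, so it is looked up and the
+ *     value parsed with bch2_opt_parse();
+ *   - "noacl" has no value and no direct match, so the "no" prefix is
+ *     stripped, "acl" is found, and the option is set to 0;
+ *   - "degraded" is a plain bool with no value, so it is set to 1.
+ */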
+
+/* io opts: */
+
+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src)
+{
+       struct bch_io_opts ret = { 0 };
+#define x(_name, _bits)                                        \
+       if (opt_defined(src, _name))                                    \
+               opt_set(ret, _name, src._name);
+       BCH_INODE_OPTS()
+#undef x
+       return ret;
+}
+
+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts src)
+{
+       struct bch_opts ret = { 0 };
+#define x(_name, _bits)                                        \
+       if (opt_defined(src, _name))                                    \
+               opt_set(ret, _name, src._name);
+       BCH_INODE_OPTS()
+#undef x
+       return ret;
+}
+
+void bch2_io_opts_apply(struct bch_io_opts *dst, struct bch_io_opts src)
+{
+#define x(_name, _bits)                                        \
+       if (opt_defined(src, _name))                                    \
+               opt_set(*dst, _name, src._name);
+       BCH_INODE_OPTS()
+#undef x
+}
+
+bool bch2_opt_is_inode_opt(enum bch_opt_id id)
+{
+       static const enum bch_opt_id inode_opt_list[] = {
+#define x(_name, _bits)        Opt_##_name,
+       BCH_INODE_OPTS()
+#undef x
+       };
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(inode_opt_list); i++)
+               if (inode_opt_list[i] == id)
+                       return true;
+
+       return false;
+}
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
new file mode 100644 (file)
index 0000000..014c608
--- /dev/null
@@ -0,0 +1,440 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_OPTS_H
+#define _BCACHEFS_OPTS_H
+
+#include <linux/bug.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include "bcachefs_format.h"
+
+extern const char * const bch2_error_actions[];
+extern const char * const bch2_sb_features[];
+extern const char * const bch2_csum_opts[];
+extern const char * const bch2_compression_opts[];
+extern const char * const bch2_str_hash_types[];
+extern const char * const bch2_data_types[];
+extern const char * const bch2_cache_replacement_policies[];
+extern const char * const bch2_cache_modes[];
+extern const char * const bch2_dev_state[];
+
+/*
+ * Mount options; we also store defaults in the superblock.
+ *
+ * Also exposed via sysfs: if an option is writeable, and it's also stored in
+ * the superblock, changing it via sysfs (currently? might change this) also
+ * updates the superblock.
+ *
+ * Each option in struct bch_opts carries its own "defined" bit. This means
+ * we can pass the mount options to bch2_fs_alloc() as a whole struct, and
+ * then only apply the options from that struct that are defined.
+ */
+
+/* dummy option, for options that aren't stored in the superblock */
+LE64_BITMASK(NO_SB_OPT,                struct bch_sb, flags[0], 0, 0);
+
+/* When can be set: */
+enum opt_mode {
+       OPT_FORMAT      = (1 << 0),
+       OPT_MOUNT       = (1 << 1),
+       OPT_RUNTIME     = (1 << 2),
+       OPT_INODE       = (1 << 3),
+       OPT_DEVICE      = (1 << 4),
+};
+
+enum opt_type {
+       BCH_OPT_BOOL,
+       BCH_OPT_UINT,
+       BCH_OPT_SECTORS,
+       BCH_OPT_STR,
+       BCH_OPT_FN,
+};
+
+/**
+ * x(name, in-mem type, mode, type, sb_opt, default, hint, help)
+ *
+ * @name       - name of mount option, sysfs attribute, and struct bch_opts
+ *               member
+ *
+ * @mode       - when opt may be set
+ *
+ * @sb_opt     - name of corresponding superblock option
+ *
+ * @type       - one of OPT_BOOL, OPT_UINT, OPT_SECTORS, OPT_STR, OPT_FN
+ */
+
+/*
+ * XXX: add fields for
+ *  - default value
+ *  - helptext
+ */
+
+#ifdef __KERNEL__
+#define RATELIMIT_ERRORS true
+#else
+#define RATELIMIT_ERRORS false
+#endif
+
+#define BCH_OPTS()                                                     \
+       x(block_size,                   u16,                            \
+         OPT_FORMAT,                                                   \
+         OPT_SECTORS(1, 128),                                          \
+         BCH_SB_BLOCK_SIZE,            8,                              \
+         "size",       NULL)                                           \
+       x(btree_node_size,              u16,                            \
+         OPT_FORMAT,                                                   \
+         OPT_SECTORS(1, 512),                                          \
+         BCH_SB_BTREE_NODE_SIZE,       512,                            \
+         "size",       "Btree node size, default 256k")                \
+       x(errors,                       u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_STR(bch2_error_actions),                                  \
+         BCH_SB_ERROR_ACTION,          BCH_ON_ERROR_RO,                \
+         NULL,         "Action to take on filesystem error")           \
+       x(metadata_replicas,            u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_UINT(1, BCH_REPLICAS_MAX),                                \
+         BCH_SB_META_REPLICAS_WANT,    1,                              \
+         "#",          "Number of metadata replicas")                  \
+       x(data_replicas,                u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_UINT(1, BCH_REPLICAS_MAX),                                \
+         BCH_SB_DATA_REPLICAS_WANT,    1,                              \
+         "#",          "Number of data replicas")                      \
+       x(metadata_replicas_required, u8,                               \
+         OPT_FORMAT|OPT_MOUNT,                                         \
+         OPT_UINT(1, BCH_REPLICAS_MAX),                                \
+         BCH_SB_META_REPLICAS_REQ,     1,                              \
+         "#",          NULL)                                           \
+       x(data_replicas_required,       u8,                             \
+         OPT_FORMAT|OPT_MOUNT,                                         \
+         OPT_UINT(1, BCH_REPLICAS_MAX),                                \
+         BCH_SB_DATA_REPLICAS_REQ,     1,                              \
+         "#",          NULL)                                           \
+       x(metadata_checksum,            u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_STR(bch2_csum_opts),                                      \
+         BCH_SB_META_CSUM_TYPE,        BCH_CSUM_OPT_CRC32C,            \
+         NULL,         NULL)                                           \
+       x(data_checksum,                u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_STR(bch2_csum_opts),                                      \
+         BCH_SB_DATA_CSUM_TYPE,        BCH_CSUM_OPT_CRC32C,            \
+         NULL,         NULL)                                           \
+       x(compression,                  u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_STR(bch2_compression_opts),                               \
+         BCH_SB_COMPRESSION_TYPE,      BCH_COMPRESSION_OPT_none,       \
+         NULL,         NULL)                                           \
+       x(background_compression,       u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_STR(bch2_compression_opts),                               \
+         BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none,  \
+         NULL,         NULL)                                           \
+       x(str_hash,                     u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_STR(bch2_str_hash_types),                                 \
+         BCH_SB_STR_HASH_TYPE,         BCH_STR_HASH_OPT_SIPHASH,       \
+         NULL,         "Hash function for directory entries and xattrs")\
+       x(foreground_target,            u16,                            \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_FN(bch2_opt_target),                                      \
+         BCH_SB_FOREGROUND_TARGET,     0,                              \
+         "(target)",   "Device or disk group for foreground writes")   \
+       x(background_target,            u16,                            \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_FN(bch2_opt_target),                                      \
+         BCH_SB_BACKGROUND_TARGET,     0,                              \
+         "(target)",   "Device or disk group to move data to in the background")\
+       x(promote_target,               u16,                            \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_FN(bch2_opt_target),                                      \
+         BCH_SB_PROMOTE_TARGET,        0,                              \
+         "(target)",   "Device or disk group to promote data to on read")\
+       x(erasure_code,                 u16,                            \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE,                   \
+         OPT_BOOL(),                                                   \
+         BCH_SB_ERASURE_CODE,          false,                          \
+         NULL,         "Enable erasure coding (DO NOT USE YET)")       \
+       x(inodes_32bit,                 u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_BOOL(),                                                   \
+         BCH_SB_INODE_32BIT,           false,                          \
+         NULL,         "Constrain inode numbers to 32 bits")           \
+       x(gc_reserve_percent,           u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_UINT(5, 21),                                              \
+         BCH_SB_GC_RESERVE,            8,                              \
+         "%",          "Percentage of disk space to reserve for copygc")\
+       x(gc_reserve_bytes,             u64,                            \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_SECTORS(0, U64_MAX),                                      \
+         BCH_SB_GC_RESERVE_BYTES,      0,                              \
+         "%",          "Amount of disk space to reserve for copygc\n"  \
+                       "Takes precedence over gc_reserve_percent if set")\
+       x(root_reserve_percent,         u8,                             \
+         OPT_FORMAT|OPT_MOUNT,                                         \
+         OPT_UINT(0, 100),                                             \
+         BCH_SB_ROOT_RESERVE,          0,                              \
+         "%",          "Percentage of disk space to reserve for superuser")\
+       x(wide_macs,                    u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_BOOL(),                                                   \
+         BCH_SB_128_BIT_MACS,          false,                          \
+         NULL,         "Store full 128 bits of cryptographic MACs, instead of 80")\
+       x(inline_data,                  u8,                             \
+         OPT_MOUNT|OPT_RUNTIME,                                        \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Enable inline data extents")                   \
+       x(acl,                          u8,                             \
+         OPT_FORMAT|OPT_MOUNT,                                         \
+         OPT_BOOL(),                                                   \
+         BCH_SB_POSIX_ACL,             true,                           \
+         NULL,         "Enable POSIX acls")                            \
+       x(usrquota,                     u8,                             \
+         OPT_FORMAT|OPT_MOUNT,                                         \
+         OPT_BOOL(),                                                   \
+         BCH_SB_USRQUOTA,              false,                          \
+         NULL,         "Enable user quotas")                           \
+       x(grpquota,                     u8,                             \
+         OPT_FORMAT|OPT_MOUNT,                                         \
+         OPT_BOOL(),                                                   \
+         BCH_SB_GRPQUOTA,              false,                          \
+         NULL,         "Enable group quotas")                          \
+       x(prjquota,                     u8,                             \
+         OPT_FORMAT|OPT_MOUNT,                                         \
+         OPT_BOOL(),                                                   \
+         BCH_SB_PRJQUOTA,              false,                          \
+         NULL,         "Enable project quotas")                        \
+       x(reflink,                      u8,                             \
+         OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                             \
+         OPT_BOOL(),                                                   \
+         BCH_SB_REFLINK,               true,                           \
+         NULL,         "Enable reflink support")                       \
+       x(degraded,                     u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Allow mounting in degraded mode")              \
+       x(discard,                      u8,                             \
+         OPT_MOUNT|OPT_DEVICE,                                         \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Enable discard/TRIM support")                  \
+       x(verbose,                      u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Extra debugging information during mount/recovery")\
+       x(journal_flush_disabled,       u8,                             \
+         OPT_MOUNT|OPT_RUNTIME,                                        \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Disable journal flush on sync/fsync\n"         \
+                       "If enabled, writes can be lost, but only since the\n"\
+                       "last journal write (default 1 second)")        \
+       x(fsck,                         u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Run fsck on mount")                            \
+       x(fix_errors,                   u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Fix errors during fsck without asking")        \
+       x(ratelimit_errors,             u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    RATELIMIT_ERRORS,               \
+         NULL,         "Ratelimit error messages during fsck")         \
+       x(nochanges,                    u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Super read only mode - no writes at all will be issued,\n"\
+                       "even if we have to replay the journal")        \
+       x(norecovery,                   u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Don't replay the journal")                     \
+       x(rebuild_replicas,             u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Rebuild the superblock replicas section")      \
+       x(keep_journal,                 u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Don't free journal entries/keys after startup")\
+       x(read_entire_journal,          u8,                             \
+         0,                                                            \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Read all journal entries, not just dirty ones")\
+       x(noexcl,                       u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Don't open device in exclusive mode")          \
+       x(sb,                           u64,                            \
+         OPT_MOUNT,                                                    \
+         OPT_UINT(0, S64_MAX),                                         \
+         NO_SB_OPT,                    BCH_SB_SECTOR,                  \
+         "offset",     "Sector offset of superblock")                  \
+       x(read_only,                    u8,                             \
+         0,                                                            \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         NULL)                                           \
+       x(nostart,                      u8,                             \
+         0,                                                            \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Don\'t start filesystem, only open devices")   \
+       x(reconstruct_alloc,            u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Reconstruct alloc btree")                      \
+       x(version_upgrade,              u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Set superblock to latest version,\n"           \
+                       "allowing any new features to be used")         \
+       x(project,                      u8,                             \
+         OPT_INODE,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         NULL)                                           \
+       x(fs_size,                      u64,                            \
+         OPT_DEVICE,                                                   \
+         OPT_SECTORS(0, S64_MAX),                                      \
+         NO_SB_OPT,                    0,                              \
+         "size",       "Size of filesystem on device")                 \
+       x(bucket,                       u32,                            \
+         OPT_DEVICE,                                                   \
+         OPT_SECTORS(0, S64_MAX),                                      \
+         NO_SB_OPT,                    0,                              \
+         "size",       "Size of filesystem on device")                 \
+       x(durability,                   u8,                             \
+         OPT_DEVICE,                                                   \
+         OPT_UINT(0, BCH_REPLICAS_MAX),                                \
+         NO_SB_OPT,                    1,                              \
+         "n",          "Data written to this device will be considered\n"\
+                       "to have already been replicated n times")
+
+struct bch_opts {
+#define x(_name, _bits, ...)   unsigned _name##_defined:1;
+       BCH_OPTS()
+#undef x
+
+#define x(_name, _bits, ...)   _bits   _name;
+       BCH_OPTS()
+#undef x
+};
+
+static const struct bch_opts bch2_opts_default = {
+#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...)          \
+       ._name##_defined = true,                                        \
+       ._name = _default,                                              \
+
+       BCH_OPTS()
+#undef x
+};
+
+#define opt_defined(_opts, _name)      ((_opts)._name##_defined)
+
+#define opt_get(_opts, _name)                                          \
+       (opt_defined(_opts, _name) ? (_opts)._name : bch2_opts_default._name)
+
+#define opt_set(_opts, _name, _v)                                      \
+do {                                                                   \
+       (_opts)._name##_defined = true;                                 \
+       (_opts)._name = _v;                                             \
+} while (0)
+
+static inline struct bch_opts bch2_opts_empty(void)
+{
+       return (struct bch_opts) { 0 };
+}
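+
+/*
+ * Example usage of the accessors above (a sketch): build an option set
+ * with only data_replicas defined, then read it back:
+ *
+ *   struct bch_opts opts = bch2_opts_empty();
+ *
+ *   opt_set(opts, data_replicas, 2);
+ *   opt_defined(opts, data_replicas);   // true
+ *   opt_get(opts, data_replicas);       // 2
+ *   opt_get(opts, metadata_replicas);   // not defined -> default (1)
+ */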
+
+void bch2_opts_apply(struct bch_opts *, struct bch_opts);
+
+enum bch_opt_id {
+#define x(_name, ...)  Opt_##_name,
+       BCH_OPTS()
+#undef x
+       bch2_opts_nr
+};
+
+struct bch_fs;
+struct printbuf;
+
+struct bch_option {
+       struct attribute        attr;
+       void                    (*set_sb)(struct bch_sb *, u64);
+       enum opt_mode           mode;
+       enum opt_type           type;
+
+       union {
+       struct {
+               u64             min, max;
+       };
+       struct {
+               const char * const *choices;
+       };
+       struct {
+               int (*parse)(struct bch_fs *, const char *, u64 *);
+               void (*to_text)(struct printbuf *, struct bch_fs *, u64);
+       };
+       };
+
+       const char              *hint;
+       const char              *help;
+
+};
+
+extern const struct bch_option bch2_opt_table[];
+
+bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id);
+u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id);
+void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
+
+struct bch_opts bch2_opts_from_sb(struct bch_sb *);
+
+int bch2_opt_lookup(const char *);
+int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64 *);
+
+#define OPT_SHOW_FULL_LIST     (1 << 0)
+#define OPT_SHOW_MOUNT_STYLE   (1 << 1)
+
+void bch2_opt_to_text(struct printbuf *, struct bch_fs *,
+                     const struct bch_option *, u64, unsigned);
+
+int bch2_opt_check_may_set(struct bch_fs *, int, u64);
+int bch2_opts_check_may_set(struct bch_fs *);
+int bch2_parse_mount_opts(struct bch_opts *, char *);
+
+/* inode opts: */
+
+struct bch_io_opts {
+#define x(_name, _bits)        unsigned _name##_defined:1;
+       BCH_INODE_OPTS()
+#undef x
+
+#define x(_name, _bits)        u##_bits _name;
+       BCH_INODE_OPTS()
+#undef x
+};
+
+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
+struct bch_opts bch2_inode_opts_to_opts(struct bch_io_opts);
+void bch2_io_opts_apply(struct bch_io_opts *, struct bch_io_opts);
+bool bch2_opt_is_inode_opt(enum bch_opt_id);
+
+#endif /* _BCACHEFS_OPTS_H */
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
new file mode 100644 (file)
index 0000000..d3032a4
--- /dev/null
@@ -0,0 +1,783 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "inode.h"
+#include "quota.h"
+#include "super-io.h"
+
+static const char *bch2_sb_validate_quota(struct bch_sb *sb,
+                                         struct bch_sb_field *f)
+{
+       struct bch_sb_field_quota *q = field_to_type(f, quota);
+
+       if (vstruct_bytes(&q->field) != sizeof(*q))
+               return "invalid field quota: wrong size";
+
+       return NULL;
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_quota = {
+       .validate       = bch2_sb_validate_quota,
+};
+
+const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       if (k.k->p.inode >= QTYP_NR)
+               return "invalid quota type";
+
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_quota))
+               return "incorrect value size";
+
+       return NULL;
+}
+
+static const char * const bch2_quota_counters[] = {
+       "space",
+       "inodes",
+};
+
+void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c,
+                       struct bkey_s_c k)
+{
+       struct bkey_s_c_quota dq = bkey_s_c_to_quota(k);
+       unsigned i;
+
+       for (i = 0; i < Q_COUNTERS; i++)
+               pr_buf(out, "%s%s hardlimit %llu softlimit %llu",
+                      i ? " " : "",
+                      bch2_quota_counters[i],
+                      le64_to_cpu(dq.v->c[i].hardlimit),
+                      le64_to_cpu(dq.v->c[i].softlimit));
+}
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+#include <linux/cred.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+
+static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
+{
+       qtypes >>= i;
+       return qtypes ? i + __ffs(qtypes) : QTYP_NR;
+}
+
+#define for_each_set_qtype(_c, _i, _q, _qtypes)                                \
+       for (_i = 0;                                                    \
+            (_i = __next_qtype(_i, _qtypes),                           \
+             _q = &(_c)->quotas[_i],                                   \
+             _i < QTYP_NR);                                            \
+            _i++)
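+
+/*
+ * Example (a sketch): with qtypes = (1 << QTYP_USR)|(1 << QTYP_PRJ), the
+ * loop above visits _i = QTYP_USR and then _i = QTYP_PRJ, pointing _q at
+ * the matching &c->quotas[_i] each time, and stops once __next_qtype()
+ * returns QTYP_NR.
+ */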
+
+static bool ignore_hardlimit(struct bch_memquota_type *q)
+{
+       if (capable(CAP_SYS_RESOURCE))
+               return true;
+#if 0
+       struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
+
+       return capable(CAP_SYS_RESOURCE) &&
+              (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
+               !(info->dqi_flags & DQF_ROOT_SQUASH));
+#endif
+       return false;
+}
+
+enum quota_msg {
+       SOFTWARN,       /* Softlimit reached */
+       SOFTLONGWARN,   /* Grace time expired */
+       HARDWARN,       /* Hardlimit reached */
+
+       HARDBELOW,      /* Usage got below inode hardlimit */
+       SOFTBELOW,      /* Usage got below inode softlimit */
+};
+
+static int quota_nl[][Q_COUNTERS] = {
+       [HARDWARN][Q_SPC]       = QUOTA_NL_BHARDWARN,
+       [SOFTLONGWARN][Q_SPC]   = QUOTA_NL_BSOFTLONGWARN,
+       [SOFTWARN][Q_SPC]       = QUOTA_NL_BSOFTWARN,
+       [HARDBELOW][Q_SPC]      = QUOTA_NL_BHARDBELOW,
+       [SOFTBELOW][Q_SPC]      = QUOTA_NL_BSOFTBELOW,
+
+       [HARDWARN][Q_INO]       = QUOTA_NL_IHARDWARN,
+       [SOFTLONGWARN][Q_INO]   = QUOTA_NL_ISOFTLONGWARN,
+       [SOFTWARN][Q_INO]       = QUOTA_NL_ISOFTWARN,
+       [HARDBELOW][Q_INO]      = QUOTA_NL_IHARDBELOW,
+       [SOFTBELOW][Q_INO]      = QUOTA_NL_ISOFTBELOW,
+};
+
+struct quota_msgs {
+       u8              nr;
+       struct {
+               u8      qtype;
+               u8      msg;
+       }               m[QTYP_NR * Q_COUNTERS];
+};
+
+static void prepare_msg(unsigned qtype,
+                       enum quota_counters counter,
+                       struct quota_msgs *msgs,
+                       enum quota_msg msg_type)
+{
+       BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m));
+
+       msgs->m[msgs->nr].qtype = qtype;
+       msgs->m[msgs->nr].msg   = quota_nl[msg_type][counter];
+       msgs->nr++;
+}
+
+static void prepare_warning(struct memquota_counter *qc,
+                           unsigned qtype,
+                           enum quota_counters counter,
+                           struct quota_msgs *msgs,
+                           enum quota_msg msg_type)
+{
+       if (qc->warning_issued & (1 << msg_type))
+               return;
+
+       prepare_msg(qtype, counter, msgs, msg_type);
+}
+
+static void flush_warnings(struct bch_qid qid,
+                          struct super_block *sb,
+                          struct quota_msgs *msgs)
+{
+       unsigned i;
+
+       for (i = 0; i < msgs->nr; i++)
+               quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]),
+                                  sb->s_dev, msgs->m[i].msg);
+}
+
+static int bch2_quota_check_limit(struct bch_fs *c,
+                                 unsigned qtype,
+                                 struct bch_memquota *mq,
+                                 struct quota_msgs *msgs,
+                                 enum quota_counters counter,
+                                 s64 v,
+                                 enum quota_acct_mode mode)
+{
+       struct bch_memquota_type *q = &c->quotas[qtype];
+       struct memquota_counter *qc = &mq->c[counter];
+       u64 n = qc->v + v;
+
+       BUG_ON((s64) n < 0);
+
+       if (mode == KEY_TYPE_QUOTA_NOCHECK)
+               return 0;
+
+       if (v <= 0) {
+               if (n < qc->hardlimit &&
+                   (qc->warning_issued & (1 << HARDWARN))) {
+                       qc->warning_issued &= ~(1 << HARDWARN);
+                       prepare_msg(qtype, counter, msgs, HARDBELOW);
+               }
+
+               if (n < qc->softlimit &&
+                   (qc->warning_issued & (1 << SOFTWARN))) {
+                       qc->warning_issued &= ~(1 << SOFTWARN);
+                       prepare_msg(qtype, counter, msgs, SOFTBELOW);
+               }
+
+               qc->warning_issued = 0;
+               return 0;
+       }
+
+       if (qc->hardlimit &&
+           qc->hardlimit < n &&
+           !ignore_hardlimit(q)) {
+               if (mode == KEY_TYPE_QUOTA_PREALLOC)
+                       return -EDQUOT;
+
+               prepare_warning(qc, qtype, counter, msgs, HARDWARN);
+       }
+
+       if (qc->softlimit &&
+           qc->softlimit < n &&
+           qc->timer &&
+           ktime_get_real_seconds() >= qc->timer &&
+           !ignore_hardlimit(q)) {
+               if (mode == KEY_TYPE_QUOTA_PREALLOC)
+                       return -EDQUOT;
+
+               prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN);
+       }
+
+       if (qc->softlimit &&
+           qc->softlimit < n &&
+           qc->timer == 0) {
+               if (mode == KEY_TYPE_QUOTA_PREALLOC)
+                       return -EDQUOT;
+
+               prepare_warning(qc, qtype, counter, msgs, SOFTWARN);
+
+               /* XXX is this the right one? */
+               qc->timer = ktime_get_real_seconds() +
+                       q->limits[counter].warnlimit;
+       }
+
+       return 0;
+}
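+
+/*
+ * Informal summary of the checks above: usage rising past the hard limit
+ * fails PREALLOC allocations outright; rising past the soft limit starts a
+ * grace timer (warnlimit seconds out, per the XXX above), after whose
+ * expiry PREALLOC allocations fail too; usage dropping back below a limit
+ * queues the corresponding "below" notification.
+ */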
+
+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
+                   enum quota_counters counter, s64 v,
+                   enum quota_acct_mode mode)
+{
+       unsigned qtypes = enabled_qtypes(c);
+       struct bch_memquota_type *q;
+       struct bch_memquota *mq[QTYP_NR];
+       struct quota_msgs msgs;
+       unsigned i;
+       int ret = 0;
+
+       memset(&msgs, 0, sizeof(msgs));
+
+       for_each_set_qtype(c, i, q, qtypes)
+               mutex_lock_nested(&q->lock, i);
+
+       for_each_set_qtype(c, i, q, qtypes) {
+               mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_NOFS);
+               if (!mq[i]) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode);
+               if (ret)
+                       goto err;
+       }
+
+       for_each_set_qtype(c, i, q, qtypes)
+               mq[i]->c[counter].v += v;
+err:
+       for_each_set_qtype(c, i, q, qtypes)
+               mutex_unlock(&q->lock);
+
+       flush_warnings(qid, c->vfs_sb, &msgs);
+
+       return ret;
+}
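+
+/*
+ * Example call (a sketch mirroring the use in bch2_fs_quota_read() below):
+ * charge an inode's space to its quotas without enforcing limits:
+ *
+ *   bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
+ *                   KEY_TYPE_QUOTA_NOCHECK);
+ *
+ * With KEY_TYPE_QUOTA_PREALLOC instead, the call fails with -EDQUOT once a
+ * hard limit (or an expired soft limit) would be exceeded.
+ */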
+
+static void __bch2_quota_transfer(struct bch_memquota *src_q,
+                                 struct bch_memquota *dst_q,
+                                 enum quota_counters counter, s64 v)
+{
+       BUG_ON(v > src_q->c[counter].v);
+       BUG_ON(v + dst_q->c[counter].v < v);
+
+       src_q->c[counter].v -= v;
+       dst_q->c[counter].v += v;
+}
+
+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
+                       struct bch_qid dst,
+                       struct bch_qid src, u64 space,
+                       enum quota_acct_mode mode)
+{
+       struct bch_memquota_type *q;
+       struct bch_memquota *src_q[QTYP_NR], *dst_q[QTYP_NR];
+       struct quota_msgs msgs;
+       unsigned i;
+       int ret = 0;
+
+       qtypes &= enabled_qtypes(c);
+
+       memset(&msgs, 0, sizeof(msgs));
+
+       for_each_set_qtype(c, i, q, qtypes)
+               mutex_lock_nested(&q->lock, i);
+
+       for_each_set_qtype(c, i, q, qtypes) {
+               src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_NOFS);
+               dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_NOFS);
+
+               if (!src_q[i] || !dst_q[i]) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC,
+                                            dst_q[i]->c[Q_SPC].v + space,
+                                            mode);
+               if (ret)
+                       goto err;
+
+               ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO,
+                                            dst_q[i]->c[Q_INO].v + 1,
+                                            mode);
+               if (ret)
+                       goto err;
+       }
+
+       for_each_set_qtype(c, i, q, qtypes) {
+               __bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space);
+               __bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1);
+       }
+
+err:
+       for_each_set_qtype(c, i, q, qtypes)
+               mutex_unlock(&q->lock);
+
+       flush_warnings(dst, c->vfs_sb, &msgs);
+
+       return ret;
+}
+
+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_quota dq;
+       struct bch_memquota_type *q;
+       struct bch_memquota *mq;
+       unsigned i;
+
+       BUG_ON(k.k->p.inode >= QTYP_NR);
+
+       switch (k.k->type) {
+       case KEY_TYPE_quota:
+               dq = bkey_s_c_to_quota(k);
+               q = &c->quotas[k.k->p.inode];
+
+               mutex_lock(&q->lock);
+               mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL);
+               if (!mq) {
+                       mutex_unlock(&q->lock);
+                       return -ENOMEM;
+               }
+
+               for (i = 0; i < Q_COUNTERS; i++) {
+                       mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit);
+                       mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit);
+               }
+
+               mutex_unlock(&q->lock);
+       }
+
+       return 0;
+}
+
+static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0),
+                          BTREE_ITER_PREFETCH, k, ret) {
+               if (k.k->p.inode != type)
+                       break;
+
+               ret = __bch2_quota_set(c, k);
+               if (ret)
+                       break;
+       }
+
+       return bch2_trans_exit(&trans) ?: ret;
+}
+
+void bch2_fs_quota_exit(struct bch_fs *c)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
+               genradix_free(&c->quotas[i].table);
+}
+
+void bch2_fs_quota_init(struct bch_fs *c)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
+               mutex_init(&c->quotas[i].lock);
+}
+
+static void bch2_sb_quota_read(struct bch_fs *c)
+{
+       struct bch_sb_field_quota *sb_quota;
+       unsigned i, j;
+
+       sb_quota = bch2_sb_get_quota(c->disk_sb.sb);
+       if (!sb_quota)
+               return;
+
+       for (i = 0; i < QTYP_NR; i++) {
+               struct bch_memquota_type *q = &c->quotas[i];
+
+               for (j = 0; j < Q_COUNTERS; j++) {
+                       q->limits[j].timelimit =
+                               le32_to_cpu(sb_quota->q[i].c[j].timelimit);
+                       q->limits[j].warnlimit =
+                               le32_to_cpu(sb_quota->q[i].c[j].warnlimit);
+               }
+       }
+}
+
+int bch2_fs_quota_read(struct bch_fs *c)
+{
+       unsigned i, qtypes = enabled_qtypes(c);
+       struct bch_memquota_type *q;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bch_inode_unpacked u;
+       struct bkey_s_c k;
+       int ret;
+
+       mutex_lock(&c->sb_lock);
+       bch2_sb_quota_read(c);
+       mutex_unlock(&c->sb_lock);
+
+       for_each_set_qtype(c, i, q, qtypes) {
+               ret = bch2_quota_init_type(c, i);
+               if (ret)
+                       return ret;
+       }
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN,
+                          BTREE_ITER_PREFETCH, k, ret) {
+               switch (k.k->type) {
+               case KEY_TYPE_inode:
+                       ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
+                       if (ret) {
+                               bch2_trans_exit(&trans);
+                               return ret;
+                       }
+
+                       bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
+                                       KEY_TYPE_QUOTA_NOCHECK);
+                       bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
+                                       KEY_TYPE_QUOTA_NOCHECK);
+               }
+       }
+       return bch2_trans_exit(&trans) ?: ret;
+}
+
+/* Enable/disable/delete quotas for an entire filesystem: */
+
+static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
+{
+       struct bch_fs *c = sb->s_fs_info;
+
+       if (sb->s_flags & SB_RDONLY)
+               return -EROFS;
+
+       /* Accounting must be enabled at mount time: */
+       if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT))
+               return -EINVAL;
+
+       /* Can't enable enforcement without accounting: */
+       if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota)
+               return -EINVAL;
+
+       if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota)
+               return -EINVAL;
+
+       if ((uflags & FS_QUOTA_PDQ_ENFD) && !c->opts.prjquota)
+               return -EINVAL;
+
+       mutex_lock(&c->sb_lock);
+       if (uflags & FS_QUOTA_UDQ_ENFD)
+               SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true);
+
+       if (uflags & FS_QUOTA_GDQ_ENFD)
+               SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true);
+
+       if (uflags & FS_QUOTA_PDQ_ENFD)
+               SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true);
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       return 0;
+}
+
+static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
+{
+       struct bch_fs *c = sb->s_fs_info;
+
+       if (sb->s_flags & SB_RDONLY)
+               return -EROFS;
+
+       mutex_lock(&c->sb_lock);
+       if (uflags & FS_QUOTA_UDQ_ENFD)
+               SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false);
+
+       if (uflags & FS_QUOTA_GDQ_ENFD)
+               SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false);
+
+       if (uflags & FS_QUOTA_PDQ_ENFD)
+               SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false);
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       return 0;
+}
+
+static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
+{
+       struct bch_fs *c = sb->s_fs_info;
+       int ret;
+
+       if (sb->s_flags & SB_RDONLY)
+               return -EROFS;
+
+       if (uflags & FS_USER_QUOTA) {
+               if (c->opts.usrquota)
+                       return -EINVAL;
+
+               ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
+                                             POS(QTYP_USR, 0),
+                                             POS(QTYP_USR + 1, 0),
+                                             NULL);
+               if (ret)
+                       return ret;
+       }
+
+       if (uflags & FS_GROUP_QUOTA) {
+               if (c->opts.grpquota)
+                       return -EINVAL;
+
+               ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
+                                             POS(QTYP_GRP, 0),
+                                             POS(QTYP_GRP + 1, 0),
+                                             NULL);
+               if (ret)
+                       return ret;
+       }
+
+       if (uflags & FS_PROJ_QUOTA) {
+               if (c->opts.prjquota)
+                       return -EINVAL;
+
+               ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
+                                             POS(QTYP_PRJ, 0),
+                                             POS(QTYP_PRJ + 1, 0),
+                                             NULL);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * Return quota status information, such as enforcement status and quota
+ * file inode numbers.
+ */
+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state)
+{
+       struct bch_fs *c = sb->s_fs_info;
+       unsigned qtypes = enabled_qtypes(c);
+       unsigned i;
+
+       memset(state, 0, sizeof(*state));
+
+       for (i = 0; i < QTYP_NR; i++) {
+               state->s_state[i].flags |= QCI_SYSFILE;
+
+               if (!(qtypes & (1 << i)))
+                       continue;
+
+               state->s_state[i].flags |= QCI_ACCT_ENABLED;
+
+               state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit;
+               state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit;
+
+               state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit;
+               state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit;
+       }
+
+       return 0;
+}
+
+/*
+ * Adjust quota timers & warnings
+ */
+static int bch2_quota_set_info(struct super_block *sb, int type,
+                              struct qc_info *info)
+{
+       struct bch_fs *c = sb->s_fs_info;
+       struct bch_sb_field_quota *sb_quota;
+       struct bch_memquota_type *q;
+
+       if (sb->s_flags & SB_RDONLY)
+               return -EROFS;
+
+       if (type >= QTYP_NR)
+               return -EINVAL;
+
+       if (!((1 << type) & enabled_qtypes(c)))
+               return -ESRCH;
+
+       if (info->i_fieldmask &
+           ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
+               return -EINVAL;
+
+       q = &c->quotas[type];
+
+       mutex_lock(&c->sb_lock);
+       sb_quota = bch2_sb_get_quota(c->disk_sb.sb);
+       if (!sb_quota) {
+               sb_quota = bch2_sb_resize_quota(&c->disk_sb,
+                                       sizeof(*sb_quota) / sizeof(u64));
+               if (!sb_quota) {
+                       mutex_unlock(&c->sb_lock);
+                       return -ENOSPC;
+               }
+       }
+
+       if (info->i_fieldmask & QC_SPC_TIMER)
+               sb_quota->q[type].c[Q_SPC].timelimit =
+                       cpu_to_le32(info->i_spc_timelimit);
+
+       if (info->i_fieldmask & QC_SPC_WARNS)
+               sb_quota->q[type].c[Q_SPC].warnlimit =
+                       cpu_to_le32(info->i_spc_warnlimit);
+
+       if (info->i_fieldmask & QC_INO_TIMER)
+               sb_quota->q[type].c[Q_INO].timelimit =
+                       cpu_to_le32(info->i_ino_timelimit);
+
+       if (info->i_fieldmask & QC_INO_WARNS)
+               sb_quota->q[type].c[Q_INO].warnlimit =
+                       cpu_to_le32(info->i_ino_warnlimit);
+
+       bch2_sb_quota_read(c);
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       return 0;
+}
+
+/* Get/set individual quotas: */
+
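+/*
+ * Quota counters are kept in 512-byte sectors internally; the VFS qc_dqblk
+ * interface wants bytes for the space fields, hence the << 9 conversions
+ * below:
+ */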
+static void __bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src)
+{
+       dst->d_space            = src->c[Q_SPC].v << 9;
+       dst->d_spc_hardlimit    = src->c[Q_SPC].hardlimit << 9;
+       dst->d_spc_softlimit    = src->c[Q_SPC].softlimit << 9;
+       dst->d_spc_timer        = src->c[Q_SPC].timer;
+       dst->d_spc_warns        = src->c[Q_SPC].warns;
+
+       dst->d_ino_count        = src->c[Q_INO].v;
+       dst->d_ino_hardlimit    = src->c[Q_INO].hardlimit;
+       dst->d_ino_softlimit    = src->c[Q_INO].softlimit;
+       dst->d_ino_timer        = src->c[Q_INO].timer;
+       dst->d_ino_warns        = src->c[Q_INO].warns;
+}
+
+static int bch2_get_quota(struct super_block *sb, struct kqid kqid,
+                         struct qc_dqblk *qdq)
+{
+       struct bch_fs *c                = sb->s_fs_info;
+       struct bch_memquota_type *q     = &c->quotas[kqid.type];
+       qid_t qid                       = from_kqid(&init_user_ns, kqid);
+       struct bch_memquota *mq;
+
+       memset(qdq, 0, sizeof(*qdq));
+
+       mutex_lock(&q->lock);
+       mq = genradix_ptr(&q->table, qid);
+       if (mq)
+               __bch2_quota_get(qdq, mq);
+       mutex_unlock(&q->lock);
+
+       return 0;
+}
+
+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
+                              struct qc_dqblk *qdq)
+{
+       struct bch_fs *c                = sb->s_fs_info;
+       struct bch_memquota_type *q     = &c->quotas[kqid->type];
+       qid_t qid                       = from_kqid(&init_user_ns, *kqid);
+       struct genradix_iter iter;
+       struct bch_memquota *mq;
+       int ret = 0;
+
+       mutex_lock(&q->lock);
+
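+       /*
+        * Skip table entries that are entirely zeroed; comparing against the
+        * zero page avoids keeping an all-zeroes sentinel struct around just
+        * for this:
+        */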
+       genradix_for_each_from(&q->table, iter, mq, qid)
+               if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
+                       __bch2_quota_get(qdq, mq);
+                       *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
+                       goto found;
+               }
+
+       ret = -ENOENT;
+found:
+       mutex_unlock(&q->lock);
+       return ret;
+}
+
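+/*
+ * Read the existing quota key, if any, then apply just the fields set in
+ * qdq->d_fieldmask and write back the result in one transaction:
+ */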
+static int bch2_set_quota_trans(struct btree_trans *trans,
+                               struct bkey_i_quota *new_quota,
+                               struct qc_dqblk *qdq)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p,
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+       k = bch2_btree_iter_peek_slot(iter);
+
+       ret = bkey_err(k);
+       if (unlikely(ret))
+               return ret;
+
+       if (k.k->type == KEY_TYPE_quota)
+               new_quota->v = *bkey_s_c_to_quota(k).v;
+
+       if (qdq->d_fieldmask & QC_SPC_SOFT)
+               new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
+       if (qdq->d_fieldmask & QC_SPC_HARD)
+               new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
+
+       if (qdq->d_fieldmask & QC_INO_SOFT)
+               new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
+       if (qdq->d_fieldmask & QC_INO_HARD)
+               new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+
+       return bch2_trans_update(trans, iter, &new_quota->k_i, 0);
+}
+
+static int bch2_set_quota(struct super_block *sb, struct kqid qid,
+                         struct qc_dqblk *qdq)
+{
+       struct bch_fs *c = sb->s_fs_info;
+       struct btree_trans trans;
+       struct bkey_i_quota new_quota;
+       int ret;
+
+       if (sb->s_flags & SB_RDONLY)
+               return -EROFS;
+
+       bkey_quota_init(&new_quota.k_i);
+       new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK,
+                           bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
+               __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
+
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
+
+const struct quotactl_ops bch2_quotactl_operations = {
+       .quota_enable           = bch2_quota_enable,
+       .quota_disable          = bch2_quota_disable,
+       .rm_xquota              = bch2_quota_remove,
+
+       .get_state              = bch2_quota_get_state,
+       .set_info               = bch2_quota_set_info,
+
+       .get_dqblk              = bch2_get_quota,
+       .get_nextdqblk          = bch2_get_next_quota,
+       .set_dqblk              = bch2_set_quota,
+};
+
+#endif /* CONFIG_BCACHEFS_QUOTA */
diff --git a/libbcachefs/quota.h b/libbcachefs/quota.h
new file mode 100644 (file)
index 0000000..51e4f97
--- /dev/null
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_QUOTA_H
+#define _BCACHEFS_QUOTA_H
+
+#include "inode.h"
+#include "quota_types.h"
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
+
+const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_quota (struct bkey_ops) {                \
+       .key_invalid    = bch2_quota_invalid,           \
+       .val_to_text    = bch2_quota_to_text,           \
+}
+
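+/* bi_project is stored with a +1 bias, so that 0 can mean "no project": */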
+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
+{
+       return (struct bch_qid) {
+               .q[QTYP_USR] = u->bi_uid,
+               .q[QTYP_GRP] = u->bi_gid,
+               .q[QTYP_PRJ] = u->bi_project ? u->bi_project - 1 : 0,
+       };
+}
+
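+/* bitmask of quota types enabled via mount options: */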
+static inline unsigned enabled_qtypes(struct bch_fs *c)
+{
+       return ((c->opts.usrquota << QTYP_USR)|
+               (c->opts.grpquota << QTYP_GRP)|
+               (c->opts.prjquota << QTYP_PRJ));
+}
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters,
+                   s64, enum quota_acct_mode);
+
+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid,
+                       struct bch_qid, u64, enum quota_acct_mode);
+
+void bch2_fs_quota_exit(struct bch_fs *);
+void bch2_fs_quota_init(struct bch_fs *);
+int bch2_fs_quota_read(struct bch_fs *);
+
+extern const struct quotactl_ops bch2_quotactl_operations;
+
+#else
+
+static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
+                                 enum quota_counters counter, s64 v,
+                                 enum quota_acct_mode mode)
+{
+       return 0;
+}
+
+static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
+                                     struct bch_qid dst,
+                                     struct bch_qid src, u64 space,
+                                     enum quota_acct_mode mode)
+{
+       return 0;
+}
+
+static inline void bch2_fs_quota_exit(struct bch_fs *c) {}
+static inline void bch2_fs_quota_init(struct bch_fs *c) {}
+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; }
+
+#endif
+
+#endif /* _BCACHEFS_QUOTA_H */
diff --git a/libbcachefs/quota_types.h b/libbcachefs/quota_types.h
new file mode 100644 (file)
index 0000000..6a13608
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_QUOTA_TYPES_H
+#define _BCACHEFS_QUOTA_TYPES_H
+
+#include <linux/generic-radix-tree.h>
+
+struct bch_qid {
+       u32             q[QTYP_NR];
+};
+
+enum quota_acct_mode {
+       KEY_TYPE_QUOTA_PREALLOC,
+       KEY_TYPE_QUOTA_WARN,
+       KEY_TYPE_QUOTA_NOCHECK,
+};
+
+struct memquota_counter {
+       u64                             v;
+       u64                             hardlimit;
+       u64                             softlimit;
+       s64                             timer;
+       int                             warns;
+       int                             warning_issued;
+};
+
+struct bch_memquota {
+       struct memquota_counter         c[Q_COUNTERS];
+};
+
+typedef GENRADIX(struct bch_memquota)  bch_memquota_table;
+
+struct quota_limit {
+       u32                             timelimit;
+       u32                             warnlimit;
+};
+
+struct bch_memquota_type {
+       struct quota_limit              limits[Q_COUNTERS];
+       bch_memquota_table              table;
+       struct mutex                    lock;
+};
+
+#endif /* _BCACHEFS_QUOTA_TYPES_H */
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
new file mode 100644 (file)
index 0000000..56a1f76
--- /dev/null
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "btree_iter.h"
+#include "buckets.h"
+#include "clock.h"
+#include "disk_groups.h"
+#include "extents.h"
+#include "io.h"
+#include "move.h"
+#include "rebalance.h"
+#include "super-io.h"
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/sched/cputime.h>
+#include <trace/events/bcachefs.h>
+
+/*
+ * Check if an extent should be moved: returns -1 if it should not be moved,
+ * the device of the pointer that should be moved if known, or INT_MAX if
+ * unknown.
+ */
+static int __bch2_rebalance_pred(struct bch_fs *c,
+                                struct bkey_s_c k,
+                                struct bch_io_opts *io_opts)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+
+       if (io_opts->background_compression &&
+           !bch2_bkey_is_incompressible(k))
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+                       if (!p.ptr.cached &&
+                           p.crc.compression_type !=
+                           bch2_compression_opt_to_type[io_opts->background_compression])
+                               return p.ptr.dev;
+
+       if (io_opts->background_target)
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+                       if (!p.ptr.cached &&
+                           !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target))
+                               return p.ptr.dev;
+
+       return -1;
+}
+
+void bch2_rebalance_add_key(struct bch_fs *c,
+                           struct bkey_s_c k,
+                           struct bch_io_opts *io_opts)
+{
+       atomic64_t *counter;
+       int dev;
+
+       dev = __bch2_rebalance_pred(c, k, io_opts);
+       if (dev < 0)
+               return;
+
+       counter = dev < INT_MAX
+               ? &bch_dev_bkey_exists(c, dev)->rebalance_work
+               : &c->rebalance.work_unknown_dev;
+
+       if (atomic64_add_return(k.k->size, counter) == k.k->size)
+               rebalance_wakeup(c);
+}
+
+static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
+                                   struct bkey_s_c k,
+                                   struct bch_io_opts *io_opts,
+                                   struct data_opts *data_opts)
+{
+       if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
+               data_opts->target               = io_opts->background_target;
+               data_opts->btree_insert_flags   = 0;
+               return DATA_ADD_REPLICAS;
+       } else {
+               return DATA_SKIP;
+       }
+}
+
+void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
+{
+       if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) ==
+           sectors)
+               rebalance_wakeup(c);
+}
+
+struct rebalance_work {
+       int             dev_most_full_idx;
+       unsigned        dev_most_full_percent;
+       u64             dev_most_full_work;
+       u64             dev_most_full_capacity;
+       u64             total_work;
+};
+
+static void rebalance_work_accumulate(struct rebalance_work *w,
+               u64 dev_work, u64 unknown_dev, u64 capacity, int idx)
+{
+       unsigned percent_full;
+       u64 work = dev_work + unknown_dev;
+
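+       /* guard against u64 overflow in the sum above: */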
+       if (work < dev_work || work < unknown_dev)
+               work = U64_MAX;
+       work = min(work, capacity);
+
+       percent_full = div64_u64(work * 100, capacity);
+
+       if (percent_full >= w->dev_most_full_percent) {
+               w->dev_most_full_idx            = idx;
+               w->dev_most_full_percent        = percent_full;
+               w->dev_most_full_work           = work;
+               w->dev_most_full_capacity       = capacity;
+       }
+
+       if (w->total_work + dev_work >= w->total_work &&
+           w->total_work + dev_work >= dev_work)
+               w->total_work += dev_work;
+}
+
+static struct rebalance_work rebalance_work(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       struct rebalance_work ret = { .dev_most_full_idx = -1 };
+       u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev);
+       unsigned i;
+
+       for_each_online_member(ca, c, i)
+               rebalance_work_accumulate(&ret,
+                       atomic64_read(&ca->rebalance_work),
+                       unknown_dev,
+                       bucket_to_sector(ca, ca->mi.nbuckets -
+                                        ca->mi.first_bucket),
+                       i);
+
+       rebalance_work_accumulate(&ret,
+               unknown_dev, 0, c->capacity, -1);
+
+       return ret;
+}
+
+static void rebalance_work_reset(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i;
+
+       for_each_online_member(ca, c, i)
+               atomic64_set(&ca->rebalance_work, 0);
+
+       atomic64_set(&c->rebalance.work_unknown_dev, 0);
+}
+
+static unsigned long curr_cputime(void)
+{
+       u64 utime, stime;
+
+       task_cputime_adjusted(current, &utime, &stime);
+       return nsecs_to_jiffies(utime + stime);
+}
+
+static int bch2_rebalance_thread(void *arg)
+{
+       struct bch_fs *c = arg;
+       struct bch_fs_rebalance *r = &c->rebalance;
+       struct io_clock *clock = &c->io_clock[WRITE];
+       struct rebalance_work w, p;
+       unsigned long start, prev_start;
+       unsigned long prev_run_time, prev_run_cputime;
+       unsigned long cputime, prev_cputime;
+       unsigned long io_start;
+       long throttle;
+
+       set_freezable();
+
+       io_start        = atomic_long_read(&clock->now);
+       p               = rebalance_work(c);
+       prev_start      = jiffies;
+       prev_cputime    = curr_cputime();
+
+       while (!kthread_wait_freezable(r->enabled)) {
+               cond_resched();
+
+               start                   = jiffies;
+               cputime                 = curr_cputime();
+
+               prev_run_time           = start - prev_start;
+               prev_run_cputime        = cputime - prev_cputime;
+
+               w                       = rebalance_work(c);
+               BUG_ON(!w.dev_most_full_capacity);
+
+               if (!w.total_work) {
+                       r->state = REBALANCE_WAITING;
+                       kthread_wait_freezable(rebalance_work(c).total_work);
+                       continue;
+               }
+
+               /*
+                * If there isn't much work to do, throttle cpu usage: sleep
+                * long enough that our cputime over the interval comes out
+                * roughly proportional to how full the fullest device is:
+                */
+               throttle = prev_run_cputime * 100 /
+                       max(1U, w.dev_most_full_percent) -
+                       prev_run_time;
+
+               if (w.dev_most_full_percent < 20 && throttle > 0) {
+                       r->throttled_until_iotime = io_start +
+                               div_u64(w.dev_most_full_capacity *
+                                       (20 - w.dev_most_full_percent),
+                                       50);
+
+                       if (atomic_long_read(&clock->now) + clock->max_slop <
+                           r->throttled_until_iotime) {
+                               r->throttled_until_cputime = start + throttle;
+                               r->state = REBALANCE_THROTTLED;
+
+                               bch2_kthread_io_clock_wait(clock,
+                                       r->throttled_until_iotime,
+                                       throttle);
+                               continue;
+                       }
+               }
+
+               /* minimum 1MB/sec (1 << 11 sectors of 512 bytes): */
+               r->pd.rate.rate =
+                       max_t(u64, 1 << 11,
+                             r->pd.rate.rate *
+                             max(p.dev_most_full_percent, 1U) /
+                             max(w.dev_most_full_percent, 1U));
+
+               io_start        = atomic_long_read(&clock->now);
+               p               = w;
+               prev_start      = start;
+               prev_cputime    = cputime;
+
+               r->state = REBALANCE_RUNNING;
+               memset(&r->move_stats, 0, sizeof(r->move_stats));
+               rebalance_work_reset(c);
+
+               bch2_move_data(c,
+                              /* ratelimiting disabled for now */
+                              NULL, /*  &r->pd.rate, */
+                              writepoint_ptr(&c->rebalance_write_point),
+                              POS_MIN, POS_MAX,
+                              rebalance_pred, NULL,
+                              &r->move_stats);
+       }
+
+       return 0;
+}
+
+void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct bch_fs_rebalance *r = &c->rebalance;
+       struct rebalance_work w = rebalance_work(c);
+       char h1[21], h2[21];
+
+       bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9);
+       bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9);
+       pr_buf(out, "fullest_dev (%i):\t%s/%s\n",
+              w.dev_most_full_idx, h1, h2);
+
+       bch2_hprint(&PBUF(h1), w.total_work << 9);
+       bch2_hprint(&PBUF(h2), c->capacity << 9);
+       pr_buf(out, "total work:\t\t%s/%s\n", h1, h2);
+
+       pr_buf(out, "rate:\t\t\t%u\n", r->pd.rate.rate);
+
+       switch (r->state) {
+       case REBALANCE_WAITING:
+               pr_buf(out, "waiting\n");
+               break;
+       case REBALANCE_THROTTLED:
+               bch2_hprint(&PBUF(h1),
+                           (r->throttled_until_iotime -
+                            atomic_long_read(&c->io_clock[WRITE].now)) << 9);
+               pr_buf(out, "throttled for %lu sec or %s io\n",
+                      (r->throttled_until_cputime - jiffies) / HZ,
+                      h1);
+               break;
+       case REBALANCE_RUNNING:
+               pr_buf(out, "running\n");
+               pr_buf(out, "pos %llu:%llu\n",
+                      r->move_stats.pos.inode,
+                      r->move_stats.pos.offset);
+               break;
+       }
+}
+
+void bch2_rebalance_stop(struct bch_fs *c)
+{
+       struct task_struct *p;
+
+       c->rebalance.pd.rate.rate = UINT_MAX;
+       bch2_ratelimit_reset(&c->rebalance.pd.rate);
+
+       p = rcu_dereference_protected(c->rebalance.thread, 1);
+       c->rebalance.thread = NULL;
+
+       if (p) {
+               /* for synchronizing with rebalance_wakeup() */
+               synchronize_rcu();
+
+               kthread_stop(p);
+               put_task_struct(p);
+       }
+}
+
+int bch2_rebalance_start(struct bch_fs *c)
+{
+       struct task_struct *p;
+
+       if (c->opts.nochanges)
+               return 0;
+
+       p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance");
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       get_task_struct(p);
+       rcu_assign_pointer(c->rebalance.thread, p);
+       wake_up_process(p);
+       return 0;
+}
+
+void bch2_fs_rebalance_init(struct bch_fs *c)
+{
+       bch2_pd_controller_init(&c->rebalance.pd);
+
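+       /* assume there's rebalance work to do until we've scanned and know: */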
+       atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX);
+}
diff --git a/libbcachefs/rebalance.h b/libbcachefs/rebalance.h
new file mode 100644 (file)
index 0000000..7ade0bb
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REBALANCE_H
+#define _BCACHEFS_REBALANCE_H
+
+#include "rebalance_types.h"
+
+static inline void rebalance_wakeup(struct bch_fs *c)
+{
+       struct task_struct *p;
+
+       rcu_read_lock();
+       p = rcu_dereference(c->rebalance.thread);
+       if (p)
+               wake_up_process(p);
+       rcu_read_unlock();
+}
+
+void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c,
+                           struct bch_io_opts *);
+void bch2_rebalance_add_work(struct bch_fs *, u64);
+
+void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_rebalance_stop(struct bch_fs *);
+int bch2_rebalance_start(struct bch_fs *);
+void bch2_fs_rebalance_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_REBALANCE_H */
diff --git a/libbcachefs/rebalance_types.h b/libbcachefs/rebalance_types.h
new file mode 100644 (file)
index 0000000..192c6be
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REBALANCE_TYPES_H
+#define _BCACHEFS_REBALANCE_TYPES_H
+
+#include "move_types.h"
+
+enum rebalance_state {
+       REBALANCE_WAITING,
+       REBALANCE_THROTTLED,
+       REBALANCE_RUNNING,
+};
+
+struct bch_fs_rebalance {
+       struct task_struct __rcu *thread;
+       struct bch_pd_controller pd;
+
+       atomic64_t              work_unknown_dev;
+
+       enum rebalance_state    state;
+       unsigned long           throttled_until_iotime;
+       unsigned long           throttled_until_cputime;
+       struct bch_move_stats   move_stats;
+
+       unsigned                enabled:1;
+};
+
+#endif /* _BCACHEFS_REBALANCE_TYPES_H */
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
new file mode 100644 (file)
index 0000000..d70fa96
--- /dev/null
@@ -0,0 +1,1350 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "buckets.h"
+#include "dirent.h"
+#include "ec.h"
+#include "error.h"
+#include "fs-common.h"
+#include "fsck.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "quota.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "super-io.h"
+
+#include <linux/sort.h>
+#include <linux/stat.h>
+
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+/* iterate over keys read from the journal: */
+
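+/*
+ * Binary search for the first journal key at or after (id, level, pos);
+ * returns NULL if there is none:
+ */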
+static struct journal_key *journal_key_search(struct journal_keys *journal_keys,
+                                             enum btree_id id, unsigned level,
+                                             struct bpos pos)
+{
+       size_t l = 0, r = journal_keys->nr, m;
+
+       while (l < r) {
+               m = l + ((r - l) >> 1);
+               if ((cmp_int(id,        journal_keys->d[m].btree_id) ?:
+                    cmp_int(level,     journal_keys->d[m].level) ?:
+                    bkey_cmp(pos,      journal_keys->d[m].k->k.p)) > 0)
+                       l = m + 1;
+               else
+                       r = m;
+       }
+
+       BUG_ON(l < journal_keys->nr &&
+              (cmp_int(id,     journal_keys->d[l].btree_id) ?:
+               cmp_int(level,  journal_keys->d[l].level) ?:
+               bkey_cmp(pos,   journal_keys->d[l].k->k.p)) > 0);
+
+       BUG_ON(l &&
+              (cmp_int(id,     journal_keys->d[l - 1].btree_id) ?:
+               cmp_int(level,  journal_keys->d[l - 1].level) ?:
+               bkey_cmp(pos,   journal_keys->d[l - 1].k->k.p)) <= 0);
+
+       return l < journal_keys->nr ? journal_keys->d + l : NULL;
+}
+
+static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
+{
+       if (iter->k &&
+           iter->k < iter->keys->d + iter->keys->nr &&
+           iter->k->btree_id   == iter->btree_id &&
+           iter->k->level      == iter->level)
+               return iter->k->k;
+
+       iter->k = NULL;
+       return NULL;
+}
+
+static void bch2_journal_iter_advance(struct journal_iter *iter)
+{
+       if (iter->k)
+               iter->k++;
+}
+
+static void bch2_journal_iter_init(struct journal_iter *iter,
+                                  struct journal_keys *journal_keys,
+                                  enum btree_id id, unsigned level,
+                                  struct bpos pos)
+{
+       iter->btree_id  = id;
+       iter->level     = level;
+       iter->keys      = journal_keys;
+       iter->k         = journal_key_search(journal_keys, id, level, pos);
+}
+
+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
+{
+       return iter->btree
+               ? bch2_btree_iter_peek(iter->btree)
+               : bch2_btree_node_iter_peek_unpack(&iter->node_iter,
+                                                  iter->b, &iter->unpacked);
+}
+
+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
+{
+       if (iter->btree)
+               bch2_btree_iter_next(iter->btree);
+       else
+               bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
+}
+
+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
+{
+       switch (iter->last) {
+       case none:
+               break;
+       case btree:
+               bch2_journal_iter_advance_btree(iter);
+               break;
+       case journal:
+               bch2_journal_iter_advance(&iter->journal);
+               break;
+       }
+
+       iter->last = none;
+}
+
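+/*
+ * Peek at the btree and journal iterators together, preferring the journal
+ * key when both have a key at the same position:
+ */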
+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
+{
+       struct bkey_s_c ret;
+
+       while (1) {
+               struct bkey_s_c btree_k         =
+                       bch2_journal_iter_peek_btree(iter);
+               struct bkey_s_c journal_k       =
+                       bkey_i_to_s_c(bch2_journal_iter_peek(&iter->journal));
+
+               if (btree_k.k && journal_k.k) {
+                       int cmp = bkey_cmp(btree_k.k->p, journal_k.k->p);
+
+                       if (!cmp)
+                               bch2_journal_iter_advance_btree(iter);
+
+                       iter->last = cmp < 0 ? btree : journal;
+               } else if (btree_k.k) {
+                       iter->last = btree;
+               } else if (journal_k.k) {
+                       iter->last = journal;
+               } else {
+                       iter->last = none;
+                       return bkey_s_c_null;
+               }
+
+               ret = iter->last == journal ? journal_k : btree_k;
+
+               if (iter->b &&
+                   bkey_cmp(ret.k->p, iter->b->data->max_key) > 0) {
+                       iter->journal.k = NULL;
+                       iter->last = none;
+                       return bkey_s_c_null;
+               }
+
+               if (!bkey_deleted(ret.k))
+                       break;
+
+               bch2_btree_and_journal_iter_advance(iter);
+       }
+
+       return ret;
+}
+
+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *iter)
+{
+       bch2_btree_and_journal_iter_advance(iter);
+
+       return bch2_btree_and_journal_iter_peek(iter);
+}
+
+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *iter,
+                                     struct btree_trans *trans,
+                                     struct journal_keys *journal_keys,
+                                     enum btree_id id, struct bpos pos)
+{
+       memset(iter, 0, sizeof(*iter));
+
+       iter->btree = bch2_trans_get_iter(trans, id, pos, 0);
+       bch2_journal_iter_init(&iter->journal, journal_keys, id, 0, pos);
+}
+
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+                                               struct journal_keys *journal_keys,
+                                               struct btree *b)
+{
+       memset(iter, 0, sizeof(*iter));
+
+       iter->b = b;
+       bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
+       bch2_journal_iter_init(&iter->journal, journal_keys,
+                              b->c.btree_id, b->c.level, b->data->min_key);
+}
+
+/* Walk btree, overlaying keys from the journal: */
+
+static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
+                               struct journal_keys *journal_keys,
+                               enum btree_id btree_id,
+                               btree_walk_node_fn node_fn,
+                               btree_walk_key_fn key_fn)
+{
+       struct btree_and_journal_iter iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
+
+       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+               ret = key_fn(c, btree_id, b->c.level, k);
+               if (ret)
+                       break;
+
+               if (b->c.level) {
+                       struct btree *child;
+                       BKEY_PADDED(k) tmp;
+
+                       bkey_reassemble(&tmp.k, k);
+                       k = bkey_i_to_s_c(&tmp.k);
+
+                       bch2_btree_and_journal_iter_advance(&iter);
+
+                       if (b->c.level > 0) {
+                               child = bch2_btree_node_get_noiter(c, &tmp.k,
+                                                       b->c.btree_id, b->c.level - 1);
+                               ret = PTR_ERR_OR_ZERO(child);
+                               if (ret)
+                                       break;
+
+                               ret   = (node_fn ? node_fn(c, b) : 0) ?:
+                                       bch2_btree_and_journal_walk_recurse(c, child,
+                                               journal_keys, btree_id, node_fn, key_fn);
+                               six_unlock_read(&child->c.lock);
+
+                               if (ret)
+                                       break;
+                       }
+               } else {
+                       bch2_btree_and_journal_iter_advance(&iter);
+               }
+       }
+
+       return ret;
+}
+
+int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys,
+                               enum btree_id btree_id,
+                               btree_walk_node_fn node_fn,
+                               btree_walk_key_fn key_fn)
+{
+       struct btree *b = c->btree_roots[btree_id].b;
+       int ret = 0;
+
+       if (btree_node_fake(b))
+               return 0;
+
+       six_lock_read(&b->c.lock, NULL, NULL);
+       ret   = (node_fn ? node_fn(c, b) : 0) ?:
+               bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
+                                                   node_fn, key_fn) ?:
+               key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key));
+       six_unlock_read(&b->c.lock);
+
+       return ret;
+}
+
+/* sort and dedup all keys in the journal: */
+
+void bch2_journal_entries_free(struct list_head *list)
+{
+       while (!list_empty(list)) {
+               struct journal_replay *i =
+                       list_first_entry(list, struct journal_replay, list);
+               list_del(&i->list);
+               kvpfree(i, offsetof(struct journal_replay, j) +
+                       vstruct_bytes(&i->j));
+       }
+}
+
+/*
+ * When keys compare equal, oldest compares first:
+ */
+static int journal_sort_key_cmp(const void *_l, const void *_r)
+{
+       const struct journal_key *l = _l;
+       const struct journal_key *r = _r;
+
+       return  cmp_int(l->btree_id,    r->btree_id) ?:
+               cmp_int(l->level,       r->level) ?:
+               bkey_cmp(l->k->k.p, r->k->k.p) ?:
+               cmp_int(l->journal_seq, r->journal_seq) ?:
+               cmp_int(l->journal_offset, r->journal_offset);
+}
+
+void bch2_journal_keys_free(struct journal_keys *keys)
+{
+       kvfree(keys->d);
+       keys->d = NULL;
+       keys->nr = 0;
+}
+
+static struct journal_keys journal_keys_sort(struct list_head *journal_entries)
+{
+       struct journal_replay *p;
+       struct jset_entry *entry;
+       struct bkey_i *k, *_n;
+       struct journal_keys keys = { NULL };
+       struct journal_key *src, *dst;
+       size_t nr_keys = 0;
+
+       if (list_empty(journal_entries))
+               return keys;
+
+       keys.journal_seq_base =
+               le64_to_cpu(list_last_entry(journal_entries,
+                               struct journal_replay, list)->j.last_seq);
+
+       list_for_each_entry(p, journal_entries, list) {
+               if (le64_to_cpu(p->j.seq) < keys.journal_seq_base)
+                       continue;
+
+               for_each_jset_key(k, _n, entry, &p->j)
+                       nr_keys++;
+       }
+
+       keys.d = kvmalloc(sizeof(keys.d[0]) * nr_keys, GFP_KERNEL);
+       if (!keys.d)
+               goto err;
+
+       list_for_each_entry(p, journal_entries, list) {
+               if (le64_to_cpu(p->j.seq) < keys.journal_seq_base)
+                       continue;
+
+               for_each_jset_key(k, _n, entry, &p->j)
+                       keys.d[keys.nr++] = (struct journal_key) {
+                               .btree_id       = entry->btree_id,
+                               .level          = entry->level,
+                               .k              = k,
+                               .journal_seq    = le64_to_cpu(p->j.seq) -
+                                       keys.journal_seq_base,
+                               .journal_offset = k->_data - p->j._data,
+                       };
+       }
+
+       sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_key_cmp, NULL);
+
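+       /*
+        * The sort put older duplicates first; keeping only the last key in
+        * each run of equal keys keeps the newest version:
+        */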
+       src = dst = keys.d;
+       while (src < keys.d + keys.nr) {
+               while (src + 1 < keys.d + keys.nr &&
+                      src[0].btree_id  == src[1].btree_id &&
+                      src[0].level     == src[1].level &&
+                      !bkey_cmp(src[0].k->k.p, src[1].k->k.p))
+                       src++;
+
+               *dst++ = *src++;
+       }
+
+       keys.nr = dst - keys.d;
+err:
+       return keys;
+}
+
+/* journal replay: */
+
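+/*
+ * Advance the replay position, releasing our pins on journal entries that
+ * have now been fully replayed:
+ */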
+static void replay_now_at(struct journal *j, u64 seq)
+{
+       BUG_ON(seq < j->replay_journal_seq);
+       BUG_ON(seq > j->replay_journal_seq_end);
+
+       while (j->replay_journal_seq < seq)
+               bch2_journal_pin_put(j, j->replay_journal_seq++);
+}
+
+static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
+                                 struct bkey_i *k)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter, *split_iter;
+       /*
+        * We might cause compressed extents to be split, so we need to pass in
+        * a disk_reservation:
+        */
+       struct disk_reservation disk_res =
+               bch2_disk_reservation_init(c, 0);
+       struct bkey_i *split;
+       struct bpos atomic_end;
+       /*
+        * Some extents aren't equivalent - w.r.t. what the triggers do
+        * - if they're split:
+        */
+       bool remark_if_split = bch2_bkey_sectors_compressed(bkey_i_to_s_c(k)) ||
+               k->k.type == KEY_TYPE_reflink_p;
+       bool remark = false;
+       int ret;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
+retry:
+       bch2_trans_begin(&trans);
+
+       iter = bch2_trans_get_iter(&trans, btree_id,
+                                  bkey_start_pos(&k->k),
+                                  BTREE_ITER_INTENT);
+
+       do {
+               ret = bch2_btree_iter_traverse(iter);
+               if (ret)
+                       goto err;
+
+               atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p);
+
+               split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
+               ret = PTR_ERR_OR_ZERO(split);
+               if (ret)
+                       goto err;
+
+               if (!remark &&
+                   remark_if_split &&
+                   bkey_cmp(atomic_end, k->k.p) < 0) {
+                       ret = bch2_disk_reservation_add(c, &disk_res,
+                                       k->k.size *
+                                       bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(k)),
+                                       BCH_DISK_RESERVATION_NOFAIL);
+                       BUG_ON(ret);
+
+                       remark = true;
+               }
+
+               bkey_copy(split, k);
+               bch2_cut_front(iter->pos, split);
+               bch2_cut_back(atomic_end, split);
+
+               split_iter = bch2_trans_copy_iter(&trans, iter);
+               ret = PTR_ERR_OR_ZERO(split_iter);
+               if (ret)
+                       goto err;
+
+               /*
+                * It's important that we don't go through the
+                * extent_handle_overwrites() and extent_update_to_keys() path
+                * here: journal replay is supposed to treat extents like
+                * regular keys
+                */
+               __bch2_btree_iter_set_pos(split_iter, split->k.p, false);
+               bch2_trans_update(&trans, split_iter, split,
+                                 BTREE_TRIGGER_NORUN);
+
+               bch2_btree_iter_set_pos(iter, split->k.p);
+
+               if (remark) {
+                       ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split),
+                                                 0, split->k.size,
+                                                 BTREE_TRIGGER_INSERT);
+                       if (ret)
+                               goto err;
+               }
+       } while (bkey_cmp(iter->pos, k->k.p) < 0);
+
+       if (remark) {
+               ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
+                                         0, -((s64) k->k.size),
+                                         BTREE_TRIGGER_OVERWRITE);
+               if (ret)
+                       goto err;
+       }
+
+       ret = bch2_trans_commit(&trans, &disk_res, NULL,
+                               BTREE_INSERT_NOFAIL|
+                               BTREE_INSERT_LAZY_RW|
+                               BTREE_INSERT_JOURNAL_REPLAY);
+err:
+       if (ret == -EINTR)
+               goto retry;
+
+       bch2_disk_reservation_put(c, &disk_res);
+
+       return bch2_trans_exit(&trans) ?: ret;
+}
+
+static int __bch2_journal_replay_key(struct btree_trans *trans,
+                                    enum btree_id id, unsigned level,
+                                    struct bkey_i *k)
+{
+       struct btree_iter *iter;
+       int ret;
+
+       iter = bch2_trans_get_node_iter(trans, id, k->k.p,
+                                       BTREE_MAX_DEPTH, level,
+                                       BTREE_ITER_INTENT);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       /*
+        * iter->flags & BTREE_ITER_IS_EXTENTS triggers the update path to run
+        * extent_handle_overwrites() and extent_update_to_keys() - but we don't
+        * want that here, journal replay is supposed to treat extents like
+        * regular keys:
+        */
+       __bch2_btree_iter_set_pos(iter, k->k.p, false);
+
+       ret   = bch2_btree_iter_traverse(iter) ?:
+               bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_journal_replay_key(struct bch_fs *c, enum btree_id id,
+                                  unsigned level, struct bkey_i *k)
+{
+       return bch2_trans_do(c, NULL, NULL,
+                            BTREE_INSERT_NOFAIL|
+                            BTREE_INSERT_LAZY_RW|
+                            BTREE_INSERT_JOURNAL_REPLAY,
+                            __bch2_journal_replay_key(&trans, id, level, k));
+}
+
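+/*
+ * Alloc keys are replayed through the btree key cache, without touching the
+ * alloc btree itself:
+ */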
+static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
+{
+       struct btree_iter *iter;
+       int ret;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, k->k.p,
+                                  BTREE_ITER_CACHED|
+                                  BTREE_ITER_CACHED_NOFILL|
+                                  BTREE_ITER_INTENT);
+       ret =   PTR_ERR_OR_ZERO(iter) ?:
+               bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+{
+       return bch2_trans_do(c, NULL, NULL,
+                            BTREE_INSERT_NOFAIL|
+                            BTREE_INSERT_USE_RESERVE|
+                            BTREE_INSERT_LAZY_RW|
+                            BTREE_INSERT_JOURNAL_REPLAY,
+                       __bch2_alloc_replay_key(&trans, k));
+}
+
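+/*
+ * Sort order for replay: higher (interior) btree levels first, then by
+ * journal sequence:
+ */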
+static int journal_sort_seq_cmp(const void *_l, const void *_r)
+{
+       const struct journal_key *l = _l;
+       const struct journal_key *r = _r;
+
+       return  cmp_int(r->level,       l->level) ?:
+               cmp_int(l->journal_seq, r->journal_seq) ?:
+               cmp_int(l->btree_id,    r->btree_id) ?:
+               bkey_cmp(l->k->k.p,     r->k->k.p);
+}
+
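+/*
+ * Replay happens in three passes, as detailed below: alloc btree keys first
+ * (they only update the btree key cache), then interior node updates, then
+ * leaf node updates:
+ */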
+static int bch2_journal_replay(struct bch_fs *c,
+                              struct journal_keys keys)
+{
+       struct journal *j = &c->journal;
+       struct journal_key *i;
+       u64 seq;
+       int ret;
+
+       sort(keys.d, keys.nr, sizeof(keys.d[0]), journal_sort_seq_cmp, NULL);
+
+       if (keys.nr)
+               replay_now_at(j, keys.journal_seq_base);
+
+       seq = j->replay_journal_seq;
+
+       /*
+        * First replay updates to the alloc btree - these will only update the
+        * btree key cache:
+        */
+       for_each_journal_key(keys, i) {
+               cond_resched();
+
+               if (!i->level && i->btree_id == BTREE_ID_ALLOC) {
+                       j->replay_journal_seq = keys.journal_seq_base + i->journal_seq;
+                       ret = bch2_alloc_replay_key(c, i->k);
+                       if (ret)
+                               goto err;
+               }
+       }
+
+       /*
+        * Next replay updates to interior btree nodes:
+        */
+       for_each_journal_key(keys, i) {
+               cond_resched();
+
+               if (i->level) {
+                       j->replay_journal_seq = keys.journal_seq_base + i->journal_seq;
+                       ret = bch2_journal_replay_key(c, i->btree_id, i->level, i->k);
+                       if (ret)
+                               goto err;
+               }
+       }
+
+       /*
+        * Now that the btree is in a consistent state, we can start journal
+        * reclaim (which will be flushing entries from the btree key cache back
+        * to the btree):
+        */
+       set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
+       set_bit(JOURNAL_RECLAIM_STARTED, &j->flags);
+
+       j->replay_journal_seq = seq;
+
+       /*
+        * Now replay leaf node updates:
+        */
+       for_each_journal_key(keys, i) {
+               cond_resched();
+
+               if (i->level || i->btree_id == BTREE_ID_ALLOC)
+                       continue;
+
+               replay_now_at(j, keys.journal_seq_base + i->journal_seq);
+
+               ret = i->k->k.size
+                       ? bch2_extent_replay_key(c, i->btree_id, i->k)
+                       : bch2_journal_replay_key(c, i->btree_id, i->level, i->k);
+               if (ret)
+                       goto err;
+       }
+
+       replay_now_at(j, j->replay_journal_seq_end);
+       j->replay_journal_seq = 0;
+
+       bch2_journal_set_replay_done(j);
+       bch2_journal_flush_all_pins(j);
+       return bch2_journal_error(j);
+err:
+       bch_err(c, "journal replay: error %d while replaying key", ret);
+       return ret;
+}
+
+static bool journal_empty(struct list_head *journal)
+{
+       return list_empty(journal) ||
+               journal_entry_empty(&list_last_entry(journal,
+                                       struct journal_replay, list)->j);
+}
+
+static int
+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
+                                                 struct list_head *journal)
+{
+       struct journal_replay *i =
+               list_last_entry(journal, struct journal_replay, list);
+       u64 start_seq   = le64_to_cpu(i->j.last_seq);
+       u64 end_seq     = le64_to_cpu(i->j.seq);
+       u64 seq         = start_seq;
+       int ret = 0;
+
+       list_for_each_entry(i, journal, list) {
+               if (le64_to_cpu(i->j.seq) < start_seq)
+                       continue;
+
+               fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
+                       "journal entries %llu-%llu missing! (replaying %llu-%llu)",
+                       seq, le64_to_cpu(i->j.seq) - 1,
+                       start_seq, end_seq);
+
+               seq = le64_to_cpu(i->j.seq);
+
+               fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
+                           "found blacklisted journal entry %llu", seq);
+
+               do {
+                       seq++;
+               } while (bch2_journal_seq_is_blacklisted(c, seq, false));
+       }
+fsck_err:
+       return ret;
+}
+
+/* journal replay early: */
+
+static int journal_replay_entry_early(struct bch_fs *c,
+                                     struct jset_entry *entry)
+{
+       int ret = 0;
+
+       switch (entry->type) {
+       case BCH_JSET_ENTRY_btree_root: {
+               struct btree_root *r;
+
+               if (entry->btree_id >= BTREE_ID_NR) {
+                       bch_err(c, "filesystem has unknown btree type %u",
+                               entry->btree_id);
+                       return -EINVAL;
+               }
+
+               r = &c->btree_roots[entry->btree_id];
+
+               if (entry->u64s) {
+                       r->level = entry->level;
+                       bkey_copy(&r->key, &entry->start[0]);
+                       r->error = 0;
+               } else {
+                       r->error = -EIO;
+               }
+               r->alive = true;
+               break;
+       }
+       case BCH_JSET_ENTRY_usage: {
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+
+               switch (entry->btree_id) {
+               case FS_USAGE_RESERVED:
+                       if (entry->level < BCH_REPLICAS_MAX)
+                               c->usage_base->persistent_reserved[entry->level] =
+                                       le64_to_cpu(u->v);
+                       break;
+               case FS_USAGE_INODES:
+                       c->usage_base->nr_inodes = le64_to_cpu(u->v);
+                       break;
+               case FS_USAGE_KEY_VERSION:
+                       atomic64_set(&c->key_version,
+                                    le64_to_cpu(u->v));
+                       break;
+               }
+
+               break;
+       }
+       case BCH_JSET_ENTRY_data_usage: {
+               struct jset_entry_data_usage *u =
+                       container_of(entry, struct jset_entry_data_usage, entry);
+               ret = bch2_replicas_set_usage(c, &u->r,
+                                             le64_to_cpu(u->v));
+               break;
+       }
+       case BCH_JSET_ENTRY_blacklist: {
+               struct jset_entry_blacklist *bl_entry =
+                       container_of(entry, struct jset_entry_blacklist, entry);
+
+               ret = bch2_journal_seq_blacklist_add(c,
+                               le64_to_cpu(bl_entry->seq),
+                               le64_to_cpu(bl_entry->seq) + 1);
+               break;
+       }
+       case BCH_JSET_ENTRY_blacklist_v2: {
+               struct jset_entry_blacklist_v2 *bl_entry =
+                       container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+               ret = bch2_journal_seq_blacklist_add(c,
+                               le64_to_cpu(bl_entry->start),
+                               le64_to_cpu(bl_entry->end) + 1);
+               break;
+       }
+       }
+
+       return ret;
+}
+
+static int journal_replay_early(struct bch_fs *c,
+                               struct bch_sb_field_clean *clean,
+                               struct list_head *journal)
+{
+       struct jset_entry *entry;
+       int ret;
+
+       if (clean) {
+               c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
+               c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);
+
+               for (entry = clean->start;
+                    entry != vstruct_end(&clean->field);
+                    entry = vstruct_next(entry)) {
+                       ret = journal_replay_entry_early(c, entry);
+                       if (ret)
+                               return ret;
+               }
+       } else {
+               struct journal_replay *i =
+                       list_last_entry(journal, struct journal_replay, list);
+
+               c->bucket_clock[READ].hand = le16_to_cpu(i->j.read_clock);
+               c->bucket_clock[WRITE].hand = le16_to_cpu(i->j.write_clock);
+
+               list_for_each_entry(i, journal, list)
+                       vstruct_for_each(&i->j, entry) {
+                               ret = journal_replay_entry_early(c, entry);
+                               if (ret)
+                                       return ret;
+                       }
+       }
+
+       bch2_fs_usage_initialize(c);
+
+       return 0;
+}
+
+/* sb clean section: */
+
+static struct bkey_i *btree_root_find(struct bch_fs *c,
+                                     struct bch_sb_field_clean *clean,
+                                     struct jset *j,
+                                     enum btree_id id, unsigned *level)
+{
+       struct bkey_i *k;
+       struct jset_entry *entry, *start, *end;
+
+       if (clean) {
+               start = clean->start;
+               end = vstruct_end(&clean->field);
+       } else {
+               start = j->start;
+               end = vstruct_last(j);
+       }
+
+       for (entry = start; entry < end; entry = vstruct_next(entry))
+               if (entry->type == BCH_JSET_ENTRY_btree_root &&
+                   entry->btree_id == id)
+                       goto found;
+
+       return NULL;
+found:
+       if (!entry->u64s)
+               return ERR_PTR(-EINVAL);
+
+       k = entry->start;
+       *level = entry->level;
+       return k;
+}
+
+static int verify_superblock_clean(struct bch_fs *c,
+                                  struct bch_sb_field_clean **cleanp,
+                                  struct jset *j)
+{
+       unsigned i;
+       struct bch_sb_field_clean *clean = *cleanp;
+       int ret = 0;
+
+       if (!c->sb.clean || !j)
+               return 0;
+
+       if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
+                       "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
+                       le64_to_cpu(clean->journal_seq),
+                       le64_to_cpu(j->seq))) {
+               kfree(clean);
+               *cleanp = NULL;
+               return 0;
+       }
+
+       mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
+                       "superblock read clock %u doesn't match journal %u after clean shutdown",
+                       clean->read_clock, j->read_clock);
+       mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
+                       "superblock write clock %u doesn't match journal %u after clean shutdown",
+                       clean->write_clock, j->write_clock);
+
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               char buf1[200], buf2[200];
+               struct bkey_i *k1, *k2;
+               unsigned l1 = 0, l2 = 0;
+
+               k1 = btree_root_find(c, clean, NULL, i, &l1);
+               k2 = btree_root_find(c, NULL, j, i, &l2);
+
+               if (!k1 && !k2)
+                       continue;
+
+               mustfix_fsck_err_on(!k1 || !k2 ||
+                                   IS_ERR(k1) ||
+                                   IS_ERR(k2) ||
+                                   k1->k.u64s != k2->k.u64s ||
+                                   memcmp(k1, k2, bkey_bytes(k1)) ||
+                                   l1 != l2, c,
+                       "superblock btree root %u doesn't match journal after clean shutdown\n"
+                       "sb:      l=%u %s\n"
+                       "journal: l=%u %s\n", i,
+                       l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1),
+                       l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2));
+       }
+fsck_err:
+       return ret;
+}
+
+static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
+{
+       struct bch_sb_field_clean *clean, *sb_clean;
+       int ret;
+
+       mutex_lock(&c->sb_lock);
+       sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
+
+       if (fsck_err_on(!sb_clean, c,
+                       "superblock marked clean but clean section not present")) {
+               SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+               c->sb.clean = false;
+               mutex_unlock(&c->sb_lock);
+               return NULL;
+       }
+
+       clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
+                       GFP_KERNEL);
+       if (!clean) {
+               mutex_unlock(&c->sb_lock);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       if (le16_to_cpu(c->disk_sb.sb->version) <
+           bcachefs_metadata_version_bkey_renumber)
+               bch2_sb_clean_renumber(clean, READ);
+
+       mutex_unlock(&c->sb_lock);
+
+       return clean;
+fsck_err:
+       mutex_unlock(&c->sb_lock);
+       return ERR_PTR(ret);
+}
+
+static int read_btree_roots(struct bch_fs *c)
+{
+       unsigned i;
+       int ret = 0;
+
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               struct btree_root *r = &c->btree_roots[i];
+
+               if (!r->alive)
+                       continue;
+
+               if (i == BTREE_ID_ALLOC &&
+                   c->opts.reconstruct_alloc) {
+                       c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+                       continue;
+               }
+
+               if (r->error) {
+                       __fsck_err(c, i == BTREE_ID_ALLOC
+                                  ? FSCK_CAN_IGNORE : 0,
+                                  "invalid btree root %s",
+                                  bch2_btree_ids[i]);
+                       if (i == BTREE_ID_ALLOC)
+                               c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+               }
+
+               ret = bch2_btree_root_read(c, i, &r->key, r->level);
+               if (ret) {
+                       __fsck_err(c, i == BTREE_ID_ALLOC
+                                  ? FSCK_CAN_IGNORE : 0,
+                                  "error reading btree root %s",
+                                  bch2_btree_ids[i]);
+                       if (i == BTREE_ID_ALLOC)
+                               c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+               }
+       }
+
+       for (i = 0; i < BTREE_ID_NR; i++)
+               if (!c->btree_roots[i].b)
+                       bch2_btree_root_alloc(c, i);
+fsck_err:
+       return ret;
+}
+
+int bch2_fs_recovery(struct bch_fs *c)
+{
+       const char *err = "cannot allocate memory";
+       struct bch_sb_field_clean *clean = NULL;
+       u64 journal_seq;
+       bool write_sb = false, need_write_alloc = false;
+       int ret;
+
+       if (c->sb.clean)
+               clean = read_superblock_clean(c);
+       ret = PTR_ERR_OR_ZERO(clean);
+       if (ret)
+               goto err;
+
+       if (c->sb.clean)
+               bch_info(c, "recovering from clean shutdown, journal seq %llu",
+                        le64_to_cpu(clean->journal_seq));
+
+       if (!c->replicas.entries ||
+           c->opts.rebuild_replicas) {
+               bch_info(c, "building replicas info");
+               set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+       }
+
+       if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
+               struct jset *j;
+
+               ret = bch2_journal_read(c, &c->journal_entries);
+               if (ret)
+                       goto err;
+
+               if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&c->journal_entries), c,
+                               "filesystem marked clean but journal not empty")) {
+                       c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+                       SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+                       c->sb.clean = false;
+               }
+
+               if (!c->sb.clean && list_empty(&c->journal_entries)) {
+                       bch_err(c, "no journal entries found");
+                       ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+                       goto err;
+               }
+
+               c->journal_keys = journal_keys_sort(&c->journal_entries);
+               if (!c->journal_keys.d) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               j = &list_last_entry(&c->journal_entries,
+                                    struct journal_replay, list)->j;
+
+               ret = verify_superblock_clean(c, &clean, j);
+               if (ret)
+                       goto err;
+
+               journal_seq = le64_to_cpu(j->seq) + 1;
+       } else {
+               journal_seq = le64_to_cpu(clean->journal_seq) + 1;
+       }
+
+       if (!c->sb.clean &&
+           !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
+               bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix");
+               ret = -EINVAL;
+               goto err;
+       }
+
+       ret = journal_replay_early(c, clean, &c->journal_entries);
+       if (ret)
+               goto err;
+
+       if (!c->sb.clean) {
+               ret = bch2_journal_seq_blacklist_add(c,
+                                                    journal_seq,
+                                                    journal_seq + 4);
+               if (ret) {
+                       bch_err(c, "error creating new journal seq blacklist entry");
+                       goto err;
+               }
+
+               journal_seq += 4;
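+
+               /*
+                * Worked illustration (assuming an end-exclusive blacklist
+                * interval): if the newest journal entry read had seq 100,
+                * journal_seq is 101 here; seqs 101..104 get blacklisted and
+                * the journal resumes at seq 105, so anything the crashed
+                * instance may have partially written in that window is
+                * ignored from now on.
+                */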
+
+               /*
+                * The superblock needs to be written before we do any btree
+                * node writes: it will be written in the read_write() path
+                */
+       }
+
+       ret = bch2_blacklist_table_initialize(c);
+       if (ret) {
+               bch_err(c, "error initializing journal seq blacklist table");
+               goto err;
+       }
+
+       if (!list_empty(&c->journal_entries)) {
+               ret = verify_journal_entries_not_blacklisted_or_missing(c,
+                                                       &c->journal_entries);
+               if (ret)
+                       goto err;
+       }
+
+       ret = bch2_fs_journal_start(&c->journal, journal_seq,
+                                   &c->journal_entries);
+       if (ret)
+               goto err;
+
+       ret = read_btree_roots(c);
+       if (ret)
+               goto err;
+
+       bch_verbose(c, "starting alloc read");
+       err = "error reading allocation information";
+       ret = bch2_alloc_read(c, &c->journal_keys);
+       if (ret)
+               goto err;
+       bch_verbose(c, "alloc read done");
+
+       bch_verbose(c, "starting stripes_read");
+       err = "error reading stripes";
+       ret = bch2_stripes_read(c, &c->journal_keys);
+       if (ret)
+               goto err;
+       bch_verbose(c, "stripes_read done");
+
+       set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
+
+       if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) &&
+           !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA))) {
+               /*
+                * interior btree node updates aren't consistent with the
+                * journal; after an unclean shutdown we have to walk all
+                * pointers to metadata:
+                */
+               bch_info(c, "starting metadata mark and sweep");
+               err = "error in mark and sweep";
+               ret = bch2_gc(c, &c->journal_keys, true, true);
+               if (ret < 0)
+                       goto err;
+               if (ret)
+                       need_write_alloc = true;
+               bch_verbose(c, "mark and sweep done");
+       }
+
+       if (c->opts.fsck ||
+           !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
+           test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
+               bch_info(c, "starting mark and sweep");
+               err = "error in mark and sweep";
+               ret = bch2_gc(c, &c->journal_keys, true, false);
+               if (ret < 0)
+                       goto err;
+               if (ret)
+                       need_write_alloc = true;
+               bch_verbose(c, "mark and sweep done");
+       }
+
+       clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+       set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+
+       /*
+        * Skip past versions that might have possibly been used (as nonces),
+        * but hadn't had their pointers written:
+        */
+       if (c->sb.encryption_type && !c->sb.clean)
+               atomic64_add(1 << 16, &c->key_version);
+
+       if (c->opts.norecovery)
+               goto out;
+
+       bch_verbose(c, "starting journal replay");
+       err = "journal replay failed";
+       ret = bch2_journal_replay(c, c->journal_keys);
+       if (ret)
+               goto err;
+       bch_verbose(c, "journal replay done");
+
+       if (need_write_alloc && !c->opts.nochanges) {
+               /*
+                * Note that even when the filesystem was clean, there might be
+                * work to do here if we ran gc (because of fsck), which
+                * recalculated oldest_gen:
+                */
+               bch_verbose(c, "writing allocation info");
+               err = "error writing out alloc info";
+               ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW) ?:
+                       bch2_alloc_write(c, BTREE_INSERT_LAZY_RW);
+               if (ret) {
+                       bch_err(c, "error writing alloc info");
+                       goto err;
+               }
+               bch_verbose(c, "alloc write done");
+
+               set_bit(BCH_FS_ALLOC_WRITTEN, &c->flags);
+       }
+
+       if (!c->sb.clean) {
+               if (!(c->sb.features & (1ULL << BCH_FEATURE_atomic_nlink))) {
+                       bch_info(c, "checking inode link counts");
+                       err = "error in recovery";
+                       ret = bch2_fsck_inode_nlink(c);
+                       if (ret)
+                               goto err;
+                       bch_verbose(c, "check inodes done");
+               } else {
+                       bch_verbose(c, "checking for deleted inodes");
+                       err = "error in recovery";
+                       ret = bch2_fsck_walk_inodes_only(c);
+                       if (ret)
+                               goto err;
+                       bch_verbose(c, "check inodes done");
+               }
+       }
+
+       if (c->opts.fsck) {
+               bch_info(c, "starting fsck");
+               err = "error in fsck";
+               ret = bch2_fsck_full(c);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "fsck done");
+       }
+
+       if (enabled_qtypes(c)) {
+               bch_verbose(c, "reading quotas");
+               ret = bch2_fs_quota_read(c);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "quotas done");
+       }
+
+       mutex_lock(&c->sb_lock);
+       if (c->opts.version_upgrade) {
+               if (c->sb.version < bcachefs_metadata_version_new_versioning)
+                       c->disk_sb.sb->version_min =
+                               cpu_to_le16(bcachefs_metadata_version_min);
+               c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
+               c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
+               write_sb = true;
+       }
+
+       if (!test_bit(BCH_FS_ERROR, &c->flags)) {
+               c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
+               write_sb = true;
+       }
+
+       if (c->opts.fsck &&
+           !test_bit(BCH_FS_ERROR, &c->flags)) {
+               c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
+               SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
+               write_sb = true;
+       }
+
+       if (write_sb)
+               bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       if (c->journal_seq_blacklist_table &&
+           c->journal_seq_blacklist_table->nr > 128)
+               queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
+out:
+       ret = 0;
+err:
+fsck_err:
+       set_bit(BCH_FS_FSCK_DONE, &c->flags);
+       bch2_flush_fsck_errs(c);
+
+       if (!c->opts.keep_journal) {
+               bch2_journal_keys_free(&c->journal_keys);
+               bch2_journal_entries_free(&c->journal_entries);
+       }
+       kfree(clean);
+       if (ret)
+               bch_err(c, "Error in recovery: %s (%i)", err, ret);
+       else
+               bch_verbose(c, "ret %i", ret);
+       return ret;
+}
+
+int bch2_fs_initialize(struct bch_fs *c)
+{
+       struct bch_inode_unpacked root_inode, lostfound_inode;
+       struct bkey_inode_buf packed_inode;
+       struct qstr lostfound = QSTR("lost+found");
+       const char *err = "cannot allocate memory";
+       struct bch_dev *ca;
+       LIST_HEAD(journal);
+       unsigned i;
+       int ret;
+
+       bch_notice(c, "initializing new filesystem");
+
+       mutex_lock(&c->sb_lock);
+       for_each_online_member(ca, c, i)
+               bch2_mark_dev_superblock(c, ca, 0);
+       mutex_unlock(&c->sb_lock);
+
+       mutex_lock(&c->sb_lock);
+       c->disk_sb.sb->version = c->disk_sb.sb->version_min =
+               cpu_to_le16(bcachefs_metadata_version_current);
+       c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
+       c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL;
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
+       set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+
+       for (i = 0; i < BTREE_ID_NR; i++)
+               bch2_btree_root_alloc(c, i);
+
+       set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
+       set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
+
+       err = "unable to allocate journal buckets";
+       for_each_online_member(ca, c, i) {
+               ret = bch2_dev_journal_alloc(ca);
+               if (ret) {
+                       percpu_ref_put(&ca->io_ref);
+                       goto err;
+               }
+       }
+
+       /*
+        * journal_res_get() will crash if called before
+        * bch2_fs_journal_start() has set up the journal.pin FIFO and the
+        * journal.cur pointer:
+        */
+       bch2_fs_journal_start(&c->journal, 1, &journal);
+       bch2_journal_set_replay_done(&c->journal);
+
+       err = "error going read-write";
+       ret = bch2_fs_read_write_early(c);
+       if (ret)
+               goto err;
+
+       /*
+        * Write out the superblock and journal buckets, now that we can do
+        * btree updates
+        */
+       err = "error writing alloc info";
+       ret = bch2_alloc_write(c, 0);
+       if (ret)
+               goto err;
+
+       bch2_inode_init(c, &root_inode, 0, 0,
+                       S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
+       root_inode.bi_inum = BCACHEFS_ROOT_INO;
+       bch2_inode_pack(&packed_inode, &root_inode);
+
+       err = "error creating root directory";
+       ret = bch2_btree_insert(c, BTREE_ID_INODES,
+                               &packed_inode.inode.k_i,
+                               NULL, NULL, 0);
+       if (ret)
+               goto err;
+
+       bch2_inode_init_early(c, &lostfound_inode);
+
+       err = "error creating lost+found";
+       ret = bch2_trans_do(c, NULL, NULL, 0,
+               bch2_create_trans(&trans, BCACHEFS_ROOT_INO,
+                                 &root_inode, &lostfound_inode,
+                                 &lostfound,
+                                 0, 0, S_IFDIR|0700, 0,
+                                 NULL, NULL));
+       if (ret)
+               goto err;
+
+       if (enabled_qtypes(c)) {
+               ret = bch2_fs_quota_read(c);
+               if (ret)
+                       goto err;
+       }
+
+       err = "error writing first journal entry";
+       ret = bch2_journal_meta(&c->journal);
+       if (ret)
+               goto err;
+
+       mutex_lock(&c->sb_lock);
+       SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
+       SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       return 0;
+err:
+       pr_err("Error initializing new filesystem: %s (%i)", err, ret);
+       return ret;
+}
diff --git a/libbcachefs/recovery.h b/libbcachefs/recovery.h
new file mode 100644 (file)
index 0000000..a66827c
--- /dev/null
+++ b/libbcachefs/recovery.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_RECOVERY_H
+#define _BCACHEFS_RECOVERY_H
+
+#define for_each_journal_key(keys, i)                          \
+       for (i = (keys).d; i < (keys).d + (keys).nr; (i)++)
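+
+/*
+ * Usage sketch (hypothetical caller; assumes struct journal_key exposes
+ * btree_id and level, as the key sorting in recovery.c does):
+ *
+ *     struct journal_key *i;
+ *
+ *     for_each_journal_key(c->journal_keys, i)
+ *             pr_debug("key for btree %u, level %u", i->btree_id, i->level);
+ */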
+
+struct journal_iter {
+       enum btree_id           btree_id;
+       unsigned                level;
+       struct journal_keys     *keys;
+       struct journal_key      *k;
+};
+
+/*
+ * Iterate over keys in the btree, with keys from the journal overlaid on top:
+ */
+
+struct btree_and_journal_iter {
+       struct btree_iter       *btree;
+
+       struct btree            *b;
+       struct btree_node_iter  node_iter;
+       struct bkey             unpacked;
+
+       struct journal_iter     journal;
+
+       enum last_key_returned {
+               none,
+               btree,
+               journal,
+       }                       last;
+};
+
+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
+struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
+
+void bch2_btree_and_journal_iter_init(struct btree_and_journal_iter *,
+                                     struct btree_trans *,
+                                     struct journal_keys *,
+                                     enum btree_id, struct bpos);
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
+                                               struct journal_keys *,
+                                               struct btree *);
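+
+/*
+ * Minimal iteration sketch (hypothetical caller; transaction setup and
+ * error handling omitted):
+ *
+ *     struct btree_and_journal_iter iter;
+ *     struct bkey_s_c k;
+ *
+ *     bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
+ *                                      BTREE_ID_EXTENTS, POS_MIN);
+ *     while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ *             ... use k ...
+ *             bch2_btree_and_journal_iter_advance(&iter);
+ *     }
+ */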
+
+typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
+typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id,
+                                unsigned level, struct bkey_s_c k);
+
+int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id,
+                               btree_walk_node_fn, btree_walk_key_fn);
+
+void bch2_journal_keys_free(struct journal_keys *);
+void bch2_journal_entries_free(struct list_head *);
+
+int bch2_fs_recovery(struct bch_fs *);
+int bch2_fs_initialize(struct bch_fs *);
+
+#endif /* _BCACHEFS_RECOVERY_H */
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
new file mode 100644 (file)
index 0000000..3c473f1
--- /dev/null
+++ b/libbcachefs/reflink.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bkey_on_stack.h"
+#include "btree_update.h"
+#include "extents.h"
+#include "inode.h"
+#include "io.h"
+#include "reflink.h"
+
+#include <linux/sched/signal.h>
+
+/* reflink pointers */
+
+const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+
+       if (bkey_val_bytes(p.k) != sizeof(*p.v))
+               return "incorrect value size";
+
+       return NULL;
+}
+
+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+
+       pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx));
+}
+
+enum merge_result bch2_reflink_p_merge(struct bch_fs *c,
+                                      struct bkey_s _l, struct bkey_s _r)
+{
+       struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l);
+       struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r);
+
+       if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
+               return BCH_MERGE_NOMERGE;
+
+       if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
+               bch2_key_resize(l.k, KEY_SIZE_MAX);
+               bch2_cut_front_s(l.k->p, _r);
+               return BCH_MERGE_PARTIAL;
+       }
+
+       bch2_key_resize(l.k, l.k->size + r.k->size);
+
+       return BCH_MERGE_MERGE;
+}
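+
+/*
+ * Worked example (illustrative numbers only): a reflink_p of size 8
+ * pointing at idx 100, followed by one of size 8 at idx 108, merges into
+ * a single key of size 16 at idx 100, since 100 + 8 == 108. If the
+ * combined size would exceed KEY_SIZE_MAX, the left key is grown to
+ * KEY_SIZE_MAX and the remainder stays in the right key
+ * (BCH_MERGE_PARTIAL).
+ */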
+
+/* indirect extents */
+
+const char *bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+       if (bkey_val_bytes(r.k) < sizeof(*r.v))
+               return "incorrect value size";
+
+       return bch2_bkey_ptrs_invalid(c, k);
+}
+
+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+       pr_buf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount));
+
+       bch2_bkey_ptrs_to_text(out, c, k);
+}
+
+static int bch2_make_extent_indirect(struct btree_trans *trans,
+                                    struct btree_iter *extent_iter,
+                                    struct bkey_i_extent *e)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *reflink_iter;
+       struct bkey_s_c k;
+       struct bkey_i_reflink_v *r_v;
+       struct bkey_i_reflink_p *r_p;
+       int ret;
+
+       for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK,
+                          POS(0, c->reflink_hint),
+                          BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
+               if (reflink_iter->pos.inode) {
+                       bch2_btree_iter_set_pos(reflink_iter, POS_MIN);
+                       continue;
+               }
+
+               if (bkey_deleted(k.k) && e->k.size <= k.k->size)
+                       break;
+       }
+
+       if (ret)
+               goto err;
+
+       /* rewind iter to start of hole, if necessary: */
+       bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k));
+
+       r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k));
+       ret = PTR_ERR_OR_ZERO(r_v);
+       if (ret)
+               goto err;
+
+       bkey_reflink_v_init(&r_v->k_i);
+       r_v->k.p        = reflink_iter->pos;
+       bch2_key_resize(&r_v->k, e->k.size);
+       r_v->k.version  = e->k.version;
+
+       set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) +
+                         bkey_val_u64s(&e->k));
+       r_v->v.refcount = 0;
+       memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k));
+
+       bch2_trans_update(trans, reflink_iter, &r_v->k_i, 0);
+
+       r_p = bch2_trans_kmalloc(trans, sizeof(*r_p));
+       ret = PTR_ERR_OR_ZERO(r_p);
+       if (ret)
+               goto err;
+
+       e->k.type = KEY_TYPE_reflink_p;
+       r_p = bkey_i_to_reflink_p(&e->k_i);
+       set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
+       r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
+
+       bch2_trans_update(trans, extent_iter, &r_p->k_i, 0);
+err:
+       if (!IS_ERR(reflink_iter))
+               c->reflink_hint = reflink_iter->pos.offset;
+       bch2_trans_iter_put(trans, reflink_iter);
+
+       return ret;
+}
+
+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
+{
+       struct bkey_s_c k = bch2_btree_iter_peek(iter);
+       int ret;
+
+       for_each_btree_key_continue(iter, 0, k, ret) {
+               if (bkey_cmp(iter->pos, end) >= 0)
+                       return bkey_s_c_null;
+
+               if (k.k->type == KEY_TYPE_extent ||
+                   k.k->type == KEY_TYPE_reflink_p)
+                       break;
+       }
+
+       return k;
+}
+
+s64 bch2_remap_range(struct bch_fs *c,
+                    struct bpos dst_start, struct bpos src_start,
+                    u64 remap_sectors, u64 *journal_seq,
+                    u64 new_i_size, s64 *i_sectors_delta)
+{
+       struct btree_trans trans;
+       struct btree_iter *dst_iter, *src_iter;
+       struct bkey_s_c src_k;
+       BKEY_PADDED(k) new_dst;
+       struct bkey_on_stack new_src;
+       struct bpos dst_end = dst_start, src_end = src_start;
+       struct bpos dst_want, src_want;
+       u64 src_done, dst_done;
+       int ret = 0, ret2 = 0;
+
+       if (!c->opts.reflink)
+               return -EOPNOTSUPP;
+
+       if (!percpu_ref_tryget(&c->writes))
+               return -EROFS;
+
+       bch2_check_set_feature(c, BCH_FEATURE_reflink);
+
+       dst_end.offset += remap_sectors;
+       src_end.offset += remap_sectors;
+
+       bkey_on_stack_init(&new_src);
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
+
+       src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start,
+                                      BTREE_ITER_INTENT);
+       dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, dst_start,
+                                      BTREE_ITER_INTENT);
+
+       while (1) {
+               bch2_trans_begin(&trans);
+
+               trans.mem_top = 0;
+
+               if (fatal_signal_pending(current)) {
+                       ret = -EINTR;
+                       goto err;
+               }
+
+               src_k = get_next_src(src_iter, src_end);
+               ret = bkey_err(src_k);
+               if (ret)
+                       goto btree_err;
+
+               src_done = bpos_min(src_iter->pos, src_end).offset -
+                       src_start.offset;
+               dst_want = POS(dst_start.inode, dst_start.offset + src_done);
+
+               if (bkey_cmp(dst_iter->pos, dst_want) < 0) {
+                       ret = bch2_fpunch_at(&trans, dst_iter, dst_want,
+                                            journal_seq, i_sectors_delta);
+                       if (ret)
+                               goto btree_err;
+                       continue;
+               }
+
+               BUG_ON(bkey_cmp(dst_iter->pos, dst_want));
+
+               if (!bkey_cmp(dst_iter->pos, dst_end))
+                       break;
+
+               if (src_k.k->type == KEY_TYPE_extent) {
+                       bkey_on_stack_reassemble(&new_src, c, src_k);
+                       src_k = bkey_i_to_s_c(new_src.k);
+
+                       bch2_cut_front(src_iter->pos,   new_src.k);
+                       bch2_cut_back(src_end,          new_src.k);
+
+                       ret = bch2_make_extent_indirect(&trans, src_iter,
+                                               bkey_i_to_extent(new_src.k));
+                       if (ret)
+                               goto btree_err;
+
+                       BUG_ON(src_k.k->type != KEY_TYPE_reflink_p);
+               }
+
+               if (src_k.k->type == KEY_TYPE_reflink_p) {
+                       struct bkey_s_c_reflink_p src_p =
+                               bkey_s_c_to_reflink_p(src_k);
+                       struct bkey_i_reflink_p *dst_p =
+                               bkey_reflink_p_init(&new_dst.k);
+
+                       u64 offset = le64_to_cpu(src_p.v->idx) +
+                               (src_iter->pos.offset -
+                                bkey_start_offset(src_k.k));
+
+                       dst_p->v.idx = cpu_to_le64(offset);
+               } else {
+                       BUG();
+               }
+
+               new_dst.k.k.p = dst_iter->pos;
+               bch2_key_resize(&new_dst.k.k,
+                               min(src_k.k->p.offset - src_iter->pos.offset,
+                                   dst_end.offset - dst_iter->pos.offset));
+
+               ret = bch2_extent_update(&trans, dst_iter, &new_dst.k,
+                                        NULL, journal_seq,
+                                        new_i_size, i_sectors_delta);
+               if (ret)
+                       goto btree_err;
+
+               dst_done = dst_iter->pos.offset - dst_start.offset;
+               src_want = POS(src_start.inode, src_start.offset + dst_done);
+               bch2_btree_iter_set_pos(src_iter, src_want);
+btree_err:
+               if (ret == -EINTR)
+                       ret = 0;
+               if (ret)
+                       goto err;
+       }
+
+       BUG_ON(bkey_cmp(dst_iter->pos, dst_end));
+err:
+       BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0);
+
+       dst_done = dst_iter->pos.offset - dst_start.offset;
+       new_i_size = min(dst_iter->pos.offset << 9, new_i_size);
+
+       bch2_trans_begin(&trans);
+
+       do {
+               struct bch_inode_unpacked inode_u;
+               struct btree_iter *inode_iter;
+
+               inode_iter = bch2_inode_peek(&trans, &inode_u,
+                               dst_start.inode, BTREE_ITER_INTENT);
+               ret2 = PTR_ERR_OR_ZERO(inode_iter);
+
+               if (!ret2 &&
+                   inode_u.bi_size < new_i_size) {
+                       inode_u.bi_size = new_i_size;
+                       ret2  = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+                               bch2_trans_commit(&trans, NULL, journal_seq, 0);
+               }
+       } while (ret2 == -EINTR);
+
+       ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&new_src, c);
+
+       percpu_ref_put(&c->writes);
+
+       return dst_done ?: ret ?: ret2;
+}
diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h
new file mode 100644 (file)
index 0000000..5445c1c
--- /dev/null
+++ b/libbcachefs/reflink.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REFLINK_H
+#define _BCACHEFS_REFLINK_H
+
+const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+enum merge_result bch2_reflink_p_merge(struct bch_fs *,
+                                      struct bkey_s, struct bkey_s);
+
+#define bch2_bkey_ops_reflink_p (struct bkey_ops) {            \
+       .key_invalid    = bch2_reflink_p_invalid,               \
+       .val_to_text    = bch2_reflink_p_to_text,               \
+       .key_merge      = bch2_reflink_p_merge,         \
+}
+
+const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+
+#define bch2_bkey_ops_reflink_v (struct bkey_ops) {            \
+       .key_invalid    = bch2_reflink_v_invalid,               \
+       .val_to_text    = bch2_reflink_v_to_text,               \
+       .swab           = bch2_ptr_swab,                        \
+}
+
+s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
+                    u64, u64 *, u64, s64 *);
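+
+/*
+ * Caller sketch (hypothetical; inode numbers, byte offsets and length are
+ * the caller's). Positions are in 512-byte sectors:
+ *
+ *     s64 i_sectors_delta = 0;
+ *     s64 sectors = bch2_remap_range(c,
+ *                     POS(dst_inum, dst_offset >> 9),
+ *                     POS(src_inum, src_offset >> 9),
+ *                     len >> 9, &journal_seq,
+ *                     new_i_size, &i_sectors_delta);
+ */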
+
+#endif /* _BCACHEFS_REFLINK_H */
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
new file mode 100644 (file)
index 0000000..6b6506c
--- /dev/null
+++ b/libbcachefs/replicas.c
@@ -0,0 +1,1059 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "buckets.h"
+#include "journal.h"
+#include "replicas.h"
+#include "super-io.h"
+
+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
+                                           struct bch_replicas_cpu *);
+
+/* Replicas tracking - in memory: */
+
+static inline int u8_cmp(u8 l, u8 r)
+{
+       return cmp_int(l, r);
+}
+
+static void verify_replicas_entry(struct bch_replicas_entry *e)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       unsigned i;
+
+       BUG_ON(e->data_type >= BCH_DATA_NR);
+       BUG_ON(!e->nr_devs);
+       BUG_ON(e->nr_required > 1 &&
+              e->nr_required >= e->nr_devs);
+
+       for (i = 0; i + 1 < e->nr_devs; i++)
+               BUG_ON(e->devs[i] >= e->devs[i + 1]);
+#endif
+}
+
+static void replicas_entry_sort(struct bch_replicas_entry *e)
+{
+       bubble_sort(e->devs, e->nr_devs, u8_cmp);
+}
+
+static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
+{
+       eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
+}
+
+void bch2_replicas_entry_to_text(struct printbuf *out,
+                                struct bch_replicas_entry *e)
+{
+       unsigned i;
+
+       pr_buf(out, "%s: %u/%u [",
+              bch2_data_types[e->data_type],
+              e->nr_required,
+              e->nr_devs);
+
+       for (i = 0; i < e->nr_devs; i++)
+               pr_buf(out, i ? " %u" : "%u", e->devs[i]);
+       pr_buf(out, "]");
+}
+
+void bch2_cpu_replicas_to_text(struct printbuf *out,
+                             struct bch_replicas_cpu *r)
+{
+       struct bch_replicas_entry *e;
+       bool first = true;
+
+       for_each_cpu_replicas_entry(r, e) {
+               if (!first)
+                       pr_buf(out, " ");
+               first = false;
+
+               bch2_replicas_entry_to_text(out, e);
+       }
+}
+
+static void extent_to_replicas(struct bkey_s_c k,
+                              struct bch_replicas_entry *r)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+
+       r->nr_required  = 1;
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               if (p.ptr.cached)
+                       continue;
+
+               if (!p.has_ec)
+                       r->devs[r->nr_devs++] = p.ptr.dev;
+               else
+                       r->nr_required = 0;
+       }
+}
+
+static void stripe_to_replicas(struct bkey_s_c k,
+                              struct bch_replicas_entry *r)
+{
+       struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+       const struct bch_extent_ptr *ptr;
+
+       r->nr_required  = s.v->nr_blocks - s.v->nr_redundant;
+
+       for (ptr = s.v->ptrs;
+            ptr < s.v->ptrs + s.v->nr_blocks;
+            ptr++)
+               r->devs[r->nr_devs++] = ptr->dev;
+}
+
+void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
+                          struct bkey_s_c k)
+{
+       e->nr_devs = 0;
+
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
+               e->data_type = BCH_DATA_btree;
+               extent_to_replicas(k, e);
+               break;
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
+               e->data_type = BCH_DATA_user;
+               extent_to_replicas(k, e);
+               break;
+       case KEY_TYPE_stripe:
+               e->data_type = BCH_DATA_user;
+               stripe_to_replicas(k, e);
+               break;
+       }
+
+       replicas_entry_sort(e);
+}
+
+void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
+                             enum bch_data_type data_type,
+                             struct bch_devs_list devs)
+{
+       unsigned i;
+
+       BUG_ON(!data_type ||
+              data_type == BCH_DATA_sb ||
+              data_type >= BCH_DATA_NR);
+
+       e->data_type    = data_type;
+       e->nr_devs      = 0;
+       e->nr_required  = 1;
+
+       for (i = 0; i < devs.nr; i++)
+               e->devs[e->nr_devs++] = devs.devs[i];
+
+       replicas_entry_sort(e);
+}
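+
+/*
+ * Usage sketch (hypothetical caller; bch_replicas_padded provides inline
+ * storage for the entry, as used by __bch2_mark_bkey_replicas() below):
+ *
+ *     struct bch_replicas_padded r;
+ *
+ *     bch2_devlist_to_replicas(&r.e, BCH_DATA_journal, devs);
+ *     ret = bch2_mark_replicas(c, &r.e);
+ */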
+
+static struct bch_replicas_cpu
+cpu_replicas_add_entry(struct bch_replicas_cpu *old,
+                      struct bch_replicas_entry *new_entry)
+{
+       unsigned i;
+       struct bch_replicas_cpu new = {
+               .nr             = old->nr + 1,
+               .entry_size     = max_t(unsigned, old->entry_size,
+                                       replicas_entry_bytes(new_entry)),
+       };
+
+       BUG_ON(!new_entry->data_type);
+       verify_replicas_entry(new_entry);
+
+       new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
+       if (!new.entries)
+               return new;
+
+       for (i = 0; i < old->nr; i++)
+               memcpy(cpu_replicas_entry(&new, i),
+                      cpu_replicas_entry(old, i),
+                      old->entry_size);
+
+       memcpy(cpu_replicas_entry(&new, old->nr),
+              new_entry,
+              replicas_entry_bytes(new_entry));
+
+       bch2_cpu_replicas_sort(&new);
+       return new;
+}
+
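+/*
+ * Entries are kept in eytzinger (breadth-first layout of a sorted array)
+ * order, so eytzinger0_find() below is a cache-friendly binary search.
+ * For example, the sorted sequence [1 2 3 4 5 6 7] is stored as
+ * [4 2 6 1 3 5 7].
+ */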
+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
+                                      struct bch_replicas_entry *search)
+{
+       int idx, entry_size = replicas_entry_bytes(search);
+
+       if (unlikely(entry_size > r->entry_size))
+               return -1;
+
+       verify_replicas_entry(search);
+
+#define entry_cmp(_l, _r, size)        memcmp(_l, _r, entry_size)
+       idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
+                             entry_cmp, search);
+#undef entry_cmp
+
+       return idx < r->nr ? idx : -1;
+}
+
+int bch2_replicas_entry_idx(struct bch_fs *c,
+                           struct bch_replicas_entry *search)
+{
+       replicas_entry_sort(search);
+
+       return __replicas_entry_idx(&c->replicas, search);
+}
+
+static bool __replicas_has_entry(struct bch_replicas_cpu *r,
+                                struct bch_replicas_entry *search)
+{
+       return __replicas_entry_idx(r, search) >= 0;
+}
+
+bool bch2_replicas_marked(struct bch_fs *c,
+                         struct bch_replicas_entry *search)
+{
+       bool marked;
+
+       if (!search->nr_devs)
+               return true;
+
+       verify_replicas_entry(search);
+
+       percpu_down_read(&c->mark_lock);
+       marked = __replicas_has_entry(&c->replicas, search) &&
+               (likely(!c->replicas_gc.entries) ||
+                __replicas_has_entry(&c->replicas_gc, search));
+       percpu_up_read(&c->mark_lock);
+
+       return marked;
+}
+
+static void __replicas_table_update(struct bch_fs_usage *dst,
+                                   struct bch_replicas_cpu *dst_r,
+                                   struct bch_fs_usage *src,
+                                   struct bch_replicas_cpu *src_r)
+{
+       int src_idx, dst_idx;
+
+       *dst = *src;
+
+       for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
+               if (!src->replicas[src_idx])
+                       continue;
+
+               dst_idx = __replicas_entry_idx(dst_r,
+                               cpu_replicas_entry(src_r, src_idx));
+               BUG_ON(dst_idx < 0);
+
+               dst->replicas[dst_idx] = src->replicas[src_idx];
+       }
+}
+
+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
+                                   struct bch_replicas_cpu *dst_r,
+                                   struct bch_fs_usage __percpu *src_p,
+                                   struct bch_replicas_cpu *src_r)
+{
+       unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
+       struct bch_fs_usage *dst, *src = (void *)
+               bch2_acc_percpu_u64s((void *) src_p, src_nr);
+
+       preempt_disable();
+       dst = this_cpu_ptr(dst_p);
+       preempt_enable();
+
+       __replicas_table_update(dst, dst_r, src, src_r);
+}
+
+/*
+ * Resize filesystem accounting:
+ */
+static int replicas_table_update(struct bch_fs *c,
+                                struct bch_replicas_cpu *new_r)
+{
+       struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
+       struct bch_fs_usage *new_scratch = NULL;
+       struct bch_fs_usage __percpu *new_gc = NULL;
+       struct bch_fs_usage *new_base = NULL;
+       unsigned bytes = sizeof(struct bch_fs_usage) +
+               sizeof(u64) * new_r->nr;
+       int ret = -ENOMEM;
+
+       if (!(new_base = kzalloc(bytes, GFP_NOIO)) ||
+           !(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64),
+                                               GFP_NOIO)) ||
+           !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
+                                               GFP_NOIO)) ||
+           !(new_scratch  = kmalloc(bytes, GFP_NOIO)) ||
+           (c->usage_gc &&
+            !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) {
+               bch_err(c, "error updating replicas table: memory allocation failure");
+               goto err;
+       }
+
+       if (c->usage_base)
+               __replicas_table_update(new_base,               new_r,
+                                       c->usage_base,          &c->replicas);
+       if (c->usage[0])
+               __replicas_table_update_pcpu(new_usage[0],      new_r,
+                                            c->usage[0],       &c->replicas);
+       if (c->usage[1])
+               __replicas_table_update_pcpu(new_usage[1],      new_r,
+                                            c->usage[1],       &c->replicas);
+       if (c->usage_gc)
+               __replicas_table_update_pcpu(new_gc,            new_r,
+                                            c->usage_gc,       &c->replicas);
+
+       swap(c->usage_base,     new_base);
+       swap(c->usage[0],       new_usage[0]);
+       swap(c->usage[1],       new_usage[1]);
+       swap(c->usage_scratch,  new_scratch);
+       swap(c->usage_gc,       new_gc);
+       swap(c->replicas,       *new_r);
+       ret = 0;
+err:
+       free_percpu(new_gc);
+       kfree(new_scratch);
+       free_percpu(new_usage[1]);
+       free_percpu(new_usage[0]);
+       kfree(new_base);
+       return ret;
+}
+
+static unsigned reserve_journal_replicas(struct bch_fs *c,
+                                    struct bch_replicas_cpu *r)
+{
+       struct bch_replicas_entry *e;
+       unsigned journal_res_u64s = 0;
+
+       /* nr_inodes: */
+       journal_res_u64s +=
+               DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+       /* key_version: */
+       journal_res_u64s +=
+               DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+       /* persistent_reserved: */
+       journal_res_u64s +=
+               DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
+               BCH_REPLICAS_MAX;
+
+       for_each_cpu_replicas_entry(r, e)
+               journal_res_u64s +=
+                       DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
+                                    e->nr_devs, sizeof(u64));
+       return journal_res_u64s;
+}
+
+noinline
+static int bch2_mark_replicas_slowpath(struct bch_fs *c,
+                               struct bch_replicas_entry *new_entry)
+{
+       struct bch_replicas_cpu new_r, new_gc;
+       int ret = 0;
+
+       verify_replicas_entry(new_entry);
+
+       memset(&new_r, 0, sizeof(new_r));
+       memset(&new_gc, 0, sizeof(new_gc));
+
+       mutex_lock(&c->sb_lock);
+
+       if (c->replicas_gc.entries &&
+           !__replicas_has_entry(&c->replicas_gc, new_entry)) {
+               new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
+               if (!new_gc.entries)
+                       goto err;
+       }
+
+       if (!__replicas_has_entry(&c->replicas, new_entry)) {
+               new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
+               if (!new_r.entries)
+                       goto err;
+
+               ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
+               if (ret)
+                       goto err;
+
+               bch2_journal_entry_res_resize(&c->journal,
+                               &c->replicas_journal_res,
+                               reserve_journal_replicas(c, &new_r));
+       }
+
+       if (!new_r.entries &&
+           !new_gc.entries)
+               goto out;
+
+       /* allocations done, now commit: */
+
+       if (new_r.entries)
+               bch2_write_super(c);
+
+       /* don't update in memory replicas until changes are persistent */
+       percpu_down_write(&c->mark_lock);
+       if (new_r.entries)
+               ret = replicas_table_update(c, &new_r);
+       if (new_gc.entries)
+               swap(new_gc, c->replicas_gc);
+       percpu_up_write(&c->mark_lock);
+out:
+       mutex_unlock(&c->sb_lock);
+
+       kfree(new_r.entries);
+       kfree(new_gc.entries);
+
+       return ret;
+err:
+       bch_err(c, "error adding replicas entry: memory allocation failure");
+       ret = -ENOMEM;
+       goto out;
+}
+
+static int __bch2_mark_replicas(struct bch_fs *c,
+                               struct bch_replicas_entry *r,
+                               bool check)
+{
+       return likely(bch2_replicas_marked(c, r))       ? 0
+               : check                                 ? -1
+               : bch2_mark_replicas_slowpath(c, r);
+}
+
+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r)
+{
+       return __bch2_mark_replicas(c, r, false);
+}
+
+static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,
+                                    bool check)
+{
+       struct bch_replicas_padded search;
+       struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+       unsigned i;
+       int ret;
+
+       for (i = 0; i < cached.nr; i++) {
+               bch2_replicas_entry_cached(&search.e, cached.devs[i]);
+
+               ret = __bch2_mark_replicas(c, &search.e, check);
+               if (ret)
+                       return ret;
+       }
+
+       bch2_bkey_to_replicas(&search.e, k);
+
+       return __bch2_mark_replicas(c, &search.e, check);
+}
+
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+                              struct bkey_s_c k)
+{
+       return __bch2_mark_bkey_replicas(c, k, true) == 0;
+}
+
+int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
+{
+       return __bch2_mark_bkey_replicas(c, k, false);
+}
+
+int bch2_replicas_gc_end(struct bch_fs *c, int ret)
+{
+       unsigned i;
+
+       lockdep_assert_held(&c->replicas_gc_lock);
+
+       mutex_lock(&c->sb_lock);
+       percpu_down_write(&c->mark_lock);
+
+       /*
+        * this is kind of crappy; the replicas gc mechanism needs to be ripped
+        * out
+        */
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+               struct bch_replicas_cpu n;
+
+               if (!__replicas_has_entry(&c->replicas_gc, e) &&
+                   (c->usage_base->replicas[i] ||
+                    percpu_u64_get(&c->usage[0]->replicas[i]) ||
+                    percpu_u64_get(&c->usage[1]->replicas[i]))) {
+                       n = cpu_replicas_add_entry(&c->replicas_gc, e);
+                       if (!n.entries) {
+                               ret = -ENOMEM;
+                               goto err;
+                       }
+
+                       swap(n, c->replicas_gc);
+                       kfree(n.entries);
+               }
+       }
+
+       if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) {
+               ret = -ENOSPC;
+               goto err;
+       }
+
+       ret = replicas_table_update(c, &c->replicas_gc);
+err:
+       kfree(c->replicas_gc.entries);
+       c->replicas_gc.entries = NULL;
+
+       percpu_up_write(&c->mark_lock);
+
+       if (!ret)
+               bch2_write_super(c);
+
+       mutex_unlock(&c->sb_lock);
+
+       return ret;
+}
+
+int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
+{
+       struct bch_replicas_entry *e;
+       unsigned i = 0;
+
+       lockdep_assert_held(&c->replicas_gc_lock);
+
+       mutex_lock(&c->sb_lock);
+       BUG_ON(c->replicas_gc.entries);
+
+       c->replicas_gc.nr               = 0;
+       c->replicas_gc.entry_size       = 0;
+
+       for_each_cpu_replicas_entry(&c->replicas, e)
+               if (!((1 << e->data_type) & typemask)) {
+                       c->replicas_gc.nr++;
+                       c->replicas_gc.entry_size =
+                               max_t(unsigned, c->replicas_gc.entry_size,
+                                     replicas_entry_bytes(e));
+               }
+
+       c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
+                                        c->replicas_gc.entry_size,
+                                        GFP_NOIO);
+       if (!c->replicas_gc.entries) {
+               mutex_unlock(&c->sb_lock);
+               bch_err(c, "error allocating c->replicas_gc");
+               return -ENOMEM;
+       }
+
+       for_each_cpu_replicas_entry(&c->replicas, e)
+               if (!((1 << e->data_type) & typemask))
+                       memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
+                              e, c->replicas_gc.entry_size);
+
+       bch2_cpu_replicas_sort(&c->replicas_gc);
+       mutex_unlock(&c->sb_lock);
+
+       return 0;
+}
+
+int bch2_replicas_gc2(struct bch_fs *c)
+{
+       struct bch_replicas_cpu new = { 0 };
+       unsigned i, nr;
+       int ret = 0;
+
+       bch2_journal_meta(&c->journal);
+retry:
+       nr              = READ_ONCE(c->replicas.nr);
+       new.entry_size  = READ_ONCE(c->replicas.entry_size);
+       new.entries     = kcalloc(nr, new.entry_size, GFP_KERNEL);
+       if (!new.entries) {
+               bch_err(c, "error allocating c->replicas_gc");
+               return -ENOMEM;
+       }
+
+       mutex_lock(&c->sb_lock);
+       percpu_down_write(&c->mark_lock);
+
+       if (nr                  != c->replicas.nr ||
+           new.entry_size      != c->replicas.entry_size) {
+               percpu_up_write(&c->mark_lock);
+               mutex_unlock(&c->sb_lock);
+               kfree(new.entries);
+               goto retry;
+       }
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+
+               if (e->data_type == BCH_DATA_journal ||
+                   c->usage_base->replicas[i] ||
+                   percpu_u64_get(&c->usage[0]->replicas[i]) ||
+                   percpu_u64_get(&c->usage[1]->replicas[i]))
+                       memcpy(cpu_replicas_entry(&new, new.nr++),
+                              e, new.entry_size);
+       }
+
+       bch2_cpu_replicas_sort(&new);
+
+       if (bch2_cpu_replicas_to_sb_replicas(c, &new)) {
+               ret = -ENOSPC;
+               goto err;
+       }
+
+       ret = replicas_table_update(c, &new);
+err:
+       kfree(new.entries);
+
+       percpu_up_write(&c->mark_lock);
+
+       if (!ret)
+               bch2_write_super(c);
+
+       mutex_unlock(&c->sb_lock);
+
+       return ret;
+}
+
+int bch2_replicas_set_usage(struct bch_fs *c,
+                           struct bch_replicas_entry *r,
+                           u64 sectors)
+{
+       int ret, idx = bch2_replicas_entry_idx(c, r);
+
+       if (idx < 0) {
+               struct bch_replicas_cpu n;
+
+               n = cpu_replicas_add_entry(&c->replicas, r);
+               if (!n.entries)
+                       return -ENOMEM;
+
+               ret = replicas_table_update(c, &n);
+               if (ret)
+                       return ret;
+
+               kfree(n.entries);
+
+               idx = bch2_replicas_entry_idx(c, r);
+               BUG_ON(idx < 0);
+       }
+
+       c->usage_base->replicas[idx] = sectors;
+
+       return 0;
+}
+
+/* Replicas tracking - superblock: */
+
+static int
+__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
+                                  struct bch_replicas_cpu *cpu_r)
+{
+       struct bch_replicas_entry *e, *dst;
+       unsigned nr = 0, entry_size = 0, idx = 0;
+
+       for_each_replicas_entry(sb_r, e) {
+               entry_size = max_t(unsigned, entry_size,
+                                  replicas_entry_bytes(e));
+               nr++;
+       }
+
+       cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO);
+       if (!cpu_r->entries)
+               return -ENOMEM;
+
+       cpu_r->nr               = nr;
+       cpu_r->entry_size       = entry_size;
+
+       for_each_replicas_entry(sb_r, e) {
+               dst = cpu_replicas_entry(cpu_r, idx++);
+               memcpy(dst, e, replicas_entry_bytes(e));
+               replicas_entry_sort(dst);
+       }
+
+       return 0;
+}
+
+static int
+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
+                                     struct bch_replicas_cpu *cpu_r)
+{
+       struct bch_replicas_entry_v0 *e;
+       unsigned nr = 0, entry_size = 0, idx = 0;
+
+       for_each_replicas_entry(sb_r, e) {
+               entry_size = max_t(unsigned, entry_size,
+                                  replicas_entry_bytes(e));
+               nr++;
+       }
+
+       entry_size += sizeof(struct bch_replicas_entry) -
+               sizeof(struct bch_replicas_entry_v0);
+
+       cpu_r->entries = kcalloc(nr, entry_size, GFP_NOIO);
+       if (!cpu_r->entries)
+               return -ENOMEM;
+
+       cpu_r->nr               = nr;
+       cpu_r->entry_size       = entry_size;
+
+       for_each_replicas_entry(sb_r, e) {
+               struct bch_replicas_entry *dst =
+                       cpu_replicas_entry(cpu_r, idx++);
+
+               dst->data_type  = e->data_type;
+               dst->nr_devs    = e->nr_devs;
+               dst->nr_required = 1;
+               memcpy(dst->devs, e->devs, e->nr_devs);
+               replicas_entry_sort(dst);
+       }
+
+       return 0;
+}
+
+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
+{
+       struct bch_sb_field_replicas *sb_v1;
+       struct bch_sb_field_replicas_v0 *sb_v0;
+       struct bch_replicas_cpu new_r = { 0, 0, NULL };
+       int ret = 0;
+
+       if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb)))
+               ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
+       else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb)))
+               ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
+
+       if (ret)
+               return ret;
+
+       bch2_cpu_replicas_sort(&new_r);
+
+       percpu_down_write(&c->mark_lock);
+
+       ret = replicas_table_update(c, &new_r);
+       percpu_up_write(&c->mark_lock);
+
+       kfree(new_r.entries);
+
+       return ret;
+}
+
+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
+                                              struct bch_replicas_cpu *r)
+{
+       struct bch_sb_field_replicas_v0 *sb_r;
+       struct bch_replicas_entry_v0 *dst;
+       struct bch_replicas_entry *src;
+       size_t bytes;
+
+       bytes = sizeof(struct bch_sb_field_replicas);
+
+       for_each_cpu_replicas_entry(r, src)
+               bytes += replicas_entry_bytes(src) - 1;
+
+       sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
+                       DIV_ROUND_UP(bytes, sizeof(u64)));
+       if (!sb_r)
+               return -ENOSPC;
+
+       bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
+       sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb);
+
+       memset(&sb_r->entries, 0,
+              vstruct_end(&sb_r->field) -
+              (void *) &sb_r->entries);
+
+       dst = sb_r->entries;
+       for_each_cpu_replicas_entry(r, src) {
+               dst->data_type  = src->data_type;
+               dst->nr_devs    = src->nr_devs;
+               memcpy(dst->devs, src->devs, src->nr_devs);
+
+               dst = replicas_entry_next(dst);
+
+               BUG_ON((void *) dst > vstruct_end(&sb_r->field));
+       }
+
+       return 0;
+}
+
+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
+                                           struct bch_replicas_cpu *r)
+{
+       struct bch_sb_field_replicas *sb_r;
+       struct bch_replicas_entry *dst, *src;
+       bool need_v1 = false;
+       size_t bytes;
+
+       bytes = sizeof(struct bch_sb_field_replicas);
+
+       for_each_cpu_replicas_entry(r, src) {
+               bytes += replicas_entry_bytes(src);
+               if (src->nr_required != 1)
+                       need_v1 = true;
+       }
+
+       if (!need_v1)
+               return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
+
+       sb_r = bch2_sb_resize_replicas(&c->disk_sb,
+                       DIV_ROUND_UP(bytes, sizeof(u64)));
+       if (!sb_r)
+               return -ENOSPC;
+
+       bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
+       sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
+
+       memset(&sb_r->entries, 0,
+              vstruct_end(&sb_r->field) -
+              (void *) &sb_r->entries);
+
+       dst = sb_r->entries;
+       for_each_cpu_replicas_entry(r, src) {
+               memcpy(dst, src, replicas_entry_bytes(src));
+
+               dst = replicas_entry_next(dst);
+
+               BUG_ON((void *) dst > vstruct_end(&sb_r->field));
+       }
+
+       return 0;
+}
+
+static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r)
+{
+       unsigned i;
+
+       sort_cmp_size(cpu_r->entries,
+                     cpu_r->nr,
+                     cpu_r->entry_size,
+                     memcmp, NULL);
+
+       for (i = 0; i + 1 < cpu_r->nr; i++) {
+               struct bch_replicas_entry *l =
+                       cpu_replicas_entry(cpu_r, i);
+               struct bch_replicas_entry *r =
+                       cpu_replicas_entry(cpu_r, i + 1);
+
+               BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
+
+               if (!memcmp(l, r, cpu_r->entry_size))
+                       return "duplicate replicas entry";
+       }
+
+       return NULL;
+}
+
+static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
+{
+       struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
+       struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+       struct bch_replicas_cpu cpu_r = { .entries = NULL };
+       struct bch_replicas_entry *e;
+       const char *err;
+       unsigned i;
+
+       for_each_replicas_entry(sb_r, e) {
+               err = "invalid replicas entry: invalid data type";
+               if (e->data_type >= BCH_DATA_NR)
+                       goto err;
+
+               err = "invalid replicas entry: no devices";
+               if (!e->nr_devs)
+                       goto err;
+
+               err = "invalid replicas entry: bad nr_required";
+               if (e->nr_required > 1 &&
+                   e->nr_required >= e->nr_devs)
+                       goto err;
+
+               err = "invalid replicas entry: invalid device";
+               for (i = 0; i < e->nr_devs; i++)
+                       if (!bch2_dev_exists(sb, mi, e->devs[i]))
+                               goto err;
+       }
+
+       err = "cannot allocate memory";
+       if (__bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r))
+               goto err;
+
+       err = check_dup_replicas_entries(&cpu_r);
+err:
+       kfree(cpu_r.entries);
+       return err;
+}
+
+static void bch2_sb_replicas_to_text(struct printbuf *out,
+                                    struct bch_sb *sb,
+                                    struct bch_sb_field *f)
+{
+       struct bch_sb_field_replicas *r = field_to_type(f, replicas);
+       struct bch_replicas_entry *e;
+       bool first = true;
+
+       for_each_replicas_entry(r, e) {
+               if (!first)
+                       pr_buf(out, " ");
+               first = false;
+
+               bch2_replicas_entry_to_text(out, e);
+       }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
+       .validate       = bch2_sb_validate_replicas,
+       .to_text        = bch2_sb_replicas_to_text,
+};
+
+static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f)
+{
+       struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
+       struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+       struct bch_replicas_cpu cpu_r = { .entries = NULL };
+       struct bch_replicas_entry_v0 *e;
+       const char *err;
+       unsigned i;
+
+       for_each_replicas_entry_v0(sb_r, e) {
+               err = "invalid replicas entry: invalid data type";
+               if (e->data_type >= BCH_DATA_NR)
+                       goto err;
+
+               err = "invalid replicas entry: no devices";
+               if (!e->nr_devs)
+                       goto err;
+
+               err = "invalid replicas entry: invalid device";
+               for (i = 0; i < e->nr_devs; i++)
+                       if (!bch2_dev_exists(sb, mi, e->devs[i]))
+                               goto err;
+       }
+
+       err = "cannot allocate memory";
+       if (__bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r))
+               goto err;
+
+       err = check_dup_replicas_entries(&cpu_r);
+err:
+       kfree(cpu_r.entries);
+       return err;
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
+       .validate       = bch2_sb_validate_replicas_v0,
+};
+
+/* Query replicas: */
+
+struct replicas_status __bch2_replicas_status(struct bch_fs *c,
+                                             struct bch_devs_mask online_devs)
+{
+       struct bch_sb_field_members *mi;
+       struct bch_replicas_entry *e;
+       unsigned i, nr_online, nr_offline;
+       struct replicas_status ret;
+
+       memset(&ret, 0, sizeof(ret));
+
+       for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
+               ret.replicas[i].redundancy = INT_MAX;
+
+       mi = bch2_sb_get_members(c->disk_sb.sb);
+
+       percpu_down_read(&c->mark_lock);
+
+       for_each_cpu_replicas_entry(&c->replicas, e) {
+               if (e->data_type >= ARRAY_SIZE(ret.replicas))
+                       panic("e %p data_type %u\n", e, e->data_type);
+
+               nr_online = nr_offline = 0;
+
+               for (i = 0; i < e->nr_devs; i++) {
+                       BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
+                                               e->devs[i]));
+
+                       if (test_bit(e->devs[i], online_devs.d))
+                               nr_online++;
+                       else
+                               nr_offline++;
+               }
+
+               ret.replicas[e->data_type].redundancy =
+                       min(ret.replicas[e->data_type].redundancy,
+                           (int) nr_online - (int) e->nr_required);
+
+               ret.replicas[e->data_type].nr_offline =
+                       max(ret.replicas[e->data_type].nr_offline,
+                           nr_offline);
+       }
+
+       percpu_up_read(&c->mark_lock);
+
+       for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
+               if (ret.replicas[i].redundancy == INT_MAX)
+                       ret.replicas[i].redundancy = 0;
+
+       return ret;
+}
+
+struct replicas_status bch2_replicas_status(struct bch_fs *c)
+{
+       return __bch2_replicas_status(c, bch2_online_devs(c));
+}
+
+static bool have_enough_devs(struct replicas_status s,
+                            enum bch_data_type type,
+                            bool force_if_degraded,
+                            bool force_if_lost)
+{
+       return (!s.replicas[type].nr_offline || force_if_degraded) &&
+               (s.replicas[type].redundancy >= 0 || force_if_lost);
+}
+
+bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
+{
+       return (have_enough_devs(s, BCH_DATA_journal,
+                                flags & BCH_FORCE_IF_METADATA_DEGRADED,
+                                flags & BCH_FORCE_IF_METADATA_LOST) &&
+               have_enough_devs(s, BCH_DATA_btree,
+                                flags & BCH_FORCE_IF_METADATA_DEGRADED,
+                                flags & BCH_FORCE_IF_METADATA_LOST) &&
+               have_enough_devs(s, BCH_DATA_user,
+                                flags & BCH_FORCE_IF_DATA_DEGRADED,
+                                flags & BCH_FORCE_IF_DATA_LOST));
+}
+
+int bch2_replicas_online(struct bch_fs *c, bool meta)
+{
+       struct replicas_status s = bch2_replicas_status(c);
+
+       return (meta
+               ? min(s.replicas[BCH_DATA_journal].redundancy,
+                     s.replicas[BCH_DATA_btree].redundancy)
+               : s.replicas[BCH_DATA_user].redundancy) + 1;
+}
+
+unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct bch_replicas_entry *e;
+       unsigned i, ret = 0;
+
+       percpu_down_read(&c->mark_lock);
+
+       for_each_cpu_replicas_entry(&c->replicas, e)
+               for (i = 0; i < e->nr_devs; i++)
+                       if (e->devs[i] == ca->dev_idx)
+                               ret |= 1 << e->data_type;
+
+       percpu_up_read(&c->mark_lock);
+
+       return ret;
+}
+
+int bch2_fs_replicas_init(struct bch_fs *c)
+{
+       c->journal.entry_u64s_reserved +=
+               reserve_journal_replicas(c, &c->replicas);
+
+       return replicas_table_update(c, &c->replicas);
+}
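
To make the query side concrete: for a user-data entry with nr_devs = 3 and
nr_required = 1, two devices online gives redundancy = 2 - 1 = 1, i.e. one
more device may be lost before data becomes unreachable. A minimal sketch of
a caller (example_check_replicas is a hypothetical helper; assumes a valid
struct bch_fs *c):

    static void example_check_replicas(struct bch_fs *c)
    {
            struct replicas_status s = bch2_replicas_status(c);

            /* can we keep operating, degraded, with the devices online now? */
            if (!bch2_have_enough_devs(s, BCH_FORCE_IF_DEGRADED))
                    pr_err("insufficient devices for degraded operation");

            /* effective number of reachable copies of user data: */
            pr_info("user data replication: %i", bch2_replicas_online(c, false));
    }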
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
new file mode 100644 (file)
index 0000000..8b95164
--- /dev/null
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REPLICAS_H
+#define _BCACHEFS_REPLICAS_H
+
+#include "eytzinger.h"
+#include "replicas_types.h"
+
+void bch2_replicas_entry_to_text(struct printbuf *,
+                                struct bch_replicas_entry *);
+void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
+
+static inline struct bch_replicas_entry *
+cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
+{
+       return (void *) r->entries + r->entry_size * i;
+}
+
+int bch2_replicas_entry_idx(struct bch_fs *,
+                           struct bch_replicas_entry *);
+
+void bch2_devlist_to_replicas(struct bch_replicas_entry *,
+                             enum bch_data_type,
+                             struct bch_devs_list);
+bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *);
+int bch2_mark_replicas(struct bch_fs *,
+                      struct bch_replicas_entry *);
+
+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
+bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c);
+int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
+
+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
+                                             unsigned dev)
+{
+       e->data_type    = BCH_DATA_cached;
+       e->nr_devs      = 1;
+       e->nr_required  = 1;
+       e->devs[0]      = dev;
+}
+
+struct replicas_status {
+       struct {
+               int             redundancy;
+               unsigned        nr_offline;
+       }                       replicas[BCH_DATA_NR];
+};
+
+struct replicas_status __bch2_replicas_status(struct bch_fs *,
+                                             struct bch_devs_mask);
+struct replicas_status bch2_replicas_status(struct bch_fs *);
+bool bch2_have_enough_devs(struct replicas_status, unsigned);
+
+int bch2_replicas_online(struct bch_fs *, bool);
+unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
+
+int bch2_replicas_gc_end(struct bch_fs *, int);
+int bch2_replicas_gc_start(struct bch_fs *, unsigned);
+int bch2_replicas_gc2(struct bch_fs *);
+
+int bch2_replicas_set_usage(struct bch_fs *,
+                           struct bch_replicas_entry *,
+                           u64);
+
+#define for_each_cpu_replicas_entry(_r, _i)                            \
+       for (_i = (_r)->entries;                                        \
+            (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
+            _i = (void *) (_i) + (_r)->entry_size)
+
+/* iterate over superblock replicas - used by userspace tools: */
+
+#define replicas_entry_next(_i)                                                \
+       ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
+
+#define for_each_replicas_entry(_r, _i)                                        \
+       for (_i = (_r)->entries;                                        \
+            (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
+            (_i) = replicas_entry_next(_i))
+
+#define for_each_replicas_entry_v0(_r, _i)                             \
+       for (_i = (_r)->entries;                                        \
+            (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
+            (_i) = replicas_entry_next(_i))
+
+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
+
+int bch2_fs_replicas_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_REPLICAS_H */
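
A sketch of the superblock iteration flagged above for userspace tools
(example_count_sb_replicas is a hypothetical helper; r is a replicas field
that has already been validated):

    static unsigned example_count_sb_replicas(struct bch_sb_field_replicas *r)
    {
            struct bch_replicas_entry *e;
            unsigned nr = 0;

            /* entries are variable length: replicas_entry_next() advances by
             * replicas_entry_bytes(e), and iteration stops at the end of the
             * field or at a zeroed (data_type == 0) entry */
            for_each_replicas_entry(r, e)
                    nr++;

            return nr;
    }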
diff --git a/libbcachefs/replicas_types.h b/libbcachefs/replicas_types.h
new file mode 100644 (file)
index 0000000..0535b1d
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef _BCACHEFS_REPLICAS_TYPES_H
+#define _BCACHEFS_REPLICAS_TYPES_H
+
+struct bch_replicas_cpu {
+       unsigned                nr;
+       unsigned                entry_size;
+       struct bch_replicas_entry *entries;
+};
+
+#endif /* _BCACHEFS_REPLICAS_TYPES_H */
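
Unlike the packed on-disk encoding, this in-memory table pads every entry to
a common stride (entry_size), so cpu_replicas_entry() from replicas.h can
index it with plain pointer arithmetic. A hypothetical dump helper, using
printf since the tools run in userspace:

    static void example_dump_cpu_replicas(struct bch_replicas_cpu *r)
    {
            unsigned i;

            for (i = 0; i < r->nr; i++) {
                    /* entry i lives at byte offset i * r->entry_size */
                    struct bch_replicas_entry *e = cpu_replicas_entry(r, i);

                    printf("type %u nr_devs %u nr_required %u\n",
                           e->data_type, e->nr_devs, e->nr_required);
            }
    }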
diff --git a/libbcachefs/siphash.c b/libbcachefs/siphash.c
new file mode 100644 (file)
index 0000000..c062edb
--- /dev/null
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*     $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */
+
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d
+ * are the number of compression rounds and the number of finalization rounds.
+ * A compression round is identical to a finalization round and this round
+ * function is called SipRound.  Given a 128-bit key k and a (possibly empty)
+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m).
+ *
+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18,
+ * by Jean-Philippe Aumasson and Daniel J. Bernstein,
+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa
+ * https://131002.net/siphash/siphash.pdf
+ * https://131002.net/siphash/
+ */
+
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+#include <linux/bitops.h>
+#include <linux/string.h>
+
+#include "siphash.h"
+
+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds)
+{
+       while (rounds--) {
+               ctx->v[0] += ctx->v[1];
+               ctx->v[2] += ctx->v[3];
+               ctx->v[1] = rol64(ctx->v[1], 13);
+               ctx->v[3] = rol64(ctx->v[3], 16);
+
+               ctx->v[1] ^= ctx->v[0];
+               ctx->v[3] ^= ctx->v[2];
+               ctx->v[0] = rol64(ctx->v[0], 32);
+
+               ctx->v[2] += ctx->v[1];
+               ctx->v[0] += ctx->v[3];
+               ctx->v[1] = rol64(ctx->v[1], 17);
+               ctx->v[3] = rol64(ctx->v[3], 21);
+
+               ctx->v[1] ^= ctx->v[2];
+               ctx->v[3] ^= ctx->v[0];
+               ctx->v[2] = rol64(ctx->v[2], 32);
+       }
+}
+
+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds)
+{
+       u64 m = get_unaligned_le64(ptr);
+
+       ctx->v[3] ^= m;
+       SipHash_Rounds(ctx, rounds);
+       ctx->v[0] ^= m;
+}
+
+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key)
+{
+       u64 k0, k1;
+
+       k0 = le64_to_cpu(key->k0);
+       k1 = le64_to_cpu(key->k1);
+
+       ctx->v[0] = 0x736f6d6570736575ULL ^ k0;
+       ctx->v[1] = 0x646f72616e646f6dULL ^ k1;
+       ctx->v[2] = 0x6c7967656e657261ULL ^ k0;
+       ctx->v[3] = 0x7465646279746573ULL ^ k1;
+
+       memset(ctx->buf, 0, sizeof(ctx->buf));
+       ctx->bytes = 0;
+}
+
+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf,
+                   const void *src, size_t len)
+{
+       const u8 *ptr = src;
+       size_t left, used;
+
+       if (len == 0)
+               return;
+
+       used = ctx->bytes % sizeof(ctx->buf);
+       ctx->bytes += len;
+
+       if (used > 0) {
+               left = sizeof(ctx->buf) - used;
+
+               if (len >= left) {
+                       memcpy(&ctx->buf[used], ptr, left);
+                       SipHash_CRounds(ctx, ctx->buf, rc);
+                       len -= left;
+                       ptr += left;
+                       /* buf consumed: any tail bytes below belong at offset 0 */
+                       used = 0;
+               } else {
+                       memcpy(&ctx->buf[used], ptr, len);
+                       return;
+               }
+       }
+
+       while (len >= sizeof(ctx->buf)) {
+               SipHash_CRounds(ctx, ptr, rc);
+               len -= sizeof(ctx->buf);
+               ptr += sizeof(ctx->buf);
+       }
+
+       if (len > 0)
+               memcpy(&ctx->buf[used], ptr, len);
+}
+
+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf)
+{
+       u64 r;
+
+       r = SipHash_End(ctx, rc, rf);
+
+       *((__le64 *) dst) = cpu_to_le64(r);
+}
+
+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
+{
+       u64 r;
+       size_t left, used;
+
+       used = ctx->bytes % sizeof(ctx->buf);
+       left = sizeof(ctx->buf) - used;
+       memset(&ctx->buf[used], 0, left - 1);
+       ctx->buf[7] = ctx->bytes;
+
+       SipHash_CRounds(ctx, ctx->buf, rc);
+       ctx->v[2] ^= 0xff;
+       SipHash_Rounds(ctx, rf);
+
+       r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
+       memset(ctx, 0, sizeof(*ctx));
+       return (r);
+}
+
+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
+{
+       SIPHASH_CTX ctx;
+
+       SipHash_Init(&ctx, key);
+       SipHash_Update(&ctx, rc, rf, src, len);
+       return SipHash_End(&ctx, rc, rf);
+}
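
For reference, a one-shot caller of the function above (example_siphash24 is
a hypothetical helper; a real key would come from the filesystem's hash seed,
a zeroed key just keeps the sketch self-contained):

    static u64 example_siphash24(const void *buf, size_t len)
    {
            SIPHASH_KEY key = { .k0 = 0, .k1 = 0 };

            /* rc = 2 compression rounds, rf = 4 finalization rounds: SipHash-2-4 */
            return SipHash(&key, 2, 4, buf, len);
    }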
diff --git a/libbcachefs/siphash.h b/libbcachefs/siphash.h
new file mode 100644 (file)
index 0000000..3dfaf34
--- /dev/null
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions)
+ * optimized for speed on short messages, returning a 64-bit hash/digest value.
+ *
+ * The number of rounds is selected by the macro family used:
+ *  the SipHash24_*() macros for the fast and reasonably strong version
+ *  the SipHash48_*() macros for the strong version (half as fast)
+ *
+ * SIPHASH_CTX ctx;
+ * SIPHASH_KEY key;                     (16 byte key)
+ * SipHash24_Init(&ctx, &key);
+ * SipHash24_Update(&ctx, pointer_to_string, length_of_string);
+ * SipHash24_Final(output, &ctx);
+ */
+
+#ifndef _SIPHASH_H_
+#define _SIPHASH_H_
+
+#include <linux/types.h>
+
+#define SIPHASH_BLOCK_LENGTH    8
+#define SIPHASH_KEY_LENGTH     16
+#define SIPHASH_DIGEST_LENGTH   8
+
+typedef struct _SIPHASH_CTX {
+       u64             v[4];
+       u8              buf[SIPHASH_BLOCK_LENGTH];
+       u32             bytes;
+} SIPHASH_CTX;
+
+typedef struct {
+       __le64          k0;
+       __le64          k1;
+} SIPHASH_KEY;
+
+void   SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *);
+void   SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t);
+u64    SipHash_End(SIPHASH_CTX *, int, int);
+void   SipHash_Final(void *, SIPHASH_CTX *, int, int);
+u64    SipHash(const SIPHASH_KEY *, int, int, const void *, size_t);
+
+#define SipHash24_Init(_c, _k)         SipHash_Init((_c), (_k))
+#define SipHash24_Update(_c, _p, _l)   SipHash_Update((_c), 2, 4, (_p), (_l))
+#define SipHash24_End(_d)              SipHash_End((_d), 2, 4)
+#define SipHash24_Final(_d, _c)                SipHash_Final((_d), (_c), 2, 4)
+#define SipHash24(_k, _p, _l)          SipHash((_k), 2, 4, (_p), (_l))
+
+#define SipHash48_Init(_c, _k)         SipHash_Init((_c), (_k))
+#define SipHash48_Update(_c, _p, _l)   SipHash_Update((_c), 4, 8, (_p), (_l))
+#define SipHash48_End(_d)              SipHash_End((_d), 4, 8)
+#define SipHash48_Final(_d, _c)                SipHash_Final((_d), (_c), 4, 8)
+#define SipHash48(_k, _p, _l)          SipHash((_k), 4, 8, (_p), (_l))
+
+#endif /* _SIPHASH_H_ */
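
The macros also support streaming input split across several updates; a
sketch (hypothetical helper) hashing two buffers as if they were one:

    static u64 example_siphash24_stream(const SIPHASH_KEY *key,
                                        const void *a, size_t a_len,
                                        const void *b, size_t b_len)
    {
            SIPHASH_CTX ctx;

            SipHash24_Init(&ctx, key);
            SipHash24_Update(&ctx, a, a_len);
            SipHash24_Update(&ctx, b, b_len);

            /* same result as one SipHash24() call over the concatenation */
            return SipHash24_End(&ctx);
    }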
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
new file mode 100644 (file)
index 0000000..dea9b72
--- /dev/null
@@ -0,0 +1,336 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_STR_HASH_H
+#define _BCACHEFS_STR_HASH_H
+
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "checksum.h"
+#include "error.h"
+#include "inode.h"
+#include "siphash.h"
+#include "super.h"
+
+#include <linux/crc32c.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+
+static inline enum bch_str_hash_type
+bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
+{
+       switch (opt) {
+       case BCH_STR_HASH_OPT_CRC32C:
+               return BCH_STR_HASH_CRC32C;
+       case BCH_STR_HASH_OPT_CRC64:
+               return BCH_STR_HASH_CRC64;
+       case BCH_STR_HASH_OPT_SIPHASH:
+               return c->sb.features & (1ULL << BCH_FEATURE_new_siphash)
+                       ? BCH_STR_HASH_SIPHASH
+                       : BCH_STR_HASH_SIPHASH_OLD;
+       default:
+               BUG();
+       }
+}
+
+struct bch_hash_info {
+       u8                      type;
+       union {
+               __le64          crc_key;
+               SIPHASH_KEY     siphash_key;
+       };
+};
+
+static inline struct bch_hash_info
+bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
+{
+       /* XXX ick */
+       struct bch_hash_info info = {
+               .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) &
+                       ~(~0U << INODE_STR_HASH_BITS),
+               .crc_key = bi->bi_hash_seed,
+       };
+
+       if (unlikely(info.type == BCH_STR_HASH_SIPHASH_OLD)) {
+               SHASH_DESC_ON_STACK(desc, c->sha256);
+               u8 digest[SHA256_DIGEST_SIZE];
+
+               desc->tfm = c->sha256;
+
+               crypto_shash_digest(desc, (void *) &bi->bi_hash_seed,
+                                   sizeof(bi->bi_hash_seed), digest);
+               memcpy(&info.siphash_key, digest, sizeof(info.siphash_key));
+       }
+
+       return info;
+}
+
+struct bch_str_hash_ctx {
+       union {
+               u32             crc32c;
+               u64             crc64;
+               SIPHASH_CTX     siphash;
+       };
+};
+
+static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx,
+                                    const struct bch_hash_info *info)
+{
+       switch (info->type) {
+       case BCH_STR_HASH_CRC32C:
+               ctx->crc32c = crc32c(~0, &info->crc_key, sizeof(info->crc_key));
+               break;
+       case BCH_STR_HASH_CRC64:
+               ctx->crc64 = crc64_be(~0, &info->crc_key, sizeof(info->crc_key));
+               break;
+       case BCH_STR_HASH_SIPHASH_OLD:
+       case BCH_STR_HASH_SIPHASH:
+               SipHash24_Init(&ctx->siphash, &info->siphash_key);
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx,
+                                      const struct bch_hash_info *info,
+                                      const void *data, size_t len)
+{
+       switch (info->type) {
+       case BCH_STR_HASH_CRC32C:
+               ctx->crc32c = crc32c(ctx->crc32c, data, len);
+               break;
+       case BCH_STR_HASH_CRC64:
+               ctx->crc64 = crc64_be(ctx->crc64, data, len);
+               break;
+       case BCH_STR_HASH_SIPHASH_OLD:
+       case BCH_STR_HASH_SIPHASH:
+               SipHash24_Update(&ctx->siphash, data, len);
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
+                                  const struct bch_hash_info *info)
+{
+       switch (info->type) {
+       case BCH_STR_HASH_CRC32C:
+               return ctx->crc32c;
+       case BCH_STR_HASH_CRC64:
+               return ctx->crc64 >> 1;
+       case BCH_STR_HASH_SIPHASH_OLD:
+       case BCH_STR_HASH_SIPHASH:
+               return SipHash24_End(&ctx->siphash) >> 1;
+       default:
+               BUG();
+       }
+}
+
+struct bch_hash_desc {
+       enum btree_id   btree_id;
+       u8              key_type;
+
+       u64             (*hash_key)(const struct bch_hash_info *, const void *);
+       u64             (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c);
+       bool            (*cmp_key)(struct bkey_s_c, const void *);
+       bool            (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
+};
+
+static __always_inline struct btree_iter *
+bch2_hash_lookup(struct btree_trans *trans,
+                const struct bch_hash_desc desc,
+                const struct bch_hash_info *info,
+                u64 inode, const void *key,
+                unsigned flags)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       for_each_btree_key(trans, iter, desc.btree_id,
+                          POS(inode, desc.hash_key(info, key)),
+                          BTREE_ITER_SLOTS|flags, k, ret) {
+               if (iter->pos.inode != inode)
+                       break;
+
+               if (k.k->type == desc.key_type) {
+                       if (!desc.cmp_key(k, key))
+                               return iter;
+               } else if (k.k->type == KEY_TYPE_whiteout) {
+                       ;
+               } else {
+                       /* hole, not found */
+                       break;
+               }
+       }
+       bch2_trans_iter_put(trans, iter);
+
+       return ERR_PTR(ret ?: -ENOENT);
+}
+
+static __always_inline struct btree_iter *
+bch2_hash_hole(struct btree_trans *trans,
+              const struct bch_hash_desc desc,
+              const struct bch_hash_info *info,
+              u64 inode, const void *key)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       for_each_btree_key(trans, iter, desc.btree_id,
+                          POS(inode, desc.hash_key(info, key)),
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+               if (iter->pos.inode != inode)
+                       break;
+
+               if (k.k->type != desc.key_type)
+                       return iter;
+       }
+
+       iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+       bch2_trans_iter_put(trans, iter);
+
+       return ERR_PTR(ret ?: -ENOSPC);
+}
+
+static __always_inline
+int bch2_hash_needs_whiteout(struct btree_trans *trans,
+                            const struct bch_hash_desc desc,
+                            const struct bch_hash_info *info,
+                            struct btree_iter *start)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       iter = bch2_trans_copy_iter(trans, start);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       bch2_btree_iter_next_slot(iter);
+
+       for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
+               if (k.k->type != desc.key_type &&
+                   k.k->type != KEY_TYPE_whiteout)
+                       break;
+
+               if (k.k->type == desc.key_type &&
+                   desc.hash_bkey(info, k) <= start->pos.offset) {
+                       iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+                       ret = 1;
+                       break;
+               }
+       }
+
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static __always_inline
+int bch2_hash_set(struct btree_trans *trans,
+                 const struct bch_hash_desc desc,
+                 const struct bch_hash_info *info,
+                 u64 inode, struct bkey_i *insert, int flags)
+{
+       struct btree_iter *iter, *slot = NULL;
+       struct bkey_s_c k;
+       bool found = false;
+       int ret;
+
+       for_each_btree_key(trans, iter, desc.btree_id,
+                          POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))),
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+               if (iter->pos.inode != inode)
+                       break;
+
+               if (k.k->type == desc.key_type) {
+                       if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
+                               goto found;
+
+                       /* hash collision: */
+                       continue;
+               }
+
+               if (!slot &&
+                   !(flags & BCH_HASH_SET_MUST_REPLACE)) {
+                       slot = bch2_trans_copy_iter(trans, iter);
+                       if (IS_ERR(slot))
+                               return PTR_ERR(slot);
+               }
+
+               if (k.k->type != KEY_TYPE_whiteout)
+                       goto not_found;
+       }
+
+       if (!ret)
+               ret = -ENOSPC;
+out:
+       bch2_trans_iter_put(trans, slot);
+       bch2_trans_iter_put(trans, iter);
+
+       return ret;
+found:
+       found = true;
+not_found:
+
+       if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) {
+               ret = -ENOENT;
+       } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) {
+               ret = -EEXIST;
+       } else {
+               if (!found && slot)
+                       swap(iter, slot);
+
+               insert->k.p = iter->pos;
+               bch2_trans_update(trans, iter, insert, 0);
+       }
+
+       goto out;
+}
+
+static __always_inline
+int bch2_hash_delete_at(struct btree_trans *trans,
+                       const struct bch_hash_desc desc,
+                       const struct bch_hash_info *info,
+                       struct btree_iter *iter)
+{
+       struct bkey_i *delete;
+       int ret;
+
+       ret = bch2_hash_needs_whiteout(trans, desc, info, iter);
+       if (ret < 0)
+               return ret;
+
+       delete = bch2_trans_kmalloc(trans, sizeof(*delete));
+       if (IS_ERR(delete))
+               return PTR_ERR(delete);
+
+       bkey_init(&delete->k);
+       delete->k.p = iter->pos;
+       delete->k.type = ret ? KEY_TYPE_whiteout : KEY_TYPE_deleted;
+
+       bch2_trans_update(trans, iter, delete, 0);
+       return 0;
+}
+
+static __always_inline
+int bch2_hash_delete(struct btree_trans *trans,
+                    const struct bch_hash_desc desc,
+                    const struct bch_hash_info *info,
+                    u64 inode, const void *key)
+{
+       struct btree_iter *iter;
+       int ret;
+
+       iter = bch2_hash_lookup(trans, desc, info, inode, key,
+                               BTREE_ITER_INTENT);
+       if (IS_ERR(iter))
+               return PTR_ERR(iter);
+
+       ret = bch2_hash_delete_at(trans, desc, info, iter);
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+#endif /* _BCACHEFS_STR_HASH_H */
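
Tying the three context calls together, a sketch (hypothetical helper) of
hashing a name with whichever algorithm the inode's hash info selects:

    static u64 example_str_hash(const struct bch_hash_info *info,
                                const char *name, size_t len)
    {
            struct bch_str_hash_ctx ctx;

            /* info->type picks crc32c, crc64 or siphash; the same three
             * calls work for all of them */
            bch2_str_hash_init(&ctx, info);
            bch2_str_hash_update(&ctx, info, name, len);
            return bch2_str_hash_end(&ctx, info);
    }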
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
new file mode 100644 (file)
index 0000000..cee6cc9
--- /dev/null
@@ -0,0 +1,1158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "io.h"
+#include "journal.h"
+#include "journal_seq_blacklist.h"
+#include "replicas.h"
+#include "quota.h"
+#include "super-io.h"
+#include "super.h"
+#include "vstructs.h"
+
+#include <linux/backing-dev.h>
+#include <linux/sort.h>
+
+const char * const bch2_sb_fields[] = {
+#define x(name, nr)    #name,
+       BCH_SB_FIELDS()
+#undef x
+       NULL
+};
+
+static const char *bch2_sb_field_validate(struct bch_sb *,
+                                         struct bch_sb_field *);
+
+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *sb,
+                                     enum bch_sb_field_type type)
+{
+       struct bch_sb_field *f;
+
+       /* XXX: need locking around superblock to access optional fields */
+
+       vstruct_for_each(sb, f)
+               if (le32_to_cpu(f->type) == type)
+                       return f;
+       return NULL;
+}
+
+static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
+                                                  struct bch_sb_field *f,
+                                                  unsigned u64s)
+{
+       unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+       unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;
+
+       BUG_ON(get_order(__vstruct_bytes(struct bch_sb, sb_u64s)) >
+              sb->page_order);
+
+       if (!f && !u64s) {
+               /* nothing to do: */
+       } else if (!f) {
+               f = vstruct_last(sb->sb);
+               memset(f, 0, sizeof(u64) * u64s);
+               f->u64s = cpu_to_le32(u64s);
+               f->type = 0;
+       } else {
+               void *src, *dst;
+
+               src = vstruct_end(f);
+
+               if (u64s) {
+                       f->u64s = cpu_to_le32(u64s);
+                       dst = vstruct_end(f);
+               } else {
+                       dst = f;
+               }
+
+               memmove(dst, src, vstruct_end(sb->sb) - src);
+
+               if (dst > src)
+                       memset(src, 0, dst - src);
+       }
+
+       sb->sb->u64s = cpu_to_le32(sb_u64s);
+
+       return u64s ? f : NULL;
+}
+
+void bch2_sb_field_delete(struct bch_sb_handle *sb,
+                         enum bch_sb_field_type type)
+{
+       struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
+
+       if (f)
+               __bch2_sb_field_resize(sb, f, 0);
+}
+
+/* Superblock realloc/free: */
+
+void bch2_free_super(struct bch_sb_handle *sb)
+{
+       if (sb->bio)
+               bio_put(sb->bio);
+       if (!IS_ERR_OR_NULL(sb->bdev))
+               blkdev_put(sb->bdev, sb->mode);
+
+       free_pages((unsigned long) sb->sb, sb->page_order);
+       memset(sb, 0, sizeof(*sb));
+}
+
+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
+{
+       size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
+       unsigned order = get_order(new_bytes);
+       struct bch_sb *new_sb;
+       struct bio *bio;
+
+       if (sb->sb && sb->page_order >= order)
+               return 0;
+
+       if (sb->have_layout) {
+               u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
+
+               if (new_bytes > max_bytes) {
+                       char buf[BDEVNAME_SIZE];
+
+                       pr_err("%s: superblock too big: want %zu but have %llu",
+                              bdevname(sb->bdev, buf), new_bytes, max_bytes);
+                       return -ENOSPC;
+               }
+       }
+
+
+       if (dynamic_fault("bcachefs:add:super_realloc"))
+               return -ENOMEM;
+
+       if (sb->have_bio) {
+               bio = bio_kmalloc(GFP_KERNEL, 1 << order);
+               if (!bio)
+                       return -ENOMEM;
+
+               if (sb->bio)
+                       bio_put(sb->bio);
+               sb->bio = bio;
+       }
+
+       new_sb = (void *) __get_free_pages(GFP_NOFS|__GFP_ZERO, order);
+       if (!new_sb)
+               return -ENOMEM;
+
+       if (sb->sb)
+               memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order);
+
+       free_pages((unsigned long) sb->sb, sb->page_order);
+       sb->sb = new_sb;
+
+       sb->page_order = order;
+
+       return 0;
+}
+
+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
+                                         enum bch_sb_field_type type,
+                                         unsigned u64s)
+{
+       struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
+       ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+       ssize_t d = -old_u64s + u64s;
+
+       if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
+               return NULL;
+
+       if (sb->fs_sb) {
+               struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);
+               struct bch_dev *ca;
+               unsigned i;
+
+               lockdep_assert_held(&c->sb_lock);
+
+               /* XXX: we're not checking that offline devices have enough space */
+
+               for_each_online_member(ca, c, i) {
+                       struct bch_sb_handle *sb = &ca->disk_sb;
+
+                       if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
+                               percpu_ref_put(&ca->ref);
+                               return NULL;
+                       }
+               }
+       }
+
+       f = bch2_sb_field_get(sb->sb, type);
+       f = __bch2_sb_field_resize(sb, f, u64s);
+       if (f)
+               f->type = cpu_to_le32(type);
+       return f;
+}
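
A sketch of the calling convention for growing an optional field (the field
type BCH_SB_FIELD_replicas and the u64s count are illustrative; c->sb_lock
must be held, per the lockdep assertion above, when resizing the filesystem
copy):

    mutex_lock(&c->sb_lock);
    f = bch2_sb_field_resize(&c->disk_sb, BCH_SB_FIELD_replicas, u64s);
    if (!f) {
            /* reallocation failed on this or some member device */
            mutex_unlock(&c->sb_lock);
            return -ENOMEM;
    }
    /* ... fill in the field, then persist it with bch2_write_super(c) ... */
    mutex_unlock(&c->sb_lock);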
+
+/* Superblock validate: */
+
+static inline void __bch2_sb_layout_size_assert(void)
+{
+       BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
+}
+
+static const char *validate_sb_layout(struct bch_sb_layout *layout)
+{
+       u64 offset, prev_offset, max_sectors;
+       unsigned i;
+
+       if (uuid_le_cmp(layout->magic, BCACHE_MAGIC))
+               return "Not a bcachefs superblock layout";
+
+       if (layout->layout_type != 0)
+               return "Invalid superblock layout type";
+
+       if (!layout->nr_superblocks)
+               return "Invalid superblock layout: no superblocks";
+
+       if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset))
+               return "Invalid superblock layout: too many superblocks";
+
+       max_sectors = 1 << layout->sb_max_size_bits;
+
+       prev_offset = le64_to_cpu(layout->sb_offset[0]);
+
+       for (i = 1; i < layout->nr_superblocks; i++) {
+               offset = le64_to_cpu(layout->sb_offset[i]);
+
+               if (offset < prev_offset + max_sectors)
+                       return "Invalid superblock layout: superblocks overlap";
+               prev_offset = offset;
+       }
+
+       return NULL;
+}
+
+const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
+{
+       struct bch_sb *sb = disk_sb->sb;
+       struct bch_sb_field *f;
+       struct bch_sb_field_members *mi;
+       const char *err;
+       u32 version, version_min;
+       u16 block_size;
+
+       version         = le16_to_cpu(sb->version);
+       version_min     = version >= bcachefs_metadata_version_new_versioning
+               ? le16_to_cpu(sb->version_min)
+               : version;
+
+       if (version    >= bcachefs_metadata_version_max ||
+           version_min < bcachefs_metadata_version_min)
+               return "Unsupported superblock version";
+
+       if (version_min > version)
+               return "Bad minimum version";
+
+       if (sb->features[1] ||
+           (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
+               return "Filesystem has incompatible features";
+
+       block_size = le16_to_cpu(sb->block_size);
+
+       if (!is_power_of_2(block_size) ||
+           block_size > PAGE_SECTORS)
+               return "Bad block size";
+
+       if (bch2_is_zero(sb->user_uuid.b, sizeof(uuid_le)))
+               return "Bad user UUID";
+
+       if (bch2_is_zero(sb->uuid.b, sizeof(uuid_le)))
+               return "Bad internal UUID";
+
+       if (!sb->nr_devices ||
+           sb->nr_devices <= sb->dev_idx ||
+           sb->nr_devices > BCH_SB_MEMBERS_MAX)
+               return "Bad number of member devices";
+
+       if (!BCH_SB_META_REPLICAS_WANT(sb) ||
+           BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+               return "Invalid number of metadata replicas";
+
+       if (!BCH_SB_META_REPLICAS_REQ(sb) ||
+           BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+               return "Invalid number of metadata replicas";
+
+       if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
+           BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+               return "Invalid number of data replicas";
+
+       if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
+           BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+               return "Invalid number of data replicas";
+
+       if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
+               return "Invalid metadata checksum type";
+
+       if (BCH_SB_DATA_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
+               return "Invalid metadata checksum type";
+
+       if (BCH_SB_COMPRESSION_TYPE(sb) >= BCH_COMPRESSION_OPT_NR)
+               return "Invalid compression type";
+
+       if (!BCH_SB_BTREE_NODE_SIZE(sb))
+               return "Btree node size not set";
+
+       if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb)))
+               return "Btree node size not a power of two";
+
+       if (BCH_SB_GC_RESERVE(sb) < 5)
+               return "gc reserve percentage too small";
+
+       if (!sb->time_precision ||
+           le32_to_cpu(sb->time_precision) > NSEC_PER_SEC)
+               return "invalid time precision";
+
+       /* validate layout */
+       err = validate_sb_layout(&sb->layout);
+       if (err)
+               return err;
+
+       vstruct_for_each(sb, f) {
+               if (!f->u64s)
+                       return "Invalid superblock: invalid optional field";
+
+               if (vstruct_next(f) > vstruct_last(sb))
+                       return "Invalid superblock: invalid optional field";
+       }
+
+       /* members must be validated first: */
+       mi = bch2_sb_get_members(sb);
+       if (!mi)
+               return "Invalid superblock: member info area missing";
+
+       err = bch2_sb_field_validate(sb, &mi->field);
+       if (err)
+               return err;
+
+       vstruct_for_each(sb, f) {
+               if (le32_to_cpu(f->type) == BCH_SB_FIELD_members)
+                       continue;
+
+               err = bch2_sb_field_validate(sb, f);
+               if (err)
+                       return err;
+       }
+
+       return NULL;
+}
+
+/* device open: */
+
+static void bch2_sb_update(struct bch_fs *c)
+{
+       struct bch_sb *src = c->disk_sb.sb;
+       struct bch_sb_field_members *mi = bch2_sb_get_members(src);
+       struct bch_dev *ca;
+       unsigned i;
+
+       lockdep_assert_held(&c->sb_lock);
+
+       c->sb.uuid              = src->uuid;
+       c->sb.user_uuid         = src->user_uuid;
+       c->sb.version           = le16_to_cpu(src->version);
+       c->sb.nr_devices        = src->nr_devices;
+       c->sb.clean             = BCH_SB_CLEAN(src);
+       c->sb.encryption_type   = BCH_SB_ENCRYPTION_TYPE(src);
+       c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src);
+       c->sb.time_base_lo      = le64_to_cpu(src->time_base_lo);
+       c->sb.time_base_hi      = le32_to_cpu(src->time_base_hi);
+       c->sb.time_precision    = le32_to_cpu(src->time_precision);
+       c->sb.features          = le64_to_cpu(src->features[0]);
+       c->sb.compat            = le64_to_cpu(src->compat[0]);
+
+       for_each_member_device(ca, c, i)
+               ca->mi = bch2_mi_to_cpu(mi->members + i);
+}
+
+/* doesn't copy member info */
+static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
+{
+       struct bch_sb_field *src_f, *dst_f;
+       struct bch_sb *dst = dst_handle->sb;
+       unsigned i;
+
+       dst->version            = src->version;
+       dst->version_min        = src->version_min;
+       dst->seq                = src->seq;
+       dst->uuid               = src->uuid;
+       dst->user_uuid          = src->user_uuid;
+       memcpy(dst->label,      src->label, sizeof(dst->label));
+
+       dst->block_size         = src->block_size;
+       dst->nr_devices         = src->nr_devices;
+
+       dst->time_base_lo       = src->time_base_lo;
+       dst->time_base_hi       = src->time_base_hi;
+       dst->time_precision     = src->time_precision;
+
+       memcpy(dst->flags,      src->flags,     sizeof(dst->flags));
+       memcpy(dst->features,   src->features,  sizeof(dst->features));
+       memcpy(dst->compat,     src->compat,    sizeof(dst->compat));
+
+       for (i = 0; i < BCH_SB_FIELD_NR; i++) {
+               if (i == BCH_SB_FIELD_journal)
+                       continue;
+
+               src_f = bch2_sb_field_get(src, i);
+               dst_f = bch2_sb_field_get(dst, i);
+               dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
+                               src_f ? le32_to_cpu(src_f->u64s) : 0);
+
+               if (src_f)
+                       memcpy(dst_f, src_f, vstruct_bytes(src_f));
+       }
+}
+
+int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
+{
+       struct bch_sb_field_journal *journal_buckets =
+               bch2_sb_get_journal(src);
+       unsigned journal_u64s = journal_buckets
+               ? le32_to_cpu(journal_buckets->field.u64s)
+               : 0;
+       int ret;
+
+       lockdep_assert_held(&c->sb_lock);
+
+       ret = bch2_sb_realloc(&c->disk_sb,
+                             le32_to_cpu(src->u64s) - journal_u64s);
+       if (ret)
+               return ret;
+
+       __copy_super(&c->disk_sb, src);
+
+       ret = bch2_sb_replicas_to_cpu_replicas(c);
+       if (ret)
+               return ret;
+
+       ret = bch2_sb_disk_groups_to_cpu(c);
+       if (ret)
+               return ret;
+
+       bch2_sb_update(c);
+       return 0;
+}
+
+int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct bch_sb *src = c->disk_sb.sb, *dst = ca->disk_sb.sb;
+       struct bch_sb_field_journal *journal_buckets =
+               bch2_sb_get_journal(dst);
+       unsigned journal_u64s = journal_buckets
+               ? le32_to_cpu(journal_buckets->field.u64s)
+               : 0;
+       unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s;
+       int ret;
+
+       ret = bch2_sb_realloc(&ca->disk_sb, u64s);
+       if (ret)
+               return ret;
+
+       __copy_super(&ca->disk_sb, src);
+       return 0;
+}
+
+/* read superblock: */
+
+static const char *read_one_super(struct bch_sb_handle *sb, u64 offset)
+{
+       struct bch_csum csum;
+       size_t bytes;
+reread:
+       bio_reset(sb->bio);
+       bio_set_dev(sb->bio, sb->bdev);
+       sb->bio->bi_iter.bi_sector = offset;
+       bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
+       bch2_bio_map(sb->bio, sb->sb, PAGE_SIZE << sb->page_order);
+
+       if (submit_bio_wait(sb->bio))
+               return "IO error";
+
+       if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC))
+               return "Not a bcachefs superblock";
+
+       if (le16_to_cpu(sb->sb->version) <  bcachefs_metadata_version_min ||
+           le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max)
+               return "Unsupported superblock version";
+
+       bytes = vstruct_bytes(sb->sb);
+
+       if (bytes > 512 << sb->sb->layout.sb_max_size_bits)
+               return "Bad superblock: too big";
+
+       if (get_order(bytes) > sb->page_order) {
+               if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s)))
+                       return "cannot allocate memory";
+               goto reread;
+       }
+
+       if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR)
+               return "unknown csum type";
+
+       /* XXX: verify MACs */
+       csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
+                           null_nonce(), sb->sb);
+
+       if (bch2_crc_cmp(csum, sb->sb->csum))
+               return "bad checksum reading superblock";
+
+       sb->seq = le64_to_cpu(sb->sb->seq);
+
+       return NULL;
+}
+
+int bch2_read_super(const char *path, struct bch_opts *opts,
+                   struct bch_sb_handle *sb)
+{
+       u64 offset = opt_get(*opts, sb);
+       struct bch_sb_layout layout;
+       const char *err;
+       __le64 *i;
+       int ret;
+
+       pr_verbose_init(*opts, "");
+
+       memset(sb, 0, sizeof(*sb));
+       sb->mode        = FMODE_READ;
+       sb->have_bio    = true;
+
+       if (!opt_get(*opts, noexcl))
+               sb->mode |= FMODE_EXCL;
+
+       if (!opt_get(*opts, nochanges))
+               sb->mode |= FMODE_WRITE;
+
+       sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
+       if (IS_ERR(sb->bdev) &&
+           PTR_ERR(sb->bdev) == -EACCES &&
+           opt_get(*opts, read_only)) {
+               sb->mode &= ~FMODE_WRITE;
+
+               sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
+               if (!IS_ERR(sb->bdev))
+                       opt_set(*opts, nochanges, true);
+       }
+
+       if (IS_ERR(sb->bdev)) {
+               ret = PTR_ERR(sb->bdev);
+               goto out;
+       }
+
+       err = "cannot allocate memory";
+       ret = bch2_sb_realloc(sb, 0);
+       if (ret)
+               goto err;
+
+       ret = -EFAULT;
+       err = "dynamic fault";
+       if (bch2_fs_init_fault("read_super"))
+               goto err;
+
+       ret = -EINVAL;
+       err = read_one_super(sb, offset);
+       if (!err)
+               goto got_super;
+
+       if (opt_defined(*opts, sb))
+               goto err;
+
+       pr_err("error reading default superblock: %s", err);
+
+       /*
+        * Error reading primary superblock - read location of backup
+        * superblocks:
+        */
+       bio_reset(sb->bio);
+       bio_set_dev(sb->bio, sb->bdev);
+       sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
+       bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
+       /*
+        * use sb buffer to read layout, since sb buffer is page aligned but
+        * layout won't be:
+        */
+       bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));
+
+       err = "IO error";
+       if (submit_bio_wait(sb->bio))
+               goto err;
+
+       memcpy(&layout, sb->sb, sizeof(layout));
+       err = validate_sb_layout(&layout);
+       if (err)
+               goto err;
+
+       for (i = layout.sb_offset;
+            i < layout.sb_offset + layout.nr_superblocks; i++) {
+               offset = le64_to_cpu(*i);
+
+               if (offset == opt_get(*opts, sb))
+                       continue;
+
+               err = read_one_super(sb, offset);
+               if (!err)
+                       goto got_super;
+       }
+
+       ret = -EINVAL;
+       goto err;
+
+got_super:
+       err = "Superblock block size smaller than device block size";
+       ret = -EINVAL;
+       if (le16_to_cpu(sb->sb->block_size) << 9 <
+           bdev_logical_block_size(sb->bdev))
+               goto err;
+
+       if (sb->mode & FMODE_WRITE)
+               bdev_get_queue(sb->bdev)->backing_dev_info->capabilities
+                       |= BDI_CAP_STABLE_WRITES;
+       ret = 0;
+       sb->have_layout = true;
+out:
+       pr_verbose_init(*opts, "ret %i", ret);
+       return ret;
+err:
+       bch2_free_super(sb);
+       pr_err("error reading superblock: %s", err);
+       goto out;
+}
+
+/* write superblock: */
+
+static void write_super_endio(struct bio *bio)
+{
+       struct bch_dev *ca = bio->bi_private;
+
+       /* XXX: return errors directly */
+
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write: %s",
+                              bch2_blk_status_to_str(bio->bi_status)))
+               ca->sb_write_error = 1;
+
+       closure_put(&ca->fs->sb_write);
+       percpu_ref_put(&ca->io_ref);
+}
+
+static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct bch_sb *sb = ca->disk_sb.sb;
+       struct bio *bio = ca->disk_sb.bio;
+
+       bio_reset(bio);
+       bio_set_dev(bio, ca->disk_sb.bdev);
+       bio->bi_iter.bi_sector  = le64_to_cpu(sb->layout.sb_offset[0]);
+       bio->bi_end_io          = write_super_endio;
+       bio->bi_private         = ca;
+       bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META);
+       bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE);
+
+       this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb],
+                    bio_sectors(bio));
+
+       percpu_ref_get(&ca->io_ref);
+       closure_bio_submit(bio, &c->sb_write);
+}
+
+static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
+{
+       struct bch_sb *sb = ca->disk_sb.sb;
+       struct bio *bio = ca->disk_sb.bio;
+
+       sb->offset = sb->layout.sb_offset[idx];
+
+       SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum);
+       sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
+                               null_nonce(), sb);
+
+       bio_reset(bio);
+       bio_set_dev(bio, ca->disk_sb.bdev);
+       bio->bi_iter.bi_sector  = le64_to_cpu(sb->offset);
+       bio->bi_end_io          = write_super_endio;
+       bio->bi_private         = ca;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
+       bch2_bio_map(bio, sb,
+                    roundup((size_t) vstruct_bytes(sb),
+                            bdev_logical_block_size(ca->disk_sb.bdev)));
+
+       this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
+                    bio_sectors(bio));
+
+       percpu_ref_get(&ca->io_ref);
+       closure_bio_submit(bio, &c->sb_write);
+}
+
+int bch2_write_super(struct bch_fs *c)
+{
+       struct closure *cl = &c->sb_write;
+       struct bch_dev *ca;
+       unsigned i, sb = 0, nr_wrote;
+       const char *err;
+       struct bch_devs_mask sb_written;
+       bool wrote, can_mount_without_written, can_mount_with_written;
+       int ret = 0;
+
+       lockdep_assert_held(&c->sb_lock);
+
+       closure_init_stack(cl);
+       memset(&sb_written, 0, sizeof(sb_written));
+
+       le64_add_cpu(&c->disk_sb.sb->seq, 1);
+
+       if (test_bit(BCH_FS_ERROR, &c->flags))
+               SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
+
+       for_each_online_member(ca, c, i)
+               bch2_sb_from_fs(c, ca);
+
+       for_each_online_member(ca, c, i) {
+               err = bch2_sb_validate(&ca->disk_sb);
+               if (err) {
+                       bch2_fs_inconsistent(c, "sb invalid before write: %s", err);
+                       ret = -1;
+                       goto out;
+               }
+       }
+
+       if (c->opts.nochanges)
+               goto out;
+
+       for_each_online_member(ca, c, i) {
+               __set_bit(ca->dev_idx, sb_written.d);
+               ca->sb_write_error = 0;
+       }
+
+       for_each_online_member(ca, c, i)
+               read_back_super(c, ca);
+       closure_sync(cl);
+
+       for_each_online_member(ca, c, i) {
+               if (!ca->sb_write_error &&
+                   ca->disk_sb.seq !=
+                   le64_to_cpu(ca->sb_read_scratch->seq)) {
+                       bch2_fs_fatal_error(c,
+                               "Superblock modified by another process");
+                       percpu_ref_put(&ca->io_ref);
+                       ret = -EROFS;
+                       goto out;
+               }
+       }
+
+       do {
+               wrote = false;
+               for_each_online_member(ca, c, i)
+                       if (!ca->sb_write_error &&
+                           sb < ca->disk_sb.sb->layout.nr_superblocks) {
+                               write_one_super(c, ca, sb);
+                               wrote = true;
+                       }
+               closure_sync(cl);
+               sb++;
+       } while (wrote);
+
+       for_each_online_member(ca, c, i) {
+               if (ca->sb_write_error)
+                       __clear_bit(ca->dev_idx, sb_written.d);
+               else
+                       ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
+       }
+
+       nr_wrote = dev_mask_nr(&sb_written);
+
+       can_mount_with_written =
+               bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
+                                     BCH_FORCE_IF_DEGRADED);
+
+       for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
+               sb_written.d[i] = ~sb_written.d[i];
+
+       can_mount_without_written =
+               bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
+                                     BCH_FORCE_IF_DEGRADED);
+
+       /*
+        * If we would be able to mount _without_ the devices we successfully
+        * wrote superblocks to, we weren't able to write to enough devices:
+        *
+        * Exception: if we can mount without the successes because we haven't
+        * written anything (new filesystem), we continue if we'd be able to
+        * mount with the devices we did successfully write to:
+        */
+       if (bch2_fs_fatal_err_on(!nr_wrote ||
+                                (can_mount_without_written &&
+                                 !can_mount_with_written), c,
+               "Unable to write superblock to sufficient devices"))
+               ret = -1;
+out:
+       /* Make new options visible after they're persistent: */
+       bch2_sb_update(c);
+       return ret;
+}
+
+void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
+{
+       mutex_lock(&c->sb_lock);
+       if (!(c->sb.features & (1ULL << feat))) {
+               c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat);
+
+               bch2_write_super(c);
+       }
+       mutex_unlock(&c->sb_lock);
+}
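
Callers use this to persist a feature bit before first relying on it, e.g.
(illustrative) for the new siphash variant tested in str_hash.h:

    /* make the feature bit durable before writing keys that depend on it */
    __bch2_check_set_feature(c, BCH_FEATURE_new_siphash);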
+
+/* BCH_SB_FIELD_journal: */
+
+static int u64_cmp(const void *_l, const void *_r)
+{
+       u64 l = *((const u64 *) _l), r = *((const u64 *) _r);
+
+       return l < r ? -1 : l > r ? 1 : 0;
+}
+
+static const char *bch2_sb_validate_journal(struct bch_sb *sb,
+                                           struct bch_sb_field *f)
+{
+       struct bch_sb_field_journal *journal = field_to_type(f, journal);
+       struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx;
+       const char *err;
+       unsigned nr;
+       unsigned i;
+       u64 *b;
+
+       journal = bch2_sb_get_journal(sb);
+       if (!journal)
+               return NULL;
+
+       nr = bch2_nr_journal_buckets(journal);
+       if (!nr)
+               return NULL;
+
+       b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL);
+       if (!b)
+               return "cannot allocate memory";
+
+       for (i = 0; i < nr; i++)
+               b[i] = le64_to_cpu(journal->buckets[i]);
+
+       sort(b, nr, sizeof(u64), u64_cmp, NULL);
+
+       err = "journal bucket at sector 0";
+       if (!b[0])
+               goto err;
+
+       err = "journal bucket before first bucket";
+       if (m && b[0] < le16_to_cpu(m->first_bucket))
+               goto err;
+
+       err = "journal bucket past end of device";
+       if (m && b[nr - 1] >= le64_to_cpu(m->nbuckets))
+               goto err;
+
+       err = "duplicate journal buckets";
+       for (i = 0; i + 1 < nr; i++)
+               if (b[i] == b[i + 1])
+                       goto err;
+
+       err = NULL;
+err:
+       kfree(b);
+       return err;
+}
+
+static const struct bch_sb_field_ops bch_sb_field_ops_journal = {
+       .validate       = bch2_sb_validate_journal,
+};
+
+/* BCH_SB_FIELD_members: */
+
+static const char *bch2_sb_validate_members(struct bch_sb *sb,
+                                           struct bch_sb_field *f)
+{
+       struct bch_sb_field_members *mi = field_to_type(f, members);
+       struct bch_member *m;
+
+       if ((void *) (mi->members + sb->nr_devices) >
+           vstruct_end(&mi->field))
+               return "Invalid superblock: bad member info";
+
+       for (m = mi->members;
+            m < mi->members + sb->nr_devices;
+            m++) {
+               if (!bch2_member_exists(m))
+                       continue;
+
+               if (le64_to_cpu(m->nbuckets) > LONG_MAX)
+                       return "Too many buckets";
+
+               if (le64_to_cpu(m->nbuckets) -
+                   le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS)
+                       return "Not enough buckets";
+
+               if (le16_to_cpu(m->bucket_size) <
+                   le16_to_cpu(sb->block_size))
+                       return "bucket size smaller than block size";
+
+               if (le16_to_cpu(m->bucket_size) <
+                   BCH_SB_BTREE_NODE_SIZE(sb))
+                       return "bucket size smaller than btree node size";
+       }
+
+       return NULL;
+}
+
+static const struct bch_sb_field_ops bch_sb_field_ops_members = {
+       .validate       = bch2_sb_validate_members,
+};
+
+/* BCH_SB_FIELD_crypt: */
+
+static const char *bch2_sb_validate_crypt(struct bch_sb *sb,
+                                         struct bch_sb_field *f)
+{
+       struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
+
+       if (vstruct_bytes(&crypt->field) != sizeof(*crypt))
+               return "invalid field crypt: wrong size";
+
+       if (BCH_CRYPT_KDF_TYPE(crypt))
+               return "invalid field crypt: bad kdf type";
+
+       return NULL;
+}
+
+static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
+       .validate       = bch2_sb_validate_crypt,
+};
+
+/* BCH_SB_FIELD_clean: */
+
+void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
+{
+       struct jset_entry *entry;
+
+       for (entry = clean->start;
+            entry < (struct jset_entry *) vstruct_end(&clean->field);
+            entry = vstruct_next(entry))
+               bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
+}
+
+int bch2_fs_mark_dirty(struct bch_fs *c)
+{
+       int ret;
+
+       /*
+        * Unconditionally write superblock, to verify it hasn't changed before
+        * we go rw:
+        */
+
+       mutex_lock(&c->sb_lock);
+       SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+       c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
+       c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
+       c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled;
+       ret = bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       return ret;
+}
+
+static void
+entry_init_u64s(struct jset_entry *entry, unsigned u64s)
+{
+       memset(entry, 0, u64s * sizeof(u64));
+
+       /*
+        * The u64s field counts from the start of data, ignoring the shared
+        * fields.
+        */
+       entry->u64s = u64s - 1;
+}
+
+static void
+entry_init_size(struct jset_entry *entry, size_t size)
+{
+       unsigned u64s = DIV_ROUND_UP(size, sizeof(u64));
+       entry_init_u64s(entry, u64s);
+}
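+
+/*
+ * Editorial example (not upstream code): for a 12-byte entry,
+ * entry_init_size() reserves DIV_ROUND_UP(12, 8) == 2 u64s and records
+ * entry->u64s == 1, i.e. one u64 of payload after the shared header word:
+ *
+ *      entry_init_size(entry, 12);     // entry->u64s == 1
+ */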
+
+struct jset_entry *
+bch2_journal_super_entries_add_common(struct bch_fs *c,
+                                     struct jset_entry *entry,
+                                     u64 journal_seq)
+{
+       unsigned i;
+
+       percpu_down_write(&c->mark_lock);
+
+       if (!journal_seq) {
+               bch2_fs_usage_acc_to_base(c, 0);
+               bch2_fs_usage_acc_to_base(c, 1);
+       } else {
+               bch2_fs_usage_acc_to_base(c, journal_seq & 1);
+       }
+
+       {
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+
+               entry_init_size(entry, sizeof(*u));
+               u->entry.type   = BCH_JSET_ENTRY_usage;
+               u->entry.btree_id = FS_USAGE_INODES;
+               u->v            = cpu_to_le64(c->usage_base->nr_inodes);
+
+               entry = vstruct_next(entry);
+       }
+
+       {
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+
+               entry_init_size(entry, sizeof(*u));
+               u->entry.type   = BCH_JSET_ENTRY_usage;
+               u->entry.btree_id = FS_USAGE_KEY_VERSION;
+               u->v            = cpu_to_le64(atomic64_read(&c->key_version));
+
+               entry = vstruct_next(entry);
+       }
+
+       for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+
+               entry_init_size(entry, sizeof(*u));
+               u->entry.type   = BCH_JSET_ENTRY_usage;
+               u->entry.btree_id = FS_USAGE_RESERVED;
+               u->entry.level  = i;
+               u->v            = cpu_to_le64(c->usage_base->persistent_reserved[i]);
+
+               entry = vstruct_next(entry);
+       }
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+               struct jset_entry_data_usage *u =
+                       container_of(entry, struct jset_entry_data_usage, entry);
+
+               entry_init_size(entry, sizeof(*u) + e->nr_devs);
+               u->entry.type   = BCH_JSET_ENTRY_data_usage;
+               u->v            = cpu_to_le64(c->usage_base->replicas[i]);
+               memcpy(&u->r, e, replicas_entry_bytes(e));
+
+               entry = vstruct_next(entry);
+       }
+
+       percpu_up_write(&c->mark_lock);
+
+       return entry;
+}
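+
+/*
+ * Editorial note (not upstream): filesystem usage is staged in two
+ * accumulators indexed by the low bit of the journal sequence number.
+ * Passing journal_seq == 0 (as bch2_fs_mark_clean() does below) folds both
+ * into the base counters; a real sequence number folds in only the
+ * accumulator for that sequence's parity.
+ */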
+
+void bch2_fs_mark_clean(struct bch_fs *c)
+{
+       struct bch_sb_field_clean *sb_clean;
+       struct jset_entry *entry;
+       unsigned u64s;
+
+       mutex_lock(&c->sb_lock);
+       if (BCH_SB_CLEAN(c->disk_sb.sb))
+               goto out;
+
+       SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
+
+       c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
+       c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
+       c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates);
+       c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled);
+
+       u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
+
+       sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s);
+       if (!sb_clean) {
+               bch_err(c, "error resizing superblock while setting filesystem clean");
+               goto out;
+       }
+
+       sb_clean->flags         = 0;
+       sb_clean->read_clock    = cpu_to_le16(c->bucket_clock[READ].hand);
+       sb_clean->write_clock   = cpu_to_le16(c->bucket_clock[WRITE].hand);
+       sb_clean->journal_seq   = cpu_to_le64(journal_cur_seq(&c->journal) - 1);
+
+       /* Trying to catch outstanding bug: */
+       BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);
+
+       entry = sb_clean->start;
+       entry = bch2_journal_super_entries_add_common(c, entry, 0);
+       entry = bch2_btree_roots_to_journal_entries(c, entry, entry);
+       BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
+
+       memset(entry, 0,
+              vstruct_end(&sb_clean->field) - (void *) entry);
+
+       if (le16_to_cpu(c->disk_sb.sb->version) <
+           bcachefs_metadata_version_bkey_renumber)
+               bch2_sb_clean_renumber(sb_clean, WRITE);
+
+       bch2_write_super(c);
+out:
+       mutex_unlock(&c->sb_lock);
+}
+
+static const char *bch2_sb_validate_clean(struct bch_sb *sb,
+                                         struct bch_sb_field *f)
+{
+       struct bch_sb_field_clean *clean = field_to_type(f, clean);
+
+       if (vstruct_bytes(&clean->field) < sizeof(*clean))
+               return "invalid field crypt: wrong size";
+
+       return NULL;
+}
+
+static const struct bch_sb_field_ops bch_sb_field_ops_clean = {
+       .validate       = bch2_sb_validate_clean,
+};
+
+static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
+#define x(f, nr)                                       \
+       [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
+       BCH_SB_FIELDS()
+#undef x
+};
+
+static const char *bch2_sb_field_validate(struct bch_sb *sb,
+                                         struct bch_sb_field *f)
+{
+       unsigned type = le32_to_cpu(f->type);
+
+       return type < BCH_SB_FIELD_NR
+               ? bch2_sb_field_ops[type]->validate(sb, f)
+               : NULL;
+}
+
+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+                          struct bch_sb_field *f)
+{
+       unsigned type = le32_to_cpu(f->type);
+       const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR
+               ? bch2_sb_field_ops[type] : NULL;
+
+       if (ops)
+               pr_buf(out, "%s", bch2_sb_fields[type]);
+       else
+               pr_buf(out, "(unknown field %u)", type);
+
+       pr_buf(out, " (size %llu):", vstruct_bytes(f));
+
+       if (ops && ops->to_text)
+               bch2_sb_field_ops[type]->to_text(out, sb, f);
+}
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
new file mode 100644 (file)
index 0000000..7a06815
--- /dev/null
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUPER_IO_H
+#define _BCACHEFS_SUPER_IO_H
+
+#include "extents.h"
+#include "eytzinger.h"
+#include "super_types.h"
+#include "super.h"
+
+#include <asm/byteorder.h>
+
+struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
+struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
+                                         enum bch_sb_field_type, unsigned);
+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
+
+#define field_to_type(_f, _name)                                       \
+       container_of_or_null(_f, struct bch_sb_field_##_name, field)
+
+#define x(_name, _nr)                                                  \
+static inline struct bch_sb_field_##_name *                            \
+bch2_sb_get_##_name(struct bch_sb *sb)                                 \
+{                                                                      \
+       return field_to_type(bch2_sb_field_get(sb,                      \
+                               BCH_SB_FIELD_##_name), _name);          \
+}                                                                      \
+                                                                       \
+static inline struct bch_sb_field_##_name *                            \
+bch2_sb_resize_##_name(struct bch_sb_handle *sb, unsigned u64s)        \
+{                                                                      \
+       return field_to_type(bch2_sb_field_resize(sb,                   \
+                               BCH_SB_FIELD_##_name, u64s), _name);    \
+}
+
+BCH_SB_FIELDS()
+#undef x
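+
+/*
+ * Editorial note (not upstream): for every entry in BCH_SB_FIELDS(), the
+ * x() macro above emits a typed getter and resizer, e.g. for the journal
+ * field:
+ *
+ *      struct bch_sb_field_journal *bch2_sb_get_journal(struct bch_sb *);
+ *      struct bch_sb_field_journal *
+ *      bch2_sb_resize_journal(struct bch_sb_handle *, unsigned u64s);
+ */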
+
+extern const char * const bch2_sb_fields[];
+
+struct bch_sb_field_ops {
+       const char *    (*validate)(struct bch_sb *, struct bch_sb_field *);
+       void            (*to_text)(struct printbuf *, struct bch_sb *,
+                                  struct bch_sb_field *);
+};
+
+static inline __le64 bch2_sb_magic(struct bch_fs *c)
+{
+       __le64 ret;
+       memcpy(&ret, &c->sb.uuid, sizeof(ret));
+       return ret;
+}
+
+static inline __u64 jset_magic(struct bch_fs *c)
+{
+       return __le64_to_cpu(bch2_sb_magic(c) ^ JSET_MAGIC);
+}
+
+static inline __u64 bset_magic(struct bch_fs *c)
+{
+       return __le64_to_cpu(bch2_sb_magic(c) ^ BSET_MAGIC);
+}
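+
+/*
+ * Editorial note (not upstream): the journal and btree node magic numbers
+ * are the filesystem's superblock magic (derived from its UUID) XORed with
+ * a per-structure constant, so each filesystem gets unique magics that
+ * still distinguish jsets from bsets:
+ *
+ *      jset_magic(c) == sb_magic ^ JSET_MAGIC
+ *      bset_magic(c) == sb_magic ^ BSET_MAGIC
+ */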
+
+int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *);
+int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *);
+
+void bch2_free_super(struct bch_sb_handle *);
+int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
+
+const char *bch2_sb_validate(struct bch_sb_handle *);
+
+int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
+int bch2_write_super(struct bch_fs *);
+void __bch2_check_set_feature(struct bch_fs *, unsigned);
+
+static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
+{
+       if (!(c->sb.features & (1ULL << feat)))
+               __bch2_check_set_feature(c, feat);
+}
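+
+/*
+ * Usage sketch (editorial, not upstream): the unlocked check above makes
+ * this cheap to call from hot paths - the common case (feature bit already
+ * set) does no locking, and __bch2_check_set_feature() re-checks under
+ * sb_lock before rewriting the superblock:
+ *
+ *      bch2_check_set_feature(c, BCH_FEATURE_new_extent_overwrite);
+ */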
+
+/* BCH_SB_FIELD_journal: */
+
+static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
+{
+       return j
+               ? (__le64 *) vstruct_end(&j->field) - j->buckets
+               : 0;
+}
+
+/* BCH_SB_FIELD_members: */
+
+static inline bool bch2_member_exists(struct bch_member *m)
+{
+       return !bch2_is_zero(m->uuid.b, sizeof(uuid_le));
+}
+
+static inline bool bch2_dev_exists(struct bch_sb *sb,
+                                  struct bch_sb_field_members *mi,
+                                  unsigned dev)
+{
+       return dev < sb->nr_devices &&
+               bch2_member_exists(&mi->members[dev]);
+}
+
+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
+{
+       return (struct bch_member_cpu) {
+               .nbuckets       = le64_to_cpu(mi->nbuckets),
+               .first_bucket   = le16_to_cpu(mi->first_bucket),
+               .bucket_size    = le16_to_cpu(mi->bucket_size),
+               .group          = BCH_MEMBER_GROUP(mi),
+               .state          = BCH_MEMBER_STATE(mi),
+               .replacement    = BCH_MEMBER_REPLACEMENT(mi),
+               .discard        = BCH_MEMBER_DISCARD(mi),
+               .data_allowed   = BCH_MEMBER_DATA_ALLOWED(mi),
+               .durability     = BCH_MEMBER_DURABILITY(mi)
+                       ? BCH_MEMBER_DURABILITY(mi) - 1
+                       : 1,
+               .valid          = !bch2_is_zero(mi->uuid.b, sizeof(uuid_le)),
+       };
+}
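+
+/*
+ * Editorial note (not upstream): the durability encoding above is biased
+ * by one so that an all-zeroes member gets a sane default - stored 0 means
+ * "unset" and reads as durability 1, stored n > 0 reads as durability
+ * n - 1 (so stored 1 means durability 0, i.e. data on this device doesn't
+ * count towards replication).
+ */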
+
+/* BCH_SB_FIELD_clean: */
+
+struct jset_entry *
+bch2_journal_super_entries_add_common(struct bch_fs *,
+                                     struct jset_entry *, u64);
+
+void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
+
+int bch2_fs_mark_dirty(struct bch_fs *);
+void bch2_fs_mark_clean(struct bch_fs *);
+
+void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
+                          struct bch_sb_field *);
+
+#endif /* _BCACHEFS_SUPER_IO_H */
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
new file mode 100644 (file)
index 0000000..7f301fa
--- /dev/null
@@ -0,0 +1,2037 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bcachefs setup/teardown code, and some metadata io - read a superblock and
+ * figure out what to do with it.
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "bkey_sort.h"
+#include "btree_cache.h"
+#include "btree_gc.h"
+#include "btree_key_cache.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "chardev.h"
+#include "checksum.h"
+#include "clock.h"
+#include "compress.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "fs.h"
+#include "fs-io.h"
+#include "fsck.h"
+#include "inode.h"
+#include "io.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "move.h"
+#include "migrate.h"
+#include "movinggc.h"
+#include "quota.h"
+#include "rebalance.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "super.h"
+#include "super-io.h"
+#include "sysfs.h"
+
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/random.h>
+#include <linux/sysfs.h>
+#include <crypto/hash.h>
+
+#include <trace/events/bcachefs.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
+
+#define KTYPE(type)                                                    \
+struct kobj_type type ## _ktype = {                                    \
+       .release        = type ## _release,                             \
+       .sysfs_ops      = &type ## _sysfs_ops,                          \
+       .default_attrs  = type ## _files                                \
+}
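+
+/*
+ * Editorial note (not upstream): KTYPE(bch2_fs) below expands to
+ *
+ *      struct kobj_type bch2_fs_ktype = {
+ *              .release        = bch2_fs_release,
+ *              .sysfs_ops      = &bch2_fs_sysfs_ops,
+ *              .default_attrs  = bch2_fs_files
+ *      };
+ *
+ * with the release hook defined in this file and the sysfs_ops/attribute
+ * arrays presumably coming from sysfs.c.
+ */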
+
+static void bch2_fs_release(struct kobject *);
+static void bch2_dev_release(struct kobject *);
+
+static void bch2_fs_internal_release(struct kobject *k)
+{
+}
+
+static void bch2_fs_opts_dir_release(struct kobject *k)
+{
+}
+
+static void bch2_fs_time_stats_release(struct kobject *k)
+{
+}
+
+static KTYPE(bch2_fs);
+static KTYPE(bch2_fs_internal);
+static KTYPE(bch2_fs_opts_dir);
+static KTYPE(bch2_fs_time_stats);
+static KTYPE(bch2_dev);
+
+static struct kset *bcachefs_kset;
+static LIST_HEAD(bch_fs_list);
+static DEFINE_MUTEX(bch_fs_list_lock);
+
+static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
+
+static void bch2_dev_free(struct bch_dev *);
+static int bch2_dev_alloc(struct bch_fs *, unsigned);
+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
+static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
+
+struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev)
+{
+       struct bch_fs *c;
+       struct bch_dev *ca;
+       unsigned i;
+
+       mutex_lock(&bch_fs_list_lock);
+       rcu_read_lock();
+
+       list_for_each_entry(c, &bch_fs_list, list)
+               for_each_member_device_rcu(ca, c, i, NULL)
+                       if (ca->disk_sb.bdev == bdev) {
+                               closure_get(&c->cl);
+                               goto found;
+                       }
+       c = NULL;
+found:
+       rcu_read_unlock();
+       mutex_unlock(&bch_fs_list_lock);
+
+       return c;
+}
+
+static struct bch_fs *__bch2_uuid_to_fs(uuid_le uuid)
+{
+       struct bch_fs *c;
+
+       lockdep_assert_held(&bch_fs_list_lock);
+
+       list_for_each_entry(c, &bch_fs_list, list)
+               if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid_le)))
+                       return c;
+
+       return NULL;
+}
+
+struct bch_fs *bch2_uuid_to_fs(uuid_le uuid)
+{
+       struct bch_fs *c;
+
+       mutex_lock(&bch_fs_list_lock);
+       c = __bch2_uuid_to_fs(uuid);
+       if (c)
+               closure_get(&c->cl);
+       mutex_unlock(&bch_fs_list_lock);
+
+       return c;
+}
+
+/* Filesystem RO/RW: */
+
+/*
+ * For startup/shutdown of RW stuff, the dependencies are:
+ *
+ * - foreground writes depend on copygc and rebalance (to free up space)
+ *
+ * - copygc and rebalance depend on mark and sweep gc (they actually probably
+ *   don't because they either reserve ahead of time or don't block if
+ *   allocations fail, but allocations can require mark and sweep gc to run
+ *   because of generation number wraparound)
+ *
+ * - all of the above depends on the allocator threads
+ *
+ * - allocator depends on the journal (when it rewrites prios and gens)
+ */
+
+static void __bch2_fs_read_only(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i, clean_passes = 0;
+
+       bch2_rebalance_stop(c);
+       bch2_copygc_stop(c);
+       bch2_gc_thread_stop(c);
+
+       /*
+        * Flush journal before stopping allocators, because flushing journal
+        * blacklist entries involves allocating new btree nodes:
+        */
+       bch2_journal_flush_all_pins(&c->journal);
+
+       /*
+        * If the allocator threads didn't all start up, the btree updates to
+        * write out alloc info aren't going to work:
+        */
+       if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
+               goto nowrote_alloc;
+
+       bch_verbose(c, "flushing journal and stopping allocators");
+
+       bch2_journal_flush_all_pins(&c->journal);
+       set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
+
+       do {
+               clean_passes++;
+
+               if (bch2_journal_flush_all_pins(&c->journal))
+                       clean_passes = 0;
+
+               /*
+                * In flight interior btree updates will generate more journal
+                * updates and btree updates (alloc btree):
+                */
+               if (bch2_btree_interior_updates_nr_pending(c)) {
+                       closure_wait_event(&c->btree_interior_update_wait,
+                                          !bch2_btree_interior_updates_nr_pending(c));
+                       clean_passes = 0;
+               }
+               flush_work(&c->btree_interior_update_work);
+
+               if (bch2_journal_flush_all_pins(&c->journal))
+                       clean_passes = 0;
+       } while (clean_passes < 2);
+       bch_verbose(c, "flushing journal and stopping allocators complete");
+
+       set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
+nowrote_alloc:
+       closure_wait_event(&c->btree_interior_update_wait,
+                          !bch2_btree_interior_updates_nr_pending(c));
+       flush_work(&c->btree_interior_update_work);
+
+       for_each_member_device(ca, c, i)
+               bch2_dev_allocator_stop(ca);
+
+       bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+       bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
+       clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+       clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
+
+       bch2_fs_journal_stop(&c->journal);
+
+       /*
+        * the journal kicks off btree writes via reclaim - wait for in flight
+        * writes after stopping journal:
+        */
+       if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
+               bch2_btree_flush_all_writes(c);
+       else
+               bch2_btree_verify_flushed(c);
+
+       /*
+        * After stopping journal:
+        */
+       for_each_member_device(ca, c, i)
+               bch2_dev_allocator_remove(c, ca);
+}
+
+static void bch2_writes_disabled(struct percpu_ref *writes)
+{
+       struct bch_fs *c = container_of(writes, struct bch_fs, writes);
+
+       set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+       wake_up(&bch_read_only_wait);
+}
+
+void bch2_fs_read_only(struct bch_fs *c)
+{
+       if (!test_bit(BCH_FS_RW, &c->flags)) {
+               cancel_delayed_work_sync(&c->journal.reclaim_work);
+               return;
+       }
+
+       BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
+
+       /*
+        * Block new foreground-end write operations from starting - any new
+        * writes will return -EROFS:
+        *
+        * (This really blocks new _allocations_; writes to previously
+        * allocated space can still happen until the allocator is stopped in
+        * bch2_dev_allocator_stop().)
+        */
+       percpu_ref_kill(&c->writes);
+
+       cancel_work_sync(&c->ec_stripe_delete_work);
+       cancel_delayed_work(&c->pd_controllers_update);
+
+       /*
+        * If we're not doing an emergency shutdown, we want to wait on
+        * outstanding writes to complete so they don't see spurious errors due
+        * to shutting down the allocator:
+        *
+        * If we are doing an emergency shutdown, outstanding writes may hang
+        * until we shut down the allocator, so we don't want to wait on
+        * outstanding writes before shutting everything down - but we do need
+        * to wait on them before returning and signalling that going RO is
+        * complete:
+        */
+       wait_event(bch_read_only_wait,
+                  test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
+                  test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
+
+       __bch2_fs_read_only(c);
+
+       wait_event(bch_read_only_wait,
+                  test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
+
+       clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+
+       if (!bch2_journal_error(&c->journal) &&
+           !test_bit(BCH_FS_ERROR, &c->flags) &&
+           !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
+           test_bit(BCH_FS_STARTED, &c->flags) &&
+           test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) &&
+           !c->opts.norecovery) {
+               bch_verbose(c, "marking filesystem clean");
+               bch2_fs_mark_clean(c);
+       }
+
+       clear_bit(BCH_FS_RW, &c->flags);
+}
+
+static void bch2_fs_read_only_work(struct work_struct *work)
+{
+       struct bch_fs *c =
+               container_of(work, struct bch_fs, read_only_work);
+
+       down_write(&c->state_lock);
+       bch2_fs_read_only(c);
+       up_write(&c->state_lock);
+}
+
+static void bch2_fs_read_only_async(struct bch_fs *c)
+{
+       queue_work(system_long_wq, &c->read_only_work);
+}
+
+bool bch2_fs_emergency_read_only(struct bch_fs *c)
+{
+       bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags);
+
+       bch2_journal_halt(&c->journal);
+       bch2_fs_read_only_async(c);
+
+       wake_up(&bch_read_only_wait);
+       return ret;
+}
+
+static int bch2_fs_read_write_late(struct bch_fs *c)
+{
+       int ret;
+
+       ret = bch2_gc_thread_start(c);
+       if (ret) {
+               bch_err(c, "error starting gc thread");
+               return ret;
+       }
+
+       ret = bch2_copygc_start(c);
+       if (ret) {
+               bch_err(c, "error starting copygc thread");
+               return ret;
+       }
+
+       ret = bch2_rebalance_start(c);
+       if (ret) {
+               bch_err(c, "error starting rebalance thread");
+               return ret;
+       }
+
+       schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
+
+       schedule_work(&c->ec_stripe_delete_work);
+
+       return 0;
+}
+
+static int __bch2_fs_read_write(struct bch_fs *c, bool early)
+{
+       struct bch_dev *ca;
+       unsigned i;
+       int ret;
+
+       if (test_bit(BCH_FS_RW, &c->flags))
+               return 0;
+
+       /*
+        * nochanges is used for fsck -n mode - we have to allow going rw
+        * during recovery for that to work:
+        */
+       if (c->opts.norecovery ||
+           (c->opts.nochanges &&
+            (!early || c->opts.read_only)))
+               return -EROFS;
+
+       ret = bch2_fs_mark_dirty(c);
+       if (ret)
+               goto err;
+
+       /*
+        * We need to write out a journal entry before we start doing btree
+        * updates, to ensure that on unclean shutdown new journal blacklist
+        * entries are created:
+        */
+       bch2_journal_meta(&c->journal);
+
+       clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
+
+       for_each_rw_member(ca, c, i)
+               bch2_dev_allocator_add(c, ca);
+       bch2_recalc_capacity(c);
+
+       bch2_io_timer_add(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+       bch2_io_timer_add(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
+       for_each_rw_member(ca, c, i) {
+               ret = bch2_dev_allocator_start(ca);
+               if (ret) {
+                       bch_err(c, "error starting allocator threads");
+                       percpu_ref_put(&ca->io_ref);
+                       goto err;
+               }
+       }
+
+       set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+
+       if (!early) {
+               ret = bch2_fs_read_write_late(c);
+               if (ret)
+                       goto err;
+       }
+
+       percpu_ref_reinit(&c->writes);
+       set_bit(BCH_FS_RW, &c->flags);
+
+       queue_delayed_work(c->journal_reclaim_wq,
+                          &c->journal.reclaim_work, 0);
+       return 0;
+err:
+       __bch2_fs_read_only(c);
+       return ret;
+}
+
+int bch2_fs_read_write(struct bch_fs *c)
+{
+       return __bch2_fs_read_write(c, false);
+}
+
+int bch2_fs_read_write_early(struct bch_fs *c)
+{
+       lockdep_assert_held(&c->state_lock);
+
+       return __bch2_fs_read_write(c, true);
+}
+
+/* Filesystem startup/shutdown: */
+
+static void __bch2_fs_free(struct bch_fs *c)
+{
+       unsigned i;
+
+       for (i = 0; i < BCH_TIME_STAT_NR; i++)
+               bch2_time_stats_exit(&c->times[i]);
+
+       bch2_fs_quota_exit(c);
+       bch2_fs_fsio_exit(c);
+       bch2_fs_ec_exit(c);
+       bch2_fs_encryption_exit(c);
+       bch2_fs_io_exit(c);
+       bch2_fs_btree_interior_update_exit(c);
+       bch2_fs_btree_iter_exit(c);
+       bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
+       bch2_fs_btree_cache_exit(c);
+       bch2_fs_journal_exit(&c->journal);
+       bch2_io_clock_exit(&c->io_clock[WRITE]);
+       bch2_io_clock_exit(&c->io_clock[READ]);
+       bch2_fs_compress_exit(c);
+       bch2_journal_keys_free(&c->journal_keys);
+       bch2_journal_entries_free(&c->journal_entries);
+       percpu_free_rwsem(&c->mark_lock);
+       kfree(c->usage_scratch);
+       free_percpu(c->usage[1]);
+       free_percpu(c->usage[0]);
+       kfree(c->usage_base);
+       free_percpu(c->pcpu);
+       mempool_exit(&c->large_bkey_pool);
+       mempool_exit(&c->btree_bounce_pool);
+       bioset_exit(&c->btree_bio);
+       mempool_exit(&c->fill_iter);
+       percpu_ref_exit(&c->writes);
+       kfree(c->replicas.entries);
+       kfree(c->replicas_gc.entries);
+       kfree(rcu_dereference_protected(c->disk_groups, 1));
+       kfree(c->journal_seq_blacklist_table);
+       free_heap(&c->copygc_heap);
+
+       if (c->journal_reclaim_wq)
+               destroy_workqueue(c->journal_reclaim_wq);
+       if (c->copygc_wq)
+               destroy_workqueue(c->copygc_wq);
+       if (c->wq)
+               destroy_workqueue(c->wq);
+
+       free_pages((unsigned long) c->disk_sb.sb,
+                  c->disk_sb.page_order);
+       kvpfree(c, sizeof(*c));
+       module_put(THIS_MODULE);
+}
+
+static void bch2_fs_release(struct kobject *kobj)
+{
+       struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
+
+       __bch2_fs_free(c);
+}
+
+void __bch2_fs_stop(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i;
+
+       bch_verbose(c, "shutting down");
+
+       set_bit(BCH_FS_STOPPING, &c->flags);
+
+       cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
+       down_write(&c->state_lock);
+       bch2_fs_read_only(c);
+       up_write(&c->state_lock);
+
+       for_each_member_device(ca, c, i)
+               if (ca->kobj.state_in_sysfs &&
+                   ca->disk_sb.bdev)
+                       sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
+                                         "bcachefs");
+
+       if (c->kobj.state_in_sysfs)
+               kobject_del(&c->kobj);
+
+       bch2_fs_debug_exit(c);
+       bch2_fs_chardev_exit(c);
+
+       kobject_put(&c->time_stats);
+       kobject_put(&c->opts_dir);
+       kobject_put(&c->internal);
+
+       /* btree prefetch might have kicked off reads in the background: */
+       bch2_btree_flush_all_reads(c);
+
+       for_each_member_device(ca, c, i)
+               cancel_work_sync(&ca->io_error_work);
+
+       cancel_work_sync(&c->btree_write_error_work);
+       cancel_delayed_work_sync(&c->pd_controllers_update);
+       cancel_work_sync(&c->read_only_work);
+
+       for (i = 0; i < c->sb.nr_devices; i++)
+               if (c->devs[i])
+                       bch2_free_super(&c->devs[i]->disk_sb);
+}
+
+void bch2_fs_free(struct bch_fs *c)
+{
+       unsigned i;
+
+       mutex_lock(&bch_fs_list_lock);
+       list_del(&c->list);
+       mutex_unlock(&bch_fs_list_lock);
+
+       closure_sync(&c->cl);
+       closure_debug_destroy(&c->cl);
+
+       for (i = 0; i < c->sb.nr_devices; i++)
+               if (c->devs[i])
+                       bch2_dev_free(rcu_dereference_protected(c->devs[i], 1));
+
+       bch_verbose(c, "shutdown complete");
+
+       kobject_put(&c->kobj);
+}
+
+void bch2_fs_stop(struct bch_fs *c)
+{
+       __bch2_fs_stop(c);
+       bch2_fs_free(c);
+}
+
+static const char *bch2_fs_online(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       const char *err = NULL;
+       unsigned i;
+       int ret;
+
+       lockdep_assert_held(&bch_fs_list_lock);
+
+       if (!list_empty(&c->list))
+               return NULL;
+
+       if (__bch2_uuid_to_fs(c->sb.uuid))
+               return "filesystem UUID already open";
+
+       ret = bch2_fs_chardev_init(c);
+       if (ret)
+               return "error creating character device";
+
+       bch2_fs_debug_init(c);
+
+       if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ||
+           kobject_add(&c->internal, &c->kobj, "internal") ||
+           kobject_add(&c->opts_dir, &c->kobj, "options") ||
+           kobject_add(&c->time_stats, &c->kobj, "time_stats") ||
+           bch2_opts_create_sysfs_files(&c->opts_dir))
+               return "error creating sysfs objects";
+
+       down_write(&c->state_lock);
+
+       err = "error creating sysfs objects";
+       __for_each_member_device(ca, c, i, NULL)
+               if (bch2_dev_sysfs_online(c, ca))
+                       goto err;
+
+       list_add(&c->list, &bch_fs_list);
+       err = NULL;
+err:
+       up_write(&c->state_lock);
+       return err;
+}
+
+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
+{
+       struct bch_sb_field_members *mi;
+       struct bch_fs *c;
+       unsigned i, iter_size;
+       const char *err;
+
+       pr_verbose_init(opts, "");
+
+       c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
+       if (!c)
+               goto out;
+
+       __module_get(THIS_MODULE);
+
+       closure_init(&c->cl, NULL);
+
+       c->kobj.kset = bcachefs_kset;
+       kobject_init(&c->kobj, &bch2_fs_ktype);
+       kobject_init(&c->internal, &bch2_fs_internal_ktype);
+       kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
+       kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
+
+       c->minor                = -1;
+       c->disk_sb.fs_sb        = true;
+
+       init_rwsem(&c->state_lock);
+       mutex_init(&c->sb_lock);
+       mutex_init(&c->replicas_gc_lock);
+       mutex_init(&c->btree_root_lock);
+       INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
+
+       init_rwsem(&c->gc_lock);
+
+       for (i = 0; i < BCH_TIME_STAT_NR; i++)
+               bch2_time_stats_init(&c->times[i]);
+
+       bch2_fs_copygc_init(c);
+       bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
+       bch2_fs_allocator_background_init(c);
+       bch2_fs_allocator_foreground_init(c);
+       bch2_fs_rebalance_init(c);
+       bch2_fs_quota_init(c);
+
+       INIT_LIST_HEAD(&c->list);
+
+       mutex_init(&c->usage_scratch_lock);
+
+       mutex_init(&c->bio_bounce_pages_lock);
+
+       bio_list_init(&c->btree_write_error_list);
+       spin_lock_init(&c->btree_write_error_lock);
+       INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
+
+       INIT_WORK(&c->journal_seq_blacklist_gc_work,
+                 bch2_blacklist_entries_gc);
+
+       INIT_LIST_HEAD(&c->journal_entries);
+
+       INIT_LIST_HEAD(&c->fsck_errors);
+       mutex_init(&c->fsck_error_lock);
+
+       INIT_LIST_HEAD(&c->ec_stripe_head_list);
+       mutex_init(&c->ec_stripe_head_lock);
+
+       INIT_LIST_HEAD(&c->ec_stripe_new_list);
+       mutex_init(&c->ec_stripe_new_lock);
+
+       spin_lock_init(&c->ec_stripes_heap_lock);
+
+       seqcount_init(&c->gc_pos_lock);
+
+       seqcount_init(&c->usage_lock);
+
+       sema_init(&c->io_in_flight, 64);
+
+       c->copy_gc_enabled              = 1;
+       c->rebalance.enabled            = 1;
+       c->promote_whole_extents        = true;
+
+       c->journal.write_time   = &c->times[BCH_TIME_journal_write];
+       c->journal.delay_time   = &c->times[BCH_TIME_journal_delay];
+       c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal];
+       c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
+
+       bch2_fs_btree_cache_init_early(&c->btree_cache);
+
+       if (percpu_init_rwsem(&c->mark_lock))
+               goto err;
+
+       mutex_lock(&c->sb_lock);
+
+       if (bch2_sb_to_fs(c, sb)) {
+               mutex_unlock(&c->sb_lock);
+               goto err;
+       }
+
+       mutex_unlock(&c->sb_lock);
+
+       scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid);
+
+       c->opts = bch2_opts_default;
+       bch2_opts_apply(&c->opts, bch2_opts_from_sb(sb));
+       bch2_opts_apply(&c->opts, opts);
+
+       c->block_bits           = ilog2(c->opts.block_size);
+       c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c);
+
+       if (bch2_fs_init_fault("fs_alloc"))
+               goto err;
+
+       iter_size = sizeof(struct sort_iter) +
+               (btree_blocks(c) + 1) * 2 *
+               sizeof(struct sort_iter_set);
+
+       if (!(c->wq = alloc_workqueue("bcachefs",
+                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
+           !(c->copygc_wq = alloc_workqueue("bcache_copygc",
+                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
+           !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
+                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
+           percpu_ref_init(&c->writes, bch2_writes_disabled,
+                           PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+           mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
+           bioset_init(&c->btree_bio, 1,
+                       max(offsetof(struct btree_read_bio, bio),
+                           offsetof(struct btree_write_bio, wbio.bio)),
+                       BIOSET_NEED_BVECS) ||
+           !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
+           mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
+                                       btree_bytes(c)) ||
+           mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
+           bch2_io_clock_init(&c->io_clock[READ]) ||
+           bch2_io_clock_init(&c->io_clock[WRITE]) ||
+           bch2_fs_journal_init(&c->journal) ||
+           bch2_fs_replicas_init(c) ||
+           bch2_fs_btree_cache_init(c) ||
+           bch2_fs_btree_key_cache_init(&c->btree_key_cache) ||
+           bch2_fs_btree_iter_init(c) ||
+           bch2_fs_btree_interior_update_init(c) ||
+           bch2_fs_io_init(c) ||
+           bch2_fs_encryption_init(c) ||
+           bch2_fs_compress_init(c) ||
+           bch2_fs_ec_init(c) ||
+           bch2_fs_fsio_init(c))
+               goto err;
+
+       mi = bch2_sb_get_members(c->disk_sb.sb);
+       for (i = 0; i < c->sb.nr_devices; i++)
+               if (bch2_dev_exists(c->disk_sb.sb, mi, i) &&
+                   bch2_dev_alloc(c, i))
+                       goto err;
+
+       mutex_lock(&bch_fs_list_lock);
+       err = bch2_fs_online(c);
+       mutex_unlock(&bch_fs_list_lock);
+       if (err) {
+               bch_err(c, "bch2_fs_online() error: %s", err);
+               goto err;
+       }
+out:
+       pr_verbose_init(opts, "ret %i", c ? 0 : -ENOMEM);
+       return c;
+err:
+       bch2_fs_free(c);
+       c = NULL;
+       goto out;
+}
+
+noinline_for_stack
+static void print_mount_opts(struct bch_fs *c)
+{
+       enum bch_opt_id i;
+       char buf[512];
+       struct printbuf p = PBUF(buf);
+       bool first = true;
+
+       strcpy(buf, "(null)");
+
+       if (c->opts.read_only) {
+               pr_buf(&p, "ro");
+               first = false;
+       }
+
+       for (i = 0; i < bch2_opts_nr; i++) {
+               const struct bch_option *opt = &bch2_opt_table[i];
+               u64 v = bch2_opt_get_by_id(&c->opts, i);
+
+               if (!(opt->mode & OPT_MOUNT))
+                       continue;
+
+               if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
+                       continue;
+
+               if (!first)
+                       pr_buf(&p, ",");
+               first = false;
+               bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE);
+       }
+
+       bch_info(c, "mounted with opts: %s", buf);
+}
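+
+/*
+ * Editorial example (hypothetical values, not upstream): a read-only mount
+ * with one non-default option would log something like
+ *
+ *      mounted with opts: ro,degraded
+ *
+ * while an all-defaults mount logs "(null)" via the strcpy() fallback
+ * above, since nothing was ever printed into the buffer.
+ */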
+
+int bch2_fs_start(struct bch_fs *c)
+{
+       const char *err = "cannot allocate memory";
+       struct bch_sb_field_members *mi;
+       struct bch_dev *ca;
+       time64_t now = ktime_get_real_seconds();
+       unsigned i;
+       int ret = -EINVAL;
+
+       down_write(&c->state_lock);
+
+       BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
+
+       mutex_lock(&c->sb_lock);
+
+       for_each_online_member(ca, c, i)
+               bch2_sb_from_fs(c, ca);
+
+       mi = bch2_sb_get_members(c->disk_sb.sb);
+       for_each_online_member(ca, c, i)
+               mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
+
+       mutex_unlock(&c->sb_lock);
+
+       for_each_rw_member(ca, c, i)
+               bch2_dev_allocator_add(c, ca);
+       bch2_recalc_capacity(c);
+
+       ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
+               ? bch2_fs_recovery(c)
+               : bch2_fs_initialize(c);
+       if (ret)
+               goto err;
+
+       ret = bch2_opts_check_may_set(c);
+       if (ret)
+               goto err;
+
+       err = "dynamic fault";
+       ret = -EINVAL;
+       if (bch2_fs_init_fault("fs_start"))
+               goto err;
+
+       set_bit(BCH_FS_STARTED, &c->flags);
+
+       /*
+        * Allocator threads don't start filling copygc reserve until after we
+        * set BCH_FS_STARTED - wake them now:
+        */
+       for_each_online_member(ca, c, i)
+               bch2_wake_allocator(ca);
+
+       if (c->opts.read_only || c->opts.nochanges) {
+               bch2_fs_read_only(c);
+       } else {
+               err = "error going read write";
+               ret = !test_bit(BCH_FS_RW, &c->flags)
+                       ? bch2_fs_read_write(c)
+                       : bch2_fs_read_write_late(c);
+               if (ret)
+                       goto err;
+       }
+
+       print_mount_opts(c);
+       ret = 0;
+out:
+       up_write(&c->state_lock);
+       return ret;
+err:
+       switch (ret) {
+       case BCH_FSCK_ERRORS_NOT_FIXED:
+               bch_err(c, "filesystem contains errors: please report this to the developers");
+               pr_cont("mount with -o fix_errors to repair\n");
+               err = "fsck error";
+               break;
+       case BCH_FSCK_REPAIR_UNIMPLEMENTED:
+               bch_err(c, "filesystem contains errors: please report this to the developers");
+               pr_cont("repair unimplemented: inform the developers so that it can be added\n");
+               err = "fsck error";
+               break;
+       case BCH_FSCK_REPAIR_IMPOSSIBLE:
+               bch_err(c, "filesystem contains errors, but repair impossible");
+               err = "fsck error";
+               break;
+       case BCH_FSCK_UNKNOWN_VERSION:
+               err = "unknown metadata version";;
+               break;
+       case -ENOMEM:
+               err = "cannot allocate memory";
+               break;
+       case -EIO:
+               err = "IO error";
+               break;
+       }
+
+       if (ret >= 0)
+               ret = -EIO;
+       goto out;
+}
+
+static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
+{
+       struct bch_sb_field_members *sb_mi;
+
+       sb_mi = bch2_sb_get_members(sb);
+       if (!sb_mi)
+               return "Invalid superblock: member info area missing";
+
+       if (le16_to_cpu(sb->block_size) != c->opts.block_size)
+               return "mismatched block size";
+
+       if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) <
+           BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb))
+               return "new cache bucket size is too small";
+
+       return NULL;
+}
+
+static const char *bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb)
+{
+       struct bch_sb *newest =
+               le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb;
+       struct bch_sb_field_members *mi = bch2_sb_get_members(newest);
+
+       if (uuid_le_cmp(fs->uuid, sb->uuid))
+               return "device not a member of filesystem";
+
+       if (!bch2_dev_exists(newest, mi, sb->dev_idx))
+               return "device has been removed";
+
+       if (fs->block_size != sb->block_size)
+               return "mismatched block size";
+
+       return NULL;
+}
+
+/* Device startup/shutdown: */
+
+static void bch2_dev_release(struct kobject *kobj)
+{
+       struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+
+       kfree(ca);
+}
+
+static void bch2_dev_free(struct bch_dev *ca)
+{
+       cancel_work_sync(&ca->io_error_work);
+
+       if (ca->kobj.state_in_sysfs &&
+           ca->disk_sb.bdev)
+               sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
+                                 "bcachefs");
+
+       if (ca->kobj.state_in_sysfs)
+               kobject_del(&ca->kobj);
+
+       bch2_free_super(&ca->disk_sb);
+       bch2_dev_journal_exit(ca);
+
+       free_percpu(ca->io_done);
+       bioset_exit(&ca->replica_set);
+       bch2_dev_buckets_free(ca);
+       free_page((unsigned long) ca->sb_read_scratch);
+
+       bch2_time_stats_exit(&ca->io_latency[WRITE]);
+       bch2_time_stats_exit(&ca->io_latency[READ]);
+
+       percpu_ref_exit(&ca->io_ref);
+       percpu_ref_exit(&ca->ref);
+       kobject_put(&ca->kobj);
+}
+
+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
+{
+       lockdep_assert_held(&c->state_lock);
+
+       if (percpu_ref_is_zero(&ca->io_ref))
+               return;
+
+       __bch2_dev_read_only(c, ca);
+
+       reinit_completion(&ca->io_ref_completion);
+       percpu_ref_kill(&ca->io_ref);
+       wait_for_completion(&ca->io_ref_completion);
+
+       if (ca->kobj.state_in_sysfs) {
+               struct kobject *block =
+                       &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
+
+               sysfs_remove_link(block, "bcachefs");
+               sysfs_remove_link(&ca->kobj, "block");
+       }
+
+       bch2_free_super(&ca->disk_sb);
+       bch2_dev_journal_exit(ca);
+}
+
+static void bch2_dev_ref_complete(struct percpu_ref *ref)
+{
+       struct bch_dev *ca = container_of(ref, struct bch_dev, ref);
+
+       complete(&ca->ref_completion);
+}
+
+static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
+{
+       struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
+
+       complete(&ca->io_ref_completion);
+}
+
+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
+{
+       int ret;
+
+       if (!c->kobj.state_in_sysfs)
+               return 0;
+
+       if (!ca->kobj.state_in_sysfs) {
+               ret = kobject_add(&ca->kobj, &c->kobj,
+                                 "dev-%u", ca->dev_idx);
+               if (ret)
+                       return ret;
+       }
+
+       if (ca->disk_sb.bdev) {
+               struct kobject *block =
+                       &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
+
+               ret = sysfs_create_link(block, &ca->kobj, "bcachefs");
+               if (ret)
+                       return ret;
+               ret = sysfs_create_link(&ca->kobj, block, "block");
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
+                                       struct bch_member *member)
+{
+       struct bch_dev *ca;
+
+       ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+       if (!ca)
+               return NULL;
+
+       kobject_init(&ca->kobj, &bch2_dev_ktype);
+       init_completion(&ca->ref_completion);
+       init_completion(&ca->io_ref_completion);
+
+       init_rwsem(&ca->bucket_lock);
+
+       INIT_WORK(&ca->io_error_work, bch2_io_error_work);
+
+       bch2_time_stats_init(&ca->io_latency[READ]);
+       bch2_time_stats_init(&ca->io_latency[WRITE]);
+
+       ca->mi = bch2_mi_to_cpu(member);
+       ca->uuid = member->uuid;
+
+       if (opt_defined(c->opts, discard))
+               ca->mi.discard = opt_get(c->opts, discard);
+
+       if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
+                           0, GFP_KERNEL) ||
+           percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
+                           PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+           !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
+           bch2_dev_buckets_alloc(c, ca) ||
+           bioset_init(&ca->replica_set, 4,
+                       offsetof(struct bch_write_bio, bio), 0) ||
+           !(ca->io_done       = alloc_percpu(*ca->io_done)))
+               goto err;
+
+       return ca;
+err:
+       bch2_dev_free(ca);
+       return NULL;
+}
+
+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
+                           unsigned dev_idx)
+{
+       ca->dev_idx = dev_idx;
+       __set_bit(ca->dev_idx, ca->self.d);
+       scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
+
+       ca->fs = c;
+       rcu_assign_pointer(c->devs[ca->dev_idx], ca);
+
+       if (bch2_dev_sysfs_online(c, ca))
+               pr_warn("error creating sysfs objects");
+}
+
+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
+{
+       struct bch_member *member =
+               bch2_sb_get_members(c->disk_sb.sb)->members + dev_idx;
+       struct bch_dev *ca = NULL;
+       int ret = 0;
+
+       pr_verbose_init(c->opts, "");
+
+       if (bch2_fs_init_fault("dev_alloc"))
+               goto err;
+
+       ca = __bch2_dev_alloc(c, member);
+       if (!ca)
+               goto err;
+
+       bch2_dev_attach(c, ca, dev_idx);
+out:
+       pr_verbose_init(c->opts, "ret %i", ret);
+       return ret;
+err:
+       if (ca)
+               bch2_dev_free(ca);
+       ret = -ENOMEM;
+       goto out;
+}
+
+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
+{
+       int ret;
+
+       if (bch2_dev_is_online(ca)) {
+               bch_err(ca, "already have device online in slot %u",
+                       sb->sb->dev_idx);
+               return -EINVAL;
+       }
+
+       if (get_capacity(sb->bdev->bd_disk) <
+           ca->mi.bucket_size * ca->mi.nbuckets) {
+               bch_err(ca, "cannot online: device too small");
+               return -EINVAL;
+       }
+
+       BUG_ON(!percpu_ref_is_zero(&ca->io_ref));
+
+       ret = bch2_dev_journal_init(ca, sb->sb);
+       if (ret)
+               return ret;
+
+       /* Commit: */
+       ca->disk_sb = *sb;
+       if (sb->mode & FMODE_EXCL)
+               ca->disk_sb.bdev->bd_holder = ca;
+       memset(sb, 0, sizeof(*sb));
+
+       percpu_ref_reinit(&ca->io_ref);
+
+       return 0;
+}
+
+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
+{
+       struct bch_dev *ca;
+       int ret;
+
+       lockdep_assert_held(&c->state_lock);
+
+       if (le64_to_cpu(sb->sb->seq) >
+           le64_to_cpu(c->disk_sb.sb->seq))
+               bch2_sb_to_fs(c, sb->sb);
+
+       BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
+              !c->devs[sb->sb->dev_idx]);
+
+       ca = bch_dev_locked(c, sb->sb->dev_idx);
+
+       ret = __bch2_dev_attach_bdev(ca, sb);
+       if (ret)
+               return ret;
+
+       if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) &&
+           !percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_sb])) {
+               mutex_lock(&c->sb_lock);
+               bch2_mark_dev_superblock(ca->fs, ca, 0);
+               mutex_unlock(&c->sb_lock);
+       }
+
+       bch2_dev_sysfs_online(c, ca);
+
+       if (c->sb.nr_devices == 1)
+               bdevname(ca->disk_sb.bdev, c->name);
+       bdevname(ca->disk_sb.bdev, ca->name);
+
+       rebalance_wakeup(c);
+       return 0;
+}
+
+/* Device management: */
+
+/*
+ * Note: this function is also used by the error paths - when a particular
+ * device sees an error, we call it to determine whether we can just set the
+ * device RO, or - if this function returns false - we'll set the whole
+ * filesystem RO:
+ *
+ * XXX: maybe we should be more explicit about whether we're changing state
+ * because we got an error or what have you?
+ */
+bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
+                           enum bch_member_state new_state, int flags)
+{
+       struct bch_devs_mask new_online_devs;
+       struct replicas_status s;
+       struct bch_dev *ca2;
+       int i, nr_rw = 0, required;
+
+       lockdep_assert_held(&c->state_lock);
+
+       switch (new_state) {
+       case BCH_MEMBER_STATE_RW:
+               return true;
+       case BCH_MEMBER_STATE_RO:
+               if (ca->mi.state != BCH_MEMBER_STATE_RW)
+                       return true;
+
+               /* do we have enough devices to write to?  */
+               for_each_member_device(ca2, c, i)
+                       if (ca2 != ca)
+                               nr_rw += ca2->mi.state == BCH_MEMBER_STATE_RW;
+
+               required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
+                              ? c->opts.metadata_replicas
+                              : c->opts.metadata_replicas_required,
+                              !(flags & BCH_FORCE_IF_DATA_DEGRADED)
+                              ? c->opts.data_replicas
+                              : c->opts.data_replicas_required);
+
+               return nr_rw >= required;
+       case BCH_MEMBER_STATE_FAILED:
+       case BCH_MEMBER_STATE_SPARE:
+               if (ca->mi.state != BCH_MEMBER_STATE_RW &&
+                   ca->mi.state != BCH_MEMBER_STATE_RO)
+                       return true;
+
+               /* do we have enough devices to read from?  */
+               new_online_devs = bch2_online_devs(c);
+               __clear_bit(ca->dev_idx, new_online_devs.d);
+
+               s = __bch2_replicas_status(c, new_online_devs);
+
+               return bch2_have_enough_devs(s, flags);
+       default:
+               BUG();
+       }
+}
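+
+/*
+ * Editorial example (hypothetical option values, not upstream): taking a
+ * device RO with metadata_replicas == 2, data_replicas == 2 and no force
+ * flags requires max(2, 2) == 2 other RW devices to remain; with the
+ * BCH_FORCE_IF_*_DEGRADED flags set, the (typically lower)
+ * *_replicas_required options are used instead.
+ */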
+
+static bool bch2_fs_may_start(struct bch_fs *c)
+{
+       struct replicas_status s;
+       struct bch_sb_field_members *mi;
+       struct bch_dev *ca;
+       unsigned i, flags = c->opts.degraded
+               ? BCH_FORCE_IF_DEGRADED
+               : 0;
+
+       if (!c->opts.degraded) {
+               mutex_lock(&c->sb_lock);
+               mi = bch2_sb_get_members(c->disk_sb.sb);
+
+               for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+                       if (!bch2_dev_exists(c->disk_sb.sb, mi, i))
+                               continue;
+
+                       ca = bch_dev_locked(c, i);
+
+                       if (!bch2_dev_is_online(ca) &&
+                           (ca->mi.state == BCH_MEMBER_STATE_RW ||
+                            ca->mi.state == BCH_MEMBER_STATE_RO)) {
+                               mutex_unlock(&c->sb_lock);
+                               return false;
+                       }
+               }
+               mutex_unlock(&c->sb_lock);
+       }
+
+       s = bch2_replicas_status(c);
+
+       return bch2_have_enough_devs(s, flags);
+}
+
+static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
+{
+       /*
+        * Device going read only means the copygc reserve gets smaller, so we
+        * don't want that happening while copygc is in progress:
+        */
+       bch2_copygc_stop(c);
+
+       /*
+        * The allocator thread itself allocates btree nodes, so stop it first:
+        */
+       bch2_dev_allocator_stop(ca);
+       bch2_dev_allocator_remove(c, ca);
+       bch2_dev_journal_stop(&c->journal, ca);
+
+       bch2_copygc_start(c);
+}
+
+static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
+{
+       lockdep_assert_held(&c->state_lock);
+
+       BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW);
+
+       bch2_dev_allocator_add(c, ca);
+       bch2_recalc_capacity(c);
+
+       if (bch2_dev_allocator_start(ca))
+               return "error starting allocator thread";
+
+       return NULL;
+}
+
+int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
+                        enum bch_member_state new_state, int flags)
+{
+       struct bch_sb_field_members *mi;
+       int ret = 0;
+
+       if (ca->mi.state == new_state)
+               return 0;
+
+       if (!bch2_dev_state_allowed(c, ca, new_state, flags))
+               return -EINVAL;
+
+       if (new_state != BCH_MEMBER_STATE_RW)
+               __bch2_dev_read_only(c, ca);
+
+       bch_notice(ca, "%s", bch2_dev_state[new_state]);
+
+       mutex_lock(&c->sb_lock);
+       mi = bch2_sb_get_members(c->disk_sb.sb);
+       SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state);
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       if (new_state == BCH_MEMBER_STATE_RW &&
+           __bch2_dev_read_write(c, ca))
+               ret = -ENOMEM;
+
+       rebalance_wakeup(c);
+
+       return ret;
+}
+
+int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
+                      enum bch_member_state new_state, int flags)
+{
+       int ret;
+
+       down_write(&c->state_lock);
+       ret = __bch2_dev_set_state(c, ca, new_state, flags);
+       up_write(&c->state_lock);
+
+       return ret;
+}
+
+/* Device add/removal: */
+
+int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
+{
+       struct btree_trans trans;
+       size_t i;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
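+       /*
+        * Flush any alloc keys for this device still sitting in the btree key
+        * cache, so the delete_range below removes everything:
+        */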
+       for (i = 0; i < ca->mi.nbuckets; i++) {
+               ret = bch2_btree_key_cache_flush(&trans,
+                               BTREE_ID_ALLOC, POS(ca->dev_idx, i));
+               if (ret)
+                       break;
+       }
+       bch2_trans_exit(&trans);
+
+       if (ret)
+               return ret;
+
+       return bch2_btree_delete_range(c, BTREE_ID_ALLOC,
+                                      POS(ca->dev_idx, 0),
+                                      POS(ca->dev_idx + 1, 0),
+                                      NULL);
+}
+
+int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
+{
+       struct bch_sb_field_members *mi;
+       unsigned dev_idx = ca->dev_idx, data;
+       int ret = -EINVAL;
+
+       down_write(&c->state_lock);
+
+       /*
+        * We consume a reference to ca->ref, regardless of whether we succeed
+        * or fail:
+        */
+       percpu_ref_put(&ca->ref);
+
+       if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
+               bch_err(ca, "Cannot remove without losing data");
+               goto err;
+       }
+
+       __bch2_dev_read_only(c, ca);
+
+       ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
+       if (ret) {
+               bch_err(ca, "Remove failed: error %i dropping data", ret);
+               goto err;
+       }
+
+       ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
+       if (ret) {
+               bch_err(ca, "Remove failed: error %i flushing journal", ret);
+               goto err;
+       }
+
+       ret = bch2_dev_remove_alloc(c, ca);
+       if (ret) {
+               bch_err(ca, "Remove failed, error deleting alloc info");
+               goto err;
+       }
+
+       /*
+        * must flush all existing journal entries; they might contain
+        * (overwritten) keys that point to the device we're removing:
+        */
+       bch2_journal_flush_all_pins(&c->journal);
+       /*
+        * hack to ensure bch2_replicas_gc2() clears out entries referring to this device
+        */
+       bch2_journal_meta(&c->journal);
+       ret = bch2_journal_error(&c->journal);
+       if (ret) {
+               bch_err(ca, "Remove failed, journal error");
+               goto err;
+       }
+
+       ret = bch2_replicas_gc2(c);
+       if (ret) {
+               bch_err(ca, "Remove failed: error %i from replicas gc", ret);
+               goto err;
+       }
+
+       data = bch2_dev_has_data(c, ca);
+       if (data) {
+               char data_has_str[100];
+
+               bch2_flags_to_text(&PBUF(data_has_str),
+                                  bch2_data_types, data);
+               bch_err(ca, "Remove failed, still has data (%s)", data_has_str);
+               ret = -EBUSY;
+               goto err;
+       }
+
+       __bch2_dev_offline(c, ca);
+
+       mutex_lock(&c->sb_lock);
+       rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
+       mutex_unlock(&c->sb_lock);
+
+       percpu_ref_kill(&ca->ref);
+       wait_for_completion(&ca->ref_completion);
+
+       bch2_dev_free(ca);
+
+       /*
+        * Free this device's slot in the bch_member array - all pointers to
+        * this device must be gone:
+        */
+       mutex_lock(&c->sb_lock);
+       mi = bch2_sb_get_members(c->disk_sb.sb);
+       memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid));
+
+       bch2_write_super(c);
+
+       mutex_unlock(&c->sb_lock);
+       up_write(&c->state_lock);
+       return 0;
+err:
+       if (ca->mi.state == BCH_MEMBER_STATE_RW &&
+           !percpu_ref_is_zero(&ca->io_ref))
+               __bch2_dev_read_write(c, ca);
+       up_write(&c->state_lock);
+       return ret;
+}
+
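+/*
+ * Reset this device's in-memory usage counters and bucket marks; the device
+ * add path marks the superblock, allocates the journal, then clears and
+ * remarks after attaching (see the comment in bch2_dev_add()):
+ */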
+static void dev_usage_clear(struct bch_dev *ca)
+{
+       struct bucket_array *buckets;
+
+       percpu_memset(ca->usage[0], 0, sizeof(*ca->usage[0]));
+
+       down_read(&ca->bucket_lock);
+       buckets = bucket_array(ca);
+
+       memset(buckets->b, 0, sizeof(buckets->b[0]) * buckets->nbuckets);
+       up_read(&ca->bucket_lock);
+}
+
+/* Add new device to running filesystem: */
+int bch2_dev_add(struct bch_fs *c, const char *path)
+{
+       struct bch_opts opts = bch2_opts_empty();
+       struct bch_sb_handle sb;
+       const char *err;
+       struct bch_dev *ca = NULL;
+       struct bch_sb_field_members *mi;
+       struct bch_member dev_mi;
+       unsigned dev_idx, nr_devices, u64s;
+       int ret;
+
+       ret = bch2_read_super(path, &opts, &sb);
+       if (ret)
+               return ret;
+
+       err = bch2_sb_validate(&sb);
+       if (err) {
+               /* don't leak the superblock buffer on early errors: */
+               ret = -EINVAL;
+               goto err;
+       }
+
+       dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx];
+
+       err = bch2_dev_may_add(sb.sb, c);
+       if (err) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       ca = __bch2_dev_alloc(c, &dev_mi);
+       if (!ca) {
+               bch2_free_super(&sb);
+               return -ENOMEM;
+       }
+
+       ret = __bch2_dev_attach_bdev(ca, &sb);
+       if (ret) {
+               bch2_dev_free(ca);
+               return ret;
+       }
+
+       /*
+        * We want to allocate the journal on the new device before adding the
+        * device to the filesystem, because allocating after we attach
+        * requires spinning up the allocator thread, and the allocator thread
+        * requires doing btree writes, which won't work if the existing
+        * devices are RO.
+        *
+        * So we have to mark where the superblocks are, but marking allocated
+        * data normally updates the filesystem usage too, so we have to mark,
+        * allocate the journal, reset all the marks, then remark after we
+        * attach...
+        */
+       bch2_mark_dev_superblock(ca->fs, ca, 0);
+
+       err = "journal alloc failed";
+       ret = bch2_dev_journal_alloc(ca);
+       if (ret)
+               goto err;
+
+       dev_usage_clear(ca);
+
+       down_write(&c->state_lock);
+       mutex_lock(&c->sb_lock);
+
+       err = "insufficient space in new superblock";
+       ret = bch2_sb_from_fs(c, ca);
+       if (ret)
+               goto err_unlock;
+
+       mi = bch2_sb_get_members(ca->disk_sb.sb);
+
+       if (!bch2_sb_resize_members(&ca->disk_sb,
+                               le32_to_cpu(mi->field.u64s) +
+                               sizeof(dev_mi) / sizeof(u64))) {
+               ret = -ENOSPC;
+               goto err_unlock;
+       }
+
+       if (dynamic_fault("bcachefs:add:no_slot"))
+               goto no_slot;
+
+       mi = bch2_sb_get_members(c->disk_sb.sb);
+       for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++)
+               if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx))
+                       goto have_slot;
+no_slot:
+       err = "no slots available in superblock";
+       ret = -ENOSPC;
+       goto err_unlock;
+
+have_slot:
+       nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
+       u64s = (sizeof(struct bch_sb_field_members) +
+               sizeof(struct bch_member) * nr_devices) / sizeof(u64);
+
+       err = "no space in superblock for member info";
+       ret = -ENOSPC;
+
+       mi = bch2_sb_resize_members(&c->disk_sb, u64s);
+       if (!mi)
+               goto err_unlock;
+
+       /* success: */
+
+       mi->members[dev_idx] = dev_mi;
+       mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_real_seconds());
+       c->disk_sb.sb->nr_devices       = nr_devices;
+
+       ca->disk_sb.sb->dev_idx = dev_idx;
+       bch2_dev_attach(c, ca, dev_idx);
+
+       bch2_mark_dev_superblock(c, ca, 0);
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       err = "alloc write failed";
+       ret = bch2_dev_alloc_write(c, ca, 0);
+       if (ret)
+               goto err;
+
+       if (ca->mi.state == BCH_MEMBER_STATE_RW) {
+               err = __bch2_dev_read_write(c, ca);
+               if (err)
+                       goto err_late;
+       }
+
+       up_write(&c->state_lock);
+       return 0;
+
+err_unlock:
+       mutex_unlock(&c->sb_lock);
+       up_write(&c->state_lock);
+err:
+       if (ca)
+               bch2_dev_free(ca);
+       bch2_free_super(&sb);
+       bch_err(c, "Unable to add device: %s", err);
+       return ret;
+err_late:
+       bch_err(c, "Error going rw after adding device: %s", err);
+       return -EINVAL;
+}
+
+/* Hot add existing device to running filesystem: */
+int bch2_dev_online(struct bch_fs *c, const char *path)
+{
+       struct bch_opts opts = bch2_opts_empty();
+       struct bch_sb_handle sb = { NULL };
+       struct bch_sb_field_members *mi;
+       struct bch_dev *ca;
+       unsigned dev_idx;
+       const char *err;
+       int ret;
+
+       down_write(&c->state_lock);
+
+       ret = bch2_read_super(path, &opts, &sb);
+       if (ret) {
+               up_write(&c->state_lock);
+               return ret;
+       }
+
+       dev_idx = sb.sb->dev_idx;
+
+       err = bch2_dev_in_fs(c->disk_sb.sb, sb.sb);
+       if (err)
+               goto err;
+
+       if (bch2_dev_attach_bdev(c, &sb)) {
+               err = "bch2_dev_attach_bdev() error";
+               goto err;
+       }
+
+       ca = bch_dev_locked(c, dev_idx);
+       if (ca->mi.state == BCH_MEMBER_STATE_RW) {
+               err = __bch2_dev_read_write(c, ca);
+               if (err)
+                       goto err;
+       }
+
+       mutex_lock(&c->sb_lock);
+       mi = bch2_sb_get_members(c->disk_sb.sb);
+
+       mi->members[ca->dev_idx].last_mount =
+               cpu_to_le64(ktime_get_real_seconds());
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       up_write(&c->state_lock);
+       return 0;
+err:
+       up_write(&c->state_lock);
+       bch2_free_super(&sb);
+       bch_err(c, "error bringing %s online: %s", path, err);
+       return -EINVAL;
+}
+
+int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
+{
+       down_write(&c->state_lock);
+
+       if (!bch2_dev_is_online(ca)) {
+               bch_err(ca, "Already offline");
+               up_write(&c->state_lock);
+               return 0;
+       }
+
+       if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
+               bch_err(ca, "Cannot offline required disk");
+               up_write(&c->state_lock);
+               return -EINVAL;
+       }
+
+       __bch2_dev_offline(c, ca);
+
+       up_write(&c->state_lock);
+       return 0;
+}
+
+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
+{
+       struct bch_member *mi;
+       int ret = 0;
+
+       down_write(&c->state_lock);
+
+       if (nbuckets < ca->mi.nbuckets) {
+               bch_err(ca, "Cannot shrink yet");
+               ret = -EINVAL;
+               goto err;
+       }
+
+       if (bch2_dev_is_online(ca) &&
+           get_capacity(ca->disk_sb.bdev->bd_disk) <
+           ca->mi.bucket_size * nbuckets) {
+               bch_err(ca, "New size larger than device");
+               ret = -EINVAL;
+               goto err;
+       }
+
+       ret = bch2_dev_buckets_resize(c, ca, nbuckets);
+       if (ret) {
+               bch_err(ca, "Resize error: %i", ret);
+               goto err;
+       }
+
+       mutex_lock(&c->sb_lock);
+       mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+       mi->nbuckets = cpu_to_le64(nbuckets);
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       bch2_recalc_capacity(c);
+err:
+       up_write(&c->state_lock);
+       return ret;
+}
+
+/* return with ref on ca->ref: */
+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
+{
+       struct block_device *bdev = lookup_bdev(path);
+       struct bch_dev *ca;
+       unsigned i;
+
+       if (IS_ERR(bdev))
+               return ERR_CAST(bdev);
+
+       for_each_member_device(ca, c, i)
+               if (ca->disk_sb.bdev == bdev)
+                       goto found;
+
+       ca = ERR_PTR(-ENOENT);
+found:
+       bdput(bdev);
+       return ca;
+}
+
+/* Filesystem open: */
+
+struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
+                           struct bch_opts opts)
+{
+       struct bch_sb_handle *sb = NULL;
+       struct bch_fs *c = NULL;
+       struct bch_sb_field_members *mi;
+       unsigned i, best_sb = 0;
+       const char *err;
+       int ret = -ENOMEM;
+
+       pr_verbose_init(opts, "");
+
+       if (!nr_devices) {
+               c = ERR_PTR(-EINVAL);
+               goto out2;
+       }
+
+       if (!try_module_get(THIS_MODULE)) {
+               c = ERR_PTR(-ENODEV);
+               goto out2;
+       }
+
+       sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL);
+       if (!sb)
+               goto err;
+
+       for (i = 0; i < nr_devices; i++) {
+               ret = bch2_read_super(devices[i], &opts, &sb[i]);
+               if (ret)
+                       goto err;
+
+               err = bch2_sb_validate(&sb[i]);
+               if (err)
+                       goto err_print;
+       }
+
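+       /*
+        * The superblock with the highest sequence number is authoritative;
+        * devices no longer present in its member list were removed and are
+        * skipped below:
+        */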
+       for (i = 1; i < nr_devices; i++)
+               if (le64_to_cpu(sb[i].sb->seq) >
+                   le64_to_cpu(sb[best_sb].sb->seq))
+                       best_sb = i;
+
+       mi = bch2_sb_get_members(sb[best_sb].sb);
+
+       i = 0;
+       while (i < nr_devices) {
+               if (i != best_sb &&
+                   !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) {
+                       char buf[BDEVNAME_SIZE];
+                       pr_info("%s has been removed, skipping",
+                               bdevname(sb[i].bdev, buf));
+                       bch2_free_super(&sb[i]);
+                       array_remove_item(sb, nr_devices, i);
+                       continue;
+               }
+
+               err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb);
+               if (err)
+                       goto err_print;
+               i++;
+       }
+
+       ret = -ENOMEM;
+       c = bch2_fs_alloc(sb[best_sb].sb, opts);
+       if (!c)
+               goto err;
+
+       err = "bch2_dev_online() error";
+       down_write(&c->state_lock);
+       for (i = 0; i < nr_devices; i++)
+               if (bch2_dev_attach_bdev(c, &sb[i])) {
+                       up_write(&c->state_lock);
+                       goto err_print;
+               }
+       up_write(&c->state_lock);
+
+       err = "insufficient devices";
+       if (!bch2_fs_may_start(c))
+               goto err_print;
+
+       if (!c->opts.nostart) {
+               ret = bch2_fs_start(c);
+               if (ret)
+                       goto err;
+       }
+out:
+       kfree(sb);
+       module_put(THIS_MODULE);
+out2:
+       pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c));
+       return c;
+err_print:
+       pr_err("bch_fs_open err opening %s: %s",
+              devices[0], err);
+       ret = -EINVAL;
+err:
+       if (c)
+               bch2_fs_stop(c);
+       for (i = 0; i < nr_devices; i++)
+               bch2_free_super(&sb[i]);
+       c = ERR_PTR(ret);
+       goto out;
+}
+
+static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,
+                                             struct bch_opts opts)
+{
+       const char *err;
+       struct bch_fs *c;
+       bool allocated_fs = false;
+       int ret;
+
+       err = bch2_sb_validate(sb);
+       if (err)
+               return err;
+
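+       /*
+        * If a filesystem with this UUID already exists, attach this device to
+        * it; otherwise allocate a new bch_fs:
+        */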
+       mutex_lock(&bch_fs_list_lock);
+       c = __bch2_uuid_to_fs(sb->sb->uuid);
+       if (c) {
+               closure_get(&c->cl);
+
+               err = bch2_dev_in_fs(c->disk_sb.sb, sb->sb);
+               if (err)
+                       goto err;
+       } else {
+               c = bch2_fs_alloc(sb->sb, opts);
+               err = "cannot allocate memory";
+               if (!c)
+                       goto err;
+
+               allocated_fs = true;
+       }
+
+       err = "bch2_dev_online() error";
+
+       mutex_lock(&c->sb_lock);
+       if (bch2_dev_attach_bdev(c, sb)) {
+               mutex_unlock(&c->sb_lock);
+               goto err;
+       }
+       mutex_unlock(&c->sb_lock);
+
+       if (!c->opts.nostart && bch2_fs_may_start(c)) {
+               err = "error starting filesystem";
+               ret = bch2_fs_start(c);
+               if (ret)
+                       goto err;
+       }
+
+       closure_put(&c->cl);
+       mutex_unlock(&bch_fs_list_lock);
+
+       return NULL;
+err:
+       mutex_unlock(&bch_fs_list_lock);
+
+       if (allocated_fs)
+               bch2_fs_stop(c);
+       else if (c)
+               closure_put(&c->cl);
+
+       return err;
+}
+
+const char *bch2_fs_open_incremental(const char *path)
+{
+       struct bch_sb_handle sb;
+       struct bch_opts opts = bch2_opts_empty();
+       const char *err;
+
+       if (bch2_read_super(path, &opts, &sb))
+               return "error reading superblock";
+
+       err = __bch2_fs_open_incremental(&sb, opts);
+       bch2_free_super(&sb);
+
+       return err;
+}
+
+/* Global interfaces/init */
+
+static void bcachefs_exit(void)
+{
+       bch2_debug_exit();
+       bch2_vfs_exit();
+       bch2_chardev_exit();
+       if (bcachefs_kset)
+               kset_unregister(bcachefs_kset);
+}
+
+static int __init bcachefs_init(void)
+{
+       bch2_bkey_pack_test();
+       bch2_inode_pack_test();
+
+       if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
+           bch2_chardev_init() ||
+           bch2_vfs_init() ||
+           bch2_debug_init())
+               goto err;
+
+       return 0;
+err:
+       bcachefs_exit();
+       return -ENOMEM;
+}
+
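+/* Expose each debug parameter as a writable module parameter: */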
+#define BCH_DEBUG_PARAM(name, description)                     \
+       bool bch2_##name;                                       \
+       module_param_named(name, bch2_##name, bool, 0644);      \
+       MODULE_PARM_DESC(name, description);
+BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+module_exit(bcachefs_exit);
+module_init(bcachefs_init);
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
new file mode 100644 (file)
index 0000000..02c81f3
--- /dev/null
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUPER_H
+#define _BCACHEFS_SUPER_H
+
+#include "extents.h"
+
+#include "bcachefs_ioctl.h"
+
+#include <linux/math64.h>
+
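+/* Helpers for converting between 512-byte sectors and bucket indices: */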
+static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s)
+{
+       return div_u64(s, ca->mi.bucket_size);
+}
+
+static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b)
+{
+       return ((sector_t) b) * ca->mi.bucket_size;
+}
+
+static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s)
+{
+       u32 remainder;
+
+       div_u64_rem(s, ca->mi.bucket_size, &remainder);
+       return remainder;
+}
+
+static inline bool bch2_dev_is_online(struct bch_dev *ca)
+{
+       return !percpu_ref_is_zero(&ca->io_ref);
+}
+
+static inline bool bch2_dev_is_readable(struct bch_dev *ca)
+{
+       return bch2_dev_is_online(ca) &&
+               ca->mi.state != BCH_MEMBER_STATE_FAILED;
+}
+
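+/*
+ * Try to take an io ref: fails if the device is offline, or if its state
+ * doesn't allow the requested rw access (RO devices only allow reads):
+ */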
+static inline bool bch2_dev_get_ioref(struct bch_dev *ca, int rw)
+{
+       if (!percpu_ref_tryget(&ca->io_ref))
+               return false;
+
+       if (ca->mi.state == BCH_MEMBER_STATE_RW ||
+           (ca->mi.state == BCH_MEMBER_STATE_RO && rw == READ))
+               return true;
+
+       percpu_ref_put(&ca->io_ref);
+       return false;
+}
+
+static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs)
+{
+       return bitmap_weight(devs->d, BCH_SB_MEMBERS_MAX);
+}
+
+static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs,
+                                        unsigned dev)
+{
+       unsigned i;
+
+       for (i = 0; i < devs.nr; i++)
+               if (devs.devs[i] == dev)
+                       return true;
+
+       return false;
+}
+
+static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs,
+                                         unsigned dev)
+{
+       unsigned i;
+
+       for (i = 0; i < devs->nr; i++)
+               if (devs->devs[i] == dev) {
+                       array_remove_item(devs->devs, devs->nr, i);
+                       return;
+               }
+}
+
+static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs,
+                                        unsigned dev)
+{
+       BUG_ON(bch2_dev_list_has_dev(*devs, dev));
+       BUG_ON(devs->nr >= BCH_REPLICAS_MAX);
+       devs->devs[devs->nr++] = dev;
+}
+
+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev)
+{
+       return (struct bch_devs_list) { .nr = 1, .devs[0] = dev };
+}
+
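+/*
+ * Iterate over c->devs[], optionally restricted to @mask; returns NULL once
+ * past the last device. Callers must hold rcu_read_lock() or state_lock (see
+ * the rcu_dereference_check() below):
+ */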
+static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter,
+                                             const struct bch_devs_mask *mask)
+{
+       struct bch_dev *ca = NULL;
+
+       while ((*iter = mask
+               ? find_next_bit(mask->d, c->sb.nr_devices, *iter)
+               : *iter) < c->sb.nr_devices &&
+              !(ca = rcu_dereference_check(c->devs[*iter],
+                                           lockdep_is_held(&c->state_lock))))
+               (*iter)++;
+
+       return ca;
+}
+
+#define __for_each_member_device(ca, c, iter, mask)                    \
+       for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++)
+
+#define for_each_member_device_rcu(ca, c, iter, mask)                  \
+       __for_each_member_device(ca, c, iter, mask)
+
+static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter)
+{
+       struct bch_dev *ca;
+
+       rcu_read_lock();
+       if ((ca = __bch2_next_dev(c, iter, NULL)))
+               percpu_ref_get(&ca->ref);
+       rcu_read_unlock();
+
+       return ca;
+}
+
+/*
+ * If you break early, you must drop your ref on the current device
+ */
+#define for_each_member_device(ca, c, iter)                            \
+       for ((iter) = 0;                                                \
+            (ca = bch2_get_next_dev(c, &(iter)));                      \
+            percpu_ref_put(&ca->ref), (iter)++)
+
+static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
+                                                     unsigned *iter,
+                                                     int state_mask)
+{
+       struct bch_dev *ca;
+
+       rcu_read_lock();
+       while ((ca = __bch2_next_dev(c, iter, NULL)) &&
+              (!((1 << ca->mi.state) & state_mask) ||
+               !percpu_ref_tryget(&ca->io_ref)))
+               (*iter)++;
+       rcu_read_unlock();
+
+       return ca;
+}
+
+#define __for_each_online_member(ca, c, iter, state_mask)              \
+       for ((iter) = 0;                                                \
+            (ca = bch2_get_next_online_dev(c, &(iter), state_mask));   \
+            percpu_ref_put(&ca->io_ref), (iter)++)
+
+#define for_each_online_member(ca, c, iter)                            \
+       __for_each_online_member(ca, c, iter, ~0)
+
+#define for_each_rw_member(ca, c, iter)                                        \
+       __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_RW)
+
+#define for_each_readable_member(ca, c, iter)                          \
+       __for_each_online_member(ca, c, iter,                           \
+               (1 << BCH_MEMBER_STATE_RW)|(1 << BCH_MEMBER_STATE_RO))
+
+/*
+ * If a key exists that references a device, the device won't be going away and
+ * we can omit rcu_read_lock():
+ */
+static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx)
+{
+       EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]);
+
+       return rcu_dereference_check(c->devs[idx], 1);
+}
+
+static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx)
+{
+       EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]);
+
+       return rcu_dereference_protected(c->devs[idx],
+                                        lockdep_is_held(&c->sb_lock) ||
+                                        lockdep_is_held(&c->state_lock));
+}
+
+/* XXX kill, move to struct bch_fs */
+static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
+{
+       struct bch_devs_mask devs;
+       struct bch_dev *ca;
+       unsigned i;
+
+       memset(&devs, 0, sizeof(devs));
+       for_each_online_member(ca, c, i)
+               __set_bit(ca->dev_idx, devs.d);
+       return devs;
+}
+
+struct bch_fs *bch2_bdev_to_fs(struct block_device *);
+struct bch_fs *bch2_uuid_to_fs(uuid_le);
+
+bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
+                          enum bch_member_state, int);
+int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
+                       enum bch_member_state, int);
+int bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
+                     enum bch_member_state, int);
+
+int bch2_dev_fail(struct bch_dev *, int);
+int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int);
+int bch2_dev_add(struct bch_fs *, const char *);
+int bch2_dev_online(struct bch_fs *, const char *);
+int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
+struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
+
+bool bch2_fs_emergency_read_only(struct bch_fs *);
+void bch2_fs_read_only(struct bch_fs *);
+
+int bch2_fs_read_write(struct bch_fs *);
+int bch2_fs_read_write_early(struct bch_fs *);
+
+/*
+ * Only for use in the recovery/fsck path:
+ */
+static inline void bch2_fs_lazy_rw(struct bch_fs *c)
+{
+       if (percpu_ref_is_zero(&c->writes))
+               bch2_fs_read_write_early(c);
+}
+
+void __bch2_fs_stop(struct bch_fs *);
+void bch2_fs_free(struct bch_fs *);
+void bch2_fs_stop(struct bch_fs *);
+
+int bch2_fs_start(struct bch_fs *);
+struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
+const char *bch2_fs_open_incremental(const char *path);
+
+#endif /* _BCACHEFS_SUPER_H */
diff --git a/libbcachefs/super_types.h b/libbcachefs/super_types.h
new file mode 100644 (file)
index 0000000..20406eb
--- /dev/null
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUPER_TYPES_H
+#define _BCACHEFS_SUPER_TYPES_H
+
+struct bch_sb_handle {
+       struct bch_sb           *sb;
+       struct block_device     *bdev;
+       struct bio              *bio;
+       unsigned                page_order;
+       fmode_t                 mode;
+       unsigned                have_layout:1;
+       unsigned                have_bio:1;
+       unsigned                fs_sb:1;
+       u64                     seq;
+};
+
+struct bch_devs_mask {
+       unsigned long d[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
+};
+
+struct bch_devs_list {
+       u8                      nr;
+       u8                      devs[BCH_REPLICAS_MAX + 1];
+};
+
+struct bch_member_cpu {
+       u64                     nbuckets;       /* device size */
+       u16                     first_bucket;   /* index of first bucket used */
+       u16                     bucket_size;    /* sectors */
+       u16                     group;
+       u8                      state;
+       u8                      replacement;
+       u8                      discard;
+       u8                      data_allowed;
+       u8                      durability;
+       u8                      valid;
+};
+
+struct bch_disk_group_cpu {
+       bool                            deleted;
+       u16                             parent;
+       struct bch_devs_mask            devs;
+};
+
+struct bch_disk_groups_cpu {
+       struct rcu_head                 rcu;
+       unsigned                        nr;
+       struct bch_disk_group_cpu       entries[];
+};
+
+#endif /* _BCACHEFS_SUPER_TYPES_H */
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
new file mode 100644 (file)
index 0000000..0cb29f4
--- /dev/null
@@ -0,0 +1,1074 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bcachefs sysfs interfaces
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#ifndef NO_BCACHEFS_SYSFS
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "sysfs.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_key_cache.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "clock.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "inode.h"
+#include "journal.h"
+#include "keylist.h"
+#include "move.h"
+#include "opts.h"
+#include "rebalance.h"
+#include "replicas.h"
+#include "super-io.h"
+#include "tests.h"
+
+#include <linux/blkdev.h>
+#include <linux/sort.h>
+#include <linux/sched/clock.h>
+
+#include "util.h"
+
+#define SYSFS_OPS(type)                                                        \
+struct sysfs_ops type ## _sysfs_ops = {                                        \
+       .show   = type ## _show,                                        \
+       .store  = type ## _store                                        \
+}
+
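+/*
+ * SHOW()/STORE() expand to the sysfs ->show()/->store() method definitions
+ * for the given kobject type; SYSFS_OPS() above wires them together:
+ */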
+#define SHOW(fn)                                                       \
+static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
+                          char *buf)                                   \
+
+#define STORE(fn)                                                      \
+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
+                           const char *buf, size_t size)               \
+
+#define __sysfs_attribute(_name, _mode)                                        \
+       static struct attribute sysfs_##_name =                         \
+               { .name = #_name, .mode = _mode }
+
+#define write_attribute(n)     __sysfs_attribute(n, S_IWUSR)
+#define read_attribute(n)      __sysfs_attribute(n, S_IRUGO)
+#define rw_attribute(n)                __sysfs_attribute(n, S_IRUGO|S_IWUSR)
+
+#define sysfs_printf(file, fmt, ...)                                   \
+do {                                                                   \
+       if (attr == &sysfs_ ## file)                                    \
+               return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\
+} while (0)
+
+#define sysfs_print(file, var)                                         \
+do {                                                                   \
+       if (attr == &sysfs_ ## file)                                    \
+               return snprint(buf, PAGE_SIZE, var);                    \
+} while (0)
+
+#define sysfs_hprint(file, val)                                                \
+do {                                                                   \
+       if (attr == &sysfs_ ## file) {                                  \
+               bch2_hprint(&out, val);                                 \
+               pr_buf(&out, "\n");                                     \
+               return out.pos - buf;                                   \
+       }                                                               \
+} while (0)
+
+#define var_printf(_var, fmt)  sysfs_printf(_var, fmt, var(_var))
+#define var_print(_var)                sysfs_print(_var, var(_var))
+#define var_hprint(_var)       sysfs_hprint(_var, var(_var))
+
+#define sysfs_strtoul(file, var)                                       \
+do {                                                                   \
+       if (attr == &sysfs_ ## file)                                    \
+               return strtoul_safe(buf, var) ?: (ssize_t) size;        \
+} while (0)
+
+#define sysfs_strtoul_clamp(file, var, min, max)                       \
+do {                                                                   \
+       if (attr == &sysfs_ ## file)                                    \
+               return strtoul_safe_clamp(buf, var, min, max)           \
+                       ?: (ssize_t) size;                              \
+} while (0)
+
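+/*
+ * Statement-expression helpers: parse the input or return the error directly
+ * from the enclosing store() function:
+ */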
+#define strtoul_or_return(cp)                                          \
+({                                                                     \
+       unsigned long _v;                                               \
+       int _r = kstrtoul(cp, 10, &_v);                                 \
+       if (_r)                                                         \
+               return _r;                                              \
+       _v;                                                             \
+})
+
+#define strtoul_restrict_or_return(cp, min, max)                       \
+({                                                                     \
+       unsigned long __v = 0;                                          \
+       int _r = strtoul_safe_restrict(cp, __v, min, max);              \
+       if (_r)                                                         \
+               return _r;                                              \
+       __v;                                                            \
+})
+
+#define strtoi_h_or_return(cp)                                         \
+({                                                                     \
+       u64 _v;                                                         \
+       int _r = strtoi_h(cp, &_v);                                     \
+       if (_r)                                                         \
+               return _r;                                              \
+       _v;                                                             \
+})
+
+#define sysfs_hatoi(file, var)                                         \
+do {                                                                   \
+       if (attr == &sysfs_ ## file)                                    \
+               return strtoi_h(buf, &var) ?: (ssize_t) size;           \
+} while (0)
+
+write_attribute(trigger_journal_flush);
+write_attribute(trigger_btree_coalesce);
+write_attribute(trigger_gc);
+write_attribute(prune_cache);
+rw_attribute(btree_gc_periodic);
+
+read_attribute(uuid);
+read_attribute(minor);
+read_attribute(bucket_size);
+read_attribute(block_size);
+read_attribute(btree_node_size);
+read_attribute(first_bucket);
+read_attribute(nbuckets);
+read_attribute(durability);
+read_attribute(iodone);
+
+read_attribute(io_latency_read);
+read_attribute(io_latency_write);
+read_attribute(io_latency_stats_read);
+read_attribute(io_latency_stats_write);
+read_attribute(congested);
+
+read_attribute(bucket_quantiles_last_read);
+read_attribute(bucket_quantiles_last_write);
+read_attribute(bucket_quantiles_fragmentation);
+read_attribute(bucket_quantiles_oldest_gen);
+
+read_attribute(reserve_stats);
+read_attribute(btree_cache_size);
+read_attribute(compression_stats);
+read_attribute(journal_debug);
+read_attribute(journal_pins);
+read_attribute(btree_updates);
+read_attribute(dirty_btree_nodes);
+read_attribute(btree_key_cache);
+read_attribute(btree_transactions);
+read_attribute(stripes_heap);
+
+read_attribute(internal_uuid);
+
+read_attribute(has_data);
+read_attribute(alloc_debug);
+write_attribute(wake_allocator);
+
+read_attribute(read_realloc_races);
+read_attribute(extent_migrate_done);
+read_attribute(extent_migrate_raced);
+
+rw_attribute(journal_write_delay_ms);
+rw_attribute(journal_reclaim_delay_ms);
+
+rw_attribute(discard);
+rw_attribute(cache_replacement_policy);
+rw_attribute(label);
+
+rw_attribute(copy_gc_enabled);
+sysfs_pd_controller_attribute(copy_gc);
+
+rw_attribute(rebalance_enabled);
+sysfs_pd_controller_attribute(rebalance);
+read_attribute(rebalance_work);
+rw_attribute(promote_whole_extents);
+
+read_attribute(new_stripes);
+
+rw_attribute(pd_controllers_update_seconds);
+
+read_attribute(meta_replicas_have);
+read_attribute(data_replicas_have);
+
+read_attribute(io_timers_read);
+read_attribute(io_timers_write);
+
+#ifdef CONFIG_BCACHEFS_TESTS
+write_attribute(perf_test);
+#endif /* CONFIG_BCACHEFS_TESTS */
+
+#define BCH_DEBUG_PARAM(name, description)                             \
+       rw_attribute(name);
+
+       BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+#define x(_name)                                               \
+       static struct attribute sysfs_time_stat_##_name =               \
+               { .name = #_name, .mode = S_IRUGO };
+       BCH_TIME_STATS()
+#undef x
+
+static struct attribute sysfs_state_rw = {
+       .name = "state",
+       .mode = S_IRUGO
+};
+
+static size_t bch2_btree_cache_size(struct bch_fs *c)
+{
+       size_t ret = 0;
+       struct btree *b;
+
+       mutex_lock(&c->btree_cache.lock);
+       list_for_each_entry(b, &c->btree_cache.live, list)
+               ret += btree_bytes(c);
+
+       mutex_unlock(&c->btree_cache.lock);
+       return ret;
+}
+
+static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
+
+       if (!fs_usage)
+               return -ENOMEM;
+
+       bch2_fs_usage_to_text(out, c, fs_usage);
+
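+       /* bch2_fs_usage_read() took mark_lock for read; release it now: */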
+       percpu_up_read(&c->mark_lock);
+
+       kfree(fs_usage);
+       return 0;
+}
+
+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
+           nr_compressed_extents = 0,
+           compressed_sectors_compressed = 0,
+           compressed_sectors_uncompressed = 0;
+       int ret;
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
+               return -EPERM;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret)
+               if (k.k->type == KEY_TYPE_extent) {
+                       struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+                       const union bch_extent_entry *entry;
+                       struct extent_ptr_decoded p;
+
+                       extent_for_each_ptr_decode(e, p, entry) {
+                               if (!crc_is_compressed(p.crc)) {
+                                       nr_uncompressed_extents++;
+                                       uncompressed_sectors += e.k->size;
+                               } else {
+                                       nr_compressed_extents++;
+                                       compressed_sectors_compressed +=
+                                               p.crc.compressed_size;
+                                       compressed_sectors_uncompressed +=
+                                               p.crc.uncompressed_size;
+                               }
+
+                               /* only looking at the first ptr */
+                               break;
+                       }
+               }
+
+       ret = bch2_trans_exit(&trans) ?: ret;
+       if (ret)
+               return ret;
+
+       pr_buf(out,
+              "uncompressed data:\n"
+              "        nr extents:                     %llu\n"
+              "        size (bytes):                   %llu\n"
+              "compressed data:\n"
+              "        nr extents:                     %llu\n"
+              "        compressed size (bytes):        %llu\n"
+              "        uncompressed size (bytes):      %llu\n",
+              nr_uncompressed_extents,
+              uncompressed_sectors << 9,
+              nr_compressed_extents,
+              compressed_sectors_compressed << 9,
+              compressed_sectors_uncompressed << 9);
+       return 0;
+}
+
+SHOW(bch2_fs)
+{
+       struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
+       struct printbuf out = _PBUF(buf, PAGE_SIZE);
+
+       sysfs_print(minor,                      c->minor);
+       sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);
+
+       sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
+       sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);
+
+       sysfs_print(block_size,                 block_bytes(c));
+       sysfs_print(btree_node_size,            btree_bytes(c));
+       sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));
+
+       sysfs_print(read_realloc_races,
+                   atomic_long_read(&c->read_realloc_races));
+       sysfs_print(extent_migrate_done,
+                   atomic_long_read(&c->extent_migrate_done));
+       sysfs_print(extent_migrate_raced,
+                   atomic_long_read(&c->extent_migrate_raced));
+
+       sysfs_printf(btree_gc_periodic, "%u",   (int) c->btree_gc_periodic);
+
+       sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+
+       sysfs_print(pd_controllers_update_seconds,
+                   c->pd_controllers_update_seconds);
+
+       sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
+       sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
+       sysfs_pd_controller_show(copy_gc,       &c->copygc_pd);
+
+       if (attr == &sysfs_rebalance_work) {
+               bch2_rebalance_work_to_text(&out, c);
+               return out.pos - buf;
+       }
+
+       sysfs_print(promote_whole_extents,      c->promote_whole_extents);
+
+       sysfs_printf(meta_replicas_have, "%i",  bch2_replicas_online(c, true));
+       sysfs_printf(data_replicas_have, "%i",  bch2_replicas_online(c, false));
+
+       /* Debugging: */
+
+       if (attr == &sysfs_alloc_debug)
+               return fs_alloc_debug_to_text(&out, c) ?: out.pos - buf;
+
+       if (attr == &sysfs_journal_debug) {
+               bch2_journal_debug_to_text(&out, &c->journal);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_journal_pins) {
+               bch2_journal_pins_to_text(&out, &c->journal);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_btree_updates) {
+               bch2_btree_updates_to_text(&out, c);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_dirty_btree_nodes) {
+               bch2_dirty_btree_nodes_to_text(&out, c);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_btree_key_cache) {
+               bch2_btree_key_cache_to_text(&out, &c->btree_key_cache);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_btree_transactions) {
+               bch2_btree_trans_to_text(&out, c);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_stripes_heap) {
+               bch2_stripes_heap_to_text(&out, c);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_compression_stats) {
+               bch2_compression_stats_to_text(&out, c);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_new_stripes) {
+               bch2_new_stripes_to_text(&out, c);
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_io_timers_read) {
+               bch2_io_timers_to_text(&out, &c->io_clock[READ]);
+               return out.pos - buf;
+       }
+       if (attr == &sysfs_io_timers_write) {
+               bch2_io_timers_to_text(&out, &c->io_clock[WRITE]);
+               return out.pos - buf;
+       }
+
+#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
+       BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+       return 0;
+}
+
+STORE(bch2_fs)
+{
+       struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
+
+       sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
+       sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
+
+       if (attr == &sysfs_btree_gc_periodic) {
+               ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
+                       ?: (ssize_t) size;
+
+               wake_up_process(c->gc_thread);
+               return ret;
+       }
+
+       if (attr == &sysfs_copy_gc_enabled) {
+               ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
+                       ?: (ssize_t) size;
+
+               if (c->copygc_thread)
+                       wake_up_process(c->copygc_thread);
+               return ret;
+       }
+
+       if (attr == &sysfs_rebalance_enabled) {
+               ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
+                       ?: (ssize_t) size;
+
+               rebalance_wakeup(c);
+               return ret;
+       }
+
+       sysfs_strtoul(pd_controllers_update_seconds,
+                     c->pd_controllers_update_seconds);
+       sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
+       sysfs_pd_controller_store(copy_gc,      &c->copygc_pd);
+
+       sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);
+
+       /* Debugging: */
+
+#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
+       BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
+               return -EPERM;
+
+       /* Debugging: */
+
+       if (attr == &sysfs_trigger_journal_flush)
+               bch2_journal_meta_async(&c->journal, NULL);
+
+       if (attr == &sysfs_trigger_btree_coalesce)
+               bch2_coalesce(c);
+
+       if (attr == &sysfs_trigger_gc) {
+               /*
+                * Full gc is currently incompatible with btree key cache:
+                */
+#if 0
+               down_read(&c->state_lock);
+               bch2_gc(c, NULL, false, false);
+               up_read(&c->state_lock);
+#else
+               bch2_gc_gens(c);
+#endif
+       }
+
+       if (attr == &sysfs_prune_cache) {
+               struct shrink_control sc;
+
+               sc.gfp_mask = GFP_KERNEL;
+               sc.nr_to_scan = strtoul_or_return(buf);
+               c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
+       }
+
+#ifdef CONFIG_BCACHEFS_TESTS
+       if (attr == &sysfs_perf_test) {
+               char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
+               char *test              = strsep(&p, " \t\n");
+               char *nr_str            = strsep(&p, " \t\n");
+               char *threads_str       = strsep(&p, " \t\n");
+               unsigned threads;
+               u64 nr;
+               int ret = -EINVAL;
+
+               if (threads_str &&
+                   !(ret = kstrtouint(threads_str, 10, &threads)) &&
+                   !(ret = bch2_strtoull_h(nr_str, &nr)))
+                       bch2_btree_perf_test(c, test, nr, threads);
+               else
+                       size = ret;
+               kfree(tmp);
+       }
+#endif
+       return size;
+}
+SYSFS_OPS(bch2_fs);
+
+struct attribute *bch2_fs_files[] = {
+       &sysfs_minor,
+       &sysfs_block_size,
+       &sysfs_btree_node_size,
+       &sysfs_btree_cache_size,
+
+       &sysfs_meta_replicas_have,
+       &sysfs_data_replicas_have,
+
+       &sysfs_journal_write_delay_ms,
+       &sysfs_journal_reclaim_delay_ms,
+
+       &sysfs_promote_whole_extents,
+
+       &sysfs_compression_stats,
+
+#ifdef CONFIG_BCACHEFS_TESTS
+       &sysfs_perf_test,
+#endif
+       NULL
+};
+
+/* internal dir - just a wrapper */
+
+SHOW(bch2_fs_internal)
+{
+       struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
+       return bch2_fs_show(&c->kobj, attr, buf);
+}
+
+STORE(bch2_fs_internal)
+{
+       struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
+       return bch2_fs_store(&c->kobj, attr, buf, size);
+}
+SYSFS_OPS(bch2_fs_internal);
+
+struct attribute *bch2_fs_internal_files[] = {
+       &sysfs_alloc_debug,
+       &sysfs_journal_debug,
+       &sysfs_journal_pins,
+       &sysfs_btree_updates,
+       &sysfs_dirty_btree_nodes,
+       &sysfs_btree_key_cache,
+       &sysfs_btree_transactions,
+       &sysfs_stripes_heap,
+
+       &sysfs_read_realloc_races,
+       &sysfs_extent_migrate_done,
+       &sysfs_extent_migrate_raced,
+
+       &sysfs_trigger_journal_flush,
+       &sysfs_trigger_btree_coalesce,
+       &sysfs_trigger_gc,
+       &sysfs_prune_cache,
+
+       &sysfs_copy_gc_enabled,
+
+       &sysfs_rebalance_enabled,
+       &sysfs_rebalance_work,
+       sysfs_pd_controller_files(rebalance),
+       sysfs_pd_controller_files(copy_gc),
+
+       &sysfs_new_stripes,
+
+       &sysfs_io_timers_read,
+       &sysfs_io_timers_write,
+
+       &sysfs_internal_uuid,
+
+#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
+       BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+       NULL
+};
+
+/* options */
+
+SHOW(bch2_fs_opts_dir)
+{
+       struct printbuf out = _PBUF(buf, PAGE_SIZE);
+       struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
+       const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+       int id = opt - bch2_opt_table;
+       u64 v = bch2_opt_get_by_id(&c->opts, id);
+
+       bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST);
+       pr_buf(&out, "\n");
+
+       return out.pos - buf;
+}
+
+STORE(bch2_fs_opts_dir)
+{
+       struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
+       const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+       int ret, id = opt - bch2_opt_table;
+       char *tmp;
+       u64 v;
+
+       tmp = kstrdup(buf, GFP_KERNEL);
+       if (!tmp)
+               return -ENOMEM;
+
+       ret = bch2_opt_parse(c, opt, strim(tmp), &v);
+       kfree(tmp);
+
+       if (ret < 0)
+               return ret;
+
+       ret = bch2_opt_check_may_set(c, id, v);
+       if (ret < 0)
+               return ret;
+
+       if (opt->set_sb != SET_NO_SB_OPT) {
+               mutex_lock(&c->sb_lock);
+               opt->set_sb(c->disk_sb.sb, v);
+               bch2_write_super(c);
+               mutex_unlock(&c->sb_lock);
+       }
+
+       bch2_opt_set_by_id(&c->opts, id, v);
+
+       if ((id == Opt_background_target ||
+            id == Opt_background_compression) && v) {
+               bch2_rebalance_add_work(c, S64_MAX);
+               rebalance_wakeup(c);
+       }
+
+       return size;
+}
+SYSFS_OPS(bch2_fs_opts_dir);
+
+struct attribute *bch2_fs_opts_dir_files[] = { NULL };
+
+int bch2_opts_create_sysfs_files(struct kobject *kobj)
+{
+       const struct bch_option *i;
+       int ret;
+
+       for (i = bch2_opt_table;
+            i < bch2_opt_table + bch2_opts_nr;
+            i++) {
+               if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME)))
+                       continue;
+
+               ret = sysfs_create_file(kobj, &i->attr);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/* time stats */
+
+SHOW(bch2_fs_time_stats)
+{
+       struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
+       struct printbuf out = _PBUF(buf, PAGE_SIZE);
+
+#define x(name)                                                                \
+       if (attr == &sysfs_time_stat_##name) {                          \
+               bch2_time_stats_to_text(&out, &c->times[BCH_TIME_##name]);\
+               return out.pos - buf;                                   \
+       }
+       BCH_TIME_STATS()
+#undef x
+
+       return 0;
+}
+
+STORE(bch2_fs_time_stats)
+{
+       return size;
+}
+SYSFS_OPS(bch2_fs_time_stats);
+
+struct attribute *bch2_fs_time_stats_files[] = {
+#define x(name)                                                \
+       &sysfs_time_stat_##name,
+       BCH_TIME_STATS()
+#undef x
+       NULL
+};
+
+typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
+                                size_t, void *);
+
+static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
+                                 size_t b, void *private)
+{
+       int rw = (private ? 1 : 0);
+
+       return bucket_last_io(c, bucket(ca, b), rw);
+}
+
+static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
+                                      size_t b, void *private)
+{
+       struct bucket *g = bucket(ca, b);
+       return bucket_sectors_used(g->mark);
+}
+
+static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
+                                    size_t b, void *private)
+{
+       return bucket_gc_gen(ca, b);
+}
+
+static int unsigned_cmp(const void *_l, const void *_r)
+{
+       const unsigned *l = _l;
+       const unsigned *r = _r;
+
+       return cmp_int(*l, *r);
+}
+
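+/*
+ * Sample the per-bucket value computed by @fn at 31 evenly spaced quantiles,
+ * after sorting and dropping trailing zeroes:
+ */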
+static int quantiles_to_text(struct printbuf *out,
+                            struct bch_fs *c, struct bch_dev *ca,
+                            bucket_map_fn *fn, void *private)
+{
+       size_t i, n;
+       /* Compute 31 quantiles */
+       unsigned q[31], *p;
+
+       down_read(&ca->bucket_lock);
+       n = ca->mi.nbuckets;
+
+       p = vzalloc(n * sizeof(unsigned));
+       if (!p) {
+               up_read(&ca->bucket_lock);
+               return -ENOMEM;
+       }
+
+       for (i = ca->mi.first_bucket; i < n; i++)
+               p[i] = fn(c, ca, i, private);
+
+       sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
+       up_read(&ca->bucket_lock);
+
+       while (n && !p[n - 1])
+               --n;
+
+       for (i = 0; i < ARRAY_SIZE(q); i++)
+               q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
+
+       vfree(p);
+
+       for (i = 0; i < ARRAY_SIZE(q); i++)
+               pr_buf(out, "%u ", q[i]);
+       pr_buf(out, "\n");
+       return 0;
+}
+
+static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+       enum alloc_reserve i;
+
+       spin_lock(&ca->fs->freelist_lock);
+
+       pr_buf(out, "free_inc:\t%zu\t%zu\n",
+              fifo_used(&ca->free_inc),
+              ca->free_inc.size);
+
+       for (i = 0; i < RESERVE_NR; i++)
+               pr_buf(out, "free[%u]:\t%zu\t%zu\n", i,
+                      fifo_used(&ca->free[i]),
+                      ca->free[i].size);
+
+       spin_unlock(&ca->fs->freelist_lock);
+}
+
+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+       struct bch_fs *c = ca->fs;
+       struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+       unsigned i, nr[BCH_DATA_NR];
+
+       memset(nr, 0, sizeof(nr));
+
+       for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
+               nr[c->open_buckets[i].type]++;
+
+       pr_buf(out,
+               "free_inc:               %zu/%zu\n"
+               "free[RESERVE_BTREE]:    %zu/%zu\n"
+               "free[RESERVE_MOVINGGC]: %zu/%zu\n"
+               "free[RESERVE_NONE]:     %zu/%zu\n"
+               "buckets:\n"
+               "    capacity:           %llu\n"
+               "    alloc:              %llu\n"
+               "    sb:                 %llu\n"
+               "    journal:            %llu\n"
+               "    meta:               %llu\n"
+               "    user:               %llu\n"
+               "    cached:             %llu\n"
+               "    erasure coded:      %llu\n"
+               "    available:          %lli\n"
+               "sectors:\n"
+               "    sb:                 %llu\n"
+               "    journal:            %llu\n"
+               "    meta:               %llu\n"
+               "    user:               %llu\n"
+               "    cached:             %llu\n"
+               "    erasure coded:      %llu\n"
+               "    fragmented:         %llu\n"
+               "    copygc threshold:   %llu\n"
+               "freelist_wait:          %s\n"
+               "open buckets:           %u/%u (reserved %u)\n"
+               "open_buckets_wait:      %s\n"
+               "open_buckets_btree:     %u\n"
+               "open_buckets_user:      %u\n"
+               "btree reserve cache:    %u\n",
+               fifo_used(&ca->free_inc),               ca->free_inc.size,
+               fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
+               fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
+               fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
+               ca->mi.nbuckets - ca->mi.first_bucket,
+               stats.buckets_alloc,
+               stats.buckets[BCH_DATA_sb],
+               stats.buckets[BCH_DATA_journal],
+               stats.buckets[BCH_DATA_btree],
+               stats.buckets[BCH_DATA_user],
+               stats.buckets[BCH_DATA_cached],
+               stats.buckets_ec,
+               __dev_buckets_available(ca, stats),
+               stats.sectors[BCH_DATA_sb],
+               stats.sectors[BCH_DATA_journal],
+               stats.sectors[BCH_DATA_btree],
+               stats.sectors[BCH_DATA_user],
+               stats.sectors[BCH_DATA_cached],
+               stats.sectors_ec,
+               stats.sectors_fragmented,
+               c->copygc_threshold,
+               c->freelist_wait.list.first             ? "waiting" : "empty",
+               c->open_buckets_nr_free, OPEN_BUCKETS_COUNT,
+               BTREE_NODE_OPEN_BUCKET_RESERVE,
+               c->open_buckets_wait.list.first         ? "waiting" : "empty",
+               nr[BCH_DATA_btree],
+               nr[BCH_DATA_user],
+               c->btree_reserve_cache_nr);
+}
+
+static const char * const bch2_rw[] = {
+       "read",
+       "write",
+       NULL
+};
+
+static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+       int rw, i;
+
+       for (rw = 0; rw < 2; rw++) {
+               pr_buf(out, "%s:\n", bch2_rw[rw]);
+
+               for (i = 1; i < BCH_DATA_NR; i++)
+                       pr_buf(out, "%-12s:%12llu\n",
+                              bch2_data_types[i],
+                              percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
+       }
+}
+
+SHOW(bch2_dev)
+{
+       struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+       struct bch_fs *c = ca->fs;
+       struct printbuf out = _PBUF(buf, PAGE_SIZE);
+
+       sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
+
+       sysfs_print(bucket_size,        bucket_bytes(ca));
+       sysfs_print(block_size,         block_bytes(c));
+       sysfs_print(first_bucket,       ca->mi.first_bucket);
+       sysfs_print(nbuckets,           ca->mi.nbuckets);
+       sysfs_print(durability,         ca->mi.durability);
+       sysfs_print(discard,            ca->mi.discard);
+
+       if (attr == &sysfs_label) {
+               if (ca->mi.group) {
+                       mutex_lock(&c->sb_lock);
+                       bch2_disk_path_to_text(&out, &c->disk_sb,
+                                              ca->mi.group - 1);
+                       mutex_unlock(&c->sb_lock);
+               }
+
+               pr_buf(&out, "\n");
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_has_data) {
+               bch2_flags_to_text(&out, bch2_data_types,
+                                  bch2_dev_has_data(c, ca));
+               pr_buf(&out, "\n");
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_cache_replacement_policy) {
+               bch2_string_opt_to_text(&out,
+                                       bch2_cache_replacement_policies,
+                                       ca->mi.replacement);
+               pr_buf(&out, "\n");
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_state_rw) {
+               bch2_string_opt_to_text(&out, bch2_dev_state,
+                                       ca->mi.state);
+               pr_buf(&out, "\n");
+               return out.pos - buf;
+       }
+
+       if (attr == &sysfs_iodone) {
+               dev_iodone_to_text(&out, ca);
+               return out.pos - buf;
+       }
+
+       sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
+       sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
+
+       if (attr == &sysfs_io_latency_stats_read) {
+               bch2_time_stats_to_text(&out, &ca->io_latency[READ]);
+               return out.pos - buf;
+       }
+       if (attr == &sysfs_io_latency_stats_write) {
+               bch2_time_stats_to_text(&out, &ca->io_latency[WRITE]);
+               return out.pos - buf;
+       }
+
+       sysfs_printf(congested,                 "%u%%",
+                    clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
+                    * 100 / CONGESTED_MAX);
+
+       if (attr == &sysfs_bucket_quantiles_last_read)
+               return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 0) ?: out.pos - buf;
+       if (attr == &sysfs_bucket_quantiles_last_write)
+               return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 1) ?: out.pos - buf;
+       if (attr == &sysfs_bucket_quantiles_fragmentation)
+               return quantiles_to_text(&out, c, ca, bucket_sectors_used_fn, NULL)  ?: out.pos - buf;
+       if (attr == &sysfs_bucket_quantiles_oldest_gen)
+               return quantiles_to_text(&out, c, ca, bucket_oldest_gen_fn, NULL)    ?: out.pos - buf;
+
+       if (attr == &sysfs_reserve_stats) {
+               reserve_stats_to_text(&out, ca);
+               return out.pos - buf;
+       }
+       if (attr == &sysfs_alloc_debug) {
+               dev_alloc_debug_to_text(&out, ca);
+               return out.pos - buf;
+       }
+
+       return 0;
+}
+
+STORE(bch2_dev)
+{
+       struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+       struct bch_fs *c = ca->fs;
+       struct bch_member *mi;
+
+       if (attr == &sysfs_discard) {
+               bool v = strtoul_or_return(buf);
+
+               mutex_lock(&c->sb_lock);
+               mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+
+               if (v != BCH_MEMBER_DISCARD(mi)) {
+                       SET_BCH_MEMBER_DISCARD(mi, v);
+                       bch2_write_super(c);
+               }
+               mutex_unlock(&c->sb_lock);
+       }
+
+       if (attr == &sysfs_cache_replacement_policy) {
+               ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
+
+               if (v < 0)
+                       return v;
+
+               mutex_lock(&c->sb_lock);
+               mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
+
+               if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
+                       SET_BCH_MEMBER_REPLACEMENT(mi, v);
+                       bch2_write_super(c);
+               }
+               mutex_unlock(&c->sb_lock);
+       }
+
+       if (attr == &sysfs_label) {
+               char *tmp;
+               int ret;
+
+               tmp = kstrdup(buf, GFP_KERNEL);
+               if (!tmp)
+                       return -ENOMEM;
+
+               ret = bch2_dev_group_set(c, ca, strim(tmp));
+               kfree(tmp);
+               if (ret)
+                       return ret;
+       }
+
+       if (attr == &sysfs_wake_allocator)
+               bch2_wake_allocator(ca);
+
+       return size;
+}
+SYSFS_OPS(bch2_dev);
+
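+/*
+ * Each attribute below becomes a file under the device's kobject in sysfs;
+ * reads are handled by SHOW(bch2_dev) above and writes by STORE(bch2_dev).
+ */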
+struct attribute *bch2_dev_files[] = {
+       &sysfs_uuid,
+       &sysfs_bucket_size,
+       &sysfs_block_size,
+       &sysfs_first_bucket,
+       &sysfs_nbuckets,
+       &sysfs_durability,
+
+       /* settings: */
+       &sysfs_discard,
+       &sysfs_cache_replacement_policy,
+       &sysfs_state_rw,
+       &sysfs_label,
+
+       &sysfs_has_data,
+       &sysfs_iodone,
+
+       &sysfs_io_latency_read,
+       &sysfs_io_latency_write,
+       &sysfs_io_latency_stats_read,
+       &sysfs_io_latency_stats_write,
+       &sysfs_congested,
+
+       /* alloc info - other stats: */
+       &sysfs_bucket_quantiles_last_read,
+       &sysfs_bucket_quantiles_last_write,
+       &sysfs_bucket_quantiles_fragmentation,
+       &sysfs_bucket_quantiles_oldest_gen,
+
+       &sysfs_reserve_stats,
+
+       /* debug: */
+       &sysfs_alloc_debug,
+       &sysfs_wake_allocator,
+       NULL
+};
+
+#endif  /* NO_BCACHEFS_SYSFS */
diff --git a/libbcachefs/sysfs.h b/libbcachefs/sysfs.h
new file mode 100644
index 0000000..525fd05
--- /dev/null
+++ b/libbcachefs/sysfs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SYSFS_H_
+#define _BCACHEFS_SYSFS_H_
+
+#include <linux/sysfs.h>
+
+#ifndef NO_BCACHEFS_SYSFS
+
+struct attribute;
+struct sysfs_ops;
+
+extern struct attribute *bch2_fs_files[];
+extern struct attribute *bch2_fs_internal_files[];
+extern struct attribute *bch2_fs_opts_dir_files[];
+extern struct attribute *bch2_fs_time_stats_files[];
+extern struct attribute *bch2_dev_files[];
+
+extern struct sysfs_ops bch2_fs_sysfs_ops;
+extern struct sysfs_ops bch2_fs_internal_sysfs_ops;
+extern struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
+extern struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
+extern struct sysfs_ops bch2_dev_sysfs_ops;
+
+int bch2_opts_create_sysfs_files(struct kobject *);
+
+#else
+
+static struct attribute *bch2_fs_files[] = {};
+static struct attribute *bch2_fs_internal_files[] = {};
+static struct attribute *bch2_fs_opts_dir_files[] = {};
+static struct attribute *bch2_fs_time_stats_files[] = {};
+static struct attribute *bch2_dev_files[] = {};
+
+static const struct sysfs_ops bch2_fs_sysfs_ops;
+static const struct sysfs_ops bch2_fs_internal_sysfs_ops;
+static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
+static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
+static const struct sysfs_ops bch2_dev_sysfs_ops;
+
+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; }
+
+#endif /* NO_BCACHEFS_SYSFS */
+
+#endif  /* _BCACHEFS_SYSFS_H_ */
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
new file mode 100644
index 0000000..4dcace6
--- /dev/null
+++ b/libbcachefs/tests.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef CONFIG_BCACHEFS_TESTS
+
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "journal_reclaim.h"
+#include "tests.h"
+
+#include "linux/kthread.h"
+#include "linux/random.h"
+
+static void delete_test_keys(struct bch_fs *c)
+{
+       int ret;
+
+       ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
+                                     POS(0, 0), POS(0, U64_MAX),
+                                     NULL);
+       BUG_ON(ret);
+
+       ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS,
+                                     POS(0, 0), POS(0, U64_MAX),
+                                     NULL);
+       BUG_ON(ret);
+}
+
+/* unit tests */
+
+static void test_delete(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_i_cookie k;
+       int ret;
+
+       bkey_cookie_init(&k.k_i);
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p,
+                                  BTREE_ITER_INTENT);
+
+       ret = bch2_btree_iter_traverse(iter);
+       BUG_ON(ret);
+
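+       /*
+        * __bch2_trans_do() runs the update expression and then commits,
+        * retrying the whole sequence if the commit returns a transaction
+        * restart.
+        */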
+       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               bch2_trans_update(&trans, iter, &k.k_i, 0));
+       BUG_ON(ret);
+
+       pr_info("deleting once");
+       ret = bch2_btree_delete_at(&trans, iter, 0);
+       BUG_ON(ret);
+
+       pr_info("deleting twice");
+       ret = bch2_btree_delete_at(&trans, iter, 0);
+       BUG_ON(ret);
+
+       bch2_trans_exit(&trans);
+}
+
+static void test_delete_written(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_i_cookie k;
+       int ret;
+
+       bkey_cookie_init(&k.k_i);
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p,
+                                  BTREE_ITER_INTENT);
+
+       ret = bch2_btree_iter_traverse(iter);
+       BUG_ON(ret);
+
+       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+               bch2_trans_update(&trans, iter, &k.k_i, 0));
+       BUG_ON(ret);
+
+       bch2_journal_flush_all_pins(&c->journal);
+
+       ret = bch2_btree_delete_at(&trans, iter, 0);
+       BUG_ON(ret);
+
+       bch2_trans_exit(&trans);
+}
+
+static void test_iterate(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       delete_test_keys(c);
+
+       pr_info("inserting test keys");
+
+       for (i = 0; i < nr; i++) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i;
+
+               ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i,
+                                       NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS,
+                          POS_MIN, 0, k, ret) {
+               if (k.k->p.inode)
+                       break;
+
+               BUG_ON(k.k->p.offset != i++);
+       }
+
+       BUG_ON(i != nr);
+
+       pr_info("iterating backwards");
+
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k))
+               BUG_ON(k.k->p.offset != --i);
+
+       BUG_ON(i);
+
+       bch2_trans_exit(&trans);
+}
+
+static void test_iterate_extents(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       delete_test_keys(c);
+
+       pr_info("inserting test extents");
+
+       for (i = 0; i < nr; i += 8) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i + 8;
+               k.k.size = 8;
+
+               ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
+                                       NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
+                          POS_MIN, 0, k, ret) {
+               BUG_ON(bkey_start_offset(k.k) != i);
+               i = k.k->p.offset;
+       }
+
+       BUG_ON(i != nr);
+
+       pr_info("iterating backwards");
+
+       while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) {
+               BUG_ON(k.k->p.offset != i);
+               i = bkey_start_offset(k.k);
+       }
+
+       BUG_ON(i);
+
+       bch2_trans_exit(&trans);
+}
+
+static void test_iterate_slots(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       delete_test_keys(c);
+
+       pr_info("inserting test keys");
+
+       for (i = 0; i < nr; i++) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i * 2;
+
+               ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i,
+                                       NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN,
+                          0, k, ret) {
+               if (k.k->p.inode)
+                       break;
+
+               BUG_ON(k.k->p.offset != i);
+               i += 2;
+       }
+       bch2_trans_iter_free(&trans, iter);
+
+       BUG_ON(i != nr * 2);
+
+       pr_info("iterating forwards by slots");
+
+       i = 0;
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN,
+                          BTREE_ITER_SLOTS, k, ret) {
+               BUG_ON(k.k->p.offset != i);
+               BUG_ON(bkey_deleted(k.k) != (i & 1));
+
+               i++;
+               if (i == nr * 2)
+                       break;
+       }
+
+       bch2_trans_exit(&trans);
+}
+
+static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 i;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       delete_test_keys(c);
+
+       pr_info("inserting test keys");
+
+       for (i = 0; i < nr; i += 16) {
+               struct bkey_i_cookie k;
+
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = i + 16;
+               k.k.size = 8;
+
+               ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
+                                       NULL, NULL, 0);
+               BUG_ON(ret);
+       }
+
+       pr_info("iterating forwards");
+
+       i = 0;
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
+                          0, k, ret) {
+               BUG_ON(bkey_start_offset(k.k) != i + 8);
+               BUG_ON(k.k->size != 8);
+               i += 16;
+       }
+       bch2_trans_iter_free(&trans, iter);
+
+       BUG_ON(i != nr);
+
+       pr_info("iterating forwards by slots");
+
+       i = 0;
+
+       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
+                          BTREE_ITER_SLOTS, k, ret) {
+               BUG_ON(bkey_deleted(k.k) != !(i % 16));
+
+               BUG_ON(bkey_start_offset(k.k) != i);
+               BUG_ON(k.k->size != 8);
+               i = k.k->p.offset;
+
+               if (i == nr)
+                       break;
+       }
+
+       bch2_trans_exit(&trans);
+}
+
+/*
+ * XXX: we really want to make sure we've got a btree with depth > 0 for these
+ * tests
+ */
+static void test_peek_end(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS_MIN, 0);
+
+       k = bch2_btree_iter_peek(iter);
+       BUG_ON(k.k);
+
+       k = bch2_btree_iter_peek(iter);
+       BUG_ON(k.k);
+
+       bch2_trans_exit(&trans);
+}
+
+static void test_peek_end_extents(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0);
+
+       k = bch2_btree_iter_peek(iter);
+       BUG_ON(k.k);
+
+       k = bch2_btree_iter_peek(iter);
+       BUG_ON(k.k);
+
+       bch2_trans_exit(&trans);
+}
+
+/* extent unit tests */
+
+u64 test_version;
+
+static void insert_test_extent(struct bch_fs *c,
+                              u64 start, u64 end)
+{
+       struct bkey_i_cookie k;
+       int ret;
+
+       //pr_info("inserting %llu-%llu v %llu", start, end, test_version);
+
+       bkey_cookie_init(&k.k_i);
+       k.k_i.k.p.offset = end;
+       k.k_i.k.size = end - start;
+       k.k_i.k.version.lo = test_version++;
+
+       ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
+                               NULL, NULL, 0);
+       BUG_ON(ret);
+}
+
+static void __test_extent_overwrite(struct bch_fs *c,
+                                   u64 e1_start, u64 e1_end,
+                                   u64 e2_start, u64 e2_end)
+{
+       insert_test_extent(c, e1_start, e1_end);
+       insert_test_extent(c, e2_start, e2_end);
+
+       delete_test_keys(c);
+}
+
+static void test_extent_overwrite_front(struct bch_fs *c, u64 nr)
+{
+       __test_extent_overwrite(c, 0, 64, 0, 32);
+       __test_extent_overwrite(c, 8, 64, 0, 32);
+}
+
+static void test_extent_overwrite_back(struct bch_fs *c, u64 nr)
+{
+       __test_extent_overwrite(c, 0, 64, 32, 64);
+       __test_extent_overwrite(c, 0, 64, 32, 72);
+}
+
+static void test_extent_overwrite_middle(struct bch_fs *c, u64 nr)
+{
+       __test_extent_overwrite(c, 0, 64, 32, 40);
+}
+
+static void test_extent_overwrite_all(struct bch_fs *c, u64 nr)
+{
+       __test_extent_overwrite(c, 32, 64,  0,  64);
+       __test_extent_overwrite(c, 32, 64,  0, 128);
+       __test_extent_overwrite(c, 32, 64, 32,  64);
+       __test_extent_overwrite(c, 32, 64, 32, 128);
+}
+
+/* perf tests */
+
+static u64 test_rand(void)
+{
+       u64 v;
+#if 0
+       v = prandom_u32();
+#else
+       prandom_bytes(&v, sizeof(v));
+#endif
+       return v;
+}
+
+static void rand_insert(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct bkey_i_cookie k;
+       int ret;
+       u64 i;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for (i = 0; i < nr; i++) {
+               bkey_cookie_init(&k.k_i);
+               k.k.p.offset = test_rand();
+
+               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       __bch2_btree_insert(&trans, BTREE_ID_XATTRS, &k.k_i));
+
+               BUG_ON(ret);
+       }
+
+       bch2_trans_exit(&trans);
+}
+
+static void rand_lookup(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 i;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for (i = 0; i < nr; i++) {
+               iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
+                                          POS(0, test_rand()), 0);
+
+               k = bch2_btree_iter_peek(iter);
+               bch2_trans_iter_free(&trans, iter);
+       }
+
+       bch2_trans_exit(&trans);
+}
+
+static void rand_mixed(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+       u64 i;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for (i = 0; i < nr; i++) {
+               iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS,
+                                          POS(0, test_rand()), 0);
+
+               k = bch2_btree_iter_peek(iter);
+
+               if (!(i & 3) && k.k) {
+                       struct bkey_i_cookie k;
+
+                       bkey_cookie_init(&k.k_i);
+                       k.k.p = iter->pos;
+
+                       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                               bch2_trans_update(&trans, iter, &k.k_i, 0));
+
+                       BUG_ON(ret);
+               }
+
+               bch2_trans_iter_free(&trans, iter);
+       }
+
+       bch2_trans_exit(&trans);
+}
+
+static int __do_delete(struct btree_trans *trans, struct bpos pos)
+{
+       struct btree_iter *iter;
+       struct bkey_i delete;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       iter = bch2_trans_get_iter(trans, BTREE_ID_XATTRS, pos,
+                                  BTREE_ITER_INTENT);
+       ret = PTR_ERR_OR_ZERO(iter);
+       if (ret)
+               goto err;
+
+       k = bch2_btree_iter_peek(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       bkey_init(&delete.k);
+       delete.k.p = k.k->p;
+
+       bch2_trans_update(trans, iter, &delete, 0);
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static void rand_delete(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       int ret;
+       u64 i;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for (i = 0; i < nr; i++) {
+               struct bpos pos = POS(0, test_rand());
+
+               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       __do_delete(&trans, pos));
+               BUG_ON(ret);
+       }
+
+       bch2_trans_exit(&trans);
+}
+
+static void seq_insert(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_i_cookie insert;
+       int ret;
+       u64 i = 0;
+
+       bkey_cookie_init(&insert.k_i);
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN,
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+               insert.k.p = iter->pos;
+
+               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       bch2_trans_update(&trans, iter, &insert.k_i, 0));
+
+               BUG_ON(ret);
+
+               if (++i == nr)
+                       break;
+       }
+       bch2_trans_exit(&trans);
+}
+
+static void seq_lookup(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, 0, k, ret)
+               ;
+       bch2_trans_exit(&trans);
+}
+
+static void seq_overwrite(struct bch_fs *c, u64 nr)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN,
+                          BTREE_ITER_INTENT, k, ret) {
+               struct bkey_i_cookie u;
+
+               bkey_reassemble(&u.k_i, k);
+
+               ret = __bch2_trans_do(&trans, NULL, NULL, 0,
+                       bch2_trans_update(&trans, iter, &u.k_i, 0));
+
+               BUG_ON(ret);
+       }
+       bch2_trans_exit(&trans);
+}
+
+static void seq_delete(struct bch_fs *c, u64 nr)
+{
+       int ret;
+
+       ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS,
+                                     POS(0, 0), POS(0, U64_MAX),
+                                     NULL);
+       BUG_ON(ret);
+}
+
+typedef void (*perf_test_fn)(struct bch_fs *, u64);
+
+struct test_job {
+       struct bch_fs                   *c;
+       u64                             nr;
+       unsigned                        nr_threads;
+       perf_test_fn                    fn;
+
+       atomic_t                        ready;
+       wait_queue_head_t               ready_wait;
+
+       atomic_t                        done;
+       struct completion               done_completion;
+
+       u64                             start;
+       u64                             finish;
+};
+
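+/*
+ * Startup/shutdown barrier: ->ready counts down as threads start; the last
+ * thread to arrive records the start time and wakes the others, so all
+ * threads begin timed work together. ->done counts down symmetrically, and
+ * the last thread to finish records the end time and signals
+ * ->done_completion.
+ */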
+static int btree_perf_test_thread(void *data)
+{
+       struct test_job *j = data;
+
+       if (atomic_dec_and_test(&j->ready)) {
+               wake_up(&j->ready_wait);
+               j->start = sched_clock();
+       } else {
+               wait_event(j->ready_wait, !atomic_read(&j->ready));
+       }
+
+       j->fn(j->c, j->nr / j->nr_threads);
+
+       if (atomic_dec_and_test(&j->done)) {
+               j->finish = sched_clock();
+               complete(&j->done_completion);
+       }
+
+       return 0;
+}
+
+void bch2_btree_perf_test(struct bch_fs *c, const char *testname,
+                         u64 nr, unsigned nr_threads)
+{
+       struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads };
+       char name_buf[20], nr_buf[20], per_sec_buf[20];
+       unsigned i;
+       u64 time;
+
+       atomic_set(&j.ready, nr_threads);
+       init_waitqueue_head(&j.ready_wait);
+
+       atomic_set(&j.done, nr_threads);
+       init_completion(&j.done_completion);
+
+#define perf_test(_test)                               \
+       if (!strcmp(testname, #_test)) j.fn = _test
+
+       perf_test(rand_insert);
+       perf_test(rand_lookup);
+       perf_test(rand_mixed);
+       perf_test(rand_delete);
+
+       perf_test(seq_insert);
+       perf_test(seq_lookup);
+       perf_test(seq_overwrite);
+       perf_test(seq_delete);
+
+       /* unit tests, not perf tests: */
+       perf_test(test_delete);
+       perf_test(test_delete_written);
+       perf_test(test_iterate);
+       perf_test(test_iterate_extents);
+       perf_test(test_iterate_slots);
+       perf_test(test_iterate_slots_extents);
+       perf_test(test_peek_end);
+       perf_test(test_peek_end_extents);
+
+       perf_test(test_extent_overwrite_front);
+       perf_test(test_extent_overwrite_back);
+       perf_test(test_extent_overwrite_middle);
+       perf_test(test_extent_overwrite_all);
+
+       if (!j.fn) {
+               pr_err("unknown test %s", testname);
+               return;
+       }
+
+       //pr_info("running test %s:", testname);
+
+       if (nr_threads == 1)
+               btree_perf_test_thread(&j);
+       else
+               for (i = 0; i < nr_threads; i++)
+                       kthread_run(btree_perf_test_thread, &j,
+                                   "bcachefs perf test[%u]", i);
+
+       while (wait_for_completion_interruptible(&j.done_completion))
+               ;
+
+       time = j.finish - j.start;
+
+       scnprintf(name_buf, sizeof(name_buf), "%s:", testname);
+       bch2_hprint(&PBUF(nr_buf), nr);
+       bch2_hprint(&PBUF(per_sec_buf), nr * NSEC_PER_SEC / time);
+       printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n",
+               name_buf, nr_buf, nr_threads,
+               time / NSEC_PER_SEC,
+               time * nr_threads / nr,
+               per_sec_buf);
+}
+
+#endif /* CONFIG_BCACHEFS_TESTS */
diff --git a/libbcachefs/tests.h b/libbcachefs/tests.h
new file mode 100644
index 0000000..551d076
--- /dev/null
+++ b/libbcachefs/tests.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_TEST_H
+#define _BCACHEFS_TEST_H
+
+struct bch_fs;
+
+#ifdef CONFIG_BCACHEFS_TESTS
+
+void bch2_btree_perf_test(struct bch_fs *, const char *, u64, unsigned);
+
+#else
+
+#endif /* CONFIG_BCACHEFS_TESTS */
+
+#endif /* _BCACHEFS_TEST_H */
diff --git a/libbcachefs/trace.c b/libbcachefs/trace.c
new file mode 100644
index 0000000..59e8dfa
--- /dev/null
+++ b/libbcachefs/trace.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_types.h"
+#include "buckets.h"
+#include "btree_types.h"
+#include "keylist.h"
+
+#include <linux/blktrace_api.h>
+#include "keylist.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/bcachefs.h>
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
new file mode 100644
index 0000000..fd4044a
--- /dev/null
+++ b/libbcachefs/util.c
@@ -0,0 +1,907 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * random utility code, for bcache but in theory not specific to bcache
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/log2.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/sched/clock.h>
+
+#include "eytzinger.h"
+#include "util.h"
+
+static const char si_units[] = "?kMGTPEZY";
+
+static int __bch2_strtoh(const char *cp, u64 *res,
+                        u64 t_max, bool t_signed)
+{
+       bool positive = *cp != '-';
+       unsigned u;
+       u64 v = 0;
+
+       if (*cp == '+' || *cp == '-')
+               cp++;
+
+       if (!isdigit(*cp))
+               return -EINVAL;
+
+       do {
+               if (v > U64_MAX / 10)
+                       return -ERANGE;
+               v *= 10;
+               if (v > U64_MAX - (*cp - '0'))
+                       return -ERANGE;
+               v += *cp - '0';
+               cp++;
+       } while (isdigit(*cp));
+
+       for (u = 1; u < strlen(si_units); u++)
+               if (*cp == si_units[u]) {
+                       cp++;
+                       goto got_unit;
+               }
+       u = 0;
+got_unit:
+       if (*cp == '\n')
+               cp++;
+       if (*cp)
+               return -EINVAL;
+
+       if (fls64(v) + u * 10 > 64)
+               return -ERANGE;
+
+       v <<= u * 10;
+
+       if (positive) {
+               if (v > t_max)
+                       return -ERANGE;
+       } else {
+               if (v && !t_signed)
+                       return -ERANGE;
+
+               if (v > t_max + 1)
+                       return -ERANGE;
+               v = -v;
+       }
+
+       *res = v;
+       return 0;
+}
+
+#define STRTO_H(name, type)                                    \
+int bch2_ ## name ## _h(const char *cp, type *res)             \
+{                                                              \
+       u64 v;                                                  \
+       int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type),      \
+                       ANYSINT_MAX(type) != ((type) ~0ULL));   \
+       *res = v;                                               \
+       return ret;                                             \
+}
+
+STRTO_H(strtoint, int)
+STRTO_H(strtouint, unsigned int)
+STRTO_H(strtoll, long long)
+STRTO_H(strtoull, unsigned long long)
+STRTO_H(strtou64, u64)
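+
+/*
+ * Example (illustrative, not from the original source): these helpers
+ * accept SI suffixes, so bch2_strtou64_h("1M", &v) sets v to 1048576
+ * (1 << 20), and bch2_strtoint_h("-3k", &i) sets i to -3072.
+ */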
+
+void bch2_hprint(struct printbuf *buf, s64 v)
+{
+       int u, t = 0;
+
+       for (u = 0; v >= 1024 || v <= -1024; u++) {
+               t = v & ~(~0U << 10);
+               v >>= 10;
+       }
+
+       pr_buf(buf, "%lli", v);
+
+       /*
+        * 103 is magic: t is in the range [-1023, 1023] and we want
+        * to turn it into [-9, 9]
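+        * (e.g. v = 1536 shifts down to v = 1 with t = 512, and
+        * 512/103 = 4, so this prints "1.4k" - truncating, not rounding)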
+        */
+       if (u && v < 100 && v > -100)
+               pr_buf(buf, ".%i", t / 103);
+       if (u)
+               pr_buf(buf, "%c", si_units[u]);
+}
+
+void bch2_string_opt_to_text(struct printbuf *out,
+                            const char * const list[],
+                            size_t selected)
+{
+       size_t i;
+
+       for (i = 0; list[i]; i++)
+               pr_buf(out, i == selected ? "[%s] " : "%s ", list[i]);
+}
+
+void bch2_flags_to_text(struct printbuf *out,
+                       const char * const list[], u64 flags)
+{
+       unsigned bit, nr = 0;
+       bool first = true;
+
+       if (out->pos != out->end)
+               *out->pos = '\0';
+
+       while (list[nr])
+               nr++;
+
+       while (flags && (bit = __ffs(flags)) < nr) {
+               if (!first)
+                       pr_buf(out, ",");
+               first = false;
+               pr_buf(out, "%s", list[bit]);
+               flags ^= 1ULL << bit;
+       }
+}
+
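+/*
+ * Parse a comma-separated list of flag names into a bitmask: with
+ * list = { "ro", "rw", NULL }, "ro,rw" sets bits 0 and 1 (value 3);
+ * an unrecognized name aborts the parse and returns (u64) -1.
+ */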
+u64 bch2_read_flag_list(char *opt, const char * const list[])
+{
+       u64 ret = 0;
+       char *p, *s, *d = kstrndup(opt, PAGE_SIZE - 1, GFP_KERNEL);
+
+       if (!d)
+               return -ENOMEM;
+
+       s = strim(d);
+
+       while ((p = strsep(&s, ","))) {
+               int flag = match_string(list, -1, p);
+               if (flag < 0) {
+                       ret = -1;
+                       break;
+               }
+
+               ret |= 1ULL << flag;
+       }
+
+       kfree(d);
+
+       return ret;
+}
+
+bool bch2_is_zero(const void *_p, size_t n)
+{
+       const char *p = _p;
+       size_t i;
+
+       for (i = 0; i < n; i++)
+               if (p[i])
+                       return false;
+       return true;
+}
+
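+/*
+ * Streaming quantile estimate: q->entries is an eytzinger-layout tree of
+ * running estimates. Each visited estimate e->m is nudged toward the
+ * observed value v by an adaptive step, and the step halves once the
+ * estimate lands within one step of v, so it converges without storing
+ * samples.
+ */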
+static void bch2_quantiles_update(struct quantiles *q, u64 v)
+{
+       unsigned i = 0;
+
+       while (i < ARRAY_SIZE(q->entries)) {
+               struct quantile_entry *e = q->entries + i;
+
+               if (unlikely(!e->step)) {
+                       e->m = v;
+                       e->step = max_t(unsigned, v / 2, 1024);
+               } else if (e->m > v) {
+                       e->m = e->m >= e->step
+                               ? e->m - e->step
+                               : 0;
+               } else if (e->m < v) {
+                       e->m = e->m + e->step > e->m
+                               ? e->m + e->step
+                               : U32_MAX;
+               }
+
+               if ((e->m > v ? e->m - v : v - e->m) < e->step)
+                       e->step = max_t(unsigned, e->step / 2, 1);
+
+               if (v >= e->m)
+                       break;
+
+               i = eytzinger0_child(i, v > e->m);
+       }
+}
+
+/* time stats: */
+
+static void bch2_time_stats_update_one(struct time_stats *stats,
+                                      u64 start, u64 end)
+{
+       u64 duration, freq;
+
+       duration        = time_after64(end, start)
+               ? end - start : 0;
+       freq            = time_after64(end, stats->last_event)
+               ? end - stats->last_event : 0;
+
+       stats->count++;
+
+       stats->average_duration = stats->average_duration
+               ? ewma_add(stats->average_duration, duration, 6)
+               : duration;
+
+       stats->average_frequency = stats->average_frequency
+               ? ewma_add(stats->average_frequency, freq, 6)
+               : freq;
+
+       stats->max_duration = max(stats->max_duration, duration);
+
+       stats->last_event = end;
+
+       bch2_quantiles_update(&stats->quantiles, duration);
+}
+
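+/*
+ * Lock contention optimization: once an event is hot (over 1024 samples
+ * with an average inter-arrival time under 32ns), updates are buffered
+ * per-cpu and folded into the shared stats under stats->lock only when a
+ * cpu's buffer fills, keeping the lock off the fast path.
+ */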
+void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end)
+{
+       unsigned long flags;
+
+       if (!stats->buffer) {
+               spin_lock_irqsave(&stats->lock, flags);
+               bch2_time_stats_update_one(stats, start, end);
+
+               if (stats->average_frequency < 32 &&
+                   stats->count > 1024)
+                       stats->buffer =
+                               alloc_percpu_gfp(struct time_stat_buffer,
+                                                GFP_ATOMIC);
+               spin_unlock_irqrestore(&stats->lock, flags);
+       } else {
+               struct time_stat_buffer_entry *i;
+               struct time_stat_buffer *b;
+
+               preempt_disable();
+               b = this_cpu_ptr(stats->buffer);
+
+               BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
+               b->entries[b->nr++] = (struct time_stat_buffer_entry) {
+                       .start = start,
+                       .end = end
+               };
+
+               if (b->nr == ARRAY_SIZE(b->entries)) {
+                       spin_lock_irqsave(&stats->lock, flags);
+                       for (i = b->entries;
+                            i < b->entries + ARRAY_SIZE(b->entries);
+                            i++)
+                               bch2_time_stats_update_one(stats, i->start, i->end);
+                       spin_unlock_irqrestore(&stats->lock, flags);
+
+                       b->nr = 0;
+               }
+
+               preempt_enable();
+       }
+}
+
+static const struct time_unit {
+       const char      *name;
+       u32             nsecs;
+} time_units[] = {
+       { "ns",         1               },
+       { "us",         NSEC_PER_USEC   },
+       { "ms",         NSEC_PER_MSEC   },
+       { "sec",        NSEC_PER_SEC    },
+};
+
+static const struct time_unit *pick_time_units(u64 ns)
+{
+       const struct time_unit *u;
+
+       for (u = time_units;
+            u + 1 < time_units + ARRAY_SIZE(time_units) &&
+            ns >= u[1].nsecs << 1;
+            u++)
+               ;
+
+       return u;
+}
+
+static void pr_time_units(struct printbuf *out, u64 ns)
+{
+       const struct time_unit *u = pick_time_units(ns);
+
+       pr_buf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
+}
+
+void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
+{
+       const struct time_unit *u;
+       u64 freq = READ_ONCE(stats->average_frequency);
+       u64 q, last_q = 0;
+       int i;
+
+       pr_buf(out, "count:\t\t%llu\n",
+                        stats->count);
+       pr_buf(out, "rate:\t\t%llu/sec\n",
+              freq ?  div64_u64(NSEC_PER_SEC, freq) : 0);
+
+       pr_buf(out, "frequency:\t");
+       pr_time_units(out, freq);
+
+       pr_buf(out, "\navg duration:\t");
+       pr_time_units(out, stats->average_duration);
+
+       pr_buf(out, "\nmax duration:\t");
+       pr_time_units(out, stats->max_duration);
+
+       i = eytzinger0_first(NR_QUANTILES);
+       u = pick_time_units(stats->quantiles.entries[i].m);
+
+       pr_buf(out, "\nquantiles (%s):\t", u->name);
+       eytzinger0_for_each(i, NR_QUANTILES) {
+               bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+               q = max(stats->quantiles.entries[i].m, last_q);
+               pr_buf(out, "%llu%s",
+                      div_u64(q, u->nsecs),
+                      is_last ? "\n" : " ");
+               last_q = q;
+       }
+}
+
+void bch2_time_stats_exit(struct time_stats *stats)
+{
+       free_percpu(stats->buffer);
+}
+
+void bch2_time_stats_init(struct time_stats *stats)
+{
+       memset(stats, 0, sizeof(*stats));
+       spin_lock_init(&stats->lock);
+}
+
+/* ratelimit: */
+
+/**
+ * bch2_ratelimit_delay() - return how long to delay until the next time to do
+ * some work
+ *
+ * @d - the struct bch_ratelimit to update
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
+{
+       u64 now = local_clock();
+
+       return time_after64(d->next, now)
+               ? nsecs_to_jiffies(d->next - now)
+               : 0;
+}
+
+/**
+ * bch2_ratelimit_increment() - increment @d by the amount of work done
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ */
+void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
+{
+       u64 now = local_clock();
+
+       d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+
+       if (time_before64(now + NSEC_PER_SEC, d->next))
+               d->next = now + NSEC_PER_SEC;
+
+       if (time_after64(now - NSEC_PER_SEC * 2, d->next))
+               d->next = now - NSEC_PER_SEC * 2;
+}
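+
+#if 0
+/*
+ * Illustrative usage sketch (not part of the original source): a worker
+ * throttled to d->rate units per second. more_work() and do_work() are
+ * hypothetical stand-ins for the caller's actual work loop.
+ */
+static void ratelimited_worker(struct bch_ratelimit *d)
+{
+       while (more_work()) {
+               u64 delay = bch2_ratelimit_delay(d);
+
+               if (delay)
+                       schedule_timeout_interruptible(delay);
+
+               bch2_ratelimit_increment(d, do_work());
+       }
+}
+#endif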
+
+/* pd controller: */
+
+/*
+ * Updates the PD controller. Attempts to scale input values to units per second.
+ * @target: desired value
+ * @actual: current value
+ *
+ * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing
+ * it makes actual go down.
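+ *
+ * The proportional term scales with how long the error has persisted:
+ * proportional = (actual - target) * seconds_since_update / p_term_inverse,
+ * and the default p_term_inverse of 6000 heavily damps the response.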
+ */
+void bch2_pd_controller_update(struct bch_pd_controller *pd,
+                             s64 target, s64 actual, int sign)
+{
+       s64 proportional, derivative, change;
+
+       unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ;
+
+       if (seconds_since_update == 0)
+               return;
+
+       pd->last_update = jiffies;
+
+       proportional = actual - target;
+       proportional *= seconds_since_update;
+       proportional = div_s64(proportional, pd->p_term_inverse);
+
+       derivative = actual - pd->last_actual;
+       derivative = div_s64(derivative, seconds_since_update);
+       derivative = ewma_add(pd->smoothed_derivative, derivative,
+                             (pd->d_term / seconds_since_update) ?: 1);
+       derivative = derivative * pd->d_term;
+       derivative = div_s64(derivative, pd->p_term_inverse);
+
+       change = proportional + derivative;
+
+       /* Don't increase rate if not keeping up */
+       if (change > 0 &&
+           pd->backpressure &&
+           time_after64(local_clock(),
+                        pd->rate.next + NSEC_PER_MSEC))
+               change = 0;
+
+       change *= (sign * -1);
+
+       pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change,
+                               1, UINT_MAX);
+
+       pd->last_actual         = actual;
+       pd->last_derivative     = derivative;
+       pd->last_proportional   = proportional;
+       pd->last_change         = change;
+       pd->last_target         = target;
+}
+
+void bch2_pd_controller_init(struct bch_pd_controller *pd)
+{
+       pd->rate.rate           = 1024;
+       pd->last_update         = jiffies;
+       pd->p_term_inverse      = 6000;
+       pd->d_term              = 30;
+       pd->d_smooth            = pd->d_term;
+       pd->backpressure        = 1;
+}
+
+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *pd, char *buf)
+{
+       /* 2^64 - 1 is 20 digits, plus null byte */
+       char rate[21];
+       char actual[21];
+       char target[21];
+       char proportional[21];
+       char derivative[21];
+       char change[21];
+       s64 next_io;
+
+       bch2_hprint(&PBUF(rate),        pd->rate.rate);
+       bch2_hprint(&PBUF(actual),      pd->last_actual);
+       bch2_hprint(&PBUF(target),      pd->last_target);
+       bch2_hprint(&PBUF(proportional), pd->last_proportional);
+       bch2_hprint(&PBUF(derivative),  pd->last_derivative);
+       bch2_hprint(&PBUF(change),      pd->last_change);
+
+       next_io = div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC);
+
+       return sprintf(buf,
+                      "rate:\t\t%s/sec\n"
+                      "target:\t\t%s\n"
+                      "actual:\t\t%s\n"
+                      "proportional:\t%s\n"
+                      "derivative:\t%s\n"
+                      "change:\t\t%s/sec\n"
+                      "next io:\t%llims\n",
+                      rate, target, actual, proportional,
+                      derivative, change, next_io);
+}
+
+/* misc: */
+
+void bch2_bio_map(struct bio *bio, void *base, size_t size)
+{
+       while (size) {
+               struct page *page = is_vmalloc_addr(base)
+                               ? vmalloc_to_page(base)
+                               : virt_to_page(base);
+               unsigned offset = offset_in_page(base);
+               unsigned len = min_t(size_t, PAGE_SIZE - offset, size);
+
+               BUG_ON(!bio_add_page(bio, page, len, offset));
+               size -= len;
+               base += len;
+       }
+}
+
+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
+{
+       while (size) {
+               struct page *page = alloc_page(gfp_mask);
+               unsigned len = min(PAGE_SIZE, size);
+
+               if (!page)
+                       return -ENOMEM;
+
+               BUG_ON(!bio_add_page(bio, page, len, 0));
+               size -= len;
+       }
+
+       return 0;
+}
+
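+/*
+ * Uniform random integer in [0, max): mask down to the next power of two
+ * and retry until the result is in range (rejection sampling), avoiding
+ * the modulo bias of get_random_long() % max.
+ */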
+size_t bch2_rand_range(size_t max)
+{
+       size_t rand;
+
+       if (!max)
+               return 0;
+
+       do {
+               rand = get_random_long();
+               rand &= roundup_pow_of_two(max) - 1;
+       } while (rand >= max);
+
+       return rand;
+}
+
+void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
+{
+       struct bio_vec bv;
+       struct bvec_iter iter;
+
+       __bio_for_each_segment(bv, dst, iter, dst_iter) {
+               void *dstp = kmap_atomic(bv.bv_page);
+               memcpy(dstp + bv.bv_offset, src, bv.bv_len);
+               kunmap_atomic(dstp);
+
+               src += bv.bv_len;
+       }
+}
+
+void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
+{
+       struct bio_vec bv;
+       struct bvec_iter iter;
+
+       __bio_for_each_segment(bv, src, iter, src_iter) {
+               void *srcp = kmap_atomic(bv.bv_page);
+               memcpy(dst, srcp + bv.bv_offset, bv.bv_len);
+               kunmap_atomic(srcp);
+
+               dst += bv.bv_len;
+       }
+}
+
+void bch_scnmemcpy(struct printbuf *out,
+                  const char *src, size_t len)
+{
+       size_t n = printbuf_remaining(out);
+
+       if (n) {
+               n = min(n - 1, len);
+               memcpy(out->pos, src, n);
+               out->pos += n;
+               *out->pos = '\0';
+       }
+}
+
+#include "eytzinger.h"
+
+static int alignment_ok(const void *base, size_t align)
+{
+       return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+               ((unsigned long)base & (align - 1)) == 0;
+}
+
+static void u32_swap(void *a, void *b, size_t size)
+{
+       u32 t = *(u32 *)a;
+       *(u32 *)a = *(u32 *)b;
+       *(u32 *)b = t;
+}
+
+static void u64_swap(void *a, void *b, size_t size)
+{
+       u64 t = *(u64 *)a;
+       *(u64 *)a = *(u64 *)b;
+       *(u64 *)b = t;
+}
+
+static void generic_swap(void *a, void *b, size_t size)
+{
+       char t;
+
+       do {
+               t = *(char *)a;
+               *(char *)a++ = *(char *)b;
+               *(char *)b++ = t;
+       } while (--size > 0);
+}
+
+static inline int do_cmp(void *base, size_t n, size_t size,
+                        int (*cmp_func)(const void *, const void *, size_t),
+                        size_t l, size_t r)
+{
+       return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
+                       base + inorder_to_eytzinger0(r, n) * size,
+                       size);
+}
+
+static inline void do_swap(void *base, size_t n, size_t size,
+                          void (*swap_func)(void *, void *, size_t),
+                          size_t l, size_t r)
+{
+       swap_func(base + inorder_to_eytzinger0(l, n) * size,
+                 base + inorder_to_eytzinger0(r, n) * size,
+                 size);
+}
+
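+/*
+ * Bottom-up heapsort, except that every index is mapped through
+ * inorder_to_eytzinger0(), so the array ends up sorted in eytzinger (BFS)
+ * order rather than plain ascending order.
+ */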
+void eytzinger0_sort(void *base, size_t n, size_t size,
+                    int (*cmp_func)(const void *, const void *, size_t),
+                    void (*swap_func)(void *, void *, size_t))
+{
+       int i, c, r;
+
+       if (!swap_func) {
+               if (size == 4 && alignment_ok(base, 4))
+                       swap_func = u32_swap;
+               else if (size == 8 && alignment_ok(base, 8))
+                       swap_func = u64_swap;
+               else
+                       swap_func = generic_swap;
+       }
+
+       /* heapify */
+       for (i = n / 2 - 1; i >= 0; --i) {
+               for (r = i; r * 2 + 1 < n; r = c) {
+                       c = r * 2 + 1;
+
+                       if (c + 1 < n &&
+                           do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
+                               c++;
+
+                       if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
+                               break;
+
+                       do_swap(base, n, size, swap_func, r, c);
+               }
+       }
+
+       /* sort */
+       for (i = n - 1; i > 0; --i) {
+               do_swap(base, n, size, swap_func, 0, i);
+
+               for (r = 0; r * 2 + 1 < i; r = c) {
+                       c = r * 2 + 1;
+
+                       if (c + 1 < i &&
+                           do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
+                               c++;
+
+                       if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
+                               break;
+
+                       do_swap(base, n, size, swap_func, r, c);
+               }
+       }
+}
+
+void sort_cmp_size(void *base, size_t num, size_t size,
+         int (*cmp_func)(const void *, const void *, size_t),
+         void (*swap_func)(void *, void *, size_t size))
+{
+       /* pre-scale counters for performance */
+       int i = (num/2 - 1) * size, n = num * size, c, r;
+
+       if (!swap_func) {
+               if (size == 4 && alignment_ok(base, 4))
+                       swap_func = u32_swap;
+               else if (size == 8 && alignment_ok(base, 8))
+                       swap_func = u64_swap;
+               else
+                       swap_func = generic_swap;
+       }
+
+       /* heapify */
+       for ( ; i >= 0; i -= size) {
+               for (r = i; r * 2 + size < n; r  = c) {
+                       c = r * 2 + size;
+                       if (c < n - size &&
+                           cmp_func(base + c, base + c + size, size) < 0)
+                               c += size;
+                       if (cmp_func(base + r, base + c, size) >= 0)
+                               break;
+                       swap_func(base + r, base + c, size);
+               }
+       }
+
+       /* sort */
+       for (i = n - size; i > 0; i -= size) {
+               swap_func(base, base + i, size);
+               for (r = 0; r * 2 + size < i; r = c) {
+                       c = r * 2 + size;
+                       if (c < i - size &&
+                           cmp_func(base + c, base + c + size, size) < 0)
+                               c += size;
+                       if (cmp_func(base + r, base + c, size) >= 0)
+                               break;
+                       swap_func(base + r, base + c, size);
+               }
+       }
+}
+
+static void mempool_free_vp(void *element, void *pool_data)
+{
+       size_t size = (size_t) pool_data;
+
+       vpfree(element, size);
+}
+
+static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
+{
+       size_t size = (size_t) pool_data;
+
+       return vpmalloc(size, gfp_mask);
+}
+
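+/*
+ * Mempool that backs sub-page elements with kmalloc and larger elements
+ * with vpmalloc() (whole pages, falling back to vmalloc).
+ */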
+int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+       return size < PAGE_SIZE
+               ? mempool_init_kmalloc_pool(pool, min_nr, size)
+               : mempool_init(pool, min_nr, mempool_alloc_vp,
+                              mempool_free_vp, (void *) size);
+}
+
+#if 0
+void eytzinger1_test(void)
+{
+       unsigned inorder, eytz, size;
+
+       pr_info("1 based eytzinger test:");
+
+       for (size = 2;
+            size < 65536;
+            size++) {
+               unsigned extra = eytzinger1_extra(size);
+
+               if (!(size % 4096))
+                       pr_info("tree size %u", size);
+
+               BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size));
+               BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size));
+
+               BUG_ON(eytzinger1_prev(eytzinger1_first(size), size)    != 0);
+               BUG_ON(eytzinger1_next(eytzinger1_last(size), size)     != 0);
+
+               inorder = 1;
+               eytzinger1_for_each(eytz, size) {
+                       BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz);
+                       BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder);
+                       BUG_ON(eytz != eytzinger1_last(size) &&
+                              eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz);
+
+                       inorder++;
+               }
+       }
+}
+
+void eytzinger0_test(void)
+{
+
+       unsigned inorder, eytz, size;
+
+       pr_info("0 based eytzinger test:");
+
+       for (size = 1;
+            size < 65536;
+            size++) {
+               unsigned extra = eytzinger0_extra(size);
+
+               if (!(size % 4096))
+                       pr_info("tree size %u", size);
+
+               BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size));
+               BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size));
+
+               BUG_ON(eytzinger0_prev(eytzinger0_first(size), size)    != -1);
+               BUG_ON(eytzinger0_next(eytzinger0_last(size), size)     != -1);
+
+               inorder = 0;
+               eytzinger0_for_each(eytz, size) {
+                       BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz);
+                       BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder);
+                       BUG_ON(eytz != eytzinger0_last(size) &&
+                              eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz);
+
+                       inorder++;
+               }
+       }
+}
+
+static inline int cmp_u16(const void *_l, const void *_r, size_t size)
+{
+       const u16 *l = _l, *r = _r;
+
+       return (*l > *r) - (*r - *l);
+}
+
+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
+{
+       int i, c1 = -1, c2 = -1;
+       ssize_t r;
+
+       r = eytzinger0_find_le(test_array, nr,
+                              sizeof(test_array[0]),
+                              cmp_u16, &search);
+       if (r >= 0)
+               c1 = test_array[r];
+
+       for (i = 0; i < nr; i++)
+               if (test_array[i] <= search && test_array[i] > c2)
+                       c2 = test_array[i];
+
+       if (c1 != c2) {
+               eytzinger0_for_each(i, nr)
+                       pr_info("[%3u] = %12u", i, test_array[i]);
+               pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
+                       i, r, c1, c2);
+       }
+}
+
+void eytzinger0_find_test(void)
+{
+       unsigned i, nr, allocated = 1 << 12;
+       u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);
+
+       for (nr = 1; nr < allocated; nr++) {
+               pr_info("testing %u elems", nr);
+
+               get_random_bytes(test_array, nr * sizeof(test_array[0]));
+               eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);
+
+               /* verify array is sorted correctly: */
+               eytzinger0_for_each(i, nr)
+                       BUG_ON(i != eytzinger0_last(nr) &&
+                              test_array[i] > test_array[eytzinger0_next(i, nr)]);
+
+               for (i = 0; i < U16_MAX; i += 1 << 12)
+                       eytzinger0_find_test_val(test_array, nr, i);
+
+               for (i = 0; i < nr; i++) {
+                       eytzinger0_find_test_val(test_array, nr, test_array[i] - 1);
+                       eytzinger0_find_test_val(test_array, nr, test_array[i]);
+                       eytzinger0_find_test_val(test_array, nr, test_array[i] + 1);
+               }
+       }
+
+       kfree(test_array);
+}
+#endif
+
+/*
+ * Accumulate percpu counters onto one cpu's copy - only valid when the
+ * caller guarantees there are no concurrent updates to any of the counters
+ */
+u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
+{
+       u64 *ret;
+       int cpu;
+
+       preempt_disable();
+       ret = this_cpu_ptr(p);
+       preempt_enable();
+
+       for_each_possible_cpu(cpu) {
+               u64 *i = per_cpu_ptr(p, cpu);
+
+               if (i != ret) {
+                       acc_u64s(ret, i, nr);
+                       memset(i, 0, nr * sizeof(u64));
+               }
+       }
+
+       return ret;
+}
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
new file mode 100644
index 0000000..f48c638
--- /dev/null
+++ b/libbcachefs/util.h
@@ -0,0 +1,761 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_UTIL_H
+#define _BCACHEFS_UTIL_H
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/closure.h>
+#include <linux/errno.h>
+#include <linux/freezer.h>
+#include <linux/kernel.h>
+#include <linux/sched/clock.h>
+#include <linux/llist.h>
+#include <linux/log2.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
+
+#define PAGE_SECTOR_SHIFT      (PAGE_SHIFT - 9)
+#define PAGE_SECTORS           (1UL << PAGE_SECTOR_SHIFT)
+
+struct closure;
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+#define EBUG_ON(cond)          BUG_ON(cond)
+#define atomic_dec_bug(v)      BUG_ON(atomic_dec_return(v) < 0)
+#define atomic_inc_bug(v, i)   BUG_ON(atomic_inc_return(v) <= i)
+#define atomic_sub_bug(i, v)   BUG_ON(atomic_sub_return(i, v) < 0)
+#define atomic_add_bug(i, v)   BUG_ON(atomic_add_return(i, v) < 0)
+#define atomic_long_dec_bug(v)         BUG_ON(atomic_long_dec_return(v) < 0)
+#define atomic_long_sub_bug(i, v)      BUG_ON(atomic_long_sub_return(i, v) < 0)
+#define atomic64_dec_bug(v)    BUG_ON(atomic64_dec_return(v) < 0)
+#define atomic64_inc_bug(v, i) BUG_ON(atomic64_inc_return(v) <= i)
+#define atomic64_sub_bug(i, v) BUG_ON(atomic64_sub_return(i, v) < 0)
+#define atomic64_add_bug(i, v) BUG_ON(atomic64_add_return(i, v) < 0)
+
+#define memcpy(dst, src, len)                                          \
+({                                                                     \
+       void *_dst = (dst);                                             \
+       const void *_src = (src);                                       \
+       size_t _len = (len);                                            \
+                                                                       \
+       BUG_ON(!((void *) (_dst) >= (void *) (_src) + (_len) ||         \
+                (void *) (_dst) + (_len) <= (void *) (_src)));         \
+       memcpy(_dst, _src, _len);                                       \
+})
+
+#else /* DEBUG */
+
+#define EBUG_ON(cond)
+#define atomic_dec_bug(v)      atomic_dec(v)
+#define atomic_inc_bug(v, i)   atomic_inc(v)
+#define atomic_sub_bug(i, v)   atomic_sub(i, v)
+#define atomic_add_bug(i, v)   atomic_add(i, v)
+#define atomic_long_dec_bug(v)         atomic_long_dec(v)
+#define atomic_long_sub_bug(i, v)      atomic_long_sub(i, v)
+#define atomic64_dec_bug(v)    atomic64_dec(v)
+#define atomic64_inc_bug(v, i) atomic64_inc(v)
+#define atomic64_sub_bug(i, v) atomic64_sub(i, v)
+#define atomic64_add_bug(i, v) atomic64_add(i, v)
+
+#endif
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CPU_BIG_ENDIAN         0
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define CPU_BIG_ENDIAN         1
+#endif
+
+/* type hackery */
+
+#define type_is_exact(_val, _type)                                     \
+       __builtin_types_compatible_p(typeof(_val), _type)
+
+#define type_is(_val, _type)                                           \
+       (__builtin_types_compatible_p(typeof(_val), _type) ||           \
+        __builtin_types_compatible_p(typeof(_val), const _type))
+
+/* Userspace doesn't align allocations as nicely as the kernel allocators: */
+static inline size_t buf_pages(void *p, size_t len)
+{
+       return DIV_ROUND_UP(len +
+                           ((unsigned long) p & (PAGE_SIZE - 1)),
+                           PAGE_SIZE);
+}
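+
+/*
+ * Worked example: with 4k pages, buf_pages(base + 100, 4096) covers bytes
+ * 100..4195 from the start of the first page, i.e.
+ * DIV_ROUND_UP(4196, 4096) = 2 pages.
+ */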
+
+static inline void vpfree(void *p, size_t size)
+{
+       if (is_vmalloc_addr(p))
+               vfree(p);
+       else
+               free_pages((unsigned long) p, get_order(size));
+}
+
+static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
+{
+       return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
+                                        get_order(size)) ?:
+               __vmalloc(size, gfp_mask);
+}
+
+static inline void kvpfree(void *p, size_t size)
+{
+       if (size < PAGE_SIZE)
+               kfree(p);
+       else
+               vpfree(p, size);
+}
+
+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
+{
+       return size < PAGE_SIZE
+               ? kmalloc(size, gfp_mask)
+               : vpmalloc(size, gfp_mask);
+}
+
+int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);
+
+#define HEAP(type)                                                     \
+struct {                                                               \
+       size_t size, used;                                              \
+       type *data;                                                     \
+}
+
+#define DECLARE_HEAP(type, name) HEAP(type) name
+
+#define init_heap(heap, _size, gfp)                                    \
+({                                                                     \
+       (heap)->used = 0;                                               \
+       (heap)->size = (_size);                                         \
+       (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\
+                                (gfp));                                \
+})
+
+#define free_heap(heap)                                                        \
+do {                                                                   \
+       kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0]));  \
+       (heap)->data = NULL;                                            \
+} while (0)
+
+#define heap_set_backpointer(h, i, _fn)                                        \
+do {                                                                   \
+       void (*fn)(typeof(h), size_t) = _fn;                            \
+       if (fn)                                                         \
+               fn(h, i);                                               \
+} while (0)
+
+#define heap_swap(h, i, j, set_backpointer)                            \
+do {                                                                   \
+       swap((h)->data[i], (h)->data[j]);                               \
+       heap_set_backpointer(h, i, set_backpointer);                    \
+       heap_set_backpointer(h, j, set_backpointer);                    \
+} while (0)
+
+#define heap_peek(h)                                                   \
+({                                                                     \
+       EBUG_ON(!(h)->used);                                            \
+       (h)->data[0];                                                   \
+})
+
+#define heap_full(h)   ((h)->used == (h)->size)
+
+#define heap_sift_down(h, i, cmp, set_backpointer)                     \
+do {                                                                   \
+       size_t _c, _j = i;                                              \
+                                                                       \
+       for (; _j * 2 + 1 < (h)->used; _j = _c) {                       \
+               _c = _j * 2 + 1;                                        \
+               if (_c + 1 < (h)->used &&                               \
+                   cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0)      \
+                       _c++;                                           \
+                                                                       \
+               if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0)          \
+                       break;                                          \
+               heap_swap(h, _c, _j, set_backpointer);                  \
+       }                                                               \
+} while (0)
+
+#define heap_sift_up(h, i, cmp, set_backpointer)                       \
+do {                                                                   \
+       while (i) {                                                     \
+               size_t p = (i - 1) / 2;                                 \
+               if (cmp(h, (h)->data[i], (h)->data[p]) >= 0)            \
+                       break;                                          \
+               heap_swap(h, i, p, set_backpointer);                    \
+               i = p;                                                  \
+       }                                                               \
+} while (0)
+
+#define __heap_add(h, d, cmp, set_backpointer)                         \
+({                                                                     \
+       size_t _i = (h)->used++;                                        \
+       (h)->data[_i] = d;                                              \
+       heap_set_backpointer(h, _i, set_backpointer);                   \
+                                                                       \
+       heap_sift_up(h, _i, cmp, set_backpointer);                      \
+       _i;                                                             \
+})
+
+#define heap_add(h, d, cmp, set_backpointer)                           \
+({                                                                     \
+       bool _r = !heap_full(h);                                        \
+       if (_r)                                                         \
+               __heap_add(h, d, cmp, set_backpointer);                 \
+       _r;                                                             \
+})
+
+#define heap_add_or_replace(h, new, cmp, set_backpointer)              \
+do {                                                                   \
+       if (!heap_add(h, new, cmp, set_backpointer) &&                  \
+           cmp(h, new, heap_peek(h)) >= 0) {                           \
+               (h)->data[0] = new;                                     \
+               heap_set_backpointer(h, 0, set_backpointer);            \
+               heap_sift_down(h, 0, cmp, set_backpointer);             \
+       }                                                               \
+} while (0)
+
+#define heap_del(h, i, cmp, set_backpointer)                           \
+do {                                                                   \
+       size_t _i = (i);                                                \
+                                                                       \
+       BUG_ON(_i >= (h)->used);                                        \
+       (h)->used--;                                                    \
+       heap_swap(h, _i, (h)->used, set_backpointer);                   \
+       heap_sift_up(h, _i, cmp, set_backpointer);                      \
+       heap_sift_down(h, _i, cmp, set_backpointer);                    \
+} while (0)
+
+#define heap_pop(h, d, cmp, set_backpointer)                           \
+({                                                                     \
+       bool _r = (h)->used;                                            \
+       if (_r) {                                                       \
+               (d) = (h)->data[0];                                     \
+               heap_del(h, 0, cmp, set_backpointer);                   \
+       }                                                               \
+       _r;                                                             \
+})
+
+#define heap_resort(heap, cmp, set_backpointer)                                \
+do {                                                                   \
+       ssize_t _i;                                                     \
+       for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i)        \
+               heap_sift_down(heap, _i, cmp, set_backpointer);         \
+} while (0)
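+
+/*
+ * Example (hypothetical caller): a min-heap of u32s with no backpointers.
+ * cmp is invoked as cmp(heap, l, r) and should return < 0 when l sorts
+ * first, so cmp_int() (defined below) yields a min-heap:
+ *
+ *	#define u32_heap_cmp(h, l, r)	cmp_int(l, r)
+ *
+ *	HEAP(u32) heap;
+ *	u32 top;
+ *
+ *	init_heap(&heap, 128, GFP_KERNEL);
+ *	heap_add(&heap, 42, u32_heap_cmp, NULL);
+ *	if (heap_pop(&heap, top, u32_heap_cmp, NULL))
+ *		pr_info("smallest: %u", top);
+ *	free_heap(&heap);
+ */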
+
+#define ANYSINT_MAX(t)                                                 \
+       ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
+
+struct printbuf {
+       char            *pos;
+       char            *end;
+};
+
+static inline size_t printbuf_remaining(struct printbuf *buf)
+{
+       return buf->end - buf->pos;
+}
+
+#define _PBUF(_buf, _len)                                              \
+       ((struct printbuf) {                                            \
+               .pos    = _buf,                                         \
+               .end    = _buf + _len,                                  \
+       })
+
+#define PBUF(_buf) _PBUF(_buf, sizeof(_buf))
+
+#define pr_buf(_out, ...)                                              \
+do {                                                                   \
+       (_out)->pos += scnprintf((_out)->pos, printbuf_remaining(_out), \
+                                __VA_ARGS__);                          \
+} while (0)
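+
+/*
+ * Example (hypothetical caller): format into a stack buffer; the result is
+ * NUL terminated and out.pos - buf is its length:
+ *
+ *	char buf[64];
+ *	struct printbuf out = PBUF(buf);
+ *
+ *	pr_buf(&out, "%u sectors", nr_sectors);
+ */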
+
+void bch_scnmemcpy(struct printbuf *, const char *, size_t);
+
+int bch2_strtoint_h(const char *, int *);
+int bch2_strtouint_h(const char *, unsigned int *);
+int bch2_strtoll_h(const char *, long long *);
+int bch2_strtoull_h(const char *, unsigned long long *);
+int bch2_strtou64_h(const char *, u64 *);
+
+static inline int bch2_strtol_h(const char *cp, long *res)
+{
+#if BITS_PER_LONG == 32
+       return bch2_strtoint_h(cp, (int *) res);
+#else
+       return bch2_strtoll_h(cp, (long long *) res);
+#endif
+}
+
+static inline int bch2_strtoul_h(const char *cp, long *res)
+{
+#if BITS_PER_LONG == 32
+       return bch2_strtouint_h(cp, (unsigned int *) res);
+#else
+       return bch2_strtoull_h(cp, (unsigned long long *) res);
+#endif
+}
+
+#define strtoi_h(cp, res)                                              \
+       ( type_is(*res, int)            ? bch2_strtoint_h(cp, (void *) res)\
+       : type_is(*res, long)           ? bch2_strtol_h(cp, (void *) res)\
+       : type_is(*res, long long)      ? bch2_strtoll_h(cp, (void *) res)\
+       : type_is(*res, unsigned)       ? bch2_strtouint_h(cp, (void *) res)\
+       : type_is(*res, unsigned long)  ? bch2_strtoul_h(cp, (void *) res)\
+       : type_is(*res, unsigned long long) ? bch2_strtoull_h(cp, (void *) res)\
+       : -EINVAL)
+
+#define strtoul_safe(cp, var)                                          \
+({                                                                     \
+       unsigned long _v;                                               \
+       int _r = kstrtoul(cp, 10, &_v);                                 \
+       if (!_r)                                                        \
+               var = _v;                                               \
+       _r;                                                             \
+})
+
+#define strtoul_safe_clamp(cp, var, min, max)                          \
+({                                                                     \
+       unsigned long _v;                                               \
+       int _r = kstrtoul(cp, 10, &_v);                                 \
+       if (!_r)                                                        \
+               var = clamp_t(typeof(var), _v, min, max);               \
+       _r;                                                             \
+})
+
+#define strtoul_safe_restrict(cp, var, min, max)                       \
+({                                                                     \
+       unsigned long _v;                                               \
+       int _r = kstrtoul(cp, 10, &_v);                                 \
+       if (!_r && _v >= min && _v <= max)                              \
+               var = _v;                                               \
+       else                                                            \
+               _r = -EINVAL;                                           \
+       _r;                                                             \
+})
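+
+/*
+ * Example (hypothetical sysfs store): @var is only written on a successful
+ * parse; _clamp forces the value into [min, max], while _restrict rejects
+ * out of range input with -EINVAL:
+ *
+ *	int ret = strtoul_safe_clamp(buf, rate_percent, 1, 100);
+ *	if (ret)
+ *		return ret;
+ */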
+
+#define snprint(buf, size, var)                                                \
+       snprintf(buf, size,                                             \
+                  type_is(var, int)            ? "%i\n"                \
+                : type_is(var, unsigned)       ? "%u\n"                \
+                : type_is(var, long)           ? "%li\n"               \
+                : type_is(var, unsigned long)  ? "%lu\n"               \
+                : type_is(var, s64)            ? "%lli\n"              \
+                : type_is(var, u64)            ? "%llu\n"              \
+                : type_is(var, char *)         ? "%s\n"                \
+                : "%i\n", var)
+
+void bch2_hprint(struct printbuf *, s64);
+
+bool bch2_is_zero(const void *, size_t);
+
+void bch2_string_opt_to_text(struct printbuf *,
+                            const char * const [], size_t);
+
+void bch2_flags_to_text(struct printbuf *, const char * const[], u64);
+u64 bch2_read_flag_list(char *, const char * const[]);
+
+#define NR_QUANTILES   15
+#define QUANTILE_IDX(i)        inorder_to_eytzinger0(i, NR_QUANTILES)
+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
+#define QUANTILE_LAST  eytzinger0_last(NR_QUANTILES)
+
+struct quantiles {
+       struct quantile_entry {
+               u64     m;
+               u64     step;
+       }               entries[NR_QUANTILES];
+};
+
+struct time_stat_buffer {
+       unsigned        nr;
+       struct time_stat_buffer_entry {
+               u64     start;
+               u64     end;
+       }               entries[32];
+};
+
+struct time_stats {
+       spinlock_t      lock;
+       u64             count;
+       /* all fields are in nanoseconds */
+       u64             average_duration;
+       u64             average_frequency;
+       u64             max_duration;
+       u64             last_event;
+       struct quantiles quantiles;
+
+       struct time_stat_buffer __percpu *buffer;
+};
+
+void __bch2_time_stats_update(struct time_stats *stats, u64, u64);
+
+static inline void bch2_time_stats_update(struct time_stats *stats, u64 start)
+{
+       __bch2_time_stats_update(stats, start, local_clock());
+}
+
+void bch2_time_stats_to_text(struct printbuf *, struct time_stats *);
+
+void bch2_time_stats_exit(struct time_stats *);
+void bch2_time_stats_init(struct time_stats *);
+
+#define ewma_add(ewma, val, weight)                                    \
+({                                                                     \
+       typeof(ewma) _ewma = (ewma);                                    \
+       typeof(weight) _weight = (weight);                              \
+                                                                       \
+       (((_ewma << _weight) - _ewma) + (val)) >> _weight;              \
+})
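+
+/*
+ * Worked example: ewma = 100, val = 20, weight = 2 gives
+ * ((100 << 2) - 100 + 20) >> 2 = 320 >> 2 = 80, i.e. the new sample is
+ * blended in with weight 1/2^2 = 1/4 (0.75 * 100 + 0.25 * 20 = 80).
+ */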
+
+struct bch_ratelimit {
+       /* Next time we want to do some work, in nanoseconds */
+       u64                     next;
+
+       /*
+        * Rate at which we want to do work, in units per nanosecond
+        * The units here correspond to the units passed to
+        * bch2_ratelimit_increment()
+        */
+       unsigned                rate;
+};
+
+static inline void bch2_ratelimit_reset(struct bch_ratelimit *d)
+{
+       d->next = local_clock();
+}
+
+u64 bch2_ratelimit_delay(struct bch_ratelimit *);
+void bch2_ratelimit_increment(struct bch_ratelimit *, u64);
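+
+/*
+ * Example (hypothetical worker loop): account each unit of work with
+ * bch2_ratelimit_increment() and sleep for whatever delay
+ * bch2_ratelimit_delay() reports before doing more:
+ *
+ *	bch2_ratelimit_reset(&rl);
+ *	while (have_work()) {
+ *		u64 delay = bch2_ratelimit_delay(&rl);
+ *
+ *		if (delay)
+ *			schedule_timeout_interruptible(nsecs_to_jiffies(delay));
+ *
+ *		do_one_unit();
+ *		bch2_ratelimit_increment(&rl, 1);
+ *	}
+ */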
+
+struct bch_pd_controller {
+       struct bch_ratelimit    rate;
+       unsigned long           last_update;
+
+       s64                     last_actual;
+       s64                     smoothed_derivative;
+
+       unsigned                p_term_inverse;
+       unsigned                d_smooth;
+       unsigned                d_term;
+
+       /* for exporting to sysfs (no effect on behavior) */
+       s64                     last_derivative;
+       s64                     last_proportional;
+       s64                     last_change;
+       s64                     last_target;
+
+       /* If true, the rate will not increase if bch2_ratelimit_delay()
+        * is not being called often enough. */
+       bool                    backpressure;
+};
+
+void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int);
+void bch2_pd_controller_init(struct bch_pd_controller *);
+size_t bch2_pd_controller_print_debug(struct bch_pd_controller *, char *);
+
+#define sysfs_pd_controller_attribute(name)                            \
+       rw_attribute(name##_rate);                                      \
+       rw_attribute(name##_rate_bytes);                                \
+       rw_attribute(name##_rate_d_term);                               \
+       rw_attribute(name##_rate_p_term_inverse);                       \
+       read_attribute(name##_rate_debug)
+
+#define sysfs_pd_controller_files(name)                                        \
+       &sysfs_##name##_rate,                                           \
+       &sysfs_##name##_rate_bytes,                                     \
+       &sysfs_##name##_rate_d_term,                                    \
+       &sysfs_##name##_rate_p_term_inverse,                            \
+       &sysfs_##name##_rate_debug
+
+#define sysfs_pd_controller_show(name, var)                            \
+do {                                                                   \
+       sysfs_hprint(name##_rate,               (var)->rate.rate);      \
+       sysfs_print(name##_rate_bytes,          (var)->rate.rate);      \
+       sysfs_print(name##_rate_d_term,         (var)->d_term);         \
+       sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \
+                                                                       \
+       if (attr == &sysfs_##name##_rate_debug)                         \
+               return bch2_pd_controller_print_debug(var, buf);                \
+} while (0)
+
+#define sysfs_pd_controller_store(name, var)                           \
+do {                                                                   \
+       sysfs_strtoul_clamp(name##_rate,                                \
+                           (var)->rate.rate, 1, UINT_MAX);             \
+       sysfs_strtoul_clamp(name##_rate_bytes,                          \
+                           (var)->rate.rate, 1, UINT_MAX);             \
+       sysfs_strtoul(name##_rate_d_term,       (var)->d_term);         \
+       sysfs_strtoul_clamp(name##_rate_p_term_inverse,                 \
+                           (var)->p_term_inverse, 1, INT_MAX);         \
+} while (0)
+
+#define container_of_or_null(ptr, type, member)                                \
+({                                                                     \
+       typeof(ptr) _ptr = ptr;                                         \
+       _ptr ? container_of(_ptr, type, member) : NULL;                 \
+})
+
+/* Does linear interpolation between powers of two */
+static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
+{
+       unsigned fract = x & ~(~0 << fract_bits);
+
+       x >>= fract_bits;
+       x   = 1 << x;
+       x  += (x * fract) >> fract_bits;
+
+       return x;
+}
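+
+/*
+ * Worked example: fract_exp_two(5, 1) splits x = 0b101 into integer part 2
+ * and fraction 1/2: 1 << 2 = 4, plus (4 * 1) >> 1 = 2, giving 6 - halfway
+ * between 2^2 = 4 and 2^3 = 8.
+ */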
+
+void bch2_bio_map(struct bio *bio, void *base, size_t);
+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
+
+static inline sector_t bdev_sectors(struct block_device *bdev)
+{
+       return bdev->bd_inode->i_size >> 9;
+}
+
+#define closure_bio_submit(bio, cl)                                    \
+do {                                                                   \
+       closure_get(cl);                                                \
+       submit_bio(bio);                                                \
+} while (0)
+
+#define kthread_wait_freezable(cond)                                   \
+({                                                                     \
+       int _ret = 0;                                                   \
+       while (1) {                                                     \
+               set_current_state(TASK_INTERRUPTIBLE);                  \
+               if (kthread_should_stop()) {                            \
+                       _ret = -1;                                      \
+                       break;                                          \
+               }                                                       \
+                                                                       \
+               if (cond)                                               \
+                       break;                                          \
+                                                                       \
+               schedule();                                             \
+               try_to_freeze();                                        \
+       }                                                               \
+       set_current_state(TASK_RUNNING);                                \
+       _ret;                                                           \
+})
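+
+/*
+ * Example (hypothetical kthread main loop): returns 0 once @cond is true,
+ * -1 if the thread was asked to stop:
+ *
+ *	while (!kthread_wait_freezable(have_work(c)))
+ *		do_work(c);
+ */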
+
+size_t bch2_rand_range(size_t);
+
+void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
+void memcpy_from_bio(void *, struct bio *, struct bvec_iter);
+
+static inline void memcpy_u64s_small(void *dst, const void *src,
+                                    unsigned u64s)
+{
+       u64 *d = dst;
+       const u64 *s = src;
+
+       while (u64s--)
+               *d++ = *s++;
+}
+
+static inline void __memcpy_u64s(void *dst, const void *src,
+                                unsigned u64s)
+{
+#ifdef CONFIG_X86_64
+       long d0, d1, d2;
+       asm volatile("rep ; movsq"
+                    : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+                    : "0" (u64s), "1" (dst), "2" (src)
+                    : "memory");
+#else
+       u64 *d = dst;
+       const u64 *s = src;
+
+       while (u64s--)
+               *d++ = *s++;
+#endif
+}
+
+static inline void memcpy_u64s(void *dst, const void *src,
+                              unsigned u64s)
+{
+       EBUG_ON(!(dst >= src + u64s * sizeof(u64) ||
+                dst + u64s * sizeof(u64) <= src));
+
+       __memcpy_u64s(dst, src, u64s);
+}
+
+static inline void __memmove_u64s_down(void *dst, const void *src,
+                                      unsigned u64s)
+{
+       __memcpy_u64s(dst, src, u64s);
+}
+
+static inline void memmove_u64s_down(void *dst, const void *src,
+                                    unsigned u64s)
+{
+       EBUG_ON(dst > src);
+
+       __memmove_u64s_down(dst, src, u64s);
+}
+
+static inline void __memmove_u64s_up_small(void *_dst, const void *_src,
+                                          unsigned u64s)
+{
+       u64 *dst = (u64 *) _dst + u64s;
+       u64 *src = (u64 *) _src + u64s;
+
+       while (u64s--)
+               *--dst = *--src;
+}
+
+static inline void memmove_u64s_up_small(void *dst, const void *src,
+                                        unsigned u64s)
+{
+       EBUG_ON(dst < src);
+
+       __memmove_u64s_up_small(dst, src, u64s);
+}
+
+static inline void __memmove_u64s_up(void *_dst, const void *_src,
+                                    unsigned u64s)
+{
+       u64 *dst = (u64 *) _dst + u64s - 1;
+       u64 *src = (u64 *) _src + u64s - 1;
+
+#ifdef CONFIG_X86_64
+       long d0, d1, d2;
+       asm volatile("std ;\n"
+                    "rep ; movsq\n"
+                    "cld ;\n"
+                    : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+                    : "0" (u64s), "1" (dst), "2" (src)
+                    : "memory");
+#else
+       while (u64s--)
+               *dst-- = *src--;
+#endif
+}
+
+static inline void memmove_u64s_up(void *dst, const void *src,
+                                  unsigned u64s)
+{
+       EBUG_ON(dst < src);
+
+       __memmove_u64s_up(dst, src, u64s);
+}
+
+static inline void memmove_u64s(void *dst, const void *src,
+                               unsigned u64s)
+{
+       if (dst < src)
+               __memmove_u64s_down(dst, src, u64s);
+       else
+               __memmove_u64s_up(dst, src, u64s);
+}
+
+/* Set the last few bytes up to a u64 boundary given an offset into a buffer. */
+static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
+{
+       unsigned rem = round_up(bytes, sizeof(u64)) - bytes;
+
+       memset(s + bytes, c, rem);
+}
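+
+/*
+ * Worked example: memset_u64s_tail(s, 0, 5) rounds 5 up to 8 and zeroes
+ * the three bytes s[5..7], padding the buffer out to a u64 boundary.
+ */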
+
+void sort_cmp_size(void *base, size_t num, size_t size,
+         int (*cmp_func)(const void *, const void *, size_t),
+         void (*swap_func)(void *, void *, size_t));
+
+/* just the memmove, doesn't update @_nr */
+#define __array_insert_item(_array, _nr, _pos)                         \
+       memmove(&(_array)[(_pos) + 1],                                  \
+               &(_array)[(_pos)],                                      \
+               sizeof((_array)[0]) * ((_nr) - (_pos)))
+
+#define array_insert_item(_array, _nr, _pos, _new_item)                        \
+do {                                                                   \
+       __array_insert_item(_array, _nr, _pos);                         \
+       (_nr)++;                                                        \
+       (_array)[(_pos)] = (_new_item);                                 \
+} while (0)
+
+#define array_remove_items(_array, _nr, _pos, _nr_to_remove)           \
+do {                                                                   \
+       (_nr) -= (_nr_to_remove);                                       \
+       memmove(&(_array)[(_pos)],                                      \
+               &(_array)[(_pos) + (_nr_to_remove)],                    \
+               sizeof((_array)[0]) * ((_nr) - (_pos)));                \
+} while (0)
+
+#define array_remove_item(_array, _nr, _pos)                           \
+       array_remove_items(_array, _nr, _pos, 1)
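+
+/*
+ * Example (hypothetical caller): keep a sorted array and its element count
+ * in sync while inserting and deleting:
+ *
+ *	array_insert_item(keys, nr, pos, new_key);
+ *	...
+ *	array_remove_item(keys, nr, pos);
+ */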
+
+#define bubble_sort(_base, _nr, _cmp)                                  \
+do {                                                                   \
+       ssize_t _i, _end;                                               \
+       bool _swapped = true;                                           \
+                                                                       \
+       for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\
+               _swapped = false;                                       \
+               for (_i = 0; _i < _end; _i++)                           \
+                       if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) {   \
+                               swap((_base)[_i], (_base)[_i + 1]);     \
+                               _swapped = true;                        \
+                       }                                               \
+       }                                                               \
+} while (0)
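+
+/*
+ * Example (hypothetical caller): elements are passed to _cmp by value, so
+ * cmp_int() (below) works directly:
+ *
+ *	u32 a[8];
+ *
+ *	bubble_sort(a, 8, cmp_int);
+ */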
+
+static inline u64 percpu_u64_get(u64 __percpu *src)
+{
+       u64 ret = 0;
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               ret += *per_cpu_ptr(src, cpu);
+       return ret;
+}
+
+static inline void percpu_u64_set(u64 __percpu *dst, u64 src)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               *per_cpu_ptr(dst, cpu) = 0;
+
+       preempt_disable();
+       *this_cpu_ptr(dst) = src;
+       preempt_enable();
+}
+
+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr)
+{
+       unsigned i;
+
+       for (i = 0; i < nr; i++)
+               acc[i] += src[i];
+}
+
+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src,
+                                  unsigned nr)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               acc_u64s(acc, per_cpu_ptr(src, cpu), nr);
+}
+
+static inline void percpu_memset(void __percpu *p, int c, size_t bytes)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               memset(per_cpu_ptr(p, cpu), c, bytes);
+}
+
+u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
+
+#define cmp_int(l, r)          ((l > r) - (l < r))
+
+#endif /* _BCACHEFS_UTIL_H */
diff --git a/libbcachefs/vstructs.h b/libbcachefs/vstructs.h
new file mode 100644 (file)
index 0000000..c099cdc
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _VSTRUCTS_H
+#define _VSTRUCTS_H
+
+#include "util.h"
+
+/*
+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this
+ * assumes u64 is little endian:
+ */
+#define __vstruct_u64s(_s)                                             \
+({                                                                     \
+       ( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s)           \
+       : type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s)           \
+       : type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s)           \
+       : ((__force u8) ((_s)->u64s)));                                         \
+})
+
+#define __vstruct_bytes(_type, _u64s)                                  \
+({                                                                     \
+       BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64));             \
+                                                                       \
+       (offsetof(_type, _data) + (_u64s) * sizeof(u64));               \
+})
+
+#define vstruct_bytes(_s)                                              \
+       __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s))
+
+#define __vstruct_blocks(_type, _sector_block_bits, _u64s)             \
+       (round_up(__vstruct_bytes(_type, _u64s),                        \
+                 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits)))
+
+#define vstruct_blocks(_s, _sector_block_bits)                         \
+       __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s))
+
+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s)             \
+       __vstruct_blocks(typeof(*(_s)), _sector_block_bits,             \
+                        __vstruct_u64s(_s) + (_u64s))
+
+#define vstruct_sectors(_s, _sector_block_bits)                                \
+       (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9)
+
+#define vstruct_next(_s)                                               \
+       ((typeof(_s))                   ((_s)->_data + __vstruct_u64s(_s)))
+#define vstruct_last(_s)                                               \
+       ((typeof(&(_s)->start[0]))      ((_s)->_data + __vstruct_u64s(_s)))
+#define vstruct_end(_s)                                                        \
+       ((void *)                       ((_s)->_data + __vstruct_u64s(_s)))
+
+#define vstruct_for_each(_s, _i)                                       \
+       for (_i = (_s)->start;                                          \
+            _i < vstruct_last(_s);                                     \
+            _i = vstruct_next(_i))
+
+#define vstruct_for_each_safe(_s, _i, _t)                              \
+       for (_i = (_s)->start;                                          \
+            _i < vstruct_last(_s) && (_t = vstruct_next(_i), true);    \
+            _i = _t)
+
+#define vstruct_idx(_s, _idx)                                          \
+       ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx)))
+
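+/*
+ * Example (hypothetical caller): walk the variable length entries of a
+ * journal set, whose header has a u64s field and a _data[] payload:
+ *
+ *	struct jset *j;
+ *	struct jset_entry *entry;
+ *
+ *	vstruct_for_each(j, entry)
+ *		process_entry(entry);
+ */
+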
+#endif /* _VSTRUCTS_H */
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
new file mode 100644 (file)
index 0000000..21f64cb
--- /dev/null
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_update.h"
+#include "extents.h"
+#include "fs.h"
+#include "rebalance.h"
+#include "str_hash.h"
+#include "xattr.h"
+
+#include <linux/dcache.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
+
+static u64 bch2_xattr_hash(const struct bch_hash_info *info,
+                         const struct xattr_search_key *key)
+{
+       struct bch_str_hash_ctx ctx;
+
+       bch2_str_hash_init(&ctx, info);
+       bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type));
+       bch2_str_hash_update(&ctx, info, key->name.name, key->name.len);
+
+       return bch2_str_hash_end(&ctx, info);
+}
+
+static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
+{
+       return bch2_xattr_hash(info, key);
+}
+
+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
+{
+       struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
+
+       return bch2_xattr_hash(info,
+                &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
+}
+
+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
+{
+       struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
+       const struct xattr_search_key *r = _r;
+
+       return l.v->x_type != r->type ||
+               l.v->x_name_len != r->name.len ||
+               memcmp(l.v->x_name, r->name.name, r->name.len);
+}
+
+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
+{
+       struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
+       struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r);
+
+       return l.v->x_type != r.v->x_type ||
+               l.v->x_name_len != r.v->x_name_len ||
+               memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
+}
+
+const struct bch_hash_desc bch2_xattr_hash_desc = {
+       .btree_id       = BTREE_ID_XATTRS,
+       .key_type       = KEY_TYPE_xattr,
+       .hash_key       = xattr_hash_key,
+       .hash_bkey      = xattr_hash_bkey,
+       .cmp_key        = xattr_cmp_key,
+       .cmp_bkey       = xattr_cmp_bkey,
+};
+
+const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       const struct xattr_handler *handler;
+       struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
+
+       if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
+               return "value too small";
+
+       if (bkey_val_u64s(k.k) <
+           xattr_val_u64s(xattr.v->x_name_len,
+                          le16_to_cpu(xattr.v->x_val_len)))
+               return "value too small";
+
+       if (bkey_val_u64s(k.k) >
+           xattr_val_u64s(xattr.v->x_name_len,
+                          le16_to_cpu(xattr.v->x_val_len) + 4))
+               return "value too big";
+
+       handler = bch2_xattr_type_to_handler(xattr.v->x_type);
+       if (!handler)
+               return "invalid type";
+
+       if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
+               return "xattr name has invalid characters";
+
+       return NULL;
+}
+
+void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
+                       struct bkey_s_c k)
+{
+       const struct xattr_handler *handler;
+       struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
+
+       handler = bch2_xattr_type_to_handler(xattr.v->x_type);
+       if (handler && handler->prefix)
+               pr_buf(out, "%s", handler->prefix);
+       else if (handler)
+               pr_buf(out, "(type %u)", xattr.v->x_type);
+       else
+               pr_buf(out, "(unknown type %u)", xattr.v->x_type);
+
+       bch_scnmemcpy(out, xattr.v->x_name,
+                     xattr.v->x_name_len);
+       pr_buf(out, ":");
+       bch_scnmemcpy(out, xattr_val(xattr.v),
+                     le16_to_cpu(xattr.v->x_val_len));
+}
+
+int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
+                  const char *name, void *buffer, size_t size, int type)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c_xattr xattr;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc,
+                               &inode->ei_str_hash, inode->v.i_ino,
+                               &X_SEARCH(type, name, strlen(name)),
+                               0);
+       if (IS_ERR(iter)) {
+               bch2_trans_exit(&trans);
+               BUG_ON(PTR_ERR(iter) == -EINTR);
+
+               return PTR_ERR(iter) == -ENOENT ? -ENODATA : PTR_ERR(iter);
+       }
+
+       xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
+       ret = le16_to_cpu(xattr.v->x_val_len);
+       if (buffer) {
+               if (ret > size)
+                       ret = -ERANGE;
+               else
+                       memcpy(buffer, xattr_val(xattr.v), ret);
+       }
+
+       bch2_trans_exit(&trans);
+       return ret;
+}
+
+int bch2_xattr_set(struct btree_trans *trans, u64 inum,
+                  const struct bch_hash_info *hash_info,
+                  const char *name, const void *value, size_t size,
+                  int type, int flags)
+{
+       int ret;
+
+       if (value) {
+               struct bkey_i_xattr *xattr;
+               unsigned namelen = strlen(name);
+               unsigned u64s = BKEY_U64s +
+                       xattr_val_u64s(namelen, size);
+
+               if (u64s > U8_MAX)
+                       return -ERANGE;
+
+               xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
+               if (IS_ERR(xattr))
+                       return PTR_ERR(xattr);
+
+               bkey_xattr_init(&xattr->k_i);
+               xattr->k.u64s           = u64s;
+               xattr->v.x_type         = type;
+               xattr->v.x_name_len     = namelen;
+               xattr->v.x_val_len      = cpu_to_le16(size);
+               memcpy(xattr->v.x_name, name, namelen);
+               memcpy(xattr_val(&xattr->v), value, size);
+
+               ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
+                             inum, &xattr->k_i,
+                             (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
+                             (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
+       } else {
+               struct xattr_search_key search =
+                       X_SEARCH(type, name, strlen(name));
+
+               ret = bch2_hash_delete(trans, bch2_xattr_hash_desc,
+                                      hash_info, inum, &search);
+       }
+
+       if (ret == -ENOENT)
+               ret = flags & XATTR_REPLACE ? -ENODATA : 0;
+
+       return ret;
+}
+
+struct xattr_buf {
+       char            *buf;
+       size_t          len;
+       size_t          used;
+};
+
+static int __bch2_xattr_emit(const char *prefix,
+                            const char *name, size_t name_len,
+                            struct xattr_buf *buf)
+{
+       const size_t prefix_len = strlen(prefix);
+       const size_t total_len = prefix_len + name_len + 1;
+
+       if (buf->buf) {
+               if (buf->used + total_len > buf->len)
+                       return -ERANGE;
+
+               memcpy(buf->buf + buf->used, prefix, prefix_len);
+               memcpy(buf->buf + buf->used + prefix_len,
+                      name, name_len);
+               buf->buf[buf->used + prefix_len + name_len] = '\0';
+       }
+
+       buf->used += total_len;
+       return 0;
+}
+
+static int bch2_xattr_emit(struct dentry *dentry,
+                           const struct bch_xattr *xattr,
+                           struct xattr_buf *buf)
+{
+       const struct xattr_handler *handler =
+               bch2_xattr_type_to_handler(xattr->x_type);
+
+       return handler && (!handler->list || handler->list(dentry))
+               ? __bch2_xattr_emit(handler->prefix ?: handler->name,
+                                   xattr->x_name, xattr->x_name_len, buf)
+               : 0;
+}
+
+static int bch2_xattr_list_bcachefs(struct bch_fs *c,
+                                   struct bch_inode_info *inode,
+                                   struct xattr_buf *buf,
+                                   bool all)
+{
+       const char *prefix = all ? "bcachefs_effective." : "bcachefs.";
+       unsigned id;
+       int ret = 0;
+       u64 v;
+
+       for (id = 0; id < Inode_opt_nr; id++) {
+               v = bch2_inode_opt_get(&inode->ei_inode, id);
+               if (!v)
+                       continue;
+
+               if (!all &&
+                   !(inode->ei_inode.bi_fields_set & (1 << id)))
+                       continue;
+
+               ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id],
+                                       strlen(bch2_inode_opts[id]), buf);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+       struct bch_fs *c = dentry->d_sb->s_fs_info;
+       struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
+       u64 inum = dentry->d_inode->i_ino;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_XATTRS,
+                          POS(inum, 0), 0, k, ret) {
+               BUG_ON(k.k->p.inode < inum);
+
+               if (k.k->p.inode > inum)
+                       break;
+
+               if (k.k->type != KEY_TYPE_xattr)
+                       continue;
+
+               ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf);
+               if (ret)
+                       break;
+       }
+       ret = bch2_trans_exit(&trans) ?: ret;
+
+       if (ret)
+               return ret;
+
+       ret = bch2_xattr_list_bcachefs(c, inode, &buf, false);
+       if (ret)
+               return ret;
+
+       ret = bch2_xattr_list_bcachefs(c, inode, &buf, true);
+       if (ret)
+               return ret;
+
+       return buf.used;
+}
+
+static int bch2_xattr_get_handler(const struct xattr_handler *handler,
+                                 struct dentry *dentry, struct inode *vinode,
+                                 const char *name, void *buffer, size_t size)
+{
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+       return bch2_xattr_get(c, inode, name, buffer, size, handler->flags);
+}
+
+static int bch2_xattr_set_handler(const struct xattr_handler *handler,
+                                 struct dentry *dentry, struct inode *vinode,
+                                 const char *name, const void *value,
+                                 size_t size, int flags)
+{
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+       return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0,
+                       bch2_xattr_set(&trans, inode->v.i_ino,
+                                      &inode->ei_str_hash,
+                                      name, value, size,
+                                      handler->flags, flags));
+}
+
+static const struct xattr_handler bch_xattr_user_handler = {
+       .prefix = XATTR_USER_PREFIX,
+       .get    = bch2_xattr_get_handler,
+       .set    = bch2_xattr_set_handler,
+       .flags  = KEY_TYPE_XATTR_INDEX_USER,
+};
+
+static bool bch2_xattr_trusted_list(struct dentry *dentry)
+{
+       return capable(CAP_SYS_ADMIN);
+}
+
+static const struct xattr_handler bch_xattr_trusted_handler = {
+       .prefix = XATTR_TRUSTED_PREFIX,
+       .list   = bch2_xattr_trusted_list,
+       .get    = bch2_xattr_get_handler,
+       .set    = bch2_xattr_set_handler,
+       .flags  = KEY_TYPE_XATTR_INDEX_TRUSTED,
+};
+
+static const struct xattr_handler bch_xattr_security_handler = {
+       .prefix = XATTR_SECURITY_PREFIX,
+       .get    = bch2_xattr_get_handler,
+       .set    = bch2_xattr_set_handler,
+       .flags  = KEY_TYPE_XATTR_INDEX_SECURITY,
+};
+
+#ifndef NO_BCACHEFS_FS
+
+static int opt_to_inode_opt(int id)
+{
+       switch (id) {
+#define x(name, ...)                           \
+       case Opt_##name: return Inode_opt_##name;
+       BCH_INODE_OPTS()
+#undef  x
+       default:
+               return -1;
+       }
+}
+
+static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
+                               struct dentry *dentry, struct inode *vinode,
+                               const char *name, void *buffer, size_t size,
+                               bool all)
+{
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       struct bch_opts opts =
+               bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode));
+       const struct bch_option *opt;
+       int id, inode_opt_id;
+       char buf[512];
+       struct printbuf out = PBUF(buf);
+       unsigned val_len;
+       u64 v;
+
+       id = bch2_opt_lookup(name);
+       if (id < 0 || !bch2_opt_is_inode_opt(id))
+               return -EINVAL;
+
+       inode_opt_id = opt_to_inode_opt(id);
+       if (inode_opt_id < 0)
+               return -EINVAL;
+
+       opt = bch2_opt_table + id;
+
+       if (!bch2_opt_defined_by_id(&opts, id))
+               return -ENODATA;
+
+       if (!all &&
+           !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id)))
+               return -ENODATA;
+
+       v = bch2_opt_get_by_id(&opts, id);
+       bch2_opt_to_text(&out, c, opt, v, 0);
+
+       val_len = out.pos - buf;
+
+       if (buffer && val_len > size)
+               return -ERANGE;
+
+       if (buffer)
+               memcpy(buffer, buf, val_len);
+       return val_len;
+}
+
+static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
+                                  struct dentry *dentry, struct inode *vinode,
+                                  const char *name, void *buffer, size_t size)
+{
+       return __bch2_xattr_bcachefs_get(handler, dentry, vinode,
+                                        name, buffer, size, false);
+}
+
+struct inode_opt_set {
+       int                     id;
+       u64                     v;
+       bool                    defined;
+};
+
+static int inode_opt_set_fn(struct bch_inode_info *inode,
+                           struct bch_inode_unpacked *bi,
+                           void *p)
+{
+       struct inode_opt_set *s = p;
+
+       if (s->defined)
+               bi->bi_fields_set |= 1U << s->id;
+       else
+               bi->bi_fields_set &= ~(1U << s->id);
+
+       bch2_inode_opt_set(bi, s->id, s->v);
+
+       return 0;
+}
+
+static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
+                                  struct dentry *dentry, struct inode *vinode,
+                                  const char *name, const void *value,
+                                  size_t size, int flags)
+{
+       struct bch_inode_info *inode = to_bch_ei(vinode);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       const struct bch_option *opt;
+       char *buf;
+       struct inode_opt_set s;
+       int opt_id, inode_opt_id, ret;
+
+       opt_id = bch2_opt_lookup(name);
+       if (opt_id < 0)
+               return -EINVAL;
+
+       opt = bch2_opt_table + opt_id;
+
+       inode_opt_id = opt_to_inode_opt(opt_id);
+       if (inode_opt_id < 0)
+               return -EINVAL;
+
+       s.id = inode_opt_id;
+
+       if (value) {
+               u64 v = 0;
+
+               buf = kmalloc(size + 1, GFP_KERNEL);
+               if (!buf)
+                       return -ENOMEM;
+               memcpy(buf, value, size);
+               buf[size] = '\0';
+
+               ret = bch2_opt_parse(c, opt, buf, &v);
+               kfree(buf);
+
+               if (ret < 0)
+                       return ret;
+
+               ret = bch2_opt_check_may_set(c, opt_id, v);
+               if (ret < 0)
+                       return ret;
+
+               s.v = v + 1;
+               s.defined = true;
+       } else {
+               if (!IS_ROOT(dentry)) {
+                       struct bch_inode_info *dir =
+                               to_bch_ei(d_inode(dentry->d_parent));
+
+                       s.v = bch2_inode_opt_get(&dir->ei_inode, inode_opt_id);
+               } else {
+                       s.v = 0;
+               }
+
+               s.defined = false;
+       }
+
+       mutex_lock(&inode->ei_update_lock);
+       if (inode_opt_id == Inode_opt_project) {
+               /*
+                * inode fields accessible via the xattr interface are stored
+                * with a +1 bias, so that 0 means unset:
+                */
+               ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0);
+               if (ret)
+                       goto err;
+       }
+
+       ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
+err:
+       mutex_unlock(&inode->ei_update_lock);
+
+       if (value &&
+           (opt_id == Opt_background_compression ||
+            opt_id == Opt_background_target))
+               bch2_rebalance_add_work(c, inode->v.i_blocks);
+
+       return ret;
+}
+
+static const struct xattr_handler bch_xattr_bcachefs_handler = {
+       .prefix = "bcachefs.",
+       .get    = bch2_xattr_bcachefs_get,
+       .set    = bch2_xattr_bcachefs_set,
+};
+
+static int bch2_xattr_bcachefs_get_effective(
+                               const struct xattr_handler *handler,
+                               struct dentry *dentry, struct inode *vinode,
+                               const char *name, void *buffer, size_t size)
+{
+       return __bch2_xattr_bcachefs_get(handler, dentry, vinode,
+                                        name, buffer, size, true);
+}
+
+static const struct xattr_handler bch_xattr_bcachefs_effective_handler = {
+       .prefix = "bcachefs_effective.",
+       .get    = bch2_xattr_bcachefs_get_effective,
+       .set    = bch2_xattr_bcachefs_set,
+};
+
+#endif /* NO_BCACHEFS_FS */
+
+const struct xattr_handler *bch2_xattr_handlers[] = {
+       &bch_xattr_user_handler,
+       &posix_acl_access_xattr_handler,
+       &posix_acl_default_xattr_handler,
+       &bch_xattr_trusted_handler,
+       &bch_xattr_security_handler,
+#ifndef NO_BCACHEFS_FS
+       &bch_xattr_bcachefs_handler,
+       &bch_xattr_bcachefs_effective_handler,
+#endif
+       NULL
+};
+
+static const struct xattr_handler *bch_xattr_handler_map[] = {
+       [KEY_TYPE_XATTR_INDEX_USER]                     = &bch_xattr_user_handler,
+       [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] =
+               &posix_acl_access_xattr_handler,
+       [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT]        =
+               &posix_acl_default_xattr_handler,
+       [KEY_TYPE_XATTR_INDEX_TRUSTED]          = &bch_xattr_trusted_handler,
+       [KEY_TYPE_XATTR_INDEX_SECURITY]         = &bch_xattr_security_handler,
+};
+
+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type)
+{
+       return type < ARRAY_SIZE(bch_xattr_handler_map)
+               ? bch_xattr_handler_map[type]
+               : NULL;
+}
diff --git a/libbcachefs/xattr.h b/libbcachefs/xattr.h
new file mode 100644 (file)
index 0000000..4151065
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_XATTR_H
+#define _BCACHEFS_XATTR_H
+
+#include "str_hash.h"
+
+extern const struct bch_hash_desc bch2_xattr_hash_desc;
+
+const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_xattr (struct bkey_ops) {                \
+       .key_invalid    = bch2_xattr_invalid,           \
+       .val_to_text    = bch2_xattr_to_text,           \
+}
+
+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
+{
+       return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
+                           name_len + val_len, sizeof(u64));
+}
+
+#define xattr_val(_xattr)                                      \
+       ((void *) (_xattr)->x_name + (_xattr)->x_name_len)
+
+struct xattr_search_key {
+       u8              type;
+       struct qstr     name;
+};
+
+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key)        \
+       { .type = _type, .name = QSTR_INIT(_name, _len) })
+
+struct dentry;
+struct xattr_handler;
+struct bch_hash_info;
+struct bch_inode_info;
+
+int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *,
+                 const char *, void *, size_t, int);
+
+int bch2_xattr_set(struct btree_trans *, u64, const struct bch_hash_info *,
+                  const char *, const void *, size_t, int, int);
+
+ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
+
+extern const struct xattr_handler *bch2_xattr_handlers[];
+
+#endif /* _BCACHEFS_XATTR_H */
diff --git a/linux/atomic64.c b/linux/atomic64.c
new file mode 100644 (file)
index 0000000..4654d09
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+#ifdef ATOMIC64_SPINLOCK
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable.  Since this is expected to be used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS       16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+       raw_spinlock_t lock;
+       char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] ____cacheline_aligned_in_smp = {
+       [0 ... (NR_LOCKS - 1)] = {
+               .lock =  __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+       },
+};
+
+static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
+{
+       unsigned long addr = (unsigned long) v;
+
+       addr >>= L1_CACHE_SHIFT;
+       addr ^= (addr >> 8) ^ (addr >> 16);
+       return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+       unsigned long flags;
+       raw_spinlock_t *lock = lock_addr(v);
+       long long val;
+
+       raw_spin_lock_irqsave(lock, flags);
+       val = v->counter;
+       raw_spin_unlock_irqrestore(lock, flags);
+       return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+       unsigned long flags;
+       raw_spinlock_t *lock = lock_addr(v);
+
+       raw_spin_lock_irqsave(lock, flags);
+       v->counter = i;
+       raw_spin_unlock_irqrestore(lock, flags);
+}
+
+#define ATOMIC64_OP(op, c_op)                                          \
+void atomic64_##op(long long a, atomic64_t *v)                         \
+{                                                                      \
+       unsigned long flags;                                            \
+       raw_spinlock_t *lock = lock_addr(v);                            \
+                                                                       \
+       raw_spin_lock_irqsave(lock, flags);                             \
+       v->counter c_op a;                                              \
+       raw_spin_unlock_irqrestore(lock, flags);                        \
+}
+
+#define ATOMIC64_OP_RETURN(op, c_op)                                   \
+long long atomic64_##op##_return(long long a, atomic64_t *v)           \
+{                                                                      \
+       unsigned long flags;                                            \
+       raw_spinlock_t *lock = lock_addr(v);                            \
+       long long val;                                                  \
+                                                                       \
+       raw_spin_lock_irqsave(lock, flags);                             \
+       val = (v->counter c_op a);                                      \
+       raw_spin_unlock_irqrestore(lock, flags);                        \
+       return val;                                                     \
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op)                                    \
+long long atomic64_fetch_##op(long long a, atomic64_t *v)              \
+{                                                                      \
+       unsigned long flags;                                            \
+       raw_spinlock_t *lock = lock_addr(v);                            \
+       long long val;                                                  \
+                                                                       \
+       raw_spin_lock_irqsave(lock, flags);                             \
+       val = v->counter;                                               \
+       v->counter c_op a;                                              \
+       raw_spin_unlock_irqrestore(lock, flags);                        \
+       return val;                                                     \
+}
+
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op, c_op)                                           \
+       ATOMIC64_OP_RETURN(op, c_op)                                    \
+       ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(add, +=)
+ATOMIC64_OPS(sub, -=)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op)                                         \
+       ATOMIC64_OP(op, c_op)                                           \
+       ATOMIC64_OP_RETURN(op, c_op)                                    \
+       ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+       unsigned long flags;
+       raw_spinlock_t *lock = lock_addr(v);
+       long long val;
+
+       raw_spin_lock_irqsave(lock, flags);
+       val = v->counter - 1;
+       if (val >= 0)
+               v->counter = val;
+       raw_spin_unlock_irqrestore(lock, flags);
+       return val;
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+       unsigned long flags;
+       raw_spinlock_t *lock = lock_addr(v);
+       long long val;
+
+       raw_spin_lock_irqsave(lock, flags);
+       val = v->counter;
+       if (val == o)
+               v->counter = n;
+       raw_spin_unlock_irqrestore(lock, flags);
+       return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+       unsigned long flags;
+       raw_spinlock_t *lock = lock_addr(v);
+       long long val;
+
+       raw_spin_lock_irqsave(lock, flags);
+       val = v->counter;
+       v->counter = new;
+       raw_spin_unlock_irqrestore(lock, flags);
+       return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+       unsigned long flags;
+       raw_spinlock_t *lock = lock_addr(v);
+       int ret = 0;
+
+       raw_spin_lock_irqsave(lock, flags);
+       if (v->counter != u) {
+               v->counter += a;
+               ret = 1;
+       }
+       raw_spin_unlock_irqrestore(lock, flags);
+       return ret;
+}
+
+#endif
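
lock_addr() above hashes the variable's address down to one of the 16 locks, so independent atomic64_t variables rarely contend on the same spinlock. A standalone sketch of that hash, assuming 64-byte cache lines (L1_CACHE_SHIFT == 6):

#include <stdio.h>

#define L1_CACHE_SHIFT	6
#define NR_LOCKS	16

static unsigned lock_index(const void *v)
{
	unsigned long addr = (unsigned long) v;

	addr >>= L1_CACHE_SHIFT;		/* drop the within-line offset */
	addr ^= (addr >> 8) ^ (addr >> 16);	/* fold in higher address bits */
	return addr & (NR_LOCKS - 1);		/* pick one of the 16 locks */
}

int main(void)
{
	long long a, b;

	/* Nearby variables usually map to different locks. */
	printf("a -> lock %u, b -> lock %u\n", lock_index(&a), lock_index(&b));
	return 0;
}
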
diff --git a/linux/bio.c b/linux/bio.c
new file mode 100644 (file)
index 0000000..8422c26
--- /dev/null
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ *
+ */
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+static const struct {
+       int             err;
+       const char      *name;
+} blk_errors[] = {
+       [BLK_STS_OK]            = { 0,          "" },
+       [BLK_STS_NOTSUPP]       = { -EOPNOTSUPP, "operation not supported" },
+       [BLK_STS_TIMEOUT]       = { -ETIMEDOUT, "timeout" },
+       [BLK_STS_NOSPC]         = { -ENOSPC,    "critical space allocation" },
+       [BLK_STS_TRANSPORT]     = { -ENOLINK,   "recoverable transport" },
+       [BLK_STS_TARGET]        = { -EREMOTEIO, "critical target" },
+       [BLK_STS_NEXUS]         = { -EBADE,     "critical nexus" },
+       [BLK_STS_MEDIUM]        = { -ENODATA,   "critical medium" },
+       [BLK_STS_PROTECTION]    = { -EILSEQ,    "protection" },
+       [BLK_STS_RESOURCE]      = { -ENOMEM,    "kernel resource" },
+       [BLK_STS_AGAIN]         = { -EAGAIN,    "nonblocking retry" },
+
+       /* device mapper special case, should not leak out: */
+       [BLK_STS_DM_REQUEUE]    = { -EREMCHG, "dm internal retry" },
+
+       /* everything else not covered above: */
+       [BLK_STS_IOERR]         = { -EIO,       "I/O" },
+};
+
+int blk_status_to_errno(blk_status_t status)
+{
+       int idx = (__force int)status;
+
+       if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+               return -EIO;
+       return blk_errors[idx].err;
+}
+
+const char *blk_status_to_str(blk_status_t status)
+{
+       int idx = (__force int)status;
+
+       if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+               return "(invalid error)";
+       return blk_errors[idx].name;
+}
+
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+                       struct bio *src, struct bvec_iter *src_iter)
+{
+       struct bio_vec src_bv, dst_bv;
+       void *src_p, *dst_p;
+       unsigned bytes;
+
+       while (src_iter->bi_size && dst_iter->bi_size) {
+               src_bv = bio_iter_iovec(src, *src_iter);
+               dst_bv = bio_iter_iovec(dst, *dst_iter);
+
+               bytes = min(src_bv.bv_len, dst_bv.bv_len);
+
+               src_p = kmap_atomic(src_bv.bv_page);
+               dst_p = kmap_atomic(dst_bv.bv_page);
+
+               memcpy(dst_p + dst_bv.bv_offset,
+                      src_p + src_bv.bv_offset,
+                      bytes);
+
+               kunmap_atomic(dst_p);
+               kunmap_atomic(src_p);
+
+               flush_dcache_page(dst_bv.bv_page);
+
+               bio_advance_iter(src, src_iter, bytes);
+               bio_advance_iter(dst, dst_iter, bytes);
+       }
+}
+
+/**
+ * bio_copy_data - copy contents of data buffers from one bio to another
+ * @src: source bio
+ * @dst: destination bio
+ *
+ * Stops when it reaches the end of either @src or @dst - that is, copies
+ * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
+ */
+void bio_copy_data(struct bio *dst, struct bio *src)
+{
+       struct bvec_iter src_iter = src->bi_iter;
+       struct bvec_iter dst_iter = dst->bi_iter;
+
+       bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+}
+
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
+{
+       unsigned long flags;
+       struct bio_vec bv;
+       struct bvec_iter iter;
+
+       __bio_for_each_segment(bv, bio, iter, start) {
+               char *data = bvec_kmap_irq(&bv, &flags);
+               memset(data, 0, bv.bv_len);
+               bvec_kunmap_irq(data, &flags);
+       }
+}
+
+void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
+{
+       /*
+        * most users will be overriding ->bi_bdev with a new target,
+        * so we don't set nor calculate new physical/hw segment counts here
+        */
+       bio->bi_bdev = bio_src->bi_bdev;
+       bio_set_flag(bio, BIO_CLONED);
+       bio->bi_opf = bio_src->bi_opf;
+       bio->bi_iter = bio_src->bi_iter;
+       bio->bi_io_vec = bio_src->bi_io_vec;
+}
+
+struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
+{
+       struct bio *b;
+
+       b = bio_alloc_bioset(gfp_mask, 0, bs);
+       if (!b)
+               return NULL;
+
+       __bio_clone_fast(b, bio);
+       return b;
+}
+
+struct bio *bio_split(struct bio *bio, int sectors,
+                     gfp_t gfp, struct bio_set *bs)
+{
+       struct bio *split = NULL;
+
+       BUG_ON(sectors <= 0);
+       BUG_ON(sectors >= bio_sectors(bio));
+
+       /*
+        * Discards need a mutable bio_vec to accommodate the payload
+        * required by the DSM TRIM and UNMAP commands.
+        */
+       if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
+               split = bio_clone_bioset(bio, gfp, bs);
+       else
+               split = bio_clone_fast(bio, gfp, bs);
+
+       if (!split)
+               return NULL;
+
+       split->bi_iter.bi_size = sectors << 9;
+
+       bio_advance(bio, split->bi_iter.bi_size);
+
+       return split;
+}
+
+void bio_free_pages(struct bio *bio)
+{
+       struct bvec_iter_all iter;
+       struct bio_vec *bvec;
+
+       bio_for_each_segment_all(bvec, bio, iter)
+               __free_page(bvec->bv_page);
+}
+
+void bio_advance(struct bio *bio, unsigned bytes)
+{
+       bio_advance_iter(bio, &bio->bi_iter, bytes);
+}
+
+static void bio_free(struct bio *bio)
+{
+       unsigned front_pad = bio->bi_pool ? bio->bi_pool->front_pad : 0;
+
+       kfree((void *) bio - front_pad);
+}
+
+void bio_put(struct bio *bio)
+{
+       if (!bio_flagged(bio, BIO_REFFED))
+               bio_free(bio);
+       else {
+               BUG_ON(!atomic_read(&bio->__bi_cnt));
+
+               /*
+                * last put frees it
+                */
+               if (atomic_dec_and_test(&bio->__bi_cnt))
+                       bio_free(bio);
+       }
+}
+
+int bio_add_page(struct bio *bio, struct page *page,
+                unsigned int len, unsigned int off)
+{
+       struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+
+       WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+       WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs);
+
+       bv->bv_page = page;
+       bv->bv_offset = off;
+       bv->bv_len = len;
+
+       bio->bi_iter.bi_size += len;
+       bio->bi_vcnt++;
+       return len;
+}
+
+static inline bool bio_remaining_done(struct bio *bio)
+{
+       /*
+        * If we're not chaining, then ->__bi_remaining is always 1 and
+        * we always end io on the first invocation.
+        */
+       if (!bio_flagged(bio, BIO_CHAIN))
+               return true;
+
+       BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
+
+       if (atomic_dec_and_test(&bio->__bi_remaining)) {
+               bio_clear_flag(bio, BIO_CHAIN);
+               return true;
+       }
+
+       return false;
+}
+
+static struct bio *__bio_chain_endio(struct bio *bio)
+{
+       struct bio *parent = bio->bi_private;
+
+       if (!parent->bi_status)
+               parent->bi_status = bio->bi_status;
+       bio_put(bio);
+       return parent;
+}
+
+static void bio_chain_endio(struct bio *bio)
+{
+       bio_endio(__bio_chain_endio(bio));
+}
+
+void bio_endio(struct bio *bio)
+{
+again:
+       if (!bio_remaining_done(bio))
+               return;
+
+       /*
+        * Need to have a real endio function for chained bios, otherwise
+        * various corner cases will break (like stacking block devices that
+        * save/restore bi_end_io) - however, we want to avoid unbounded
+        * recursion and blowing the stack. Tail call optimization would
+        * handle this, but compiling with frame pointers also disables
+        * gcc's sibling call optimization.
+        */
+       if (bio->bi_end_io == bio_chain_endio) {
+               bio = __bio_chain_endio(bio);
+               goto again;
+       }
+
+       if (bio->bi_end_io)
+               bio->bi_end_io(bio);
+}
+
+void bio_reset(struct bio *bio)
+{
+       unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
+
+       memset(bio, 0, BIO_RESET_BYTES);
+       bio->bi_flags = flags;
+       atomic_set(&bio->__bi_remaining, 1);
+}
+
+struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
+{
+       unsigned front_pad = bs ? bs->front_pad : 0;
+       struct bio *bio;
+       void *p;
+
+       p = kmalloc(front_pad +
+                   sizeof(struct bio) +
+                   nr_iovecs * sizeof(struct bio_vec),
+                   gfp_mask);
+
+       if (unlikely(!p))
+               return NULL;
+
+       bio = p + front_pad;
+       bio_init(bio, bio->bi_inline_vecs, nr_iovecs);
+       bio->bi_pool = bs;
+
+       return bio;
+}
+
+struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
+                            struct bio_set *bs)
+{
+       struct bvec_iter iter;
+       struct bio_vec bv;
+       struct bio *bio;
+
+       bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
+       if (!bio)
+               return NULL;
+
+       bio->bi_bdev            = bio_src->bi_bdev;
+       bio->bi_opf             = bio_src->bi_opf;
+       bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
+       bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
+
+       switch (bio_op(bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_SECURE_ERASE:
+               break;
+       case REQ_OP_WRITE_SAME:
+               bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
+               break;
+       default:
+               bio_for_each_segment(bv, bio_src, iter)
+                       bio->bi_io_vec[bio->bi_vcnt++] = bv;
+               break;
+       }
+
+       return bio;
+}
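
bio_remaining_done() is what makes chained bios work: an unchained bio completes on its first bio_endio(), while a chained parent completes only after its own completion and every child reference have been dropped. A minimal single-threaded model of that counting, with plain ints standing in for the atomics:

#include <stdbool.h>
#include <stdio.h>

struct fake_bio {
	int	remaining;	/* models __bi_remaining */
	bool	chained;	/* models the BIO_CHAIN flag */
};

static bool remaining_done(struct fake_bio *bio)
{
	if (!bio->chained)
		return true;		/* unchained: done on first endio */
	return --bio->remaining == 0;	/* chained: last completion wins */
}

int main(void)
{
	struct fake_bio parent = { .remaining = 1, .chained = false };

	/* like bio_chain(): the child takes a reference on the parent */
	parent.chained = true;
	parent.remaining++;

	printf("child completes  -> parent done? %d\n", remaining_done(&parent));	/* 0 */
	printf("parent completes -> parent done? %d\n", remaining_done(&parent));	/* 1 */
	return 0;
}
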
diff --git a/linux/blkdev.c b/linux/blkdev.c
new file mode 100644 (file)
index 0000000..709e770
--- /dev/null
@@ -0,0 +1,323 @@
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include <libaio.h>
+
+#ifdef CONFIG_VALGRIND
+#include <valgrind/memcheck.h>
+#endif
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/completion.h>
+#include <linux/fs.h>
+#include <linux/kthread.h>
+
+#include "tools-util.h"
+
+static io_context_t aio_ctx;
+static atomic_t running_requests;
+
+void generic_make_request(struct bio *bio)
+{
+       struct iovec *iov;
+       struct bvec_iter iter;
+       struct bio_vec bv;
+       ssize_t ret;
+       unsigned i;
+
+       if (bio->bi_opf & REQ_PREFLUSH) {
+               ret = fdatasync(bio->bi_bdev->bd_fd);
+               if (ret) {
+                       fprintf(stderr, "fsync error: %m\n");
+                       bio->bi_status = BLK_STS_IOERR;
+                       bio_endio(bio);
+                       return;
+               }
+       }
+
+       i = 0;
+       bio_for_each_segment(bv, bio, iter)
+               i++;
+
+       iov = alloca(sizeof(*iov) * i);
+
+       i = 0;
+       bio_for_each_segment(bv, bio, iter) {
+               void *start = page_address(bv.bv_page) + bv.bv_offset;
+               size_t len = bv.bv_len;
+
+               iov[i++] = (struct iovec) {
+                       .iov_base = start,
+                       .iov_len = len,
+               };
+
+#ifdef CONFIG_VALGRIND
+               /* To be pedantic, this should happen only on IO completion. */
+               if (bio_op(bio) == REQ_OP_READ)
+                       VALGRIND_MAKE_MEM_DEFINED(start, len);
+#endif
+       }
+
+       struct iocb iocb = {
+               .data           = bio,
+               .aio_fildes     = bio->bi_opf & REQ_FUA
+                       ? bio->bi_bdev->bd_sync_fd
+                       : bio->bi_bdev->bd_fd,
+       }, *iocbp = &iocb;
+
+       switch (bio_op(bio)) {
+       case REQ_OP_READ:
+               iocb.aio_lio_opcode     = IO_CMD_PREADV;
+               iocb.u.v.vec            = iov;
+               iocb.u.v.nr             = i;
+               iocb.u.v.offset         = bio->bi_iter.bi_sector << 9;
+
+               atomic_inc(&running_requests);
+               ret = io_submit(aio_ctx, 1, &iocbp);
+               if (ret != 1)
+                       die("io_submit err: %s", strerror(-ret));
+               break;
+       case REQ_OP_WRITE:
+               iocb.aio_lio_opcode     = IO_CMD_PWRITEV;
+               iocb.u.v.vec            = iov;
+               iocb.u.v.nr             = i;
+               iocb.u.v.offset         = bio->bi_iter.bi_sector << 9;
+
+               atomic_inc(&running_requests);
+               ret = io_submit(aio_ctx, 1, &iocbp);
+               if (ret != 1)
+                       die("io_submit err: %s", strerror(-ret));
+               break;
+       case REQ_OP_FLUSH:
+               ret = fsync(bio->bi_bdev->bd_fd);
+               if (ret)
+                       die("fsync error: %m");
+               bio_endio(bio);
+               break;
+       default:
+               BUG();
+       }
+}
+
+static void submit_bio_wait_endio(struct bio *bio)
+{
+       complete(bio->bi_private);
+}
+
+int submit_bio_wait(struct bio *bio)
+{
+       struct completion done;
+
+       init_completion(&done);
+       bio->bi_private = &done;
+       bio->bi_end_io = submit_bio_wait_endio;
+       bio->bi_opf |= REQ_SYNC;
+       submit_bio(bio);
+       wait_for_completion(&done);
+
+       return blk_status_to_errno(bio->bi_status);
+}
+
+int blkdev_issue_discard(struct block_device *bdev,
+                        sector_t sector, sector_t nr_sects,
+                        gfp_t gfp_mask, unsigned long flags)
+{
+       return 0;
+}
+
+unsigned bdev_logical_block_size(struct block_device *bdev)
+{
+       struct stat statbuf;
+       unsigned blksize;
+       int ret;
+
+       ret = fstat(bdev->bd_fd, &statbuf);
+       BUG_ON(ret);
+
+       if (!S_ISBLK(statbuf.st_mode))
+               return statbuf.st_blksize >> 9;
+
+       ret = ioctl(bdev->bd_fd, BLKPBSZGET, &blksize);
+       BUG_ON(ret);
+
+       return blksize >> 9;
+}
+
+sector_t get_capacity(struct gendisk *disk)
+{
+       struct block_device *bdev =
+               container_of(disk, struct block_device, __bd_disk);
+       struct stat statbuf;
+       u64 bytes;
+       int ret;
+
+       ret = fstat(bdev->bd_fd, &statbuf);
+       BUG_ON(ret);
+
+       if (!S_ISBLK(statbuf.st_mode))
+               return statbuf.st_size >> 9;
+
+       ret = ioctl(bdev->bd_fd, BLKGETSIZE64, &bytes);
+       BUG_ON(ret);
+
+       return bytes >> 9;
+}
+
+void blkdev_put(struct block_device *bdev, fmode_t mode)
+{
+       fdatasync(bdev->bd_fd);
+       close(bdev->bd_sync_fd);
+       close(bdev->bd_fd);
+       free(bdev);
+}
+
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+                                       void *holder)
+{
+       struct block_device *bdev;
+       int fd, sync_fd, flags = O_DIRECT;
+
+       if ((mode & (FMODE_READ|FMODE_WRITE)) == (FMODE_READ|FMODE_WRITE))
+               flags = O_RDWR;
+       else if (mode & FMODE_READ)
+               flags = O_RDONLY;
+       else if (mode & FMODE_WRITE)
+               flags = O_WRONLY;
+
+#if 0
+       /* using O_EXCL doesn't work with opening twice for an O_SYNC fd: */
+       if (mode & FMODE_EXCL)
+               flags |= O_EXCL;
+#endif
+
+       fd = open(path, flags);
+       if (fd < 0)
+               return ERR_PTR(-errno);
+
+       sync_fd = open(path, flags|O_SYNC);
+       if (sync_fd < 0) {
+               assert(0);
+               close(fd);
+               return ERR_PTR(-errno);
+       }
+
+       bdev = malloc(sizeof(*bdev));
+       memset(bdev, 0, sizeof(*bdev));
+
+       strncpy(bdev->name, path, sizeof(bdev->name));
+       bdev->name[sizeof(bdev->name) - 1] = '\0';
+
+       bdev->bd_fd             = fd;
+       bdev->bd_sync_fd        = sync_fd;
+       bdev->bd_holder         = holder;
+       bdev->bd_disk           = &bdev->__bd_disk;
+       bdev->bd_bdi            = &bdev->__bd_bdi;
+       bdev->queue.backing_dev_info = bdev->bd_bdi;
+
+       return bdev;
+}
+
+void bdput(struct block_device *bdev)
+{
+       BUG();
+}
+
+struct block_device *lookup_bdev(const char *path)
+{
+       return ERR_PTR(-EINVAL);
+}
+
+static int aio_completion_thread(void *arg)
+{
+       struct io_event events[8], *ev;
+       int ret;
+       bool stop = false;
+
+       while (!stop) {
+               ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),
+                                  events, NULL);
+
+               if (ret < 0 && ret == -EINTR)
+                       continue;
+               if (ret < 0)
+                       die("io_getevents() error: %s", strerror(-ret));
+
+               for (ev = events; ev < events + ret; ev++) {
+                       struct bio *bio = (struct bio *) ev->data;
+
+                       /* This should only happen during blkdev_cleanup() */
+                       if (!bio) {
+                               BUG_ON(atomic_read(&running_requests) != 0);
+                               stop = true;
+                               continue;
+                       }
+
+                       if (ev->res != bio->bi_iter.bi_size)
+                               bio->bi_status = BLK_STS_IOERR;
+
+                       bio_endio(bio);
+                       atomic_dec(&running_requests);
+               }
+       }
+
+       return 0;
+}
+
+static struct task_struct *aio_task = NULL;
+
+__attribute__((constructor(102)))
+static void blkdev_init(void)
+{
+       struct task_struct *p;
+
+       if (io_setup(256, &aio_ctx))
+               die("io_setup() error: %m");
+
+       p = kthread_run(aio_completion_thread, NULL, "aio_completion");
+       BUG_ON(IS_ERR(p));
+
+       aio_task = p;
+}
+
+__attribute__((destructor(102)))
+static void blkdev_cleanup(void)
+{
+       struct task_struct *p = NULL;
+       swap(aio_task, p);
+       get_task_struct(p);
+
+       /* I mean, really?! IO_CMD_NOOP is even defined, but not implemented. */
+       int fds[2];
+       int ret = pipe(fds);
+       if (ret != 0)
+               die("pipe err: %s", strerror(ret));
+
+       /* Wake up the completion thread with spurious work. */
+       int junk = 0;
+       struct iocb iocb = {
+               .aio_lio_opcode = IO_CMD_PWRITE,
+               .data = NULL, /* Signal to stop */
+               .aio_fildes = fds[1],
+               .u.c.buf = &junk,
+               .u.c.nbytes = 1,
+       }, *iocbp = &iocb;
+       ret = io_submit(aio_ctx, 1, &iocbp);
+       if (ret != 1)
+               die("io_submit cleanup err: %s", strerror(-ret));
+
+       ret = kthread_stop(p);
+       BUG_ON(ret);
+
+       put_task_struct(p);
+
+       close(fds[0]);
+       close(fds[1]);
+}
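
generic_make_request() above maps each bio onto a single libaio request: the segments become an iovec array, REQ_OP_READ/WRITE become IO_CMD_PREADV/PWRITEV at byte offset bi_sector << 9, and a completion thread reaps the events. A standalone libaio round trip with the same submit/reap shape (build with -laio; error handling abbreviated):

#include <libaio.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char path[] = "/tmp/aio-demo-XXXXXX";
	int fd = mkstemp(path);
	char buf[16] = "";
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	io_context_t ctx = NULL;
	struct io_event ev;
	struct iocb iocb, *iocbp = &iocb;

	if (fd < 0 || pwrite(fd, "hello, bcachefs", 15, 0) != 15)
		return 1;

	if (io_setup(1, &ctx))			/* like blkdev_init() */
		return 1;

	io_prep_preadv(&iocb, fd, &iov, 1, 0);	/* the REQ_OP_READ case */
	if (io_submit(ctx, 1, &iocbp) != 1)	/* like generic_make_request() */
		return 1;

	io_getevents(ctx, 1, 1, &ev, NULL);	/* like aio_completion_thread() */
	printf("read %ld bytes: %s\n", (long) ev.res, buf);

	io_destroy(ctx);
	close(fd);
	unlink(path);
	return 0;
}
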
diff --git a/linux/closure.c b/linux/closure.c
new file mode 100644 (file)
index 0000000..26a2935
--- /dev/null
@@ -0,0 +1,211 @@
+/*
+ * Asynchronous refcounty things
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include <linux/closure.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/seq_file.h>
+
+static inline void closure_put_after_sub(struct closure *cl, int flags)
+{
+       int r = flags & CLOSURE_REMAINING_MASK;
+
+       BUG_ON(flags & CLOSURE_GUARD_MASK);
+       BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
+
+       if (!r) {
+               if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
+                       atomic_set(&cl->remaining,
+                                  CLOSURE_REMAINING_INITIALIZER);
+                       closure_queue(cl);
+               } else {
+                       struct closure *parent = cl->parent;
+                       closure_fn *destructor = cl->fn;
+
+                       closure_debug_destroy(cl);
+
+                       if (destructor)
+                               destructor(cl);
+
+                       if (parent)
+                               closure_put(parent);
+               }
+       }
+}
+
+/* For clearing flags with the same atomic op as a put */
+void closure_sub(struct closure *cl, int v)
+{
+       closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
+}
+EXPORT_SYMBOL(closure_sub);
+
+/**
+ * closure_put - decrement a closure's refcount
+ */
+void closure_put(struct closure *cl)
+{
+       closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
+}
+EXPORT_SYMBOL(closure_put);
+
+/**
+ * __closure_wake_up - wake up all closures on a wait list, without memory barrier
+ */
+void __closure_wake_up(struct closure_waitlist *wait_list)
+{
+       struct llist_node *list, *next;
+       struct closure *cl;
+
+       /*
+        * Grab entire list, reverse order to preserve FIFO ordering, and wake
+        * everything up
+        */
+       for (list = llist_reverse_order(llist_del_all(&wait_list->list));
+            list;
+            list = next) {
+               next = llist_next(list);
+               cl = container_of(list, struct closure, list);
+
+               closure_set_waiting(cl, 0);
+               closure_sub(cl, CLOSURE_WAITING + 1);
+       }
+}
+EXPORT_SYMBOL(__closure_wake_up);
+
+/**
+ * closure_wait - add a closure to a waitlist
+ *
+ * @waitlist will own a ref on @cl, which will be released when
+ * closure_wake_up() is called on @waitlist.
+ *
+ */
+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
+{
+       if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
+               return false;
+
+       closure_set_waiting(cl, _RET_IP_);
+       atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
+       llist_add(&cl->list, &waitlist->list);
+
+       return true;
+}
+EXPORT_SYMBOL(closure_wait);
+
+struct closure_syncer {
+       struct task_struct      *task;
+       int                     done;
+};
+
+static void closure_sync_fn(struct closure *cl)
+{
+       cl->s->done = 1;
+       wake_up_process(cl->s->task);
+}
+
+void __sched __closure_sync(struct closure *cl)
+{
+       struct closure_syncer s = { .task = current };
+
+       cl->s = &s;
+       continue_at_noreturn(cl, closure_sync_fn, NULL);
+
+       while (1) {
+               __set_current_state(TASK_UNINTERRUPTIBLE);
+               smp_mb();
+               if (s.done)
+                       break;
+               schedule();
+       }
+
+       __set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL(__closure_sync);
+
+#ifdef CONFIG_DEBUG_CLOSURES
+
+static LIST_HEAD(closure_list);
+static DEFINE_SPINLOCK(closure_list_lock);
+
+void closure_debug_create(struct closure *cl)
+{
+       unsigned long flags;
+
+       BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE);
+       cl->magic = CLOSURE_MAGIC_ALIVE;
+
+       spin_lock_irqsave(&closure_list_lock, flags);
+       list_add(&cl->all, &closure_list);
+       spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL(closure_debug_create);
+
+void closure_debug_destroy(struct closure *cl)
+{
+       unsigned long flags;
+
+       BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
+       cl->magic = CLOSURE_MAGIC_DEAD;
+
+       spin_lock_irqsave(&closure_list_lock, flags);
+       list_del(&cl->all);
+       spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL(closure_debug_destroy);
+
+static struct dentry *debug;
+
+static int debug_seq_show(struct seq_file *f, void *data)
+{
+       struct closure *cl;
+
+       spin_lock_irq(&closure_list_lock);
+
+       list_for_each_entry(cl, &closure_list, all) {
+               int r = atomic_read(&cl->remaining);
+
+               seq_printf(f, "%p: %pF -> %pf p %p r %i ",
+                          cl, (void *) cl->ip, cl->fn, cl->parent,
+                          r & CLOSURE_REMAINING_MASK);
+
+               seq_printf(f, "%s%s\n",
+                          test_bit(WORK_STRUCT_PENDING_BIT,
+                                   work_data_bits(&cl->work)) ? "Q" : "",
+                          r & CLOSURE_RUNNING  ? "R" : "");
+
+               if (r & CLOSURE_WAITING)
+                       seq_printf(f, " W %pF\n",
+                                  (void *) cl->waiting_on);
+
+               seq_puts(f, "\n");
+       }
+
+       spin_unlock_irq(&closure_list_lock);
+       return 0;
+}
+
+static int debug_seq_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, debug_seq_show, NULL);
+}
+
+static const struct file_operations debug_ops = {
+       .owner          = THIS_MODULE,
+       .open           = debug_seq_open,
+       .read           = seq_read,
+       .release        = single_release
+};
+
+static int __init closure_debug_init(void)
+{
+       debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+       return 0;
+}
+late_initcall(closure_debug_init)
+
+#endif
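
The closure refcount packs flags and a reference count into one atomic word: closure_wait() adds CLOSURE_WAITING plus one reference in a single atomic_add(), and __closure_wake_up() drops both with one closure_sub(). A minimal model of that packing, with illustrative flag values rather than the real closure.h constants:

#include <stdio.h>

#define REMAINING_MASK	0x00ff	/* illustrative, not the closure.h layout */
#define WAITING		0x0100	/* illustrative CLOSURE_WAITING */

int main(void)
{
	int remaining = 1;		/* one reference, no flags */

	remaining += WAITING + 1;	/* closure_wait(): flag + ref, one add */
	printf("after wait: %d refs, waiting=%d\n",
	       remaining & REMAINING_MASK, !!(remaining & WAITING));

	remaining -= WAITING + 1;	/* __closure_wake_up() via closure_sub() */
	printf("after wake: %d refs, waiting=%d\n",
	       remaining & REMAINING_MASK, !!(remaining & WAITING));
	return 0;
}
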
diff --git a/linux/crc64.c b/linux/crc64.c
new file mode 100644 (file)
index 0000000..0ef8ae6
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following the ECMA-182 specification,
+ * which can be found at:
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document introducing the idea behind
+ * CRC algorithms; the CRC64 code here is also inspired by the table-driven
+ * algorithm and the detailed example from that paper, which can be found
+ * at:
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table of a table-driven 64-bit CRC
+ * calculation, which is generated by gen_crc64table.c in kernel build
+ * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
+ * as well, which is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+ *     or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+       size_t i, t;
+
+       const unsigned char *_p = p;
+
+       for (i = 0; i < len; i++) {
+               t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+               crc = crc64table[t] ^ (crc << 8);
+       }
+
+       return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
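
Per the @crc kerneldoc above, feeding data in chunks with the previous return value as the seed matches a one-shot computation. A self-contained check of that contract, using a bitwise ECMA-182 CRC64 that computes the same function as the table-driven code:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define CRC64_POLY 0x42f0e1eba9ea3693ULL	/* ECMA-182 polynomial */

static uint64_t crc64_bitwise(uint64_t crc, const void *p, size_t len)
{
	const unsigned char *b = p;

	while (len--) {
		crc ^= (uint64_t) *b++ << 56;	/* MSB-first, as in crc64_be() */
		for (int i = 0; i < 8; i++)
			crc = crc & (1ULL << 63)
				? (crc << 1) ^ CRC64_POLY
				: crc << 1;
	}
	return crc;
}

int main(void)
{
	const char *msg = "123456789";
	size_t len = strlen(msg);

	uint64_t one   = crc64_bitwise(0, msg, len);
	uint64_t split = crc64_bitwise(crc64_bitwise(0, msg, 4), msg + 4, len - 4);

	/* both lines print the same value */
	printf("one-shot    %016llx\n", (unsigned long long) one);
	printf("incremental %016llx\n", (unsigned long long) split);
	return 0;
}
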
diff --git a/linux/crc64table.h b/linux/crc64table.h
new file mode 100644 (file)
index 0000000..9964164
--- /dev/null
@@ -0,0 +1,135 @@
+/* this file is generated - do not edit */
+
+#include <linux/types.h>
+#include <linux/cache.h>
+
+static const u64 ____cacheline_aligned crc64table[256] = {
+       0x0000000000000000ULL,  0x42f0e1eba9ea3693ULL,
+       0x85e1c3d753d46d26ULL,  0xc711223cfa3e5bb5ULL,
+       0x493366450e42ecdfULL,  0x0bc387aea7a8da4cULL,
+       0xccd2a5925d9681f9ULL,  0x8e224479f47cb76aULL,
+       0x9266cc8a1c85d9beULL,  0xd0962d61b56fef2dULL,
+       0x17870f5d4f51b498ULL,  0x5577eeb6e6bb820bULL,
+       0xdb55aacf12c73561ULL,  0x99a54b24bb2d03f2ULL,
+       0x5eb4691841135847ULL,  0x1c4488f3e8f96ed4ULL,
+       0x663d78ff90e185efULL,  0x24cd9914390bb37cULL,
+       0xe3dcbb28c335e8c9ULL,  0xa12c5ac36adfde5aULL,
+       0x2f0e1eba9ea36930ULL,  0x6dfeff5137495fa3ULL,
+       0xaaefdd6dcd770416ULL,  0xe81f3c86649d3285ULL,
+       0xf45bb4758c645c51ULL,  0xb6ab559e258e6ac2ULL,
+       0x71ba77a2dfb03177ULL,  0x334a9649765a07e4ULL,
+       0xbd68d2308226b08eULL,  0xff9833db2bcc861dULL,
+       0x388911e7d1f2dda8ULL,  0x7a79f00c7818eb3bULL,
+       0xcc7af1ff21c30bdeULL,  0x8e8a101488293d4dULL,
+       0x499b3228721766f8ULL,  0x0b6bd3c3dbfd506bULL,
+       0x854997ba2f81e701ULL,  0xc7b97651866bd192ULL,
+       0x00a8546d7c558a27ULL,  0x4258b586d5bfbcb4ULL,
+       0x5e1c3d753d46d260ULL,  0x1cecdc9e94ace4f3ULL,
+       0xdbfdfea26e92bf46ULL,  0x990d1f49c77889d5ULL,
+       0x172f5b3033043ebfULL,  0x55dfbadb9aee082cULL,
+       0x92ce98e760d05399ULL,  0xd03e790cc93a650aULL,
+       0xaa478900b1228e31ULL,  0xe8b768eb18c8b8a2ULL,
+       0x2fa64ad7e2f6e317ULL,  0x6d56ab3c4b1cd584ULL,
+       0xe374ef45bf6062eeULL,  0xa1840eae168a547dULL,
+       0x66952c92ecb40fc8ULL,  0x2465cd79455e395bULL,
+       0x3821458aada7578fULL,  0x7ad1a461044d611cULL,
+       0xbdc0865dfe733aa9ULL,  0xff3067b657990c3aULL,
+       0x711223cfa3e5bb50ULL,  0x33e2c2240a0f8dc3ULL,
+       0xf4f3e018f031d676ULL,  0xb60301f359dbe0e5ULL,
+       0xda050215ea6c212fULL,  0x98f5e3fe438617bcULL,
+       0x5fe4c1c2b9b84c09ULL,  0x1d14202910527a9aULL,
+       0x93366450e42ecdf0ULL,  0xd1c685bb4dc4fb63ULL,
+       0x16d7a787b7faa0d6ULL,  0x5427466c1e109645ULL,
+       0x4863ce9ff6e9f891ULL,  0x0a932f745f03ce02ULL,
+       0xcd820d48a53d95b7ULL,  0x8f72eca30cd7a324ULL,
+       0x0150a8daf8ab144eULL,  0x43a04931514122ddULL,
+       0x84b16b0dab7f7968ULL,  0xc6418ae602954ffbULL,
+       0xbc387aea7a8da4c0ULL,  0xfec89b01d3679253ULL,
+       0x39d9b93d2959c9e6ULL,  0x7b2958d680b3ff75ULL,
+       0xf50b1caf74cf481fULL,  0xb7fbfd44dd257e8cULL,
+       0x70eadf78271b2539ULL,  0x321a3e938ef113aaULL,
+       0x2e5eb66066087d7eULL,  0x6cae578bcfe24bedULL,
+       0xabbf75b735dc1058ULL,  0xe94f945c9c3626cbULL,
+       0x676dd025684a91a1ULL,  0x259d31cec1a0a732ULL,
+       0xe28c13f23b9efc87ULL,  0xa07cf2199274ca14ULL,
+       0x167ff3eacbaf2af1ULL,  0x548f120162451c62ULL,
+       0x939e303d987b47d7ULL,  0xd16ed1d631917144ULL,
+       0x5f4c95afc5edc62eULL,  0x1dbc74446c07f0bdULL,
+       0xdaad56789639ab08ULL,  0x985db7933fd39d9bULL,
+       0x84193f60d72af34fULL,  0xc6e9de8b7ec0c5dcULL,
+       0x01f8fcb784fe9e69ULL,  0x43081d5c2d14a8faULL,
+       0xcd2a5925d9681f90ULL,  0x8fdab8ce70822903ULL,
+       0x48cb9af28abc72b6ULL,  0x0a3b7b1923564425ULL,
+       0x70428b155b4eaf1eULL,  0x32b26afef2a4998dULL,
+       0xf5a348c2089ac238ULL,  0xb753a929a170f4abULL,
+       0x3971ed50550c43c1ULL,  0x7b810cbbfce67552ULL,
+       0xbc902e8706d82ee7ULL,  0xfe60cf6caf321874ULL,
+       0xe224479f47cb76a0ULL,  0xa0d4a674ee214033ULL,
+       0x67c58448141f1b86ULL,  0x253565a3bdf52d15ULL,
+       0xab1721da49899a7fULL,  0xe9e7c031e063acecULL,
+       0x2ef6e20d1a5df759ULL,  0x6c0603e6b3b7c1caULL,
+       0xf6fae5c07d3274cdULL,  0xb40a042bd4d8425eULL,
+       0x731b26172ee619ebULL,  0x31ebc7fc870c2f78ULL,
+       0xbfc9838573709812ULL,  0xfd39626eda9aae81ULL,
+       0x3a28405220a4f534ULL,  0x78d8a1b9894ec3a7ULL,
+       0x649c294a61b7ad73ULL,  0x266cc8a1c85d9be0ULL,
+       0xe17dea9d3263c055ULL,  0xa38d0b769b89f6c6ULL,
+       0x2daf4f0f6ff541acULL,  0x6f5faee4c61f773fULL,
+       0xa84e8cd83c212c8aULL,  0xeabe6d3395cb1a19ULL,
+       0x90c79d3fedd3f122ULL,  0xd2377cd44439c7b1ULL,
+       0x15265ee8be079c04ULL,  0x57d6bf0317edaa97ULL,
+       0xd9f4fb7ae3911dfdULL,  0x9b041a914a7b2b6eULL,
+       0x5c1538adb04570dbULL,  0x1ee5d94619af4648ULL,
+       0x02a151b5f156289cULL,  0x4051b05e58bc1e0fULL,
+       0x87409262a28245baULL,  0xc5b073890b687329ULL,
+       0x4b9237f0ff14c443ULL,  0x0962d61b56fef2d0ULL,
+       0xce73f427acc0a965ULL,  0x8c8315cc052a9ff6ULL,
+       0x3a80143f5cf17f13ULL,  0x7870f5d4f51b4980ULL,
+       0xbf61d7e80f251235ULL,  0xfd913603a6cf24a6ULL,
+       0x73b3727a52b393ccULL,  0x31439391fb59a55fULL,
+       0xf652b1ad0167feeaULL,  0xb4a25046a88dc879ULL,
+       0xa8e6d8b54074a6adULL,  0xea16395ee99e903eULL,
+       0x2d071b6213a0cb8bULL,  0x6ff7fa89ba4afd18ULL,
+       0xe1d5bef04e364a72ULL,  0xa3255f1be7dc7ce1ULL,
+       0x64347d271de22754ULL,  0x26c49cccb40811c7ULL,
+       0x5cbd6cc0cc10fafcULL,  0x1e4d8d2b65facc6fULL,
+       0xd95caf179fc497daULL,  0x9bac4efc362ea149ULL,
+       0x158e0a85c2521623ULL,  0x577eeb6e6bb820b0ULL,
+       0x906fc95291867b05ULL,  0xd29f28b9386c4d96ULL,
+       0xcedba04ad0952342ULL,  0x8c2b41a1797f15d1ULL,
+       0x4b3a639d83414e64ULL,  0x09ca82762aab78f7ULL,
+       0x87e8c60fded7cf9dULL,  0xc51827e4773df90eULL,
+       0x020905d88d03a2bbULL,  0x40f9e43324e99428ULL,
+       0x2cffe7d5975e55e2ULL,  0x6e0f063e3eb46371ULL,
+       0xa91e2402c48a38c4ULL,  0xebeec5e96d600e57ULL,
+       0x65cc8190991cb93dULL,  0x273c607b30f68faeULL,
+       0xe02d4247cac8d41bULL,  0xa2dda3ac6322e288ULL,
+       0xbe992b5f8bdb8c5cULL,  0xfc69cab42231bacfULL,
+       0x3b78e888d80fe17aULL,  0x7988096371e5d7e9ULL,
+       0xf7aa4d1a85996083ULL,  0xb55aacf12c735610ULL,
+       0x724b8ecdd64d0da5ULL,  0x30bb6f267fa73b36ULL,
+       0x4ac29f2a07bfd00dULL,  0x08327ec1ae55e69eULL,
+       0xcf235cfd546bbd2bULL,  0x8dd3bd16fd818bb8ULL,
+       0x03f1f96f09fd3cd2ULL,  0x41011884a0170a41ULL,
+       0x86103ab85a2951f4ULL,  0xc4e0db53f3c36767ULL,
+       0xd8a453a01b3a09b3ULL,  0x9a54b24bb2d03f20ULL,
+       0x5d45907748ee6495ULL,  0x1fb5719ce1045206ULL,
+       0x919735e51578e56cULL,  0xd367d40ebc92d3ffULL,
+       0x1476f63246ac884aULL,  0x568617d9ef46bed9ULL,
+       0xe085162ab69d5e3cULL,  0xa275f7c11f7768afULL,
+       0x6564d5fde549331aULL,  0x279434164ca30589ULL,
+       0xa9b6706fb8dfb2e3ULL,  0xeb46918411358470ULL,
+       0x2c57b3b8eb0bdfc5ULL,  0x6ea7525342e1e956ULL,
+       0x72e3daa0aa188782ULL,  0x30133b4b03f2b111ULL,
+       0xf7021977f9cceaa4ULL,  0xb5f2f89c5026dc37ULL,
+       0x3bd0bce5a45a6b5dULL,  0x79205d0e0db05dceULL,
+       0xbe317f32f78e067bULL,  0xfcc19ed95e6430e8ULL,
+       0x86b86ed5267cdbd3ULL,  0xc4488f3e8f96ed40ULL,
+       0x0359ad0275a8b6f5ULL,  0x41a94ce9dc428066ULL,
+       0xcf8b0890283e370cULL,  0x8d7be97b81d4019fULL,
+       0x4a6acb477bea5a2aULL,  0x089a2aacd2006cb9ULL,
+       0x14dea25f3af9026dULL,  0x562e43b4931334feULL,
+       0x913f6188692d6f4bULL,  0xd3cf8063c0c759d8ULL,
+       0x5dedc41a34bbeeb2ULL,  0x1f1d25f19d51d821ULL,
+       0xd80c07cd676f8394ULL,  0x9afce626ce85b507ULL,
+};
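
As the crc64.c header notes, this table is produced by gen_crc64table.c at kernel build time. A sketch of the idea: entry i is the CRC64 of the single byte i, processed MSB-first with the ECMA-182 polynomial, which is why entry 1 is the polynomial itself (0x42f0e1eba9ea3693 above):

#include <stdio.h>
#include <stdint.h>

#define CRC64_POLY 0x42f0e1eba9ea3693ULL

int main(void)
{
	for (int i = 0; i < 256; i++) {
		uint64_t crc = (uint64_t) i << 56;

		for (int bit = 0; bit < 8; bit++)
			crc = crc & (1ULL << 63)
				? (crc << 1) ^ CRC64_POLY
				: crc << 1;

		printf("0x%016llxULL,%s", (unsigned long long) crc,
		       i % 2 ? "\n" : "\t");	/* two entries per line */
	}
	return 0;
}
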
diff --git a/linux/crypto/api.c b/linux/crypto/api.c
new file mode 100644 (file)
index 0000000..2f3ab2b
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Cryptographic API for algorithms (i.e., low-level API).
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <crypto/algapi.h>
+
+static LIST_HEAD(crypto_alg_list);
+static DECLARE_RWSEM(crypto_alg_sem);
+
+struct crypto_type {
+};
+
+int crypto_register_alg(struct crypto_alg *alg)
+{
+       down_write(&crypto_alg_sem);
+       list_add(&alg->cra_list, &crypto_alg_list);
+       up_write(&crypto_alg_sem);
+
+       return 0;
+}
+
+static void *crypto_alloc_tfm(const char *name,
+                             const struct crypto_type *type)
+{
+       struct crypto_alg *alg;
+
+       down_read(&crypto_alg_sem);
+       list_for_each_entry(alg, &crypto_alg_list, cra_list)
+               if (alg->cra_type == type && !strcmp(alg->cra_name, name))
+                       goto found;
+
+       alg = ERR_PTR(-ENOENT);
+found:
+       up_read(&crypto_alg_sem);
+
+       if (IS_ERR(alg))
+               return ERR_CAST(alg);
+
+       return alg->alloc_tfm() ?: ERR_PTR(-ENOMEM);
+}
+
+/* skcipher: */
+
+static const struct crypto_type crypto_skcipher_type2 = {
+};
+
+struct crypto_skcipher *crypto_alloc_skcipher(const char *name,
+                                             u32 type, u32 mask)
+{
+       return crypto_alloc_tfm(name, &crypto_skcipher_type2);
+}
+
+int crypto_register_skcipher(struct skcipher_alg *alg)
+{
+       alg->base.cra_type = &crypto_skcipher_type2;
+
+       return crypto_register_alg(&alg->base);
+}
+
+/* shash: */
+
+#include <crypto/hash.h>
+
+static int shash_finup(struct shash_desc *desc, const u8 *data,
+                      unsigned len, u8 *out)
+{
+       return crypto_shash_update(desc, data, len) ?:
+              crypto_shash_final(desc, out);
+}
+
+static int shash_digest(struct shash_desc *desc, const u8 *data,
+                                 unsigned len, u8 *out)
+{
+       return crypto_shash_init(desc) ?:
+              crypto_shash_finup(desc, data, len, out);
+}
+
+static const struct crypto_type crypto_shash_type = {
+};
+
+struct crypto_shash *crypto_alloc_shash(const char *name,
+                                       u32 type, u32 mask)
+{
+       return crypto_alloc_tfm(name, &crypto_shash_type);
+}
+
+int crypto_register_shash(struct shash_alg *alg)
+{
+       alg->base.cra_type = &crypto_shash_type;
+
+       if (!alg->finup)
+               alg->finup = shash_finup;
+       if (!alg->digest)
+               alg->digest = shash_digest;
+
+       return crypto_register_alg(&alg->base);
+}
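
The shim reduces the kernel crypto API to a name -> constructor registry: constructors register algorithms at startup, and crypto_alloc_tfm() walks the list under the rwsem and calls the match's alloc_tfm hook. A minimal standalone model of that registry (plain C, no kernel types; NULL stands in for ERR_PTR(-ENOENT)):

#include <stdio.h>
#include <string.h>

struct alg {
	const char	*name;
	void		*(*alloc_tfm)(void);
	struct alg	*next;
};

static struct alg *alg_list;

/* models crypto_register_alg(): prepend to the global list */
static void register_alg(struct alg *a)
{
	a->next = alg_list;
	alg_list = a;
}

/* models crypto_alloc_tfm(): linear search by name, then construct */
static void *alloc_tfm(const char *name)
{
	struct alg *a;

	for (a = alg_list; a; a = a->next)
		if (!strcmp(a->name, name))
			return a->alloc_tfm();
	return NULL;	/* stands in for ERR_PTR(-ENOENT) */
}

static void *sha256_alloc(void)
{
	static int tfm;	/* placeholder transform object */
	return &tfm;
}

int main(void)
{
	static struct alg sha256 = { .name = "sha256", .alloc_tfm = sha256_alloc };

	register_alg(&sha256);
	printf("sha256 -> %p\n", alloc_tfm("sha256"));	/* found */
	printf("md5    -> %p\n", alloc_tfm("md5"));	/* (nil): unregistered */
	return 0;
}
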
diff --git a/linux/crypto/chacha20_generic.c b/linux/crypto/chacha20_generic.c
new file mode 100644 (file)
index 0000000..914189e
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/skcipher.h>
+
+#include <sodium/crypto_stream_chacha20.h>
+
+static struct skcipher_alg alg;
+
+struct chacha20_tfm {
+       struct crypto_skcipher  tfm;
+       u32                     key[8];
+};
+
+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                                 unsigned int keysize)
+{
+       struct chacha20_tfm *ctx =
+               container_of(tfm, struct chacha20_tfm, tfm);
+       int i;
+
+       if (keysize != CHACHA_KEY_SIZE)
+               return -EINVAL;
+
+       for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+               ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+       return 0;
+}
+
+static int crypto_chacha20_crypt(struct skcipher_request *req)
+{
+       struct chacha20_tfm *ctx =
+               container_of(req->tfm, struct chacha20_tfm, tfm.base);
+       struct scatterlist *sg = req->src;
+       unsigned nbytes = req->cryptlen;
+       u32 iv[4];
+       int ret;
+
+       BUG_ON(req->src != req->dst);
+
+       memcpy(iv, req->iv, sizeof(iv));
+
+       while (1) {
+               ret = crypto_stream_chacha20_xor_ic(sg_virt(sg),
+                                                   sg_virt(sg),
+                                                   sg->length,
+                                                   (void *) &iv[2],
+                                                   iv[0] | ((u64) iv[1] << 32),
+                                                   (void *) ctx->key);
+               BUG_ON(ret);
+
+               nbytes -= sg->length;
+
+               if (sg_is_last(sg))
+                       break;
+
+               BUG_ON(sg->length % CHACHA_BLOCK_SIZE);
+               iv[0] += sg->length / CHACHA_BLOCK_SIZE;
+               sg = sg_next(sg);
+       }
+
+       BUG_ON(nbytes);
+
+       return 0;
+}
+
+static void *crypto_chacha20_alloc_tfm(void)
+{
+       struct chacha20_tfm *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+       if (!tfm)
+               return NULL;
+
+       tfm->tfm.base.alg       = &alg.base;
+       tfm->tfm.setkey         = crypto_chacha20_setkey;
+       tfm->tfm.encrypt        = crypto_chacha20_crypt;
+       tfm->tfm.decrypt        = crypto_chacha20_crypt;
+       tfm->tfm.ivsize         = CHACHA_IV_SIZE;
+       tfm->tfm.keysize        = CHACHA_KEY_SIZE;
+
+       return tfm;
+}
+
+static struct skcipher_alg alg = {
+       .base.cra_name          = "chacha20",
+       .base.alloc_tfm         = crypto_chacha20_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int chacha20_generic_mod_init(void)
+{
+       return crypto_register_skcipher(&alg);
+}
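
crypto_chacha20_crypt() splits the 16-byte IV as iv[0..1] -> 64-bit starting block counter and iv[2..3] -> 8-byte nonce for libsodium. A standalone round trip through the same libsodium call (build with -lsodium; the key is a throwaway demo value):

#include <stdio.h>
#include <stdint.h>
#include <sodium/crypto_stream_chacha20.h>

int main(void)
{
	unsigned char key[32] = { 1 };	/* demo key only */
	uint32_t iv[4] = { 0, 0, 0x11111111, 0x22222222 };
	unsigned char msg[13] = "hello xattrs";

	/* same packing as crypto_chacha20_crypt() above */
	uint64_t ic = iv[0] | ((uint64_t) iv[1] << 32);

	/* encrypt in place, then decrypt in place: XOR is its own inverse */
	crypto_stream_chacha20_xor_ic(msg, msg, sizeof(msg), (void *) &iv[2], ic, key);
	crypto_stream_chacha20_xor_ic(msg, msg, sizeof(msg), (void *) &iv[2], ic, key);

	printf("%s\n", msg);	/* hello xattrs */
	return 0;
}
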
diff --git a/linux/crypto/poly1305_generic.c b/linux/crypto/poly1305_generic.c
new file mode 100644 (file)
index 0000000..acb554c
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/poly1305.h>
+
+static struct shash_alg poly1305_alg;
+
+struct poly1305_desc_ctx {
+       bool                                    key_done;
+       crypto_onetimeauth_poly1305_state       s;
+};
+
+static int poly1305_init(struct shash_desc *desc)
+{
+       struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+       state->key_done = false;
+       return 0;
+}
+
+static int poly1305_update(struct shash_desc *desc,
+                          const u8 *src, unsigned len)
+{
+       struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+       if (!state->key_done) {
+               BUG_ON(len != crypto_onetimeauth_poly1305_KEYBYTES);
+
+               state->key_done = true;
+               return crypto_onetimeauth_poly1305_init(&state->s, src);
+       }
+
+       return crypto_onetimeauth_poly1305_update(&state->s, src, len);
+}
+
+static int poly1305_final(struct shash_desc *desc, u8 *out)
+{
+       struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+       return crypto_onetimeauth_poly1305_final(&state->s, out);
+}
+
+static void *poly1305_alloc_tfm(void)
+{
+       struct crypto_shash *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+       if (!tfm)
+               return NULL;
+
+       tfm->base.alg = &poly1305_alg.base;
+       tfm->descsize = sizeof(struct poly1305_desc_ctx);
+       return tfm;
+}
+
+static struct shash_alg poly1305_alg = {
+       .digestsize     = crypto_onetimeauth_poly1305_BYTES,
+       .init           = poly1305_init,
+       .update         = poly1305_update,
+       .final          = poly1305_final,
+       .descsize       = sizeof(struct poly1305_desc_ctx),
+
+       .base.cra_name  = "poly1305",
+       .base.alloc_tfm = poly1305_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int poly1305_mod_init(void)
+{
+       return crypto_register_shash(&poly1305_alg);
+}
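
The wrapper treats the first 32-byte update as the one-time key and everything after it as message data. A standalone sketch of the underlying libsodium calls in that same order (build with -lsodium):

#include <stdio.h>
#include <sodium/crypto_onetimeauth_poly1305.h>

int main(void)
{
	unsigned char key[crypto_onetimeauth_poly1305_KEYBYTES] = { 7 };	/* demo key only */
	unsigned char tag[crypto_onetimeauth_poly1305_BYTES];
	crypto_onetimeauth_poly1305_state s;

	crypto_onetimeauth_poly1305_init(&s, key);	/* the first "update" above */
	crypto_onetimeauth_poly1305_update(&s, (const unsigned char *) "msg", 3);
	crypto_onetimeauth_poly1305_final(&s, tag);

	printf("tag[0] = %02x\n", tag[0]);
	return 0;
}
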
diff --git a/linux/crypto/sha256_generic.c b/linux/crypto/sha256_generic.c
new file mode 100644 (file)
index 0000000..9326bfe
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/byteorder.h>
+#include <linux/types.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+
+#include <sodium/crypto_hash_sha256.h>
+
+static struct shash_alg sha256_alg;
+
+static int sha256_init(struct shash_desc *desc)
+{
+       crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+       return crypto_hash_sha256_init(state);
+}
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+                         unsigned int len)
+{
+       crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+       return crypto_hash_sha256_update(state, data, len);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+       crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+       return crypto_hash_sha256_final(state, out);
+}
+
+static void *sha256_alloc_tfm(void)
+{
+       struct crypto_shash *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+       if (!tfm)
+               return NULL;
+
+       tfm->base.alg = &sha256_alg.base;
+       tfm->descsize = sizeof(crypto_hash_sha256_state);
+       return tfm;
+}
+
+static struct shash_alg sha256_alg = {
+       .digestsize     = crypto_hash_sha256_BYTES,
+       .init           = sha256_init,
+       .update         = sha256_update,
+       .final          = sha256_final,
+       .descsize       = sizeof(crypto_hash_sha256_state),
+       .base.cra_name  = "sha256",
+       .base.alloc_tfm = sha256_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int __init sha256_generic_mod_init(void)
+{
+       return crypto_register_shash(&sha256_alg);
+}
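
A one-shot counterpart of the wrapper above, driving the same libsodium primitives directly (build with -lsodium; "abc" is the classic SHA-256 test vector):

#include <stdio.h>
#include <sodium/crypto_hash_sha256.h>

int main(void)
{
	unsigned char out[crypto_hash_sha256_BYTES];
	crypto_hash_sha256_state st;

	crypto_hash_sha256_init(&st);
	crypto_hash_sha256_update(&st, (const unsigned char *) "abc", 3);
	crypto_hash_sha256_final(&st, out);

	for (int i = 0; i < 4; i++)
		printf("%02x", out[i]);		/* ba7816bf... for "abc" */
	printf("...\n");
	return 0;
}
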
diff --git a/linux/fs.c b/linux/fs.c
new file mode 100644 (file)
index 0000000..0002846
--- /dev/null
@@ -0,0 +1,14 @@
+#include <linux/fs.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+const struct xattr_handler posix_acl_access_xattr_handler = {
+       .name = XATTR_NAME_POSIX_ACL_ACCESS,
+       .flags = ACL_TYPE_ACCESS,
+};
+
+const struct xattr_handler posix_acl_default_xattr_handler = {
+       .name = XATTR_NAME_POSIX_ACL_DEFAULT,
+       .flags = ACL_TYPE_DEFAULT,
+};
diff --git a/linux/generic-radix-tree.c b/linux/generic-radix-tree.c
new file mode 100644 (file)
index 0000000..4f43d0b
--- /dev/null
@@ -0,0 +1,218 @@
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/gfp.h>
+
+#define GENRADIX_ARY           (PAGE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY_SHIFT     ilog2(GENRADIX_ARY)
+
+struct genradix_node {
+       union {
+               /* Interior node: */
+               struct genradix_node    *children[GENRADIX_ARY];
+
+               /* Leaf: */
+               u8                      data[PAGE_SIZE];
+       };
+};
+
+static inline int genradix_depth_shift(unsigned depth)
+{
+       return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+}
+
+/*
+ * Returns size (of data, in bytes) that a tree of a given depth holds:
+ */
+static inline size_t genradix_depth_size(unsigned depth)
+{
+       return 1UL << genradix_depth_shift(depth);
+}
+
+/* depth that's needed for a genradix that can address up to ULONG_MAX: */
+#define GENRADIX_MAX_DEPTH     \
+       DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)
+
+#define GENRADIX_DEPTH_MASK                            \
+       ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
+
+unsigned genradix_root_to_depth(struct genradix_root *r)
+{
+       return (unsigned long) r & GENRADIX_DEPTH_MASK;
+}
+
+struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+{
+       return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, or NULL if not
+ * allocated
+ */
+void *__genradix_ptr(struct __genradix *radix, size_t offset)
+{
+       struct genradix_root *r = READ_ONCE(radix->root);
+       struct genradix_node *n = genradix_root_to_node(r);
+       unsigned level          = genradix_root_to_depth(r);
+
+       if (ilog2(offset) >= genradix_depth_shift(level))
+               return NULL;
+
+       while (1) {
+               if (!n)
+                       return NULL;
+               if (!level)
+                       break;
+
+               level--;
+
+               n = n->children[offset >> genradix_depth_shift(level)];
+               offset &= genradix_depth_size(level) - 1;
+       }
+
+       return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr);
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, allocating it if
+ * necessary - newly allocated slots are always zeroed out:
+ */
+void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
+                          gfp_t gfp_mask)
+{
+       struct genradix_root *v = READ_ONCE(radix->root);
+       struct genradix_node *n, *new_node = NULL;
+       unsigned level;
+
+       /* Increase tree depth if necessary: */
+       while (1) {
+               struct genradix_root *r = v, *new_root;
+
+               n       = genradix_root_to_node(r);
+               level   = genradix_root_to_depth(r);
+
+               if (n && ilog2(offset) < genradix_depth_shift(level))
+                       break;
+
+               if (!new_node) {
+                       new_node = (void *)
+                               __get_free_page(gfp_mask|__GFP_ZERO);
+                       if (!new_node)
+                               return NULL;
+               }
+
+               new_node->children[0] = n;
+               new_root = ((struct genradix_root *)
+                           ((unsigned long) new_node | (n ? level + 1 : 0)));
+
+               if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) {
+                       v = new_root;
+                       new_node = NULL;
+               }
+       }
+
+       while (level--) {
+               struct genradix_node **p =
+                       &n->children[offset >> genradix_depth_shift(level)];
+               offset &= genradix_depth_size(level) - 1;
+
+               n = READ_ONCE(*p);
+               if (!n) {
+                       if (!new_node) {
+                               new_node = (void *)
+                                       __get_free_page(gfp_mask|__GFP_ZERO);
+                               if (!new_node)
+                                       return NULL;
+                       }
+
+                       if (!(n = cmpxchg_release(p, NULL, new_node)))
+                               swap(n, new_node);
+               }
+       }
+
+       if (new_node)
+               free_page((unsigned long) new_node);
+
+       return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr_alloc);
+
+void *__genradix_iter_peek(struct genradix_iter *iter,
+                          struct __genradix *radix,
+                          size_t objs_per_page)
+{
+       struct genradix_root *r;
+       struct genradix_node *n;
+       unsigned level, i;
+restart:
+       r = READ_ONCE(radix->root);
+       if (!r)
+               return NULL;
+
+       n       = genradix_root_to_node(r);
+       level   = genradix_root_to_depth(r);
+
+       if (ilog2(iter->offset) >= genradix_depth_shift(level))
+               return NULL;
+
+       while (level) {
+               level--;
+
+               i = (iter->offset >> genradix_depth_shift(level)) &
+                       (GENRADIX_ARY - 1);
+
+               while (!n->children[i]) {
+                       i++;
+                       iter->offset = round_down(iter->offset +
+                                          genradix_depth_size(level),
+                                          genradix_depth_size(level));
+                       iter->pos = (iter->offset >> PAGE_SHIFT) *
+                               objs_per_page;
+                       if (i == GENRADIX_ARY)
+                               goto restart;
+               }
+
+               n = n->children[i];
+       }
+
+       return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek);
+
+static void genradix_free_recurse(struct genradix_node *n, unsigned level)
+{
+       if (level) {
+               unsigned i;
+
+               for (i = 0; i < GENRADIX_ARY; i++)
+                       if (n->children[i])
+                               genradix_free_recurse(n->children[i], level - 1);
+       }
+
+       free_page((unsigned long) n);
+}
+
+int __genradix_prealloc(struct __genradix *radix, size_t size,
+                       gfp_t gfp_mask)
+{
+       size_t offset;
+
+       for (offset = 0; offset < size; offset += PAGE_SIZE)
+               if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
+                       return -ENOMEM;
+
+       return 0;
+}
+EXPORT_SYMBOL(__genradix_prealloc);
+
+void __genradix_free(struct __genradix *radix)
+{
+       struct genradix_root *r = xchg(&radix->root, NULL);
+
+       genradix_free_recurse(genradix_root_to_node(r),
+                             genradix_root_to_depth(r));
+}
+EXPORT_SYMBOL(__genradix_free);
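
The public genradix() macros in the corresponding header are thin typed
wrappers around these internals. A minimal sketch of how the internals
compose, assuming struct __genradix is the single-member
{ struct genradix_root *root; } wrapper declared in
linux/generic-radix-tree.h:

	static int genradix_smoke_test(void)
	{
		struct __genradix radix = { .root = NULL };
		size_t offset = 3 * PAGE_SIZE + 100;	/* needs depth 1 */
		u8 *p;

		/* Allocates the interior node and leaf page, zeroed: */
		p = __genradix_ptr_alloc(&radix, offset, GFP_KERNEL);
		if (!p)
			return -ENOMEM;
		*p = 0x42;

		/* Lookup of an allocated byte finds the same slot: */
		BUG_ON(__genradix_ptr(&radix, offset) != p);

		/* Lookup in an unallocated leaf returns NULL: */
		BUG_ON(__genradix_ptr(&radix, offset + PAGE_SIZE));

		__genradix_free(&radix);
		return 0;
	}
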
diff --git a/linux/kstrtox.c b/linux/kstrtox.c
new file mode 100644 (file)
index 0000000..bde5580
--- /dev/null
@@ -0,0 +1,357 @@
+/*
+ * Convert integer string representation to an integer.
+ * If an integer doesn't fit into specified type, -E is returned.
+ *
+ * Integer starts with optional sign.
+ * kstrtou*() functions do not accept sign "-".
+ *
+ * Radix 0 means autodetection: leading "0x" implies radix 16,
+ * leading "0" implies radix 8, otherwise radix is 10.
+ * Autodetection hints work after optional sign, but not before.
+ *
+ * If -E is returned, result is not touched.
+ */
+#include <errno.h>
+#include <ctype.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include "kstrtox.h"
+
+#define KSTRTOX_OVERFLOW       (1U << 31)
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+       if (*base == 0) {
+               if (s[0] == '0') {
+                       if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+                               *base = 16;
+                       else
+                               *base = 8;
+               } else
+                       *base = 10;
+       }
+       if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+               s += 2;
+       return s;
+}
+
+/*
+ * Convert non-negative integer string representation in explicitly given radix
+ * to an integer.
+ * Returns the number of characters consumed, possibly or-ed with the
+ * overflow bit.  If overflow occurs, the (incorrect) result integer is
+ * still returned.
+ *
+ * Don't you dare use this function.
+ */
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+       unsigned long long res;
+       unsigned int rv;
+       int overflow;
+
+       res = 0;
+       rv = 0;
+       overflow = 0;
+       while (*s) {
+               unsigned int val;
+
+               if ('0' <= *s && *s <= '9')
+                       val = *s - '0';
+               else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
+                       val = _tolower(*s) - 'a' + 10;
+               else
+                       break;
+
+               if (val >= base)
+                       break;
+               /*
+                * Check for overflow only if we are within range of
+                * it in the max base we support (16)
+                */
+               if (unlikely(res & (~0ull << 60))) {
+                       if (res > (ULLONG_MAX - val) / base)
+                               overflow = 1;
+               }
+               res = res * base + val;
+               rv++;
+               s++;
+       }
+       *p = res;
+       if (overflow)
+               rv |= KSTRTOX_OVERFLOW;
+       return rv;
+}
+
+static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+       unsigned long long _res;
+       unsigned int rv;
+
+       s = _parse_integer_fixup_radix(s, &base);
+       rv = _parse_integer(s, base, &_res);
+       if (rv & KSTRTOX_OVERFLOW)
+               return -ERANGE;
+       if (rv == 0)
+               return -EINVAL;
+       s += rv;
+       if (*s == '\n')
+               s++;
+       if (*s)
+               return -EINVAL;
+       *res = _res;
+       return 0;
+}
+
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+       if (s[0] == '+')
+               s++;
+       return _kstrtoull(s, base, res);
+}
+
+/**
+ * kstrtoll - convert a string to a long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoll(const char *s, unsigned int base, long long *res)
+{
+       unsigned long long tmp;
+       int rv;
+
+       if (s[0] == '-') {
+               rv = _kstrtoull(s + 1, base, &tmp);
+               if (rv < 0)
+                       return rv;
+               if ((long long)-tmp > 0)
+                       return -ERANGE;
+               *res = -tmp;
+       } else {
+               rv = kstrtoull(s, base, &tmp);
+               if (rv < 0)
+                       return rv;
+               if ((long long)tmp < 0)
+                       return -ERANGE;
+               *res = tmp;
+       }
+       return 0;
+}
+
+/* Internal, do not use. */
+int _kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+       unsigned long long tmp;
+       int rv;
+
+       rv = kstrtoull(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (unsigned long long)(unsigned long)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+/* Internal, do not use. */
+int _kstrtol(const char *s, unsigned int base, long *res)
+{
+       long long tmp;
+       int rv;
+
+       rv = kstrtoll(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (long long)(long)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+/**
+ * kstrtouint - convert a string to an unsigned int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtouint(const char *s, unsigned int base, unsigned int *res)
+{
+       unsigned long long tmp;
+       int rv;
+
+       rv = kstrtoull(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (unsigned long long)(unsigned int)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+/**
+ * kstrtoint - convert a string to an int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoint(const char *s, unsigned int base, int *res)
+{
+       long long tmp;
+       int rv;
+
+       rv = kstrtoll(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (long long)(int)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+int kstrtou16(const char *s, unsigned int base, u16 *res)
+{
+       unsigned long long tmp;
+       int rv;
+
+       rv = kstrtoull(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (unsigned long long)(u16)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+int kstrtos16(const char *s, unsigned int base, s16 *res)
+{
+       long long tmp;
+       int rv;
+
+       rv = kstrtoll(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (long long)(s16)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+int kstrtou8(const char *s, unsigned int base, u8 *res)
+{
+       unsigned long long tmp;
+       int rv;
+
+       rv = kstrtoull(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (unsigned long long)(u8)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+int kstrtos8(const char *s, unsigned int base, s8 *res)
+{
+       long long tmp;
+       int rv;
+
+       rv = kstrtoll(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (long long)(s8)tmp)
+               return -ERANGE;
+       *res = tmp;
+       return 0;
+}
+
+/**
+ * kstrtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL.  Value
+ * pointed to by res is updated upon finding a match.
+ */
+int kstrtobool(const char *s, bool *res)
+{
+       if (!s)
+               return -EINVAL;
+
+       switch (s[0]) {
+       case 'y':
+       case 'Y':
+       case '1':
+               *res = true;
+               return 0;
+       case 'n':
+       case 'N':
+       case '0':
+               *res = false;
+               return 0;
+       case 'o':
+       case 'O':
+               switch (s[1]) {
+               case 'n':
+               case 'N':
+                       *res = true;
+                       return 0;
+               case 'f':
+               case 'F':
+                       *res = false;
+                       return 0;
+               default:
+                       break;
+               }
+       default:
+               break;
+       }
+
+       return -EINVAL;
+}
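
A few concrete cases of the parsing rules above (sign handling, base
autodetection, trailing-newline tolerance, overflow reporting), as a hedged
sketch using only functions defined in this file:

	static void kstrto_examples(void)
	{
		unsigned long long u;
		int i;
		bool b;

		BUG_ON(kstrtoull("0x2a\n", 0, &u) || u != 42);	/* "0x" => hex */
		BUG_ON(kstrtoull("052", 0, &u) || u != 42);	/* "0..." => octal */
		BUG_ON(kstrtoint("-42", 10, &i) || i != -42);

		/* kstrtou*() reject a leading '-': */
		BUG_ON(kstrtoull("-1", 10, &u) != -EINVAL);

		/* 2^64 does not fit in unsigned long long: */
		BUG_ON(kstrtoull("18446744073709551616", 10, &u) != -ERANGE);

		BUG_ON(kstrtobool("on", &b) || !b);
	}
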
diff --git a/linux/kstrtox.h b/linux/kstrtox.h
new file mode 100644 (file)
index 0000000..910b6de
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _LIB_KSTRTOX_H
+#define _LIB_KSTRTOX_H
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
+
+#endif
diff --git a/linux/kthread.c b/linux/kthread.c
new file mode 100644 (file)
index 0000000..65e824b
--- /dev/null
@@ -0,0 +1,126 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/bitops.h>
+#include <linux/kthread.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "tools-util.h"
+
+enum KTHREAD_BITS {
+       KTHREAD_IS_PER_CPU = 0,
+       KTHREAD_SHOULD_STOP,
+       KTHREAD_SHOULD_PARK,
+       KTHREAD_IS_PARKED,
+};
+
+static void *kthread_start_fn(void *data)
+{
+       rcu_register_thread();
+
+       current = data;
+       schedule();
+       current->thread_fn(current->thread_data);
+
+       complete(&current->exited);
+       put_task_struct(current);
+       rcu_unregister_thread();
+       return NULL;
+}
+
+/**
+ * kthread_create - create a kthread.
+ * @thread_fn: the function to run until kthread_should_stop().
+ * @thread_data: data ptr for @thread_fn.
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: This helper function creates and names a kernel
+ * thread.  The thread will be stopped: use wake_up_process() to start
+ * it.  See also kthread_run().
+ *
+ * When woken, the thread will run @thread_fn() with @thread_data as its
+ * argument.  @thread_fn() should return when kthread_should_stop() is
+ * true (which means kthread_stop() has been called).  The return value
+ * should be zero or a negative error number; it will be passed to
+ * kthread_stop().
+ *
+ * Returns a task_struct pointer; in this userspace port, failure to
+ * create the underlying pthread is fatal.
+ */
+struct task_struct *kthread_create(int (*thread_fn)(void *data),
+                                  void *thread_data,
+                                  const char namefmt[], ...)
+{
+       va_list args;
+       struct task_struct *p = malloc(sizeof(*p));
+       int ret;
+
+       if (!p)
+               die("insufficient memory");
+
+       memset(p, 0, sizeof(*p));
+
+       va_start(args, namefmt);
+       vsnprintf(p->comm, sizeof(p->comm), namefmt, args);
+       va_end(args);
+
+       p->flags        |= PF_KTHREAD;
+       p->thread_fn    = thread_fn;
+       p->thread_data  = thread_data;
+       p->state        = TASK_UNINTERRUPTIBLE;
+       atomic_set(&p->usage, 1);
+       init_completion(&p->exited);
+
+       pthread_attr_t attr;
+       pthread_attr_init(&attr);
+       pthread_attr_setstacksize(&attr, 32 << 10);
+
+       ret = pthread_create(&p->thread, &attr, kthread_start_fn, p);
+       if (ret)
+               die("pthread_create error %s", strerror(ret));
+       pthread_setname_np(p->thread, p->comm);
+       return p;
+}
+
+/**
+ * kthread_should_stop - should this kthread return now?
+ *
+ * When someone calls kthread_stop() on your kthread, it will be woken
+ * and this will return true.  You should then return, and your return
+ * value will be passed through to kthread_stop().
+ */
+bool kthread_should_stop(void)
+{
+       return test_bit(KTHREAD_SHOULD_STOP, &current->kthread_flags);
+}
+
+/**
+ * kthread_stop - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_stop() for @k to return true, wakes it, and
+ * waits for it to exit.  Note that in this userspace port the thread
+ * always enters thread_fn() once woken, so thread_fn() must check
+ * kthread_should_stop() and return promptly; do_exit() does not exist
+ * here.
+ *
+ * Always returns 0 (the kernel version returns the result of
+ * threadfn(), or %-EINTR if wake_up_process() was never called).
+ */
+int kthread_stop(struct task_struct *p)
+{
+       get_task_struct(p);
+
+       set_bit(KTHREAD_SHOULD_STOP, &p->kthread_flags);
+       wake_up_process(p);
+       wait_for_completion(&p->exited);
+
+       put_task_struct(p);
+
+       return 0;
+}
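
Putting the three entry points together: a hedged lifecycle sketch, assuming
HZ and __set_current_state() from the shim headers (the counter payload is
hypothetical):

	static int counter_fn(void *data)
	{
		int *count = data;

		while (!kthread_should_stop()) {
			(*count)++;
			__set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(HZ / 10);	/* ~100ms naps */
		}
		return 0;
	}

	static int run_counter(void)
	{
		int count = 0;
		struct task_struct *t =
			kthread_create(counter_fn, &count, "counter-%u", 0);

		wake_up_process(t);	/* thread was parked in schedule() */

		__set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ);	/* let it count for about a second */

		return kthread_stop(t);	/* sets SHOULD_STOP, wakes, joins */
	}
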
diff --git a/linux/llist.c b/linux/llist.c
new file mode 100644 (file)
index 0000000..611ce48
--- /dev/null
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Lock-less NULL terminated single linked list
+ *
+ * The basic atomic operation of this list is cmpxchg on long.  On
+ * architectures that don't have NMI-safe cmpxchg implementation, the
+ * list can NOT be used in NMI handlers.  So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/llist.h>
+
+
+/**
+ * llist_add_batch - add several linked entries in batch
+ * @new_first: first entry in batch to be added
+ * @new_last:  last entry in batch to be added
+ * @head:      the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
+ */
+bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+                    struct llist_head *head)
+{
+       struct llist_node *first;
+
+       do {
+               new_last->next = first = READ_ONCE(head->first);
+       } while (cmpxchg(&head->first, first, new_first) != first);
+
+       return !first;
+}
+EXPORT_SYMBOL_GPL(llist_add_batch);
+
+/**
+ * llist_del_first - delete the first entry of lock-less list
+ * @head:      the head for your lock-less list
+ *
+ * If list is empty, return NULL, otherwise, return the first entry
+ * deleted, this is the newest added one.
+ *
+ * Only one llist_del_first user can be used simultaneously with
+ * multiple llist_add users without lock.  Because otherwise
+ * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add,
+ * llist_add) sequence in another user may change @head->first->next,
+ * but keep @head->first.  If multiple consumers are needed, please
+ * use llist_del_all or use lock between consumers.
+ */
+struct llist_node *llist_del_first(struct llist_head *head)
+{
+       struct llist_node *entry, *old_entry, *next;
+
+       entry = smp_load_acquire(&head->first);
+       for (;;) {
+               if (entry == NULL)
+                       return NULL;
+               old_entry = entry;
+               next = READ_ONCE(entry->next);
+               entry = cmpxchg(&head->first, old_entry, next);
+               if (entry == old_entry)
+                       break;
+       }
+
+       return entry;
+}
+EXPORT_SYMBOL_GPL(llist_del_first);
+
+/**
+ * llist_reverse_order - reverse order of a llist chain
+ * @head:      first item of the list to be reversed
+ *
+ * Reverse the order of a chain of llist entries and return the
+ * new first entry.
+ */
+struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+       struct llist_node *new_head = NULL;
+
+       while (head) {
+               struct llist_node *tmp = head;
+               head = head->next;
+               tmp->next = new_head;
+               new_head = tmp;
+       }
+
+       return new_head;
+}
+EXPORT_SYMBOL_GPL(llist_reverse_order);
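
A typical multi-producer/single-consumer pattern built on these primitives,
as a hedged sketch: llist_add(), llist_del_all(), LLIST_HEAD() and
llist_for_each_entry_safe() are the usual inlines/macros from the copied
linux/llist.h, and process() is hypothetical:

	struct event {
		struct llist_node	node;
		int			payload;
	};

	static LLIST_HEAD(pending);

	static void producer(struct event *e)
	{
		llist_add(&e->node, &pending);	/* lock-free push to front */
	}

	static void consumer(void)
	{
		struct llist_node *list = llist_del_all(&pending);
		struct event *e, *tmp;

		/* Entries pop newest-first; flip back to submission order: */
		list = llist_reverse_order(list);

		llist_for_each_entry_safe(e, tmp, list, node)
			process(e);	/* process() is hypothetical */
	}
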
diff --git a/linux/preempt.c b/linux/preempt.c
new file mode 100644 (file)
index 0000000..72eceed
--- /dev/null
@@ -0,0 +1,37 @@
+#include <pthread.h>
+
+#include "linux/preempt.h"
+
+/*
+ * In userspace, pthreads are preemptible and can migrate CPUs at any time.
+ *
+ * In the kernel, preempt_disable() logic essentially guarantees that a marked
+ * critical section owns its CPU for the relevant block. This is necessary for
+ * various code paths, critically including the percpu system as it allows for
+ * non-atomic reads and writes to CPU-local data structures.
+ *
+ * The high performance userspace equivalent would be to use thread local
+ * storage to replace percpu data, but that would be complicated. It should be
+ * correct to instead guarantee mutual exclusion for the critical sections.
+ */
+
+static pthread_mutex_t preempt_lock;
+
+__attribute__((constructor))
+static void preempt_init(void) {
+       pthread_mutexattr_t attr;
+       pthread_mutexattr_init(&attr);
+       pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+       pthread_mutex_init(&preempt_lock, &attr);
+       pthread_mutexattr_destroy(&attr);
+}
+
+void preempt_disable(void)
+{
+       pthread_mutex_lock(&preempt_lock);
+}
+
+void preempt_enable(void)
+{
+       pthread_mutex_unlock(&preempt_lock);
+}
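
With that, a preempt-protected section looks exactly like its kernel
counterpart; only the guarantee behind it changes, as in this hedged sketch:

	static unsigned long pseudo_percpu_counter;

	static void bump_counter(void)
	{
		preempt_disable();
		/*
		 * In the kernel this would be a plain non-atomic update to
		 * CPU-local data; here the recursive mutex is what makes the
		 * read-modify-write safe across threads:
		 */
		pseudo_percpu_counter++;
		preempt_enable();
	}
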
diff --git a/linux/rhashtable.c b/linux/rhashtable.c
new file mode 100644 (file)
index 0000000..351eac7
--- /dev/null
@@ -0,0 +1,524 @@
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+#include <linux/rhashtable.h>
+#include <linux/err.h>
+
+#define HASH_DEFAULT_SIZE      64UL
+#define HASH_MIN_SIZE          4U
+#define BUCKET_LOCKS_PER_CPU   32UL
+
+static u32 head_hashfn(struct rhashtable *ht,
+                      const struct bucket_table *tbl,
+                      const struct rhash_head *he)
+{
+       return rht_head_hashfn(ht, tbl, he, ht->p);
+}
+
+static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
+                             gfp_t gfp)
+{
+       unsigned int i, size;
+       unsigned int nr_pcpus = num_possible_cpus();
+
+       nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
+       size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
+
+       /* Never allocate more than 0.5 locks per bucket */
+       size = min_t(unsigned int, size, tbl->size >> 1);
+
+       if (sizeof(spinlock_t) != 0) {
+               tbl->locks = NULL;
+               if (gfp != GFP_KERNEL)
+                       gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
+               if (!tbl->locks)
+                       tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
+                                                  gfp);
+               if (!tbl->locks)
+                       return -ENOMEM;
+               for (i = 0; i < size; i++)
+                       spin_lock_init(&tbl->locks[i]);
+       }
+       tbl->locks_mask = size - 1;
+
+       return 0;
+}
+
+static void bucket_table_free(struct bucket_table *tbl)
+{
+       if (tbl)
+               kvfree(tbl->locks);
+
+       kvfree(tbl);
+}
+
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+       bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
+static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
+                                              size_t nbuckets,
+                                              gfp_t gfp)
+{
+       struct bucket_table *tbl = NULL;
+       size_t size;
+       int i;
+
+       size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
+       if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
+           gfp != GFP_KERNEL)
+               tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
+       if (tbl == NULL && gfp == GFP_KERNEL)
+               tbl = vzalloc(size);
+       if (tbl == NULL)
+               return NULL;
+
+       tbl->size = nbuckets;
+
+       if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
+               bucket_table_free(tbl);
+               return NULL;
+       }
+
+       INIT_LIST_HEAD(&tbl->walkers);
+
+       get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
+       for (i = 0; i < nbuckets; i++)
+               INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
+
+       return tbl;
+}
+
+static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
+                                                 struct bucket_table *tbl)
+{
+       struct bucket_table *new_tbl;
+
+       do {
+               new_tbl = tbl;
+               tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+       } while (tbl);
+
+       return new_tbl;
+}
+
+static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
+{
+       struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+       struct bucket_table *new_tbl = rhashtable_last_table(ht,
+               rht_dereference_rcu(old_tbl->future_tbl, ht));
+       struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
+       int err = -ENOENT;
+       struct rhash_head *head, *next, *entry;
+       spinlock_t *new_bucket_lock;
+       unsigned int new_hash;
+
+       rht_for_each(entry, old_tbl, old_hash) {
+               err = 0;
+               next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
+
+               if (rht_is_a_nulls(next))
+                       break;
+
+               pprev = &entry->next;
+       }
+
+       if (err)
+               goto out;
+
+       new_hash = head_hashfn(ht, new_tbl, entry);
+
+       new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
+
+       spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
+       head = rht_dereference_bucket(new_tbl->buckets[new_hash],
+                                     new_tbl, new_hash);
+
+       RCU_INIT_POINTER(entry->next, head);
+
+       rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
+       spin_unlock(new_bucket_lock);
+
+       rcu_assign_pointer(*pprev, next);
+
+out:
+       return err;
+}
+
+static void rhashtable_rehash_chain(struct rhashtable *ht,
+                                   unsigned int old_hash)
+{
+       struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+       spinlock_t *old_bucket_lock;
+
+       old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
+
+       spin_lock_bh(old_bucket_lock);
+       while (!rhashtable_rehash_one(ht, old_hash))
+               ;
+       old_tbl->rehash++;
+       spin_unlock_bh(old_bucket_lock);
+}
+
+static int rhashtable_rehash_attach(struct rhashtable *ht,
+                                   struct bucket_table *old_tbl,
+                                   struct bucket_table *new_tbl)
+{
+       /* Protect future_tbl using the first bucket lock. */
+       spin_lock_bh(old_tbl->locks);
+
+       /* Did somebody beat us to it? */
+       if (rcu_access_pointer(old_tbl->future_tbl)) {
+               spin_unlock_bh(old_tbl->locks);
+               return -EEXIST;
+       }
+
+       /* Make insertions go into the new, empty table right away. Deletions
+        * and lookups will be attempted in both tables until we synchronize.
+        */
+       rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
+
+       spin_unlock_bh(old_tbl->locks);
+
+       return 0;
+}
+
+static int rhashtable_rehash_table(struct rhashtable *ht)
+{
+       struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+       struct bucket_table *new_tbl;
+       struct rhashtable_walker *walker;
+       unsigned int old_hash;
+
+       new_tbl = rht_dereference(old_tbl->future_tbl, ht);
+       if (!new_tbl)
+               return 0;
+
+       for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
+               rhashtable_rehash_chain(ht, old_hash);
+
+       /* Publish the new table pointer. */
+       rcu_assign_pointer(ht->tbl, new_tbl);
+
+       spin_lock(&ht->lock);
+       list_for_each_entry(walker, &old_tbl->walkers, list)
+               walker->tbl = NULL;
+       spin_unlock(&ht->lock);
+
+       /* Wait for readers. All new readers will see the new
+        * table, and thus no references to the old table will
+        * remain.
+        */
+       call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+
+       return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
+}
+
+static int rhashtable_expand(struct rhashtable *ht)
+{
+       struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+       int err;
+
+       old_tbl = rhashtable_last_table(ht, old_tbl);
+
+       new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
+       if (new_tbl == NULL)
+               return -ENOMEM;
+
+       err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+       if (err)
+               bucket_table_free(new_tbl);
+
+       return err;
+}
+
+static int rhashtable_shrink(struct rhashtable *ht)
+{
+       struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+       unsigned int nelems = atomic_read(&ht->nelems);
+       unsigned int size = 0;
+       int err;
+
+       if (nelems)
+               size = roundup_pow_of_two(nelems * 3 / 2);
+       if (size < ht->p.min_size)
+               size = ht->p.min_size;
+
+       if (old_tbl->size <= size)
+               return 0;
+
+       if (rht_dereference(old_tbl->future_tbl, ht))
+               return -EEXIST;
+
+       new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+       if (new_tbl == NULL)
+               return -ENOMEM;
+
+       err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+       if (err)
+               bucket_table_free(new_tbl);
+
+       return err;
+}
+
+static void rht_deferred_worker(struct work_struct *work)
+{
+       struct rhashtable *ht;
+       struct bucket_table *tbl;
+       int err = 0;
+
+       ht = container_of(work, struct rhashtable, run_work);
+       mutex_lock(&ht->mutex);
+
+       tbl = rht_dereference(ht->tbl, ht);
+       tbl = rhashtable_last_table(ht, tbl);
+
+       if (rht_grow_above_75(ht, tbl))
+               rhashtable_expand(ht);
+       else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
+               rhashtable_shrink(ht);
+
+       err = rhashtable_rehash_table(ht);
+
+       mutex_unlock(&ht->mutex);
+
+       if (err)
+               schedule_work(&ht->run_work);
+}
+
+static bool rhashtable_check_elasticity(struct rhashtable *ht,
+                                       struct bucket_table *tbl,
+                                       unsigned int hash)
+{
+       unsigned int elasticity = ht->elasticity;
+       struct rhash_head *head;
+
+       rht_for_each(head, tbl, hash)
+               if (!--elasticity)
+                       return true;
+
+       return false;
+}
+
+int rhashtable_insert_rehash(struct rhashtable *ht,
+                            struct bucket_table *tbl)
+{
+       struct bucket_table *old_tbl;
+       struct bucket_table *new_tbl;
+       unsigned int size;
+       int err;
+
+       old_tbl = rht_dereference_rcu(ht->tbl, ht);
+
+       size = tbl->size;
+
+       err = -EBUSY;
+
+       if (rht_grow_above_75(ht, tbl))
+               size *= 2;
+       /* Do not schedule more than one rehash */
+       else if (old_tbl != tbl)
+               goto fail;
+
+       err = -ENOMEM;
+
+       new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+       if (new_tbl == NULL)
+               goto fail;
+
+       err = rhashtable_rehash_attach(ht, tbl, new_tbl);
+       if (err) {
+               bucket_table_free(new_tbl);
+               if (err == -EEXIST)
+                       err = 0;
+       } else
+               schedule_work(&ht->run_work);
+
+       return err;
+
+fail:
+       /* Do not fail the insert if someone else did a rehash. */
+       if (likely(rcu_dereference_raw(tbl->future_tbl)))
+               return 0;
+
+       /* Schedule async rehash to retry allocation in process context. */
+       if (err == -ENOMEM)
+               schedule_work(&ht->run_work);
+
+       return err;
+}
+
+struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+                                           const void *key,
+                                           struct rhash_head *obj,
+                                           struct bucket_table *tbl)
+{
+       struct rhash_head *head;
+       unsigned int hash;
+       int err;
+
+       tbl = rhashtable_last_table(ht, tbl);
+       hash = head_hashfn(ht, tbl, obj);
+       spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+
+       err = -EEXIST;
+       if (key && rhashtable_lookup_fast(ht, key, ht->p))
+               goto exit;
+
+       err = -E2BIG;
+       if (unlikely(rht_grow_above_max(ht, tbl)))
+               goto exit;
+
+       err = -EAGAIN;
+       if (rhashtable_check_elasticity(ht, tbl, hash) ||
+           rht_grow_above_100(ht, tbl))
+               goto exit;
+
+       err = 0;
+
+       head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+       RCU_INIT_POINTER(obj->next, head);
+
+       rcu_assign_pointer(tbl->buckets[hash], obj);
+
+       atomic_inc(&ht->nelems);
+
+exit:
+       spin_unlock(rht_bucket_lock(tbl, hash));
+
+       if (err == 0)
+               return NULL;
+       else if (err == -EAGAIN)
+               return tbl;
+       else
+               return ERR_PTR(err);
+}
+
+static size_t rounded_hashtable_size(const struct rhashtable_params *params)
+{
+       return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+                  (unsigned long)params->min_size);
+}
+
+static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+{
+       return jhash2(key, length, seed);
+}
+
+int rhashtable_init(struct rhashtable *ht,
+                   const struct rhashtable_params *params)
+{
+       struct bucket_table *tbl;
+       size_t size;
+
+       size = HASH_DEFAULT_SIZE;
+
+       if ((!params->key_len && !params->obj_hashfn) ||
+           (params->obj_hashfn && !params->obj_cmpfn))
+               return -EINVAL;
+
+       if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
+               return -EINVAL;
+
+       memset(ht, 0, sizeof(*ht));
+       mutex_init(&ht->mutex);
+       spin_lock_init(&ht->lock);
+       memcpy(&ht->p, params, sizeof(*params));
+
+       if (params->min_size)
+               ht->p.min_size = roundup_pow_of_two(params->min_size);
+
+       if (params->max_size)
+               ht->p.max_size = rounddown_pow_of_two(params->max_size);
+
+       if (params->insecure_max_entries)
+               ht->p.insecure_max_entries =
+                       rounddown_pow_of_two(params->insecure_max_entries);
+       else
+               ht->p.insecure_max_entries = ht->p.max_size * 2;
+
+       ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
+
+       if (params->nelem_hint)
+               size = rounded_hashtable_size(&ht->p);
+
+       /* The maximum (not average) chain length grows with the
+        * size of the hash table, at a rate of (log N)/(log log N).
+        * The value of 16 is selected so that even if the hash
+        * table grew to 2^32 you would not expect the maximum
+        * chain length to exceed it unless we are under attack
+        * (or extremely unlucky).
+        *
+        * As this limit is only to detect attacks, we don't need
+        * to set it to a lower value as you'd need the chain
+        * length to vastly exceed 16 to have any real effect
+        * on the system.
+        */
+       if (!params->insecure_elasticity)
+               ht->elasticity = 16;
+
+       if (params->locks_mul)
+               ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
+       else
+               ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
+
+       ht->key_len = ht->p.key_len;
+       if (!params->hashfn) {
+               ht->p.hashfn = jhash;
+
+               if (!(ht->key_len & (sizeof(u32) - 1))) {
+                       ht->key_len /= sizeof(u32);
+                       ht->p.hashfn = rhashtable_jhash2;
+               }
+       }
+
+       tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+       if (tbl == NULL)
+               return -ENOMEM;
+
+       atomic_set(&ht->nelems, 0);
+
+       RCU_INIT_POINTER(ht->tbl, tbl);
+
+       INIT_WORK(&ht->run_work, rht_deferred_worker);
+
+       return 0;
+}
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+       struct bucket_table *tbl;
+
+       cancel_work_sync(&ht->run_work);
+
+       mutex_lock(&ht->mutex);
+       tbl = rht_dereference(ht->tbl, ht);
+       bucket_table_free(tbl);
+       mutex_unlock(&ht->mutex);
+}
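
The insert/lookup fast paths live as inlines in the copied
linux/rhashtable.h; a hedged sketch of a keyed object table against that API
(the names here are illustrative):

	struct obj {
		u32			key;
		struct rhash_head	node;
	};

	static const struct rhashtable_params obj_params = {
		.key_len	= sizeof(u32),
		.key_offset	= offsetof(struct obj, key),
		.head_offset	= offsetof(struct obj, node),
		.automatic_shrinking = true,
	};

	static int obj_table_demo(void)
	{
		struct rhashtable ht;
		struct obj a = { .key = 1 };
		u32 key = 1;
		int ret;

		ret = rhashtable_init(&ht, &obj_params);
		if (ret)
			return ret;

		ret = rhashtable_insert_fast(&ht, &a.node, obj_params);
		if (!ret) {
			BUG_ON(rhashtable_lookup_fast(&ht, &key, obj_params) != &a);
			rhashtable_remove_fast(&ht, &a.node, obj_params);
		}

		rhashtable_destroy(&ht);
		return ret;
	}
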
diff --git a/linux/sched.c b/linux/sched.c
new file mode 100644 (file)
index 0000000..d926e20
--- /dev/null
@@ -0,0 +1,135 @@
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <linux/futex.h>
+
+/* hack for mips: */
+#define CONFIG_RCU_HAVE_FUTEX 1
+#include <urcu/futex.h>
+
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+__thread struct task_struct *current;
+
+void __put_task_struct(struct task_struct *t)
+{
+       pthread_join(t->thread, NULL);
+       free(t);
+}
+
+/* returns true if process was woken up, false if it was already running */
+int wake_up_process(struct task_struct *p)
+{
+       int ret = p->state != TASK_RUNNING;
+
+       p->state = TASK_RUNNING;
+       futex(&p->state, FUTEX_WAKE|FUTEX_PRIVATE_FLAG,
+             INT_MAX, NULL, NULL, 0);
+       return ret;
+}
+
+void schedule(void)
+{
+       int v;
+
+       rcu_quiescent_state();
+
+       while ((v = READ_ONCE(current->state)) != TASK_RUNNING)
+               futex(&current->state, FUTEX_WAIT|FUTEX_PRIVATE_FLAG,
+                     v, NULL, NULL, 0);
+}
+
+struct process_timer {
+       struct timer_list timer;
+       struct task_struct *task;
+};
+
+static void process_timeout(struct timer_list *t)
+{
+       struct process_timer *timeout =
+               container_of(t, struct process_timer, timer);
+
+       wake_up_process(timeout->task);
+}
+
+long schedule_timeout(long timeout)
+{
+       struct process_timer timer;
+       unsigned long expire;
+
+       switch (timeout)
+       {
+       case MAX_SCHEDULE_TIMEOUT:
+               /*
+                * This special case is useful for the caller's comfort.
+                * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from
+                * one of the negative values, but I'd like to return a
+                * valid offset (>=0) to allow the caller to do everything
+                * it wants with the retval.
+                */
+               schedule();
+               goto out;
+       default:
+               /*
+                * Another bit of PARANOID. Note that the retval will be
+                * 0 since no piece of kernel code is supposed to check
+                * for a negative retval of schedule_timeout() (since it
+                * should never happen anyway). You just have the error
+                * message below that will tell you if something has gone
+                * wrong and where.
+                */
+               if (timeout < 0) {
+                       fprintf(stderr, "schedule_timeout: wrong timeout "
+                               "value %lx\n", timeout);
+                       current->state = TASK_RUNNING;
+                       goto out;
+               }
+       }
+
+       expire = timeout + jiffies;
+
+       timer.task = current;
+       timer_setup_on_stack(&timer.timer, process_timeout, 0);
+       mod_timer(&timer.timer, expire);
+       schedule();
+       del_timer_sync(&timer.timer);
+
+       timeout = expire - jiffies;
+out:
+       return timeout < 0 ? 0 : timeout;
+}
+
+__attribute__((constructor(101)))
+static void sched_init(void)
+{
+       struct task_struct *p = malloc(sizeof(*p));
+
+       mlockall(MCL_CURRENT|MCL_FUTURE);
+
+       memset(p, 0, sizeof(*p));
+
+       p->state        = TASK_RUNNING;
+       atomic_set(&p->usage, 1);
+       init_completion(&p->exited);
+
+       current = p;
+
+       rcu_init();
+       rcu_register_thread();
+}
+
+#ifndef SYS_getrandom
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+int urandom_fd;
+
+__attribute__((constructor(101)))
+static void rand_init(void)
+{
+       urandom_fd = open("/dev/urandom", O_RDONLY);
+       BUG_ON(urandom_fd < 0);
+}
+#endif
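
Note the calling convention schedule_timeout() inherits from the kernel: the
task must mark itself non-running first, otherwise schedule() returns
immediately and no sleep happens. A hedged helper sketch, assuming
msecs_to_jiffies() from the shim headers:

	/* Sleep for roughly @ms milliseconds; returns the jiffies left
	 * over if something woke us early, else 0: */
	static long sleep_ms(unsigned int ms)
	{
		__set_current_state(TASK_UNINTERRUPTIBLE);
		return schedule_timeout(msecs_to_jiffies(ms));
	}
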
diff --git a/linux/semaphore.c b/linux/semaphore.c
new file mode 100644 (file)
index 0000000..b7d4b51
--- /dev/null
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * This file implements counting semaphores.
+ * A counting semaphore may be acquired 'n' times before sleeping.
+ * See mutex.c for single-acquisition sleeping locks which enforce
+ * rules which allow code to be debugged more easily.
+ */
+
+/*
+ * Some notes on the implementation:
+ *
+ * The spinlock controls access to the other members of the semaphore.
+ * down_trylock() and up() can be called from interrupt context, so we
+ * have to disable interrupts when taking the lock.  It turns out various
+ * parts of the kernel expect to be able to use down() on a semaphore in
+ * interrupt context when they know it will succeed, so we have to use
+ * irqsave variants for down(), down_interruptible() and down_killable()
+ * too.
+ *
+ * The ->count variable represents how many more tasks can acquire this
+ * semaphore.  If it's zero, there may be tasks waiting on the wait_list.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+
+static noinline void __down(struct semaphore *sem);
+static noinline int __down_timeout(struct semaphore *sem, long timeout);
+static noinline void __up(struct semaphore *sem);
+
+/**
+ * down - acquire the semaphore
+ * @sem: the semaphore to be acquired
+ *
+ * Acquires the semaphore.  If no more tasks are allowed to acquire the
+ * semaphore, calling this function will put the task to sleep until the
+ * semaphore is released.
+ *
+ * Use of this function is deprecated, please use down_interruptible() or
+ * down_killable() instead.
+ */
+void down(struct semaphore *sem)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&sem->lock, flags);
+       if (likely(sem->count > 0))
+               sem->count--;
+       else
+               __down(sem);
+       raw_spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(down);
+
+/**
+ * down_trylock - try to acquire the semaphore, without waiting
+ * @sem: the semaphore to be acquired
+ *
+ * Try to acquire the semaphore atomically.  Returns 0 if the semaphore has
+ * been acquired successfully or 1 if it cannot be acquired.
+ *
+ * NOTE: This return value is inverted from both spin_trylock and
+ * mutex_trylock!  Be careful about this when converting code.
+ *
+ * Unlike mutex_trylock, this function can be used from interrupt context,
+ * and the semaphore can be released by any task or interrupt.
+ */
+int down_trylock(struct semaphore *sem)
+{
+       unsigned long flags;
+       int count;
+
+       raw_spin_lock_irqsave(&sem->lock, flags);
+       count = sem->count - 1;
+       if (likely(count >= 0))
+               sem->count = count;
+       raw_spin_unlock_irqrestore(&sem->lock, flags);
+
+       return (count < 0);
+}
+EXPORT_SYMBOL(down_trylock);
+
+/**
+ * down_timeout - acquire the semaphore within a specified time
+ * @sem: the semaphore to be acquired
+ * @timeout: how long to wait before failing
+ *
+ * Attempts to acquire the semaphore.  If no more tasks are allowed to
+ * acquire the semaphore, calling this function will put the task to sleep.
+ * If the semaphore is not released within the specified number of jiffies,
+ * this function returns -ETIME.  It returns 0 if the semaphore was acquired.
+ */
+int down_timeout(struct semaphore *sem, long timeout)
+{
+       unsigned long flags;
+       int result = 0;
+
+       raw_spin_lock_irqsave(&sem->lock, flags);
+       if (likely(sem->count > 0))
+               sem->count--;
+       else
+               result = __down_timeout(sem, timeout);
+       raw_spin_unlock_irqrestore(&sem->lock, flags);
+
+       return result;
+}
+EXPORT_SYMBOL(down_timeout);
+
+/**
+ * up - release the semaphore
+ * @sem: the semaphore to release
+ *
+ * Release the semaphore.  Unlike mutexes, up() may be called from any
+ * context and even by tasks which have never called down().
+ */
+void up(struct semaphore *sem)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&sem->lock, flags);
+       if (likely(list_empty(&sem->wait_list)))
+               sem->count++;
+       else
+               __up(sem);
+       raw_spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up);
+
+/* Functions for the contended case */
+
+struct semaphore_waiter {
+       struct list_head list;
+       struct task_struct *task;
+       bool up;
+};
+
+/*
+ * Because this function is inlined, the 'state' parameter will be
+ * constant, and thus optimised away by the compiler.  Likewise the
+ * 'timeout' parameter for the cases without timeouts.
+ */
+static inline int __sched __down_common(struct semaphore *sem, long state,
+                                                               long timeout)
+{
+       struct semaphore_waiter waiter;
+
+       list_add_tail(&waiter.list, &sem->wait_list);
+       waiter.task = current;
+       waiter.up = false;
+
+       for (;;) {
+               if (unlikely(timeout <= 0))
+                       goto timed_out;
+               __set_current_state(state);
+               raw_spin_unlock_irq(&sem->lock);
+               timeout = schedule_timeout(timeout);
+               raw_spin_lock_irq(&sem->lock);
+               if (waiter.up)
+                       return 0;
+       }
+
+ timed_out:
+       list_del(&waiter.list);
+       return -ETIME;
+}
+
+static noinline void __sched __down(struct semaphore *sem)
+{
+       __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
+{
+       return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout);
+}
+
+static noinline void __sched __up(struct semaphore *sem)
+{
+       struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+                                               struct semaphore_waiter, list);
+       list_del(&waiter->list);
+       waiter->up = true;
+       wake_up_process(waiter->task);
+}
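
Usage matches the kernel, including the inverted down_trylock() return value
called out above. A hedged sketch, assuming sema_init() from the copied
semaphore header:

	static struct semaphore sem;

	static void semaphore_demo(void)
	{
		sema_init(&sem, 2);	/* up to two concurrent holders */

		down(&sem);		/* count 2 -> 1 */
		down(&sem);		/* count 1 -> 0 */

		/* NB: 1 means NOT acquired, unlike mutex_trylock(): */
		BUG_ON(down_trylock(&sem) != 1);

		up(&sem);		/* count 0 -> 1 (no waiters) */
		BUG_ON(down_trylock(&sem) != 0);	/* acquired: 1 -> 0 */

		up(&sem);
		up(&sem);
	}
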
diff --git a/linux/shrinker.c b/linux/shrinker.c
new file mode 100644 (file)
index 0000000..7926be0
--- /dev/null
@@ -0,0 +1,94 @@
+
+#include <stdio.h>
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/shrinker.h>
+
+#include "tools-util.h"
+
+static LIST_HEAD(shrinker_list);
+static DEFINE_MUTEX(shrinker_lock);
+
+int register_shrinker(struct shrinker *shrinker)
+{
+       mutex_lock(&shrinker_lock);
+       list_add_tail(&shrinker->list, &shrinker_list);
+       mutex_unlock(&shrinker_lock);
+       return 0;
+}
+
+void unregister_shrinker(struct shrinker *shrinker)
+{
+       mutex_lock(&shrinker_lock);
+       list_del(&shrinker->list);
+       mutex_unlock(&shrinker_lock);
+}
+
+struct meminfo {
+       u64             total;
+       u64             available;
+};
+
+static u64 parse_meminfo_line(const char *line)
+{
+       u64 v;
+
+       if (sscanf(line, " %llu kB", &v) < 1)
+               die("sscanf error");
+       return v << 10;
+}
+
+static struct meminfo read_meminfo(void)
+{
+       struct meminfo ret = { 0 };
+       size_t n = 0;
+       ssize_t len;
+       char *line = NULL;
+       const char *v;
+       FILE *f;
+
+       f = fopen("/proc/meminfo", "r");
+       if (!f)
+               die("error opening /proc/meminfo: %m");
+
+       while ((len = getline(&line, &n, f)) != -1) {
+               if ((v = strcmp_prefix(line, "MemTotal:")))
+                       ret.total = parse_meminfo_line(v);
+
+               if ((v = strcmp_prefix(line, "MemAvailable:")))
+                       ret.available = parse_meminfo_line(v);
+       }
+
+       fclose(f);
+       free(line);
+
+       return ret;
+}
+
+void run_shrinkers(void)
+{
+       struct shrinker *shrinker;
+       struct meminfo info;
+       s64 want_shrink;
+
+       /* Fast out if there are no shrinkers to run. */
+       if (list_empty(&shrinker_list))
+               return;
+
+       info = read_meminfo();
+       want_shrink = (info.total >> 2) - info.available;
+
+       if (want_shrink <= 0)
+               return;
+
+       mutex_lock(&shrinker_lock);
+       list_for_each_entry(shrinker, &shrinker_list, list) {
+               struct shrink_control sc = {
+                       .nr_to_scan = want_shrink >> PAGE_SHIFT
+               };
+
+               shrinker->scan_objects(shrinker, &sc);
+       }
+       mutex_unlock(&shrinker_lock);
+}
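
On the cache side, a shrinker only needs a scan_objects callback;
sc->nr_to_scan arrives as the page deficit computed above. A hedged sketch
(my_cache_trim() is hypothetical):

	static unsigned long my_cache_trim(unsigned long nr_pages);	/* hypothetical */

	static unsigned long my_cache_scan(struct shrinker *s,
					   struct shrink_control *sc)
	{
		/* Free up to sc->nr_to_scan pages' worth of cached objects: */
		return my_cache_trim(sc->nr_to_scan);
	}

	static struct shrinker my_cache_shrinker = {
		.scan_objects	= my_cache_scan,
	};

	static void my_cache_init(void)
	{
		register_shrinker(&my_cache_shrinker);
	}
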
diff --git a/linux/six.c b/linux/six.c
new file mode 100644 (file)
index 0000000..49d46ed
--- /dev/null
@@ -0,0 +1,553 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <linux/log2.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/six.h>
+
+#ifdef DEBUG
+#define EBUG_ON(cond)          BUG_ON(cond)
+#else
+#define EBUG_ON(cond)          do {} while (0)
+#endif
+
+#define six_acquire(l, t)      lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
+#define six_release(l)         lock_release(l, _RET_IP_)
+
+struct six_lock_vals {
+       /* Value we add to the lock in order to take the lock: */
+       u64                     lock_val;
+
+       /* If the lock has this value (used as a mask), taking the lock fails: */
+       u64                     lock_fail;
+
+       /* Value we add to the lock in order to release the lock: */
+       u64                     unlock_val;
+
+       /* Mask that indicates lock is held for this type: */
+       u64                     held_mask;
+
+       /* Waitlist we wakeup when releasing the lock: */
+       enum six_lock_type      unlock_wakeup;
+};
+
+#define __SIX_LOCK_HELD_read   __SIX_VAL(read_lock, ~0)
+#define __SIX_LOCK_HELD_intent __SIX_VAL(intent_lock, ~0)
+#define __SIX_LOCK_HELD_write  __SIX_VAL(seq, 1)
+
+#define LOCK_VALS {                                                    \
+       [SIX_LOCK_read] = {                                             \
+               .lock_val       = __SIX_VAL(read_lock, 1),              \
+               .lock_fail      = __SIX_LOCK_HELD_write,                \
+               .unlock_val     = -__SIX_VAL(read_lock, 1),             \
+               .held_mask      = __SIX_LOCK_HELD_read,                 \
+               .unlock_wakeup  = SIX_LOCK_write,                       \
+       },                                                              \
+       [SIX_LOCK_intent] = {                                           \
+               .lock_val       = __SIX_VAL(intent_lock, 1),            \
+               .lock_fail      = __SIX_LOCK_HELD_intent,               \
+               .unlock_val     = -__SIX_VAL(intent_lock, 1),           \
+               .held_mask      = __SIX_LOCK_HELD_intent,               \
+               .unlock_wakeup  = SIX_LOCK_intent,                      \
+       },                                                              \
+       [SIX_LOCK_write] = {                                            \
+               .lock_val       = __SIX_VAL(seq, 1),                    \
+               .lock_fail      = __SIX_LOCK_HELD_read,                 \
+               .unlock_val     = __SIX_VAL(seq, 1),                    \
+               .held_mask      = __SIX_LOCK_HELD_write,                \
+               .unlock_wakeup  = SIX_LOCK_read,                        \
+       },                                                              \
+}
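+
+/*
+ * Reading the table: a write lock adds 1 to ->seq, so an odd sequence
+ * number doubles as the "write locked" bit (__SIX_LOCK_HELD_write masks
+ * only seq's low bit), and unlocking adds 1 again, releasing the lock
+ * and bumping the sequence in a single operation.  __six_relock_type()
+ * below exploits this when it compares sequence numbers to detect an
+ * intervening writer.  Read locks count up in ->read_lock and fail
+ * while that low seq bit is set; write locks fail while any read lock
+ * is held; intent locks exclude only other intent locks.
+ */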
+
+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
+                                union six_lock_state old)
+{
+       if (type != SIX_LOCK_intent)
+               return;
+
+       if (!old.intent_lock) {
+               EBUG_ON(lock->owner);
+               lock->owner = current;
+       } else {
+               EBUG_ON(lock->owner != current);
+       }
+}
+
+static __always_inline bool do_six_trylock_type(struct six_lock *lock,
+                                               enum six_lock_type type)
+{
+       const struct six_lock_vals l[] = LOCK_VALS;
+       union six_lock_state old;
+       u64 v = READ_ONCE(lock->state.v);
+
+       EBUG_ON(type == SIX_LOCK_write && lock->owner != current);
+
+       do {
+               old.v = v;
+
+               EBUG_ON(type == SIX_LOCK_write &&
+                       ((old.v & __SIX_LOCK_HELD_write) ||
+                        !(old.v & __SIX_LOCK_HELD_intent)));
+
+               if (old.v & l[type].lock_fail)
+                       return false;
+       } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
+                               old.v,
+                               old.v + l[type].lock_val)) != old.v);
+
+       six_set_owner(lock, type, old);
+       return true;
+}
+
+__always_inline __flatten
+static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
+{
+       if (!do_six_trylock_type(lock, type))
+               return false;
+
+       if (type != SIX_LOCK_write)
+               six_acquire(&lock->dep_map, 1);
+       return true;
+}
+
+__always_inline __flatten
+static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
+                             unsigned seq)
+{
+       const struct six_lock_vals l[] = LOCK_VALS;
+       union six_lock_state old;
+       u64 v = READ_ONCE(lock->state.v);
+
+       do {
+               old.v = v;
+
+               if (old.seq != seq || old.v & l[type].lock_fail)
+                       return false;
+       } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
+                               old.v,
+                               old.v + l[type].lock_val)) != old.v);
+
+       six_set_owner(lock, type, old);
+       if (type != SIX_LOCK_write)
+               six_acquire(&lock->dep_map, 1);
+       return true;
+}
+
+struct six_lock_waiter {
+       struct list_head        list;
+       struct task_struct      *task;
+};
+
+/* This is probably up there with the more evil things I've done */
+#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
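+/*
+ * (It builds, via a compound literal, a state word with only waiters bit
+ * 'id' set; ilog2() then yields that bit's offset, which is what the
+ * set_bit()/clear_bit() calls on lock->state.v below need.)
+ */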
+
+#ifdef CONFIG_LOCK_SPIN_ON_OWNER
+
+static inline int six_can_spin_on_owner(struct six_lock *lock)
+{
+       struct task_struct *owner;
+       int retval = 1;
+
+       if (need_resched())
+               return 0;
+
+       rcu_read_lock();
+       owner = READ_ONCE(lock->owner);
+       if (owner)
+               retval = owner->on_cpu;
+       rcu_read_unlock();
+       /*
+        * If lock->owner is not set, the owner may have just acquired the
+        * lock and not yet set the owner field, or the lock has been
+        * released.
+        */
+       return retval;
+}
+
+static inline bool six_spin_on_owner(struct six_lock *lock,
+                                    struct task_struct *owner)
+{
+       bool ret = true;
+
+       rcu_read_lock();
+       while (lock->owner == owner) {
+               /*
+                * Ensure we emit the owner->on_cpu dereference _after_
+                * checking lock->owner still matches owner. If that fails,
+                * owner might point to freed memory. If it still matches,
+                * the rcu_read_lock() ensures the memory stays valid.
+                */
+               barrier();
+
+               if (!owner->on_cpu || need_resched()) {
+                       ret = false;
+                       break;
+               }
+
+               cpu_relax();
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
+{
+       struct task_struct *task = current;
+
+       if (type == SIX_LOCK_write)
+               return false;
+
+       preempt_disable();
+       if (!six_can_spin_on_owner(lock))
+               goto fail;
+
+       if (!osq_lock(&lock->osq))
+               goto fail;
+
+       while (1) {
+               struct task_struct *owner;
+
+               /*
+                * If there's an owner, wait for it to either
+                * release the lock or go to sleep.
+                */
+               owner = READ_ONCE(lock->owner);
+               if (owner && !six_spin_on_owner(lock, owner))
+                       break;
+
+               if (do_six_trylock_type(lock, type)) {
+                       osq_unlock(&lock->osq);
+                       preempt_enable();
+                       return true;
+               }
+
+               /*
+                * When there's no owner, we might have been preempted between
+                * the owner acquiring the lock and setting the owner field.
+                * If we're an RT task, that will live-lock because we won't
+                * let the owner complete.
+                */
+               if (!owner && (need_resched() || rt_task(task)))
+                       break;
+
+               /*
+                * The cpu_relax() call is a compiler barrier which forces
+                * everything in this loop to be re-loaded. We don't need
+                * memory barriers as we'll eventually observe the right
+                * values at the cost of a few extra spins.
+                */
+               cpu_relax();
+       }
+
+       osq_unlock(&lock->osq);
+fail:
+       preempt_enable();
+
+       /*
+        * If we fell out of the spin path because of need_resched(),
+        * reschedule now, before we try-lock again. This avoids getting
+        * scheduled out right after we obtained the lock.
+        */
+       if (need_resched())
+               schedule();
+
+       return false;
+}
+
+#else /* CONFIG_LOCK_SPIN_ON_OWNER */
+
+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
+{
+       return false;
+}
+
+#endif
+
+noinline
+static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type type,
+                                   six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+       const struct six_lock_vals l[] = LOCK_VALS;
+       union six_lock_state old, new;
+       struct six_lock_waiter wait;
+       int ret = 0;
+       u64 v;
+
+       ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
+       if (ret)
+               return ret;
+
+       if (six_optimistic_spin(lock, type))
+               return 0;
+
+       lock_contended(&lock->dep_map, _RET_IP_);
+
+       INIT_LIST_HEAD(&wait.list);
+       wait.task = current;
+
+       while (1) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (type == SIX_LOCK_write)
+                       EBUG_ON(lock->owner != current);
+               else if (list_empty_careful(&wait.list)) {
+                       raw_spin_lock(&lock->wait_lock);
+                       list_add_tail(&wait.list, &lock->wait_list[type]);
+                       raw_spin_unlock(&lock->wait_lock);
+               }
+
+               ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
+               if (ret)
+                       break;
+
+               v = READ_ONCE(lock->state.v);
+               do {
+                       new.v = old.v = v;
+
+                       if (!(old.v & l[type].lock_fail))
+                               new.v += l[type].lock_val;
+                       else if (!(new.waiters & (1 << type)))
+                               new.waiters |= 1 << type;
+                       else
+                               break; /* waiting bit already set */
+               } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
+                                       old.v, new.v)) != old.v);
+
+               if (!(old.v & l[type].lock_fail))
+                       break;
+
+               schedule();
+       }
+
+       if (!ret)
+               six_set_owner(lock, type, old);
+
+       __set_current_state(TASK_RUNNING);
+
+       if (!list_empty_careful(&wait.list)) {
+               raw_spin_lock(&lock->wait_lock);
+               list_del_init(&wait.list);
+               raw_spin_unlock(&lock->wait_lock);
+       }
+
+       return ret;
+}
+
+__always_inline
+static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
+                          six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+       int ret;
+
+       if (type != SIX_LOCK_write)
+               six_acquire(&lock->dep_map, 0);
+
+       ret = do_six_trylock_type(lock, type) ? 0
+               : __six_lock_type_slowpath(lock, type, should_sleep_fn, p);
+
+       if (ret && type != SIX_LOCK_write)
+               six_release(&lock->dep_map);
+       if (!ret)
+               lock_acquired(&lock->dep_map, _RET_IP_);
+
+       return ret;
+}
+
+static inline void six_lock_wakeup(struct six_lock *lock,
+                                  union six_lock_state state,
+                                  unsigned waitlist_id)
+{
+       struct list_head *wait_list = &lock->wait_list[waitlist_id];
+       struct six_lock_waiter *w, *next;
+
+       if (waitlist_id == SIX_LOCK_write && state.read_lock)
+               return;
+
+       if (!(state.waiters & (1 << waitlist_id)))
+               return;
+
+       clear_bit(waitlist_bitnr(waitlist_id),
+                 (unsigned long *) &lock->state.v);
+
+       if (waitlist_id == SIX_LOCK_write) {
+               struct task_struct *p = READ_ONCE(lock->owner);
+
+               if (p)
+                       wake_up_process(p);
+               return;
+       }
+
+       raw_spin_lock(&lock->wait_lock);
+
+       list_for_each_entry_safe(w, next, wait_list, list) {
+               list_del_init(&w->list);
+
+               if (wake_up_process(w->task) &&
+                   waitlist_id != SIX_LOCK_read) {
+                       if (!list_empty(wait_list))
+                               set_bit(waitlist_bitnr(waitlist_id),
+                                       (unsigned long *) &lock->state.v);
+                       break;
+               }
+       }
+
+       raw_spin_unlock(&lock->wait_lock);
+}
+
+__always_inline __flatten
+static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+{
+       const struct six_lock_vals l[] = LOCK_VALS;
+       union six_lock_state state;
+
+       EBUG_ON(!(lock->state.v & l[type].held_mask));
+       EBUG_ON(type == SIX_LOCK_write &&
+               !(lock->state.v & __SIX_LOCK_HELD_intent));
+
+       if (type != SIX_LOCK_write)
+               six_release(&lock->dep_map);
+
+       if (type == SIX_LOCK_intent) {
+               EBUG_ON(lock->owner != current);
+
+               if (lock->intent_lock_recurse) {
+                       --lock->intent_lock_recurse;
+                       return;
+               }
+
+               lock->owner = NULL;
+       }
+
+       state.v = atomic64_add_return_release(l[type].unlock_val,
+                                             &lock->state.counter);
+       six_lock_wakeup(lock, state, l[type].unlock_wakeup);
+}
+
+#define __SIX_LOCK(type)                                               \
+bool six_trylock_##type(struct six_lock *lock)                         \
+{                                                                      \
+       return __six_trylock_type(lock, SIX_LOCK_##type);               \
+}                                                                      \
+EXPORT_SYMBOL_GPL(six_trylock_##type);                                 \
+                                                                       \
+bool six_relock_##type(struct six_lock *lock, u32 seq)                 \
+{                                                                      \
+       return __six_relock_type(lock, SIX_LOCK_##type, seq);           \
+}                                                                      \
+EXPORT_SYMBOL_GPL(six_relock_##type);                                  \
+                                                                       \
+int six_lock_##type(struct six_lock *lock,                             \
+                   six_lock_should_sleep_fn should_sleep_fn, void *p)  \
+{                                                                      \
+       return __six_lock_type(lock, SIX_LOCK_##type, should_sleep_fn, p);\
+}                                                                      \
+EXPORT_SYMBOL_GPL(six_lock_##type);                                    \
+                                                                       \
+void six_unlock_##type(struct six_lock *lock)                          \
+{                                                                      \
+       __six_unlock_type(lock, SIX_LOCK_##type);                       \
+}                                                                      \
+EXPORT_SYMBOL_GPL(six_unlock_##type);
+
+__SIX_LOCK(read)
+__SIX_LOCK(intent)
+__SIX_LOCK(write)
+
+#undef __SIX_LOCK
+
+/* Convert from intent to read: */
+void six_lock_downgrade(struct six_lock *lock)
+{
+       six_lock_increment(lock, SIX_LOCK_read);
+       six_unlock_intent(lock);
+}
+EXPORT_SYMBOL_GPL(six_lock_downgrade);
+
+bool six_lock_tryupgrade(struct six_lock *lock)
+{
+       const struct six_lock_vals l[] = LOCK_VALS;
+       union six_lock_state old, new;
+       u64 v = READ_ONCE(lock->state.v);
+
+       do {
+               new.v = old.v = v;
+
+               EBUG_ON(!(old.v & l[SIX_LOCK_read].held_mask));
+
+               new.v += l[SIX_LOCK_read].unlock_val;
+
+               if (new.v & l[SIX_LOCK_intent].lock_fail)
+                       return false;
+
+               new.v += l[SIX_LOCK_intent].lock_val;
+       } while ((v = atomic64_cmpxchg_acquire(&lock->state.counter,
+                               old.v, new.v)) != old.v);
+
+       six_set_owner(lock, SIX_LOCK_intent, old);
+       six_lock_wakeup(lock, new, l[SIX_LOCK_read].unlock_wakeup);
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
+
+bool six_trylock_convert(struct six_lock *lock,
+                        enum six_lock_type from,
+                        enum six_lock_type to)
+{
+       EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
+
+       if (to == from)
+               return true;
+
+       if (to == SIX_LOCK_read) {
+               six_lock_downgrade(lock);
+               return true;
+       } else {
+               return six_lock_tryupgrade(lock);
+       }
+}
+EXPORT_SYMBOL_GPL(six_trylock_convert);
+
+/*
+ * Increment read/intent lock count, assuming we already have it read or intent
+ * locked:
+ */
+void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
+{
+       const struct six_lock_vals l[] = LOCK_VALS;
+
+       EBUG_ON(type == SIX_LOCK_write);
+       six_acquire(&lock->dep_map, 0);
+
+       /* XXX: assert already locked, and that we don't overflow: */
+
+       switch (type) {
+       case SIX_LOCK_read:
+               atomic64_add(l[type].lock_val, &lock->state.counter);
+               break;
+       case SIX_LOCK_intent:
+               lock->intent_lock_recurse++;
+               break;
+       case SIX_LOCK_write:
+               BUG();
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(six_lock_increment);
+
+void six_lock_wakeup_all(struct six_lock *lock)
+{
+       struct six_lock_waiter *w;
+
+       raw_spin_lock(&lock->wait_lock);
+
+       list_for_each_entry(w, &lock->wait_list[0], list)
+               wake_up_process(w->task);
+       list_for_each_entry(w, &lock->wait_list[1], list)
+               wake_up_process(w->task);
+
+       raw_spin_unlock(&lock->wait_lock);
+}
+EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
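+
+/*
+ * Usage sketch; 'foo' and the retry label here are illustrative, not part
+ * of this file. The intended pattern is: take a read lock, remember the
+ * sequence number, unlock, and later revalidate with six_relock_read(),
+ * which succeeds only if no write lock intervened:
+ *
+ *     six_lock_read(&foo->lock, NULL, NULL);
+ *     seq = foo->lock.state.seq;
+ *     six_unlock_read(&foo->lock);
+ *
+ *     (use whatever was read under the lock)
+ *
+ *     if (!six_relock_read(&foo->lock, seq))
+ *             goto retry;
+ */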
diff --git a/linux/string.c b/linux/string.c
new file mode 100644 (file)
index 0000000..fd2797e
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ *  linux/lib/string.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * stupid library routines.. The optimized versions should generally be found
+ * as inline code in <asm-xx/string.h>
+ *
+ * These are buggy as well..
+ *
+ * * Fri Jun 25 1999, Ingo Oeser <ioe@informatik.tu-chemnitz.de>
+ * -  Added strsep() which will replace strtok() soon (because strsep() is
+ *    reentrant and should be faster). Use only strsep() in new code, please.
+ *
+ * * Sat Feb 09 2002, Jason Thomas <jason@topic.com.au>,
+ *                    Matthew Hawkins <matt@mh.dropbear.id.au>
+ * -  Kissed strtok() goodbye
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+static char *skip_spaces(const char *str)
+{
+       while (isspace(*str))
+               ++str;
+       return (char *)str;
+}
+
+char *strim(char *s)
+{
+       size_t size;
+       char *end;
+
+       size = strlen(s);
+       if (!size)
+               return s;
+
+       end = s + size - 1;
+       while (end >= s && isspace(*end))
+               end--;
+       *(end + 1) = '\0';
+
+       return skip_spaces(s);
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+       size_t ret = strlen(src);
+
+       if (size) {
+               size_t len = (ret >= size) ? size - 1 : ret;
+               memcpy(dest, src, len);
+               dest[len] = '\0';
+       }
+       return ret;
+}
+
+void memzero_explicit(void *s, size_t count)
+{
+       memset(s, 0, count);
+       barrier_data(s);
+}
+
+int match_string(const char * const *array, size_t n, const char *string)
+{
+       int index;
+       const char *item;
+
+       for (index = 0; index < n; index++) {
+               item = array[index];
+               if (!item)
+                       break;
+               if (!strcmp(item, string))
+                       return index;
+       }
+
+       return -EINVAL;
+}
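+
+/*
+ * Usage sketch (illustrative): unlike strncpy(), strlcpy() always
+ * NUL-terminates and returns strlen(src), so truncation can be detected
+ * by comparing the return value against the buffer size:
+ *
+ *     char buf[8];
+ *
+ *     if (strlcpy(buf, src, sizeof(buf)) >= sizeof(buf))
+ *             return -ENAMETOOLONG;
+ */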
diff --git a/linux/timer.c b/linux/timer.c
new file mode 100644 (file)
index 0000000..eb93786
--- /dev/null
@@ -0,0 +1,327 @@
+
+#include <pthread.h>
+#include <signal.h>
+#include <time.h>
+
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/timer.h>
+
+/**
+ * timespec_add_ns - Adds nanoseconds to a timespec
+ * @a:         timespec to be incremented (passed and returned by value)
+ * @ns:                unsigned nanoseconds value to be added
+ *
+ * Returns the resulting timespec.
+ */
+static struct timespec timespec_add_ns(struct timespec a, u64 ns)
+{
+       a.tv_nsec       += ns;
+       a.tv_sec        += a.tv_nsec / NSEC_PER_SEC;
+       a.tv_nsec       %= NSEC_PER_SEC;
+       return a;
+}
+
+#define DECLARE_HEAP(type)                                             \
+struct {                                                               \
+       size_t size, used;                                              \
+       type *data;                                                     \
+}
+
+#define heap_init(heap, _size)                                         \
+({                                                                     \
+       size_t _bytes;                                                  \
+       (heap)->used = 0;                                               \
+       (heap)->size = (_size);                                         \
+       _bytes = (heap)->size * sizeof(*(heap)->data);                  \
+       (heap)->data = malloc(_bytes);                                  \
+       (heap)->data;                                                   \
+})
+
+#define heap_free(heap)                                                        \
+do {                                                                   \
+       kvfree((heap)->data);                                           \
+       (heap)->data = NULL;                                            \
+} while (0)
+
+#define heap_swap(h, i, j)     swap((h)->data[i], (h)->data[j])
+
+#define heap_sift(h, i, cmp)                                           \
+do {                                                                   \
+       size_t _r, _j = i;                                              \
+                                                                       \
+       for (; _j * 2 + 1 < (h)->used; _j = _r) {                       \
+               _r = _j * 2 + 1;                                        \
+               if (_r + 1 < (h)->used &&                               \
+                   cmp((h)->data[_r], (h)->data[_r + 1]))              \
+                       _r++;                                           \
+                                                                       \
+               if (cmp((h)->data[_r], (h)->data[_j]))                  \
+                       break;                                          \
+               heap_swap(h, _r, _j);                                   \
+       }                                                               \
+} while (0)
+
+#define heap_sift_down(h, i, cmp)                                      \
+do {                                                                   \
+       while (i) {                                                     \
+               size_t p = (i - 1) / 2;                                 \
+               if (cmp((h)->data[i], (h)->data[p]))                    \
+                       break;                                          \
+               heap_swap(h, i, p);                                     \
+               i = p;                                                  \
+       }                                                               \
+} while (0)
+
+#define heap_add(h, d, cmp)                                            \
+({                                                                     \
+       bool _r = !heap_full(h);                                        \
+       if (_r) {                                                       \
+               size_t _i = (h)->used++;                                \
+               (h)->data[_i] = d;                                      \
+                                                                       \
+               heap_sift_down(h, _i, cmp);                             \
+               heap_sift(h, _i, cmp);                                  \
+       }                                                               \
+       _r;                                                             \
+})
+
+#define heap_del(h, i, cmp)                                            \
+do {                                                                   \
+       size_t _i = (i);                                                \
+                                                                       \
+       BUG_ON(_i >= (h)->used);                                        \
+       (h)->used--;                                                    \
+       heap_swap(h, _i, (h)->used);                                    \
+       heap_sift_down(h, _i, cmp);                                     \
+       heap_sift(h, _i, cmp);                                          \
+} while (0)
+
+#define heap_pop(h, d, cmp)                                            \
+({                                                                     \
+       bool _r = (h)->used;                                            \
+       if (_r) {                                                       \
+               (d) = (h)->data[0];                                     \
+               heap_del(h, 0, cmp);                                    \
+       }                                                               \
+       _r;                                                             \
+})
+
+#define heap_peek(h)   ((h)->used ? &(h)->data[0] : NULL)
+#define heap_full(h)   ((h)->used == (h)->size)
+#define heap_empty(h)  ((h)->used == 0)
+
+#define heap_resort(heap, cmp)                                         \
+do {                                                                   \
+       ssize_t _i;                                                     \
+       for (_i = (ssize_t) (heap)->used / 2 -  1; _i >= 0; --_i)       \
+               heap_sift(heap, _i, cmp);                               \
+} while (0)
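+
+/*
+ * Heap convention, with an illustrative min_cmp: cmp(a, b) returning true
+ * means a may stay below b, so the element for which cmp() is false
+ * bubbles up to data[0]; a '>' comparison therefore yields a min-heap.
+ * (Note that, despite the names, heap_sift() sifts toward the leaves and
+ * heap_sift_down() toward the root.)
+ *
+ *     static inline bool min_cmp(int a, int b) { return a > b; }
+ *
+ *     DECLARE_HEAP(int) h;
+ *     int v;
+ *
+ *     heap_init(&h, 8);
+ *     heap_add(&h, 3, min_cmp);
+ *     heap_add(&h, 1, min_cmp);
+ *     heap_pop(&h, v, min_cmp);       (v == 1)
+ */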
+
+struct pending_timer {
+       struct timer_list       *timer;
+       unsigned long           expires;
+};
+
+static inline bool pending_timer_cmp(struct pending_timer a,
+                                    struct pending_timer b)
+{
+       /*
+        * cmp() true means "a stays below b" in the heap macros above, so
+        * compare with '>' to keep the soonest expiry at data[0], which is
+        * what heap_peek() in timer_thread() relies on:
+        */
+       return a.expires > b.expires;
+}
+
+static DECLARE_HEAP(struct pending_timer) pending_timers;
+
+static pthread_mutex_t timer_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t  timer_cond = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t  timer_running_cond = PTHREAD_COND_INITIALIZER;
+static unsigned long   timer_seq;
+
+static inline bool timer_running(void)
+{
+       return timer_seq & 1;
+}
+
+static ssize_t timer_idx(struct timer_list *timer)
+{
+       size_t i;
+
+       for (i = 0; i < pending_timers.used; i++)
+               if (pending_timers.data[i].timer == timer)
+                       return i;
+
+       return -1;
+}
+
+int del_timer(struct timer_list *timer)
+{
+       ssize_t idx;
+
+       pthread_mutex_lock(&timer_lock);
+       idx = timer_idx(timer);
+       if (idx >= 0)
+               heap_del(&pending_timers, idx, pending_timer_cmp);
+
+       timer->pending = false;
+       pthread_mutex_unlock(&timer_lock);
+
+       return idx >= 0;
+}
+
+void flush_timers(void)
+{
+       unsigned long seq;
+
+       pthread_mutex_lock(&timer_lock);
+       seq = timer_seq;
+       while (timer_running() && seq == timer_seq)
+               pthread_cond_wait(&timer_running_cond, &timer_lock);
+
+       pthread_mutex_unlock(&timer_lock);
+}
+
+int del_timer_sync(struct timer_list *timer)
+{
+       unsigned long seq;
+       ssize_t idx;
+
+       pthread_mutex_lock(&timer_lock);
+       idx = timer_idx(timer);
+       if (idx >= 0)
+               heap_del(&pending_timers, idx, pending_timer_cmp);
+
+       timer->pending = false;
+
+       seq = timer_seq;
+       while (timer_running() && seq == timer_seq)
+               pthread_cond_wait(&timer_running_cond, &timer_lock);
+       pthread_mutex_unlock(&timer_lock);
+
+       return idx >= 0;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+       ssize_t idx;
+
+       pthread_mutex_lock(&timer_lock);
+       timer->expires = expires;
+       timer->pending = true;
+       idx = timer_idx(timer);
+
+       if (idx >= 0 &&
+           pending_timers.data[idx].expires == expires)
+               goto out;
+
+       if (idx >= 0) {
+               pending_timers.data[idx].expires = expires;
+
+               heap_sift_down(&pending_timers, idx, pending_timer_cmp);
+               heap_sift(&pending_timers, idx, pending_timer_cmp);
+       } else {
+               if (heap_full(&pending_timers)) {
+                       pending_timers.size *= 2;
+                       pending_timers.data =
+                               realloc(pending_timers.data,
+                                       pending_timers.size *
+                                       sizeof(struct pending_timer));
+
+                       BUG_ON(!pending_timers.data);
+               }
+
+               heap_add(&pending_timers,
+                        ((struct pending_timer) {
+                               .timer = timer,
+                               .expires = expires,
+                        }),
+                        pending_timer_cmp);
+       }
+
+       pthread_cond_signal(&timer_cond);
+out:
+       pthread_mutex_unlock(&timer_lock);
+
+       return idx >= 0;
+}
+
+static bool timer_thread_stop = false;
+
+static int timer_thread(void *arg)
+{
+       struct pending_timer *p;
+       struct timespec ts;
+       unsigned long now;
+       int ret;
+
+       pthread_mutex_lock(&timer_lock);
+
+       while (!timer_thread_stop) {
+               now = jiffies;
+               p = heap_peek(&pending_timers);
+
+               if (!p) {
+                       pthread_cond_wait(&timer_cond, &timer_lock);
+                       continue;
+               }
+
+               if (time_after_eq(now, p->expires)) {
+                       struct timer_list *timer = p->timer;
+
+                       heap_del(&pending_timers, 0, pending_timer_cmp);
+                       BUG_ON(!timer_pending(timer));
+                       timer->pending = false;
+
+                       timer_seq++;
+                       BUG_ON(!timer_running());
+
+                       pthread_mutex_unlock(&timer_lock);
+                       timer->function(timer);
+                       pthread_mutex_lock(&timer_lock);
+
+                       timer_seq++;
+                       pthread_cond_broadcast(&timer_running_cond);
+                       continue;
+               }
+
+               ret = clock_gettime(CLOCK_REALTIME, &ts);
+               BUG_ON(ret);
+
+               ts = timespec_add_ns(ts, jiffies_to_nsecs(p->expires - now));
+
+               pthread_cond_timedwait(&timer_cond, &timer_lock, &ts);
+       }
+
+       pthread_mutex_unlock(&timer_lock);
+
+       return 0;
+}
+
+struct task_struct *timer_task;
+
+__attribute__((constructor(103)))
+static void timers_init(void)
+{
+       heap_init(&pending_timers, 64);
+       BUG_ON(!pending_timers.data);
+
+       timer_task = kthread_run(timer_thread, NULL, "timers");
+       BUG_ON(IS_ERR(timer_task));
+}
+
+__attribute__((destructor(103)))
+static void timers_cleanup(void)
+{
+       get_task_struct(timer_task);
+
+       pthread_mutex_lock(&timer_lock);
+       timer_thread_stop = true;
+       pthread_cond_signal(&timer_cond);
+       pthread_mutex_unlock(&timer_lock);
+
+       int ret = kthread_stop(timer_task);
+       BUG_ON(ret);
+
+       put_task_struct(timer_task);
+       timer_task = NULL;
+}
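+
+/*
+ * Usage sketch; my_timer_fn is illustrative, and the initializer assumes
+ * struct timer_list from this shim's <linux/timer.h> with its .function
+ * member, plus the usual jiffies/HZ helpers:
+ *
+ *     static void my_timer_fn(struct timer_list *t) { ... }
+ *
+ *     static struct timer_list t = { .function = my_timer_fn };
+ *
+ *     mod_timer(&t, jiffies + HZ);    (arm, roughly one second out)
+ *     ...
+ *     del_timer_sync(&t);             (also waits out a running callback)
+ */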
diff --git a/linux/wait.c b/linux/wait.c
new file mode 100644 (file)
index 0000000..991875c
--- /dev/null
@@ -0,0 +1,245 @@
+/*
+ * Generic waiting primitives.
+ *
+ * (C) 2004 Nadia Yvette Chambers, Oracle
+ */
+
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+static inline int waitqueue_active(wait_queue_head_t *q)
+{
+       return !list_empty(&q->task_list);
+}
+
+static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
+{
+       list_add(&new->task_list, &head->task_list);
+}
+
+static inline void __add_wait_queue_tail(wait_queue_head_t *head,
+                                        wait_queue_t *new)
+{
+       list_add_tail(&new->task_list, &head->task_list);
+}
+
+static inline void
+__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+{
+       wait->flags |= WQ_FLAG_EXCLUSIVE;
+       __add_wait_queue_tail(q, wait);
+}
+
+static inline void
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+{
+       list_del(&old->task_list);
+}
+
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+                            int nr_exclusive, int wake_flags, void *key)
+{
+       wait_queue_t *curr, *next;
+
+       list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
+               unsigned flags = curr->flags;
+
+               if (curr->func(curr, mode, wake_flags, key) &&
+                               (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+                       break;
+       }
+}
+
+static void __wake_up(wait_queue_head_t *q, unsigned int mode,
+                     int nr_exclusive, void *key)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&q->lock, flags);
+       __wake_up_common(q, mode, nr_exclusive, 0, key);
+       spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void wake_up(wait_queue_head_t *q)
+{
+       __wake_up(q, TASK_NORMAL, 1, NULL);
+}
+
+static void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+{
+       __wake_up_common(q, mode, nr, 0, NULL);
+}
+
+void
+prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+       unsigned long flags;
+
+       wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+       spin_lock_irqsave(&q->lock, flags);
+       if (list_empty(&wait->task_list))
+               __add_wait_queue(q, wait);
+       set_current_state(state);
+       spin_unlock_irqrestore(&q->lock, flags);
+}
+
+static void
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+       unsigned long flags;
+
+       wait->flags |= WQ_FLAG_EXCLUSIVE;
+       spin_lock_irqsave(&q->lock, flags);
+       if (list_empty(&wait->task_list))
+               __add_wait_queue_tail(q, wait);
+       set_current_state(state);
+       spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+       unsigned long flags;
+
+       __set_current_state(TASK_RUNNING);
+       /*
+        * We can check for list emptiness outside the lock
+        * IFF:
+        *  - we use the "careful" check that verifies both
+        *    the next and prev pointers, so that there cannot
+        *    be any half-pending updates in progress on other
+        *    CPUs that we haven't seen yet (and that might
+        *    still change the stack area),
+        * and
+        *  - all other users take the lock (i.e. we can only
+        *    have _one_ other CPU that looks at or modifies
+        *    the list).
+        */
+       if (!list_empty_careful(&wait->task_list)) {
+               spin_lock_irqsave(&q->lock, flags);
+               list_del_init(&wait->task_list);
+               spin_unlock_irqrestore(&q->lock, flags);
+       }
+}
+
+int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+                         void *key)
+{
+       return wake_up_process(curr->private);
+}
+
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+       int ret = default_wake_function(wait, mode, sync, key);
+
+       if (ret)
+               list_del_init(&wait->task_list);
+       return ret;
+}
+
+struct wait_bit_key {
+       void                    *flags;
+       int                     bit_nr;
+       unsigned long           timeout;
+};
+
+struct wait_bit_queue {
+       struct wait_bit_key     key;
+       wait_queue_t            wait;
+};
+
+static int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+       struct wait_bit_key *key = arg;
+       struct wait_bit_queue *wait_bit =
+               container_of(wait, struct wait_bit_queue, wait);
+
+       return (wait_bit->key.flags == key->flags &&
+               wait_bit->key.bit_nr == key->bit_nr &&
+               !test_bit(key->bit_nr, key->flags))
+               ? autoremove_wake_function(wait, mode, sync, key) : 0;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(bit_wq);
+
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit)                          \
+       { .flags = word, .bit_nr = bit, }
+
+#define DEFINE_WAIT_BIT(name, word, bit)                               \
+       struct wait_bit_queue name = {                                  \
+               .key = __WAIT_BIT_KEY_INITIALIZER(word, bit),           \
+               .wait   = {                                             \
+                       .private        = current,                      \
+                       .func           = wake_bit_function,            \
+                       .task_list      =                               \
+                               LIST_HEAD_INIT((name).wait.task_list),  \
+               },                                                      \
+       }
+
+void wake_up_bit(void *word, int bit)
+{
+       struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+
+       if (waitqueue_active(&bit_wq))
+               __wake_up(&bit_wq, TASK_NORMAL, 1, &key);
+}
+
+void __wait_on_bit(void *word, int bit, unsigned mode)
+{
+       DEFINE_WAIT_BIT(wait, word, bit);
+
+       do {
+               prepare_to_wait(&bit_wq, &wait.wait, mode);
+               if (test_bit(wait.key.bit_nr, wait.key.flags))
+                       schedule();
+       } while (test_bit(wait.key.bit_nr, wait.key.flags));
+
+       finish_wait(&bit_wq, &wait.wait);
+}
+
+void __wait_on_bit_lock(void *word, int bit, unsigned mode)
+{
+       DEFINE_WAIT_BIT(wait, word, bit);
+
+       do {
+               prepare_to_wait_exclusive(&bit_wq, &wait.wait, mode);
+               if (!test_bit(wait.key.bit_nr, wait.key.flags))
+                       continue;
+               schedule();
+       } while (test_and_set_bit(wait.key.bit_nr, wait.key.flags));
+       finish_wait(&bit_wq, &wait.wait);
+}
+
+void complete(struct completion *x)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&x->wait.lock, flags);
+       x->done++;
+       __wake_up_locked(&x->wait, TASK_NORMAL, 1);
+       spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void wait_for_completion(struct completion *x)
+{
+       spin_lock_irq(&x->wait.lock);
+
+       if (!x->done) {
+               DECLARE_WAITQUEUE(wait, current);
+
+               __add_wait_queue_tail_exclusive(&x->wait, &wait);
+               do {
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       spin_unlock_irq(&x->wait.lock);
+
+                       schedule();
+                       spin_lock_irq(&x->wait.lock);
+               } while (!x->done);
+               __remove_wait_queue(&x->wait, &wait);
+               if (!x->done)
+                       goto out;
+       }
+       x->done--;
+out:
+       spin_unlock_irq(&x->wait.lock);
+}
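+
+/*
+ * Usage sketch (assumes DECLARE_COMPLETION() from this shim's
+ * <linux/completion.h>): one thread blocks in wait_for_completion() until
+ * another calls complete(); each complete() releases exactly one waiter,
+ * since waiters are queued exclusive:
+ *
+ *     static DECLARE_COMPLETION(done);
+ *
+ *     waiting thread:                 signalling thread:
+ *     wait_for_completion(&done);     complete(&done);
+ */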
diff --git a/linux/workqueue.c b/linux/workqueue.c
new file mode 100644 (file)
index 0000000..0d5af3f
--- /dev/null
@@ -0,0 +1,346 @@
+#include <pthread.h>
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t  work_finished = PTHREAD_COND_INITIALIZER;
+static LIST_HEAD(wq_list);
+
+struct workqueue_struct {
+       struct list_head        list;
+
+       struct work_struct      *current_work;
+       struct list_head        pending_work;
+
+       struct task_struct      *worker;
+       char                    name[24];
+};
+
+enum {
+       WORK_PENDING_BIT,
+};
+
+static bool work_pending(struct work_struct *work)
+{
+       return test_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static void clear_work_pending(struct work_struct *work)
+{
+       clear_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static bool set_work_pending(struct work_struct *work)
+{
+       return !test_and_set_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static void __queue_work(struct workqueue_struct *wq,
+                        struct work_struct *work)
+{
+       BUG_ON(!work_pending(work));
+       BUG_ON(!list_empty(&work->entry));
+
+       list_add_tail(&work->entry, &wq->pending_work);
+       wake_up_process(wq->worker);
+}
+
+bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+       bool ret;
+
+       pthread_mutex_lock(&wq_lock);
+       if ((ret = set_work_pending(work)))
+               __queue_work(wq, work);
+       pthread_mutex_unlock(&wq_lock);
+
+       return ret;
+}
+
+void delayed_work_timer_fn(struct timer_list *timer)
+{
+       struct delayed_work *dwork =
+               container_of(timer, struct delayed_work, timer);
+
+       pthread_mutex_lock(&wq_lock);
+       __queue_work(dwork->wq, &dwork->work);
+       pthread_mutex_unlock(&wq_lock);
+}
+
+static void __queue_delayed_work(struct workqueue_struct *wq,
+                                struct delayed_work *dwork,
+                                unsigned long delay)
+{
+       struct timer_list *timer = &dwork->timer;
+       struct work_struct *work = &dwork->work;
+
+       BUG_ON(timer->function != delayed_work_timer_fn);
+       BUG_ON(timer_pending(timer));
+       BUG_ON(!list_empty(&work->entry));
+
+       if (!delay) {
+               __queue_work(wq, &dwork->work);
+       } else {
+               dwork->wq = wq;
+               timer->expires = jiffies + delay;
+               add_timer(timer);
+       }
+}
+
+bool queue_delayed_work(struct workqueue_struct *wq,
+                       struct delayed_work *dwork,
+                       unsigned long delay)
+{
+       struct work_struct *work = &dwork->work;
+       bool ret;
+
+       pthread_mutex_lock(&wq_lock);
+       if ((ret = set_work_pending(work)))
+               __queue_delayed_work(wq, dwork, delay);
+       pthread_mutex_unlock(&wq_lock);
+
+       return ret;
+}
+
+static bool grab_pending(struct work_struct *work, bool is_dwork)
+{
+retry:
+       if (set_work_pending(work)) {
+               BUG_ON(!list_empty(&work->entry));
+               return false;
+       }
+
+       if (is_dwork) {
+               struct delayed_work *dwork = to_delayed_work(work);
+
+               if (likely(del_timer(&dwork->timer))) {
+                       BUG_ON(!list_empty(&work->entry));
+                       return true;
+               }
+       }
+
+       if (!list_empty(&work->entry)) {
+               list_del_init(&work->entry);
+               return true;
+       }
+
+       BUG_ON(!is_dwork);
+
+       pthread_mutex_unlock(&wq_lock);
+       flush_timers();
+       pthread_mutex_lock(&wq_lock);
+       goto retry;
+}
+
+static bool work_running(struct work_struct *work)
+{
+       struct workqueue_struct *wq;
+
+       list_for_each_entry(wq, &wq_list, list)
+               if (wq->current_work == work)
+                       return true;
+
+       return false;
+}
+
+bool flush_work(struct work_struct *work)
+{
+       bool ret = false;
+
+       pthread_mutex_lock(&wq_lock);
+       while (work_pending(work) || work_running(work)) {
+               pthread_cond_wait(&work_finished, &wq_lock);
+               ret = true;
+       }
+       pthread_mutex_unlock(&wq_lock);
+
+       return ret;
+}
+
+static bool __flush_work(struct work_struct *work)
+{
+       bool ret = false;
+
+       while (work_running(work)) {
+               pthread_cond_wait(&work_finished, &wq_lock);
+               ret = true;
+       }
+
+       return ret;
+}
+
+bool cancel_work_sync(struct work_struct *work)
+{
+       bool ret;
+
+       pthread_mutex_lock(&wq_lock);
+       ret = grab_pending(work, false);
+
+       __flush_work(work);
+       clear_work_pending(work);
+       pthread_mutex_unlock(&wq_lock);
+
+       return ret;
+}
+
+bool mod_delayed_work(struct workqueue_struct *wq,
+                     struct delayed_work *dwork,
+                     unsigned long delay)
+{
+       struct work_struct *work = &dwork->work;
+       bool ret;
+
+       pthread_mutex_lock(&wq_lock);
+       ret = grab_pending(work, true);
+
+       __queue_delayed_work(wq, dwork, delay);
+       pthread_mutex_unlock(&wq_lock);
+
+       return ret;
+}
+
+bool cancel_delayed_work(struct delayed_work *dwork)
+{
+       struct work_struct *work = &dwork->work;
+       bool ret;
+
+       pthread_mutex_lock(&wq_lock);
+       ret = grab_pending(work, true);
+
+       clear_work_pending(&dwork->work);
+       pthread_mutex_unlock(&wq_lock);
+
+       return ret;
+}
+
+bool cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+       struct work_struct *work = &dwork->work;
+       bool ret;
+
+       pthread_mutex_lock(&wq_lock);
+       ret = grab_pending(work, true);
+
+       __flush_work(work);
+       clear_work_pending(work);
+       pthread_mutex_unlock(&wq_lock);
+
+       return ret;
+}
+
+static int worker_thread(void *arg)
+{
+       struct workqueue_struct *wq = arg;
+       struct work_struct *work;
+
+       pthread_mutex_lock(&wq_lock);
+       while (1) {
+               __set_current_state(TASK_INTERRUPTIBLE);
+               work = list_first_entry_or_null(&wq->pending_work,
+                               struct work_struct, entry);
+               wq->current_work = work;
+
+               if (kthread_should_stop()) {
+                       BUG_ON(wq->current_work);
+                       break;
+               }
+
+               if (!work) {
+                       pthread_mutex_unlock(&wq_lock);
+                       schedule();
+                       pthread_mutex_lock(&wq_lock);
+                       continue;
+               }
+
+               BUG_ON(!work_pending(work));
+               list_del_init(&work->entry);
+               clear_work_pending(work);
+
+               pthread_mutex_unlock(&wq_lock);
+               work->func(work);
+               pthread_mutex_lock(&wq_lock);
+
+               pthread_cond_broadcast(&work_finished);
+       }
+       pthread_mutex_unlock(&wq_lock);
+
+       return 0;
+}
+
+void destroy_workqueue(struct workqueue_struct *wq)
+{
+       kthread_stop(wq->worker);
+
+       pthread_mutex_lock(&wq_lock);
+       list_del(&wq->list);
+       pthread_mutex_unlock(&wq_lock);
+
+       kfree(wq);
+}
+
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+                                        unsigned flags,
+                                        int max_active,
+                                        ...)
+{
+       va_list args;
+       struct workqueue_struct *wq;
+
+       wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+       if (!wq)
+               return NULL;
+
+       INIT_LIST_HEAD(&wq->list);
+       INIT_LIST_HEAD(&wq->pending_work);
+
+       va_start(args, max_active);
+       vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+       va_end(args);
+
+       wq->worker = kthread_run(worker_thread, wq, "%s", wq->name);
+       if (IS_ERR(wq->worker)) {
+               kfree(wq);
+               return NULL;
+       }
+
+       pthread_mutex_lock(&wq_lock);
+       list_add(&wq->list, &wq_list);
+       pthread_mutex_unlock(&wq_lock);
+
+       return wq;
+}
+
+struct workqueue_struct *system_wq;
+struct workqueue_struct *system_highpri_wq;
+struct workqueue_struct *system_long_wq;
+struct workqueue_struct *system_unbound_wq;
+struct workqueue_struct *system_freezable_wq;
+
+__attribute__((constructor(102)))
+static void wq_init(void)
+{
+       system_wq = alloc_workqueue("events", 0, 0);
+       system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
+       system_long_wq = alloc_workqueue("events_long", 0, 0);
+       system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
+                                           WQ_UNBOUND_MAX_ACTIVE);
+       system_freezable_wq = alloc_workqueue("events_freezable",
+                                             WQ_FREEZABLE, 0);
+       BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
+              !system_unbound_wq || !system_freezable_wq);
+}
+
+__attribute__((destructor(102)))
+static void wq_cleanup(void)
+{
+       destroy_workqueue(system_freezable_wq);
+       destroy_workqueue(system_unbound_wq);
+       destroy_workqueue(system_long_wq);
+       destroy_workqueue(system_highpri_wq);
+       destroy_workqueue(system_wq);
+
+       system_wq = system_highpri_wq = system_long_wq = system_unbound_wq =
+               system_freezable_wq = NULL;
+}
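+
+/*
+ * Usage sketch; my_work_fn is illustrative and INIT_WORK() is assumed
+ * from this shim's <linux/workqueue.h>. Work items run on the queue's
+ * worker thread; flush_work() blocks until a pending or currently running
+ * item has finished:
+ *
+ *     static void my_work_fn(struct work_struct *w) { ... }
+ *
+ *     struct work_struct w;
+ *
+ *     INIT_WORK(&w, my_work_fn);
+ *     queue_work(system_wq, &w);
+ *     flush_work(&w);
+ */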
diff --git a/mkfs.bcachefs b/mkfs.bcachefs
new file mode 100755 (executable)
index 0000000..b3631ba
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+SDIR="$(readlink -f "$0")"
+exec "${SDIR%/*}/bcachefs" format "$@"
diff --git a/mount.bcachefs.sh b/mount.bcachefs.sh
new file mode 100755 (executable)
index 0000000..b75fbf8
--- /dev/null
@@ -0,0 +1,50 @@
+#!/bin/bash
+
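+# Invoked by mount(8) as the bcachefs mount helper:
+#
+#     mount.bcachefs <UUID|device> <mountpoint> [-o options]
+#
+# If the first non-option argument is a filesystem UUID, it is expanded
+# below into a colon-joined list of member devices before exec'ing
+# mount -t bcachefs.
+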
+join_by()
+{
+    local IFS="$1"
+    shift
+    echo "$*"
+}
+
+args=$(getopt -u -o 'sfnvo:t:N:' -n 'mount.bcachefs' -- "$@")
+if [ $? -ne 0 ]; then
+    echo 'Terminating...' >&2
+    exit 1
+fi
+
+read -r -a argv <<< "$args"
+
+for i in ${!argv[@]}; do
+    [[ ${argv[$i]} == '--' ]] && break
+done
+
+i=$((i+1))
+
+if (( ${#argv[@]} < i + 2 )); then
+    echo "Insufficient arguments" >&2
+    exit 1
+fi
+
+UUID=${argv[$i]}
+
+if [[ ${UUID//-/} =~ ^[[:xdigit:]]{32}$ ]]; then
+    PARTS=()
+
+    for part in $(tail -n +3 /proc/partitions|awk '{print $4}'); do
+       uuid_line=$(bcachefs show-super /dev/$part|& head -n1)
+
+       if [[ $uuid_line =~ $UUID ]]; then
+           PARTS+=(/dev/$part)
+       fi
+    done
+
+    if [[ ${#PARTS[@]} == 0 ]]; then
+       echo "uuid $UUID not found"
+       exit 1
+    fi
+
+    argv[$i]=$(join_by : "${PARTS[@]}")
+fi
+
+exec mount -i -t bcachefs "${argv[@]}"
diff --git a/mount/Cargo.lock b/mount/Cargo.lock
new file mode 100644 (file)
index 0000000..77ccbba
--- /dev/null
@@ -0,0 +1,695 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "aho-corasick"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "ansi_term"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9a60d744a80c30fcb657dfe2c1b22bcb3e814c1a1e3674f32bf5820b570fbff"
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
+
+[[package]]
+name = "bcachefs-mount"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "bindgen",
+ "bitfield",
+ "byteorder",
+ "clap",
+ "either",
+ "env_logger",
+ "errno",
+ "gag",
+ "getset",
+ "itertools",
+ "libc",
+ "log",
+ "memoffset",
+ "parse-display",
+ "pkg-config",
+ "rpassword",
+ "structopt",
+ "udev",
+ "uuid",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.53.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6bb26d6a69a335b8cb0e7c7e9775cd5666611dc50a37177c3f2cedcfc040e8c8"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "cfg-if",
+ "clang-sys",
+ "lazy_static",
+ "lazycell",
+ "peeking_take_while",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+]
+
+[[package]]
+name = "bitfield"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46afbd2983a5d5a7bd740ccb198caf5b82f45c40c09c0eed36052d91cb92e719"
+
+[[package]]
+name = "bitflags"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
+
+[[package]]
+name = "byteorder"
+version = "1.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
+
+[[package]]
+name = "cexpr"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "cfg-if"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
+
+[[package]]
+name = "clang-sys"
+version = "0.29.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe6837df1d5cba2397b835c8530f51723267e16abbf83892e9e5af4f0e5dd10a"
+dependencies = [
+ "glob",
+ "libc",
+]
+
+[[package]]
+name = "clap"
+version = "2.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
+dependencies = [
+ "ansi_term",
+ "atty",
+ "bitflags",
+ "strsim",
+ "term_size",
+ "textwrap",
+ "unicode-width",
+ "vec_map",
+]
+
+[[package]]
+name = "either"
+version = "1.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
+
+[[package]]
+name = "env_logger"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36"
+dependencies = [
+ "log",
+]
+
+[[package]]
+name = "errno"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b480f641ccf0faf324e20c1d3e53d81b7484c698b42ea677f6907ae4db195371"
+dependencies = [
+ "errno-dragonfly",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "errno-dragonfly"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14ca354e36190500e1e1fb267c647932382b54053c50b14970856c0b00a35067"
+dependencies = [
+ "gcc",
+ "libc",
+]
+
+[[package]]
+name = "gag"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cc0b9f53275dc5fada808f1d2f82e3688a6c14d735633d1590b7be8eb2307b5"
+dependencies = [
+ "libc",
+ "tempfile",
+]
+
+[[package]]
+name = "gcc"
+version = "0.3.55"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2"
+
+[[package]]
+name = "getrandom"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "getset"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f62a139c59ae846c3964c392f12aac68f1997d1a40e9d3b40a89a4ab553e04a0"
+dependencies = [
+ "proc-macro-error 0.4.12",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
+
+[[package]]
+name = "heck"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
+dependencies = [
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "itertools"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "lazycell"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b294d6fa9ee409a054354afc4352b0b9ef7ca222c69b8812cbea9e7d2bf3783f"
+
+[[package]]
+name = "libc"
+version = "0.2.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99e85c08494b21a9054e7fe1374a732aeadaff3980b6990b94bfd3a70f690005"
+
+[[package]]
+name = "libudev-sys"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c8469b4a23b962c1396b9b451dda50ef5b283e8dd309d69033475fa9b334324"
+dependencies = [
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "log"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "memchr"
+version = "2.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
+
+[[package]]
+name = "memoffset"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "nom"
+version = "5.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b471253da97532da4b61552249c521e01e736071f71c1a4f7ebbfbf0a06aad6"
+dependencies = [
+ "memchr",
+ "version_check",
+]
+
+[[package]]
+name = "parse-display"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "718b422bc6b056b6374f7ffc3b2d9b55180a4af59a089835df1963994676d8b6"
+dependencies = [
+ "lazy_static",
+ "parse-display-derive",
+ "regex",
+]
+
+[[package]]
+name = "parse-display-derive"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7cf2deb364a60cc0f633c1ffe619b42463993c91352ae367010b8420e442655"
+dependencies = [
+ "lazy_static",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "regex-syntax",
+ "syn",
+]
+
+[[package]]
+name = "peeking_take_while"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
+
+[[package]]
+name = "proc-macro-error"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18f33027081eba0a6d8aba6d1b1c3a3be58cbb12106341c2d5759fcd9b5277e7"
+dependencies = [
+ "proc-macro-error-attr 0.4.12",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678"
+dependencies = [
+ "proc-macro-error-attr 1.0.2",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a5b4b77fdb63c1eca72173d68d24501c54ab1269409f6b672c85deb18af69de"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "syn-mid",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "syn-mid",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8872cf6f48eee44265156c111456a700ab3483686b3f96df4cf5481c89157319"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c1f4b0efa5fc5e8ceb705136bfee52cfdb6a4e3509f770b478cd6ed434232a7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
+dependencies = [
+ "getrandom",
+ "libc",
+ "rand_chacha",
+ "rand_core",
+ "rand_hc",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rand_hc"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.1.56"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
+
+[[package]]
+name = "regex"
+version = "1.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+ "thread_local",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
+
+[[package]]
+name = "remove_dir_all"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "rpassword"
+version = "4.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99371657d3c8e4d816fb6221db98fa408242b0b53bac08f8676a41f8554fe99f"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "shlex"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
+
+[[package]]
+name = "strsim"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+
+[[package]]
+name = "structopt"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
+dependencies = [
+ "clap",
+ "lazy_static",
+ "structopt-derive",
+]
+
+[[package]]
+name = "structopt-derive"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d239ca4b13aee7a2142e6795cbd69e457665ff8037aed33b3effdc430d2f927a"
+dependencies = [
+ "heck",
+ "proc-macro-error 1.0.2",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "410a7488c0a728c7ceb4ad59b9567eb4053d02e8cc7f5c0e0eeeb39518369213"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
+[[package]]
+name = "syn-mid"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tempfile"
+version = "3.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "rand",
+ "redox_syscall",
+ "remove_dir_all",
+ "winapi",
+]
+
+[[package]]
+name = "term_size"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e4129646ca0ed8f45d09b929036bafad5377103edd06e50bf574b353d2b08d9"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "term_size",
+ "unicode-width",
+]
+
+[[package]]
+name = "thread_local"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
+dependencies = [
+ "lazy_static",
+]
+
+[[package]]
+name = "udev"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24953d50a3bce0f5f5a9a2766567072dc9af8096f8c40ea81815da651066bc9f"
+dependencies = [
+ "libc",
+ "libudev-sys",
+]
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
+
+[[package]]
+name = "uuid"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fde2f6a4bea1d6e007c4ad38c6839fa71cbb63b6dbf5b595aa38dc9b1093c11"
+
+[[package]]
+name = "vec_map"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
+
+[[package]]
+name = "version_check"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce"
+
+[[package]]
+name = "wasi"
+version = "0.9.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
+
+[[package]]
+name = "winapi"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/mount/Cargo.toml b/mount/Cargo.toml
new file mode 100644 (file)
index 0000000..4fd0d49
--- /dev/null
@@ -0,0 +1,34 @@
+[package]
+name = "bcachefs-mount"
+version = "0.1.0"
+authors = ["Yuxuan Shui <yshuiv7@gmail.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+log = "0.4"
+clap = { version = "2.33", features = [ "wrap_help" ] }
+env_logger = { version = "0.7", default-features = false }
+anyhow = "1.0"
+udev = "0.4"
+uuid = "0.8"
+libc = "0.2.69"
+gag = "0.1"
+bitfield = "0.13"
+memoffset = "0.5"
+getset = "0.1"
+itertools = "0.9"
+structopt = "0.3"
+parse-display = "0.1"
+errno = "0.2"
+either = "1.5"
+rpassword = "4"
+byteorder = "1.3"
+
+[lib]
+crate-type = ["staticlib"]
+
+[build-dependencies]
+pkg-config = "0.3"
+bindgen = { version = "0.53", default-features = false }
diff --git a/mount/build.rs b/mount/build.rs
new file mode 100644 (file)
index 0000000..6542889
--- /dev/null
@@ -0,0 +1,67 @@
+fn main() {
+       use std::path::PathBuf;
+       use std::process::Command;
+
+       let out_dir: PathBuf = std::env::var_os("OUT_DIR").unwrap().into();
+       let top_dir: PathBuf = std::env::var_os("CARGO_MANIFEST_DIR").unwrap().into();
+       let libbcachefs_inc_dir = std::env::var("LIBBCACHEFS_INCLUDE")
+               .unwrap_or_else(|_| top_dir.join("libbcachefs").display().to_string());
+       let libbcachefs_inc_dir = std::path::Path::new(&libbcachefs_inc_dir);
+       println!("{}", libbcachefs_inc_dir.display());
+
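+	// First bindgen pass (descriptive note): generate Rust bindings for just
+	// the libbcachefs superblock/crypto symbols the mount helper needs (the
+	// whitelist below); gendisk/bkey are left opaque since we never look inside.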
+       let libbcachefs_dir = top_dir.join("libbcachefs").join("libbcachefs");
+       let bindings = bindgen::builder()
+               .header(top_dir
+                       .join("src")
+                       .join("libbcachefs_wrapper.h")
+                       .display()
+                       .to_string())
+               .clang_arg(format!(
+                       "-I{}",
+                       libbcachefs_inc_dir.join("include").display()
+               ))
+               .clang_arg(format!("-I{}", libbcachefs_inc_dir.display()))
+               .clang_arg("-DZSTD_STATIC_LINKING_ONLY")
+               .clang_arg("-DNO_BCACHEFS_FS")
+               .clang_arg("-D_GNU_SOURCE")
+               .derive_debug(false)
+               .derive_default(true)
+               .default_enum_style(bindgen::EnumVariation::Rust {
+                       non_exhaustive: true,
+               })
+               .whitelist_function("bch2_read_super")
+               .whitelist_function("bch2_sb_field_.*")
+               .whitelist_function("bch2_chacha_encrypt_key")
+               .whitelist_function("derive_passphrase")
+               .whitelist_function("request_key")
+               .whitelist_function("add_key")
+               .whitelist_function("keyctl_search")
+               .whitelist_var("BCH_.*")
+               .whitelist_var("KEY_SPEC_.*")
+               .whitelist_type("bch_kdf_types")
+               .whitelist_type("bch_sb_field_.*")
+               .whitelist_type("bch_encrypted_key")
+               .whitelist_type("nonce")
+               .rustified_enum("bch_kdf_types")
+               .opaque_type("gendisk")
+               .opaque_type("bkey")
+               .generate()
+               .unwrap();
+       bindings.write_to_file(out_dir.join("bcachefs.rs")).unwrap();
+
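+	// Second bindgen pass: bindings for libkeyutils (located via pkg-config),
+	// used by key.rs to call add_key() and keyctl_search().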
+       let keyutils = pkg_config::probe_library("libkeyutils").unwrap();
+       let bindings = bindgen::builder()
+               .header(top_dir
+                       .join("src")
+                       .join("keyutils_wrapper.h")
+                       .display()
+                       .to_string())
+               .clang_args(
+                       keyutils.include_paths
+                               .iter()
+                               .map(|p| format!("-I{}", p.display())),
+               )
+               .generate()
+               .unwrap();
+       bindings.write_to_file(out_dir.join("keyutils.rs")).unwrap();
+}
diff --git a/mount/src/filesystem.rs b/mount/src/filesystem.rs
new file mode 100644 (file)
index 0000000..36af8c0
--- /dev/null
@@ -0,0 +1,174 @@
+extern "C" {
+       pub static stdout: *mut libc::FILE;
+}
+
+use getset::{CopyGetters, Getters};
+use std::path::PathBuf;
+#[derive(Getters, CopyGetters)]
+pub struct FileSystem {
+       /// External UUID of the bcachefs
+       #[getset(get = "pub")]
+       uuid: uuid::Uuid,
+       /// Whether filesystem is encrypted
+       #[getset(get_copy = "pub")]
+       encrypted: bool,
+       /// Super block
+       #[getset(get = "pub")]
+       sb: bcachefs::bch_sb_handle,
+       /// Member devices for this filesystem
+       #[getset(get = "pub")]
+       devices: Vec<PathBuf>,
+}
+
+/// Parse comma-separated mount options, splitting them into mount flags and
+/// filesystem-specific options.
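+///
+/// For example (illustrative): "ro,noatime,degraded" yields
+/// (Some("degraded"), MS_RDONLY | MS_NOATIME).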
+fn parse_mount_options(options: impl AsRef<str>) -> (Option<String>, u64) {
+       use either::Either::*;
+       let (opts, flags) = options
+               .as_ref()
+               .split(",")
+               .map(|o| match o {
+                       "dirsync" => Left(libc::MS_DIRSYNC),
+                       "lazytime" => Left(1 << 25), // MS_LAZYTIME
+                       "mand" => Left(libc::MS_MANDLOCK),
+                       "noatime" => Left(libc::MS_NOATIME),
+                       "nodev" => Left(libc::MS_NODEV),
+                       "nodiratime" => Left(libc::MS_NODIRATIME),
+                       "noexec" => Left(libc::MS_NOEXEC),
+                       "nosuid" => Left(libc::MS_NOSUID),
+                       "ro" => Left(libc::MS_RDONLY),
+                       "rw" => Left(0),
+                       "relatime" => Left(libc::MS_RELATIME),
+                       "strictatime" => Left(libc::MS_STRICTATIME),
+                       "sync" => Left(libc::MS_SYNCHRONOUS),
+                       "" => Left(0),
+                       o @ _ => Right(o),
+               })
+               .fold((Vec::new(), 0), |(mut opts, flags), next| match next {
+                       Left(f) => (opts, flags | f),
+                       Right(o) => {
+                               opts.push(o);
+                               (opts, flags)
+                       }
+               });
+
+       use itertools::Itertools;
+       (
+		if opts.is_empty() {
+                       None
+               } else {
+                       Some(opts.iter().join(","))
+               },
+               flags,
+       )
+}
+
+impl FileSystem {
+       pub(crate) fn new(sb: bcachefs::bch_sb_handle) -> Self {
+               Self {
+                       uuid: sb.sb().uuid(),
+                       encrypted: sb.sb().crypt().is_some(),
+                       sb: sb,
+                       devices: vec![],
+               }
+       }
+
+       pub fn mount(
+               &self,
+               target: impl AsRef<std::path::Path>,
+               options: impl AsRef<str>,
+       ) -> anyhow::Result<()> {
+               use itertools::Itertools;
+               use std::ffi::c_void;
+               use std::os::raw::c_char;
+               use std::os::unix::ffi::OsStrExt;
+               let src = self.devices.iter().map(|d| d.display()).join(":");
+               let (data, mountflags) = parse_mount_options(options);
+               let fstype = c_str!("bcachefs");
+
+               let src = std::ffi::CString::new(src)?; // bind the CString to keep it alive
+               let target = std::ffi::CString::new(target.as_ref().as_os_str().as_bytes())?; // ditto
+               let data = data.map(|data| std::ffi::CString::new(data)).transpose()?; // ditto
+
+               let src = src.as_c_str().to_bytes_with_nul().as_ptr() as *const c_char;
+               let target = target.as_c_str().to_bytes_with_nul().as_ptr() as *const c_char;
+               let data = data.as_ref().map_or(std::ptr::null(), |data| {
+                       data.as_c_str().to_bytes_with_nul().as_ptr() as *const c_void
+               });
+
+               let ret = unsafe { libc::mount(src, target, fstype, mountflags, data) };
+               if ret == 0 {
+                       Ok(())
+               } else {
+                       Err(crate::ErrnoError(errno::errno()).into())
+               }
+       }
+}
+
+use crate::bcachefs;
+use std::collections::HashMap;
+use uuid::Uuid;
+pub fn probe_filesystems() -> anyhow::Result<HashMap<Uuid, FileSystem>> {
+       use std::os::unix::ffi::OsStrExt;
+       let mut udev = udev::Enumerator::new()?;
+       let mut fss = HashMap::new();
+       udev.match_subsystem("block")?;
+
+       {
+               // Stop libbcachefs from spamming the output
+               let _gag = gag::Gag::stdout().unwrap();
+               for dev in udev.scan_devices()? {
+                       if let Some(p) = dev.devnode() {
+                               let path =
+                                       std::ffi::CString::new(p.as_os_str().as_bytes()).unwrap();
+                               let result = unsafe {
+                                       let mut opts = std::mem::MaybeUninit::zeroed();
+                                       let mut sb = std::mem::MaybeUninit::zeroed();
+                                       let ret = bcachefs::bch2_read_super(
+                                               path.as_ptr(),
+                                               opts.as_mut_ptr(),
+                                               sb.as_mut_ptr(),
+                                       );
+                                       if ret == -libc::EACCES {
+                                               Err(std::io::Error::new(
+                                                       std::io::ErrorKind::PermissionDenied,
+							"permission denied",
+                                               ))
+                                       } else if ret != 0 {
+                                               Err(std::io::Error::new(
+                                                       std::io::ErrorKind::Other,
+                                                       "failed to read super",
+                                               ))
+                                       } else {
+                                               Ok((opts.assume_init(), sb.assume_init()))
+                                       }
+                               };
+                               match result {
+                                       Ok((_, sb)) => match fss.get_mut(&sb.sb().uuid()) {
+                                               None => {
+                                                       let mut fs = FileSystem::new(sb);
+                                                       fs.devices.push(p.to_owned());
+                                                       fss.insert(fs.uuid, fs);
+                                               }
+                                               Some(fs) => {
+                                                       fs.devices.push(p.to_owned());
+                                               }
+                                       },
+                                       Err(e) if e.kind()
+                                               != std::io::ErrorKind::PermissionDenied =>
+                                       {
+                                               ()
+                                       }
+                                       e @ Err(_) => {
+                                               e?;
+                                       }
+                               }
+                       }
+               }
+		// Flush stdout so buffered output doesn't get printed after we remove the gag
+               unsafe {
+                       libc::fflush(stdout);
+               }
+       }
+       Ok(fss)
+}
diff --git a/mount/src/key.rs b/mount/src/key.rs
new file mode 100644 (file)
index 0000000..6769f52
--- /dev/null
@@ -0,0 +1,96 @@
+use log::info;
+
+fn check_for_key(key_name: &std::ffi::CStr) -> anyhow::Result<bool> {
+       use crate::keyutils::{self, keyctl_search};
+       let key_name = key_name.to_bytes_with_nul().as_ptr() as *const _;
+       let key_type = c_str!("logon");
+
+       let key_id =
+               unsafe { keyctl_search(keyutils::KEY_SPEC_USER_KEYRING, key_type, key_name, 0) };
+       if key_id > 0 {
+		info!("Key has become available");
+               Ok(true)
+       } else if errno::errno().0 != libc::ENOKEY {
+               Err(crate::ErrnoError(errno::errno()).into())
+       } else {
+               Ok(false)
+       }
+}
+
+fn wait_for_key(uuid: &uuid::Uuid) -> anyhow::Result<()> {
+       let key_name = std::ffi::CString::new(format!("bcachefs:{}", uuid)).unwrap();
+       loop {
+               if check_for_key(&key_name)? {
+                       break Ok(());
+               }
+
+               std::thread::sleep(std::time::Duration::from_secs(1));
+       }
+}
+
+const BCH_KEY_MAGIC: &str = "bch**key";
+use crate::filesystem::FileSystem;
+fn ask_for_key(fs: &FileSystem) -> anyhow::Result<()> {
+       use crate::bcachefs::{self, bch2_chacha_encrypt_key, bch_encrypted_key, bch_key};
+       use anyhow::anyhow;
+       use byteorder::{LittleEndian, ReadBytesExt};
+       use std::os::raw::c_char;
+
+       let key_name = std::ffi::CString::new(format!("bcachefs:{}", fs.uuid())).unwrap();
+       if check_for_key(&key_name)? {
+               return Ok(());
+       }
+
+       let bch_key_magic = BCH_KEY_MAGIC.as_bytes().read_u64::<LittleEndian>().unwrap();
+       let crypt = fs.sb().sb().crypt().unwrap();
+       let pass = rpassword::read_password_from_tty(Some("Enter passphrase: "))?;
+       let pass = std::ffi::CString::new(pass.trim_end())?; // bind to keep the CString alive
+       let mut output: bch_key = unsafe {
+               bcachefs::derive_passphrase(
+                       crypt as *const _ as *mut _,
+                       pass.as_c_str().to_bytes_with_nul().as_ptr() as *const _,
+               )
+       };
+
+       let mut key = crypt.key().clone();
+       let ret = unsafe {
+               bch2_chacha_encrypt_key(
+                       &mut output as *mut _,
+                       fs.sb().sb().nonce(),
+                       &mut key as *mut _ as *mut _,
+                       std::mem::size_of::<bch_encrypted_key>() as u64,
+               )
+       };
+       if ret != 0 {
+		Err(anyhow!("chacha decryption failure"))
+       } else if key.magic != bch_key_magic {
+               Err(anyhow!("failed to verify the password"))
+       } else {
+               let key_type = c_str!("logon");
+               let ret = unsafe {
+                       crate::keyutils::add_key(
+                               key_type,
+                               key_name.as_c_str().to_bytes_with_nul() as *const _
+                                       as *const c_char,
+                               &output as *const _ as *const _,
+                               std::mem::size_of::<bch_key>() as u64,
+                               crate::keyutils::KEY_SPEC_USER_KEYRING,
+                       )
+               };
+               if ret == -1 {
+                       Err(anyhow!("failed to add key to keyring: {}", errno::errno()))
+               } else {
+                       Ok(())
+               }
+       }
+}
+
+pub(crate) fn prepare_key(fs: &FileSystem, password: crate::KeyLocation) -> anyhow::Result<()> {
+       use crate::KeyLocation::*;
+       use anyhow::anyhow;
+       match password {
+               Fail => Err(anyhow!("no key available")),
+               Wait => Ok(wait_for_key(fs.uuid())?),
+               Ask => ask_for_key(fs),
+       }
+}
diff --git a/mount/src/keyutils_wrapper.h b/mount/src/keyutils_wrapper.h
new file mode 100644 (file)
index 0000000..857cee2
--- /dev/null
@@ -0,0 +1 @@
+#include <keyutils.h>
diff --git a/mount/src/lib.rs b/mount/src/lib.rs
new file mode 100644 (file)
index 0000000..751eab3
--- /dev/null
@@ -0,0 +1,190 @@
+use structopt::StructOpt;
+use anyhow::anyhow;
+
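+/// Turn a string literal into a NUL-terminated `*const c_char`.
+/// (The concatenated literal is 'static, so the resulting pointer stays valid.)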
+#[macro_export]
+macro_rules! c_str {
+       ($lit:expr) => {
+               unsafe { std::ffi::CStr::from_ptr(concat!($lit, "\0").as_ptr() as *const std::os::raw::c_char)
+                              .to_bytes_with_nul()
+                              .as_ptr() as *const std::os::raw::c_char }
+       };
+}
+
+#[derive(Debug)]
+struct ErrnoError(errno::Errno);
+impl std::fmt::Display for ErrnoError {
+       fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+               self.0.fmt(f)
+       }
+}
+impl std::error::Error for ErrnoError {}
+
+#[derive(Debug)]
+pub(crate) enum KeyLocation {
+       Fail,
+       Wait,
+       Ask,
+}
+
+impl std::str::FromStr for KeyLocation {
+       type Err = anyhow::Error;
+       fn from_str(s: &str) -> anyhow::Result<Self> {
+               use anyhow::anyhow;
+               match s {
+                       "fail" => Ok(Self::Fail),
+                       "wait" => Ok(Self::Wait),
+                       "ask" => Ok(Self::Ask),
+                       _ => Err(anyhow!("invalid password option"))
+               }
+       }
+}
+
+#[derive(StructOpt, Debug)]
+/// Mount a bcachefs filesystem by its UUID.
+struct Options {
+	/// Where the password will be loaded from.
+	///
+	/// Possible values are:
+	/// "fail" - don't ask for a password; fail if the filesystem is encrypted;
+	/// "wait" - wait for the password to become available before mounting;
+	/// "ask" - prompt the user for the password.
+       #[structopt(short, long, default_value = "fail")]
+       key_location: KeyLocation,
+
+       /// External UUID of the bcachefs filesystem
+       uuid: uuid::Uuid,
+
+	/// Where the filesystem should be mounted. If not set, the filesystem
+	/// won't actually be mounted, but all steps preceding the mount
+	/// (e.g. asking for the passphrase) will still be performed.
+       mountpoint: Option<std::path::PathBuf>,
+
+       /// Mount options
+       #[structopt(short, default_value = "")]
+       options: String,
+}
+
+mod filesystem;
+mod key;
+mod keyutils {
+       #![allow(non_upper_case_globals)]
+       #![allow(non_camel_case_types)]
+       #![allow(non_snake_case)]
+       #![allow(unused)]
+
+       include!(concat!(env!("OUT_DIR"), "/keyutils.rs"));
+}
+
+mod bcachefs {
+       #![allow(non_upper_case_globals)]
+       #![allow(non_camel_case_types)]
+       #![allow(non_snake_case)]
+       #![allow(unused)]
+
+       include!(concat!(env!("OUT_DIR"), "/bcachefs.rs"));
+
+       use bitfield::bitfield;
+       bitfield! {
+               pub struct bch_scrypt_flags(u64);
+               pub N, _: 15, 0;
+               pub R, _: 31, 16;
+               pub P, _: 47, 32;
+       }
+       bitfield! {
+               pub struct bch_crypt_flags(u64);
+               TYPE, _: 4, 0;
+       }
+       use memoffset::offset_of;
+       impl bch_sb_field_crypt {
+               pub fn scrypt_flags(&self) -> Option<bch_scrypt_flags> {
+                       let t = bch_crypt_flags(self.flags);
+                       if t.TYPE() != bch_kdf_types::BCH_KDF_SCRYPT as u64 {
+                               None
+                       } else {
+                               Some(bch_scrypt_flags(self.kdf_flags))
+                       }
+               }
+               pub fn key(&self) -> &bch_encrypted_key {
+                       &self.key
+               }
+       }
+       impl bch_sb {
+               pub fn crypt(&self) -> Option<&bch_sb_field_crypt> {
+                       unsafe {
+                               let ptr = bch2_sb_field_get(
+                                       self as *const _ as *mut _,
+                                       bch_sb_field_type::BCH_SB_FIELD_crypt,
+                               ) as *const u8;
+                               if ptr.is_null() {
+                                       None
+                               } else {
+                                       let offset = offset_of!(bch_sb_field_crypt, field);
+                                       Some(&*((ptr.sub(offset)) as *const _))
+                               }
+                       }
+               }
+               pub fn uuid(&self) -> uuid::Uuid {
+                       uuid::Uuid::from_bytes(self.user_uuid.b)
+               }
+
+               /// Get the nonce used to encrypt the superblock
+               pub fn nonce(&self) -> nonce {
+                       use byteorder::{ReadBytesExt, LittleEndian};
+                       let mut internal_uuid = &self.uuid.b[..];
+                       let dword1 = internal_uuid.read_u32::<LittleEndian>().unwrap();
+                       let dword2 = internal_uuid.read_u32::<LittleEndian>().unwrap();
+                       nonce { d: [0, 0, dword1, dword2] }
+               }
+       }
+       impl bch_sb_handle {
+               pub fn sb(&self) -> &bch_sb {
+                       unsafe { &*self.sb }
+               }
+       }
+}
+
+fn main_inner() -> anyhow::Result<()> {
+       use itertools::Itertools;
+       use log::{info, trace};
+
+       env_logger::init();
+       let opt = Options::from_args();
+       trace!("{:?}", opt);
+
+       let fss = filesystem::probe_filesystems()?;
+	info!("Found {} bcachefs filesystems:", fss.len());
+       for fs in fss.values() {
+               info!(
+                       "{} ({}): {}",
+                       fs.uuid(),
+                       if fs.encrypted() {
+                               "encrypted"
+                       } else {
+                               "unencrypted"
+                       },
+                       fs.devices().iter().map(|d| d.display()).join(" ")
+               );
+       }
+
+       if let Some(fs) = fss.get(&opt.uuid) {
+               if fs.encrypted() {
+                       info!("Making sure key is loaded for this filesystem");
+                       key::prepare_key(&fs, opt.key_location)?;
+               }
+
+               if let Some(p) = opt.mountpoint {
+                       fs.mount(&p, &opt.options)
+               } else {
+                       Ok(())
+               }
+       } else {
+		Err(anyhow!("Filesystem {} not found", opt.uuid))
+       }
+}
+
+#[no_mangle]
+pub extern "C" fn main() {
+       if let Err(e) = main_inner() {
+               println!("Error: {:?}", e);
+       }
+}
diff --git a/mount/src/libbcachefs_wrapper.h b/mount/src/libbcachefs_wrapper.h
new file mode 100644 (file)
index 0000000..9d9754c
--- /dev/null
@@ -0,0 +1,4 @@
+#include "../libbcachefs/super-io.h"
+#include "../libbcachefs/checksum.h"
+#include "../libbcachefs/bcachefs_format.h"
+#include "../crypto.h"
diff --git a/nix/fetchnix.nix b/nix/fetchnix.nix
new file mode 100644 (file)
index 0000000..2f98788
--- /dev/null
@@ -0,0 +1,48 @@
+# `builtins.fetchTarball` only accepts a `sha256` argument in Nix version 1.12
+# or later, so this file provides a function that presents a compatible
+# interface on both Nix 1.11 and Nix 1.12.
+#
+# TODO FIXME: remove this sometime after Nix 1.12 goes stable
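+#
+# Example call (hypothetical values; the real ones live in nix/nixpkgs.json):
+#
+#   import ./fetchnix.nix {
+#     url    = "https://github.com/nixos/nixpkgs/archive/<rev>.tar.gz";
+#     rev    = "<rev>";
+#     sha256 = "<sha256>";
+#   }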
+
+{ url                             # URL of the nixpkgs tarball to download
+, rev                             # The Git revision of nixpkgs to fetch
+, sha256                          # The SHA256 of the downloaded data
+, system ? builtins.currentSystem # This is overridable if necessary
+}:
+
+with {
+  ifThenElse = { bool, thenValue, elseValue }: (
+    if bool then thenValue else elseValue);
+};
+
+ifThenElse {
+  bool = (0 <= builtins.compareVersions builtins.nixVersion "1.12");
+
+  # In Nix 1.12, we can just give a `sha256` to `builtins.fetchTarball`.
+  thenValue = (builtins.fetchTarball { inherit url sha256; });
+
+  # This hack should at least work for Nix 1.11
+  elseValue = (
+    (rec {
+      tarball = import <nix/fetchurl.nix> { inherit url sha256; };
+      builtin-paths = import <nix/config.nix>;
+
+      script = builtins.toFile "nixpkgs-unpacker" ''
+        "$coreutils/mkdir" "$out"
+        cd "$out"
+        "$gzip" --decompress < "$tarball" | "$tar" -x --strip-components=1
+      '';
+
+      nixpkgs = builtins.derivation {
+        name = "nixpkgs-${builtins.substring 0 6 rev}";
+
+        builder = builtins.storePath builtin-paths.shell;
+        args = [ script ];
+
+        inherit tarball system;
+        tar       = builtins.storePath builtin-paths.tar;
+        gzip      = builtins.storePath builtin-paths.gzip;
+        coreutils = builtins.storePath builtin-paths.coreutils;
+      };
+    }).nixpkgs);
+}
diff --git a/nix/nixpkgs.json b/nix/nixpkgs.json
new file mode 100644 (file)
index 0000000..a5a11d0
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "url":    "https://github.com/nixos/nixpkgs/archive/5ae883b8c3b04e0c4a9c92a5ab3c7c84b9942943.tar.gz",
+  "rev":    "5ae883b8c3b04e0c4a9c92a5ab3c7c84b9942943",
+  "sha256": "1s2nhax586v2fax7r5qd1s3d2gdg25isva7k7r9pf9x9ay630cmb"
+}
diff --git a/nix/nixpkgs.nix b/nix/nixpkgs.nix
new file mode 100644 (file)
index 0000000..0067366
--- /dev/null
@@ -0,0 +1,9 @@
+let
+  # Grab the versions we specified in the JSON file
+  nixpkgs   = builtins.fromJSON (builtins.readFile ./nixpkgs.json);
+
+  # Bootstrap a copy of nixpkgs, based on this.
+  src = import ./fetchnix.nix { inherit (nixpkgs) url rev sha256; };
+
+# We use the default nixpkgs configuration during bootstrap.
+in import src { config = {}; }
diff --git a/nix/update-nixpkgs.sh b/nix/update-nixpkgs.sh
new file mode 100755 (executable)
index 0000000..770d280
--- /dev/null
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+set -e
+
+if [[ "x$1" == "x" ]]; then
+    echo "Must provide a revision argument"
+    echo "Usage:"
+    echo "  ./update-nixpkgs.sh <rev>"
+    echo "  ./update-nixpkgs.sh https://github.com/foo/nixpkgs <rev>"
+    exit 1
+fi
+
+if [[ "x$2" == "x" ]]; then
+    REV="$1"
+    URL="https://github.com/nixos/nixpkgs"
+else
+    REV="$2"
+    URL="$1"
+fi
+
+DOWNLOAD="$URL/archive/$REV.tar.gz"
+echo "Updating to nixpkgs revision $REV from $URL"
+SHA256=$(nix-prefetch-url "$DOWNLOAD")
+
+cat > nixpkgs.json <<EOF
+{
+  "url":    "$DOWNLOAD",
+  "rev":    "$REV",
+  "sha256": "$SHA256"
+}
+EOF
+
+echo "Updated nixpkgs.json"
diff --git a/packaging/README b/packaging/README
new file mode 100644 (file)
index 0000000..704c60d
--- /dev/null
@@ -0,0 +1,32 @@
+==== liburcu requirement ====
+
+liburcu (https://liburcu.org/) is required for this software to work, but older
+RHEL/CentOS and other distros may not ship a recent enough package. You might
+have to do the following:
+
+1. Make & install the latest liburcu tarball from their site.
+2. In the Makefile for bcachefs-tools (see the example after this list)...
+   a. Remove liburcu from PKGCONFIG_LIBS
+   b. Add -lurcu to LDLIBS
+3. Add LD_LIBRARY_PATH=/usr/local/lib to /etc/environment
+4. Remove "BuildRequires:  userspace-rcu-devel" and "Requires:   userspace-rcu" from the
+   spec file here.
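+
+For example, the Makefile change in step 2 might look like this (illustrative;
+the exact variable contents depend on the bcachefs-tools revision):
+
+   PKGCONFIG_LIBS=...   # with liburcu removed from this list
+   LDLIBS+=-lurcu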
+
+==== RHEL-CentOS 7 ====
+
+You need to use https://www.softwarecollections.org/ to install a newer GCC.
+
+As root: yum install devtoolset-8-gcc
+
+Before building: scl enable devtoolset-8 bash
+
+==== RHEL-CentOS (any) ====
+
+1. As root, "yum install rpmdevtools help2man"
+2. Make a non-root user to build RPMs with.
+3. "su - (non-root user)" and use "rpmdev-setuptree" to create an RPM build structure.
+4. Copy the SPEC file from this directory to the "~/rpmbuild/SPECS/" folder.
+5. Move the parent directory here to bcachefs-tools-(VERSION), and "cd" to its parent.
+6. "tar cjf bcachefs-tools-(VERSION).tar.bz2 (directory of bcachefs-tools-(VERSION))"
+7. "rpmbuild -bs ~/rpmbuild/SPECS/bcachefs-tools.spec"
+8. "rpmbuild -bb ~/rpmbuild/SPECS/bcachefs-tools.spec"
+9. The RPMs will be in "~/rpmbuild/RPMS" and "~/rpmbuild/SRPMS".
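+
+An example session following the steps above (the version and directory names
+are illustrative; rpmbuild expects the tarball in "~/rpmbuild/SOURCES"):
+
+   $ rpmdev-setuptree
+   $ mv bcachefs-tools bcachefs-tools-2020.01.21
+   $ tar cjf ~/rpmbuild/SOURCES/bcachefs-tools-2020.01.21.tar.bz2 bcachefs-tools-2020.01.21
+   $ rpmbuild -bs ~/rpmbuild/SPECS/bcachefs-tools.spec
+   $ rpmbuild -bb ~/rpmbuild/SPECS/bcachefs-tools.spec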
diff --git a/packaging/bcachefs-tools.spec b/packaging/bcachefs-tools.spec
new file mode 100644 (file)
index 0000000..4946cef
--- /dev/null
@@ -0,0 +1,73 @@
+Name:           bcachefs-tools
+Version:        2020.01.21
+Release:        1%{?dist}
+Summary:        Userspace tools for bcachefs
+
+License:        GPLv2
+URL:            https://github.com/koverstreet/bcachefs-tools
+Source0:        %{name}-%{version}.tar.bz2
+
+BuildRequires:  epel-release
+BuildRequires:  bzip2
+BuildRequires:  gcc
+BuildRequires:  make
+BuildRequires:  keyutils-libs-devel
+BuildRequires:  libaio-devel
+BuildRequires:  libattr-devel
+BuildRequires:  libblkid-devel
+BuildRequires:  libscrypt-devel
+BuildRequires:  libsodium-devel
+BuildRequires:  libtool-ltdl-devel
+BuildRequires:  libuuid-devel
+BuildRequires:  libvmmalloc-devel
+BuildRequires:  libzstd-devel
+BuildRequires:  lz4-devel
+BuildRequires:  userspace-rcu-devel
+BuildRequires:  valgrind-devel
+BuildRequires:  zlib-devel
+
+Requires:   epel-release
+Requires:   bzip2
+Requires:   keyutils-libs
+Requires:   libaio
+Requires:   libattr
+Requires:   libblkid
+Requires:   libscrypt
+Requires:   libsodium
+Requires:   libtool-ltdl
+Requires:   libuuid
+Requires:   libvmmalloc
+Requires:   libzstd
+Requires:   lz4
+Requires:   userspace-rcu
+Requires:   zlib
+
+%description
+The bcachefs tool, which has a number of subcommands for formatting and managing bcachefs filesystems. Run bcachefs --help for a full list of commands.
+
+%prep
+%setup -q
+
+%build
+make
+
+%install
+rm -rf $RPM_BUILD_ROOT
+mkdir -p $RPM_BUILD_ROOT/usr/local/sbin
+mkdir -p $RPM_BUILD_ROOT/usr/local/share/man/man8
+%make_install
+
+%files
+/usr/local/sbin/bcachefs
+/usr/local/sbin/fsck.bcachefs
+/usr/local/sbin/mkfs.bcachefs
+/usr/local/share/man/man8/bcachefs.8
+/etc/initramfs-tools/hooks/bcachefs
+/etc/initramfs-tools/scripts/local-premount/bcachefs
+
+%changelog
+* Tue Jan 21 2020 Michael Adams <unquietwiki@gmail.com> - 2020.01.21-1
+- Updated RPM package definition to reflect that changes in codebase have occurred.
+* Tue Jan 07 2020 Michael Adams <unquietwiki@gmail.com> - 2020.01.07-1
+- Initial RPM package definition
+- Makefile needs further work to accommodate RPM macros.
diff --git a/packaging/userspace-rcu.spec b/packaging/userspace-rcu.spec
new file mode 100644 (file)
index 0000000..40516cc
--- /dev/null
@@ -0,0 +1,238 @@
+# rpmbuild with QA_RPATHS=$[0x0001]
+
+Name:           userspace-rcu
+Version:        0.11.1
+Release:        2%{?dist}
+Summary:        liburcu is an LGPLv2.1 userspace RCU (read-copy-update) library.
+
+License:        LGPLv2.1
+URL:            https://liburcu.org/
+Source0:        https://lttng.org/files/urcu/%{name}-%{version}.tar.bz2
+Source1:        https://lttng.org/files/urcu/%{name}-%{version}.tar.bz2.asc
+
+# "devel" files are installed with this package, also.
+Provides:      userspace-rcu-devel
+
+# Recommend using https://www.softwarecollections.org/en/scls/rhscl/devtoolset-8/ for this
+
+BuildRequires:  bzip2
+BuildRequires:  gcc
+BuildRequires:  make
+BuildRequires:  m4
+
+%description
+liburcu provides efficient data structures based on RCU and lock-free algorithms. Those structures include hash tables, queues, stacks, and doubly-linked lists.
+
+%prep
+%setup -q
+
+%configure
+
+%build
+make
+
+%install
+rm -rf $RPM_BUILD_ROOT
+%make_install
+
+%files
+%{_datadir}/doc/userspace-rcu/cds-api.md
+%{_datadir}/doc/userspace-rcu/examples/hlist/cds_hlist_add_head_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/hlist/cds_hlist_del_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/hlist/cds_hlist_for_each_entry_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/hlist/cds_hlist_for_each_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/hlist/Makefile
+%{_datadir}/doc/userspace-rcu/examples/hlist/Makefile.cds_hlist_add_head_rcu
+%{_datadir}/doc/userspace-rcu/examples/hlist/Makefile.cds_hlist_del_rcu
+%{_datadir}/doc/userspace-rcu/examples/hlist/Makefile.cds_hlist_for_each_entry_rcu
+%{_datadir}/doc/userspace-rcu/examples/hlist/Makefile.cds_hlist_for_each_rcu
+%{_datadir}/doc/userspace-rcu/examples/lfstack/cds_lfs_pop_all_blocking.c
+%{_datadir}/doc/userspace-rcu/examples/lfstack/cds_lfs_pop_blocking.c
+%{_datadir}/doc/userspace-rcu/examples/lfstack/cds_lfs_push.c
+%{_datadir}/doc/userspace-rcu/examples/lfstack/Makefile
+%{_datadir}/doc/userspace-rcu/examples/lfstack/Makefile.cds_lfs_pop_all_blocking
+%{_datadir}/doc/userspace-rcu/examples/lfstack/Makefile.cds_lfs_pop_blocking
+%{_datadir}/doc/userspace-rcu/examples/lfstack/Makefile.cds_lfs_push
+%{_datadir}/doc/userspace-rcu/examples/list/cds_list_add_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/list/cds_list_add_tail_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/list/cds_list_del_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/list/cds_list_for_each_entry_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/list/cds_list_for_each_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/list/cds_list_replace_rcu.c
+%{_datadir}/doc/userspace-rcu/examples/list/Makefile
+%{_datadir}/doc/userspace-rcu/examples/list/Makefile.cds_list_add_rcu
+%{_datadir}/doc/userspace-rcu/examples/list/Makefile.cds_list_add_tail_rcu
+%{_datadir}/doc/userspace-rcu/examples/list/Makefile.cds_list_del_rcu
+%{_datadir}/doc/userspace-rcu/examples/list/Makefile.cds_list_for_each_entry_rcu
+%{_datadir}/doc/userspace-rcu/examples/list/Makefile.cds_list_for_each_rcu
+%{_datadir}/doc/userspace-rcu/examples/list/Makefile.cds_list_replace_rcu
+%{_datadir}/doc/userspace-rcu/examples/Makefile
+%{_datadir}/doc/userspace-rcu/examples/Makefile.examples.template
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/cds_lfht_add_replace.c
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/cds_lfht_add_unique.c
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/cds_lfht_add.c
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/cds_lfht_del.c
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/cds_lfht_destroy.c
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/cds_lfht_for_each_entry_duplicate.c
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/cds_lfht_lookup.c
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/jhash.h
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile.cds_lfht_add
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile.cds_lfht_add_replace
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile.cds_lfht_add_unique
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile.cds_lfht_del
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile.cds_lfht_destroy
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile.cds_lfht_for_each_entry_duplicate
+%{_datadir}/doc/userspace-rcu/examples/rculfhash/Makefile.cds_lfht_lookup
+%{_datadir}/doc/userspace-rcu/examples/rculfqueue/cds_lfq_dequeue.c
+%{_datadir}/doc/userspace-rcu/examples/rculfqueue/cds_lfq_enqueue.c
+%{_datadir}/doc/userspace-rcu/examples/rculfqueue/Makefile
+%{_datadir}/doc/userspace-rcu/examples/rculfqueue/Makefile.cds_lfq_dequeue
+%{_datadir}/doc/userspace-rcu/examples/rculfqueue/Makefile.cds_lfq_enqueue
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/bp.c
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/Makefile
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/Makefile.bp
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/Makefile.mb
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/Makefile.membarrier
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/Makefile.qsbr
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/Makefile.signal
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/mb.c
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/membarrier.c
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/qsbr.c
+%{_datadir}/doc/userspace-rcu/examples/urcu-flavors/signal.c
+%{_datadir}/doc/userspace-rcu/examples/wfcqueue/cds_wfcq_dequeue.c
+%{_datadir}/doc/userspace-rcu/examples/wfcqueue/cds_wfcq_enqueue.c
+%{_datadir}/doc/userspace-rcu/examples/wfcqueue/cds_wfcq_splice.c
+%{_datadir}/doc/userspace-rcu/examples/wfcqueue/Makefile
+%{_datadir}/doc/userspace-rcu/examples/wfcqueue/Makefile.cds_wfcq_dequeue
+%{_datadir}/doc/userspace-rcu/examples/wfcqueue/Makefile.cds_wfcq_enqueue
+%{_datadir}/doc/userspace-rcu/examples/wfcqueue/Makefile.cds_wfcq_splice
+%{_datadir}/doc/userspace-rcu/examples/wfstack/cds_wfs_pop_all_blocking.c
+%{_datadir}/doc/userspace-rcu/examples/wfstack/cds_wfs_pop.c
+%{_datadir}/doc/userspace-rcu/examples/wfstack/cds_wfs_push.c
+%{_datadir}/doc/userspace-rcu/examples/wfstack/Makefile
+%{_datadir}/doc/userspace-rcu/examples/wfstack/Makefile.cds_wfs_pop
+%{_datadir}/doc/userspace-rcu/examples/wfstack/Makefile.cds_wfs_pop_all_blocking
+%{_datadir}/doc/userspace-rcu/examples/wfstack/Makefile.cds_wfs_push
+%{_datadir}/doc/userspace-rcu/LICENSE
+%{_datadir}/doc/userspace-rcu/rcu-api.md
+%{_datadir}/doc/userspace-rcu/README.md
+%{_datadir}/doc/userspace-rcu/solaris-build.md
+%{_datadir}/doc/userspace-rcu/uatomic-api.md
+%{_includedir}/urcu-bp.h
+%{_includedir}/urcu-call-rcu.h
+%{_includedir}/urcu-defer.h
+%{_includedir}/urcu-flavor.h
+%{_includedir}/urcu-pointer.h
+%{_includedir}/urcu-qsbr.h
+%{_includedir}/urcu.h
+%{_includedir}/urcu/arch.h
+%{_includedir}/urcu/arch/generic.h
+%{_includedir}/urcu/call-rcu.h
+%{_includedir}/urcu/cds.h
+%{_includedir}/urcu/compiler.h
+%{_includedir}/urcu/config.h
+%{_includedir}/urcu/debug.h
+%{_includedir}/urcu/defer.h
+%{_includedir}/urcu/flavor.h
+%{_includedir}/urcu/futex.h
+%{_includedir}/urcu/hlist.h
+%{_includedir}/urcu/lfstack.h
+%{_includedir}/urcu/list.h
+%{_includedir}/urcu/map/clear.h
+%{_includedir}/urcu/map/urcu-bp.h
+%{_includedir}/urcu/map/urcu-mb.h
+%{_includedir}/urcu/map/urcu-memb.h
+%{_includedir}/urcu/map/urcu-qsbr.h
+%{_includedir}/urcu/map/urcu-signal.h
+%{_includedir}/urcu/map/urcu.h
+%{_includedir}/urcu/pointer.h
+%{_includedir}/urcu/rcuhlist.h
+%{_includedir}/urcu/rculfhash.h
+%{_includedir}/urcu/rculfqueue.h
+%{_includedir}/urcu/rculfstack.h
+%{_includedir}/urcu/rculist.h
+%{_includedir}/urcu/ref.h
+%{_includedir}/urcu/static/lfstack.h
+%{_includedir}/urcu/static/pointer.h
+%{_includedir}/urcu/static/rculfqueue.h
+%{_includedir}/urcu/static/rculfstack.h
+%{_includedir}/urcu/static/urcu-bp.h
+%{_includedir}/urcu/static/urcu-common.h
+%{_includedir}/urcu/static/urcu-mb.h
+%{_includedir}/urcu/static/urcu-memb.h
+%{_includedir}/urcu/static/urcu-qsbr.h
+%{_includedir}/urcu/static/urcu-signal.h
+%{_includedir}/urcu/static/urcu.h
+%{_includedir}/urcu/static/wfcqueue.h
+%{_includedir}/urcu/static/wfqueue.h
+%{_includedir}/urcu/static/wfstack.h
+%{_includedir}/urcu/syscall-compat.h
+%{_includedir}/urcu/system.h
+%{_includedir}/urcu/tls-compat.h
+%{_includedir}/urcu/uatomic_arch.h
+%{_includedir}/urcu/uatomic.h
+%{_includedir}/urcu/uatomic/generic.h
+%{_includedir}/urcu/urcu_ref.h
+%{_includedir}/urcu/urcu-bp.h
+%{_includedir}/urcu/urcu-futex.h
+%{_includedir}/urcu/urcu-mb.h
+%{_includedir}/urcu/urcu-memb.h
+%{_includedir}/urcu/urcu-qsbr.h
+%{_includedir}/urcu/urcu-signal.h
+%{_includedir}/urcu/urcu.h
+%{_includedir}/urcu/wfcqueue.h
+%{_includedir}/urcu/wfqueue.h
+%{_includedir}/urcu/wfstack.h
+%{_libdir}/liburcu-bp.a
+%{_libdir}/liburcu-bp.la
+%{_libdir}/liburcu-bp.so
+%{_libdir}/liburcu-bp.so.6
+%{_libdir}/liburcu-bp.so.6.1.0
+%{_libdir}/liburcu-cds.a
+%{_libdir}/liburcu-cds.la
+%{_libdir}/liburcu-cds.so
+%{_libdir}/liburcu-cds.so.6
+%{_libdir}/liburcu-cds.so.6.1.0
+%{_libdir}/liburcu-common.a
+%{_libdir}/liburcu-common.la
+%{_libdir}/liburcu-common.so
+%{_libdir}/liburcu-common.so.6
+%{_libdir}/liburcu-common.so.6.1.0
+%{_libdir}/liburcu-mb.a
+%{_libdir}/liburcu-mb.la
+%{_libdir}/liburcu-mb.so
+%{_libdir}/liburcu-mb.so.6
+%{_libdir}/liburcu-mb.so.6.1.0
+%{_libdir}/liburcu-memb.a
+%{_libdir}/liburcu-memb.la
+%{_libdir}/liburcu-memb.so
+%{_libdir}/liburcu-memb.so.6
+%{_libdir}/liburcu-memb.so.6.1.0
+%{_libdir}/liburcu-qsbr.a
+%{_libdir}/liburcu-qsbr.la
+%{_libdir}/liburcu-qsbr.so
+%{_libdir}/liburcu-qsbr.so.6
+%{_libdir}/liburcu-qsbr.so.6.1.0
+%{_libdir}/liburcu-signal.a
+%{_libdir}/liburcu-signal.la
+%{_libdir}/liburcu-signal.so
+%{_libdir}/liburcu-signal.so.6
+%{_libdir}/liburcu-signal.so.6.1.0
+%{_libdir}/liburcu.a
+%{_libdir}/liburcu.la
+%{_libdir}/liburcu.so
+%{_libdir}/liburcu.so.6
+%{_libdir}/liburcu.so.6.1.0
+%{_libdir}/pkgconfig/liburcu-bp.pc
+%{_libdir}/pkgconfig/liburcu-cds.pc
+%{_libdir}/pkgconfig/liburcu-mb.pc
+%{_libdir}/pkgconfig/liburcu-qsbr.pc
+%{_libdir}/pkgconfig/liburcu-signal.pc
+%{_libdir}/pkgconfig/liburcu.pc
+
+%changelog
+* Mon Feb 24 2020 Michael Adams <unquietwiki@gmail.com> - 0.11-2
+- Try to fix RPM package install warning
+* Tue Jan 07 2020 Michael Adams <unquietwiki@gmail.com> - 0.11-1
+- Initial RPM package
diff --git a/qcow2.c b/qcow2.c
new file mode 100644 (file)
index 0000000..b7aa8c2
--- /dev/null
+++ b/qcow2.c
@@ -0,0 +1,131 @@
+
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "qcow2.h"
+#include "tools-util.h"
+
+#define QCOW_MAGIC             (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION           2
+#define QCOW_OFLAG_COPIED      (1LL << 63)
+
+struct qcow2_hdr {
+       u32                     magic;
+       u32                     version;
+
+       u64                     backing_file_offset;
+       u32                     backing_file_size;
+
+       u32                     block_bits;
+       u64                     size;
+       u32                     crypt_method;
+
+       u32                     l1_size;
+       u64                     l1_table_offset;
+
+       u64                     refcount_table_offset;
+       u32                     refcount_table_blocks;
+
+       u32                     nb_snapshots;
+       u64                     snapshots_offset;
+};
+
+struct qcow2_image {
+       int                     fd;
+       u32                     block_size;
+       u64                     *l1_table;
+       u64                     l1_offset;
+       u32                     l1_index;
+       u64                     *l2_table;
+       u64                     offset;
+};
+
+static void flush_l2(struct qcow2_image *img)
+{
+       if (img->l1_index != -1) {
+               img->l1_table[img->l1_index] =
+                       cpu_to_be64(img->offset|QCOW_OFLAG_COPIED);
+               xpwrite(img->fd, img->l2_table, img->block_size, img->offset);
+               img->offset += img->block_size;
+
+               memset(img->l2_table, 0, img->block_size);
+               img->l1_index = -1;
+       }
+}
+
+static void add_l2(struct qcow2_image *img, u64 src_blk, u64 dst_offset)
+{
+       unsigned l2_size = img->block_size / sizeof(u64);
+       u64 l1_index = src_blk / l2_size;
+       u64 l2_index = src_blk & (l2_size - 1);
+
+       if (img->l1_index != l1_index) {
+               flush_l2(img);
+               img->l1_index = l1_index;
+       }
+
+       img->l2_table[l2_index] = cpu_to_be64(dst_offset|QCOW_OFLAG_COPIED);
+}
+
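+/*
+ * Lay out a minimal qcow2 (version 2) image on outfd: the data blocks for
+ * each input range come first, then the L1 table, then the header at
+ * offset 0.  flush_l2() writes each L2 table out as the L1 index changes.
+ */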
+void qcow2_write_image(int infd, int outfd, ranges *data,
+                      unsigned block_size)
+{
+       u64 image_size = get_size(NULL, infd);
+       unsigned l2_size = block_size / sizeof(u64);
+       unsigned l1_size = DIV_ROUND_UP(image_size, (u64) block_size * l2_size);
+       struct qcow2_hdr hdr = { 0 };
+       struct qcow2_image img = {
+               .fd             = outfd,
+               .block_size     = block_size,
+               .l2_table       = xcalloc(l2_size, sizeof(u64)),
+               .l1_table       = xcalloc(l1_size, sizeof(u64)),
+               .l1_index       = -1,
+               .offset         = round_up(sizeof(hdr), block_size),
+       };
+       struct range *r;
+       char *buf = xmalloc(block_size);
+       u64 src_offset, dst_offset;
+
+       assert(is_power_of_2(block_size));
+
+       ranges_roundup(data, block_size);
+       ranges_sort_merge(data);
+
+       /* Write data: */
+       darray_foreach(r, *data)
+               for (src_offset = r->start;
+                    src_offset < r->end;
+                    src_offset += block_size) {
+                       dst_offset = img.offset;
+                       img.offset += img.block_size;
+
+                       xpread(infd, buf, block_size, src_offset);
+                       xpwrite(outfd, buf, block_size, dst_offset);
+
+                       add_l2(&img, src_offset / block_size, dst_offset);
+               }
+
+       flush_l2(&img);
+
+       /* Write L1 table: */
+       dst_offset              = img.offset;
+       img.offset              += round_up(l1_size * sizeof(u64), block_size);
+       xpwrite(img.fd, img.l1_table, l1_size * sizeof(u64), dst_offset);
+
+       /* Write header: */
+       hdr.magic               = cpu_to_be32(QCOW_MAGIC);
+       hdr.version             = cpu_to_be32(QCOW_VERSION);
+       hdr.block_bits          = cpu_to_be32(ilog2(block_size));
+       hdr.size                = cpu_to_be64(image_size);
+       hdr.l1_size             = cpu_to_be32(l1_size);
+       hdr.l1_table_offset     = cpu_to_be64(dst_offset);
+
+       memset(buf, 0, block_size);
+       memcpy(buf, &hdr, sizeof(hdr));
+       xpwrite(img.fd, buf, block_size, 0);
+
+       free(img.l2_table);
+       free(img.l1_table);
+       free(buf);
+}
diff --git a/qcow2.h b/qcow2.h
new file mode 100644 (file)
index 0000000..0943d55
--- /dev/null
+++ b/qcow2.h
@@ -0,0 +1,9 @@
+#ifndef _QCOW2_H
+#define _QCOW2_H
+
+#include <linux/types.h>
+#include "tools-util.h"
+
+void qcow2_write_image(int, int, ranges *, unsigned);
+
+#endif /* _QCOW2_H */
diff --git a/raid/COPYING b/raid/COPYING
new file mode 100644 (file)
index 0000000..a43ea21
--- /dev/null
@@ -0,0 +1,339 @@
+                   GNU GENERAL PUBLIC LICENSE
+                      Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                          675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+                   GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                           NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+\f
+       Appendix: How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) 19yy  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) 19yy name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/raid/check.c b/raid/check.c
new file mode 100644 (file)
index 0000000..9bed933
--- /dev/null
+++ b/raid/check.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2015 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "combo.h"
+#include "gf.h"
+
+/**
+ * Validate the provided failed blocks.
+ *
+ * This function checks if the specified failed blocks satisfy the redundancy
+ * information using the data from the known valid parity blocks.
+ *
+ * It's similar to raid_check(), just with a different argument format.
+ *
+ * The number of failed blocks @nr must be strictly less than the number of
+ * parities @nv, because one extra parity is needed to validate the recovery.
+ *
+ * No data or parity blocks are modified.
+ *
+ * @nr Number of failed data blocks.
+ * @id[] Vector of @nr indexes of the failed data blocks.
+ *   The indexes start from 0. They must be in order.
+ * @nv Number of valid parity blocks.
+ * @ip[] Vector of @nv indexes of the valid parity blocks.
+ *   The indexes start from 0. They must be in order.
+ * @nd Number of data blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + @ip[@nv - 1] + 1) elements. The starting elements are the
+ *   data blocks, followed by the parity blocks.
+ *   Each block has @size bytes.
+ * @return 0 if the check is satisfied. -1 otherwise.
+ */
+static int raid_validate(int nr, int *id, int nv, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       const uint8_t *T[RAID_PARITY_MAX][RAID_PARITY_MAX];
+       uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       size_t i;
+       int j, k, l;
+
+       BUG_ON(nr >= nv);
+
+       /* setup the coefficients matrix */
+       for (j = 0; j < nr; ++j)
+               for (k = 0; k < nr; ++k)
+                       G[j * nr + k] = A(ip[j], id[k]);
+
+       /* invert it to solve the system of linear equations */
+       raid_invert(G, V, nr);
+
+       /* get multiplication tables */
+       for (j = 0; j < nr; ++j)
+               for (k = 0; k < nr; ++k)
+                       T[j][k] = table(V[j * nr + k]);
+
+       /* check all positions */
+       for (i = 0; i < size; ++i) {
+               uint8_t p[RAID_PARITY_MAX];
+
+               /* get parity */
+               for (j = 0; j < nv; ++j)
+                       p[j] = v[nd + ip[j]][i];
+
+               /* compute delta parity, skipping broken disks */
+               for (j = 0, k = 0; j < nd; ++j) {
+                       uint8_t b;
+
+                       /* skip broken disks */
+                       if (k < nr && id[k] == j) {
+                               ++k;
+                               continue;
+                       }
+
+                       b = v[j][i];
+                       for (l = 0; l < nv; ++l)
+                               p[l] ^= gfmul[b][gfgen[ip[l]][j]];
+               }
+
+               /* reconstruct data */
+               for (j = 0; j < nr; ++j) {
+                       uint8_t b = 0;
+                       int idj = id[j];
+
+                       /* recompute the data */
+                       for (k = 0; k < nr; ++k)
+                               b ^= T[j][k][p[k]];
+
+                       /* add the parity contribution of the reconstructed data */
+                       for (l = nr; l < nv; ++l)
+                               p[l] ^= gfmul[b][gfgen[ip[l]][idj]];
+               }
+
+               /* check that the final parity is 0 */
+               for (l = nr; l < nv; ++l)
+                       if (p[l] != 0)
+                               return -1;
+       }
+
+       return 0;
+}
+
+int raid_check(int nr, int *ir, int nd, int np, size_t size, void **v)
+{
+       /* valid parity index */
+       int ip[RAID_PARITY_MAX];
+       int vp;
+       int rd;
+       int i, j;
+
+       /* enforce limit on size */
+       BUG_ON(size % 64 != 0);
+
+       /* enforce limit on number of failures */
+       BUG_ON(nr >= np); /* >= because we check with extra parity */
+       BUG_ON(np > RAID_PARITY_MAX);
+
+       /* enforce order in index vector */
+       BUG_ON(nr >= 2 && ir[0] >= ir[1]);
+       BUG_ON(nr >= 3 && ir[1] >= ir[2]);
+       BUG_ON(nr >= 4 && ir[2] >= ir[3]);
+       BUG_ON(nr >= 5 && ir[3] >= ir[4]);
+       BUG_ON(nr >= 6 && ir[4] >= ir[5]);
+
+       /* enforce limit on index vector */
+       BUG_ON(nr > 0 && ir[nr-1] >= nd + np);
+
+       /* count failed data disks */
+       rd = 0;
+       while (rd < nr && ir[rd] < nd)
+               ++rd;
+
+       /* put valid parities into ip[] */
+       vp = 0;
+       for (i = rd, j = 0; j < np; ++j) {
+               /* if parity is failed */
+               if (i < nr && ir[i] == nd + j) {
+                       /* skip broken parity */
+                       ++i;
+               } else {
+                       /* store valid parity */
+                       ip[vp] = j;
+                       ++vp;
+               }
+       }
+
+       return raid_validate(rd, ir, vp, ip, nd, size, v);
+}
+
+int raid_scan(int *ir, int nd, int np, size_t size, void **v)
+{
+       int r;
+
+       /* check the special case of no failure */
+       if (np != 0 && raid_check(0, 0, nd, np, size, v) == 0)
+               return 0;
+
+       /* for each number of possible failures */
+       for (r = 1; r < np; ++r) {
+               /* try all combinations of r failures on n disks */
+               combination_first(r, nd + np, ir);
+               do {
+                       /* verify if the combination is a valid one */
+                       if (raid_check(r, ir, nd, np, size, v) == 0)
+                               return r;
+               } while (combination_next(r, nd + np, ir));
+       }
+
+       /* no solution found */
+       return -1;
+}
+
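For orientation, here is a minimal caller sketch for the scanning API above. raid_init() and raid_gen() live elsewhere in this library (declared in raid.h upstream), so their use here is an assumption, not something this diff shows:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "raid.h"       /* assumed header declaring raid_init/raid_gen/raid_scan */

#define ND    4         /* data blocks */
#define NP    2         /* parity blocks */
#define BLOCK 4096      /* block size; must be a multiple of 64 */

int main(void)
{
        void *v[ND + NP];
        int ir[NP];
        int i, r;

        raid_init();    /* assumed one-time setup of the GF tables */

        for (i = 0; i < ND + NP; ++i)
                v[i] = calloc(1, BLOCK);

        raid_gen(ND, NP, BLOCK, v);     /* compute both parities */
        memset(v[1], 0xff, BLOCK);      /* silently corrupt one data block */

        r = raid_scan(ir, ND, NP, BLOCK, v);
        if (r < 0)
                printf("no consistent failure set found\n");
        else
                printf("%d failed block(s), first index %d\n", r, r ? ir[0] : -1);

        for (i = 0; i < ND + NP; ++i)
                free(v[i]);
        return 0;
}

With one corrupted data block and two parities, raid_scan() should return 1 with ir[0] == 1.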
diff --git a/raid/combo.h b/raid/combo.h
new file mode 100644 (file)
index 0000000..8efc31a
--- /dev/null
+++ b/raid/combo.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_COMBO_H
+#define __RAID_COMBO_H
+
+#include <assert.h>
+
+/**
+ * Get the first permutation with repetition of r of n elements.
+ *
+ * Typical use is with permutation_next() in the form:
+ *
+ * int i[R];
+ * permutation_first(R, N, i);
+ * do {
+ *    code using i[0], i[1], ..., i[R-1]
+ * } while (permutation_next(R, N, i));
+ *
+ * It's equivalent to the code:
+ *
+ * for(i[0]=0;i[0]<N;++i[0])
+ *     for(i[1]=0;i[1]<N;++i[1])
+ *        ...
+ *            for(i[R-2]=0;i[R-2]<N;++i[R-2])
+ *                for(i[R-1]=0;i[R-1]<N;++i[R-1])
+ *                    code using i[0], i[1], ..., i[R-1]
+ */
+static __always_inline void permutation_first(int r, int n, int *c)
+{
+       int i;
+
+       (void)n; /* otherwise unused when assert() is compiled out */
+       assert(0 < r && r <= n);
+
+       for (i = 0; i < r; ++i)
+               c[i] = 0;
+}
+
+/**
+ * Get the next permutation with repetition of r of n elements.
+ * Return ==0 when finished.
+ */
+static __always_inline int permutation_next(int r, int n, int *c)
+{
+       int i = r - 1; /* present position */
+
+recurse:
+       /* next element at position i */
+       ++c[i];
+
+       /* if the position has reached the max */
+       if (c[i] >= n) {
+
+               /* if we are at the first level, we have finished */
+               if (i == 0)
+                       return 0;
+
+               /* increase the previous position */
+               --i;
+               goto recurse;
+       }
+
+       ++i;
+
+       /* initialize all the next positions, if any */
+       while (i < r) {
+               c[i] = 0;
+               ++i;
+       }
+
+       return 1;
+}
+
+/**
+ * Get the first combination without repetition of r of n elements.
+ *
+ * Typical use is with combination_next() in the form:
+ *
+ * int i[R];
+ * combination_first(R, N, i);
+ * do {
+ *    code using i[0], i[1], ..., i[R-1]
+ * } while (combination_next(R, N, i));
+ *
+ * It's equivalent to the code:
+ *
+ * for(i[0]=0;i[0]<N-(R-1);++i[0])
+ *     for(i[1]=i[0]+1;i[1]<N-(R-2);++i[1])
+ *        ...
+ *            for(i[R-2]=i[R-3]+1;i[R-2]<N-1;++i[R-2])
+ *                for(i[R-1]=i[R-2]+1;i[R-1]<N;++i[R-1])
+ *                    code using i[0], i[1], ..., i[R-1]
+ */
+static __always_inline void combination_first(int r, int n, int *c)
+{
+       int i;
+
+       (void)n; /* otherwise unused when assert() is compiled out */
+       assert(0 < r && r <= n);
+
+       for (i = 0; i < r; ++i)
+               c[i] = i;
+}
+
+/**
+ * Get the next combination without repetition of r of n elements.
+ * Return ==0 when finished.
+ */
+static __always_inline int combination_next(int r, int n, int *c)
+{
+       int i = r - 1; /* present position */
+       int h = n; /* high limit for this position */
+
+recurse:
+       /* next element at position i */
+       ++c[i];
+
+       /* if the position has reached the max */
+       if (c[i] >= h) {
+
+               /* if we are at the first level, we have finished */
+               if (i == 0)
+                       return 0;
+
+               /* increase the previous position */
+               --i;
+               --h;
+               goto recurse;
+       }
+
+       ++i;
+
+       /* initialize all the next positions, if any */
+       while (i < r) {
+               /* each position starts at the next value of the previous one */
+               c[i] = c[i - 1] + 1;
+               ++i;
+       }
+
+       return 1;
+}
+#endif
+
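As a concrete instance of the loop shape documented above, here is a small hypothetical program that enumerates all C(4,2) index pairs; __always_inline normally reaches combo.h via internal.h, so a fallback definition is supplied to keep the example standalone:

#include <stdio.h>

#ifndef __always_inline
#define __always_inline inline __attribute__((always_inline))
#endif
#include "combo.h"

int main(void)
{
        int i[2];

        /* prints the six pairs 0 1, 0 2, 0 3, 1 2, 1 3, 2 3 in order */
        combination_first(2, 4, i);
        do {
                printf("%d %d\n", i[0], i[1]);
        } while (combination_next(2, 4, i));
        return 0;
}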
diff --git a/raid/cpu.h b/raid/cpu.h
new file mode 100644 (file)
index 0000000..ed909bb
--- /dev/null
+++ b/raid/cpu.h
@@ -0,0 +1,331 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_CPU_H
+#define __RAID_CPU_H
+
+#ifdef CONFIG_X86
+
+static inline void raid_cpuid(uint32_t func_eax, uint32_t sub_ecx, uint32_t *reg)
+{
+       asm volatile (
+#if defined(__i386__) && defined(__PIC__)
+               /* allow compilation in PIC mode saving ebx */
+               "xchgl %%ebx, %1\n"
+               "cpuid\n"
+               "xchgl %%ebx, %1\n"
+               : "=a" (reg[0]), "=r" (reg[1]), "=c" (reg[2]), "=d" (reg[3])
+               : "0" (func_eax), "2" (sub_ecx)
+#else
+               "cpuid\n"
+               : "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3])
+               : "0" (func_eax), "2" (sub_ecx)
+#endif
+       );
+}
+
+static inline void raid_xgetbv(uint32_t* reg)
+{
+       /* get the value of the Extended Control Register ecx=0 */
+       asm volatile (
+               /* use a direct encoding of the XGETBV instruction, as only */
+               /* recent assemblers support it; */
+               /* the next line is equivalent to: "xgetbv\n" */
+               ".byte 0x0f, 0x01, 0xd0\n"
+               : "=a" (reg[0]), "=d" (reg[3])
+               : "c" (0)
+       );
+}
+
+#define CPU_VENDOR_MAX 13
+
+static inline void raid_cpu_info(char *vendor, unsigned *family, unsigned *model)
+{
+       uint32_t reg[4];
+       unsigned f, ef, m, em;
+
+       raid_cpuid(0, 0, reg);
+
+       ((uint32_t*)vendor)[0] = reg[1];
+       ((uint32_t*)vendor)[1] = reg[3];
+       ((uint32_t*)vendor)[2] = reg[2];
+       vendor[12] = 0;
+
+       raid_cpuid(1, 0, reg);
+
+       f = (reg[0] >> 8) & 0xF;
+       ef = (reg[0] >> 20) & 0xFF;
+       m = (reg[0] >> 4) & 0xF;
+       em = (reg[0] >> 16) & 0xF;
+
+       if (strcmp(vendor, "AuthenticAMD") == 0) {
+               if (f < 15) {
+                       *family = f;
+                       *model = m;
+               } else {
+                       *family = f + ef;
+                       *model = m + (em << 4);
+               }
+       } else {
+               *family = f + ef;
+               *model = m + (em << 4);
+       }
+}
+
+static inline int raid_cpu_match_sse(uint32_t cpuid_1_ecx, uint32_t cpuid_1_edx)
+{
+       uint32_t reg[4];
+
+       raid_cpuid(1, 0, reg);
+       if ((reg[2] & cpuid_1_ecx) != cpuid_1_ecx)
+               return 0;
+       if ((reg[3] & cpuid_1_edx) != cpuid_1_edx)
+               return 0;
+
+       return 1;
+}
+
+static inline int raid_cpu_match_avx(uint32_t cpuid_1_ecx, uint32_t cpuid_7_ebx, uint32_t xcr0)
+{
+       uint32_t reg[4];
+
+       raid_cpuid(1, 0, reg);
+       if ((reg[2] & cpuid_1_ecx) != cpuid_1_ecx)
+               return 0;
+
+       raid_xgetbv(reg);
+       if ((reg[0] & xcr0) != xcr0)
+               return 0;
+
+       raid_cpuid(7, 0, reg);
+       if ((reg[1] & cpuid_7_ebx) != cpuid_7_ebx)
+               return 0;
+
+       return 1;
+}
+
+static inline int raid_cpu_has_sse2(void)
+{
+       /*
+        * Intel® 64 and IA-32 Architectures Software Developer's Manual
+        * 325462-048US September 2013
+        *
+        * 11.6.2 Checking for SSE/SSE2 Support
+        * Before an application attempts to use the SSE and/or SSE2 extensions, it should check
+        * that they are present on the processor:
+        * 1. Check that the processor supports the CPUID instruction. Bit 21 of the EFLAGS
+        * register can be used to check the processor's support for the CPUID instruction.
+        * 2. Check that the processor supports the SSE and/or SSE2 extensions (true if
+        * CPUID.01H:EDX.SSE[bit 25] = 1 and/or CPUID.01H:EDX.SSE2[bit 26] = 1).
+        */
+       return raid_cpu_match_sse(
+               0,
+               1 << 26); /* SSE2 */
+}
+
+static inline int raid_cpu_has_ssse3(void)
+{
+       /*
+        * Intel® 64 and IA-32 Architectures Software Developer's Manual
+        * 325462-048US September 2013
+        *
+        * 12.7.2 Checking for SSSE3 Support
+        * Before an application attempts to use the SSSE3 extensions, the application should
+        * follow the steps illustrated in Section 11.6.2, "Checking for SSE/SSE2 Support."
+        * Next, use the additional step provided below:
+        * Check that the processor supports SSSE3 (if CPUID.01H:ECX.SSSE3[bit 9] = 1).
+        */
+       return raid_cpu_match_sse(
+               1 << 9, /* SSSE3 */
+               1 << 26); /* SSE2 */
+}
+
+static inline int raid_cpu_has_crc32(void)
+{
+       /*
+        * Intel® 64 and IA-32 Architectures Software Developer's Manual
+        * 325462-048US September 2013
+        *
+        * 12.12.3 Checking for SSE4.2 Support
+        * ...
+        * Before an application attempts to use the CRC32 instruction, it must check
+        * that the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1).
+        */
+       return raid_cpu_match_sse(
+               1 << 20, /* CRC32 */
+               0);
+}
+
+static inline int raid_cpu_has_avx2(void)
+{
+       /*
+        * Intel Architecture Instruction Set Extensions Programming Reference
+        * 319433-022 October 2014
+        *
+        * 14.3 Detection of AVX instructions
+        * 1) Detect CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application use)
+        * 2) Issue XGETBV and verify that XCR0[2:1] = `11b' (XMM state and YMM state are enabled by OS).
+        * 3) detect CPUID.1:ECX.AVX[bit 28] = 1 (AVX instructions supported).
+        * (Step 3 can be done in any order relative to 1 and 2)
+        *
+        * 14.7.1 Detection of AVX2
+        * Hardware support for AVX2 is indicated by CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]=1.
+        * Application Software must identify that hardware supports AVX, after that it must
+        * also detect support for AVX2 by checking CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5].
+        */
+       return raid_cpu_match_avx(
+               (1 << 27) | (1 << 28), /* OSXSAVE and AVX */
+               1 << 5, /* AVX2 */
+               3 << 1); /* OS saves XMM and YMM registers */
+}
+
+static inline int raid_cpu_has_avx512bw(void)
+{
+       /*
+        * Intel Architecture Instruction Set Extensions Programming Reference
+        * 319433-022 October 2014
+        *
+        * 2.2 Detection of 512-bit Instruction Groups of Intel AVX-512 Family
+        * 1) Detect CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application use)
+        * 2) Execute XGETBV and verify that XCR0[7:5] = `111b' (OPMASK state, upper 256-bit of
+        * ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled by OS) and that XCR0[2:1] = `11b'
+        * (XMM state and YMM state are enabled by OS).
+        * 3) Verify both CPUID.0x7.0:EBX.AVX512F[bit 16] = 1, CPUID.0x7.0:EBX.AVX512BW[bit 30] = 1.
+        */
+
+       /* note that we intentionally don't check for AVX and AVX2, */
+       /* because the documentation doesn't require it */
+       return raid_cpu_match_avx(
+               1 << 27, /* XSAVE/XGETBV */
+               (1 << 16) | (1 << 30), /* AVX512F and AVX512BW */
+               (3 << 1) | (7 << 5)); /* OS saves XMM, YMM and ZMM registers */
+}
+
+/**
+ * Check if it's an Intel Atom CPU.
+ */
+static inline int raid_cpu_is_atom(unsigned family, unsigned model)
+{
+       if (family != 6)
+               return 0;
+
+       /*
+        * x86 Architecture CPUID
+        * http://www.sandpile.org/x86/cpuid.htm
+        *
+        * Intel Atom
+        * 1C (28) Atom (45 nm) with 512 KB on-die L2
+        * 26 (38) Atom (45 nm) with 512 KB on-die L2
+        * 36 (54) Atom (32 nm) with 512 KB on-die L2
+        * 27 (39) Atom (32 nm) with 512 KB on-die L2
+        * 35 (53) Atom (?? nm) with ??? KB on-die L2
+        * 4A (74) Atom 2C (22 nm) 1 MB L2 + PowerVR (TGR)
+        * 5A (90) Atom 4C (22 nm) 2 MB L2 + PowerVR (ANN)
+        * 37 (55) Atom 4C (22 nm) 2 MB L2 + Intel Gen7 (BYT)
+        * 4C (76) Atom 4C (14 nm) 2 MB L2 + Intel Gen8 (BSW)
+        * 5D (93) Atom 4C (28 nm TSMC) 1 MB L2 + Mali (SoFIA)
+        * 4D (77) Atom 8C (22 nm) 4 MB L2 (AVN)
+        * ?? Atom ?C (14 nm) ? MB L2 (DVN)
+        */
+       return model == 28 || model == 38 || model == 54
+               || model == 39 || model == 53 || model == 74
+               || model == 90 || model == 55 || model == 76
+               || model == 93 || model == 77;
+}
+
+/**
+ * Check if the processor has a slow MULT implementation.
+ * If yes, it's better to use a hash not based on multiplication.
+ */
+static inline int raid_cpu_has_slowmult(void)
+{
+       char vendor[CPU_VENDOR_MAX];
+       unsigned family;
+       unsigned model;
+
+       /*
+        * In some cases Murmur3 based on MUL instruction,
+        * is a LOT slower than Spooky2 based on SHIFTs.
+        */
+       raid_cpu_info(vendor, &family, &model);
+
+       if (strcmp(vendor, "GenuineIntel") == 0) {
+               /*
+                * Intel Atom (Model 28)
+                * murmur3:378 MB/s, spooky2:3413 MB/s (x86)
+                *
+                * Intel Atom (Model 77)
+                * murmur3:1311 MB/s, spooky2:4056 MB/s (x64)
+                */
+               if (raid_cpu_is_atom(family, model))
+                       return 1;
+       }
+
+       return 0;
+}
+
+/**
+ * Check if the processor has a slow extended set of SSE registers.
+ * If yes, it's better to limit the unroll to the first 8 registers.
+ */
+static inline int raid_cpu_has_slowextendedreg(void)
+{
+       char vendor[CPU_VENDOR_MAX];
+       unsigned family;
+       unsigned model;
+
+       /*
+        * In some cases the PAR2 implementation using 16 SSE registers
+        * is a LITTLE slower than the one using only the first 8 registers.
+        * This doesn't happen for PARZ.
+        */
+       raid_cpu_info(vendor, &family, &model);
+
+       if (strcmp(vendor, "AuthenticAMD") == 0) {
+               /*
+                * AMD Bulldozer
+                * par2_sse2:4922 MB/s, par2_sse2e:4465 MB/s
+                */
+               if (family == 21)
+                       return 1;
+       }
+
+       if (strcmp(vendor, "GenuineIntel") == 0) {
+               /*
+                * Intel Atom (Model 77)
+                * par2_sse2:5686 MB/s, par2_sse2e:5250 MB/s
+                * parz_sse2:3100 MB/s, parz_sse2e:3400 MB/s
+                * par3_sse3:1921 MB/s, par3_sse3e:1813 MB/s
+                * par4_sse3:1175 MB/s, par4_sse3e:1113 MB/s
+                * par5_sse3:876 MB/s, par5_sse3e:675 MB/s
+                * par6_sse3:705 MB/s, par6_sse3e:529 MB/s
+                *
+                * Intel Atom (Model 77) "Avoton C2750"
+                * par2_sse2:5661 MB/s, par2_sse2e:5382 MB/s
+                * parz_sse2:3110 MB/s, parz_sse2e:3450 MB/s
+                * par3_sse3:1769 MB/s, par3_sse3e:1856 MB/s
+                * par4_sse3:1221 MB/s, par4_sse3e:1141 MB/s
+                * par5_sse3:910 MB/s, par5_sse3e:675 MB/s
+                * par6_sse3:720 MB/s, par6_sse3e:534 MB/s
+                */
+               if (raid_cpu_is_atom(family, model))
+                       return 1;
+       }
+
+       return 0;
+}
+#endif
+
+#endif
+
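These predicates exist to drive one-time runtime dispatch. A hedged sketch of that pattern follows; the SIMD symbols are assumed variants following the library's naming convention and are not introduced by this diff:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "cpu.h"                /* normally reached through internal.h */

void raid_gen1_int64(int nd, size_t size, void **vv);   /* portable fallback */
void raid_gen1_sse2(int nd, size_t size, void **vv);    /* assumed SIMD variant */
void raid_gen1_avx2(int nd, size_t size, void **vv);    /* assumed SIMD variant */

/* function pointer selected once at startup */
void (*raid_gen1_best)(int nd, size_t size, void **vv) = raid_gen1_int64;

void raid_select_gen1(void)
{
#ifdef CONFIG_X86
        if (raid_cpu_has_avx2())
                raid_gen1_best = raid_gen1_avx2;
        else if (raid_cpu_has_sse2())
                raid_gen1_best = raid_gen1_sse2;
#endif
}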
diff --git a/raid/gf.h b/raid/gf.h
new file mode 100644 (file)
index 0000000..1702c28
--- /dev/null
+++ b/raid/gf.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_GF_H
+#define __RAID_GF_H
+
+/*
+ * Galois field operations.
+ *
+ * Basic range checks are implemented using BUG_ON().
+ */
+
+/*
+ * GF a*b.
+ */
+static __always_inline uint8_t mul(uint8_t a, uint8_t b)
+{
+       return gfmul[a][b];
+}
+
+/*
+ * GF 1/a.
+ * Not defined for a == 0.
+ */
+static __always_inline uint8_t inv(uint8_t v)
+{
+       BUG_ON(v == 0); /* division by zero */
+
+       return gfinv[v];
+}
+
+/*
+ * GF 2^a.
+ */
+static __always_inline uint8_t pow2(int v)
+{
+       BUG_ON(v < 0 || v > 254); /* invalid exponent */
+
+       return gfexp[v];
+}
+
+/*
+ * Gets the multiplication table for a specified value.
+ */
+static __always_inline const uint8_t *table(uint8_t v)
+{
+       return gfmul[v];
+}
+
+/*
+ * Gets the generator matrix coefficient for parity 'p' and disk 'd'.
+ */
+static __always_inline uint8_t A(int p, int d)
+{
+       return gfgen[p][d];
+}
+
+/*
+ * Dereference as uint8_t
+ */
+#define v_8(p) (*(uint8_t *)&(p))
+
+/*
+ * Dereference as uint32_t
+ */
+#define v_32(p) (*(uint32_t *)&(p))
+
+/*
+ * Dereference as uint64_t
+ */
+#define v_64(p) (*(uint64_t *)&(p))
+
+/*
+ * Multiply each byte of a uint32 by 2 in the GF(2^8).
+ */
+static __always_inline uint32_t x2_32(uint32_t v)
+{
+       uint32_t mask = v & 0x80808080U;
+
+       mask = (mask << 1) - (mask >> 7);
+       v = (v << 1) & 0xfefefefeU;
+       v ^= mask & 0x1d1d1d1dU;
+       return v;
+}
+
+/*
+ * Multiply each byte of a uint64 by 2 in the GF(2^8).
+ */
+static __always_inline uint64_t x2_64(uint64_t v)
+{
+       uint64_t mask = v & 0x8080808080808080ULL;
+
+       mask = (mask << 1) - (mask >> 7);
+       v = (v << 1) & 0xfefefefefefefefeULL;
+       v ^= mask & 0x1d1d1d1d1d1d1d1dULL;
+       return v;
+}
+
+/*
+ * Divide each byte of a uint32 by 2 in the GF(2^8).
+ */
+static __always_inline uint32_t d2_32(uint32_t v)
+{
+       uint32_t mask = v & 0x01010101U;
+
+       mask = (mask << 8) - mask;
+       v = (v >> 1) & 0x7f7f7f7fU;
+       v ^= mask & 0x8e8e8e8eU;
+       return v;
+}
+
+/*
+ * Divide each byte of a uint64 by 2 in the GF(2^8).
+ */
+static __always_inline uint64_t d2_64(uint64_t v)
+{
+       uint64_t mask = v & 0x0101010101010101ULL;
+
+       mask = (mask << 8) - mask;
+       v = (v >> 1) & 0x7f7f7f7f7f7f7f7fULL;
+       v ^= mask & 0x8e8e8e8e8e8e8e8eULL;
+       return v;
+}
+
+#endif
+
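The x2_32()/x2_64() helpers are a SWAR form of GF doubling: every byte is shifted left one bit, and each byte whose high bit was set is reduced by XOR with 0x1d, the low byte of the field polynomial 0x11d. The expression (mask << 1) - (mask >> 7) expands each set high bit into an all-ones byte without touching its neighbours. A self-contained check, added here for illustration, comparing the packed form against plain per-byte doubling:

#include <assert.h>
#include <stdint.h>

/* per-byte doubling in GF(2^8) with polynomial 0x11d */
static uint8_t x2_8(uint8_t b)
{
        return (uint8_t)((b << 1) ^ ((b & 0x80) ? 0x1d : 0));
}

/* same shape as x2_32() above, repeated so this test stands alone */
static uint32_t x2_32_copy(uint32_t v)
{
        uint32_t mask = v & 0x80808080U;

        mask = (mask << 1) - (mask >> 7);
        v = (v << 1) & 0xfefefefeU;
        v ^= mask & 0x1d1d1d1dU;
        return v;
}

int main(void)
{
        uint32_t w;

        /* the packed result must equal four independent byte doublings */
        for (w = 0; w < 256; ++w) {
                uint32_t in = w | (w << 8) | (w << 16) | (w << 24);
                uint32_t out = x2_32_copy(in);

                assert((out & 0xff) == x2_8((uint8_t)w));
        }
        return 0;
}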
diff --git a/raid/helper.c b/raid/helper.c
new file mode 100644 (file)
index 0000000..f66093f
--- /dev/null
+++ b/raid/helper.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+#define RAID_SWAP(a, b) \
+       do { \
+               if (v[a] > v[b]) { \
+                       int t = v[a]; \
+                       v[a] = v[b]; \
+                       v[b] = t; \
+               } \
+       } while (0)
+
+void raid_sort(int n, int *v)
+{
+       /* sorting networks generated with Batcher's Merge-Exchange */
+       switch (n) {
+       case 2:
+               RAID_SWAP(0, 1);
+               break;
+       case 3:
+               RAID_SWAP(0, 2);
+               RAID_SWAP(0, 1);
+               RAID_SWAP(1, 2);
+               break;
+       case 4:
+               RAID_SWAP(0, 2);
+               RAID_SWAP(1, 3);
+               RAID_SWAP(0, 1);
+               RAID_SWAP(2, 3);
+               RAID_SWAP(1, 2);
+               break;
+       case 5:
+               RAID_SWAP(0, 4);
+               RAID_SWAP(0, 2);
+               RAID_SWAP(1, 3);
+               RAID_SWAP(2, 4);
+               RAID_SWAP(0, 1);
+               RAID_SWAP(2, 3);
+               RAID_SWAP(1, 4);
+               RAID_SWAP(1, 2);
+               RAID_SWAP(3, 4);
+               break;
+       case 6:
+               RAID_SWAP(0, 4);
+               RAID_SWAP(1, 5);
+               RAID_SWAP(0, 2);
+               RAID_SWAP(1, 3);
+               RAID_SWAP(2, 4);
+               RAID_SWAP(3, 5);
+               RAID_SWAP(0, 1);
+               RAID_SWAP(2, 3);
+               RAID_SWAP(4, 5);
+               RAID_SWAP(1, 4);
+               RAID_SWAP(1, 2);
+               RAID_SWAP(3, 4);
+               break;
+       }
+}
+
+void raid_insert(int n, int *v, int i)
+{
+       /* we don't use binary search because this is intended */
+       /* for very small vectors and we want to optimize the case */
+       /* of elements inserted already in order */
+
+       /* insert at the end */
+       v[n] = i;
+
+       /* swap until in the correct position */
+       while (n > 0 && v[n - 1] > v[n]) {
+               /* swap */
+               int t = v[n - 1];
+
+               v[n - 1] = v[n];
+               v[n] = t;
+
+               /* previous position */
+               --n;
+       }
+}
+
diff --git a/raid/helper.h b/raid/helper.h
new file mode 100644 (file)
index 0000000..bf68288
--- /dev/null
+++ b/raid/helper.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_HELPER_H
+#define __RAID_HELPER_H
+
+/**
+ * Inserts an integer in a sorted vector.
+ *
+ * This function can be used to insert indexes in order, ready to be used for
+ * calling raid_rec().
+ *
+ * @n Number of integers currently in the vector.
+ * @v Vector of integers already sorted.
+ *   It must have extra space for the new element at the end.
+ * @i Value to insert.
+ */
+void raid_insert(int n, int *v, int i);
+
+/**
+ * Sorts a small vector of integers.
+ *
+ * If you have indexes not in order, you can use this function to sort them
+ * before calling raid_rec().
+ *
+ * @n Number of integers. No more than RAID_PARITY_MAX.
+ * @v Vector of integers.
+ */
+void raid_sort(int n, int *v);
+
+#endif
+
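A short hypothetical caller, showing how raid_insert() keeps failure indexes sorted as they are discovered:

#include "helper.h"     /* assumed include path within this tree */

void collect_failures(void)
{
        int ir[3];
        int n = 0;

        raid_insert(n++, ir, 5);        /* ir = {5} */
        raid_insert(n++, ir, 2);        /* ir = {2, 5} */
        raid_insert(n++, ir, 7);        /* ir = {2, 5, 7}, ready for raid_rec() */
}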
diff --git a/raid/int.c b/raid/int.c
new file mode 100644 (file)
index 0000000..e16332a
--- /dev/null
+++ b/raid/int.c
@@ -0,0 +1,556 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * GEN1 (RAID5 with xor) 32bit C implementation
+ */
+void raid_gen1_int32(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       int d, l;
+       size_t i;
+
+       uint32_t p0;
+       uint32_t p1;
+
+       l = nd - 1;
+       p = v[nd];
+
+       for (i = 0; i < size; i += 8) {
+               p0 = v_32(v[l][i]);
+               p1 = v_32(v[l][i + 4]);
+               for (d = l - 1; d >= 0; --d) {
+                       p0 ^= v_32(v[d][i]);
+                       p1 ^= v_32(v[d][i + 4]);
+               }
+               v_32(p[i]) = p0;
+               v_32(p[i + 4]) = p1;
+       }
+}
+
+/*
+ * GEN1 (RAID5 with xor) 64bit C implementation
+ */
+void raid_gen1_int64(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       int d, l;
+       size_t i;
+
+       uint64_t p0;
+       uint64_t p1;
+
+       l = nd - 1;
+       p = v[nd];
+
+       for (i = 0; i < size; i += 16) {
+               p0 = v_64(v[l][i]);
+               p1 = v_64(v[l][i + 8]);
+               for (d = l - 1; d >= 0; --d) {
+                       p0 ^= v_64(v[d][i]);
+                       p1 ^= v_64(v[d][i + 8]);
+               }
+               v_64(p[i]) = p0;
+               v_64(p[i + 8]) = p1;
+       }
+}
+
+/*
+ * GEN2 (RAID6 with powers of 2) 32bit C implementation
+ */
+void raid_gen2_int32(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       int d, l;
+       size_t i;
+
+       uint32_t d0, q0, p0;
+       uint32_t d1, q1, p1;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+
+       for (i = 0; i < size; i += 8) {
+               q0 = p0 = v_32(v[l][i]);
+               q1 = p1 = v_32(v[l][i + 4]);
+               for (d = l - 1; d >= 0; --d) {
+                       d0 = v_32(v[d][i]);
+                       d1 = v_32(v[d][i + 4]);
+
+                       p0 ^= d0;
+                       p1 ^= d1;
+
+                       q0 = x2_32(q0);
+                       q1 = x2_32(q1);
+
+                       q0 ^= d0;
+                       q1 ^= d1;
+               }
+               v_32(p[i]) = p0;
+               v_32(p[i + 4]) = p1;
+               v_32(q[i]) = q0;
+               v_32(q[i + 4]) = q1;
+       }
+}
+
+/*
+ * GEN2 (RAID6 with powers of 2) 64bit C implementation
+ */
+void raid_gen2_int64(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       int d, l;
+       size_t i;
+
+       uint64_t d0, q0, p0;
+       uint64_t d1, q1, p1;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+
+       for (i = 0; i < size; i += 16) {
+               q0 = p0 = v_64(v[l][i]);
+               q1 = p1 = v_64(v[l][i + 8]);
+               for (d = l - 1; d >= 0; --d) {
+                       d0 = v_64(v[d][i]);
+                       d1 = v_64(v[d][i + 8]);
+
+                       p0 ^= d0;
+                       p1 ^= d1;
+
+                       q0 = x2_64(q0);
+                       q1 = x2_64(q1);
+
+                       q0 ^= d0;
+                       q1 ^= d1;
+               }
+               v_64(p[i]) = p0;
+               v_64(p[i + 8]) = p1;
+               v_64(q[i]) = q0;
+               v_64(q[i + 8]) = q1;
+       }
+}
+
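The GEN2 loops above evaluate Q with Horner's rule: start from the last disk, then repeatedly double and XOR in the next disk. A standalone check, illustrative only and with its own byte-wise GF helper, that this equals the defining sum q = sum(2^d * D[d]):

#include <assert.h>
#include <stdint.h>

/* byte doubling in GF(2^8), polynomial 0x11d */
static uint8_t x2(uint8_t b)
{
        return (uint8_t)((b << 1) ^ ((b & 0x80) ? 0x1d : 0));
}

/* multiply by 2^e through repeated doubling */
static uint8_t pow2mul(int e, uint8_t b)
{
        while (e--)
                b = x2(b);
        return b;
}

int main(void)
{
        uint8_t d[4] = { 0x11, 0x22, 0x33, 0x44 };      /* one byte per disk */
        uint8_t q_horner, q_direct;
        int i;

        /* Horner evaluation, the shape used by raid_gen2_int32/int64 */
        q_horner = d[3];
        for (i = 2; i >= 0; --i)
                q_horner = (uint8_t)(x2(q_horner) ^ d[i]);

        /* direct evaluation of the defining sum */
        q_direct = 0;
        for (i = 0; i < 4; ++i)
                q_direct ^= pow2mul(i, d[i]);

        assert(q_horner == q_direct);
        return 0;
}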
+/*
+ * GEN3 (triple parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that a precomputed table could be used instead of the generic
+ * multiplication table, which likely results in multiple cache misses.
+ * But this is only a reference implementation, and we are not really
+ * interested in speed.
+ */
+void raid_gen3_int8(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       uint8_t d0, r0, q0, p0;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       for (i = 0; i < size; i += 1) {
+               p0 = q0 = r0 = 0;
+               for (d = l; d > 0; --d) {
+                       d0 = v_8(v[d][i]);
+
+                       p0 ^= d0;
+                       q0 ^= gfmul[d0][gfgen[1][d]];
+                       r0 ^= gfmul[d0][gfgen[2][d]];
+               }
+
+               /* first disk with all coefficients at 1 */
+               d0 = v_8(v[0][i]);
+
+               p0 ^= d0;
+               q0 ^= d0;
+               r0 ^= d0;
+
+               v_8(p[i]) = p0;
+               v_8(q[i]) = q0;
+               v_8(r[i]) = r0;
+       }
+}
+
+/*
+ * GEN4 (quad parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that a precomputed table could be used instead of the generic
+ * multiplication table, which likely results in multiple cache misses.
+ * But this is only a reference implementation, and we are not really
+ * interested in speed.
+ */
+void raid_gen4_int8(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       int d, l;
+       size_t i;
+
+       uint8_t d0, s0, r0, q0, p0;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+
+       for (i = 0; i < size; i += 1) {
+               p0 = q0 = r0 = s0 = 0;
+               for (d = l; d > 0; --d) {
+                       d0 = v_8(v[d][i]);
+
+                       p0 ^= d0;
+                       q0 ^= gfmul[d0][gfgen[1][d]];
+                       r0 ^= gfmul[d0][gfgen[2][d]];
+                       s0 ^= gfmul[d0][gfgen[3][d]];
+               }
+
+               /* first disk with all coefficients at 1 */
+               d0 = v_8(v[0][i]);
+
+               p0 ^= d0;
+               q0 ^= d0;
+               r0 ^= d0;
+               s0 ^= d0;
+
+               v_8(p[i]) = p0;
+               v_8(q[i]) = q0;
+               v_8(r[i]) = r0;
+               v_8(s[i]) = s0;
+       }
+}
+
+/*
+ * GEN5 (penta parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that a precomputed table could be used instead of the generic
+ * multiplication table, which likely results in multiple cache misses.
+ * But this is only a reference implementation, and we are not really
+ * interested in speed.
+ */
+void raid_gen5_int8(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       int d, l;
+       size_t i;
+
+       uint8_t d0, t0, s0, r0, q0, p0;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+
+       for (i = 0; i < size; i += 1) {
+               p0 = q0 = r0 = s0 = t0 = 0;
+               for (d = l; d > 0; --d) {
+                       d0 = v_8(v[d][i]);
+
+                       p0 ^= d0;
+                       q0 ^= gfmul[d0][gfgen[1][d]];
+                       r0 ^= gfmul[d0][gfgen[2][d]];
+                       s0 ^= gfmul[d0][gfgen[3][d]];
+                       t0 ^= gfmul[d0][gfgen[4][d]];
+               }
+
+               /* first disk with all coefficients at 1 */
+               d0 = v_8(v[0][i]);
+
+               p0 ^= d0;
+               q0 ^= d0;
+               r0 ^= d0;
+               s0 ^= d0;
+               t0 ^= d0;
+
+               v_8(p[i]) = p0;
+               v_8(q[i]) = q0;
+               v_8(r[i]) = r0;
+               v_8(s[i]) = s0;
+               v_8(t[i]) = t0;
+       }
+}
+
+/*
+ * GEN6 (hexa parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that a precomputed table could be used instead of the generic
+ * multiplication table, which likely results in multiple cache misses.
+ * But this is only a reference implementation, and we are not really
+ * interested in speed.
+ */
+void raid_gen6_int8(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       uint8_t *u;
+       int d, l;
+       size_t i;
+
+       uint8_t d0, u0, t0, s0, r0, q0, p0;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+       u = v[nd + 5];
+
+       for (i = 0; i < size; i += 1) {
+               p0 = q0 = r0 = s0 = t0 = u0 = 0;
+               for (d = l; d > 0; --d) {
+                       d0 = v_8(v[d][i]);
+
+                       p0 ^= d0;
+                       q0 ^= gfmul[d0][gfgen[1][d]];
+                       r0 ^= gfmul[d0][gfgen[2][d]];
+                       s0 ^= gfmul[d0][gfgen[3][d]];
+                       t0 ^= gfmul[d0][gfgen[4][d]];
+                       u0 ^= gfmul[d0][gfgen[5][d]];
+               }
+
+               /* first disk with all coefficients at 1 */
+               d0 = v_8(v[0][i]);
+
+               p0 ^= d0;
+               q0 ^= d0;
+               r0 ^= d0;
+               s0 ^= d0;
+               t0 ^= d0;
+               u0 ^= d0;
+
+               v_8(p[i]) = p0;
+               v_8(q[i]) = q0;
+               v_8(r[i]) = r0;
+               v_8(s[i]) = s0;
+               v_8(t[i]) = t0;
+               v_8(u[i]) = u0;
+       }
+}
+
+/*
+ * Recover failure of one data block at index id[0] using parity at index
+ * ip[0] for any RAID level.
+ *
+ * Starting from the equation:
+ *
+ * Pd = A[ip[0],id[0]] * Dx
+ *
+ * and solving we get:
+ *
+ * Dx = A[ip[0],id[0]]^-1 * Pd
+ */
+void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *pa;
+       const uint8_t *T;
+       uint8_t G;
+       uint8_t V;
+       size_t i;
+
+       (void)nr; /* unused, it's always 1 */
+
+       /* if it's RAID5, use the faster function */
+       if (ip[0] == 0) {
+               raid_rec1of1(id, nd, size, vv);
+               return;
+       }
+
+       /* setup the coefficients matrix */
+       G = A(ip[0], id[0]);
+
+       /* invert it to solve the system of linear equations */
+       V = inv(G);
+
+       /* get multiplication tables */
+       T = table(V);
+
+       /* compute delta parity */
+       raid_delta_gen(1, id, ip, nd, size, vv);
+
+       p = v[nd + ip[0]];
+       pa = v[id[0]];
+
+       for (i = 0; i < size; ++i) {
+               /* delta */
+               uint8_t Pd = p[i] ^ pa[i];
+
+               /* reconstruct */
+               pa[i] = T[Pd];
+       }
+}
+
+/*
+ * Recover failure of two data blocks at indexes id[0],id[1] using parity at
+ * indexes ip[0],ip[1] for any RAID level.
+ *
+ * Starting from the equations:
+ *
+ * Pd = A[ip[0],id[0]] * Dx + A[ip[0],id[1]] * Dy
+ * Qd = A[ip[1],id[0]] * Dx + A[ip[1],id[1]] * Dy
+ *
+ * we solve inverting the coefficients matrix.
+ */
+void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *pa;
+       uint8_t *q;
+       uint8_t *qa;
+       const int N = 2;
+       const uint8_t *T[N][N];
+       uint8_t G[N * N];
+       uint8_t V[N * N];
+       size_t i;
+       int j, k;
+
+       (void)nr; /* unused, it's always 2 */
+
+       /* if it's RAID6 recovering with P and Q, use the faster function */
+       if (ip[0] == 0 && ip[1] == 1) {
+               raid_rec2of2_int8(id, ip, nd, size, vv);
+               return;
+       }
+
+       /* setup the coefficients matrix */
+       for (j = 0; j < N; ++j)
+               for (k = 0; k < N; ++k)
+                       G[j * N + k] = A(ip[j], id[k]);
+
+       /* invert it to solve the system of linear equations */
+       raid_invert(G, V, N);
+
+       /* get multiplication tables */
+       for (j = 0; j < N; ++j)
+               for (k = 0; k < N; ++k)
+                       T[j][k] = table(V[j * N + k]);
+
+       /* compute delta parity */
+       raid_delta_gen(2, id, ip, nd, size, vv);
+
+       p = v[nd + ip[0]];
+       q = v[nd + ip[1]];
+       pa = v[id[0]];
+       qa = v[id[1]];
+
+       for (i = 0; i < size; ++i) {
+               /* delta */
+               uint8_t Pd = p[i] ^ pa[i];
+               uint8_t Qd = q[i] ^ qa[i];
+
+               /* reconstruct */
+               pa[i] = T[0][0][Pd] ^ T[0][1][Qd];
+               qa[i] = T[1][0][Pd] ^ T[1][1][Qd];
+       }
+}
+
+/*
+ * Recover failure of N data blocks at indexes id[N] using parity at indexes
+ * ip[N] for any RAID level.
+ *
+ * Starting from the N equations, with 0<=i<N :
+ *
+ * PD[i] = sum(A[ip[i],id[j]] * D[j]) 0<=j<N
+ *
+ * we solve inverting the coefficients matrix.
+ *
+ * Note that, referring to the previous equations, you have:
+ * PD[0] = Pd, PD[1] = Qd, PD[2] = Rd, ...
+ * D[0] = Dx, D[1] = Dy, D[2] = Dz, ...
+ */
+void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p[RAID_PARITY_MAX];
+       uint8_t *pa[RAID_PARITY_MAX];
+       const uint8_t *T[RAID_PARITY_MAX][RAID_PARITY_MAX];
+       uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       size_t i;
+       int j, k;
+
+       /* setup the coefficients matrix */
+       for (j = 0; j < nr; ++j)
+               for (k = 0; k < nr; ++k)
+                       G[j * nr + k] = A(ip[j], id[k]);
+
+       /* invert it to solve the system of linear equations */
+       raid_invert(G, V, nr);
+
+       /* get multiplication tables */
+       for (j = 0; j < nr; ++j)
+               for (k = 0; k < nr; ++k)
+                       T[j][k] = table(V[j * nr + k]);
+
+       /* compute delta parity */
+       raid_delta_gen(nr, id, ip, nd, size, vv);
+
+       for (j = 0; j < nr; ++j) {
+               p[j] = v[nd + ip[j]];
+               pa[j] = v[id[j]];
+       }
+
+       for (i = 0; i < size; ++i) {
+               uint8_t PD[RAID_PARITY_MAX];
+
+               /* delta */
+               for (j = 0; j < nr; ++j)
+                       PD[j] = p[j][i] ^ pa[j][i];
+
+               /* reconstruct */
+               for (j = 0; j < nr; ++j) {
+                       uint8_t b = 0;
+
+                       for (k = 0; k < nr; ++k)
+                               b ^= T[j][k][PD[k]];
+                       pa[j][i] = b;
+               }
+       }
+}
+
diff --git a/raid/internal.h b/raid/internal.h
new file mode 100644 (file)
index 0000000..4465cb9
--- /dev/null
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_INTERNAL_H
+#define __RAID_INTERNAL_H
+
+/*
+ * Supported instruction sets.
+ *
+ * It may happen that the assembler is too old to support
+ * all instructions, even if the architecture supports them.
+ * These defines allow excluding the unsupported ones from the build.
+ *
+ * If your project uses a predefined assembler, you can set these
+ * defines to fixed values instead of relying on the HAVE_* checks,
+ * as in the sketch below.
+ */
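+
+/*
+ * For example (a hypothetical sketch): a build that always targets
+ * x86-64 with SSE2, but without SSSE3 and AVX2, could hardcode:
+ *
+ *   #define CONFIG_X86 1
+ *   #define CONFIG_X86_64 1
+ *   #define CONFIG_SSE2 1
+ */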
+#if HAVE_CONFIG_H
+
+/* Includes the project configuration for HAVE_* defines */
+#include "config.h"
+
+/* If the compiler supports assembly */
+#if HAVE_ASSEMBLY
+/* Autodetect from the compiler */
+#if defined(__i386__)
+#define CONFIG_X86 1
+#define CONFIG_X86_32 1
+#endif
+#if defined(__x86_64__)
+#define CONFIG_X86 1
+#define CONFIG_X86_64 1
+#endif
+#endif
+
+/* Enables SSE2, SSSE3, AVX2 only if the assembler supports it */
+#if HAVE_SSE2
+#define CONFIG_SSE2 1
+#endif
+#if HAVE_SSSE3
+#define CONFIG_SSSE3 1
+#endif
+#if HAVE_AVX2
+#define CONFIG_AVX2 1
+#endif
+
+#else /* if HAVE_CONFIG_H is not defined */
+
+/* Assume that assembly is always supported */
+#if defined(__i386__)
+#define CONFIG_X86 1
+#define CONFIG_X86_32 1
+#endif
+
+#if defined(__x86_64__)
+#define CONFIG_X86 1
+#define CONFIG_X86_64 1
+#endif
+
+/* Assumes that the assembler supports everything */
+#ifdef CONFIG_X86
+#define CONFIG_SSE2 1
+#define CONFIG_SSSE3 1
+#define CONFIG_AVX2 1
+#endif
+#endif
+
+/*
+ * Includes anything required for compatibility.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Inverse assert.
+ */
+#define BUG_ON(a) assert(!(a))
+
+/*
+ * Forced inline.
+ */
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+/*
+ * Forced alignment.
+ */
+#ifndef __aligned
+#define __aligned(a) __attribute__((aligned(a)))
+#endif
+
+/*
+ * Align a pointer to the specified size.
+ * Note that @size must be a power of 2.
+ */
+static __always_inline void *__align_ptr(void *ptr, uintptr_t size)
+{
+       uintptr_t offset = (uintptr_t)ptr;
+
+       offset = (offset + size - 1U) & ~(size - 1U);
+
+       return (void *)offset;
+}
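+
+/*
+ * For example (a sketch): __align_ptr((void *)0x1005, 0x10) returns
+ * (void *)0x1010, rounding the address up to the next 16-byte boundary.
+ */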
+
+/*
+ * Includes the main interface headers.
+ */
+#include "raid.h"
+#include "helper.h"
+
+/*
+ * Internal functions.
+ *
+ * These are intended to provide access for testing.
+ */
+int raid_selftest(void);
+void raid_gen_ref(int nd, int np, size_t size, void **vv);
+void raid_invert(uint8_t *M, uint8_t *V, int n);
+void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v);
+void raid_rec1of1(int *id, int nd, size_t size, void **v);
+void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv);
+void raid_gen1_int32(int nd, size_t size, void **vv);
+void raid_gen1_int64(int nd, size_t size, void **vv);
+void raid_gen1_sse2(int nd, size_t size, void **vv);
+void raid_gen1_avx2(int nd, size_t size, void **vv);
+void raid_gen2_int32(int nd, size_t size, void **vv);
+void raid_gen2_int64(int nd, size_t size, void **vv);
+void raid_gen2_sse2(int nd, size_t size, void **vv);
+void raid_gen2_avx2(int nd, size_t size, void **vv);
+void raid_gen2_sse2ext(int nd, size_t size, void **vv);
+void raid_genz_int32(int nd, size_t size, void **vv);
+void raid_genz_int64(int nd, size_t size, void **vv);
+void raid_genz_sse2(int nd, size_t size, void **vv);
+void raid_genz_sse2ext(int nd, size_t size, void **vv);
+void raid_genz_avx2ext(int nd, size_t size, void **vv);
+void raid_gen3_int8(int nd, size_t size, void **vv);
+void raid_gen3_ssse3(int nd, size_t size, void **vv);
+void raid_gen3_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen3_avx2ext(int nd, size_t size, void **vv);
+void raid_gen4_int8(int nd, size_t size, void **vv);
+void raid_gen4_ssse3(int nd, size_t size, void **vv);
+void raid_gen4_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen4_avx2ext(int nd, size_t size, void **vv);
+void raid_gen5_int8(int nd, size_t size, void **vv);
+void raid_gen5_ssse3(int nd, size_t size, void **vv);
+void raid_gen5_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen5_avx2ext(int nd, size_t size, void **vv);
+void raid_gen6_int8(int nd, size_t size, void **vv);
+void raid_gen6_ssse3(int nd, size_t size, void **vv);
+void raid_gen6_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen6_avx2ext(int nd, size_t size, void **vv);
+void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+
+/*
+ * Internal naming.
+ *
+ * These are intended to provide access for testing.
+ */
+const char *raid_gen1_tag(void);
+const char *raid_gen2_tag(void);
+const char *raid_genz_tag(void);
+const char *raid_gen3_tag(void);
+const char *raid_gen4_tag(void);
+const char *raid_gen5_tag(void);
+const char *raid_gen6_tag(void);
+const char *raid_rec1_tag(void);
+const char *raid_rec2_tag(void);
+const char *raid_recX_tag(void);
+
+/*
+ * Internal forwarders.
+ */
+extern void (*raid_gen3_ptr)(int nd, size_t size, void **vv);
+extern void (*raid_genz_ptr)(int nd, size_t size, void **vv);
+extern void (*raid_gen_ptr[RAID_PARITY_MAX])(
+       int nd, size_t size, void **vv);
+extern void (*raid_rec_ptr[RAID_PARITY_MAX])(
+       int nr, int *id, int *ip, int nd, size_t size, void **vv);
+
+/*
+ * Tables.
+ */
+extern const uint8_t raid_gfmul[256][256] __aligned(256);
+extern const uint8_t raid_gfexp[256] __aligned(256);
+extern const uint8_t raid_gfinv[256] __aligned(256);
+extern const uint8_t raid_gfvandermonde[3][256] __aligned(256);
+extern const uint8_t raid_gfcauchy[6][256] __aligned(256);
+extern const uint8_t raid_gfcauchypshufb[251][4][2][16] __aligned(256);
+extern const uint8_t raid_gfmulpshufb[256][2][16] __aligned(256);
+extern const uint8_t (*raid_gfgen)[256];
+#define gfmul raid_gfmul
+#define gfexp raid_gfexp
+#define gfinv raid_gfinv
+#define gfvandermonde raid_gfvandermonde
+#define gfcauchy raid_gfcauchy
+#define gfgenpshufb raid_gfcauchypshufb
+#define gfmulpshufb raid_gfmulpshufb
+#define gfgen raid_gfgen
+
+/*
+ * Assembler blocks.
+ */
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+static __always_inline void raid_sse_begin(void)
+{
+}
+
+static __always_inline void raid_sse_end(void)
+{
+       /* SSE and AVX code uses non-temporal writes, like MOVNTDQ, */
+       /* which have a weak memory model. To ensure that other */
+       /* processors see the written data correctly, we use a */
+       /* store-store memory barrier at the end of the asm code */
+       asm volatile ("sfence" : : : "memory");
+
+       /* clobber the registers used in the asm code */
+       /* this is required because in the Windows ABI */
+       /* registers xmm6-xmm15 must be preserved by the callee; */
+       /* this clobber list forces the compiler to save any */
+       /* register that needs to be saved */
+       /* we check for __SSE2__ because we require that the */
+       /* compiler supports SSE2 registers in the clobber list */
+#ifdef __SSE2__
+       asm volatile ("" : : : "%xmm0", "%xmm1", "%xmm2", "%xmm3");
+       asm volatile ("" : : : "%xmm4", "%xmm5", "%xmm6", "%xmm7");
+#ifdef CONFIG_X86_64
+       asm volatile ("" : : : "%xmm8", "%xmm9", "%xmm10", "%xmm11");
+       asm volatile ("" : : : "%xmm12", "%xmm13", "%xmm14", "%xmm15");
+#endif
+#endif
+}
+#endif
+
+#ifdef CONFIG_AVX2
+static __always_inline void raid_avx_begin(void)
+{
+       raid_sse_begin();
+}
+
+static __always_inline void raid_avx_end(void)
+{
+       raid_sse_end();
+
+       /* reset the upper part of the ymm registers */
+       /* to avoid the ~70 clock penalty on the next */
+       /* xmm register use */
+       asm volatile ("vzeroupper" : : : "memory");
+}
+#endif
+#endif /* CONFIG_X86 */
+
+#endif
+
diff --git a/raid/intz.c b/raid/intz.c
new file mode 100644 (file)
index 0000000..80c2014
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * GENz (triple parity with powers of 2^-1) 32bit C implementation
+ */
+void raid_genz_int32(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t**)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       uint32_t d0, r0, q0, p0;
+       uint32_t d1, r1, q1, p1;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       for (i = 0; i < size; i += 8) {
+               r0 = q0 = p0 = v_32(v[l][i]);
+               r1 = q1 = p1 = v_32(v[l][i + 4]);
+               for (d = l - 1; d >= 0; --d) {
+                       d0 = v_32(v[d][i]);
+                       d1 = v_32(v[d][i + 4]);
+
+                       p0 ^= d0;
+                       p1 ^= d1;
+
+                       q0 = x2_32(q0);
+                       q1 = x2_32(q1);
+
+                       q0 ^= d0;
+                       q1 ^= d1;
+
+                       r0 = d2_32(r0);
+                       r1 = d2_32(r1);
+
+                       r0 ^= d0;
+                       r1 ^= d1;
+               }
+               v_32(p[i]) = p0;
+               v_32(p[i + 4]) = p1;
+               v_32(q[i]) = q0;
+               v_32(q[i + 4]) = q1;
+               v_32(r[i]) = r0;
+               v_32(r[i + 4]) = r1;
+       }
+}
+
+/*
+ * GENz (triple parity with powers of 2^-1) 64bit C implementation
+ */
+void raid_genz_int64(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t**)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       uint64_t d0, r0, q0, p0;
+       uint64_t d1, r1, q1, p1;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       for (i = 0; i < size; i += 16) {
+               r0 = q0 = p0 = v_64(v[l][i]);
+               r1 = q1 = p1 = v_64(v[l][i + 8]);
+               for (d = l - 1; d >= 0; --d) {
+                       d0 = v_64(v[d][i]);
+                       d1 = v_64(v[d][i + 8]);
+
+                       p0 ^= d0;
+                       p1 ^= d1;
+
+                       q0 = x2_64(q0);
+                       q1 = x2_64(q1);
+
+                       q0 ^= d0;
+                       q1 ^= d1;
+
+                       r0 = d2_64(r0);
+                       r1 = d2_64(r1);
+
+                       r0 ^= d0;
+                       r1 ^= d1;
+               }
+               v_64(p[i]) = p0;
+               v_64(p[i + 8]) = p1;
+               v_64(q[i]) = q0;
+               v_64(q[i + 8]) = q1;
+               v_64(r[i]) = r0;
+               v_64(r[i + 8]) = r1;
+       }
+}
+
diff --git a/raid/memory.c b/raid/memory.c
new file mode 100644 (file)
index 0000000..02a5a92
--- /dev/null
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "memory.h"
+
+void *raid_malloc_align(size_t size, size_t align_size, void **freeptr)
+{
+       unsigned char *ptr;
+       uintptr_t offset;
+
+       ptr = malloc(size + align_size);
+       if (!ptr) {
+               /* LCOV_EXCL_START */
+               return 0;
+               /* LCOV_EXCL_STOP */
+       }
+
+       *freeptr = ptr;
+
+       offset = ((uintptr_t)ptr) % align_size;
+
+       if (offset != 0)
+               ptr += align_size - offset;
+
+       return ptr;
+}
+
+void *raid_malloc(size_t size, void **freeptr)
+{
+       return raid_malloc_align(size, RAID_MALLOC_ALIGN, freeptr);
+}
+
+void **raid_malloc_vector_align(int nd, int n, size_t size,
+       size_t align_size, size_t displacement_size, void **freeptr)
+{
+       void **v;
+       unsigned char *va;
+       int i;
+
+       BUG_ON(n <= 0 || nd < 0);
+
+       v = malloc(n * sizeof(void *));
+       if (!v) {
+               /* LCOV_EXCL_START */
+               return 0;
+               /* LCOV_EXCL_STOP */
+       }
+
+       va = raid_malloc_align(n * (size + displacement_size), align_size, freeptr);
+       if (!va) {
+               /* LCOV_EXCL_START */
+               free(v);
+               return 0;
+               /* LCOV_EXCL_STOP */
+       }
+
+       for (i = 0; i < n; ++i) {
+               v[i] = va;
+               va += size + displacement_size;
+       }
+
+       /* reverse order of the data blocks */
+       /* because they are usually accessed from the last one */
+       for (i = 0; i < nd / 2; ++i) {
+               void *ptr = v[i];
+
+               v[i] = v[nd - 1 - i];
+               v[nd - 1 - i] = ptr;
+       }
+
+       return v;
+}
+
+void **raid_malloc_vector(int nd, int n, size_t size, void **freeptr)
+{
+       return raid_malloc_vector_align(nd, n, size, RAID_MALLOC_ALIGN,
+               RAID_MALLOC_DISPLACEMENT, freeptr);
+}
+
+void raid_mrand_vector(unsigned seed, int n, size_t size, void **vv)
+{
+       unsigned char **v = (unsigned char **)vv;
+       int i;
+       size_t j;
+
+       for (i = 0; i < n; ++i)
+               for (j = 0; j < size; ++j) {
+                       /* basic C99/C11 linear congruential generator */
+                       seed = seed * 1103515245U + 12345U;
+
+                       v[i][j] = seed >> 16;
+               }
+}
+
+int raid_mtest_vector(int n, size_t size, void **vv)
+{
+       unsigned char **v = (unsigned char **)vv;
+       int i;
+       size_t j;
+       unsigned k;
+       unsigned char d;
+       unsigned char p;
+
+       /* fill with 0 */
+       d = 0;
+       for (i = 0; i < n; ++i)
+               for (j = 0; j < size; ++j)
+                       v[i][j] = d;
+
+       /* test with all the byte patterns */
+       for (k = 1; k < 256; ++k) {
+               p = d;
+               d = k;
+
+               /* forward fill */
+               for (i = 0; i < n; ++i) {
+                       for (j = 0; j < size; ++j) {
+                               if (v[i][j] != p) {
+                                       /* LCOV_EXCL_START */
+                                       return -1;
+                                       /* LCOV_EXCL_STOP */
+                               }
+                               v[i][j] = d;
+                       }
+               }
+
+               p = d;
+               d = ~p;
+               /* backward fill with complement */
+               for (i = 0; i < n; ++i) {
+                       for (j = size; j > 0; --j) {
+                               if (v[i][j - 1] != p) {
+                                       /* LCOV_EXCL_START */
+                                       return -1;
+                                       /* LCOV_EXCL_STOP */
+                               }
+                               v[i][j - 1] = d;
+                       }
+               }
+       }
+
+       return 0;
+}
+
diff --git a/raid/memory.h b/raid/memory.h
new file mode 100644 (file)
index 0000000..de00614
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_MEMORY_H
+#define __RAID_MEMORY_H
+
+/**
+ * Memory alignment provided by raid_malloc().
+ *
+ * It should guarantee good cache performance everywhere.
+ */
+#define RAID_MALLOC_ALIGN 256
+
+/**
+ * Memory displacement to avoid cache address sharing on contiguous blocks,
+ * used by raid_malloc_vector().
+ *
+ * When allocating a sequence of blocks whose size is a power of 2,
+ * there is a risk that the address of each block maps to the same
+ * cache lines and prefetch predictor entries, resulting in a lot of
+ * cache conflicts if you access all the blocks in parallel, from the
+ * start to the end.
+ *
+ * To avoid this effect, all the blocks are allocated with a fixed
+ * displacement chosen to reduce the cache address sharing.
+ *
+ * The selected displacement was chosen empirically with some speed tests
+ * with 8/12/16/20/24 data buffers of 256 KB.
+ *
+ * These are the results in MB/s with no displacement:
+ *
+ *            sse2
+ *    gen1   15368 [MB/s]
+ *    gen2    6814 [MB/s]
+ *    genz    3033 [MB/s]
+ *
+ * These are the results with the displacement, showing improvements
+ * on the order of 20% or more:
+ *
+ *            sse2
+ *    gen1   21936 [MB/s]
+ *    gen2   11902 [MB/s]
+ *    genz    5838 [MB/s]
+ *
+ */
+#define RAID_MALLOC_DISPLACEMENT (7*256)
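+
+/*
+ * As a worked sketch of the effect (hypothetical base address, 256 KiB
+ * blocks): without displacement the blocks start at base, base + 0x40000,
+ * base + 0x80000, ..., so the low address bits repeat and consecutive
+ * blocks compete for the same cache sets. With the 7*256 = 0x700 byte
+ * displacement they start at base, base + 0x40700, base + 0x80e00, ...,
+ * spreading the blocks across different sets.
+ */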
+
+/**
+ * Aligned malloc.
+ * Use an alignment suitable for the raid functions.
+ */
+void *raid_malloc(size_t size, void **freeptr);
+
+/**
+ * Arbitrary aligned malloc.
+ */
+void *raid_malloc_align(size_t size, size_t align_size, void **freeptr);
+
+/**
+ * Aligned vector allocation.
+ * Use an alignment suitable for the raid functions.
+ * Returns a vector of @n pointers, each one pointing to a block of
+ * the specified @size.
+ * The first @nd elements are reversed in order.
+ */
+void **raid_malloc_vector(int nd, int n, size_t size, void **freeptr);
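+
+/*
+ * A minimal usage sketch (hypothetical counts and sizes, error
+ * handling omitted): allocate 8 data blocks plus 2 parity blocks
+ * of 256 KiB each, then release everything.
+ *
+ *   void *freeptr;
+ *   void **v = raid_malloc_vector(8, 8 + 2, 256 * 1024, &freeptr);
+ *   ... use v[0..7] as data and v[8..9] as parity ...
+ *   free(v);
+ *   free(freeptr);
+ */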
+
+/**
+ * Arbitrary aligned vector allocation.
+ */
+void **raid_malloc_vector_align(int nd, int n, size_t size,
+       size_t align_size, size_t displacement_size, void **freeptr);
+
+/**
+ * Fills the memory vector with pseudo-random data based on the specified seed.
+ */
+void raid_mrand_vector(unsigned seed, int n, size_t size, void **vv);
+
+/**
+ * Tests the memory vector for RAM problems.
+ * Returns 0 on success, or -1 if a problem is found.
+ */
+int raid_mtest_vector(int n, size_t size, void **vv);
+
+#endif
+
diff --git a/raid/module.c b/raid/module.c
new file mode 100644 (file)
index 0000000..b688d22
--- /dev/null
@@ -0,0 +1,473 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "memory.h"
+#include "cpu.h"
+
+/*
+ * Initializes and selects the best algorithm.
+ */
+void raid_init(void)
+{
+       raid_gen3_ptr = raid_gen3_int8;
+       raid_gen_ptr[3] = raid_gen4_int8;
+       raid_gen_ptr[4] = raid_gen5_int8;
+       raid_gen_ptr[5] = raid_gen6_int8;
+
+       if (sizeof(void *) == 4) {
+               raid_gen_ptr[0] = raid_gen1_int32;
+               raid_gen_ptr[1] = raid_gen2_int32;
+               raid_genz_ptr = raid_genz_int32;
+       } else {
+               raid_gen_ptr[0] = raid_gen1_int64;
+               raid_gen_ptr[1] = raid_gen2_int64;
+               raid_genz_ptr = raid_genz_int64;
+       }
+
+       raid_rec_ptr[0] = raid_rec1_int8;
+       raid_rec_ptr[1] = raid_rec2_int8;
+       raid_rec_ptr[2] = raid_recX_int8;
+       raid_rec_ptr[3] = raid_recX_int8;
+       raid_rec_ptr[4] = raid_recX_int8;
+       raid_rec_ptr[5] = raid_recX_int8;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+       if (raid_cpu_has_sse2()) {
+               raid_gen_ptr[0] = raid_gen1_sse2;
+#ifdef CONFIG_X86_64
+               if (raid_cpu_has_slowextendedreg()) {
+                       raid_gen_ptr[1] = raid_gen2_sse2;
+               } else {
+                       raid_gen_ptr[1] = raid_gen2_sse2ext;
+               }
+               /* note that raid_cpu_has_slowextendedreg() doesn't affect genz */
+               raid_genz_ptr = raid_genz_sse2ext;
+#else
+               raid_gen_ptr[1] = raid_gen2_sse2;
+               raid_genz_ptr = raid_genz_sse2;
+#endif
+       }
+#endif
+
+#ifdef CONFIG_SSSE3
+       if (raid_cpu_has_ssse3()) {
+#ifdef CONFIG_X86_64
+               if (raid_cpu_has_slowextendedreg()) {
+                       raid_gen3_ptr = raid_gen3_ssse3;
+                       raid_gen_ptr[3] = raid_gen4_ssse3;
+                       raid_gen_ptr[4] = raid_gen5_ssse3;
+                       raid_gen_ptr[5] = raid_gen6_ssse3;
+               } else {
+                       raid_gen3_ptr = raid_gen3_ssse3ext;
+                       raid_gen_ptr[3] = raid_gen4_ssse3ext;
+                       raid_gen_ptr[4] = raid_gen5_ssse3ext;
+                       raid_gen_ptr[5] = raid_gen6_ssse3ext;
+               }
+#else
+               raid_gen3_ptr = raid_gen3_ssse3;
+               raid_gen_ptr[3] = raid_gen4_ssse3;
+               raid_gen_ptr[4] = raid_gen5_ssse3;
+               raid_gen_ptr[5] = raid_gen6_ssse3;
+#endif
+               raid_rec_ptr[0] = raid_rec1_ssse3;
+               raid_rec_ptr[1] = raid_rec2_ssse3;
+               raid_rec_ptr[2] = raid_recX_ssse3;
+               raid_rec_ptr[3] = raid_recX_ssse3;
+               raid_rec_ptr[4] = raid_recX_ssse3;
+               raid_rec_ptr[5] = raid_recX_ssse3;
+       }
+#endif
+
+#ifdef CONFIG_AVX2
+       if (raid_cpu_has_avx2()) {
+               raid_gen_ptr[0] = raid_gen1_avx2;
+               raid_gen_ptr[1] = raid_gen2_avx2;
+#ifdef CONFIG_X86_64
+               raid_gen3_ptr = raid_gen3_avx2ext;
+               raid_genz_ptr = raid_genz_avx2ext;
+               raid_gen_ptr[3] = raid_gen4_avx2ext;
+               raid_gen_ptr[4] = raid_gen5_avx2ext;
+               raid_gen_ptr[5] = raid_gen6_avx2ext;
+#endif
+               raid_rec_ptr[0] = raid_rec1_avx2;
+               raid_rec_ptr[1] = raid_rec2_avx2;
+               raid_rec_ptr[2] = raid_recX_avx2;
+               raid_rec_ptr[3] = raid_recX_avx2;
+               raid_rec_ptr[4] = raid_recX_avx2;
+               raid_rec_ptr[5] = raid_recX_avx2;
+       }
+#endif
+#endif /* CONFIG_X86 */
+
+       /* set the default mode */
+       raid_mode(RAID_MODE_CAUCHY);
+}
+
+/*
+ * Reference parity computation.
+ */
+void raid_gen_ref(int nd, int np, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       size_t i;
+
+       for (i = 0; i < size; ++i) {
+               uint8_t p[RAID_PARITY_MAX];
+               int j, d;
+
+               for (j = 0; j < np; ++j)
+                       p[j] = 0;
+
+               for (d = 0; d < nd; ++d) {
+                       uint8_t b = v[d][i];
+
+                       for (j = 0; j < np; ++j)
+                               p[j] ^= gfmul[b][gfgen[j][d]];
+               }
+
+               for (j = 0; j < np; ++j)
+                       v[nd + j][i] = p[j];
+       }
+}
+
+/*
+ * Size of the blocks to test.
+ */
+#define TEST_SIZE 4096
+
+/*
+ * Number of data blocks to test.
+ */
+#define TEST_COUNT (65536 / TEST_SIZE)
+
+/*
+ * Parity generation test.
+ */
+static int raid_test_par(int nd, int np, size_t size, void **v, void **ref)
+{
+       int i;
+       void *t[TEST_COUNT + RAID_PARITY_MAX];
+
+       /* setup data */
+       for (i = 0; i < nd; ++i)
+               t[i] = ref[i];
+
+       /* setup parity */
+       for (i = 0; i < np; ++i)
+               t[nd + i] = v[nd + i];
+
+       raid_gen(nd, np, size, t);
+
+       /* compare parity */
+       for (i = 0; i < np; ++i) {
+               if (memcmp(t[nd + i], ref[nd + i], size) != 0) {
+                       /* LCOV_EXCL_START */
+                       return -1;
+                       /* LCOV_EXCL_STOP */
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Recovering test.
+ */
+static int raid_test_rec(int nr, int *ir, int nd, int np, size_t size,
+       void **v, void **ref)
+{
+       int i, j;
+       void *t[TEST_COUNT + RAID_PARITY_MAX];
+
+       /* setup data and parity vector */
+       for (i = 0, j = 0; i < nd + np; ++i) {
+               if (j < nr && ir[j] == i) {
+                       /* this block has to be recovered */
+                       t[i] = v[i];
+                       ++j;
+               } else {
+                       /* this block is used for recovering */
+                       t[i] = ref[i];
+               }
+       }
+
+       raid_rec(nr, ir, nd, np, size, t);
+
+       /* compare all data and parity */
+       for (i = 0; i < nd + np; ++i) {
+               if (t[i] != ref[i]
+                       && memcmp(t[i], ref[i], size) != 0) {
+                       /* LCOV_EXCL_START */
+                       return -1;
+                       /* LCOV_EXCL_STOP */
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Recovering test for data.
+ */
+static int raid_test_data(int nr, int *id, int *ip, int nd, int np,
+       size_t size, void **v, void **ref)
+{
+       int i, j;
+       void *t[TEST_COUNT + RAID_PARITY_MAX];
+
+       /* setup data vector */
+       for (i = 0, j = 0; i < nd; ++i) {
+               if (j < nr && id[j] == i) {
+                       /* this block has to be recovered */
+                       t[i] = v[i];
+                       ++j;
+               } else {
+                       /* this block is left unchanged */
+                       t[i] = ref[i];
+               }
+       }
+
+       /* setup parity vector */
+       for (i = 0, j = 0; i < np; ++i) {
+               if (j < nr && ip[j] == i) {
+                       /* this block is used for recovering */
+                       t[nd + i] = ref[nd + i];
+                       ++j;
+               } else {
+                       /* this block should not be read or written */
+                       t[nd + i] = 0;
+               }
+       }
+
+       raid_data(nr, id, ip, nd, size, t);
+
+       /* compare all data and parity */
+       for (i = 0; i < nd; ++i) {
+               if (t[i] != ref[i]
+                       && t[i] != 0
+                       && memcmp(t[i], ref[i], size) != 0) {
+                       /* LCOV_EXCL_START */
+                       return -1;
+                       /* LCOV_EXCL_STOP */
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Scan test.
+ */
+static int raid_test_scan(int nr, int *ir, int nd, int np, size_t size,
+       void **v, void **ref)
+{
+       int i, j, ret;
+       void *t[TEST_COUNT + RAID_PARITY_MAX];
+       int is[RAID_PARITY_MAX];
+
+       /* setup data and parity vector */
+       for (i = 0, j = 0; i < nd + np; ++i) {
+               if (j < nr && ir[j] == i) {
+                       /* this block is bad */
+                       t[i] = v[i];
+                       ++j;
+               } else {
+                       /* this block is used for recovering */
+                       t[i] = ref[i];
+               }
+       }
+
+       ret = raid_scan(is, nd, np, size, t);
+
+       /* compare identified bad blocks */
+       if (ret != nr)
+               return -1;
+       for (i = 0; i < nr; ++i) {
+               if (ir[i] != is[i]) {
+                       /* LCOV_EXCL_START */
+                       return -1;
+                       /* LCOV_EXCL_STOP */
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Basic functionality self test.
+ */
+int raid_selftest(void)
+{
+       const int nd = TEST_COUNT;
+       const size_t size = TEST_SIZE;
+       const int nv = nd + RAID_PARITY_MAX * 2 + 1;
+       void *v_alloc;
+       void **v;
+       void *ref[nd + RAID_PARITY_MAX];
+       int ir[RAID_PARITY_MAX];
+       int ip[RAID_PARITY_MAX];
+       int i, np;
+       int ret = 0;
+
+       /* ensure we have enough space for the data */
+       BUG_ON(nd * size > 65536);
+
+       v = raid_malloc_vector(nd, nv, size, &v_alloc);
+       if (!v) {
+               /* LCOV_EXCL_START */
+               return -1;
+               /* LCOV_EXCL_STOP */
+       }
+
+       memset(v[nv - 1], 0, size);
+       raid_zero(v[nv - 1]);
+
+       /* use the multiplication table as data */
+       for (i = 0; i < nd; ++i)
+               ref[i] = ((uint8_t *)gfmul) + size * i;
+
+       /* setup reference parity */
+       for (i = 0; i < RAID_PARITY_MAX; ++i)
+               ref[nd + i] = v[nd + RAID_PARITY_MAX + i];
+
+       /* compute reference parity */
+       raid_gen_ref(nd, RAID_PARITY_MAX, size, ref);
+
+       /* test for each parity level */
+       for (np = 1; np <= RAID_PARITY_MAX; ++np) {
+               /* test parity generation */
+               ret = raid_test_par(nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+
+               /* test recovering with broken ending data disks */
+               for (i = 0; i < np; ++i) {
+                       /* bad data */
+                       ir[i] = nd - np + i;
+
+                       /* good parity */
+                       ip[i] = i;
+               }
+
+               ret = raid_test_rec(np, ir, nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+
+               ret = raid_test_data(np, ir, ip, nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+
+               /* test recovering with broken leading data and broken leading parity */
+               for (i = 0; i < np / 2; ++i) {
+                       /* bad data */
+                       ir[i] = i;
+
+                       /* good parity */
+                       ip[i] = (np + 1) / 2 + i;
+               }
+
+               /* bad parity */
+               for (i = 0; i < (np + 1) / 2; ++i)
+                       ir[np / 2 + i] = nd + i;
+
+               ret = raid_test_rec(np, ir, nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+
+               ret = raid_test_data(np / 2, ir, ip, nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+
+               /* test recovering with broken leading data and broken ending parity */
+               for (i = 0; i < np / 2; ++i) {
+                       /* bad data */
+                       ir[i] = i;
+
+                       /* good parity */
+                       ip[i] = i;
+               }
+
+               /* bad parity */
+               for (i = 0; i < (np + 1) / 2; ++i)
+                       ir[np / 2 + i] = nd + np - (np + 1) / 2 + i;
+
+               ret = raid_test_rec(np, ir, nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+
+               ret = raid_test_data(np / 2, ir, ip, nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+
+               /* scan test with broken data and parity */
+               for (i = 0; i < np / 2; ++i) {
+                       /* bad data */
+                       ir[i] = i;
+               }
+               for (i = 0; i < (np - 1) / 2; ++i) {
+                       /* bad parity */
+                       ir[np / 2 + i] = nd + i;
+               }
+               for (i = 0; i < np - 1; ++i) {
+                       /* make blocks bad */
+                       /* we cannot fill them with 0, because the original */
+                       /* data may already be filled with 0 */
+                       memset(v[ir[i]], 0x55, size);
+               }
+
+               ret = raid_test_scan(np - 1, ir, nd, np, size, v, ref);
+               if (ret != 0) {
+                       /* LCOV_EXCL_START */
+                       goto bail;
+                       /* LCOV_EXCL_STOP */
+               }
+       }
+
+       /* scan test with no parity */
+       ret = raid_test_scan(0, 0, nd, 0, size, v, ref);
+       if (ret != -1) {
+               /* LCOV_EXCL_START */
+               goto bail;
+               /* LCOV_EXCL_STOP */
+       }
+
+       ret = 0;
+
+bail:
+       free(v);
+       free(v_alloc);
+
+       return ret;
+}
+
diff --git a/raid/raid.c b/raid/raid.c
new file mode 100644 (file)
index 0000000..3052675
--- /dev/null
@@ -0,0 +1,586 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * This is a RAID implementation working in the Galois Field GF(2^8) with
+ * the primitive polynomial x^8 + x^4 + x^3 + x^2 + 1 (285 decimal), and
+ * supporting up to six parity levels.
+ *
+ * For RAID5 and RAID6 it works as described in H. Peter Anvin's
+ * paper "The mathematics of RAID-6" [1]. Please refer to this paper for a
+ * complete explanation.
+ *
+ * To support triple parity, an extension of the same approach was
+ * first evaluated and then dropped. It sets the additional parity
+ * coefficients as powers of 2^-1, with the equations:
+ *
+ * P = sum(Di)
+ * Q = sum(2^i * Di)
+ * R = sum(2^-i * Di) with 0<=i<N
+ *
+ * This approach works well for triple parity and it's very efficient,
+ * because we can implement very fast parallel multiplications and
+ * divisions by 2 in GF(2^8).
+ *
+ * It's also similar to the approach used by ZFS RAIDZ3, with the
+ * difference that ZFS uses powers of 4 instead of 2^-1.
+ *
+ * Unfortunately it doesn't work beyond triple parity, because whatever
+ * value we choose to generate the power coefficients to compute other
+ * parities, the resulting equations are not solvable for some
+ * combinations of missing disks.
+ *
+ * This is expected, because the Vandermonde matrix used to compute the
+ * parity is not guaranteed to have all submatrices non-singular
+ * [2, Chap 11, Problem 7], and this is a requirement for
+ * an MDS (Maximum Distance Separable) code [2, Chap 11, Theorem 8].
+ *
+ * To overcome this limitation, we use a Cauchy matrix [3][4] to compute
+ * the parity. A Cauchy matrix has the property that all its square
+ * submatrices are non-singular, resulting in equations that are always
+ * solvable, for any combination of missing disks.
+ *
+ * The problem with this approach is that it requires generic
+ * multiplications, and not only by 2 or 2^-1, potentially
+ * hurting performance badly.
+ *
+ * Fortunately, there is a method to implement parallel multiplications
+ * using SSSE3 or AVX2 instructions [1][5], a method competitive with
+ * the computation of triple parity using power coefficients.
+ *
+ * Another important property of the Cauchy matrix is that we can set
+ * the first two rows with coefficients equal to those of the RAID5 and
+ * RAID6 approach described above, resulting in a compatible extension,
+ * and requiring SSSE3 or AVX2 instructions only if triple parity or
+ * beyond is used.
+ *
+ * The matrix is also adjusted, multiplying each row by a constant
+ * factor to make the first column all 1, optimizing the computation
+ * for the first disk.
+ *
+ * This results in the matrix A[row,col] defined as:
+ *
+ * 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01...
+ * 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 4c 98 2d 5a b4 75...
+ * 01 f5 d2 c4 9a 71 f1 7f fc 87 c1 c6 19 2f 40 55 3d ba 53 04 9c 61...
+ * 01 bb a6 d7 c7 07 ce 82 4a 2f a5 9b b6 60 f1 ad e7 f4 06 d2 df 2e...
+ * 01 97 7f 9c 7c 18 bd a2 58 1a da 74 70 a3 e5 47 29 07 f5 80 23 e9...
+ * 01 2b 3f cf 73 2c d6 ed cb 74 15 78 8a c1 17 c9 89 68 21 ab 76 3b...
+ *
+ * This matrix supports 6 levels of parity, one for each row, for up to
+ * 251 data disks, one for each column, with all the 377,342,351,231
+ * square submatrices non-singular, also verified with brute force.
+ *
+ * This matrix can be extended to support any number of parities, by
+ * just adding additional rows and removing one column for each new row
+ * (see mktables.c for more details on how the matrix is generated).
+ *
+ * In detail, parity is computed as:
+ *
+ * P = sum(Di)
+ * Q = sum(2^i *  Di)
+ * R = sum(A[2,i] * Di)
+ * S = sum(A[3,i] * Di)
+ * T = sum(A[4,i] * Di)
+ * U = sum(A[5,i] * Di) with 0<=i<N
+ *
+ * To recover from a failure of six disks at indexes x,y,z,h,v,w,
+ * with 0<=x<y<z<h<v<w<N, we compute the parity of the available N-6
+ * disks as:
+ *
+ * Pa = sum(Di)
+ * Qa = sum(2^i * Di)
+ * Ra = sum(A[2,i] * Di)
+ * Sa = sum(A[3,i] * Di)
+ * Ta = sum(A[4,i] * Di)
+ * Ua = sum(A[5,i] * Di) with 0<=i<N,i!=x,i!=y,i!=z,i!=h,i!=v,i!=w.
+ *
+ * And if we define:
+ *
+ * Pd = Pa + P
+ * Qd = Qa + Q
+ * Rd = Ra + R
+ * Sd = Sa + S
+ * Td = Ta + T
+ * Ud = Ua + U
+ *
+ * we can sum these two sets of equations, obtaining:
+ *
+ * Pd =          Dx +          Dy +          Dz +          Dh +          Dv +          Dw
+ * Qd =    2^x * Dx +    2^y * Dy +    2^z * Dz +    2^h * Dh +    2^v * Dv +    2^w * Dw
+ * Rd = A[2,x] * Dx + A[2,y] * Dy + A[2,z] * Dz + A[2,h] * Dh + A[2,v] * Dv + A[2,w] * Dw
+ * Sd = A[3,x] * Dx + A[3,y] * Dy + A[3,z] * Dz + A[3,h] * Dh + A[3,v] * Dv + A[3,w] * Dw
+ * Td = A[4,x] * Dx + A[4,y] * Dy + A[4,z] * Dz + A[4,h] * Dh + A[4,v] * Dv + A[4,w] * Dw
+ * Ud = A[5,x] * Dx + A[5,y] * Dy + A[5,z] * Dz + A[5,h] * Dh + A[5,v] * Dv + A[5,w] * Dw
+ *
+ * This linear system is always solvable because the coefficient matrix
+ * is never singular, due to the properties of the matrix A[].
+ *
+ * The resulting speed on x64, with 8 data disks, using a stripe of
+ * 256 KiB, for a Core i5-4670K Haswell Quad-Core 3.4GHz, is:
+ *
+ *             int8   int32   int64    sse2   ssse3    avx2
+ *   gen1             13339   25438   45438           50588
+ *   gen2              4115    6514   21840           32201
+ *   gen3       814                           10154   18613
+ *   gen4       620                            7569   14229
+ *   gen5       496                            5149   10051
+ *   gen6       413                            4239    8190
+ *
+ * Values are in MiB/s of data processed by a single thread, not counting
+ * generated parity.
+ *
+ * You can replicate these results on your machine using the
+ * "raid/test/speedtest.c" program.
+ *
+ * For comparison, the triple parity computation using the power
+ * coefficients "1,2,2^-1" is only a little faster than the one based
+ * on the Cauchy matrix if SSSE3 or AVX2 is present.
+ *
+ *             int8   int32   int64    sse2   ssse3    avx2
+ *   genz              2337    2874   10920           18944
+ *
+ * In conclusion, the use of power coefficients, and specifically powers
+ * of 1,2,2^-1, is the best option to implement triple parity on CPUs
+ * without SSSE3 and AVX2.
+ * But if a modern CPU with SSSE3 or AVX2 is available, the Cauchy
+ * matrix is the best option because it provides a fast and general
+ * approach working for any number of parities.
+ *
+ * References:
+ * [1] Anvin, "The mathematics of RAID-6", 2004
+ * [2] MacWilliams, Sloane, "The Theory of Error-Correcting Codes", 1977
+ * [3] Blomer, "An XOR-Based Erasure-Resilient Coding Scheme", 1995
+ * [4] Roth, "Introduction to Coding Theory", 2006
+ * [5] Plank, "Screaming Fast Galois Field Arithmetic Using Intel SIMD Instructions", 2013
+ */
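+
+/*
+ * A minimal usage sketch of the library (hypothetical counts and
+ * buffers; @size must be a multiple of 64, and @zero is a zero-filled
+ * block of @size bytes needed by the recovery functions):
+ *
+ *   raid_init();
+ *   raid_zero(zero);
+ *   raid_gen(nd, np, size, v);
+ *
+ *   int ir[1] = { 2 };   (later: data block 2 was lost)
+ *   raid_rec(1, ir, nd, np, size, v);
+ */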
+
+/**
+ * Generator matrix currently used.
+ */
+const uint8_t (*raid_gfgen)[256];
+
+void raid_mode(int mode)
+{
+       if (mode == RAID_MODE_VANDERMONDE) {
+               raid_gen_ptr[2] = raid_genz_ptr;
+               raid_gfgen = gfvandermonde;
+       } else {
+               raid_gen_ptr[2] = raid_gen3_ptr;
+               raid_gfgen = gfcauchy;
+       }
+}
+
+/**
+ * Buffer filled with 0 used in recovering.
+ */
+static void *raid_zero_block;
+
+void raid_zero(void *zero)
+{
+       raid_zero_block = zero;
+}
+
+/*
+ * Forwarders for parity computation.
+ *
+ * These functions compute the parity blocks from the provided data.
+ *
+ * The number of parities to compute is implicit in the position in the
+ * forwarder vector: the function at index #i computes (#i+1) parities.
+ *
+ * All these functions guarantee that parities are written
+ * in order: first parity P, then parity Q, and so on.
+ * This allows specifying the same memory buffer for multiple parities,
+ * knowing that you'll get the latest written one.
+ * This characteristic is used by the raid_delta_gen() function to
+ * avoid damaging unused parities while recovering.
+ *
+ * @nd Number of data blocks
+ * @size Size of the blocks pointed by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + #parities) elements. The starting elements are the blocks
+ *   for data, following with the parity blocks.
+ *   Each block has @size bytes.
+ */
+void (*raid_gen_ptr[RAID_PARITY_MAX])(int nd, size_t size, void **vv);
+void (*raid_gen3_ptr)(int nd, size_t size, void **vv);
+void (*raid_genz_ptr)(int nd, size_t size, void **vv);
+
+void raid_gen(int nd, int np, size_t size, void **v)
+{
+       /* enforce limit on size */
+       BUG_ON(size % 64 != 0);
+
+       /* enforce limit on number of failures */
+       BUG_ON(np < 1);
+       BUG_ON(np > RAID_PARITY_MAX);
+
+       raid_gen_ptr[np - 1](nd, size, v);
+}
+
+/**
+ * Inverts the square matrix M of size nxn into V.
+ *
+ * This is not a general matrix inversion because we assume the matrix M
+ * to have all its square submatrices non-singular.
+ * We use Gaussian elimination to invert it.
+ *
+ * @M Matrix to invert with @n rows and @n columns.
+ * @V Destination matrix where the result is put.
+ * @n Number of rows and columns of the matrix.
+ */
+void raid_invert(uint8_t *M, uint8_t *V, int n)
+{
+       int i, j, k;
+
+       /* set the identity matrix in V */
+       for (i = 0; i < n; ++i)
+               for (j = 0; j < n; ++j)
+                       V[i * n + j] = i == j;
+
+       /* for each element in the diagonal */
+       for (k = 0; k < n; ++k) {
+               uint8_t f;
+
+               /* the diagonal element cannot be 0 because */
+               /* we are inverting matrices with all the square */
+               /* submatrices non-singular */
+               BUG_ON(M[k * n + k] == 0);
+
+               /* scale the row to make the diagonal element 1 */
+               f = inv(M[k * n + k]);
+               for (j = 0; j < n; ++j) {
+                       M[k * n + j] = mul(f, M[k * n + j]);
+                       V[k * n + j] = mul(f, V[k * n + j]);
+               }
+
+               /* zero all the elements above and below */
+               /* the diagonal */
+               for (i = 0; i < n; ++i) {
+                       if (i == k)
+                               continue;
+                       f = M[i * n + k];
+                       for (j = 0; j < n; ++j) {
+                               M[i * n + j] ^= mul(f, M[k * n + j]);
+                               V[i * n + j] ^= mul(f, V[k * n + j]);
+                       }
+               }
+       }
+}
+
+/**
+ * Computes the parity without the missing data blocks
+ * and stores it in the buffers of those data blocks.
+ *
+ * This is the parity expressed as Pa,Qa,Ra,Sa,Ta,Ua in the equations.
+ */
+void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v)
+{
+       void *p[RAID_PARITY_MAX];
+       void *pa[RAID_PARITY_MAX];
+       int i, j;
+       int np;
+       void *latest;
+
+       /* total number of parities we are going to process */
+       /* they are both the used and the unused ones */
+       np = ip[nr - 1] + 1;
+
+       /* latest missing data block */
+       latest = v[id[nr - 1]];
+
+       /* setup pointers for delta computation */
+       for (i = 0, j = 0; i < np; ++i) {
+               /* keep a copy of the original parity vector */
+               p[i] = v[nd + i];
+
+               if (ip[j] == i) {
+                       /*
+                        * Set used parities to point to the missing
+                        * data blocks.
+                        *
+                        * The related data blocks are instead set
+                        * to point to the "zero" buffer.
+                        */
+
+                       /* the latest parity to use ends the loop, so */
+                       /* we can never process more than nr of them */
+                       BUG_ON(j >= nr);
+
+                       /* buffer for missing data blocks */
+                       pa[j] = v[id[j]];
+
+                       /* set the missing data blocks to zero */
+                       v[id[j]] = raid_zero_block;
+
+                       /* compute the parity over the missing data blocks */
+                       v[nd + i] = pa[j];
+
+                       /* check for the next used entry */
+                       ++j;
+               } else {
+                       /*
+                        * Unused parities are going to be rewritten with
+                        * meaningless data, because we don't have
+                        * functions able to compute only a subset of
+                        * parities.
+                        *
+                        * To avoid this, we reuse parity buffers,
+                        * assuming that all the parity functions write
+                        * parities in order.
+                        *
+                        * We assign the unused parity block to the same
+                        * buffer as the latest used parity, which we know
+                        * will be written.
+                        *
+                        * This means that this block will be written
+                        * multiple times and only the latest write will
+                        * contain the correct data.
+                        */
+                       v[nd + i] = latest;
+               }
+       }
+
+       /* all the parities have to be processed */
+       BUG_ON(j != nr);
+
+       /* recompute the parity, note that np may be smaller than the */
+       /* total number of parities available */
+       raid_gen(nd, np, size, v);
+
+       /* restore data buffers as before */
+       for (j = 0; j < nr; ++j)
+               v[id[j]] = pa[j];
+
+       /* restore parity buffers as before */
+       for (i = 0; i < np; ++i)
+               v[nd + i] = p[i];
+}
+
+/**
+ * Recover failure of one data block for PAR1.
+ *
+ * Starting from the equation:
+ *
+ * Pd = Dx
+ *
+ * and solving we get:
+ *
+ * Dx = Pd
+ */
+void raid_rec1of1(int *id, int nd, size_t size, void **v)
+{
+       void *p;
+       void *pa;
+
+       /* for PAR1 we can directly compute the missing block */
+       /* and we don't need to use the zero buffer */
+       p = v[nd];
+       pa = v[id[0]];
+
+       /* use the parity as missing data block */
+       v[id[0]] = p;
+
+       /* compute the parity over the missing data block */
+       v[nd] = pa;
+
+       /* compute */
+       raid_gen(nd, 1, size, v);
+
+       /* restore as before */
+       v[id[0]] = pa;
+       v[nd] = p;
+}
+
+/**
+ * Recover failure of two data blocks for PAR2.
+ *
+ * Starting from the equations:
+ *
+ * Pd = Dx + Dy
+ * Qd = 2^id[0] * Dx + 2^id[1] * Dy
+ *
+ * and solving we get:
+ *
+ *               1                     2^(-id[0])
+ * Dy = ------------------- * Pd + ------------------- * Qd
+ *      2^(id[1]-id[0]) + 1        2^(id[1]-id[0]) + 1
+ *
+ * Dx = Dy + Pd
+ *
+ * with conditions:
+ *
+ * 2^id[0] != 0
+ * 2^(id[1]-id[0]) + 1 != 0
+ *
+ * These are always satisfied for any 0<=id[0]<id[1]<255.
+ */
+void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       size_t i;
+       uint8_t *p;
+       uint8_t *pa;
+       uint8_t *q;
+       uint8_t *qa;
+       const uint8_t *T[2];
+
+       /* get multiplication tables */
+       T[0] = table(inv(pow2(id[1] - id[0]) ^ 1));
+       T[1] = table(inv(pow2(id[0]) ^ pow2(id[1])));
+
+       /* compute delta parity */
+       raid_delta_gen(2, id, ip, nd, size, vv);
+
+       p = v[nd];
+       q = v[nd + 1];
+       pa = v[id[0]];
+       qa = v[id[1]];
+
+       for (i = 0; i < size; ++i) {
+               /* delta */
+               uint8_t Pd = p[i] ^ pa[i];
+               uint8_t Qd = q[i] ^ qa[i];
+
+               /* reconstruct */
+               uint8_t Dy = T[0][Pd] ^ T[1][Qd];
+               uint8_t Dx = Pd ^ Dy;
+
+               /* set */
+               pa[i] = Dx;
+               qa[i] = Dy;
+       }
+}
+
+/*
+ * Forwarders for data recovery.
+ *
+ * These functions recover data blocks using the specified parity
+ * to recompute the missing data.
+ *
+ * Note that the format of the vectors @id/@ip differs from raid_rec().
+ * For example, in the vector @ip the first parity is represented with the
+ * value 0 and not @nd (see the example after the declaration below).
+ *
+ * @nr Number of failed data blocks to recover.
+ * @id[] Vector of @nr indexes of the data blocks to recover.
+ *   The indexes start from 0. They must be in order.
+ * @ip[] Vector of @nr indexes of the parity blocks to use in the recovering.
+ *   The indexes start from 0. They must be in order.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + @np) elements. The starting elements are the blocks
+ *   for data, following with the parity blocks.
+ *   Each block has @size bytes.
+ */
+void (*raid_rec_ptr[RAID_PARITY_MAX])(
+       int nr, int *id, int *ip, int nd, size_t size, void **vv);
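+
+/*
+ * For example (a sketch): with nd = 8 data blocks, recovering data
+ * blocks 2 and 5 using parities P and R means id[] = { 2, 5 } and
+ * ip[] = { 0, 2 }. In raid_rec() notation, a failed parity P would
+ * instead be identified by index @nd, not 0.
+ */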
+
+void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
+{
+       int nrd; /* number of data blocks to recover */
+       int nrp; /* number of parity blocks to recover */
+
+       /* enforce limit on size */
+       BUG_ON(size % 64 != 0);
+
+       /* enforce limit on number of failures */
+       BUG_ON(nr > np);
+       BUG_ON(np > RAID_PARITY_MAX);
+
+       /* enforce order in index vector */
+       BUG_ON(nr >= 2 && ir[0] >= ir[1]);
+       BUG_ON(nr >= 3 && ir[1] >= ir[2]);
+       BUG_ON(nr >= 4 && ir[2] >= ir[3]);
+       BUG_ON(nr >= 5 && ir[3] >= ir[4]);
+       BUG_ON(nr >= 6 && ir[4] >= ir[5]);
+
+       /* enforce limit on index vector */
+       BUG_ON(nr > 0 && ir[nr-1] >= nd + np);
+
+       /* count the number of data blocks to recover */
+       nrd = 0;
+       while (nrd < nr && ir[nrd] < nd)
+               ++nrd;
+
+       /* all the remaining are parity */
+       nrp = nr - nrd;
+
+       /* enforce limit on number of failures */
+       BUG_ON(nrd > nd);
+       BUG_ON(nrp > np);
+
+       /* if failed data is present */
+       if (nrd != 0) {
+               int ip[RAID_PARITY_MAX];
+               int i, j, k;
+
+               /* setup the vector of parities to use */
+               for (i = 0, j = 0, k = 0; i < np; ++i) {
+                       if (j < nrp && ir[nrd + j] == nd + i) {
+                               /* this parity has to be recovered */
+                               ++j;
+                       } else {
+                               /* this parity is used for recovering */
+                               ip[k] = i;
+                               ++k;
+                       }
+               }
+
+               /* recover the nrd data blocks specified in ir[], */
+               /* using the first nrd parities in ip[] */
+               raid_rec_ptr[nrd - 1](nrd, ir, ip, nd, size, v);
+       }
+
+       /* recompute all the parities up to the last bad one */
+       if (nrp != 0)
+               raid_gen(nd, ir[nr - 1] - nd + 1, size, v);
+}
+
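+/*
+ * Usage sketch (editor's addition, not from the upstream sources), for a
+ * set of 4 data and 2 parity blocks of BLOCK_SIZE bytes each, where
+ * BLOCK_SIZE is an assumed constant (a multiple of 64) and raid_init()
+ * and raid_zero() have already been called as described in raid.h:
+ *
+ *     uint8_t *v[4 + 2];      // data blocks first, then parity
+ *     ...
+ *     raid_gen(4, 2, BLOCK_SIZE, (void **)v);
+ *
+ *     // data block 1 and the first parity (position nd + 0 = 4) are lost
+ *     int ir[2] = { 1, 4 };   // in order, parity encoded as nd + i
+ *     raid_rec(2, ir, 4, 2, BLOCK_SIZE, (void **)v);
+ */
+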
+void raid_data(int nr, int *id, int *ip, int nd, size_t size, void **v)
+{
+       /* enforce limit on size */
+       BUG_ON(size % 64 != 0);
+
+       /* enforce limit on number of failures */
+       BUG_ON(nr > nd);
+       BUG_ON(nr > RAID_PARITY_MAX);
+
+       /* enforce order in index vector for data */
+       BUG_ON(nr >= 2 && id[0] >= id[1]);
+       BUG_ON(nr >= 3 && id[1] >= id[2]);
+       BUG_ON(nr >= 4 && id[2] >= id[3]);
+       BUG_ON(nr >= 5 && id[3] >= id[4]);
+       BUG_ON(nr >= 6 && id[4] >= id[5]);
+
+       /* enforce limit on index vector for data */
+       BUG_ON(nr > 0 && id[nr-1] >= nd);
+
+       /* enforce order in index vector for parity */
+       BUG_ON(nr >= 2 && ip[0] >= ip[1]);
+       BUG_ON(nr >= 3 && ip[1] >= ip[2]);
+       BUG_ON(nr >= 4 && ip[2] >= ip[3]);
+       BUG_ON(nr >= 5 && ip[3] >= ip[4]);
+       BUG_ON(nr >= 6 && ip[4] >= ip[5]);
+
+       /* if failed data is present */
+       if (nr != 0)
+               raid_rec_ptr[nr - 1](nr, id, ip, nd, size, v);
+}
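+
+/*
+ * Usage sketch (editor's addition): unlike raid_rec(), the parity indexes
+ * in @ip here start from 0. E.g. to rebuild data blocks 0 and 2 of a
+ * 4-data/2-parity set using both parities:
+ *
+ *     int id[2] = { 0, 2 };   // failed data blocks, in order
+ *     int ip[2] = { 0, 1 };   // parities to use: P is 0, Q is 1
+ *     raid_data(2, id, ip, 4, BLOCK_SIZE, (void **)v);
+ */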
+
diff --git a/raid/raid.h b/raid/raid.h
new file mode 100644 (file)
index 0000000..aeeb39f
--- /dev/null
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_H
+#define __RAID_H
+
+/**
+ * RAID mode supporting up to 6 parities.
+ *
+ * It requires SSSE3 to get good performance with triple or more parities.
+ *
+ * This is the default mode set after calling raid_init().
+ */
+#define RAID_MODE_CAUCHY 0
+
+/**
+ * RAID mode supporting up to 3 parities.
+ *
+ * It has a fast triple parity implementation without SSSE3, but it cannot
+ * go beyond triple parity.
+ *
+ * This is mostly intended for low-end CPUs like ARM and AMD Athlon.
+ */
+#define RAID_MODE_VANDERMONDE 1
+
+/**
+ * Maximum number of parity disks supported.
+ */
+#define RAID_PARITY_MAX 6
+
+/**
+ * Maximum number of data disks supported.
+ */
+#define RAID_DATA_MAX 251
+
+/**
+ * Initializes the RAID system.
+ *
+ * You must call this function before any other.
+ *
+ * The RAID system is initialized in the RAID_MODE_CAUCHY mode.
+ */
+void raid_init(void);
+
+/**
+ * Runs a basic functionality self test.
+ *
+ * The test is fast, and it's intended to be run at application
+ * startup to check the integrity of the RAID system.
+ *
+ * It returns 0 on success.
+ */
+int raid_selftest(void);
+
+/**
+ * Sets the mode to use. One of RAID_MODE_*.
+ *
+ * You can change the mode at any time, and it will affect subsequent calls
+ * to raid_gen(), raid_rec() and raid_data().
+ *
+ * The two modes are compatible for the first two levels of parity.
+ * The third level is different.
+ */
+void raid_mode(int mode);
+
+/**
+ * Sets the zero buffer to use in recovering.
+ *
+ * Before calling raid_rec() and raid_data() you must provide a memory
+ * buffer filled with zeros and of the same size as the blocks to recover.
+ *
+ * This buffer is only read and never written.
+ */
+void raid_zero(void *zero);
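+
+/*
+ * Example (editor's sketch, not part of the original header): typical
+ * one-time setup. calloc() is just one way to obtain the zero-filled
+ * buffer; BLOCK_SIZE is an assumed constant, a multiple of 64.
+ *
+ *     raid_init();
+ *     if (raid_selftest() != 0)
+ *             abort();
+ *     void *zero = calloc(1, BLOCK_SIZE);
+ *     raid_zero(zero);
+ */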
+
+/**
+ * Computes parity blocks.
+ *
+ * This function computes the specified number of parity blocks of the
+ * provided set of data blocks.
+ *
+ * Each parity block allows one data block to be recovered.
+ *
+ * @nd Number of data blocks.
+ * @np Number of parity blocks to compute.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + @np) elements. The starting elements are the blocks for
+ *   data, followed by the parity blocks.
+ *   Data blocks are only read and not modified. Parity blocks are written.
+ *   Each block has @size bytes.
+ */
+void raid_gen(int nd, int np, size_t size, void **v);
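+
+/*
+ * Example (editor's sketch): computing two parities over four data blocks
+ * of BLOCK_SIZE bytes each, with the layout described above (data blocks
+ * first, then parity):
+ *
+ *     void *v[4 + 2];
+ *     // v[0..3]: data (only read), v[4..5]: parity (written)
+ *     raid_gen(4, 2, BLOCK_SIZE, v);
+ */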
+
+/**
+ * Recovers failures in data and parity blocks.
+ *
+ * This function recovers all the data and parity blocks marked as bad
+ * in the @ir vector.
+ *
+ * Ensure that @nr <= @np; otherwise recovering is not possible.
+ *
+ * The parity blocks used for recovering are automatically selected from
+ * the ones NOT present in the @ir vector.
+ *
+ * If there are more parity blocks than needed, the parities at the lower
+ * indexes are used for recovering, and the others are ignored.
+ *
+ * Note that no internal integrity check is done when recovering. If the
+ * provided parities are correct, the resulting data will be correct.
+ * If the parities are wrong, the recovered data will be wrong.
+ * This happens even when you have more parity blocks than needed and some
+ * form of integrity verification would be possible.
+ *
+ * @nr Number of failed data and parity blocks to recover.
+ * @ir[] Vector of @nr indexes of the failed data and parity blocks.
+ *   The indexes start from 0. They must be in order.
+ *   The first parity is represented with value @nd, the second with value
+ *   @nd + 1, just like positions in the @v vector.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + @np) elements. The starting elements are the blocks
+ *   for data, followed by the parity blocks.
+ *   Each block has @size bytes.
+ */
+void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v);
+
+/**
+ * Recovers failures in data blocks only.
+ *
+ * This function recovers all the data blocks marked as bad in the @id vector.
+ * The parity blocks are not modified.
+ *
+ * @nr Number of failed data blocks to recover.
+ * @id[] Vector of @nr indexes of the data blocks to recover.
+ *   The indexes start from 0. They must be in order.
+ * @ip[] Vector of @nr indexes of the parity blocks to use for recovering.
+ *   The indexes start from 0. They must be in order.
+ * @nd Number of data blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + @ip[@nr - 1] + 1) elements. The starting elements are the
+ *   blocks for data, followed by the parity blocks.
+ *   Each block has @size bytes.
+ */
+void raid_data(int nr, int *id, int *ip, int nd, size_t size, void **v);
+
+/**
+ * Checks the provided combination of failed blocks.
+ *
+ * This function checks if the specified combination of failed blocks
+ * satisfies the redundancy information. A combination is assumed to match
+ * if the remaining valid parity matches the expected value after recovering.
+ *
+ * The number of failed blocks @nr must be strictly less than the number of
+ * parities @np, because you need one more parity to validate the recovering.
+ *
+ * No data or parity blocks are modified.
+ *
+ * @nr Number of failed data and parity blocks.
+ * @ir[] Vector of @nr indexes of the failed data and parity blocks.
+ *   The indexes start from 0. They must be in order.
+ *   The first parity is represented with value @nd, the second with value
+ *   @nd + 1, just like positions in the @v vector.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + @np) elements. The starting elements are the blocks
+ *   for data, followed by the parity blocks.
+ *   Each block has @size bytes.
+ * @return 0 if the check is satisfied. -1 otherwise.
+ */
+int raid_check(int nr, int *ir, int nd, int np, size_t size, void **v);
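+
+/*
+ * Example (editor's sketch): testing whether "data block 2 failed" is
+ * consistent with the redundancy of a 4-data/3-parity set. Note that
+ * @nr = 1 is strictly less than @np = 3, as required:
+ *
+ *     int ir[1] = { 2 };
+ *     if (raid_check(1, ir, 4, 3, BLOCK_SIZE, v) == 0)
+ *             // the combination matches; ir can be passed to raid_rec()
+ */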
+
+/**
+ * Scans for failed blocks.
+ *
+ * This function identifies the failed data and parity blocks using the
+ * available redundancy.
+ *
+ * It uses a brute force method, so the call can be expensive.
+ * The expected execution time is proportional to the binomial coefficient
+ * (@np + @nd) choose (@np - 1), usually written as:
+ *
+ * ( @np + @nd )
+ * (           )
+ * (  @np - 1  )
+ *
+ * No data or parity blocks are modified.
+ *
+ * The failed block indexes are returned in the @ir vector.
+ * It must have space for at least @np - 1 values.
+ *
+ * The returned @ir vector can then be used in a raid_rec() call to recover
+ * the failed data and parity blocks.
+ *
+ * @ir[] Vector filled with the indexes of the failed data and parity blocks.
+ *   The indexes start from 0 and they are in order.
+ *   The first parity is represented with value @nd, the second with value
+ *   @nd + 1, just like positions in the @v vector.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ *   It has (@nd + @np) elements. The starting elements are the blocks
+ *   for data, followed by the parity blocks.
+ *   Each block has @size bytes.
+ * @return Number of block indexes returned in the @ir vector.
+ *   0 if no error is detected.
+ *   -1 if it's not possible to identify the failed disks.
+ */
+int raid_scan(int *ir, int nd, int np, size_t size, void **v);
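+
+/*
+ * Example (editor's sketch): locating and repairing silent corruption in
+ * a 4-data/3-parity set. With @np = 3, raid_scan() can identify at most
+ * @np - 1 = 2 failed blocks:
+ *
+ *     int ir[3 - 1];
+ *     int n = raid_scan(ir, 4, 3, BLOCK_SIZE, v);
+ *     if (n > 0)
+ *             raid_rec(n, ir, 4, 3, BLOCK_SIZE, v);
+ *     else if (n < 0)
+ *             // too many failures to identify
+ */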
+
+#endif
+
diff --git a/raid/tables.c b/raid/tables.c
new file mode 100644 (file)
index 0000000..4903502
--- /dev/null
@@ -0,0 +1,14696 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
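+/*
+ * Editor's note (not part of the upstream sources): raid_gfmul[a][b]
+ * appears to hold the product a*b in GF(2^8) with the RAID-6 reduction
+ * polynomial 0x11d (x^8 + x^4 + x^3 + x^2 + 1); see the row for
+ * multiplier 2 below, where doubling past 0x80 wraps through 0x1d.
+ * A sketch that regenerates one row under that assumption, where a is
+ * the row index and row[] the 256-entry output:
+ *
+ *     for (int b = 0; b < 256; b++) {
+ *             uint8_t r = 0, x = a;
+ *             for (int m = b; m != 0; m >>= 1) {
+ *                     if (m & 1)
+ *                             r ^= x;
+ *                     x = (x << 1) ^ (x & 0x80 ? 0x1d : 0);
+ *             }
+ *             row[b] = r;     // == raid_gfmul[a][b]
+ *     }
+ */
+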
+const uint8_t __aligned(256) raid_gfmul[256][256] =
+{
+       {
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       },
+       {
+               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+               0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+               0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+               0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+               0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+               0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+               0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+               0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+               0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+               0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+               0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+               0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+               0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+               0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+               0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+               0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+               0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+               0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+               0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+               0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+               0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+               0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+               0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+               0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+               0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+               0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+               0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+               0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+               0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+               0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+               0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+       },
+       {
+               0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
+               0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+               0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+               0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+               0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e,
+               0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+               0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e,
+               0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+               0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e,
+               0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+               0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae,
+               0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+               0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce,
+               0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+               0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee,
+               0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+               0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13,
+               0x0d, 0x0f, 0x09, 0x0b, 0x05, 0x07, 0x01, 0x03,
+               0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33,
+               0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23,
+               0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53,
+               0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43,
+               0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73,
+               0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63,
+               0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93,
+               0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83,
+               0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3,
+               0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3,
+               0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3,
+               0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3,
+               0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3,
+               0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3,
+       },
+       {
+               0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09,
+               0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+               0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39,
+               0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+               0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69,
+               0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+               0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59,
+               0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+               0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9,
+               0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+               0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9,
+               0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+               0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9,
+               0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+               0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99,
+               0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+               0x9d, 0x9e, 0x9b, 0x98, 0x91, 0x92, 0x97, 0x94,
+               0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, 0x8f, 0x8c,
+               0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4,
+               0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc,
+               0xfd, 0xfe, 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4,
+               0xe5, 0xe6, 0xe3, 0xe0, 0xe9, 0xea, 0xef, 0xec,
+               0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, 0xc7, 0xc4,
+               0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc,
+               0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54,
+               0x45, 0x46, 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c,
+               0x6d, 0x6e, 0x6b, 0x68, 0x61, 0x62, 0x67, 0x64,
+               0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, 0x7f, 0x7c,
+               0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34,
+               0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c,
+               0x0d, 0x0e, 0x0b, 0x08, 0x01, 0x02, 0x07, 0x04,
+               0x15, 0x16, 0x13, 0x10, 0x19, 0x1a, 0x1f, 0x1c,
+       },
+       {
+               0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
+               0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c,
+               0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c,
+               0x60, 0x64, 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c,
+               0x80, 0x84, 0x88, 0x8c, 0x90, 0x94, 0x98, 0x9c,
+               0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, 0xb8, 0xbc,
+               0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc,
+               0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc,
+               0x1d, 0x19, 0x15, 0x11, 0x0d, 0x09, 0x05, 0x01,
+               0x3d, 0x39, 0x35, 0x31, 0x2d, 0x29, 0x25, 0x21,
+               0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, 0x45, 0x41,
+               0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61,
+               0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81,
+               0xbd, 0xb9, 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1,
+               0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1,
+               0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1,
+               0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26,
+               0x1a, 0x1e, 0x12, 0x16, 0x0a, 0x0e, 0x02, 0x06,
+               0x7a, 0x7e, 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66,
+               0x5a, 0x5e, 0x52, 0x56, 0x4a, 0x4e, 0x42, 0x46,
+               0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, 0xa2, 0xa6,
+               0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86,
+               0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6,
+               0xda, 0xde, 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6,
+               0x27, 0x23, 0x2f, 0x2b, 0x37, 0x33, 0x3f, 0x3b,
+               0x07, 0x03, 0x0f, 0x0b, 0x17, 0x13, 0x1f, 0x1b,
+               0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b,
+               0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b,
+               0xa7, 0xa3, 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb,
+               0x87, 0x83, 0x8f, 0x8b, 0x97, 0x93, 0x9f, 0x9b,
+               0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, 0xff, 0xfb,
+               0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb,
+       },
+       {
+               0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b,
+               0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33,
+               0x50, 0x55, 0x5a, 0x5f, 0x44, 0x41, 0x4e, 0x4b,
+               0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, 0x66, 0x63,
+               0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb,
+               0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93,
+               0xf0, 0xf5, 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb,
+               0xd8, 0xdd, 0xd2, 0xd7, 0xcc, 0xc9, 0xc6, 0xc3,
+               0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, 0x43, 0x46,
+               0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e,
+               0x0d, 0x08, 0x07, 0x02, 0x19, 0x1c, 0x13, 0x16,
+               0x25, 0x20, 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e,
+               0xfd, 0xf8, 0xf7, 0xf2, 0xe9, 0xec, 0xe3, 0xe6,
+               0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, 0xcb, 0xce,
+               0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6,
+               0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e,
+               0xba, 0xbf, 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1,
+               0x92, 0x97, 0x98, 0x9d, 0x86, 0x83, 0x8c, 0x89,
+               0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, 0xf4, 0xf1,
+               0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9,
+               0x1a, 0x1f, 0x10, 0x15, 0x0e, 0x0b, 0x04, 0x01,
+               0x32, 0x37, 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29,
+               0x4a, 0x4f, 0x40, 0x45, 0x5e, 0x5b, 0x54, 0x51,
+               0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, 0x7c, 0x79,
+               0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc,
+               0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4,
+               0xb7, 0xb2, 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac,
+               0x9f, 0x9a, 0x95, 0x90, 0x8b, 0x8e, 0x81, 0x84,
+               0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, 0x59, 0x5c,
+               0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74,
+               0x17, 0x12, 0x1d, 0x18, 0x03, 0x06, 0x09, 0x0c,
+               0x3f, 0x3a, 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24,
+       },
+       {
+               0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12,
+               0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22,
+               0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72,
+               0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42,
+               0xc0, 0xc6, 0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2,
+               0xf0, 0xf6, 0xfc, 0xfa, 0xe8, 0xee, 0xe4, 0xe2,
+               0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, 0xb4, 0xb2,
+               0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82,
+               0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f,
+               0xad, 0xab, 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf,
+               0xfd, 0xfb, 0xf1, 0xf7, 0xe5, 0xe3, 0xe9, 0xef,
+               0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, 0xd9, 0xdf,
+               0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f,
+               0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f,
+               0x3d, 0x3b, 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f,
+               0x0d, 0x0b, 0x01, 0x07, 0x15, 0x13, 0x19, 0x1f,
+               0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, 0x33, 0x35,
+               0x17, 0x11, 0x1b, 0x1d, 0x0f, 0x09, 0x03, 0x05,
+               0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55,
+               0x77, 0x71, 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65,
+               0xe7, 0xe1, 0xeb, 0xed, 0xff, 0xf9, 0xf3, 0xf5,
+               0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, 0xc3, 0xc5,
+               0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95,
+               0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5,
+               0xba, 0xbc, 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8,
+               0x8a, 0x8c, 0x86, 0x80, 0x92, 0x94, 0x9e, 0x98,
+               0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, 0xce, 0xc8,
+               0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8,
+               0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68,
+               0x4a, 0x4c, 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58,
+               0x1a, 0x1c, 0x16, 0x10, 0x02, 0x04, 0x0e, 0x08,
+               0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, 0x3e, 0x38,
+       },
+       {
+               0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15,
+               0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d,
+               0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65,
+               0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d,
+               0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5,
+               0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd,
+               0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85,
+               0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd,
+               0xdd, 0xda, 0xd3, 0xd4, 0xc1, 0xc6, 0xcf, 0xc8,
+               0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, 0xf7, 0xf0,
+               0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8,
+               0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80,
+               0x3d, 0x3a, 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28,
+               0x05, 0x02, 0x0b, 0x0c, 0x19, 0x1e, 0x17, 0x10,
+               0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56, 0x5f, 0x58,
+               0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60,
+               0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2,
+               0x9f, 0x98, 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a,
+               0xd7, 0xd0, 0xd9, 0xde, 0xcb, 0xcc, 0xc5, 0xc2,
+               0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, 0xfd, 0xfa,
+               0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52,
+               0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a,
+               0x37, 0x30, 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22,
+               0x0f, 0x08, 0x01, 0x06, 0x13, 0x14, 0x1d, 0x1a,
+               0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, 0x68, 0x6f,
+               0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57,
+               0x0a, 0x0d, 0x04, 0x03, 0x16, 0x11, 0x18, 0x1f,
+               0x32, 0x35, 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27,
+               0x9a, 0x9d, 0x94, 0x93, 0x86, 0x81, 0x88, 0x8f,
+               0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, 0xb0, 0xb7,
+               0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff,
+               0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7,
+       },
+       {
+               0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38,
+               0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
+               0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
+               0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8,
+               0x1d, 0x15, 0x0d, 0x05, 0x3d, 0x35, 0x2d, 0x25,
+               0x5d, 0x55, 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65,
+               0x9d, 0x95, 0x8d, 0x85, 0xbd, 0xb5, 0xad, 0xa5,
+               0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, 0xed, 0xe5,
+               0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0x0a, 0x02,
+               0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42,
+               0xba, 0xb2, 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82,
+               0xfa, 0xf2, 0xea, 0xe2, 0xda, 0xd2, 0xca, 0xc2,
+               0x27, 0x2f, 0x37, 0x3f, 0x07, 0x0f, 0x17, 0x1f,
+               0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f,
+               0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f,
+               0xe7, 0xef, 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf,
+               0x74, 0x7c, 0x64, 0x6c, 0x54, 0x5c, 0x44, 0x4c,
+               0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, 0x04, 0x0c,
+               0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc,
+               0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c,
+               0x69, 0x61, 0x79, 0x71, 0x49, 0x41, 0x59, 0x51,
+               0x29, 0x21, 0x39, 0x31, 0x09, 0x01, 0x19, 0x11,
+               0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, 0xd9, 0xd1,
+               0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91,
+               0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76,
+               0x0e, 0x06, 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36,
+               0xce, 0xc6, 0xde, 0xd6, 0xee, 0xe6, 0xfe, 0xf6,
+               0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, 0xbe, 0xb6,
+               0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b,
+               0x13, 0x1b, 0x03, 0x0b, 0x33, 0x3b, 0x23, 0x2b,
+               0xd3, 0xdb, 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb,
+               0x93, 0x9b, 0x83, 0x8b, 0xb3, 0xbb, 0xa3, 0xab,
+       },
+       {
+               0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
+               0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+               0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf,
+               0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+               0x3d, 0x34, 0x2f, 0x26, 0x19, 0x10, 0x0b, 0x02,
+               0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, 0x43, 0x4a,
+               0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92,
+               0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda,
+               0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
+               0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d,
+               0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, 0xdc, 0xd5,
+               0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d,
+               0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78,
+               0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+               0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8,
+               0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+               0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb,
+               0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83,
+               0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b,
+               0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
+               0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, 0xff, 0xf6,
+               0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe,
+               0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66,
+               0x11, 0x18, 0x03, 0x0a, 0x35, 0x3c, 0x27, 0x2e,
+               0x8e, 0x87, 0x9c, 0x95, 0xaa, 0xa3, 0xb8, 0xb1,
+               0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, 0xf0, 0xf9,
+               0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
+               0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69,
+               0xb3, 0xba, 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c,
+               0xfb, 0xf2, 0xe9, 0xe0, 0xdf, 0xd6, 0xcd, 0xc4,
+               0x23, 0x2a, 0x31, 0x38, 0x07, 0x0e, 0x15, 0x1c,
+               0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54,
+       },
+       {
+               0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36,
+               0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66,
+               0xa0, 0xaa, 0xb4, 0xbe, 0x88, 0x82, 0x9c, 0x96,
+               0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, 0xcc, 0xc6,
+               0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b,
+               0x0d, 0x07, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b,
+               0xfd, 0xf7, 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb,
+               0xad, 0xa7, 0xb9, 0xb3, 0x85, 0x8f, 0x91, 0x9b,
+               0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, 0x86, 0x8c,
+               0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc,
+               0x1a, 0x10, 0x0e, 0x04, 0x32, 0x38, 0x26, 0x2c,
+               0x4a, 0x40, 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c,
+               0xe7, 0xed, 0xf3, 0xf9, 0xcf, 0xc5, 0xdb, 0xd1,
+               0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, 0x8b, 0x81,
+               0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71,
+               0x17, 0x1d, 0x03, 0x09, 0x3f, 0x35, 0x2b, 0x21,
+               0x69, 0x63, 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f,
+               0x39, 0x33, 0x2d, 0x27, 0x11, 0x1b, 0x05, 0x0f,
+               0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, 0xf5, 0xff,
+               0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf,
+               0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x08, 0x02,
+               0x64, 0x6e, 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52,
+               0x94, 0x9e, 0x80, 0x8a, 0xbc, 0xb6, 0xa8, 0xa2,
+               0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, 0xf8, 0xf2,
+               0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5,
+               0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5,
+               0x73, 0x79, 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45,
+               0x23, 0x29, 0x37, 0x3d, 0x0b, 0x01, 0x1f, 0x15,
+               0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, 0xb2, 0xb8,
+               0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8,
+               0x2e, 0x24, 0x3a, 0x30, 0x06, 0x0c, 0x12, 0x18,
+               0x7e, 0x74, 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48,
+       },
+       {
+               0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31,
+               0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+               0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81,
+               0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+               0x7d, 0x76, 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c,
+               0x25, 0x2e, 0x33, 0x38, 0x09, 0x02, 0x1f, 0x14,
+               0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, 0xf7, 0xfc,
+               0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4,
+               0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb,
+               0xa2, 0xa9, 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93,
+               0x4a, 0x41, 0x5c, 0x57, 0x66, 0x6d, 0x70, 0x7b,
+               0x12, 0x19, 0x04, 0x0f, 0x3e, 0x35, 0x28, 0x23,
+               0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6,
+               0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee,
+               0x37, 0x3c, 0x21, 0x2a, 0x1b, 0x10, 0x0d, 0x06,
+               0x6f, 0x64, 0x79, 0x72, 0x43, 0x48, 0x55, 0x5e,
+               0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+               0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80,
+               0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+               0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30,
+               0x94, 0x9f, 0x82, 0x89, 0xb8, 0xb3, 0xae, 0xa5,
+               0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, 0xf6, 0xfd,
+               0x24, 0x2f, 0x32, 0x39, 0x08, 0x03, 0x1e, 0x15,
+               0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d,
+               0x13, 0x18, 0x05, 0x0e, 0x3f, 0x34, 0x29, 0x22,
+               0x4b, 0x40, 0x5d, 0x56, 0x67, 0x6c, 0x71, 0x7a,
+               0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, 0x99, 0x92,
+               0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca,
+               0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f,
+               0x36, 0x3d, 0x20, 0x2b, 0x1a, 0x11, 0x0c, 0x07,
+               0xde, 0xd5, 0xc8, 0xc3, 0xf2, 0xf9, 0xe4, 0xef,
+               0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, 0xbc, 0xb7,
+       },
+       {
+               0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24,
+               0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44,
+               0xc0, 0xcc, 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4,
+               0xa0, 0xac, 0xb8, 0xb4, 0x90, 0x9c, 0x88, 0x84,
+               0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, 0xb5, 0xb9,
+               0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9,
+               0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79,
+               0x3d, 0x31, 0x25, 0x29, 0x0d, 0x01, 0x15, 0x19,
+               0x27, 0x2b, 0x3f, 0x33, 0x17, 0x1b, 0x0f, 0x03,
+               0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, 0x6f, 0x63,
+               0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3,
+               0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3,
+               0xba, 0xb6, 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e,
+               0xda, 0xd6, 0xc2, 0xce, 0xea, 0xe6, 0xf2, 0xfe,
+               0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, 0x52, 0x5e,
+               0x1a, 0x16, 0x02, 0x0e, 0x2a, 0x26, 0x32, 0x3e,
+               0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a,
+               0x2e, 0x22, 0x36, 0x3a, 0x1e, 0x12, 0x06, 0x0a,
+               0x8e, 0x82, 0x96, 0x9a, 0xbe, 0xb2, 0xa6, 0xaa,
+               0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, 0xc6, 0xca,
+               0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7,
+               0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97,
+               0x13, 0x1f, 0x0b, 0x07, 0x23, 0x2f, 0x3b, 0x37,
+               0x73, 0x7f, 0x6b, 0x67, 0x43, 0x4f, 0x5b, 0x57,
+               0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, 0x41, 0x4d,
+               0x09, 0x05, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d,
+               0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d,
+               0xc9, 0xc5, 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed,
+               0xf4, 0xf8, 0xec, 0xe0, 0xc4, 0xc8, 0xdc, 0xd0,
+               0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, 0xbc, 0xb0,
+               0x34, 0x38, 0x2c, 0x20, 0x04, 0x08, 0x1c, 0x10,
+               0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70,
+       },
+       {
+               0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23,
+               0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+               0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3,
+               0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+               0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e,
+               0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+               0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e,
+               0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+               0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44,
+               0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+               0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94,
+               0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+               0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9,
+               0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+               0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29,
+               0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+               0xce, 0xc3, 0xd4, 0xd9, 0xfa, 0xf7, 0xe0, 0xed,
+               0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, 0x88, 0x85,
+               0x1e, 0x13, 0x04, 0x09, 0x2a, 0x27, 0x30, 0x3d,
+               0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55,
+               0x73, 0x7e, 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50,
+               0x1b, 0x16, 0x01, 0x0c, 0x2f, 0x22, 0x35, 0x38,
+               0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, 0x8d, 0x80,
+               0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8,
+               0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a,
+               0xc1, 0xcc, 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2,
+               0x79, 0x74, 0x63, 0x6e, 0x4d, 0x40, 0x57, 0x5a,
+               0x11, 0x1c, 0x0b, 0x06, 0x25, 0x28, 0x3f, 0x32,
+               0x14, 0x19, 0x0e, 0x03, 0x20, 0x2d, 0x3a, 0x37,
+               0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f,
+               0xc4, 0xc9, 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7,
+               0xac, 0xa1, 0xb6, 0xbb, 0x98, 0x95, 0x82, 0x8f,
+       },
+       {
+               0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a,
+               0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+               0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca,
+               0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+               0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+               0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87,
+               0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+               0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67,
+               0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d,
+               0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd,
+               0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+               0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d,
+               0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50,
+               0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+               0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0,
+               0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+               0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79,
+               0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x07, 0x09,
+               0xb3, 0xbd, 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99,
+               0xc3, 0xcd, 0xdf, 0xd1, 0xfb, 0xf5, 0xe7, 0xe9,
+               0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, 0xaa, 0xa4,
+               0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4,
+               0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44,
+               0x1e, 0x10, 0x02, 0x0c, 0x26, 0x28, 0x3a, 0x34,
+               0xf4, 0xfa, 0xe8, 0xe6, 0xcc, 0xc2, 0xd0, 0xde,
+               0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, 0xa0, 0xae,
+               0x14, 0x1a, 0x08, 0x06, 0x2c, 0x22, 0x30, 0x3e,
+               0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e,
+               0x29, 0x27, 0x35, 0x3b, 0x11, 0x1f, 0x0d, 0x03,
+               0x59, 0x57, 0x45, 0x4b, 0x61, 0x6f, 0x7d, 0x73,
+               0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, 0xed, 0xe3,
+               0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93,
+       },
+       {
+               0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d,
+               0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55,
+               0xf0, 0xff, 0xee, 0xe1, 0xcc, 0xc3, 0xd2, 0xdd,
+               0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, 0xaa, 0xa5,
+               0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0,
+               0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8,
+               0x0d, 0x02, 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20,
+               0x75, 0x7a, 0x6b, 0x64, 0x49, 0x46, 0x57, 0x58,
+               0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, 0xc5, 0xca,
+               0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2,
+               0x17, 0x18, 0x09, 0x06, 0x2b, 0x24, 0x35, 0x3a,
+               0x6f, 0x60, 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42,
+               0x1a, 0x15, 0x04, 0x0b, 0x26, 0x29, 0x38, 0x37,
+               0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, 0x40, 0x4f,
+               0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7,
+               0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf,
+               0xd3, 0xdc, 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe,
+               0xab, 0xa4, 0xb5, 0xba, 0x97, 0x98, 0x89, 0x86,
+               0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, 0x01, 0x0e,
+               0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76,
+               0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0x0c, 0x03,
+               0x56, 0x59, 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b,
+               0xde, 0xd1, 0xc0, 0xcf, 0xe2, 0xed, 0xfc, 0xf3,
+               0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, 0x84, 0x8b,
+               0x34, 0x3b, 0x2a, 0x25, 0x08, 0x07, 0x16, 0x19,
+               0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61,
+               0xc4, 0xcb, 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9,
+               0xbc, 0xb3, 0xa2, 0xad, 0x80, 0x8f, 0x9e, 0x91,
+               0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, 0xeb, 0xe4,
+               0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c,
+               0x39, 0x36, 0x27, 0x28, 0x05, 0x0a, 0x1b, 0x14,
+               0x41, 0x4e, 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c,
+       },
+       {
+               0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+               0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0,
+               0x1d, 0x0d, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d,
+               0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed,
+               0x3a, 0x2a, 0x1a, 0x0a, 0x7a, 0x6a, 0x5a, 0x4a,
+               0xba, 0xaa, 0x9a, 0x8a, 0xfa, 0xea, 0xda, 0xca,
+               0x27, 0x37, 0x07, 0x17, 0x67, 0x77, 0x47, 0x57,
+               0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7,
+               0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04,
+               0xf4, 0xe4, 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84,
+               0x69, 0x79, 0x49, 0x59, 0x29, 0x39, 0x09, 0x19,
+               0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, 0x89, 0x99,
+               0x4e, 0x5e, 0x6e, 0x7e, 0x0e, 0x1e, 0x2e, 0x3e,
+               0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe,
+               0x53, 0x43, 0x73, 0x63, 0x13, 0x03, 0x33, 0x23,
+               0xd3, 0xc3, 0xf3, 0xe3, 0x93, 0x83, 0xb3, 0xa3,
+               0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, 0x88, 0x98,
+               0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x08, 0x18,
+               0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85,
+               0x75, 0x65, 0x55, 0x45, 0x35, 0x25, 0x15, 0x05,
+               0xd2, 0xc2, 0xf2, 0xe2, 0x92, 0x82, 0xb2, 0xa2,
+               0x52, 0x42, 0x72, 0x62, 0x12, 0x02, 0x32, 0x22,
+               0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf,
+               0x4f, 0x5f, 0x6f, 0x7f, 0x0f, 0x1f, 0x2f, 0x3f,
+               0x9c, 0x8c, 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec,
+               0x1c, 0x0c, 0x3c, 0x2c, 0x5c, 0x4c, 0x7c, 0x6c,
+               0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, 0xe1, 0xf1,
+               0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
+               0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6,
+               0x26, 0x36, 0x06, 0x16, 0x66, 0x76, 0x46, 0x56,
+               0xbb, 0xab, 0x9b, 0x8b, 0xfb, 0xeb, 0xdb, 0xcb,
+               0x3b, 0x2b, 0x1b, 0x0b, 0x7b, 0x6b, 0x5b, 0x4b,
+       },
+       {
+               0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+               0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
+               0x0d, 0x1c, 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a,
+               0x85, 0x94, 0xa7, 0xb6, 0xc1, 0xd0, 0xe3, 0xf2,
+               0x1a, 0x0b, 0x38, 0x29, 0x5e, 0x4f, 0x7c, 0x6d,
+               0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5,
+               0x17, 0x06, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60,
+               0x9f, 0x8e, 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8,
+               0x34, 0x25, 0x16, 0x07, 0x70, 0x61, 0x52, 0x43,
+               0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, 0xda, 0xcb,
+               0x39, 0x28, 0x1b, 0x0a, 0x7d, 0x6c, 0x5f, 0x4e,
+               0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6,
+               0x2e, 0x3f, 0x0c, 0x1d, 0x6a, 0x7b, 0x48, 0x59,
+               0xa6, 0xb7, 0x84, 0x95, 0xe2, 0xf3, 0xc0, 0xd1,
+               0x23, 0x32, 0x01, 0x10, 0x67, 0x76, 0x45, 0x54,
+               0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc,
+               0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0x0e, 0x1f,
+               0xe0, 0xf1, 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97,
+               0x65, 0x74, 0x47, 0x56, 0x21, 0x30, 0x03, 0x12,
+               0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, 0x8b, 0x9a,
+               0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x05,
+               0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d,
+               0x7f, 0x6e, 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x08,
+               0xf7, 0xe6, 0xd5, 0xc4, 0xb3, 0xa2, 0x91, 0x80,
+               0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x09, 0x3a, 0x2b,
+               0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3,
+               0x51, 0x40, 0x73, 0x62, 0x15, 0x04, 0x37, 0x26,
+               0xd9, 0xc8, 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae,
+               0x46, 0x57, 0x64, 0x75, 0x02, 0x13, 0x20, 0x31,
+               0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, 0xa8, 0xb9,
+               0x4b, 0x5a, 0x69, 0x78, 0x0f, 0x1e, 0x2d, 0x3c,
+               0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4,
+       },
+       {
+               0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+               0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+               0x3d, 0x2f, 0x19, 0x0b, 0x75, 0x67, 0x51, 0x43,
+               0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3,
+               0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x04,
+               0xea, 0xf8, 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94,
+               0x47, 0x55, 0x63, 0x71, 0x0f, 0x1d, 0x2b, 0x39,
+               0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, 0xbb, 0xa9,
+               0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+               0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
+               0xc9, 0xdb, 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7,
+               0x59, 0x4b, 0x7d, 0x6f, 0x11, 0x03, 0x35, 0x27,
+               0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, 0xe2, 0xf0,
+               0x1e, 0x0c, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60,
+               0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd,
+               0x23, 0x31, 0x07, 0x15, 0x6b, 0x79, 0x4f, 0x5d,
+               0xf5, 0xe7, 0xd1, 0xc3, 0xbd, 0xaf, 0x99, 0x8b,
+               0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, 0x09, 0x1b,
+               0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+               0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+               0x8f, 0x9d, 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1,
+               0x1f, 0x0d, 0x3b, 0x29, 0x57, 0x45, 0x73, 0x61,
+               0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, 0xde, 0xcc,
+               0x22, 0x30, 0x06, 0x14, 0x6a, 0x78, 0x4e, 0x5c,
+               0x01, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f,
+               0x91, 0x83, 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef,
+               0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+               0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
+               0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x05,
+               0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95,
+               0x46, 0x54, 0x62, 0x70, 0x0e, 0x1c, 0x2a, 0x38,
+               0xd6, 0xc4, 0xf2, 0xe0, 0x9e, 0x8c, 0xba, 0xa8,
+       },
+       {
+               0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79,
+               0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1,
+               0x2d, 0x3e, 0x0b, 0x18, 0x61, 0x72, 0x47, 0x54,
+               0xb5, 0xa6, 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc,
+               0x5a, 0x49, 0x7c, 0x6f, 0x16, 0x05, 0x30, 0x23,
+               0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, 0xa8, 0xbb,
+               0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0x0e,
+               0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96,
+               0xb4, 0xa7, 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd,
+               0x2c, 0x3f, 0x0a, 0x19, 0x60, 0x73, 0x46, 0x55,
+               0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, 0xf3, 0xe0,
+               0x01, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78,
+               0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97,
+               0x76, 0x65, 0x50, 0x43, 0x3a, 0x29, 0x1c, 0x0f,
+               0xc3, 0xd0, 0xe5, 0xf6, 0x8f, 0x9c, 0xa9, 0xba,
+               0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x04, 0x31, 0x22,
+               0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0x0c,
+               0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94,
+               0x58, 0x4b, 0x7e, 0x6d, 0x14, 0x07, 0x32, 0x21,
+               0xc0, 0xd3, 0xe6, 0xf5, 0x8c, 0x9f, 0xaa, 0xb9,
+               0x2f, 0x3c, 0x09, 0x1a, 0x63, 0x70, 0x45, 0x56,
+               0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce,
+               0x02, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b,
+               0x9a, 0x89, 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3,
+               0xc1, 0xd2, 0xe7, 0xf4, 0x8d, 0x9e, 0xab, 0xb8,
+               0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x06, 0x33, 0x20,
+               0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95,
+               0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0x0d,
+               0x9b, 0x88, 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2,
+               0x03, 0x10, 0x25, 0x36, 0x4f, 0x5c, 0x69, 0x7a,
+               0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, 0xdc, 0xcf,
+               0x2e, 0x3d, 0x08, 0x1b, 0x62, 0x71, 0x44, 0x57,
+       },
+       {
+               0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c,
+               0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc,
+               0x5d, 0x49, 0x75, 0x61, 0x0d, 0x19, 0x25, 0x31,
+               0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, 0x85, 0x91,
+               0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6,
+               0x1a, 0x0e, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76,
+               0xe7, 0xf3, 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b,
+               0x47, 0x53, 0x6f, 0x7b, 0x17, 0x03, 0x3f, 0x2b,
+               0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, 0x11, 0x05,
+               0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5,
+               0x34, 0x20, 0x1c, 0x08, 0x64, 0x70, 0x4c, 0x58,
+               0x94, 0x80, 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8,
+               0xd3, 0xc7, 0xfb, 0xef, 0x83, 0x97, 0xab, 0xbf,
+               0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, 0x0b, 0x1f,
+               0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2,
+               0x2e, 0x3a, 0x06, 0x12, 0x7e, 0x6a, 0x56, 0x42,
+               0xd2, 0xc6, 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe,
+               0x72, 0x66, 0x5a, 0x4e, 0x22, 0x36, 0x0a, 0x1e,
+               0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb, 0xf7, 0xe3,
+               0x2f, 0x3b, 0x07, 0x13, 0x7f, 0x6b, 0x57, 0x43,
+               0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x04,
+               0xc8, 0xdc, 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4,
+               0x35, 0x21, 0x1d, 0x09, 0x65, 0x71, 0x4d, 0x59,
+               0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, 0xed, 0xf9,
+               0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7,
+               0x1b, 0x0f, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77,
+               0xe6, 0xf2, 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a,
+               0x46, 0x52, 0x6e, 0x7a, 0x16, 0x02, 0x3e, 0x2a,
+               0x01, 0x15, 0x29, 0x3d, 0x51, 0x45, 0x79, 0x6d,
+               0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd,
+               0x5c, 0x48, 0x74, 0x60, 0x0c, 0x18, 0x24, 0x30,
+               0xfc, 0xe8, 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90,
+       },
+       {
+               0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b,
+               0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3,
+               0x4d, 0x58, 0x67, 0x72, 0x19, 0x0c, 0x33, 0x26,
+               0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e,
+               0x9a, 0x8f, 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1,
+               0x32, 0x27, 0x18, 0x0d, 0x66, 0x73, 0x4c, 0x59,
+               0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, 0xa9, 0xbc,
+               0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x01, 0x14,
+               0x29, 0x3c, 0x03, 0x16, 0x7d, 0x68, 0x57, 0x42,
+               0x81, 0x94, 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea,
+               0x64, 0x71, 0x4e, 0x5b, 0x30, 0x25, 0x1a, 0x0f,
+               0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, 0xb2, 0xa7,
+               0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8,
+               0x1b, 0x0e, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70,
+               0xfe, 0xeb, 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95,
+               0x56, 0x43, 0x7c, 0x69, 0x02, 0x17, 0x28, 0x3d,
+               0x52, 0x47, 0x78, 0x6d, 0x06, 0x13, 0x2c, 0x39,
+               0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91,
+               0x1f, 0x0a, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74,
+               0xb7, 0xa2, 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc,
+               0xc8, 0xdd, 0xe2, 0xf7, 0x9c, 0x89, 0xb6, 0xa3,
+               0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, 0x1e, 0x0b,
+               0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee,
+               0x2d, 0x38, 0x07, 0x12, 0x79, 0x6c, 0x53, 0x46,
+               0x7b, 0x6e, 0x51, 0x44, 0x2f, 0x3a, 0x05, 0x10,
+               0xd3, 0xc6, 0xf9, 0xec, 0x87, 0x92, 0xad, 0xb8,
+               0x36, 0x23, 0x1c, 0x09, 0x62, 0x77, 0x48, 0x5d,
+               0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5,
+               0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a,
+               0x49, 0x5c, 0x63, 0x76, 0x1d, 0x08, 0x37, 0x22,
+               0xac, 0xb9, 0x86, 0x93, 0xf8, 0xed, 0xd2, 0xc7,
+               0x04, 0x11, 0x2e, 0x3b, 0x50, 0x45, 0x7a, 0x6f,
+       },
+       {
+               0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62,
+               0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2,
+               0x7d, 0x6b, 0x51, 0x47, 0x25, 0x33, 0x09, 0x1f,
+               0xcd, 0xdb, 0xe1, 0xf7, 0x95, 0x83, 0xb9, 0xaf,
+               0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, 0x8e, 0x98,
+               0x4a, 0x5c, 0x66, 0x70, 0x12, 0x04, 0x3e, 0x28,
+               0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5,
+               0x37, 0x21, 0x1b, 0x0d, 0x6f, 0x79, 0x43, 0x55,
+               0xe9, 0xff, 0xc5, 0xd3, 0xb1, 0xa7, 0x9d, 0x8b,
+               0x59, 0x4f, 0x75, 0x63, 0x01, 0x17, 0x2d, 0x3b,
+               0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6,
+               0x24, 0x32, 0x08, 0x1e, 0x7c, 0x6a, 0x50, 0x46,
+               0x13, 0x05, 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71,
+               0xa3, 0xb5, 0x8f, 0x99, 0xfb, 0xed, 0xd7, 0xc1,
+               0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, 0x1a, 0x0c,
+               0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc,
+               0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad,
+               0x7f, 0x69, 0x53, 0x45, 0x27, 0x31, 0x0b, 0x1d,
+               0xb2, 0xa4, 0x9e, 0x88, 0xea, 0xfc, 0xc6, 0xd0,
+               0x02, 0x14, 0x2e, 0x38, 0x5a, 0x4c, 0x76, 0x60,
+               0x35, 0x23, 0x19, 0x0f, 0x6d, 0x7b, 0x41, 0x57,
+               0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7,
+               0x48, 0x5e, 0x64, 0x72, 0x10, 0x06, 0x3c, 0x2a,
+               0xf8, 0xee, 0xd4, 0xc2, 0xa0, 0xb6, 0x8c, 0x9a,
+               0x26, 0x30, 0x0a, 0x1c, 0x7e, 0x68, 0x52, 0x44,
+               0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4,
+               0x5b, 0x4d, 0x77, 0x61, 0x03, 0x15, 0x2f, 0x39,
+               0xeb, 0xfd, 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89,
+               0xdc, 0xca, 0xf0, 0xe6, 0x84, 0x92, 0xa8, 0xbe,
+               0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, 0x18, 0x0e,
+               0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3,
+               0x11, 0x07, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73,
+       },
+       {
+               0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65,
+               0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd,
+               0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, 0x1f, 0x08,
+               0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0,
+               0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf,
+               0x62, 0x75, 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x07,
+               0xb7, 0xa0, 0x99, 0x8e, 0xeb, 0xfc, 0xc5, 0xd2,
+               0x0f, 0x18, 0x21, 0x36, 0x53, 0x44, 0x7d, 0x6a,
+               0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc,
+               0x11, 0x06, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74,
+               0xc4, 0xd3, 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1,
+               0x7c, 0x6b, 0x52, 0x45, 0x20, 0x37, 0x0e, 0x19,
+               0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, 0x01, 0x16,
+               0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae,
+               0x1e, 0x09, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b,
+               0xa6, 0xb1, 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3,
+               0x4f, 0x58, 0x61, 0x76, 0x13, 0x04, 0x3d, 0x2a,
+               0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, 0x85, 0x92,
+               0x22, 0x35, 0x0c, 0x1b, 0x7e, 0x69, 0x50, 0x47,
+               0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff,
+               0x95, 0x82, 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0,
+               0x2d, 0x3a, 0x03, 0x14, 0x71, 0x66, 0x5f, 0x48,
+               0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, 0x8a, 0x9d,
+               0x40, 0x57, 0x6e, 0x79, 0x1c, 0x0b, 0x32, 0x25,
+               0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83,
+               0x5e, 0x49, 0x70, 0x67, 0x02, 0x15, 0x2c, 0x3b,
+               0x8b, 0x9c, 0xa5, 0xb2, 0xd7, 0xc0, 0xf9, 0xee,
+               0x33, 0x24, 0x1d, 0x0a, 0x6f, 0x78, 0x41, 0x56,
+               0x3c, 0x2b, 0x12, 0x05, 0x60, 0x77, 0x4e, 0x59,
+               0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1,
+               0x51, 0x46, 0x7f, 0x68, 0x0d, 0x1a, 0x23, 0x34,
+               0xe9, 0xfe, 0xc7, 0xd0, 0xb5, 0xa2, 0x9b, 0x8c,
+       },
+       {
+               0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48,
+               0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88,
+               0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5,
+               0x5d, 0x45, 0x6d, 0x75, 0x3d, 0x25, 0x0d, 0x15,
+               0x27, 0x3f, 0x17, 0x0f, 0x47, 0x5f, 0x77, 0x6f,
+               0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, 0xb7, 0xaf,
+               0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2,
+               0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x02, 0x2a, 0x32,
+               0x4e, 0x56, 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x06,
+               0x8e, 0x96, 0xbe, 0xa6, 0xee, 0xf6, 0xde, 0xc6,
+               0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, 0x83, 0x9b,
+               0x13, 0x0b, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b,
+               0x69, 0x71, 0x59, 0x41, 0x09, 0x11, 0x39, 0x21,
+               0xa9, 0xb1, 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1,
+               0xf4, 0xec, 0xc4, 0xdc, 0x94, 0x8c, 0xa4, 0xbc,
+               0x34, 0x2c, 0x04, 0x1c, 0x54, 0x4c, 0x64, 0x7c,
+               0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4,
+               0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0x0c, 0x14,
+               0x01, 0x19, 0x31, 0x29, 0x61, 0x79, 0x51, 0x49,
+               0xc1, 0xd9, 0xf1, 0xe9, 0xa1, 0xb9, 0x91, 0x89,
+               0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, 0xeb, 0xf3,
+               0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x03, 0x2b, 0x33,
+               0x26, 0x3e, 0x16, 0x0e, 0x46, 0x5e, 0x76, 0x6e,
+               0xe6, 0xfe, 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae,
+               0xd2, 0xca, 0xe2, 0xfa, 0xb2, 0xaa, 0x82, 0x9a,
+               0x12, 0x0a, 0x22, 0x3a, 0x72, 0x6a, 0x42, 0x5a,
+               0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x07,
+               0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7,
+               0xf5, 0xed, 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd,
+               0x35, 0x2d, 0x05, 0x1d, 0x55, 0x4d, 0x65, 0x7d,
+               0x68, 0x70, 0x58, 0x40, 0x08, 0x10, 0x38, 0x20,
+               0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0,
+       },
+       {
+               0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f,
+               0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87,
+               0x8d, 0x94, 0xbf, 0xa6, 0xe9, 0xf0, 0xdb, 0xc2,
+               0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, 0x13, 0x0a,
+               0x07, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48,
+               0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80,
+               0x8a, 0x93, 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5,
+               0x42, 0x5b, 0x70, 0x69, 0x26, 0x3f, 0x14, 0x0d,
+               0x0e, 0x17, 0x3c, 0x25, 0x6a, 0x73, 0x58, 0x41,
+               0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89,
+               0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc,
+               0x4b, 0x52, 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x04,
+               0x09, 0x10, 0x3b, 0x22, 0x6d, 0x74, 0x5f, 0x46,
+               0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, 0x97, 0x8e,
+               0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb,
+               0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x03,
+               0x1c, 0x05, 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53,
+               0xd4, 0xcd, 0xe6, 0xff, 0xb0, 0xa9, 0x82, 0x9b,
+               0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, 0xc7, 0xde,
+               0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0x0f, 0x16,
+               0x1b, 0x02, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54,
+               0xd3, 0xca, 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c,
+               0x96, 0x8f, 0xa4, 0xbd, 0xf2, 0xeb, 0xc0, 0xd9,
+               0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, 0x08, 0x11,
+               0x12, 0x0b, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d,
+               0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95,
+               0x9f, 0x86, 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0,
+               0x57, 0x4e, 0x65, 0x7c, 0x33, 0x2a, 0x01, 0x18,
+               0x15, 0x0c, 0x27, 0x3e, 0x71, 0x68, 0x43, 0x5a,
+               0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92,
+               0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7,
+               0x50, 0x49, 0x62, 0x7b, 0x34, 0x2d, 0x06, 0x1f,
+       },
+       {
+               0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46,
+               0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96,
+               0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb,
+               0x6d, 0x77, 0x59, 0x43, 0x05, 0x1f, 0x31, 0x2b,
+               0x67, 0x7d, 0x53, 0x49, 0x0f, 0x15, 0x3b, 0x21,
+               0xb7, 0xad, 0x83, 0x99, 0xdf, 0xc5, 0xeb, 0xf1,
+               0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, 0x86, 0x9c,
+               0x0a, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c,
+               0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88,
+               0x1e, 0x04, 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58,
+               0x73, 0x69, 0x47, 0x5d, 0x1b, 0x01, 0x2f, 0x35,
+               0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, 0xff, 0xe5,
+               0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef,
+               0x79, 0x63, 0x4d, 0x57, 0x11, 0x0b, 0x25, 0x3f,
+               0x14, 0x0e, 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52,
+               0xc4, 0xde, 0xf0, 0xea, 0xac, 0xb6, 0x98, 0x82,
+               0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, 0xdd, 0xc7,
+               0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0x0d, 0x17,
+               0x3c, 0x26, 0x08, 0x12, 0x54, 0x4e, 0x60, 0x7a,
+               0xec, 0xf6, 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa,
+               0xe6, 0xfc, 0xd2, 0xc8, 0x8e, 0x94, 0xba, 0xa0,
+               0x36, 0x2c, 0x02, 0x18, 0x5e, 0x44, 0x6a, 0x70,
+               0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x07, 0x1d,
+               0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd,
+               0x4f, 0x55, 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x09,
+               0x9f, 0x85, 0xab, 0xb1, 0xf7, 0xed, 0xc3, 0xd9,
+               0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, 0xae, 0xb4,
+               0x22, 0x38, 0x16, 0x0c, 0x4a, 0x50, 0x7e, 0x64,
+               0x28, 0x32, 0x1c, 0x06, 0x40, 0x5a, 0x74, 0x6e,
+               0xf8, 0xe2, 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe,
+               0x95, 0x8f, 0xa1, 0xbb, 0xfd, 0xe7, 0xc9, 0xd3,
+               0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, 0x19, 0x03,
+       },
+       {
+               0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41,
+               0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99,
+               0xad, 0xb6, 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec,
+               0x75, 0x6e, 0x43, 0x58, 0x19, 0x02, 0x2f, 0x34,
+               0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, 0x1d, 0x06,
+               0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde,
+               0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab,
+               0x32, 0x29, 0x04, 0x1f, 0x5e, 0x45, 0x68, 0x73,
+               0x8e, 0x95, 0xb8, 0xa3, 0xe2, 0xf9, 0xd4, 0xcf,
+               0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, 0x0c, 0x17,
+               0x23, 0x38, 0x15, 0x0e, 0x4f, 0x54, 0x79, 0x62,
+               0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba,
+               0xc9, 0xd2, 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88,
+               0x11, 0x0a, 0x27, 0x3c, 0x7d, 0x66, 0x4b, 0x50,
+               0x64, 0x7f, 0x52, 0x49, 0x08, 0x13, 0x3e, 0x25,
+               0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd,
+               0x01, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40,
+               0xd9, 0xc2, 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98,
+               0xac, 0xb7, 0x9a, 0x81, 0xc0, 0xdb, 0xf6, 0xed,
+               0x74, 0x6f, 0x42, 0x59, 0x18, 0x03, 0x2e, 0x35,
+               0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x07,
+               0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf,
+               0xeb, 0xf0, 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa,
+               0x33, 0x28, 0x05, 0x1e, 0x5f, 0x44, 0x69, 0x72,
+               0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, 0xd5, 0xce,
+               0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0x0d, 0x16,
+               0x22, 0x39, 0x14, 0x0f, 0x4e, 0x55, 0x78, 0x63,
+               0xfa, 0xe1, 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb,
+               0xc8, 0xd3, 0xfe, 0xe5, 0xa4, 0xbf, 0x92, 0x89,
+               0x10, 0x0b, 0x26, 0x3d, 0x7c, 0x67, 0x4a, 0x51,
+               0x65, 0x7e, 0x53, 0x48, 0x09, 0x12, 0x3f, 0x24,
+               0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc,
+       },
+       {
+               0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54,
+               0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4,
+               0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, 0x95, 0x89,
+               0x3d, 0x21, 0x05, 0x19, 0x4d, 0x51, 0x75, 0x69,
+               0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3,
+               0x47, 0x5b, 0x7f, 0x63, 0x37, 0x2b, 0x0f, 0x13,
+               0x7a, 0x66, 0x42, 0x5e, 0x0a, 0x16, 0x32, 0x2e,
+               0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, 0xd2, 0xce,
+               0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x07,
+               0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7,
+               0x8e, 0x92, 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda,
+               0x6e, 0x72, 0x56, 0x4a, 0x1e, 0x02, 0x26, 0x3a,
+               0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, 0xbc, 0xa0,
+               0x14, 0x08, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40,
+               0x29, 0x35, 0x11, 0x0d, 0x59, 0x45, 0x61, 0x7d,
+               0xc9, 0xd5, 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d,
+               0xa6, 0xba, 0x9e, 0x82, 0xd6, 0xca, 0xee, 0xf2,
+               0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, 0x0e, 0x12,
+               0x7b, 0x67, 0x43, 0x5f, 0x0b, 0x17, 0x33, 0x2f,
+               0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf,
+               0x01, 0x1d, 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55,
+               0xe1, 0xfd, 0xd9, 0xc5, 0x91, 0x8d, 0xa9, 0xb5,
+               0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, 0x94, 0x88,
+               0x3c, 0x20, 0x04, 0x18, 0x4c, 0x50, 0x74, 0x68,
+               0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1,
+               0x15, 0x09, 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41,
+               0x28, 0x34, 0x10, 0x0c, 0x58, 0x44, 0x60, 0x7c,
+               0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, 0x80, 0x9c,
+               0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x06,
+               0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6,
+               0x8f, 0x93, 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb,
+               0x6f, 0x73, 0x57, 0x4b, 0x1f, 0x03, 0x27, 0x3b,
+       },
+       {
+               0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+               0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb,
+               0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e,
+               0x25, 0x38, 0x1f, 0x02, 0x51, 0x4c, 0x6b, 0x76,
+               0x87, 0x9a, 0xbd, 0xa0, 0xf3, 0xee, 0xc9, 0xd4,
+               0x6f, 0x72, 0x55, 0x48, 0x1b, 0x06, 0x21, 0x3c,
+               0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x04, 0x19,
+               0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1,
+               0x13, 0x0e, 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40,
+               0xfb, 0xe6, 0xc1, 0xdc, 0x8f, 0x92, 0xb5, 0xa8,
+               0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, 0x90, 0x8d,
+               0x36, 0x2b, 0x0c, 0x11, 0x42, 0x5f, 0x78, 0x65,
+               0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7,
+               0x7c, 0x61, 0x46, 0x5b, 0x08, 0x15, 0x32, 0x2f,
+               0x59, 0x44, 0x63, 0x7e, 0x2d, 0x30, 0x17, 0x0a,
+               0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, 0xff, 0xe2,
+               0x26, 0x3b, 0x1c, 0x01, 0x52, 0x4f, 0x68, 0x75,
+               0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d,
+               0xeb, 0xf6, 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8,
+               0x03, 0x1e, 0x39, 0x24, 0x77, 0x6a, 0x4d, 0x50,
+               0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, 0xef, 0xf2,
+               0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x07, 0x1a,
+               0x6c, 0x71, 0x56, 0x4b, 0x18, 0x05, 0x22, 0x3f,
+               0x84, 0x99, 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7,
+               0x35, 0x28, 0x0f, 0x12, 0x41, 0x5c, 0x7b, 0x66,
+               0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, 0x93, 0x8e,
+               0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab,
+               0x10, 0x0d, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43,
+               0xb2, 0xaf, 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1,
+               0x5a, 0x47, 0x60, 0x7d, 0x2e, 0x33, 0x14, 0x09,
+               0x7f, 0x62, 0x45, 0x58, 0x0b, 0x16, 0x31, 0x2c,
+               0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4,
+       },
+       {
+               0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a,
+               0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa,
+               0xfd, 0xe3, 0xc1, 0xdf, 0x85, 0x9b, 0xb9, 0xa7,
+               0x0d, 0x13, 0x31, 0x2f, 0x75, 0x6b, 0x49, 0x57,
+               0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd,
+               0x17, 0x09, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d,
+               0x1a, 0x04, 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40,
+               0xea, 0xf4, 0xd6, 0xc8, 0x92, 0x8c, 0xae, 0xb0,
+               0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, 0x97, 0x89,
+               0x23, 0x3d, 0x1f, 0x01, 0x5b, 0x45, 0x67, 0x79,
+               0x2e, 0x30, 0x12, 0x0c, 0x56, 0x48, 0x6a, 0x74,
+               0xde, 0xc0, 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84,
+               0x34, 0x2a, 0x08, 0x16, 0x4c, 0x52, 0x70, 0x6e,
+               0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, 0x80, 0x9e,
+               0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93,
+               0x39, 0x27, 0x05, 0x1b, 0x41, 0x5f, 0x7d, 0x63,
+               0xbb, 0xa5, 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1,
+               0x4b, 0x55, 0x77, 0x69, 0x33, 0x2d, 0x0f, 0x11,
+               0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, 0x02, 0x1c,
+               0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec,
+               0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x06,
+               0xac, 0xb2, 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6,
+               0xa1, 0xbf, 0x9d, 0x83, 0xd9, 0xc7, 0xe5, 0xfb,
+               0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, 0x15, 0x0b,
+               0x68, 0x76, 0x54, 0x4a, 0x10, 0x0e, 0x2c, 0x32,
+               0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2,
+               0x95, 0x8b, 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf,
+               0x65, 0x7b, 0x59, 0x47, 0x1d, 0x03, 0x21, 0x3f,
+               0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, 0xcb, 0xd5,
+               0x7f, 0x61, 0x43, 0x5d, 0x07, 0x19, 0x3b, 0x25,
+               0x72, 0x6c, 0x4e, 0x50, 0x0a, 0x14, 0x36, 0x28,
+               0x82, 0x9c, 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8,
+       },
+       {
+               0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d,
+               0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5,
+               0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0,
+               0x15, 0x0a, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48,
+               0xc7, 0xd8, 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a,
+               0x3f, 0x20, 0x01, 0x1e, 0x43, 0x5c, 0x7d, 0x62,
+               0x2a, 0x35, 0x14, 0x0b, 0x56, 0x49, 0x68, 0x77,
+               0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f,
+               0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce,
+               0x6b, 0x74, 0x55, 0x4a, 0x17, 0x08, 0x29, 0x36,
+               0x7e, 0x61, 0x40, 0x5f, 0x02, 0x1d, 0x3c, 0x23,
+               0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, 0xc4, 0xdb,
+               0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x09,
+               0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1,
+               0xb9, 0xa6, 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4,
+               0x41, 0x5e, 0x7f, 0x60, 0x3d, 0x22, 0x03, 0x1c,
+               0x3b, 0x24, 0x05, 0x1a, 0x47, 0x58, 0x79, 0x66,
+               0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e,
+               0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b,
+               0x2e, 0x31, 0x10, 0x0f, 0x52, 0x4d, 0x6c, 0x73,
+               0xfc, 0xe3, 0xc2, 0xdd, 0x80, 0x9f, 0xbe, 0xa1,
+               0x04, 0x1b, 0x3a, 0x25, 0x78, 0x67, 0x46, 0x59,
+               0x11, 0x0e, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c,
+               0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4,
+               0xa8, 0xb7, 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5,
+               0x50, 0x4f, 0x6e, 0x71, 0x2c, 0x33, 0x12, 0x0d,
+               0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, 0x07, 0x18,
+               0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0,
+               0x6f, 0x70, 0x51, 0x4e, 0x13, 0x0c, 0x2d, 0x32,
+               0x97, 0x88, 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca,
+               0x82, 0x9d, 0xbc, 0xa3, 0xfe, 0xe1, 0xc0, 0xdf,
+               0x7a, 0x65, 0x44, 0x5b, 0x06, 0x19, 0x38, 0x27,
+       },
+       {
+               0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+               0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd,
+               0x3a, 0x1a, 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda,
+               0x27, 0x07, 0x67, 0x47, 0xa7, 0x87, 0xe7, 0xc7,
+               0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, 0xb4, 0x94,
+               0x69, 0x49, 0x29, 0x09, 0xe9, 0xc9, 0xa9, 0x89,
+               0x4e, 0x6e, 0x0e, 0x2e, 0xce, 0xee, 0x8e, 0xae,
+               0x53, 0x73, 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3,
+               0xe8, 0xc8, 0xa8, 0x88, 0x68, 0x48, 0x28, 0x08,
+               0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, 0x35, 0x15,
+               0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32,
+               0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0x0f, 0x2f,
+               0x9c, 0xbc, 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c,
+               0x81, 0xa1, 0xc1, 0xe1, 0x01, 0x21, 0x41, 0x61,
+               0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x06, 0x66, 0x46,
+               0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b,
+               0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0x0d, 0x2d,
+               0xd0, 0xf0, 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30,
+               0xf7, 0xd7, 0xb7, 0x97, 0x77, 0x57, 0x37, 0x17,
+               0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, 0x2a, 0x0a,
+               0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59,
+               0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x04, 0x64, 0x44,
+               0x83, 0xa3, 0xc3, 0xe3, 0x03, 0x23, 0x43, 0x63,
+               0x9e, 0xbe, 0xde, 0xfe, 0x1e, 0x3e, 0x5e, 0x7e,
+               0x25, 0x05, 0x65, 0x45, 0xa5, 0x85, 0xe5, 0xc5,
+               0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8,
+               0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff,
+               0x02, 0x22, 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2,
+               0x51, 0x71, 0x11, 0x31, 0xd1, 0xf1, 0x91, 0xb1,
+               0x4c, 0x6c, 0x0c, 0x2c, 0xcc, 0xec, 0x8c, 0xac,
+               0x6b, 0x4b, 0x2b, 0x0b, 0xeb, 0xcb, 0xab, 0x8b,
+               0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96,
+       },
+       {
+               0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7,
+               0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2,
+               0x2a, 0x0b, 0x68, 0x49, 0xae, 0x8f, 0xec, 0xcd,
+               0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8,
+               0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3,
+               0x41, 0x60, 0x03, 0x22, 0xc5, 0xe4, 0x87, 0xa6,
+               0x7e, 0x5f, 0x3c, 0x1d, 0xfa, 0xdb, 0xb8, 0x99,
+               0x6b, 0x4a, 0x29, 0x08, 0xef, 0xce, 0xad, 0x8c,
+               0xa8, 0x89, 0xea, 0xcb, 0x2c, 0x0d, 0x6e, 0x4f,
+               0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a,
+               0x82, 0xa3, 0xc0, 0xe1, 0x06, 0x27, 0x44, 0x65,
+               0x97, 0xb6, 0xd5, 0xf4, 0x13, 0x32, 0x51, 0x70,
+               0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, 0x3a, 0x1b,
+               0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0x0e,
+               0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31,
+               0xc3, 0xe2, 0x81, 0xa0, 0x47, 0x66, 0x05, 0x24,
+               0x4d, 0x6c, 0x0f, 0x2e, 0xc9, 0xe8, 0x8b, 0xaa,
+               0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, 0x9e, 0xbf,
+               0x67, 0x46, 0x25, 0x04, 0xe3, 0xc2, 0xa1, 0x80,
+               0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95,
+               0x19, 0x38, 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe,
+               0x0c, 0x2d, 0x4e, 0x6f, 0x88, 0xa9, 0xca, 0xeb,
+               0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, 0xf5, 0xd4,
+               0x26, 0x07, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1,
+               0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x02,
+               0xf0, 0xd1, 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17,
+               0xcf, 0xee, 0x8d, 0xac, 0x4b, 0x6a, 0x09, 0x28,
+               0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, 0x1c, 0x3d,
+               0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56,
+               0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x01, 0x62, 0x43,
+               0x9b, 0xba, 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c,
+               0x8e, 0xaf, 0xcc, 0xed, 0x0a, 0x2b, 0x48, 0x69,
+       },
+       {
+               0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee,
+               0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3,
+               0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4,
+               0x17, 0x35, 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9,
+               0x34, 0x16, 0x70, 0x52, 0xbc, 0x9e, 0xf8, 0xda,
+               0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, 0xf5, 0xd7,
+               0x2e, 0x0c, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0,
+               0x23, 0x01, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd,
+               0x68, 0x4a, 0x2c, 0x0e, 0xe0, 0xc2, 0xa4, 0x86,
+               0x65, 0x47, 0x21, 0x03, 0xed, 0xcf, 0xa9, 0x8b,
+               0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, 0xbe, 0x9c,
+               0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91,
+               0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2,
+               0x51, 0x73, 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf,
+               0x46, 0x64, 0x02, 0x20, 0xce, 0xec, 0x8a, 0xa8,
+               0x4b, 0x69, 0x0f, 0x2d, 0xc3, 0xe1, 0x87, 0xa5,
+               0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e,
+               0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33,
+               0xca, 0xe8, 0x8e, 0xac, 0x42, 0x60, 0x06, 0x24,
+               0xc7, 0xe5, 0x83, 0xa1, 0x4f, 0x6d, 0x0b, 0x29,
+               0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, 0x28, 0x0a,
+               0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x07,
+               0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
+               0xf3, 0xd1, 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d,
+               0xb8, 0x9a, 0xfc, 0xde, 0x30, 0x12, 0x74, 0x56,
+               0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, 0x79, 0x5b,
+               0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x08, 0x6e, 0x4c,
+               0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x05, 0x63, 0x41,
+               0x8c, 0xae, 0xc8, 0xea, 0x04, 0x26, 0x40, 0x62,
+               0x81, 0xa3, 0xc5, 0xe7, 0x09, 0x2b, 0x4d, 0x6f,
+               0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, 0x5a, 0x78,
+               0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75,
+       },
+       {
+               0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9,
+               0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec,
+               0x0a, 0x29, 0x4c, 0x6f, 0x86, 0xa5, 0xc0, 0xe3,
+               0x0f, 0x2c, 0x49, 0x6a, 0x83, 0xa0, 0xc5, 0xe6,
+               0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd,
+               0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8,
+               0x1e, 0x3d, 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7,
+               0x1b, 0x38, 0x5d, 0x7e, 0x97, 0xb4, 0xd1, 0xf2,
+               0x28, 0x0b, 0x6e, 0x4d, 0xa4, 0x87, 0xe2, 0xc1,
+               0x2d, 0x0e, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4,
+               0x22, 0x01, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb,
+               0x27, 0x04, 0x61, 0x42, 0xab, 0x88, 0xed, 0xce,
+               0x3c, 0x1f, 0x7a, 0x59, 0xb0, 0x93, 0xf6, 0xd5,
+               0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, 0xf3, 0xd0,
+               0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf,
+               0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda,
+               0x50, 0x73, 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9,
+               0x55, 0x76, 0x13, 0x30, 0xd9, 0xfa, 0x9f, 0xbc,
+               0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, 0x90, 0xb3,
+               0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6,
+               0x44, 0x67, 0x02, 0x21, 0xc8, 0xeb, 0x8e, 0xad,
+               0x41, 0x62, 0x07, 0x24, 0xcd, 0xee, 0x8b, 0xa8,
+               0x4e, 0x6d, 0x08, 0x2b, 0xc2, 0xe1, 0x84, 0xa7,
+               0x4b, 0x68, 0x0d, 0x2e, 0xc7, 0xe4, 0x81, 0xa2,
+               0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91,
+               0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94,
+               0x72, 0x51, 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b,
+               0x77, 0x54, 0x31, 0x12, 0xfb, 0xd8, 0xbd, 0x9e,
+               0x6c, 0x4f, 0x2a, 0x09, 0xe0, 0xc3, 0xa6, 0x85,
+               0x69, 0x4a, 0x2f, 0x0c, 0xe5, 0xc6, 0xa3, 0x80,
+               0x66, 0x45, 0x20, 0x03, 0xea, 0xc9, 0xac, 0x8f,
+               0x63, 0x40, 0x25, 0x06, 0xef, 0xcc, 0xa9, 0x8a,
+       },
+       {
+               0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc,
+               0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1,
+               0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86,
+               0x47, 0x63, 0x0f, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb,
+               0xf4, 0xd0, 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x08,
+               0xc9, 0xed, 0x81, 0xa5, 0x59, 0x7d, 0x11, 0x35,
+               0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, 0x56, 0x72,
+               0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x07, 0x6b, 0x4f,
+               0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x09,
+               0xc8, 0xec, 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34,
+               0x8f, 0xab, 0xc7, 0xe3, 0x1f, 0x3b, 0x57, 0x73,
+               0xb2, 0x96, 0xfa, 0xde, 0x22, 0x06, 0x6a, 0x4e,
+               0x01, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd,
+               0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0,
+               0x7b, 0x5f, 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87,
+               0x46, 0x62, 0x0e, 0x2a, 0xd6, 0xf2, 0x9e, 0xba,
+               0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, 0x2f, 0x0b,
+               0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36,
+               0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71,
+               0xb0, 0x94, 0xf8, 0xdc, 0x20, 0x04, 0x68, 0x4c,
+               0x03, 0x27, 0x4b, 0x6f, 0x93, 0xb7, 0xdb, 0xff,
+               0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, 0xe6, 0xc2,
+               0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85,
+               0x44, 0x60, 0x0c, 0x28, 0xd4, 0xf0, 0x9c, 0xb8,
+               0x02, 0x26, 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe,
+               0x3f, 0x1b, 0x77, 0x53, 0xaf, 0x8b, 0xe7, 0xc3,
+               0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, 0xa0, 0x84,
+               0x45, 0x61, 0x0d, 0x29, 0xd5, 0xf1, 0x9d, 0xb9,
+               0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0x0a,
+               0xcb, 0xef, 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37,
+               0x8c, 0xa8, 0xc4, 0xe0, 0x1c, 0x38, 0x54, 0x70,
+               0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x05, 0x69, 0x4d,
+       },
+       {
+               0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb,
+               0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce,
+               0x6a, 0x4f, 0x20, 0x05, 0xfe, 0xdb, 0xb4, 0x91,
+               0x5f, 0x7a, 0x15, 0x30, 0xcb, 0xee, 0x81, 0xa4,
+               0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, 0x0a, 0x2f,
+               0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a,
+               0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0x0f, 0x60, 0x45,
+               0x8b, 0xae, 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70,
+               0xb5, 0x90, 0xff, 0xda, 0x21, 0x04, 0x6b, 0x4e,
+               0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, 0x5e, 0x7b,
+               0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x01, 0x24,
+               0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11,
+               0x61, 0x44, 0x2b, 0x0e, 0xf5, 0xd0, 0xbf, 0x9a,
+               0x54, 0x71, 0x1e, 0x3b, 0xc0, 0xe5, 0x8a, 0xaf,
+               0x0b, 0x2e, 0x41, 0x64, 0x9f, 0xba, 0xd5, 0xf0,
+               0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5,
+               0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c,
+               0x42, 0x67, 0x08, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9,
+               0x1d, 0x38, 0x57, 0x72, 0x89, 0xac, 0xc3, 0xe6,
+               0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3,
+               0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58,
+               0x96, 0xb3, 0xdc, 0xf9, 0x02, 0x27, 0x48, 0x6d,
+               0xc9, 0xec, 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32,
+               0xfc, 0xd9, 0xb6, 0x93, 0x68, 0x4d, 0x22, 0x07,
+               0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, 0x1c, 0x39,
+               0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0x0c,
+               0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53,
+               0x9d, 0xb8, 0xd7, 0xf2, 0x09, 0x2c, 0x43, 0x66,
+               0x16, 0x33, 0x5c, 0x79, 0x82, 0xa7, 0xc8, 0xed,
+               0x23, 0x06, 0x69, 0x4c, 0xb7, 0x92, 0xfd, 0xd8,
+               0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87,
+               0x49, 0x6c, 0x03, 0x26, 0xdd, 0xf8, 0x97, 0xb2,
+       },
+       {
+               0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2,
+               0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf,
+               0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4, 0x8e, 0xa8,
+               0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85,
+               0xb4, 0x92, 0xf8, 0xde, 0x2c, 0x0a, 0x60, 0x46,
+               0x99, 0xbf, 0xd5, 0xf3, 0x01, 0x27, 0x4d, 0x6b,
+               0xee, 0xc8, 0xa2, 0x84, 0x76, 0x50, 0x3a, 0x1c,
+               0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, 0x17, 0x31,
+               0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87,
+               0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa,
+               0x2f, 0x09, 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd,
+               0x02, 0x24, 0x4e, 0x68, 0x9a, 0xbc, 0xd6, 0xf0,
+               0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, 0x15, 0x33,
+               0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e,
+               0x9b, 0xbd, 0xd7, 0xf1, 0x03, 0x25, 0x4f, 0x69,
+               0xb6, 0x90, 0xfa, 0xdc, 0x2e, 0x08, 0x62, 0x44,
+               0xea, 0xcc, 0xa6, 0x80, 0x72, 0x54, 0x3e, 0x18,
+               0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, 0x13, 0x35,
+               0xb0, 0x96, 0xfc, 0xda, 0x28, 0x0e, 0x64, 0x42,
+               0x9d, 0xbb, 0xd1, 0xf7, 0x05, 0x23, 0x49, 0x6f,
+               0x5e, 0x78, 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac,
+               0x73, 0x55, 0x3f, 0x19, 0xeb, 0xcd, 0xa7, 0x81,
+               0x04, 0x22, 0x48, 0x6e, 0x9c, 0xba, 0xd0, 0xf6,
+               0x29, 0x0f, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb,
+               0x9f, 0xb9, 0xd3, 0xf5, 0x07, 0x21, 0x4b, 0x6d,
+               0xb2, 0x94, 0xfe, 0xd8, 0x2a, 0x0c, 0x66, 0x40,
+               0xc5, 0xe3, 0x89, 0xaf, 0x5d, 0x7b, 0x11, 0x37,
+               0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, 0x3c, 0x1a,
+               0x2b, 0x0d, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9,
+               0x06, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4,
+               0x71, 0x57, 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83,
+               0x5c, 0x7a, 0x10, 0x36, 0xc4, 0xe2, 0x88, 0xae,
+       },
+       {
+               0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+               0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0,
+               0x4a, 0x6d, 0x04, 0x23, 0xd6, 0xf1, 0x98, 0xbf,
+               0x6f, 0x48, 0x21, 0x06, 0xf3, 0xd4, 0xbd, 0x9a,
+               0x94, 0xb3, 0xda, 0xfd, 0x08, 0x2f, 0x46, 0x61,
+               0xb1, 0x96, 0xff, 0xd8, 0x2d, 0x0a, 0x63, 0x44,
+               0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0x0c, 0x2b,
+               0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0x0e,
+               0x35, 0x12, 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0,
+               0x10, 0x37, 0x5e, 0x79, 0x8c, 0xab, 0xc2, 0xe5,
+               0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, 0xad, 0x8a,
+               0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf,
+               0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54,
+               0x84, 0xa3, 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71,
+               0xeb, 0xcc, 0xa5, 0x82, 0x77, 0x50, 0x39, 0x1e,
+               0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, 0x1c, 0x3b,
+               0x6a, 0x4d, 0x24, 0x03, 0xf6, 0xd1, 0xb8, 0x9f,
+               0x4f, 0x68, 0x01, 0x26, 0xd3, 0xf4, 0x9d, 0xba,
+               0x20, 0x07, 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5,
+               0x05, 0x22, 0x4b, 0x6c, 0x99, 0xbe, 0xd7, 0xf0,
+               0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, 0x2c, 0x0b,
+               0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x09, 0x2e,
+               0xb4, 0x93, 0xfa, 0xdd, 0x28, 0x0f, 0x66, 0x41,
+               0x91, 0xb6, 0xdf, 0xf8, 0x0d, 0x2a, 0x43, 0x64,
+               0x5f, 0x78, 0x11, 0x36, 0xc3, 0xe4, 0x8d, 0xaa,
+               0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, 0xa8, 0x8f,
+               0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0,
+               0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5,
+               0xcb, 0xec, 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e,
+               0xee, 0xc9, 0xa0, 0x87, 0x72, 0x55, 0x3c, 0x1b,
+               0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, 0x53, 0x74,
+               0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51,
+       },
+       {
+               0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8,
+               0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85,
+               0xba, 0x92, 0xea, 0xc2, 0x1a, 0x32, 0x4a, 0x62,
+               0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, 0x17, 0x3f,
+               0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1,
+               0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec,
+               0xd3, 0xfb, 0x83, 0xab, 0x73, 0x5b, 0x23, 0x0b,
+               0x8e, 0xa6, 0xde, 0xf6, 0x2e, 0x06, 0x7e, 0x56,
+               0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, 0x22, 0x0a,
+               0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x07, 0x7f, 0x57,
+               0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0,
+               0x35, 0x1d, 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed,
+               0xbb, 0x93, 0xeb, 0xc3, 0x1b, 0x33, 0x4b, 0x63,
+               0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, 0x16, 0x3e,
+               0x01, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9,
+               0x5c, 0x74, 0x0c, 0x24, 0xfc, 0xd4, 0xac, 0x84,
+               0xb9, 0x91, 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61,
+               0xe4, 0xcc, 0xb4, 0x9c, 0x44, 0x6c, 0x14, 0x3c,
+               0x03, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, 0xf3, 0xdb,
+               0x5e, 0x76, 0x0e, 0x26, 0xfe, 0xd6, 0xae, 0x86,
+               0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x08,
+               0x8d, 0xa5, 0xdd, 0xf5, 0x2d, 0x05, 0x7d, 0x55,
+               0x6a, 0x42, 0x3a, 0x12, 0xca, 0xe2, 0x9a, 0xb2,
+               0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, 0xc7, 0xef,
+               0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3,
+               0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee,
+               0xd1, 0xf9, 0x81, 0xa9, 0x71, 0x59, 0x21, 0x09,
+               0x8c, 0xa4, 0xdc, 0xf4, 0x2c, 0x04, 0x7c, 0x54,
+               0x02, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, 0xf2, 0xda,
+               0x5f, 0x77, 0x0f, 0x27, 0xff, 0xd7, 0xaf, 0x87,
+               0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60,
+               0xe5, 0xcd, 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d,
+       },
+       {
+               0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf,
+               0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a,
+               0xaa, 0x83, 0xf8, 0xd1, 0x0e, 0x27, 0x5c, 0x75,
+               0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x09, 0x20,
+               0x49, 0x60, 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96,
+               0x1c, 0x35, 0x4e, 0x67, 0xb8, 0x91, 0xea, 0xc3,
+               0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, 0x15, 0x3c,
+               0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69,
+               0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d,
+               0xc7, 0xee, 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18,
+               0x38, 0x11, 0x6a, 0x43, 0x9c, 0xb5, 0xce, 0xe7,
+               0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0, 0x9b, 0xb2,
+               0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x04,
+               0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x03, 0x78, 0x51,
+               0x71, 0x58, 0x23, 0x0a, 0xd5, 0xfc, 0x87, 0xae,
+               0x24, 0x0d, 0x76, 0x5f, 0x80, 0xa9, 0xd2, 0xfb,
+               0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, 0xcf, 0xe6,
+               0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3,
+               0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c,
+               0xc6, 0xef, 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19,
+               0x70, 0x59, 0x22, 0x0b, 0xd4, 0xfd, 0x86, 0xaf,
+               0x25, 0x0c, 0x77, 0x5e, 0x81, 0xa8, 0xd3, 0xfa,
+               0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x05,
+               0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x02, 0x79, 0x50,
+               0xab, 0x82, 0xf9, 0xd0, 0x0f, 0x26, 0x5d, 0x74,
+               0xfe, 0xd7, 0xac, 0x85, 0x5a, 0x73, 0x08, 0x21,
+               0x01, 0x28, 0x53, 0x7a, 0xa5, 0x8c, 0xf7, 0xde,
+               0x54, 0x7d, 0x06, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b,
+               0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d,
+               0xb7, 0x9e, 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68,
+               0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97,
+               0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, 0xeb, 0xc2,
+       },
+       {
+               0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6,
+               0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b,
+               0x9a, 0xb0, 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c,
+               0xd7, 0xfd, 0x83, 0xa9, 0x7f, 0x55, 0x2b, 0x01,
+               0x29, 0x03, 0x7d, 0x57, 0x81, 0xab, 0xd5, 0xff,
+               0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2,
+               0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65,
+               0xfe, 0xd4, 0xaa, 0x80, 0x56, 0x7c, 0x02, 0x28,
+               0x52, 0x78, 0x06, 0x2c, 0xfa, 0xd0, 0xae, 0x84,
+               0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, 0xe3, 0xc9,
+               0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e,
+               0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x07, 0x79, 0x53,
+               0x7b, 0x51, 0x2f, 0x05, 0xd3, 0xf9, 0x87, 0xad,
+               0x36, 0x1c, 0x62, 0x48, 0x9e, 0xb4, 0xca, 0xe0,
+               0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, 0x1d, 0x37,
+               0xac, 0x86, 0xf8, 0xd2, 0x04, 0x2e, 0x50, 0x7a,
+               0xa4, 0x8e, 0xf0, 0xda, 0x0c, 0x26, 0x58, 0x72,
+               0xe9, 0xc3, 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f,
+               0x3e, 0x14, 0x6a, 0x40, 0x96, 0xbc, 0xc2, 0xe8,
+               0x73, 0x59, 0x27, 0x0d, 0xdb, 0xf1, 0x8f, 0xa5,
+               0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0x0f, 0x71, 0x5b,
+               0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16,
+               0x17, 0x3d, 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1,
+               0x5a, 0x70, 0x0e, 0x24, 0xf2, 0xd8, 0xa6, 0x8c,
+               0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, 0x0a, 0x20,
+               0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d,
+               0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba,
+               0x21, 0x0b, 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7,
+               0xdf, 0xf5, 0x8b, 0xa1, 0x77, 0x5d, 0x23, 0x09,
+               0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, 0x6e, 0x44,
+               0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93,
+               0x08, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde,
+       },
+       {
+               0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1,
+               0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94,
+               0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0x0d, 0x70, 0x5b,
+               0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e,
+               0x09, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8,
+               0x4c, 0x67, 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d,
+               0x83, 0xa8, 0xd5, 0xfe, 0x2f, 0x04, 0x79, 0x52,
+               0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, 0x3c, 0x17,
+               0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3,
+               0x57, 0x7c, 0x01, 0x2a, 0xfb, 0xd0, 0xad, 0x86,
+               0x98, 0xb3, 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49,
+               0xdd, 0xf6, 0x8b, 0xa0, 0x71, 0x5a, 0x27, 0x0c,
+               0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, 0xe1, 0xca,
+               0x5e, 0x75, 0x08, 0x23, 0xf2, 0xd9, 0xa4, 0x8f,
+               0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40,
+               0xd4, 0xff, 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x05,
+               0x24, 0x0f, 0x72, 0x59, 0x88, 0xa3, 0xde, 0xf5,
+               0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, 0x9b, 0xb0,
+               0xae, 0x85, 0xf8, 0xd3, 0x02, 0x29, 0x54, 0x7f,
+               0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a,
+               0x2d, 0x06, 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc,
+               0x68, 0x43, 0x3e, 0x15, 0xc4, 0xef, 0x92, 0xb9,
+               0xa7, 0x8c, 0xf1, 0xda, 0x0b, 0x20, 0x5d, 0x76,
+               0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33,
+               0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7,
+               0x73, 0x58, 0x25, 0x0e, 0xdf, 0xf4, 0x89, 0xa2,
+               0xbc, 0x97, 0xea, 0xc1, 0x10, 0x3b, 0x46, 0x6d,
+               0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, 0x03, 0x28,
+               0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee,
+               0x7a, 0x51, 0x2c, 0x07, 0xd6, 0xfd, 0x80, 0xab,
+               0xb5, 0x9e, 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64,
+               0xf0, 0xdb, 0xa6, 0x8d, 0x5c, 0x77, 0x0a, 0x21,
+       },
+       {
+               0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4,
+               0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9,
+               0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e,
+               0x87, 0xab, 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43,
+               0xe9, 0xc5, 0xb1, 0x9d, 0x59, 0x75, 0x01, 0x2d,
+               0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x08, 0x7c, 0x50,
+               0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7,
+               0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa,
+               0xcf, 0xe3, 0x97, 0xbb, 0x7f, 0x53, 0x27, 0x0b,
+               0xb2, 0x9e, 0xea, 0xc6, 0x02, 0x2e, 0x5a, 0x76,
+               0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, 0xdd, 0xf1,
+               0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c,
+               0x26, 0x0a, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2,
+               0x5b, 0x77, 0x03, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f,
+               0xdc, 0xf0, 0x84, 0xa8, 0x6c, 0x40, 0x34, 0x18,
+               0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, 0x49, 0x65,
+               0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47,
+               0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a,
+               0x79, 0x55, 0x21, 0x0d, 0xc9, 0xe5, 0x91, 0xbd,
+               0x04, 0x28, 0x5c, 0x70, 0xb4, 0x98, 0xec, 0xc0,
+               0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, 0x82, 0xae,
+               0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3,
+               0x90, 0xbc, 0xc8, 0xe4, 0x20, 0x0c, 0x78, 0x54,
+               0xed, 0xc1, 0xb5, 0x99, 0x5d, 0x71, 0x05, 0x29,
+               0x4c, 0x60, 0x14, 0x38, 0xfc, 0xd0, 0xa4, 0x88,
+               0x31, 0x1d, 0x69, 0x45, 0x81, 0xad, 0xd9, 0xf5,
+               0xb6, 0x9a, 0xee, 0xc2, 0x06, 0x2a, 0x5e, 0x72,
+               0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0x0f,
+               0xa5, 0x89, 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61,
+               0xd8, 0xf4, 0x80, 0xac, 0x68, 0x44, 0x30, 0x1c,
+               0x5f, 0x73, 0x07, 0x2b, 0xef, 0xc3, 0xb7, 0x9b,
+               0x22, 0x0e, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6,
+       },
+       {
+               0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3,
+               0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6,
+               0xea, 0xc7, 0xb0, 0x9d, 0x5e, 0x73, 0x04, 0x29,
+               0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x06, 0x71, 0x5c,
+               0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0x0a,
+               0xbc, 0x91, 0xe6, 0xcb, 0x08, 0x25, 0x52, 0x7f,
+               0x23, 0x0e, 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0,
+               0x56, 0x7b, 0x0c, 0x21, 0xe2, 0xcf, 0xb8, 0x95,
+               0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, 0x61, 0x4c,
+               0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39,
+               0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6,
+               0x10, 0x3d, 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3,
+               0x46, 0x6b, 0x1c, 0x31, 0xf2, 0xdf, 0xa8, 0x85,
+               0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, 0xdd, 0xf0,
+               0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f,
+               0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a,
+               0x03, 0x2e, 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0,
+               0x76, 0x5b, 0x2c, 0x01, 0xc2, 0xef, 0x98, 0xb5,
+               0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, 0x07, 0x2a,
+               0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x05, 0x72, 0x5f,
+               0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x09,
+               0xbf, 0x92, 0xe5, 0xc8, 0x0b, 0x26, 0x51, 0x7c,
+               0x20, 0x0d, 0x7a, 0x57, 0x94, 0xb9, 0xce, 0xe3,
+               0x55, 0x78, 0x0f, 0x22, 0xe1, 0xcc, 0xbb, 0x96,
+               0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f,
+               0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a,
+               0x66, 0x4b, 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5,
+               0x13, 0x3e, 0x49, 0x64, 0xa7, 0x8a, 0xfd, 0xd0,
+               0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, 0xab, 0x86,
+               0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3,
+               0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c,
+               0xda, 0xf7, 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19,
+       },
+       {
+               0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca,
+               0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7,
+               0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10,
+               0xb7, 0x99, 0xeb, 0xc5, 0x0f, 0x21, 0x53, 0x7d,
+               0xa9, 0x87, 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63,
+               0xc4, 0xea, 0x98, 0xb6, 0x7c, 0x52, 0x20, 0x0e,
+               0x73, 0x5d, 0x2f, 0x01, 0xcb, 0xe5, 0x97, 0xb9,
+               0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4,
+               0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85,
+               0x22, 0x0c, 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8,
+               0x95, 0xbb, 0xc9, 0xe7, 0x2d, 0x03, 0x71, 0x5f,
+               0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, 0x1c, 0x32,
+               0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x02, 0x2c,
+               0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41,
+               0x3c, 0x12, 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6,
+               0x51, 0x7f, 0x0d, 0x23, 0xe9, 0xc7, 0xb5, 0x9b,
+               0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x08, 0x7a, 0x54,
+               0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39,
+               0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e,
+               0x29, 0x07, 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3,
+               0x37, 0x19, 0x6b, 0x45, 0x8f, 0xa1, 0xd3, 0xfd,
+               0x5a, 0x74, 0x06, 0x28, 0xe2, 0xcc, 0xbe, 0x90,
+               0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x09, 0x27,
+               0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a,
+               0xd1, 0xff, 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b,
+               0xbc, 0x92, 0xe0, 0xce, 0x04, 0x2a, 0x58, 0x76,
+               0x0b, 0x25, 0x57, 0x79, 0xb3, 0x9d, 0xef, 0xc1,
+               0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac,
+               0x78, 0x56, 0x24, 0x0a, 0xc0, 0xee, 0x9c, 0xb2,
+               0x15, 0x3b, 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf,
+               0xa2, 0x8c, 0xfe, 0xd0, 0x1a, 0x34, 0x46, 0x68,
+               0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, 0x2b, 0x05,
+       },
+       {
+               0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd,
+               0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8,
+               0xca, 0xe5, 0x94, 0xbb, 0x76, 0x59, 0x28, 0x07,
+               0xaf, 0x80, 0xf1, 0xde, 0x13, 0x3c, 0x4d, 0x62,
+               0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, 0x6b, 0x44,
+               0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0x0e, 0x21,
+               0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e,
+               0x26, 0x09, 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb,
+               0x0f, 0x20, 0x51, 0x7e, 0xb3, 0x9c, 0xed, 0xc2,
+               0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, 0x88, 0xa7,
+               0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x08,
+               0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d,
+               0x86, 0xa9, 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b,
+               0xe3, 0xcc, 0xbd, 0x92, 0x5f, 0x70, 0x01, 0x2e,
+               0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, 0xae, 0x81,
+               0x29, 0x06, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4,
+               0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3,
+               0x7b, 0x54, 0x25, 0x0a, 0xc7, 0xe8, 0x99, 0xb6,
+               0xd4, 0xfb, 0x8a, 0xa5, 0x68, 0x47, 0x36, 0x19,
+               0xb1, 0x9e, 0xef, 0xc0, 0x0d, 0x22, 0x53, 0x7c,
+               0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x04, 0x75, 0x5a,
+               0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f,
+               0x5d, 0x72, 0x03, 0x2c, 0xe1, 0xce, 0xbf, 0x90,
+               0x38, 0x17, 0x66, 0x49, 0x84, 0xab, 0xda, 0xf5,
+               0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, 0xf3, 0xdc,
+               0x74, 0x5b, 0x2a, 0x05, 0xc8, 0xe7, 0x96, 0xb9,
+               0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16,
+               0xbe, 0x91, 0xe0, 0xcf, 0x02, 0x2d, 0x5c, 0x73,
+               0x98, 0xb7, 0xc6, 0xe9, 0x24, 0x0b, 0x7a, 0x55,
+               0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, 0x1f, 0x30,
+               0x52, 0x7d, 0x0c, 0x23, 0xee, 0xc1, 0xb0, 0x9f,
+               0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa,
+       },
+       {
+               0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+               0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d,
+               0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, 0x87, 0xb7,
+               0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a,
+               0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde,
+               0xd3, 0xe3, 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43,
+               0x69, 0x59, 0x09, 0x39, 0xa9, 0x99, 0xc9, 0xf9,
+               0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x04, 0x54, 0x64,
+               0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0x0c,
+               0x01, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91,
+               0xbb, 0x8b, 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b,
+               0x26, 0x16, 0x46, 0x76, 0xe6, 0xd6, 0x86, 0xb6,
+               0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, 0x72, 0x42,
+               0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf,
+               0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x05, 0x55, 0x65,
+               0x68, 0x58, 0x08, 0x38, 0xa8, 0x98, 0xc8, 0xf8,
+               0x25, 0x15, 0x45, 0x75, 0xe5, 0xd5, 0x85, 0xb5,
+               0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, 0x18, 0x28,
+               0x02, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92,
+               0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0x0f,
+               0x6b, 0x5b, 0x0b, 0x3b, 0xab, 0x9b, 0xcb, 0xfb,
+               0xf6, 0xc6, 0x96, 0xa6, 0x36, 0x06, 0x56, 0x66,
+               0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, 0xec, 0xdc,
+               0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41,
+               0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29,
+               0x24, 0x14, 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4,
+               0x9e, 0xae, 0xfe, 0xce, 0x5e, 0x6e, 0x3e, 0x0e,
+               0x03, 0x33, 0x63, 0x53, 0xc3, 0xf3, 0xa3, 0x93,
+               0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x07, 0x57, 0x67,
+               0x6a, 0x5a, 0x0a, 0x3a, 0xaa, 0x9a, 0xca, 0xfa,
+               0xd0, 0xe0, 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40,
+               0x4d, 0x7d, 0x2d, 0x1d, 0x8d, 0xbd, 0xed, 0xdd,
+       },
+       {
+               0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97,
+               0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02,
+               0x37, 0x06, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0,
+               0xa2, 0x93, 0xc0, 0xf1, 0x66, 0x57, 0x04, 0x35,
+               0x6e, 0x5f, 0x0c, 0x3d, 0xaa, 0x9b, 0xc8, 0xf9,
+               0xfb, 0xca, 0x99, 0xa8, 0x3f, 0x0e, 0x5d, 0x6c,
+               0x59, 0x68, 0x3b, 0x0a, 0x9d, 0xac, 0xff, 0xce,
+               0xcc, 0xfd, 0xae, 0x9f, 0x08, 0x39, 0x6a, 0x5b,
+               0xdc, 0xed, 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b,
+               0x49, 0x78, 0x2b, 0x1a, 0x8d, 0xbc, 0xef, 0xde,
+               0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, 0x4d, 0x7c,
+               0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9,
+               0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25,
+               0x27, 0x16, 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0,
+               0x85, 0xb4, 0xe7, 0xd6, 0x41, 0x70, 0x23, 0x12,
+               0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, 0xb6, 0x87,
+               0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x03, 0x32,
+               0x30, 0x01, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7,
+               0x92, 0xa3, 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x05,
+               0x07, 0x36, 0x65, 0x54, 0xc3, 0xf2, 0xa1, 0x90,
+               0xcb, 0xfa, 0xa9, 0x98, 0x0f, 0x3e, 0x6d, 0x5c,
+               0x5e, 0x6f, 0x3c, 0x0d, 0x9a, 0xab, 0xf8, 0xc9,
+               0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x09, 0x5a, 0x6b,
+               0x69, 0x58, 0x0b, 0x3a, 0xad, 0x9c, 0xcf, 0xfe,
+               0x79, 0x48, 0x1b, 0x2a, 0xbd, 0x8c, 0xdf, 0xee,
+               0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, 0x4a, 0x7b,
+               0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9,
+               0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c,
+               0x17, 0x26, 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80,
+               0x82, 0xb3, 0xe0, 0xd1, 0x46, 0x77, 0x24, 0x15,
+               0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, 0x86, 0xb7,
+               0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22,
+       },
+       {
+               0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e,
+               0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13,
+               0x07, 0x35, 0x63, 0x51, 0xcf, 0xfd, 0xab, 0x99,
+               0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, 0x26, 0x14,
+               0x0e, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90,
+               0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d,
+               0x09, 0x3b, 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97,
+               0x84, 0xb6, 0xe0, 0xd2, 0x4c, 0x7e, 0x28, 0x1a,
+               0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, 0xb0, 0x82,
+               0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0x0f,
+               0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85,
+               0x96, 0xa4, 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x08,
+               0x12, 0x20, 0x76, 0x44, 0xda, 0xe8, 0xbe, 0x8c,
+               0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, 0x33, 0x01,
+               0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b,
+               0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x06,
+               0x38, 0x0a, 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6,
+               0xb5, 0x87, 0xd1, 0xe3, 0x7d, 0x4f, 0x19, 0x2b,
+               0x3f, 0x0d, 0x5b, 0x69, 0xf7, 0xc5, 0x93, 0xa1,
+               0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c,
+               0x36, 0x04, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8,
+               0xbb, 0x89, 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25,
+               0x31, 0x03, 0x55, 0x67, 0xf9, 0xcb, 0x9d, 0xaf,
+               0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46, 0x10, 0x22,
+               0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba,
+               0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x05, 0x37,
+               0x23, 0x11, 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd,
+               0xae, 0x9c, 0xca, 0xf8, 0x66, 0x54, 0x02, 0x30,
+               0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, 0x86, 0xb4,
+               0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0x0b, 0x39,
+               0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3,
+               0xa0, 0x92, 0xc4, 0xf6, 0x68, 0x5a, 0x0c, 0x3e,
+       },
+       {
+               0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99,
+               0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c,
+               0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e,
+               0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0x0b,
+               0x2e, 0x1d, 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7,
+               0xab, 0x98, 0xcd, 0xfe, 0x67, 0x54, 0x01, 0x32,
+               0x39, 0x0a, 0x5f, 0x6c, 0xf5, 0xc6, 0x93, 0xa0,
+               0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25,
+               0x5c, 0x6f, 0x3a, 0x09, 0x90, 0xa3, 0xf6, 0xc5,
+               0xd9, 0xea, 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40,
+               0x4b, 0x78, 0x2d, 0x1e, 0x87, 0xb4, 0xe1, 0xd2,
+               0xce, 0xfd, 0xa8, 0x9b, 0x02, 0x31, 0x64, 0x57,
+               0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb,
+               0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x08, 0x5d, 0x6e,
+               0x65, 0x56, 0x03, 0x30, 0xa9, 0x9a, 0xcf, 0xfc,
+               0xe0, 0xd3, 0x86, 0xb5, 0x2c, 0x1f, 0x4a, 0x79,
+               0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47, 0x12, 0x21,
+               0x3d, 0x0e, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4,
+               0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x05, 0x36,
+               0x2a, 0x19, 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3,
+               0x96, 0xa5, 0xf0, 0xc3, 0x5a, 0x69, 0x3c, 0x0f,
+               0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, 0xb9, 0x8a,
+               0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18,
+               0x04, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d,
+               0xe4, 0xd7, 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d,
+               0x61, 0x52, 0x07, 0x34, 0xad, 0x9e, 0xcb, 0xf8,
+               0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0x0c, 0x59, 0x6a,
+               0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef,
+               0xca, 0xf9, 0xac, 0x9f, 0x06, 0x35, 0x60, 0x53,
+               0x4f, 0x7c, 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6,
+               0xdd, 0xee, 0xbb, 0x88, 0x11, 0x22, 0x77, 0x44,
+               0x58, 0x6b, 0x3e, 0x0d, 0x94, 0xa7, 0xf2, 0xc1,
+       },
+       {
+               0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c,
+               0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31,
+               0x67, 0x53, 0x0f, 0x3b, 0xb7, 0x83, 0xdf, 0xeb,
+               0xda, 0xee, 0xb2, 0x86, 0x0a, 0x3e, 0x62, 0x56,
+               0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, 0x76, 0x42,
+               0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff,
+               0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25,
+               0x14, 0x20, 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98,
+               0x81, 0xb5, 0xe9, 0xdd, 0x51, 0x65, 0x39, 0x0d,
+               0x3c, 0x08, 0x54, 0x60, 0xec, 0xd8, 0x84, 0xb0,
+               0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x02, 0x5e, 0x6a,
+               0x5b, 0x6f, 0x33, 0x07, 0x8b, 0xbf, 0xe3, 0xd7,
+               0x4f, 0x7b, 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3,
+               0xf2, 0xc6, 0x9a, 0xae, 0x22, 0x16, 0x4a, 0x7e,
+               0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, 0x90, 0xa4,
+               0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19,
+               0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93,
+               0xa2, 0x96, 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e,
+               0x78, 0x4c, 0x10, 0x24, 0xa8, 0x9c, 0xc0, 0xf4,
+               0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, 0x7d, 0x49,
+               0xd1, 0xe5, 0xb9, 0x8d, 0x01, 0x35, 0x69, 0x5d,
+               0x6c, 0x58, 0x04, 0x30, 0xbc, 0x88, 0xd4, 0xe0,
+               0xb6, 0x82, 0xde, 0xea, 0x66, 0x52, 0x0e, 0x3a,
+               0x0b, 0x3f, 0x63, 0x57, 0xdb, 0xef, 0xb3, 0x87,
+               0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, 0x26, 0x12,
+               0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf,
+               0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75,
+               0x44, 0x70, 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8,
+               0x50, 0x64, 0x38, 0x0c, 0x80, 0xb4, 0xe8, 0xdc,
+               0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x09, 0x55, 0x61,
+               0x37, 0x03, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb,
+               0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x06,
+       },
+       {
+               0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b,
+               0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e,
+               0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, 0xc9, 0xfc,
+               0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49,
+               0xee, 0xdb, 0x84, 0xb1, 0x3a, 0x0f, 0x50, 0x65,
+               0x5b, 0x6e, 0x31, 0x04, 0x8f, 0xba, 0xe5, 0xd0,
+               0x99, 0xac, 0xf3, 0xc6, 0x4d, 0x78, 0x27, 0x12,
+               0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, 0x92, 0xa7,
+               0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a,
+               0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff,
+               0xb6, 0x83, 0xdc, 0xe9, 0x62, 0x57, 0x08, 0x3d,
+               0x03, 0x36, 0x69, 0x5c, 0xd7, 0xe2, 0xbd, 0x88,
+               0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, 0x91, 0xa4,
+               0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11,
+               0x58, 0x6d, 0x32, 0x07, 0x8c, 0xb9, 0xe6, 0xd3,
+               0xed, 0xd8, 0x87, 0xb2, 0x39, 0x0c, 0x53, 0x66,
+               0x9f, 0xaa, 0xf5, 0xc0, 0x4b, 0x7e, 0x21, 0x14,
+               0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, 0x94, 0xa1,
+               0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x09, 0x56, 0x63,
+               0x5d, 0x68, 0x37, 0x02, 0x89, 0xbc, 0xe3, 0xd6,
+               0x71, 0x44, 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa,
+               0xc4, 0xf1, 0xae, 0x9b, 0x10, 0x25, 0x7a, 0x4f,
+               0x06, 0x33, 0x6c, 0x59, 0xd2, 0xe7, 0xb8, 0x8d,
+               0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0x0d, 0x38,
+               0x5e, 0x6b, 0x34, 0x01, 0x8a, 0xbf, 0xe0, 0xd5,
+               0xeb, 0xde, 0x81, 0xb4, 0x3f, 0x0a, 0x55, 0x60,
+               0x29, 0x1c, 0x43, 0x76, 0xfd, 0xc8, 0x97, 0xa2,
+               0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, 0x22, 0x17,
+               0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0x0e, 0x3b,
+               0x05, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e,
+               0xc7, 0xf2, 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c,
+               0x72, 0x47, 0x18, 0x2d, 0xa6, 0x93, 0xcc, 0xf9,
+       },
+       {
+               0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82,
+               0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f,
+               0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5,
+               0xea, 0xdc, 0x86, 0xb0, 0x32, 0x04, 0x5e, 0x68,
+               0x8e, 0xb8, 0xe2, 0xd4, 0x56, 0x60, 0x3a, 0x0c,
+               0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, 0x97, 0xa1,
+               0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b,
+               0x64, 0x52, 0x08, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6,
+               0x01, 0x37, 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83,
+               0xac, 0x9a, 0xc0, 0xf6, 0x74, 0x42, 0x18, 0x2e,
+               0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, 0xf2, 0xc4,
+               0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x05, 0x5f, 0x69,
+               0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0x0d,
+               0x22, 0x14, 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0,
+               0xc8, 0xfe, 0xa4, 0x92, 0x10, 0x26, 0x7c, 0x4a,
+               0x65, 0x53, 0x09, 0x3f, 0xbd, 0x8b, 0xd1, 0xe7,
+               0x02, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80,
+               0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d,
+               0x45, 0x73, 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7,
+               0xe8, 0xde, 0x84, 0xb2, 0x30, 0x06, 0x5c, 0x6a,
+               0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, 0x38, 0x0e,
+               0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3,
+               0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49,
+               0x66, 0x50, 0x0a, 0x3c, 0xbe, 0x88, 0xd2, 0xe4,
+               0x03, 0x35, 0x6f, 0x59, 0xdb, 0xed, 0xb7, 0x81,
+               0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, 0x1a, 0x2c,
+               0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6,
+               0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x07, 0x5d, 0x6b,
+               0x8d, 0xbb, 0xe1, 0xd7, 0x55, 0x63, 0x39, 0x0f,
+               0x20, 0x16, 0x4c, 0x7a, 0xf8, 0xce, 0x94, 0xa2,
+               0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, 0x7e, 0x48,
+               0x67, 0x51, 0x0b, 0x3d, 0xbf, 0x89, 0xd3, 0xe5,
+       },
+       {
+               0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85,
+               0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20,
+               0x57, 0x60, 0x39, 0x0e, 0x8b, 0xbc, 0xe5, 0xd2,
+               0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, 0x40, 0x77,
+               0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b,
+               0x0b, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e,
+               0xf9, 0xce, 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c,
+               0x5c, 0x6b, 0x32, 0x05, 0x80, 0xb7, 0xee, 0xd9,
+               0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, 0xf3, 0xc4,
+               0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0x0f, 0x56, 0x61,
+               0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93,
+               0xb3, 0x84, 0xdd, 0xea, 0x6f, 0x58, 0x01, 0x36,
+               0xef, 0xd8, 0x81, 0xb6, 0x33, 0x04, 0x5d, 0x6a,
+               0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, 0xf8, 0xcf,
+               0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d,
+               0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98,
+               0x82, 0xb5, 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x07,
+               0x27, 0x10, 0x49, 0x7e, 0xfb, 0xcc, 0x95, 0xa2,
+               0xd5, 0xe2, 0xbb, 0x8c, 0x09, 0x3e, 0x67, 0x50,
+               0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5,
+               0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9,
+               0x89, 0xbe, 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0x0c,
+               0x7b, 0x4c, 0x15, 0x22, 0xa7, 0x90, 0xc9, 0xfe,
+               0xde, 0xe9, 0xb0, 0x87, 0x02, 0x35, 0x6c, 0x5b,
+               0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46,
+               0x66, 0x51, 0x08, 0x3f, 0xba, 0x8d, 0xd4, 0xe3,
+               0x94, 0xa3, 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11,
+               0x31, 0x06, 0x5f, 0x68, 0xed, 0xda, 0x83, 0xb4,
+               0x6d, 0x5a, 0x03, 0x34, 0xb1, 0x86, 0xdf, 0xe8,
+               0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d,
+               0x3a, 0x0d, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf,
+               0x9f, 0xa8, 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a,
+       },
+       {
+               0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8,
+               0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75,
+               0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0x0f,
+               0x7a, 0x42, 0x0a, 0x32, 0x9a, 0xa2, 0xea, 0xd2,
+               0x53, 0x6b, 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb,
+               0x8e, 0xb6, 0xfe, 0xc6, 0x6e, 0x56, 0x1e, 0x26,
+               0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, 0x64, 0x5c,
+               0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81,
+               0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0x0e,
+               0x7b, 0x43, 0x0b, 0x33, 0x9b, 0xa3, 0xeb, 0xd3,
+               0x01, 0x39, 0x71, 0x49, 0xe1, 0xd9, 0x91, 0xa9,
+               0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x04, 0x4c, 0x74,
+               0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d,
+               0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80,
+               0x52, 0x6a, 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa,
+               0x8f, 0xb7, 0xff, 0xc7, 0x6f, 0x57, 0x1f, 0x27,
+               0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, 0xc1, 0xf9,
+               0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24,
+               0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e,
+               0x2b, 0x13, 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83,
+               0x02, 0x3a, 0x72, 0x4a, 0xe2, 0xda, 0x92, 0xaa,
+               0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x07, 0x4f, 0x77,
+               0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0x0d,
+               0x78, 0x40, 0x08, 0x30, 0x98, 0xa0, 0xe8, 0xd0,
+               0xf7, 0xcf, 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f,
+               0x2a, 0x12, 0x5a, 0x62, 0xca, 0xf2, 0xba, 0x82,
+               0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, 0xc0, 0xf8,
+               0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25,
+               0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0x0c,
+               0x79, 0x41, 0x09, 0x31, 0x99, 0xa1, 0xe9, 0xd1,
+               0x03, 0x3b, 0x73, 0x4b, 0xe3, 0xdb, 0x93, 0xab,
+               0xde, 0xe6, 0xae, 0x96, 0x3e, 0x06, 0x4e, 0x76,
+       },
+       {
+               0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf,
+               0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a,
+               0xb7, 0x8e, 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18,
+               0x62, 0x5b, 0x10, 0x29, 0x86, 0xbf, 0xf4, 0xcd,
+               0x73, 0x4a, 0x01, 0x38, 0x97, 0xae, 0xe5, 0xdc,
+               0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x09,
+               0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b,
+               0x11, 0x28, 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe,
+               0xe6, 0xdf, 0x94, 0xad, 0x02, 0x3b, 0x70, 0x49,
+               0x33, 0x0a, 0x41, 0x78, 0xd7, 0xee, 0xa5, 0x9c,
+               0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe,
+               0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b,
+               0x95, 0xac, 0xe7, 0xde, 0x71, 0x48, 0x03, 0x3a,
+               0x40, 0x79, 0x32, 0x0b, 0xa4, 0x9d, 0xd6, 0xef,
+               0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, 0xb4, 0x8d,
+               0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58,
+               0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0x0c, 0x47, 0x7e,
+               0x04, 0x3d, 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab,
+               0x66, 0x5f, 0x14, 0x2d, 0x82, 0xbb, 0xf0, 0xc9,
+               0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, 0x25, 0x1c,
+               0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0x0d,
+               0x77, 0x4e, 0x05, 0x3c, 0x93, 0xaa, 0xe1, 0xd8,
+               0x15, 0x2c, 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba,
+               0xc0, 0xf9, 0xb2, 0x8b, 0x24, 0x1d, 0x56, 0x6f,
+               0x37, 0x0e, 0x45, 0x7c, 0xd3, 0xea, 0xa1, 0x98,
+               0xe2, 0xdb, 0x90, 0xa9, 0x06, 0x3f, 0x74, 0x4d,
+               0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f,
+               0x55, 0x6c, 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa,
+               0x44, 0x7d, 0x36, 0x0f, 0xa0, 0x99, 0xd2, 0xeb,
+               0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, 0x07, 0x3e,
+               0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c,
+               0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89,
+       },
+       {
+               0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+               0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b,
+               0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, 0x1b, 0x21,
+               0x4a, 0x70, 0x3e, 0x04, 0xa2, 0x98, 0xd6, 0xec,
+               0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5,
+               0xde, 0xe4, 0xaa, 0x90, 0x36, 0x0c, 0x42, 0x78,
+               0x94, 0xae, 0xe0, 0xda, 0x7c, 0x46, 0x08, 0x32,
+               0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b, 0xc5, 0xff,
+               0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80,
+               0xeb, 0xd1, 0x9f, 0xa5, 0x03, 0x39, 0x77, 0x4d,
+               0xa1, 0x9b, 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x07,
+               0x6c, 0x56, 0x18, 0x22, 0x84, 0xbe, 0xf0, 0xca,
+               0x35, 0x0f, 0x41, 0x7b, 0xdd, 0xe7, 0xa9, 0x93,
+               0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e,
+               0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14,
+               0x7f, 0x45, 0x0b, 0x31, 0x97, 0xad, 0xe3, 0xd9,
+               0x4c, 0x76, 0x38, 0x02, 0xa4, 0x9e, 0xd0, 0xea,
+               0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, 0x1d, 0x27,
+               0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d,
+               0x06, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0,
+               0x5f, 0x65, 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9,
+               0x92, 0xa8, 0xe6, 0xdc, 0x7a, 0x40, 0x0e, 0x34,
+               0xd8, 0xe2, 0xac, 0x96, 0x30, 0x0a, 0x44, 0x7e,
+               0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3,
+               0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc,
+               0xa7, 0x9d, 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x01,
+               0xed, 0xd7, 0x99, 0xa3, 0x05, 0x3f, 0x71, 0x4b,
+               0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, 0xbc, 0x86,
+               0x79, 0x43, 0x0d, 0x37, 0x91, 0xab, 0xe5, 0xdf,
+               0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12,
+               0xfe, 0xc4, 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58,
+               0x33, 0x09, 0x47, 0x7d, 0xdb, 0xe1, 0xaf, 0x95,
+       },
+       {
+               0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1,
+               0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64,
+               0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0x0d, 0x36,
+               0x52, 0x69, 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3,
+               0x33, 0x08, 0x45, 0x7e, 0xdf, 0xe4, 0xa9, 0x92,
+               0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, 0x6c, 0x57,
+               0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x05,
+               0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0,
+               0x66, 0x5d, 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7,
+               0xa3, 0x98, 0xd5, 0xee, 0x4f, 0x74, 0x39, 0x02,
+               0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, 0x6b, 0x50,
+               0x34, 0x0f, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95,
+               0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4,
+               0x90, 0xab, 0xe6, 0xdd, 0x7c, 0x47, 0x0a, 0x31,
+               0xc2, 0xf9, 0xb4, 0x8f, 0x2e, 0x15, 0x58, 0x63,
+               0x07, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, 0x9d, 0xa6,
+               0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d,
+               0x09, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8,
+               0x5b, 0x60, 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa,
+               0x9e, 0xa5, 0xe8, 0xd3, 0x72, 0x49, 0x04, 0x3f,
+               0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, 0x65, 0x5e,
+               0x3a, 0x01, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b,
+               0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9,
+               0xad, 0x96, 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0x0c,
+               0xaa, 0x91, 0xdc, 0xe7, 0x46, 0x7d, 0x30, 0x0b,
+               0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, 0xf5, 0xce,
+               0x3d, 0x06, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c,
+               0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59,
+               0x99, 0xa2, 0xef, 0xd4, 0x75, 0x4e, 0x03, 0x38,
+               0x5c, 0x67, 0x2a, 0x11, 0xb0, 0x8b, 0xc6, 0xfd,
+               0x0e, 0x35, 0x78, 0x43, 0xe2, 0xd9, 0x94, 0xaf,
+               0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a,
+       },
+       {
+               0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4,
+               0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49,
+               0xe7, 0xdb, 0x9f, 0xa3, 0x17, 0x2b, 0x6f, 0x53,
+               0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, 0x92, 0xae,
+               0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67,
+               0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a,
+               0x34, 0x08, 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80,
+               0xc9, 0xf5, 0xb1, 0x8d, 0x39, 0x05, 0x41, 0x7d,
+               0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, 0x33, 0x0f,
+               0x46, 0x7a, 0x3e, 0x02, 0xb6, 0x8a, 0xce, 0xf2,
+               0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8,
+               0xa1, 0x9d, 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15,
+               0x68, 0x54, 0x10, 0x2c, 0x98, 0xa4, 0xe0, 0xdc,
+               0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, 0x1d, 0x21,
+               0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x07, 0x3b,
+               0x72, 0x4e, 0x0a, 0x36, 0x82, 0xbe, 0xfa, 0xc6,
+               0x6b, 0x57, 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf,
+               0x96, 0xaa, 0xee, 0xd2, 0x66, 0x5a, 0x1e, 0x22,
+               0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, 0x04, 0x38,
+               0x71, 0x4d, 0x09, 0x35, 0x81, 0xbd, 0xf9, 0xc5,
+               0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0x0c,
+               0x45, 0x79, 0x3d, 0x01, 0xb5, 0x89, 0xcd, 0xf1,
+               0x5f, 0x63, 0x27, 0x1b, 0xaf, 0x93, 0xd7, 0xeb,
+               0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, 0x2a, 0x16,
+               0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64,
+               0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99,
+               0x37, 0x0b, 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83,
+               0xca, 0xf6, 0xb2, 0x8e, 0x3a, 0x06, 0x42, 0x7e,
+               0x03, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, 0x8b, 0xb7,
+               0xfe, 0xc2, 0x86, 0xba, 0x0e, 0x32, 0x76, 0x4a,
+               0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50,
+               0x19, 0x25, 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad,
+       },
+       {
+               0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3,
+               0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46,
+               0xf7, 0xca, 0x8d, 0xb0, 0x03, 0x3e, 0x79, 0x44,
+               0x02, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1,
+               0xf3, 0xce, 0x89, 0xb4, 0x07, 0x3a, 0x7d, 0x40,
+               0x06, 0x3b, 0x7c, 0x41, 0xf2, 0xcf, 0x88, 0xb5,
+               0x04, 0x39, 0x7e, 0x43, 0xf0, 0xcd, 0x8a, 0xb7,
+               0xf1, 0xcc, 0x8b, 0xb6, 0x05, 0x38, 0x7f, 0x42,
+               0xfb, 0xc6, 0x81, 0xbc, 0x0f, 0x32, 0x75, 0x48,
+               0x0e, 0x33, 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd,
+               0x0c, 0x31, 0x76, 0x4b, 0xf8, 0xc5, 0x82, 0xbf,
+               0xf9, 0xc4, 0x83, 0xbe, 0x0d, 0x30, 0x77, 0x4a,
+               0x08, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb,
+               0xfd, 0xc0, 0x87, 0xba, 0x09, 0x34, 0x73, 0x4e,
+               0xff, 0xc2, 0x85, 0xb8, 0x0b, 0x36, 0x71, 0x4c,
+               0x0a, 0x37, 0x70, 0x4d, 0xfe, 0xc3, 0x84, 0xb9,
+               0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, 0x65, 0x58,
+               0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad,
+               0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf,
+               0xe9, 0xd4, 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a,
+               0x18, 0x25, 0x62, 0x5f, 0xec, 0xd1, 0x96, 0xab,
+               0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, 0x63, 0x5e,
+               0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c,
+               0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9,
+               0x10, 0x2d, 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3,
+               0xe5, 0xd8, 0x9f, 0xa2, 0x11, 0x2c, 0x6b, 0x56,
+               0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, 0x69, 0x54,
+               0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1,
+               0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50,
+               0x16, 0x2b, 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5,
+               0x14, 0x29, 0x6e, 0x53, 0xe0, 0xdd, 0x9a, 0xa7,
+               0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, 0x6f, 0x52,
+       },
+       {
+               0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba,
+               0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57,
+               0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d,
+               0x2a, 0x14, 0x56, 0x68, 0xd2, 0xec, 0xae, 0x90,
+               0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, 0x17, 0x29,
+               0x7e, 0x40, 0x02, 0x3c, 0x86, 0xb8, 0xfa, 0xc4,
+               0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee,
+               0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03,
+               0x3b, 0x05, 0x47, 0x79, 0xc3, 0xfd, 0xbf, 0x81,
+               0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, 0x52, 0x6c,
+               0xfc, 0xc2, 0x80, 0xbe, 0x04, 0x3a, 0x78, 0x46,
+               0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab,
+               0xa8, 0x96, 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12,
+               0x45, 0x7b, 0x39, 0x07, 0xbd, 0x83, 0xc1, 0xff,
+               0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, 0xeb, 0xd5,
+               0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x06, 0x38,
+               0x76, 0x48, 0x0a, 0x34, 0x8e, 0xb0, 0xf2, 0xcc,
+               0x9b, 0xa5, 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21,
+               0xb1, 0x8f, 0xcd, 0xf3, 0x49, 0x77, 0x35, 0x0b,
+               0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, 0xd8, 0xe6,
+               0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f,
+               0x08, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2,
+               0x22, 0x1c, 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98,
+               0xcf, 0xf1, 0xb3, 0x8d, 0x37, 0x09, 0x4b, 0x75,
+               0x4d, 0x73, 0x31, 0x0f, 0xb5, 0x8b, 0xc9, 0xf7,
+               0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a,
+               0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0x0e, 0x30,
+               0x67, 0x59, 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd,
+               0xde, 0xe0, 0xa2, 0x9c, 0x26, 0x18, 0x5a, 0x64,
+               0x33, 0x0d, 0x4f, 0x71, 0xcb, 0xf5, 0xb7, 0x89,
+               0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3,
+               0xf4, 0xca, 0x88, 0xb6, 0x0c, 0x32, 0x70, 0x4e,
+       },
+       {
+               0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd,
+               0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58,
+               0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, 0x55, 0x6a,
+               0x32, 0x0d, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f,
+               0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0x0e,
+               0x56, 0x69, 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb,
+               0x64, 0x5b, 0x1a, 0x25, 0x98, 0xa7, 0xe6, 0xd9,
+               0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, 0x03, 0x3c,
+               0x7b, 0x44, 0x05, 0x3a, 0x87, 0xb8, 0xf9, 0xc6,
+               0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23,
+               0xac, 0x93, 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11,
+               0x49, 0x76, 0x37, 0x08, 0xb5, 0x8a, 0xcb, 0xf4,
+               0xc8, 0xf7, 0xb6, 0x89, 0x34, 0x0b, 0x4a, 0x75,
+               0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90,
+               0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2,
+               0xfa, 0xc5, 0x84, 0xbb, 0x06, 0x39, 0x78, 0x47,
+               0xf6, 0xc9, 0x88, 0xb7, 0x0a, 0x35, 0x74, 0x4b,
+               0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, 0x91, 0xae,
+               0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c,
+               0xc4, 0xfb, 0xba, 0x85, 0x38, 0x07, 0x46, 0x79,
+               0x45, 0x7a, 0x3b, 0x04, 0xb9, 0x86, 0xc7, 0xf8,
+               0xa0, 0x9f, 0xde, 0xe1, 0x5c, 0x63, 0x22, 0x1d,
+               0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, 0x10, 0x2f,
+               0x77, 0x48, 0x09, 0x36, 0x8b, 0xb4, 0xf5, 0xca,
+               0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0x0f, 0x30,
+               0x68, 0x57, 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5,
+               0x5a, 0x65, 0x24, 0x1b, 0xa6, 0x99, 0xd8, 0xe7,
+               0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, 0x3d, 0x02,
+               0x3e, 0x01, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83,
+               0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66,
+               0xe9, 0xd6, 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54,
+               0x0c, 0x33, 0x72, 0x4d, 0xf0, 0xcf, 0x8e, 0xb1,
+       },
+       {
+               0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd,
+               0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7,
+               0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9,
+               0x4e, 0x0e, 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93,
+               0xe8, 0xa8, 0x68, 0x28, 0xf5, 0xb5, 0x75, 0x35,
+               0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, 0x4f, 0x0f,
+               0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x01, 0x41,
+               0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b,
+               0xcd, 0x8d, 0x4d, 0x0d, 0xd0, 0x90, 0x50, 0x10,
+               0xf7, 0xb7, 0x77, 0x37, 0xea, 0xaa, 0x6a, 0x2a,
+               0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, 0x24, 0x64,
+               0x83, 0xc3, 0x03, 0x43, 0x9e, 0xde, 0x1e, 0x5e,
+               0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8,
+               0x1f, 0x5f, 0x9f, 0xdf, 0x02, 0x42, 0x82, 0xc2,
+               0x51, 0x11, 0xd1, 0x91, 0x4c, 0x0c, 0xcc, 0x8c,
+               0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, 0xf6, 0xb6,
+               0x87, 0xc7, 0x07, 0x47, 0x9a, 0xda, 0x1a, 0x5a,
+               0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60,
+               0xf3, 0xb3, 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e,
+               0xc9, 0x89, 0x49, 0x09, 0xd4, 0x94, 0x54, 0x14,
+               0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, 0xf2, 0xb2,
+               0x55, 0x15, 0xd5, 0x95, 0x48, 0x08, 0xc8, 0x88,
+               0x1b, 0x5b, 0x9b, 0xdb, 0x06, 0x46, 0x86, 0xc6,
+               0x21, 0x61, 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc,
+               0x4a, 0x0a, 0xca, 0x8a, 0x57, 0x17, 0xd7, 0x97,
+               0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, 0xed, 0xad,
+               0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3,
+               0x04, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9,
+               0xa2, 0xe2, 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f,
+               0x98, 0xd8, 0x18, 0x58, 0x85, 0xc5, 0x05, 0x45,
+               0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b, 0x4b, 0x0b,
+               0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31,
+       },
+       {
+               0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda,
+               0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8,
+               0x64, 0x25, 0xe6, 0xa7, 0x7d, 0x3c, 0xff, 0xbe,
+               0x56, 0x17, 0xd4, 0x95, 0x4f, 0x0e, 0xcd, 0x8c,
+               0xc8, 0x89, 0x4a, 0x0b, 0xd1, 0x90, 0x53, 0x12,
+               0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20,
+               0xac, 0xed, 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76,
+               0x9e, 0xdf, 0x1c, 0x5d, 0x87, 0xc6, 0x05, 0x44,
+               0x8d, 0xcc, 0x0f, 0x4e, 0x94, 0xd5, 0x16, 0x57,
+               0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65,
+               0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33,
+               0xdb, 0x9a, 0x59, 0x18, 0xc2, 0x83, 0x40, 0x01,
+               0x45, 0x04, 0xc7, 0x86, 0x5c, 0x1d, 0xde, 0x9f,
+               0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, 0xec, 0xad,
+               0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb,
+               0x13, 0x52, 0x91, 0xd0, 0x0a, 0x4b, 0x88, 0xc9,
+               0x07, 0x46, 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd,
+               0x35, 0x74, 0xb7, 0xf6, 0x2c, 0x6d, 0xae, 0xef,
+               0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, 0xf8, 0xb9,
+               0x51, 0x10, 0xd3, 0x92, 0x48, 0x09, 0xca, 0x8b,
+               0xcf, 0x8e, 0x4d, 0x0c, 0xd6, 0x97, 0x54, 0x15,
+               0xfd, 0xbc, 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27,
+               0xab, 0xea, 0x29, 0x68, 0xb2, 0xf3, 0x30, 0x71,
+               0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, 0x02, 0x43,
+               0x8a, 0xcb, 0x08, 0x49, 0x93, 0xd2, 0x11, 0x50,
+               0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62,
+               0xee, 0xaf, 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34,
+               0xdc, 0x9d, 0x5e, 0x1f, 0xc5, 0x84, 0x47, 0x06,
+               0x42, 0x03, 0xc0, 0x81, 0x5b, 0x1a, 0xd9, 0x98,
+               0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa,
+               0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc,
+               0x14, 0x55, 0x96, 0xd7, 0x0d, 0x4c, 0x8f, 0xce,
+       },
+       {
+               0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3,
+               0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9,
+               0x54, 0x16, 0xd0, 0x92, 0x41, 0x03, 0xc5, 0x87,
+               0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad,
+               0xa8, 0xea, 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b,
+               0x82, 0xc0, 0x06, 0x44, 0x97, 0xd5, 0x13, 0x51,
+               0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, 0x6d, 0x2f,
+               0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x05,
+               0x4d, 0x0f, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e,
+               0x67, 0x25, 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4,
+               0x19, 0x5b, 0x9d, 0xdf, 0x0c, 0x4e, 0x88, 0xca,
+               0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, 0xa2, 0xe0,
+               0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36,
+               0xcf, 0x8d, 0x4b, 0x09, 0xda, 0x98, 0x5e, 0x1c,
+               0xb1, 0xf3, 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62,
+               0x9b, 0xd9, 0x1f, 0x5d, 0x8e, 0xcc, 0x0a, 0x48,
+               0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd, 0x0b, 0x49,
+               0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63,
+               0xce, 0x8c, 0x4a, 0x08, 0xdb, 0x99, 0x5f, 0x1d,
+               0xe4, 0xa6, 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37,
+               0x32, 0x70, 0xb6, 0xf4, 0x27, 0x65, 0xa3, 0xe1,
+               0x18, 0x5a, 0x9c, 0xde, 0x0d, 0x4f, 0x89, 0xcb,
+               0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5,
+               0x4c, 0x0e, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f,
+               0xd7, 0x95, 0x53, 0x11, 0xc2, 0x80, 0x46, 0x04,
+               0xfd, 0xbf, 0x79, 0x3b, 0xe8, 0xaa, 0x6c, 0x2e,
+               0x83, 0xc1, 0x07, 0x45, 0x96, 0xd4, 0x12, 0x50,
+               0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a,
+               0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac,
+               0x55, 0x17, 0xd1, 0x93, 0x40, 0x02, 0xc4, 0x86,
+               0x2b, 0x69, 0xaf, 0xed, 0x3e, 0x7c, 0xba, 0xf8,
+               0x01, 0x43, 0x85, 0xc7, 0x14, 0x56, 0x90, 0xd2,
+       },
+       {
+               0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4,
+               0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6,
+               0x44, 0x07, 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90,
+               0x66, 0x25, 0xe0, 0xa3, 0x77, 0x34, 0xf1, 0xb2,
+               0x88, 0xcb, 0x0e, 0x4d, 0x99, 0xda, 0x1f, 0x5c,
+               0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e,
+               0xcc, 0x8f, 0x4a, 0x09, 0xdd, 0x9e, 0x5b, 0x18,
+               0xee, 0xad, 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a,
+               0x0d, 0x4e, 0x8b, 0xc8, 0x1c, 0x5f, 0x9a, 0xd9,
+               0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, 0xb8, 0xfb,
+               0x49, 0x0a, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d,
+               0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf,
+               0x85, 0xc6, 0x03, 0x40, 0x94, 0xd7, 0x12, 0x51,
+               0xa7, 0xe4, 0x21, 0x62, 0xb6, 0xf5, 0x30, 0x73,
+               0xc1, 0x82, 0x47, 0x04, 0xd0, 0x93, 0x56, 0x15,
+               0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37,
+               0x1a, 0x59, 0x9c, 0xdf, 0x0b, 0x48, 0x8d, 0xce,
+               0x38, 0x7b, 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec,
+               0x5e, 0x1d, 0xd8, 0x9b, 0x4f, 0x0c, 0xc9, 0x8a,
+               0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, 0xeb, 0xa8,
+               0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x05, 0x46,
+               0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64,
+               0xd6, 0x95, 0x50, 0x13, 0xc7, 0x84, 0x41, 0x02,
+               0xf4, 0xb7, 0x72, 0x31, 0xe5, 0xa6, 0x63, 0x20,
+               0x17, 0x54, 0x91, 0xd2, 0x06, 0x45, 0x80, 0xc3,
+               0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1,
+               0x53, 0x10, 0xd5, 0x96, 0x42, 0x01, 0xc4, 0x87,
+               0x71, 0x32, 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5,
+               0x9f, 0xdc, 0x19, 0x5a, 0x8e, 0xcd, 0x08, 0x4b,
+               0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, 0x2a, 0x69,
+               0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0x0f,
+               0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d,
+       },
+       {
+               0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1,
+               0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb,
+               0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, 0xb1, 0xf5,
+               0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef,
+               0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9,
+               0x72, 0x36, 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3,
+               0x5c, 0x18, 0xd4, 0x90, 0x51, 0x15, 0xd9, 0x9d,
+               0x46, 0x02, 0xce, 0x8a, 0x4b, 0x0f, 0xc3, 0x87,
+               0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11,
+               0xca, 0x8e, 0x42, 0x06, 0xc7, 0x83, 0x4f, 0x0b,
+               0xe4, 0xa0, 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25,
+               0xfe, 0xba, 0x76, 0x32, 0xf3, 0xb7, 0x7b, 0x3f,
+               0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, 0x3d, 0x79,
+               0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63,
+               0x8c, 0xc8, 0x04, 0x40, 0x81, 0xc5, 0x09, 0x4d,
+               0x96, 0xd2, 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57,
+               0xbd, 0xf9, 0x35, 0x71, 0xb0, 0xf4, 0x38, 0x7c,
+               0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, 0x22, 0x66,
+               0x89, 0xcd, 0x01, 0x45, 0x84, 0xc0, 0x0c, 0x48,
+               0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52,
+               0xd5, 0x91, 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14,
+               0xcf, 0x8b, 0x47, 0x03, 0xc2, 0x86, 0x4a, 0x0e,
+               0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, 0x64, 0x20,
+               0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a,
+               0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac,
+               0x77, 0x33, 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6,
+               0x59, 0x1d, 0xd1, 0x95, 0x54, 0x10, 0xdc, 0x98,
+               0x43, 0x07, 0xcb, 0x8f, 0x4e, 0x0a, 0xc6, 0x82,
+               0x05, 0x41, 0x8d, 0xc9, 0x08, 0x4c, 0x80, 0xc4,
+               0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde,
+               0x31, 0x75, 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0,
+               0x2b, 0x6f, 0xa3, 0xe7, 0x26, 0x62, 0xae, 0xea,
+       },
+       {
+               0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6,
+               0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4,
+               0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2,
+               0x36, 0x73, 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0,
+               0x48, 0x0d, 0xc2, 0x87, 0x41, 0x04, 0xcb, 0x8e,
+               0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, 0xd9, 0x9c,
+               0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa,
+               0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8,
+               0x90, 0xd5, 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56,
+               0x82, 0xc7, 0x08, 0x4d, 0x8b, 0xce, 0x01, 0x44,
+               0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, 0x37, 0x72,
+               0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60,
+               0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e,
+               0xca, 0x8f, 0x40, 0x05, 0xc3, 0x86, 0x49, 0x0c,
+               0xfc, 0xb9, 0x76, 0x33, 0xf5, 0xb0, 0x7f, 0x3a,
+               0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, 0x6d, 0x28,
+               0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb,
+               0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9,
+               0x19, 0x5c, 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf,
+               0x0b, 0x4e, 0x81, 0xc4, 0x02, 0x47, 0x88, 0xcd,
+               0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, 0xf6, 0xb3,
+               0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1,
+               0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97,
+               0x43, 0x06, 0xc9, 0x8c, 0x4a, 0x0f, 0xc0, 0x85,
+               0xad, 0xe8, 0x27, 0x62, 0xa4, 0xe1, 0x2e, 0x6b,
+               0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, 0x3c, 0x79,
+               0x89, 0xcc, 0x03, 0x46, 0x80, 0xc5, 0x0a, 0x4f,
+               0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d,
+               0xe5, 0xa0, 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23,
+               0xf7, 0xb2, 0x7d, 0x38, 0xfe, 0xbb, 0x74, 0x31,
+               0xc1, 0x84, 0x4b, 0x0e, 0xc8, 0x8d, 0x42, 0x07,
+               0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15,
+       },
+       {
+               0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf,
+               0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5,
+               0x14, 0x52, 0x98, 0xde, 0x11, 0x57, 0x9d, 0xdb,
+               0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, 0x97, 0xd1,
+               0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7,
+               0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed,
+               0x3c, 0x7a, 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3,
+               0x36, 0x70, 0xba, 0xfc, 0x33, 0x75, 0xbf, 0xf9,
+               0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, 0xd9, 0x9f,
+               0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95,
+               0x44, 0x02, 0xc8, 0x8e, 0x41, 0x07, 0xcd, 0x8b,
+               0x4e, 0x08, 0xc2, 0x84, 0x4b, 0x0d, 0xc7, 0x81,
+               0x78, 0x3e, 0xf4, 0xb2, 0x7d, 0x3b, 0xf1, 0xb7,
+               0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, 0xfb, 0xbd,
+               0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3,
+               0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9,
+               0xa0, 0xe6, 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f,
+               0xaa, 0xec, 0x26, 0x60, 0xaf, 0xe9, 0x23, 0x65,
+               0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, 0x3d, 0x7b,
+               0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71,
+               0x88, 0xce, 0x04, 0x42, 0x8d, 0xcb, 0x01, 0x47,
+               0x82, 0xc4, 0x0e, 0x48, 0x87, 0xc1, 0x0b, 0x4d,
+               0x9c, 0xda, 0x10, 0x56, 0x99, 0xdf, 0x15, 0x53,
+               0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, 0x1f, 0x59,
+               0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f,
+               0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35,
+               0xe4, 0xa2, 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b,
+               0xee, 0xa8, 0x62, 0x24, 0xeb, 0xad, 0x67, 0x21,
+               0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, 0x51, 0x17,
+               0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d,
+               0xcc, 0x8a, 0x40, 0x06, 0xc9, 0x8f, 0x45, 0x03,
+               0xc6, 0x80, 0x4a, 0x0c, 0xc3, 0x85, 0x4f, 0x09,
+       },
+       {
+               0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8,
+               0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca,
+               0x04, 0x43, 0x8a, 0xcd, 0x05, 0x42, 0x8b, 0xcc,
+               0x06, 0x41, 0x88, 0xcf, 0x07, 0x40, 0x89, 0xce,
+               0x08, 0x4f, 0x86, 0xc1, 0x09, 0x4e, 0x87, 0xc0,
+               0x0a, 0x4d, 0x84, 0xc3, 0x0b, 0x4c, 0x85, 0xc2,
+               0x0c, 0x4b, 0x82, 0xc5, 0x0d, 0x4a, 0x83, 0xc4,
+               0x0e, 0x49, 0x80, 0xc7, 0x0f, 0x48, 0x81, 0xc6,
+               0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8,
+               0x12, 0x55, 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda,
+               0x14, 0x53, 0x9a, 0xdd, 0x15, 0x52, 0x9b, 0xdc,
+               0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, 0x99, 0xde,
+               0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0,
+               0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2,
+               0x1c, 0x5b, 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4,
+               0x1e, 0x59, 0x90, 0xd7, 0x1f, 0x58, 0x91, 0xd6,
+               0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, 0xaf, 0xe8,
+               0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea,
+               0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec,
+               0x26, 0x61, 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee,
+               0x28, 0x6f, 0xa6, 0xe1, 0x29, 0x6e, 0xa7, 0xe0,
+               0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, 0xa5, 0xe2,
+               0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4,
+               0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6,
+               0x30, 0x77, 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8,
+               0x32, 0x75, 0xbc, 0xfb, 0x33, 0x74, 0xbd, 0xfa,
+               0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, 0xbb, 0xfc,
+               0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe,
+               0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0,
+               0x3a, 0x7d, 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2,
+               0x3c, 0x7b, 0xb2, 0xf5, 0x3d, 0x7a, 0xb3, 0xf4,
+               0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, 0xb1, 0xf6,
+       },
+       {
+               0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5,
+               0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f,
+               0xf4, 0xbc, 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11,
+               0x8e, 0xc6, 0x1e, 0x56, 0xb3, 0xfb, 0x23, 0x6b,
+               0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, 0x58, 0x10,
+               0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a,
+               0x01, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4,
+               0x7b, 0x33, 0xeb, 0xa3, 0x46, 0x0e, 0xd6, 0x9e,
+               0xf7, 0xbf, 0x67, 0x2f, 0xca, 0x82, 0x5a, 0x12,
+               0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, 0x20, 0x68,
+               0x03, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6,
+               0x79, 0x31, 0xe9, 0xa1, 0x44, 0x0c, 0xd4, 0x9c,
+               0x02, 0x4a, 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7,
+               0x78, 0x30, 0xe8, 0xa0, 0x45, 0x0d, 0xd5, 0x9d,
+               0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, 0x5b, 0x13,
+               0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69,
+               0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16,
+               0x89, 0xc1, 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c,
+               0x07, 0x4f, 0x97, 0xdf, 0x3a, 0x72, 0xaa, 0xe2,
+               0x7d, 0x35, 0xed, 0xa5, 0x40, 0x08, 0xd0, 0x98,
+               0x06, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3,
+               0x7c, 0x34, 0xec, 0xa4, 0x41, 0x09, 0xd1, 0x99,
+               0xf2, 0xba, 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17,
+               0x88, 0xc0, 0x18, 0x50, 0xb5, 0xfd, 0x25, 0x6d,
+               0x04, 0x4c, 0x94, 0xdc, 0x39, 0x71, 0xa9, 0xe1,
+               0x7e, 0x36, 0xee, 0xa6, 0x43, 0x0b, 0xd3, 0x9b,
+               0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15,
+               0x8a, 0xc2, 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f,
+               0xf1, 0xb9, 0x61, 0x29, 0xcc, 0x84, 0x5c, 0x14,
+               0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, 0x26, 0x6e,
+               0x05, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0,
+               0x7f, 0x37, 0xef, 0xa7, 0x42, 0x0a, 0xd2, 0x9a,
+       },
+       {
+               0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2,
+               0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90,
+               0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, 0x4f, 0x06,
+               0x96, 0xdf, 0x04, 0x4d, 0xaf, 0xe6, 0x3d, 0x74,
+               0xd5, 0x9c, 0x47, 0x0e, 0xec, 0xa5, 0x7e, 0x37,
+               0xa7, 0xee, 0x35, 0x7c, 0x9e, 0xd7, 0x0c, 0x45,
+               0x31, 0x78, 0xa3, 0xea, 0x08, 0x41, 0x9a, 0xd3,
+               0x43, 0x0a, 0xd1, 0x98, 0x7a, 0x33, 0xe8, 0xa1,
+               0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55,
+               0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27,
+               0x53, 0x1a, 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1,
+               0x21, 0x68, 0xb3, 0xfa, 0x18, 0x51, 0x8a, 0xc3,
+               0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, 0xc9, 0x80,
+               0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2,
+               0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64,
+               0xf4, 0xbd, 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16,
+               0x73, 0x3a, 0xe1, 0xa8, 0x4a, 0x03, 0xd8, 0x91,
+               0x01, 0x48, 0x93, 0xda, 0x38, 0x71, 0xaa, 0xe3,
+               0x97, 0xde, 0x05, 0x4c, 0xae, 0xe7, 0x3c, 0x75,
+               0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x07,
+               0xa6, 0xef, 0x34, 0x7d, 0x9f, 0xd6, 0x0d, 0x44,
+               0xd4, 0x9d, 0x46, 0x0f, 0xed, 0xa4, 0x7f, 0x36,
+               0x42, 0x0b, 0xd0, 0x99, 0x7b, 0x32, 0xe9, 0xa0,
+               0x30, 0x79, 0xa2, 0xeb, 0x09, 0x40, 0x9b, 0xd2,
+               0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26,
+               0xb6, 0xff, 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54,
+               0x20, 0x69, 0xb2, 0xfb, 0x19, 0x50, 0x8b, 0xc2,
+               0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, 0xf9, 0xb0,
+               0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3,
+               0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81,
+               0xf5, 0xbc, 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17,
+               0x87, 0xce, 0x15, 0x5c, 0xbe, 0xf7, 0x2c, 0x65,
+       },
+       {
+               0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb,
+               0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81,
+               0xd4, 0x9e, 0x40, 0x0a, 0xe1, 0xab, 0x75, 0x3f,
+               0xbe, 0xf4, 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55,
+               0xb5, 0xff, 0x21, 0x6b, 0x80, 0xca, 0x14, 0x5e,
+               0xdf, 0x95, 0x4b, 0x01, 0xea, 0xa0, 0x7e, 0x34,
+               0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a,
+               0x0b, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0,
+               0x77, 0x3d, 0xe3, 0xa9, 0x42, 0x08, 0xd6, 0x9c,
+               0x1d, 0x57, 0x89, 0xc3, 0x28, 0x62, 0xbc, 0xf6,
+               0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc, 0x02, 0x48,
+               0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22,
+               0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29,
+               0xa8, 0xe2, 0x3c, 0x76, 0x9d, 0xd7, 0x09, 0x43,
+               0x16, 0x5c, 0x82, 0xc8, 0x23, 0x69, 0xb7, 0xfd,
+               0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x03, 0xdd, 0x97,
+               0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x05,
+               0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f,
+               0x3a, 0x70, 0xae, 0xe4, 0x0f, 0x45, 0x9b, 0xd1,
+               0x50, 0x1a, 0xc4, 0x8e, 0x65, 0x2f, 0xf1, 0xbb,
+               0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, 0xfa, 0xb0,
+               0x31, 0x7b, 0xa5, 0xef, 0x04, 0x4e, 0x90, 0xda,
+               0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64,
+               0xe5, 0xaf, 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0x0e,
+               0x99, 0xd3, 0x0d, 0x47, 0xac, 0xe6, 0x38, 0x72,
+               0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, 0x52, 0x18,
+               0x4d, 0x07, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6,
+               0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc,
+               0x2c, 0x66, 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7,
+               0x46, 0x0c, 0xd2, 0x98, 0x73, 0x39, 0xe7, 0xad,
+               0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, 0x59, 0x13,
+               0x92, 0xd8, 0x06, 0x4c, 0xa7, 0xed, 0x33, 0x79,
+       },
+       {
+               0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec,
+               0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e,
+               0xc4, 0x8f, 0x52, 0x19, 0xf5, 0xbe, 0x63, 0x28,
+               0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, 0x01, 0x4a,
+               0x95, 0xde, 0x03, 0x48, 0xa4, 0xef, 0x32, 0x79,
+               0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b,
+               0x51, 0x1a, 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd,
+               0x33, 0x78, 0xa5, 0xee, 0x02, 0x49, 0x94, 0xdf,
+               0x37, 0x7c, 0xa1, 0xea, 0x06, 0x4d, 0x90, 0xdb,
+               0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9,
+               0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f,
+               0x91, 0xda, 0x07, 0x4c, 0xa0, 0xeb, 0x36, 0x7d,
+               0xa2, 0xe9, 0x34, 0x7f, 0x93, 0xd8, 0x05, 0x4e,
+               0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, 0x67, 0x2c,
+               0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a,
+               0x04, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8,
+               0x6e, 0x25, 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82,
+               0x0c, 0x47, 0x9a, 0xd1, 0x3d, 0x76, 0xab, 0xe0,
+               0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, 0x0d, 0x46,
+               0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24,
+               0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17,
+               0x99, 0xd2, 0x0f, 0x44, 0xa8, 0xe3, 0x3e, 0x75,
+               0x3f, 0x74, 0xa9, 0xe2, 0x0e, 0x45, 0x98, 0xd3,
+               0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, 0xfa, 0xb1,
+               0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5,
+               0x3b, 0x70, 0xad, 0xe6, 0x0a, 0x41, 0x9c, 0xd7,
+               0x9d, 0xd6, 0x0b, 0x40, 0xac, 0xe7, 0x3a, 0x71,
+               0xff, 0xb4, 0x69, 0x22, 0xce, 0x85, 0x58, 0x13,
+               0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, 0x6b, 0x20,
+               0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x09, 0x42,
+               0x08, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4,
+               0x6a, 0x21, 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86,
+       },
+       {
+               0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9,
+               0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3,
+               0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x01, 0x4d,
+               0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17,
+               0x75, 0x39, 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c,
+               0x2f, 0x63, 0xb7, 0xfb, 0x02, 0x4e, 0x9a, 0xd6,
+               0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, 0x74, 0x38,
+               0x9b, 0xd7, 0x03, 0x4f, 0xb6, 0xfa, 0x2e, 0x62,
+               0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13,
+               0xb0, 0xfc, 0x28, 0x64, 0x9d, 0xd1, 0x05, 0x49,
+               0x5e, 0x12, 0xc6, 0x8a, 0x73, 0x3f, 0xeb, 0xa7,
+               0x04, 0x48, 0x9c, 0xd0, 0x29, 0x65, 0xb1, 0xfd,
+               0x9f, 0xd3, 0x07, 0x4b, 0xb2, 0xfe, 0x2a, 0x66,
+               0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c,
+               0x2b, 0x67, 0xb3, 0xff, 0x06, 0x4a, 0x9e, 0xd2,
+               0x71, 0x3d, 0xe9, 0xa5, 0x5c, 0x10, 0xc4, 0x88,
+               0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, 0x7c, 0x30,
+               0x93, 0xdf, 0x0b, 0x47, 0xbe, 0xf2, 0x26, 0x6a,
+               0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84,
+               0x27, 0x6b, 0xbf, 0xf3, 0x0a, 0x46, 0x92, 0xde,
+               0xbc, 0xf0, 0x24, 0x68, 0x91, 0xdd, 0x09, 0x45,
+               0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, 0x53, 0x1f,
+               0x08, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1,
+               0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab,
+               0x23, 0x6f, 0xbb, 0xf7, 0x0e, 0x42, 0x96, 0xda,
+               0x79, 0x35, 0xe1, 0xad, 0x54, 0x18, 0xcc, 0x80,
+               0x97, 0xdb, 0x0f, 0x43, 0xba, 0xf6, 0x22, 0x6e,
+               0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34,
+               0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf,
+               0x0c, 0x40, 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5,
+               0xe2, 0xae, 0x7a, 0x36, 0xcf, 0x83, 0x57, 0x1b,
+               0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, 0x0d, 0x41,
+       },
+       {
+               0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe,
+               0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac,
+               0xa4, 0xe9, 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a,
+               0xf6, 0xbb, 0x6c, 0x21, 0xdf, 0x92, 0x45, 0x08,
+               0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, 0xe6, 0xab,
+               0x07, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9,
+               0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0x0f,
+               0xa3, 0xee, 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d,
+               0xaa, 0xe7, 0x30, 0x7d, 0x83, 0xce, 0x19, 0x54,
+               0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, 0x4b, 0x06,
+               0x0e, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0,
+               0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2,
+               0xff, 0xb2, 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x01,
+               0xad, 0xe0, 0x37, 0x7a, 0x84, 0xc9, 0x1e, 0x53,
+               0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, 0xe8, 0xa5,
+               0x09, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7,
+               0x49, 0x04, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7,
+               0x1b, 0x56, 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5,
+               0xed, 0xa0, 0x77, 0x3a, 0xc4, 0x89, 0x5e, 0x13,
+               0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, 0x0c, 0x41,
+               0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2,
+               0x4e, 0x03, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0,
+               0xb8, 0xf5, 0x22, 0x6f, 0x91, 0xdc, 0x0b, 0x46,
+               0xea, 0xa7, 0x70, 0x3d, 0xc3, 0x8e, 0x59, 0x14,
+               0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, 0x50, 0x1d,
+               0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x02, 0x4f,
+               0x47, 0x0a, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9,
+               0x15, 0x58, 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb,
+               0xb6, 0xfb, 0x2c, 0x61, 0x9f, 0xd2, 0x05, 0x48,
+               0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, 0x57, 0x1a,
+               0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec,
+               0x40, 0x0d, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe,
+       },
+       {
+               0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+               0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd,
+               0x94, 0xda, 0x08, 0x46, 0xb1, 0xff, 0x2d, 0x63,
+               0xde, 0x90, 0x42, 0x0c, 0xfb, 0xb5, 0x67, 0x29,
+               0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2,
+               0x7f, 0x31, 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88,
+               0xa1, 0xef, 0x3d, 0x73, 0x84, 0xca, 0x18, 0x56,
+               0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, 0x52, 0x1c,
+               0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x01, 0xd3, 0x9d,
+               0x20, 0x6e, 0xbc, 0xf2, 0x05, 0x4b, 0x99, 0xd7,
+               0xfe, 0xb0, 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x09,
+               0xb4, 0xfa, 0x28, 0x66, 0x91, 0xdf, 0x0d, 0x43,
+               0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, 0xe6, 0xa8,
+               0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2,
+               0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c,
+               0x81, 0xcf, 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76,
+               0xd4, 0x9a, 0x48, 0x06, 0xf1, 0xbf, 0x6d, 0x23,
+               0x9e, 0xd0, 0x02, 0x4c, 0xbb, 0xf5, 0x27, 0x69,
+               0x40, 0x0e, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7,
+               0x0a, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd,
+               0xe1, 0xaf, 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16,
+               0xab, 0xe5, 0x37, 0x79, 0x8e, 0xc0, 0x12, 0x5c,
+               0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, 0xcc, 0x82,
+               0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8,
+               0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x07, 0x49,
+               0xf4, 0xba, 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x03,
+               0x2a, 0x64, 0xb6, 0xf8, 0x0f, 0x41, 0x93, 0xdd,
+               0x60, 0x2e, 0xfc, 0xb2, 0x45, 0x0b, 0xd9, 0x97,
+               0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c,
+               0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36,
+               0x1f, 0x51, 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8,
+               0x55, 0x1b, 0xc9, 0x87, 0x70, 0x3e, 0xec, 0xa2,
+       },
+       {
+               0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0,
+               0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2,
+               0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74,
+               0xc6, 0x89, 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36,
+               0x15, 0x5a, 0x8b, 0xc4, 0x34, 0x7b, 0xaa, 0xe5,
+               0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, 0xe8, 0xa7,
+               0x91, 0xde, 0x0f, 0x40, 0xb0, 0xff, 0x2e, 0x61,
+               0xd3, 0x9c, 0x4d, 0x02, 0xf2, 0xbd, 0x6c, 0x23,
+               0x2a, 0x65, 0xb4, 0xfb, 0x0b, 0x44, 0x95, 0xda,
+               0x68, 0x27, 0xf6, 0xb9, 0x49, 0x06, 0xd7, 0x98,
+               0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, 0x11, 0x5e,
+               0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c,
+               0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf,
+               0x7d, 0x32, 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d,
+               0xbb, 0xf4, 0x25, 0x6a, 0x9a, 0xd5, 0x04, 0x4b,
+               0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, 0x46, 0x09,
+               0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4,
+               0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6,
+               0xd0, 0x9f, 0x4e, 0x01, 0xf1, 0xbe, 0x6f, 0x20,
+               0x92, 0xdd, 0x0c, 0x43, 0xb3, 0xfc, 0x2d, 0x62,
+               0x41, 0x0e, 0xdf, 0x90, 0x60, 0x2f, 0xfe, 0xb1,
+               0x03, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3,
+               0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35,
+               0x87, 0xc8, 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77,
+               0x7e, 0x31, 0xe0, 0xaf, 0x5f, 0x10, 0xc1, 0x8e,
+               0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, 0x83, 0xcc,
+               0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0x0a,
+               0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x07, 0x48,
+               0x6b, 0x24, 0xf5, 0xba, 0x4a, 0x05, 0xd4, 0x9b,
+               0x29, 0x66, 0xb7, 0xf8, 0x08, 0x47, 0x96, 0xd9,
+               0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, 0x50, 0x1f,
+               0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d,
+       },
+       {
+               0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad,
+               0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17,
+               0x69, 0x39, 0xc9, 0x99, 0x34, 0x64, 0x94, 0xc4,
+               0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, 0x2e, 0x7e,
+               0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f,
+               0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5,
+               0xbb, 0xeb, 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16,
+               0x01, 0x51, 0xa1, 0xf1, 0x5c, 0x0c, 0xfc, 0xac,
+               0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, 0x44, 0x14,
+               0x03, 0x53, 0xa3, 0xf3, 0x5e, 0x0e, 0xfe, 0xae,
+               0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d,
+               0x6a, 0x3a, 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7,
+               0x6b, 0x3b, 0xcb, 0x9b, 0x36, 0x66, 0x96, 0xc6,
+               0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, 0x2c, 0x7c,
+               0x02, 0x52, 0xa2, 0xf2, 0x5f, 0x0f, 0xff, 0xaf,
+               0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15,
+               0x6f, 0x3f, 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2,
+               0xd5, 0x85, 0x75, 0x25, 0x88, 0xd8, 0x28, 0x78,
+               0x06, 0x56, 0xa6, 0xf6, 0x5b, 0x0b, 0xfb, 0xab,
+               0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11,
+               0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10,
+               0x07, 0x57, 0xa7, 0xf7, 0x5a, 0x0a, 0xfa, 0xaa,
+               0xd4, 0x84, 0x74, 0x24, 0x89, 0xd9, 0x29, 0x79,
+               0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, 0x93, 0xc3,
+               0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b,
+               0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1,
+               0xbf, 0xef, 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12,
+               0x05, 0x55, 0xa5, 0xf5, 0x58, 0x08, 0xf8, 0xa8,
+               0x04, 0x54, 0xa4, 0xf4, 0x59, 0x09, 0xf9, 0xa9,
+               0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13,
+               0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0,
+               0xd7, 0x87, 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a,
+       },
+       {
+               0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa,
+               0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18,
+               0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3,
+               0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61,
+               0xf2, 0xa3, 0x50, 0x01, 0xab, 0xfa, 0x09, 0x58,
+               0x40, 0x11, 0xe2, 0xb3, 0x19, 0x48, 0xbb, 0xea,
+               0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, 0x70, 0x21,
+               0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93,
+               0xf9, 0xa8, 0x5b, 0x0a, 0xa0, 0xf1, 0x02, 0x53,
+               0x4b, 0x1a, 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1,
+               0x80, 0xd1, 0x22, 0x73, 0xd9, 0x88, 0x7b, 0x2a,
+               0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, 0xc9, 0x98,
+               0x0b, 0x5a, 0xa9, 0xf8, 0x52, 0x03, 0xf0, 0xa1,
+               0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13,
+               0x72, 0x23, 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8,
+               0xc0, 0x91, 0x62, 0x33, 0x99, 0xc8, 0x3b, 0x6a,
+               0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, 0x14, 0x45,
+               0x5d, 0x0c, 0xff, 0xae, 0x04, 0x55, 0xa6, 0xf7,
+               0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c,
+               0x24, 0x75, 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e,
+               0x1d, 0x4c, 0xbf, 0xee, 0x44, 0x15, 0xe6, 0xb7,
+               0xaf, 0xfe, 0x0d, 0x5c, 0xf6, 0xa7, 0x54, 0x05,
+               0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce,
+               0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c,
+               0x16, 0x47, 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc,
+               0xa4, 0xf5, 0x06, 0x57, 0xfd, 0xac, 0x5f, 0x0e,
+               0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, 0x94, 0xc5,
+               0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77,
+               0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e,
+               0x56, 0x07, 0xf4, 0xa5, 0x0f, 0x5e, 0xad, 0xfc,
+               0x9d, 0xcc, 0x3f, 0x6e, 0xc4, 0x95, 0x66, 0x37,
+               0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, 0xd4, 0x85,
+       },
+       {
+               0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3,
+               0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09,
+               0x49, 0x1b, 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea,
+               0xe3, 0xb1, 0x47, 0x15, 0xb6, 0xe4, 0x12, 0x40,
+               0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, 0x63, 0x31,
+               0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b,
+               0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78,
+               0x71, 0x23, 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2,
+               0x39, 0x6b, 0x9d, 0xcf, 0x6c, 0x3e, 0xc8, 0x9a,
+               0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, 0x62, 0x30,
+               0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3,
+               0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79,
+               0xab, 0xf9, 0x0f, 0x5d, 0xfe, 0xac, 0x5a, 0x08,
+               0x01, 0x53, 0xa5, 0xf7, 0x54, 0x06, 0xf0, 0xa2,
+               0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, 0x13, 0x41,
+               0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb,
+               0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1,
+               0xd8, 0x8a, 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b,
+               0x3b, 0x69, 0x9f, 0xcd, 0x6e, 0x3c, 0xca, 0x98,
+               0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, 0x60, 0x32,
+               0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43,
+               0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9,
+               0xa9, 0xfb, 0x0d, 0x5f, 0xfc, 0xae, 0x58, 0x0a,
+               0x03, 0x51, 0xa7, 0xf5, 0x56, 0x04, 0xf2, 0xa0,
+               0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, 0xba, 0xe8,
+               0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42,
+               0x02, 0x50, 0xa6, 0xf4, 0x57, 0x05, 0xf3, 0xa1,
+               0xa8, 0xfa, 0x0c, 0x5e, 0xfd, 0xaf, 0x59, 0x0b,
+               0xd9, 0x8b, 0x7d, 0x2f, 0x8c, 0xde, 0x28, 0x7a,
+               0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, 0x82, 0xd0,
+               0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33,
+               0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99,
+       },
+       {
+               0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+               0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06,
+               0x59, 0x0a, 0xff, 0xac, 0x08, 0x5b, 0xae, 0xfd,
+               0xfb, 0xa8, 0x5d, 0x0e, 0xaa, 0xf9, 0x0c, 0x5f,
+               0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16,
+               0x10, 0x43, 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4,
+               0xeb, 0xb8, 0x4d, 0x1e, 0xba, 0xe9, 0x1c, 0x4f,
+               0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, 0xbe, 0xed,
+               0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd,
+               0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f,
+               0x20, 0x73, 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84,
+               0x82, 0xd1, 0x24, 0x77, 0xd3, 0x80, 0x75, 0x26,
+               0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, 0x3c, 0x6f,
+               0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd,
+               0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36,
+               0x30, 0x63, 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94,
+               0xf2, 0xa1, 0x54, 0x07, 0xa3, 0xf0, 0x05, 0x56,
+               0x50, 0x03, 0xf6, 0xa5, 0x01, 0x52, 0xa7, 0xf4,
+               0xab, 0xf8, 0x0d, 0x5e, 0xfa, 0xa9, 0x5c, 0x0f,
+               0x09, 0x5a, 0xaf, 0xfc, 0x58, 0x0b, 0xfe, 0xad,
+               0x40, 0x13, 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4,
+               0xe2, 0xb1, 0x44, 0x17, 0xb3, 0xe0, 0x15, 0x46,
+               0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, 0xee, 0xbd,
+               0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f,
+               0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f,
+               0x29, 0x7a, 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d,
+               0xd2, 0x81, 0x74, 0x27, 0x83, 0xd0, 0x25, 0x76,
+               0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, 0x87, 0xd4,
+               0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d,
+               0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f,
+               0x60, 0x33, 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4,
+               0xc2, 0x91, 0x64, 0x37, 0x93, 0xc0, 0x35, 0x66,
+       },
+       {
+               0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1,
+               0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b,
+               0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98,
+               0xb3, 0xe7, 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x02,
+               0x52, 0x06, 0xfa, 0xae, 0x1f, 0x4b, 0xb7, 0xe3,
+               0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, 0x2d, 0x79,
+               0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca,
+               0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x04, 0x50,
+               0xa4, 0xf0, 0x0c, 0x58, 0xe9, 0xbd, 0x41, 0x15,
+               0x3e, 0x6a, 0x96, 0xc2, 0x73, 0x27, 0xdb, 0x8f,
+               0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, 0x68, 0x3c,
+               0x17, 0x43, 0xbf, 0xeb, 0x5a, 0x0e, 0xf2, 0xa6,
+               0xf6, 0xa2, 0x5e, 0x0a, 0xbb, 0xef, 0x13, 0x47,
+               0x6c, 0x38, 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd,
+               0xdf, 0x8b, 0x77, 0x23, 0x92, 0xc6, 0x3a, 0x6e,
+               0x45, 0x11, 0xed, 0xb9, 0x08, 0x5c, 0xa0, 0xf4,
+               0x55, 0x01, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4,
+               0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e,
+               0x7c, 0x28, 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd,
+               0xe6, 0xb2, 0x4e, 0x1a, 0xab, 0xff, 0x03, 0x57,
+               0x07, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, 0xe2, 0xb6,
+               0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c,
+               0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f,
+               0xb4, 0xe0, 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x05,
+               0xf1, 0xa5, 0x59, 0x0d, 0xbc, 0xe8, 0x14, 0x40,
+               0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, 0x8e, 0xda,
+               0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69,
+               0x42, 0x16, 0xea, 0xbe, 0x0f, 0x5b, 0xa7, 0xf3,
+               0xa3, 0xf7, 0x0b, 0x5f, 0xee, 0xba, 0x46, 0x12,
+               0x39, 0x6d, 0x91, 0xc5, 0x74, 0x20, 0xdc, 0x88,
+               0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, 0x6f, 0x3b,
+               0x10, 0x44, 0xb8, 0xec, 0x5d, 0x09, 0xf5, 0xa1,
+       },
+       {
+               0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6,
+               0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24,
+               0x39, 0x6c, 0x93, 0xc6, 0x70, 0x25, 0xda, 0x8f,
+               0xab, 0xfe, 0x01, 0x54, 0xe2, 0xb7, 0x48, 0x1d,
+               0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4,
+               0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x03, 0x56,
+               0x4b, 0x1e, 0xe1, 0xb4, 0x02, 0x57, 0xa8, 0xfd,
+               0xd9, 0x8c, 0x73, 0x26, 0x90, 0xc5, 0x3a, 0x6f,
+               0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, 0x07, 0x52,
+               0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0,
+               0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b,
+               0x4f, 0x1a, 0xe5, 0xb0, 0x06, 0x53, 0xac, 0xf9,
+               0x96, 0xc3, 0x3c, 0x69, 0xdf, 0x8a, 0x75, 0x20,
+               0x04, 0x51, 0xae, 0xfb, 0x4d, 0x18, 0xe7, 0xb2,
+               0xaf, 0xfa, 0x05, 0x50, 0xe6, 0xb3, 0x4c, 0x19,
+               0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b,
+               0xd5, 0x80, 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63,
+               0x47, 0x12, 0xed, 0xb8, 0x0e, 0x5b, 0xa4, 0xf1,
+               0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, 0x0f, 0x5a,
+               0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8,
+               0xa7, 0xf2, 0x0d, 0x58, 0xee, 0xbb, 0x44, 0x11,
+               0x35, 0x60, 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83,
+               0x9e, 0xcb, 0x34, 0x61, 0xd7, 0x82, 0x7d, 0x28,
+               0x0c, 0x59, 0xa6, 0xf3, 0x45, 0x10, 0xef, 0xba,
+               0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87,
+               0xa3, 0xf6, 0x09, 0x5c, 0xea, 0xbf, 0x40, 0x15,
+               0x08, 0x5d, 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe,
+               0x9a, 0xcf, 0x30, 0x65, 0xd3, 0x86, 0x79, 0x2c,
+               0x43, 0x16, 0xe9, 0xbc, 0x0a, 0x5f, 0xa0, 0xf5,
+               0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67,
+               0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc,
+               0xe8, 0xbd, 0x42, 0x17, 0xa1, 0xf4, 0x0b, 0x5e,
+       },
+       {
+               0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf,
+               0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35,
+               0x09, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6,
+               0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c,
+               0x12, 0x44, 0xbe, 0xe8, 0x57, 0x01, 0xfb, 0xad,
+               0x98, 0xce, 0x34, 0x62, 0xdd, 0x8b, 0x71, 0x27,
+               0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x08, 0xf2, 0xa4,
+               0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e,
+               0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b,
+               0xae, 0xf8, 0x02, 0x54, 0xeb, 0xbd, 0x47, 0x11,
+               0x2d, 0x7b, 0x81, 0xd7, 0x68, 0x3e, 0xc4, 0x92,
+               0xa7, 0xf1, 0x0b, 0x5d, 0xe2, 0xb4, 0x4e, 0x18,
+               0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89,
+               0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x03,
+               0x3f, 0x69, 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80,
+               0xb5, 0xe3, 0x19, 0x4f, 0xf0, 0xa6, 0x5c, 0x0a,
+               0x48, 0x1e, 0xe4, 0xb2, 0x0d, 0x5b, 0xa1, 0xf7,
+               0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d,
+               0x41, 0x17, 0xed, 0xbb, 0x04, 0x52, 0xa8, 0xfe,
+               0xcb, 0x9d, 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74,
+               0x5a, 0x0c, 0xf6, 0xa0, 0x1f, 0x49, 0xb3, 0xe5,
+               0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, 0x39, 0x6f,
+               0x53, 0x05, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec,
+               0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66,
+               0x6c, 0x3a, 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3,
+               0xe6, 0xb0, 0x4a, 0x1c, 0xa3, 0xf5, 0x0f, 0x59,
+               0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, 0x8c, 0xda,
+               0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x06, 0x50,
+               0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1,
+               0xf4, 0xa2, 0x58, 0x0e, 0xb1, 0xe7, 0x1d, 0x4b,
+               0x77, 0x21, 0xdb, 0x8d, 0x32, 0x64, 0x9e, 0xc8,
+               0xfd, 0xab, 0x51, 0x07, 0xb8, 0xee, 0x14, 0x42,
+       },
+       {
+               0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8,
+               0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a,
+               0x19, 0x4e, 0xb7, 0xe0, 0x58, 0x0f, 0xf6, 0xa1,
+               0x9b, 0xcc, 0x35, 0x62, 0xda, 0x8d, 0x74, 0x23,
+               0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, 0xdd, 0x8a,
+               0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x08,
+               0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93,
+               0xa9, 0xfe, 0x07, 0x50, 0xe8, 0xbf, 0x46, 0x11,
+               0x64, 0x33, 0xca, 0x9d, 0x25, 0x72, 0x8b, 0xdc,
+               0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, 0x09, 0x5e,
+               0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5,
+               0xff, 0xa8, 0x51, 0x06, 0xbe, 0xe9, 0x10, 0x47,
+               0x56, 0x01, 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee,
+               0xd4, 0x83, 0x7a, 0x2d, 0x95, 0xc2, 0x3b, 0x6c,
+               0x4f, 0x18, 0xe1, 0xb6, 0x0e, 0x59, 0xa0, 0xf7,
+               0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75,
+               0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70,
+               0x4a, 0x1d, 0xe4, 0xb3, 0x0b, 0x5c, 0xa5, 0xf2,
+               0xd1, 0x86, 0x7f, 0x28, 0x90, 0xc7, 0x3e, 0x69,
+               0x53, 0x04, 0xfd, 0xaa, 0x12, 0x45, 0xbc, 0xeb,
+               0xfa, 0xad, 0x54, 0x03, 0xbb, 0xec, 0x15, 0x42,
+               0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0,
+               0xe3, 0xb4, 0x4d, 0x1a, 0xa2, 0xf5, 0x0c, 0x5b,
+               0x61, 0x36, 0xcf, 0x98, 0x20, 0x77, 0x8e, 0xd9,
+               0xac, 0xfb, 0x02, 0x55, 0xed, 0xba, 0x43, 0x14,
+               0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96,
+               0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0x0d,
+               0x37, 0x60, 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f,
+               0x9e, 0xc9, 0x30, 0x67, 0xdf, 0x88, 0x71, 0x26,
+               0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0x0a, 0xf3, 0xa4,
+               0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f,
+               0x05, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd,
+       },
+       {
+               0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95,
+               0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f,
+               0xe9, 0xb1, 0x59, 0x01, 0x94, 0xcc, 0x24, 0x7c,
+               0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86,
+               0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x02, 0x5a,
+               0x35, 0x6d, 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0,
+               0x26, 0x7e, 0x96, 0xce, 0x5b, 0x03, 0xeb, 0xb3,
+               0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, 0x11, 0x49,
+               0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16,
+               0x79, 0x21, 0xc9, 0x91, 0x04, 0x5c, 0xb4, 0xec,
+               0x6a, 0x32, 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff,
+               0x90, 0xc8, 0x20, 0x78, 0xed, 0xb5, 0x5d, 0x05,
+               0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, 0x81, 0xd9,
+               0xb6, 0xee, 0x06, 0x5e, 0xcb, 0x93, 0x7b, 0x23,
+               0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30,
+               0x5f, 0x07, 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca,
+               0x1b, 0x43, 0xab, 0xf3, 0x66, 0x3e, 0xd6, 0x8e,
+               0xe1, 0xb9, 0x51, 0x09, 0x9c, 0xc4, 0x2c, 0x74,
+               0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67,
+               0x08, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d,
+               0xd4, 0x8c, 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41,
+               0x2e, 0x76, 0x9e, 0xc6, 0x53, 0x0b, 0xe3, 0xbb,
+               0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, 0xf0, 0xa8,
+               0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0x0a, 0x52,
+               0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0x0d,
+               0x62, 0x3a, 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7,
+               0x71, 0x29, 0xc1, 0x99, 0x0c, 0x54, 0xbc, 0xe4,
+               0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, 0x46, 0x1e,
+               0x57, 0x0f, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2,
+               0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38,
+               0xbe, 0xe6, 0x0e, 0x56, 0xc3, 0x9b, 0x73, 0x2b,
+               0x44, 0x1c, 0xf4, 0xac, 0x39, 0x61, 0x89, 0xd1,
+       },
+       {
+               0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92,
+               0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60,
+               0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b,
+               0x0b, 0x52, 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99,
+               0xef, 0xb6, 0x5d, 0x04, 0x96, 0xcf, 0x24, 0x7d,
+               0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, 0xd6, 0x8f,
+               0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84,
+               0xe4, 0xbd, 0x56, 0x0f, 0x9d, 0xc4, 0x2f, 0x76,
+               0xc3, 0x9a, 0x71, 0x28, 0xba, 0xe3, 0x08, 0x51,
+               0x31, 0x68, 0x83, 0xda, 0x48, 0x11, 0xfa, 0xa3,
+               0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, 0xf1, 0xa8,
+               0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x03, 0x5a,
+               0x2c, 0x75, 0x9e, 0xc7, 0x55, 0x0c, 0xe7, 0xbe,
+               0xde, 0x87, 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c,
+               0xd5, 0x8c, 0x67, 0x3e, 0xac, 0xf5, 0x1e, 0x47,
+               0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x07, 0xec, 0xb5,
+               0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x09,
+               0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb,
+               0x62, 0x3b, 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0,
+               0x90, 0xc9, 0x22, 0x7b, 0xe9, 0xb0, 0x5b, 0x02,
+               0x74, 0x2d, 0xc6, 0x9f, 0x0d, 0x54, 0xbf, 0xe6,
+               0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14,
+               0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f,
+               0x7f, 0x26, 0xcd, 0x94, 0x06, 0x5f, 0xb4, 0xed,
+               0x58, 0x01, 0xea, 0xb3, 0x21, 0x78, 0x93, 0xca,
+               0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, 0x61, 0x38,
+               0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33,
+               0x53, 0x0a, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1,
+               0xb7, 0xee, 0x05, 0x5c, 0xce, 0x97, 0x7c, 0x25,
+               0x45, 0x1c, 0xf7, 0xae, 0x3c, 0x65, 0x8e, 0xd7,
+               0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, 0x85, 0xdc,
+               0xbc, 0xe5, 0x0e, 0x57, 0xc5, 0x9c, 0x77, 0x2e,
+       },
+       {
+               0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b,
+               0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71,
+               0xc9, 0x93, 0x7d, 0x27, 0xbc, 0xe6, 0x08, 0x52,
+               0x23, 0x79, 0x97, 0xcd, 0x56, 0x0c, 0xe2, 0xb8,
+               0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14,
+               0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe,
+               0x46, 0x1c, 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd,
+               0xac, 0xf6, 0x18, 0x42, 0xd9, 0x83, 0x6d, 0x37,
+               0x03, 0x59, 0xb7, 0xed, 0x76, 0x2c, 0xc2, 0x98,
+               0xe9, 0xb3, 0x5d, 0x07, 0x9c, 0xc6, 0x28, 0x72,
+               0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0x0b, 0x51,
+               0x20, 0x7a, 0x94, 0xce, 0x55, 0x0f, 0xe1, 0xbb,
+               0x8c, 0xd6, 0x38, 0x62, 0xf9, 0xa3, 0x4d, 0x17,
+               0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, 0xa7, 0xfd,
+               0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde,
+               0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34,
+               0x06, 0x5c, 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d,
+               0xec, 0xb6, 0x58, 0x02, 0x99, 0xc3, 0x2d, 0x77,
+               0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, 0x0e, 0x54,
+               0x25, 0x7f, 0x91, 0xcb, 0x50, 0x0a, 0xe4, 0xbe,
+               0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12,
+               0x63, 0x39, 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8,
+               0x40, 0x1a, 0xf4, 0xae, 0x35, 0x6f, 0x81, 0xdb,
+               0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, 0x6b, 0x31,
+               0x05, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e,
+               0xef, 0xb5, 0x5b, 0x01, 0x9a, 0xc0, 0x2e, 0x74,
+               0xcc, 0x96, 0x78, 0x22, 0xb9, 0xe3, 0x0d, 0x57,
+               0x26, 0x7c, 0x92, 0xc8, 0x53, 0x09, 0xe7, 0xbd,
+               0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, 0x4b, 0x11,
+               0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb,
+               0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8,
+               0xa9, 0xf3, 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32,
+       },
+       {
+               0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c,
+               0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e,
+               0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45,
+               0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7,
+               0xaf, 0xf4, 0x19, 0x42, 0xde, 0x85, 0x68, 0x33,
+               0x4d, 0x16, 0xfb, 0xa0, 0x3c, 0x67, 0x8a, 0xd1,
+               0x76, 0x2d, 0xc0, 0x9b, 0x07, 0x5c, 0xb1, 0xea,
+               0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x08,
+               0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf,
+               0xa1, 0xfa, 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d,
+               0x9a, 0xc1, 0x2c, 0x77, 0xeb, 0xb0, 0x5d, 0x06,
+               0x78, 0x23, 0xce, 0x95, 0x09, 0x52, 0xbf, 0xe4,
+               0xec, 0xb7, 0x5a, 0x01, 0x9d, 0xc6, 0x2b, 0x70,
+               0x0e, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92,
+               0x35, 0x6e, 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9,
+               0xd7, 0x8c, 0x61, 0x3a, 0xa6, 0xfd, 0x10, 0x4b,
+               0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, 0x41, 0x1a,
+               0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8,
+               0x5f, 0x04, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3,
+               0xbd, 0xe6, 0x0b, 0x50, 0xcc, 0x97, 0x7a, 0x21,
+               0x29, 0x72, 0x9f, 0xc4, 0x58, 0x03, 0xee, 0xb5,
+               0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1, 0x0c, 0x57,
+               0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c,
+               0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e,
+               0xc5, 0x9e, 0x73, 0x28, 0xb4, 0xef, 0x02, 0x59,
+               0x27, 0x7c, 0x91, 0xca, 0x56, 0x0d, 0xe0, 0xbb,
+               0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, 0xdb, 0x80,
+               0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62,
+               0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6,
+               0x88, 0xd3, 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14,
+               0xb3, 0xe8, 0x05, 0x5e, 0xc2, 0x99, 0x74, 0x2f,
+               0x51, 0x0a, 0xe7, 0xbc, 0x20, 0x7b, 0x96, 0xcd,
+       },
+       {
+               0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89,
+               0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53,
+               0xa9, 0xf5, 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20,
+               0x73, 0x2f, 0xcb, 0x97, 0x1e, 0x42, 0xa6, 0xfa,
+               0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, 0x9a, 0xc6,
+               0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c,
+               0xe6, 0xba, 0x5e, 0x02, 0x8b, 0xd7, 0x33, 0x6f,
+               0x3c, 0x60, 0x84, 0xd8, 0x51, 0x0d, 0xe9, 0xb5,
+               0x9e, 0xc2, 0x26, 0x7a, 0xf3, 0xaf, 0x4b, 0x17,
+               0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, 0x91, 0xcd,
+               0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x06, 0xe2, 0xbe,
+               0xed, 0xb1, 0x55, 0x09, 0x80, 0xdc, 0x38, 0x64,
+               0xd1, 0x8d, 0x69, 0x35, 0xbc, 0xe0, 0x04, 0x58,
+               0x0b, 0x57, 0xb3, 0xef, 0x66, 0x3a, 0xde, 0x82,
+               0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, 0xad, 0xf1,
+               0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b,
+               0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8,
+               0xfb, 0xa7, 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72,
+               0x88, 0xd4, 0x30, 0x6c, 0xe5, 0xb9, 0x5d, 0x01,
+               0x52, 0x0e, 0xea, 0xb6, 0x3f, 0x63, 0x87, 0xdb,
+               0x6e, 0x32, 0xd6, 0x8a, 0x03, 0x5f, 0xbb, 0xe7,
+               0xb4, 0xe8, 0x0c, 0x50, 0xd9, 0x85, 0x61, 0x3d,
+               0xc7, 0x9b, 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e,
+               0x1d, 0x41, 0xa5, 0xf9, 0x70, 0x2c, 0xc8, 0x94,
+               0xbf, 0xe3, 0x07, 0x5b, 0xd2, 0x8e, 0x6a, 0x36,
+               0x65, 0x39, 0xdd, 0x81, 0x08, 0x54, 0xb0, 0xec,
+               0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f,
+               0xcc, 0x90, 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45,
+               0xf0, 0xac, 0x48, 0x14, 0x9d, 0xc1, 0x25, 0x79,
+               0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b, 0xff, 0xa3,
+               0x59, 0x05, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0,
+               0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0x0a,
+       },
+       {
+               0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e,
+               0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c,
+               0xb9, 0xe4, 0x03, 0x5e, 0xd0, 0x8d, 0x6a, 0x37,
+               0x6b, 0x36, 0xd1, 0x8c, 0x02, 0x5f, 0xb8, 0xe5,
+               0x6f, 0x32, 0xd5, 0x88, 0x06, 0x5b, 0xbc, 0xe1,
+               0xbd, 0xe0, 0x07, 0x5a, 0xd4, 0x89, 0x6e, 0x33,
+               0xd6, 0x8b, 0x6c, 0x31, 0xbf, 0xe2, 0x05, 0x58,
+               0x04, 0x59, 0xbe, 0xe3, 0x6d, 0x30, 0xd7, 0x8a,
+               0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0x0d, 0x50,
+               0x0c, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82,
+               0x67, 0x3a, 0xdd, 0x80, 0x0e, 0x53, 0xb4, 0xe9,
+               0xb5, 0xe8, 0x0f, 0x52, 0xdc, 0x81, 0x66, 0x3b,
+               0xb1, 0xec, 0x0b, 0x56, 0xd8, 0x85, 0x62, 0x3f,
+               0x63, 0x3e, 0xd9, 0x84, 0x0a, 0x57, 0xb0, 0xed,
+               0x08, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86,
+               0xda, 0x87, 0x60, 0x3d, 0xb3, 0xee, 0x09, 0x54,
+               0xa1, 0xfc, 0x1b, 0x46, 0xc8, 0x95, 0x72, 0x2f,
+               0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, 0xa0, 0xfd,
+               0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96,
+               0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44,
+               0xce, 0x93, 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40,
+               0x1c, 0x41, 0xa6, 0xfb, 0x75, 0x28, 0xcf, 0x92,
+               0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, 0xa4, 0xf9,
+               0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b,
+               0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1,
+               0xad, 0xf0, 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23,
+               0xc6, 0x9b, 0x7c, 0x21, 0xaf, 0xf2, 0x15, 0x48,
+               0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, 0xc7, 0x9a,
+               0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e,
+               0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c,
+               0xa9, 0xf4, 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27,
+               0x7b, 0x26, 0xc1, 0x9c, 0x12, 0x4f, 0xa8, 0xf5,
+       },
+       {
+               0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87,
+               0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d,
+               0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0x0e,
+               0x43, 0x1d, 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4,
+               0x0f, 0x51, 0xb3, 0xed, 0x6a, 0x34, 0xd6, 0x88,
+               0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, 0x1c, 0x42,
+               0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x01,
+               0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb,
+               0x1e, 0x40, 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99,
+               0xd4, 0x8a, 0x68, 0x36, 0xb1, 0xef, 0x0d, 0x53,
+               0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, 0x4e, 0x10,
+               0x5d, 0x03, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda,
+               0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96,
+               0xdb, 0x85, 0x67, 0x39, 0xbe, 0xe0, 0x02, 0x5c,
+               0x98, 0xc6, 0x24, 0x7a, 0xfd, 0xa3, 0x41, 0x1f,
+               0x52, 0x0c, 0xee, 0xb0, 0x37, 0x69, 0x8b, 0xd5,
+               0x3c, 0x62, 0x80, 0xde, 0x59, 0x07, 0xe5, 0xbb,
+               0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71,
+               0xb5, 0xeb, 0x09, 0x57, 0xd0, 0x8e, 0x6c, 0x32,
+               0x7f, 0x21, 0xc3, 0x9d, 0x1a, 0x44, 0xa6, 0xf8,
+               0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x08, 0xea, 0xb4,
+               0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e,
+               0xba, 0xe4, 0x06, 0x58, 0xdf, 0x81, 0x63, 0x3d,
+               0x70, 0x2e, 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7,
+               0x22, 0x7c, 0x9e, 0xc0, 0x47, 0x19, 0xfb, 0xa5,
+               0xe8, 0xb6, 0x54, 0x0a, 0x8d, 0xd3, 0x31, 0x6f,
+               0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c,
+               0x61, 0x3f, 0xdd, 0x83, 0x04, 0x5a, 0xb8, 0xe6,
+               0x2d, 0x73, 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa,
+               0xe7, 0xb9, 0x5b, 0x05, 0x82, 0xdc, 0x3e, 0x60,
+               0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, 0x7d, 0x23,
+               0x6e, 0x30, 0xd2, 0x8c, 0x0b, 0x55, 0xb7, 0xe9,
+       },
+       {
+               0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80,
+               0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42,
+               0x99, 0xc6, 0x27, 0x78, 0xf8, 0xa7, 0x46, 0x19,
+               0x5b, 0x04, 0xe5, 0xba, 0x3a, 0x65, 0x84, 0xdb,
+               0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf,
+               0xed, 0xb2, 0x53, 0x0c, 0x8c, 0xd3, 0x32, 0x6d,
+               0xb6, 0xe9, 0x08, 0x57, 0xd7, 0x88, 0x69, 0x36,
+               0x74, 0x2b, 0xca, 0x95, 0x15, 0x4a, 0xab, 0xf4,
+               0x5e, 0x01, 0xe0, 0xbf, 0x3f, 0x60, 0x81, 0xde,
+               0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c,
+               0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47,
+               0x05, 0x5a, 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85,
+               0x71, 0x2e, 0xcf, 0x90, 0x10, 0x4f, 0xae, 0xf1,
+               0xb3, 0xec, 0x0d, 0x52, 0xd2, 0x8d, 0x6c, 0x33,
+               0xe8, 0xb7, 0x56, 0x09, 0x89, 0xd6, 0x37, 0x68,
+               0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa,
+               0xbc, 0xe3, 0x02, 0x5d, 0xdd, 0x82, 0x63, 0x3c,
+               0x7e, 0x21, 0xc0, 0x9f, 0x1f, 0x40, 0xa1, 0xfe,
+               0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, 0xfa, 0xa5,
+               0xe7, 0xb8, 0x59, 0x06, 0x86, 0xd9, 0x38, 0x67,
+               0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13,
+               0x51, 0x0e, 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1,
+               0x0a, 0x55, 0xb4, 0xeb, 0x6b, 0x34, 0xd5, 0x8a,
+               0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, 0x17, 0x48,
+               0xe2, 0xbd, 0x5c, 0x03, 0x83, 0xdc, 0x3d, 0x62,
+               0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0,
+               0x7b, 0x24, 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb,
+               0xb9, 0xe6, 0x07, 0x58, 0xd8, 0x87, 0x66, 0x39,
+               0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, 0x12, 0x4d,
+               0x0f, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f,
+               0x54, 0x0b, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4,
+               0x96, 0xc9, 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16,
+       },
+       {
+               0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d,
+               0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a,
+               0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73,
+               0x69, 0x09, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54,
+               0x9c, 0xfc, 0x5c, 0x3c, 0x01, 0x61, 0xc1, 0xa1,
+               0xbb, 0xdb, 0x7b, 0x1b, 0x26, 0x46, 0xe6, 0x86,
+               0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, 0x8f, 0xef,
+               0xf5, 0x95, 0x35, 0x55, 0x68, 0x08, 0xa8, 0xc8,
+               0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18,
+               0x02, 0x62, 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f,
+               0x6b, 0x0b, 0xab, 0xcb, 0xf6, 0x96, 0x36, 0x56,
+               0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, 0x11, 0x71,
+               0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84,
+               0x9e, 0xfe, 0x5e, 0x3e, 0x03, 0x63, 0xc3, 0xa3,
+               0xf7, 0x97, 0x37, 0x57, 0x6a, 0x0a, 0xaa, 0xca,
+               0xd0, 0xb0, 0x10, 0x70, 0x4d, 0x2d, 0x8d, 0xed,
+               0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, 0x17, 0x77,
+               0x6d, 0x0d, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50,
+               0x04, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39,
+               0x23, 0x43, 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e,
+               0xd6, 0xb6, 0x16, 0x76, 0x4b, 0x2b, 0x8b, 0xeb,
+               0xf1, 0x91, 0x31, 0x51, 0x6c, 0x0c, 0xac, 0xcc,
+               0x98, 0xf8, 0x58, 0x38, 0x05, 0x65, 0xc5, 0xa5,
+               0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82,
+               0x6f, 0x0f, 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52,
+               0x48, 0x28, 0x88, 0xe8, 0xd5, 0xb5, 0x15, 0x75,
+               0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, 0x7c, 0x1c,
+               0x06, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b,
+               0xf3, 0x93, 0x33, 0x53, 0x6e, 0x0e, 0xae, 0xce,
+               0xd4, 0xb4, 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9,
+               0xbd, 0xdd, 0x7d, 0x1d, 0x20, 0x40, 0xe0, 0x80,
+               0x9a, 0xfa, 0x5a, 0x3a, 0x07, 0x67, 0xc7, 0xa7,
+       },
+       {
+               0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a,
+               0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15,
+               0x5e, 0x3f, 0x9c, 0xfd, 0xc7, 0xa6, 0x05, 0x64,
+               0x71, 0x10, 0xb3, 0xd2, 0xe8, 0x89, 0x2a, 0x4b,
+               0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, 0xe7, 0x86,
+               0x93, 0xf2, 0x51, 0x30, 0x0a, 0x6b, 0xc8, 0xa9,
+               0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8,
+               0xcd, 0xac, 0x0f, 0x6e, 0x54, 0x35, 0x96, 0xf7,
+               0x65, 0x04, 0xa7, 0xc6, 0xfc, 0x9d, 0x3e, 0x5f,
+               0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, 0x11, 0x70,
+               0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x01,
+               0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e,
+               0xd9, 0xb8, 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3,
+               0xf6, 0x97, 0x34, 0x55, 0x6f, 0x0e, 0xad, 0xcc,
+               0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, 0xdc, 0xbd,
+               0xa8, 0xc9, 0x6a, 0x0b, 0x31, 0x50, 0xf3, 0x92,
+               0xca, 0xab, 0x08, 0x69, 0x53, 0x32, 0x91, 0xf0,
+               0xe5, 0x84, 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf,
+               0x94, 0xf5, 0x56, 0x37, 0x0d, 0x6c, 0xcf, 0xae,
+               0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, 0xe0, 0x81,
+               0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c,
+               0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x02, 0x63,
+               0x28, 0x49, 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12,
+               0x07, 0x66, 0xc5, 0xa4, 0x9e, 0xff, 0x5c, 0x3d,
+               0xaf, 0xce, 0x6d, 0x0c, 0x36, 0x57, 0xf4, 0x95,
+               0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba,
+               0xf1, 0x90, 0x33, 0x52, 0x68, 0x09, 0xaa, 0xcb,
+               0xde, 0xbf, 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4,
+               0x13, 0x72, 0xd1, 0xb0, 0x8a, 0xeb, 0x48, 0x29,
+               0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, 0x67, 0x06,
+               0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77,
+               0x62, 0x03, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58,
+       },
+       {
+               0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33,
+               0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04,
+               0x6e, 0x0c, 0xaa, 0xc8, 0xfb, 0x99, 0x3f, 0x5d,
+               0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x08, 0x6a,
+               0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef,
+               0xeb, 0x89, 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8,
+               0xb2, 0xd0, 0x76, 0x14, 0x27, 0x45, 0xe3, 0x81,
+               0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, 0xd4, 0xb6,
+               0xa5, 0xc7, 0x61, 0x03, 0x30, 0x52, 0xf4, 0x96,
+               0x92, 0xf0, 0x56, 0x34, 0x07, 0x65, 0xc3, 0xa1,
+               0xcb, 0xa9, 0x0f, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8,
+               0xfc, 0x9e, 0x38, 0x5a, 0x69, 0x0b, 0xad, 0xcf,
+               0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, 0x28, 0x4a,
+               0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d,
+               0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24,
+               0x20, 0x42, 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13,
+               0x57, 0x35, 0x93, 0xf1, 0xc2, 0xa0, 0x06, 0x64,
+               0x60, 0x02, 0xa4, 0xc6, 0xf5, 0x97, 0x31, 0x53,
+               0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0x0a,
+               0x0e, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d,
+               0x8b, 0xe9, 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8,
+               0xbc, 0xde, 0x78, 0x1a, 0x29, 0x4b, 0xed, 0x8f,
+               0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, 0xb4, 0xd6,
+               0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1,
+               0xf2, 0x90, 0x36, 0x54, 0x67, 0x05, 0xa3, 0xc1,
+               0xc5, 0xa7, 0x01, 0x63, 0x50, 0x32, 0x94, 0xf6,
+               0x9c, 0xfe, 0x58, 0x3a, 0x09, 0x6b, 0xcd, 0xaf,
+               0xab, 0xc9, 0x6f, 0x0d, 0x3e, 0x5c, 0xfa, 0x98,
+               0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d,
+               0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a,
+               0x40, 0x22, 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73,
+               0x77, 0x15, 0xb3, 0xd1, 0xe2, 0x80, 0x26, 0x44,
+       },
+       {
+               0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34,
+               0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b,
+               0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a,
+               0x41, 0x22, 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75,
+               0xfc, 0x9f, 0x3a, 0x59, 0x6d, 0x0e, 0xab, 0xc8,
+               0xc3, 0xa0, 0x05, 0x66, 0x52, 0x31, 0x94, 0xf7,
+               0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6,
+               0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89,
+               0xe5, 0x86, 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1,
+               0xda, 0xb9, 0x1c, 0x7f, 0x4b, 0x28, 0x8d, 0xee,
+               0x9b, 0xf8, 0x5d, 0x3e, 0x0a, 0x69, 0xcc, 0xaf,
+               0xa4, 0xc7, 0x62, 0x01, 0x35, 0x56, 0xf3, 0x90,
+               0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d,
+               0x26, 0x45, 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12,
+               0x67, 0x04, 0xa1, 0xc2, 0xf6, 0x95, 0x30, 0x53,
+               0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, 0x0f, 0x6c,
+               0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3,
+               0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc,
+               0xa9, 0xca, 0x6f, 0x0c, 0x38, 0x5b, 0xfe, 0x9d,
+               0x96, 0xf5, 0x50, 0x33, 0x07, 0x64, 0xc1, 0xa2,
+               0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, 0x7c, 0x1f,
+               0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20,
+               0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x02, 0x61,
+               0x6a, 0x09, 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e,
+               0x32, 0x51, 0xf4, 0x97, 0xa3, 0xc0, 0x65, 0x06,
+               0x0d, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, 0x5a, 0x39,
+               0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78,
+               0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47,
+               0xce, 0xad, 0x08, 0x6b, 0x5f, 0x3c, 0x99, 0xfa,
+               0xf1, 0x92, 0x37, 0x54, 0x60, 0x03, 0xa6, 0xc5,
+               0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, 0xe7, 0x84,
+               0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb,
+       },
+       {
+               0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21,
+               0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26,
+               0x0e, 0x6a, 0xc6, 0xa2, 0x83, 0xe7, 0x4b, 0x2f,
+               0x09, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, 0x4c, 0x28,
+               0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d,
+               0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a,
+               0x12, 0x76, 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33,
+               0x15, 0x71, 0xdd, 0xb9, 0x98, 0xfc, 0x50, 0x34,
+               0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, 0x7d, 0x19,
+               0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e,
+               0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17,
+               0x31, 0x55, 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10,
+               0x24, 0x40, 0xec, 0x88, 0xa9, 0xcd, 0x61, 0x05,
+               0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, 0x66, 0x02,
+               0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0x0b,
+               0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0x0c,
+               0x70, 0x14, 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51,
+               0x77, 0x13, 0xbf, 0xdb, 0xfa, 0x9e, 0x32, 0x56,
+               0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, 0x3b, 0x5f,
+               0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58,
+               0x6c, 0x08, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d,
+               0x6b, 0x0f, 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a,
+               0x62, 0x06, 0xaa, 0xce, 0xef, 0x8b, 0x27, 0x43,
+               0x65, 0x01, 0xad, 0xc9, 0xe8, 0x8c, 0x20, 0x44,
+               0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0x0d, 0x69,
+               0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0x0a, 0x6e,
+               0x46, 0x22, 0x8e, 0xea, 0xcb, 0xaf, 0x03, 0x67,
+               0x41, 0x25, 0x89, 0xed, 0xcc, 0xa8, 0x04, 0x60,
+               0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, 0x11, 0x75,
+               0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72,
+               0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b,
+               0x5d, 0x39, 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c,
+       },
+       {
+               0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26,
+               0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29,
+               0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38,
+               0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37,
+               0x3c, 0x59, 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a,
+               0x33, 0x56, 0xf9, 0x9c, 0xba, 0xdf, 0x70, 0x15,
+               0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, 0x61, 0x04,
+               0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0x0b,
+               0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e,
+               0x77, 0x12, 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51,
+               0x66, 0x03, 0xac, 0xc9, 0xef, 0x8a, 0x25, 0x40,
+               0x69, 0x0c, 0xa3, 0xc6, 0xe0, 0x85, 0x2a, 0x4f,
+               0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x07, 0x62,
+               0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x08, 0x6d,
+               0x5a, 0x3f, 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c,
+               0x55, 0x30, 0x9f, 0xfa, 0xdc, 0xb9, 0x16, 0x73,
+               0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, 0xb3, 0xd6,
+               0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9,
+               0xee, 0x8b, 0x24, 0x41, 0x67, 0x02, 0xad, 0xc8,
+               0xe1, 0x84, 0x2b, 0x4e, 0x68, 0x0d, 0xa2, 0xc7,
+               0xcc, 0xa9, 0x06, 0x63, 0x45, 0x20, 0x8f, 0xea,
+               0xc3, 0xa6, 0x09, 0x6c, 0x4a, 0x2f, 0x80, 0xe5,
+               0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4,
+               0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb,
+               0x88, 0xed, 0x42, 0x27, 0x01, 0x64, 0xcb, 0xae,
+               0x87, 0xe2, 0x4d, 0x28, 0x0e, 0x6b, 0xc4, 0xa1,
+               0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, 0xd5, 0xb0,
+               0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf,
+               0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92,
+               0xbb, 0xde, 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d,
+               0xaa, 0xcf, 0x60, 0x05, 0x23, 0x46, 0xe9, 0x8c,
+               0xa5, 0xc0, 0x6f, 0x0a, 0x2c, 0x49, 0xe6, 0x83,
+       },
+       {
+               0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f,
+               0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38,
+               0x2e, 0x48, 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x01,
+               0x39, 0x5f, 0xf5, 0x93, 0xbc, 0xda, 0x70, 0x16,
+               0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, 0x15, 0x73,
+               0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x02, 0x64,
+               0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d,
+               0x65, 0x03, 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a,
+               0xb8, 0xde, 0x74, 0x12, 0x3d, 0x5b, 0xf1, 0x97,
+               0xaf, 0xc9, 0x63, 0x05, 0x2a, 0x4c, 0xe6, 0x80,
+               0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9,
+               0x81, 0xe7, 0x4d, 0x2b, 0x04, 0x62, 0xc8, 0xae,
+               0xe4, 0x82, 0x28, 0x4e, 0x61, 0x07, 0xad, 0xcb,
+               0xf3, 0x95, 0x3f, 0x59, 0x76, 0x10, 0xba, 0xdc,
+               0xca, 0xac, 0x06, 0x60, 0x4f, 0x29, 0x83, 0xe5,
+               0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2,
+               0x6d, 0x0b, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42,
+               0x7a, 0x1c, 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55,
+               0x43, 0x25, 0x8f, 0xe9, 0xc6, 0xa0, 0x0a, 0x6c,
+               0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, 0x1d, 0x7b,
+               0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e,
+               0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x09,
+               0x1f, 0x79, 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30,
+               0x08, 0x6e, 0xc4, 0xa2, 0x8d, 0xeb, 0x41, 0x27,
+               0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, 0x9c, 0xfa,
+               0xc2, 0xa4, 0x0e, 0x68, 0x47, 0x21, 0x8b, 0xed,
+               0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4,
+               0xec, 0x8a, 0x20, 0x46, 0x69, 0x0f, 0xa5, 0xc3,
+               0x89, 0xef, 0x45, 0x23, 0x0c, 0x6a, 0xc0, 0xa6,
+               0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, 0xd7, 0xb1,
+               0xa7, 0xc1, 0x6b, 0x0d, 0x22, 0x44, 0xee, 0x88,
+               0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 0xf9, 0x9f,
+       },
+       {
+               0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28,
+               0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37,
+               0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, 0x71, 0x16,
+               0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x09,
+               0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54,
+               0x63, 0x04, 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b,
+               0x42, 0x25, 0x8c, 0xeb, 0xc3, 0xa4, 0x0d, 0x6a,
+               0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, 0x12, 0x75,
+               0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0,
+               0xe7, 0x80, 0x29, 0x4e, 0x66, 0x01, 0xa8, 0xcf,
+               0xc6, 0xa1, 0x08, 0x6f, 0x47, 0x20, 0x89, 0xee,
+               0xd9, 0xbe, 0x17, 0x70, 0x58, 0x3f, 0x96, 0xf1,
+               0x84, 0xe3, 0x4a, 0x2d, 0x05, 0x62, 0xcb, 0xac,
+               0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3,
+               0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92,
+               0xa5, 0xc2, 0x6b, 0x0c, 0x24, 0x43, 0xea, 0x8d,
+               0xed, 0x8a, 0x23, 0x44, 0x6c, 0x0b, 0xa2, 0xc5,
+               0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, 0xbd, 0xda,
+               0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb,
+               0xcc, 0xab, 0x02, 0x65, 0x4d, 0x2a, 0x83, 0xe4,
+               0x91, 0xf6, 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9,
+               0x8e, 0xe9, 0x40, 0x27, 0x0f, 0x68, 0xc1, 0xa6,
+               0xaf, 0xc8, 0x61, 0x06, 0x2e, 0x49, 0xe0, 0x87,
+               0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98,
+               0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d,
+               0x0a, 0x6d, 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22,
+               0x2b, 0x4c, 0xe5, 0x82, 0xaa, 0xcd, 0x64, 0x03,
+               0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, 0x7b, 0x1c,
+               0x69, 0x0e, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41,
+               0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e,
+               0x57, 0x30, 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f,
+               0x48, 0x2f, 0x86, 0xe1, 0xc9, 0xae, 0x07, 0x60,
+       },
+       {
+               0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05,
+               0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62,
+               0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb,
+               0xa9, 0xc1, 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac,
+               0x81, 0xe9, 0x51, 0x39, 0x3c, 0x54, 0xec, 0x84,
+               0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, 0x8b, 0xe3,
+               0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a,
+               0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d,
+               0x1f, 0x77, 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a,
+               0x78, 0x10, 0xa8, 0xc0, 0xc5, 0xad, 0x15, 0x7d,
+               0xd1, 0xb9, 0x01, 0x69, 0x6c, 0x04, 0xbc, 0xd4,
+               0xb6, 0xde, 0x66, 0x0e, 0x0b, 0x63, 0xdb, 0xb3,
+               0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b,
+               0xf9, 0x91, 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc,
+               0x50, 0x38, 0x80, 0xe8, 0xed, 0x85, 0x3d, 0x55,
+               0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2, 0x5a, 0x32,
+               0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b,
+               0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c,
+               0xf0, 0x98, 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5,
+               0x97, 0xff, 0x47, 0x2f, 0x2a, 0x42, 0xfa, 0x92,
+               0xbf, 0xd7, 0x6f, 0x07, 0x02, 0x6a, 0xd2, 0xba,
+               0xd8, 0xb0, 0x08, 0x60, 0x65, 0x0d, 0xb5, 0xdd,
+               0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74,
+               0x16, 0x7e, 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13,
+               0x21, 0x49, 0xf1, 0x99, 0x9c, 0xf4, 0x4c, 0x24,
+               0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, 0x2b, 0x43,
+               0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea,
+               0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d,
+               0xa0, 0xc8, 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5,
+               0xc7, 0xaf, 0x17, 0x7f, 0x7a, 0x12, 0xaa, 0xc2,
+               0x6e, 0x06, 0xbe, 0xd6, 0xd3, 0xbb, 0x03, 0x6b,
+               0x09, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0x0c,
+       },
+       {
+               0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+               0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d,
+               0xde, 0xb7, 0x0c, 0x65, 0x67, 0x0e, 0xb5, 0xdc,
+               0xb1, 0xd8, 0x63, 0x0a, 0x08, 0x61, 0xda, 0xb3,
+               0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3,
+               0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc,
+               0x7f, 0x16, 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d,
+               0x10, 0x79, 0xc2, 0xab, 0xa9, 0xc0, 0x7b, 0x12,
+               0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, 0x34, 0x5d,
+               0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32,
+               0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83,
+               0xee, 0x87, 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec,
+               0xfe, 0x97, 0x2c, 0x45, 0x47, 0x2e, 0x95, 0xfc,
+               0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, 0xfa, 0x93,
+               0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22,
+               0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d,
+               0xbe, 0xd7, 0x6c, 0x05, 0x07, 0x6e, 0xd5, 0xbc,
+               0xd1, 0xb8, 0x03, 0x6a, 0x68, 0x01, 0xba, 0xd3,
+               0x60, 0x09, 0xb2, 0xdb, 0xd9, 0xb0, 0x0b, 0x62,
+               0x0f, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0x0d,
+               0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d,
+               0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72,
+               0xc1, 0xa8, 0x13, 0x7a, 0x78, 0x11, 0xaa, 0xc3,
+               0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, 0xc5, 0xac,
+               0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3,
+               0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c,
+               0x3f, 0x56, 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d,
+               0x50, 0x39, 0x82, 0xeb, 0xe9, 0x80, 0x3b, 0x52,
+               0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, 0x2b, 0x42,
+               0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d,
+               0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c,
+               0xf1, 0x98, 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3,
+       },
+       {
+               0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b,
+               0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c,
+               0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5,
+               0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92,
+               0xc1, 0xab, 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca,
+               0xb6, 0xdc, 0x62, 0x08, 0x03, 0x69, 0xd7, 0xbd,
+               0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, 0x4e, 0x24,
+               0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53,
+               0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94,
+               0xe8, 0x82, 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3,
+               0x71, 0x1b, 0xa5, 0xcf, 0xc4, 0xae, 0x10, 0x7a,
+               0x06, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, 0x67, 0x0d,
+               0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55,
+               0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22,
+               0xb0, 0xda, 0x64, 0x0e, 0x05, 0x6f, 0xd1, 0xbb,
+               0xc7, 0xad, 0x13, 0x79, 0x72, 0x18, 0xa6, 0xcc,
+               0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, 0x42, 0x28,
+               0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f,
+               0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6,
+               0xba, 0xd0, 0x6e, 0x04, 0x0f, 0x65, 0xdb, 0xb1,
+               0xe2, 0x88, 0x36, 0x5c, 0x57, 0x3d, 0x83, 0xe9,
+               0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, 0xf4, 0x9e,
+               0x0c, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x07,
+               0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70,
+               0xbc, 0xd6, 0x68, 0x02, 0x09, 0x63, 0xdd, 0xb7,
+               0xcb, 0xa1, 0x1f, 0x75, 0x7e, 0x14, 0xaa, 0xc0,
+               0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, 0x33, 0x59,
+               0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e,
+               0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76,
+               0x0a, 0x60, 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x01,
+               0x93, 0xf9, 0x47, 0x2d, 0x26, 0x4c, 0xf2, 0x98,
+               0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, 0x85, 0xef,
+       },
+       {
+               0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c,
+               0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73,
+               0xfe, 0x95, 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2,
+               0x81, 0xea, 0x57, 0x3c, 0x30, 0x5b, 0xe6, 0x8d,
+               0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, 0x86, 0xed,
+               0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92,
+               0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13,
+               0x60, 0x0b, 0xb6, 0xdd, 0xd1, 0xba, 0x07, 0x6c,
+               0xdf, 0xb4, 0x09, 0x62, 0x6e, 0x05, 0xb8, 0xd3,
+               0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, 0xc7, 0xac,
+               0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d,
+               0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52,
+               0x3e, 0x55, 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32,
+               0x41, 0x2a, 0x97, 0xfc, 0xf0, 0x9b, 0x26, 0x4d,
+               0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, 0xa7, 0xcc,
+               0xbf, 0xd4, 0x69, 0x02, 0x0e, 0x65, 0xd8, 0xb3,
+               0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf,
+               0xdc, 0xb7, 0x0a, 0x61, 0x6d, 0x06, 0xbb, 0xd0,
+               0x5d, 0x36, 0x8b, 0xe0, 0xec, 0x87, 0x3a, 0x51,
+               0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, 0x45, 0x2e,
+               0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e,
+               0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31,
+               0xbc, 0xd7, 0x6a, 0x01, 0x0d, 0x66, 0xdb, 0xb0,
+               0xc3, 0xa8, 0x15, 0x7e, 0x72, 0x19, 0xa4, 0xcf,
+               0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, 0x1b, 0x70,
+               0x03, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0x0f,
+               0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e,
+               0xfd, 0x96, 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1,
+               0x9d, 0xf6, 0x4b, 0x20, 0x2c, 0x47, 0xfa, 0x91,
+               0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, 0x85, 0xee,
+               0x63, 0x08, 0xb5, 0xde, 0xd2, 0xb9, 0x04, 0x6f,
+               0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10,
+       },
+       {
+               0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19,
+               0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e,
+               0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, 0xfb, 0x97,
+               0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x08, 0xbc, 0xd0,
+               0x01, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18,
+               0x46, 0x2a, 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f,
+               0x8f, 0xe3, 0x57, 0x3b, 0x22, 0x4e, 0xfa, 0x96,
+               0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x09, 0xbd, 0xd1,
+               0x02, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b,
+               0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c,
+               0x8c, 0xe0, 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95,
+               0xcb, 0xa7, 0x13, 0x7f, 0x66, 0x0a, 0xbe, 0xd2,
+               0x03, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, 0x76, 0x1a,
+               0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d,
+               0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94,
+               0xca, 0xa6, 0x12, 0x7e, 0x67, 0x0b, 0xbf, 0xd3,
+               0x04, 0x68, 0xdc, 0xb0, 0xa9, 0xc5, 0x71, 0x1d,
+               0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, 0x36, 0x5a,
+               0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93,
+               0xcd, 0xa1, 0x15, 0x79, 0x60, 0x0c, 0xb8, 0xd4,
+               0x05, 0x69, 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c,
+               0x42, 0x2e, 0x9a, 0xf6, 0xef, 0x83, 0x37, 0x5b,
+               0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, 0xfe, 0x92,
+               0xcc, 0xa0, 0x14, 0x78, 0x61, 0x0d, 0xb9, 0xd5,
+               0x06, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f,
+               0x41, 0x2d, 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58,
+               0x88, 0xe4, 0x50, 0x3c, 0x25, 0x49, 0xfd, 0x91,
+               0xcf, 0xa3, 0x17, 0x7b, 0x62, 0x0e, 0xba, 0xd6,
+               0x07, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e,
+               0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59,
+               0x89, 0xe5, 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90,
+               0xce, 0xa2, 0x16, 0x7a, 0x63, 0x0f, 0xbb, 0xd7,
+       },
+       {
+               0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e,
+               0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51,
+               0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80,
+               0xd1, 0xbc, 0x0b, 0x66, 0x78, 0x15, 0xa2, 0xcf,
+               0x21, 0x4c, 0xfb, 0x96, 0x88, 0xe5, 0x52, 0x3f,
+               0x6e, 0x03, 0xb4, 0xd9, 0xc7, 0xaa, 0x1d, 0x70,
+               0xbf, 0xd2, 0x65, 0x08, 0x16, 0x7b, 0xcc, 0xa1,
+               0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee,
+               0x42, 0x2f, 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c,
+               0x0d, 0x60, 0xd7, 0xba, 0xa4, 0xc9, 0x7e, 0x13,
+               0xdc, 0xb1, 0x06, 0x6b, 0x75, 0x18, 0xaf, 0xc2,
+               0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d,
+               0x63, 0x0e, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d,
+               0x2c, 0x41, 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32,
+               0xfd, 0x90, 0x27, 0x4a, 0x54, 0x39, 0x8e, 0xe3,
+               0xb2, 0xdf, 0x68, 0x05, 0x1b, 0x76, 0xc1, 0xac,
+               0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a,
+               0xcb, 0xa6, 0x11, 0x7c, 0x62, 0x0f, 0xb8, 0xd5,
+               0x1a, 0x77, 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x04,
+               0x55, 0x38, 0x8f, 0xe2, 0xfc, 0x91, 0x26, 0x4b,
+               0xa5, 0xc8, 0x7f, 0x12, 0x0c, 0x61, 0xd6, 0xbb,
+               0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4,
+               0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25,
+               0x74, 0x19, 0xae, 0xc3, 0xdd, 0xb0, 0x07, 0x6a,
+               0xc6, 0xab, 0x1c, 0x71, 0x6f, 0x02, 0xb5, 0xd8,
+               0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, 0xfa, 0x97,
+               0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46,
+               0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x09,
+               0xe7, 0x8a, 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9,
+               0xa8, 0xc5, 0x72, 0x1f, 0x01, 0x6c, 0xdb, 0xb6,
+               0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, 0x0a, 0x67,
+               0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28,
+       },
+       {
+               0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17,
+               0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40,
+               0xae, 0xc0, 0x72, 0x1c, 0x0b, 0x65, 0xd7, 0xb9,
+               0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, 0x80, 0xee,
+               0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56,
+               0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x01,
+               0xef, 0x81, 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8,
+               0xb8, 0xd6, 0x64, 0x0a, 0x1d, 0x73, 0xc1, 0xaf,
+               0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, 0xfb, 0x95,
+               0xd5, 0xbb, 0x09, 0x67, 0x70, 0x1e, 0xac, 0xc2,
+               0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b,
+               0x7b, 0x15, 0xa7, 0xc9, 0xde, 0xb0, 0x02, 0x6c,
+               0xc3, 0xad, 0x1f, 0x71, 0x66, 0x08, 0xba, 0xd4,
+               0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, 0xed, 0x83,
+               0x6d, 0x03, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a,
+               0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d,
+               0x19, 0x77, 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0x0e,
+               0x4e, 0x20, 0x92, 0xfc, 0xeb, 0x85, 0x37, 0x59,
+               0xb7, 0xd9, 0x6b, 0x05, 0x12, 0x7c, 0xce, 0xa0,
+               0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7,
+               0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f,
+               0x0f, 0x61, 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18,
+               0xf6, 0x98, 0x2a, 0x44, 0x53, 0x3d, 0x8f, 0xe1,
+               0xa1, 0xcf, 0x7d, 0x13, 0x04, 0x6a, 0xd8, 0xb6,
+               0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c,
+               0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x07, 0xb5, 0xdb,
+               0x35, 0x5b, 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22,
+               0x62, 0x0c, 0xbe, 0xd0, 0xc7, 0xa9, 0x1b, 0x75,
+               0xda, 0xb4, 0x06, 0x68, 0x7f, 0x11, 0xa3, 0xcd,
+               0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a,
+               0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0x0d, 0x63,
+               0x23, 0x4d, 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34,
+       },
+       {
+               0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10,
+               0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f,
+               0xbe, 0xd1, 0x60, 0x0f, 0x1f, 0x70, 0xc1, 0xae,
+               0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1,
+               0x61, 0x0e, 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71,
+               0x3e, 0x51, 0xe0, 0x8f, 0x9f, 0xf0, 0x41, 0x2e,
+               0xdf, 0xb0, 0x01, 0x6e, 0x7e, 0x11, 0xa0, 0xcf,
+               0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90,
+               0xc2, 0xad, 0x1c, 0x73, 0x63, 0x0c, 0xbd, 0xd2,
+               0x9d, 0xf2, 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d,
+               0x7c, 0x13, 0xa2, 0xcd, 0xdd, 0xb2, 0x03, 0x6c,
+               0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, 0x5c, 0x33,
+               0xa3, 0xcc, 0x7d, 0x12, 0x02, 0x6d, 0xdc, 0xb3,
+               0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec,
+               0x1d, 0x72, 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0x0d,
+               0x42, 0x2d, 0x9c, 0xf3, 0xe3, 0x8c, 0x3d, 0x52,
+               0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, 0xe6, 0x89,
+               0xc6, 0xa9, 0x18, 0x77, 0x67, 0x08, 0xb9, 0xd6,
+               0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37,
+               0x78, 0x17, 0xa6, 0xc9, 0xd9, 0xb6, 0x07, 0x68,
+               0xf8, 0x97, 0x26, 0x49, 0x59, 0x36, 0x87, 0xe8,
+               0xa7, 0xc8, 0x79, 0x16, 0x06, 0x69, 0xd8, 0xb7,
+               0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56,
+               0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x09,
+               0x5b, 0x34, 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b,
+               0x04, 0x6b, 0xda, 0xb5, 0xa5, 0xca, 0x7b, 0x14,
+               0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, 0x9a, 0xf5,
+               0xba, 0xd5, 0x64, 0x0b, 0x1b, 0x74, 0xc5, 0xaa,
+               0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a,
+               0x65, 0x0a, 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75,
+               0x84, 0xeb, 0x5a, 0x35, 0x25, 0x4a, 0xfb, 0x94,
+               0xdb, 0xb4, 0x05, 0x6a, 0x7a, 0x15, 0xa4, 0xcb,
+       },
+       {
+               0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d,
+               0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea,
+               0x53, 0x23, 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e,
+               0xf4, 0x84, 0x14, 0x64, 0x29, 0x59, 0xc9, 0xb9,
+               0xa6, 0xd6, 0x46, 0x36, 0x7b, 0x0b, 0x9b, 0xeb,
+               0x01, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c,
+               0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8,
+               0x52, 0x22, 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f,
+               0x51, 0x21, 0xb1, 0xc1, 0x8c, 0xfc, 0x6c, 0x1c,
+               0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, 0xcb, 0xbb,
+               0x02, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f,
+               0xa5, 0xd5, 0x45, 0x35, 0x78, 0x08, 0x98, 0xe8,
+               0xf7, 0x87, 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba,
+               0x50, 0x20, 0xb0, 0xc0, 0x8d, 0xfd, 0x6d, 0x1d,
+               0xa4, 0xd4, 0x44, 0x34, 0x79, 0x09, 0x99, 0xe9,
+               0x03, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e,
+               0xa2, 0xd2, 0x42, 0x32, 0x7f, 0x0f, 0x9f, 0xef,
+               0x05, 0x75, 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48,
+               0xf1, 0x81, 0x11, 0x61, 0x2c, 0x5c, 0xcc, 0xbc,
+               0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, 0x6b, 0x1b,
+               0x04, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49,
+               0xa3, 0xd3, 0x43, 0x33, 0x7e, 0x0e, 0x9e, 0xee,
+               0x57, 0x27, 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a,
+               0xf0, 0x80, 0x10, 0x60, 0x2d, 0x5d, 0xcd, 0xbd,
+               0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, 0xce, 0xbe,
+               0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19,
+               0xa0, 0xd0, 0x40, 0x30, 0x7d, 0x0d, 0x9d, 0xed,
+               0x07, 0x77, 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a,
+               0x55, 0x25, 0xb5, 0xc5, 0x88, 0xf8, 0x68, 0x18,
+               0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, 0xcf, 0xbf,
+               0x06, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b,
+               0xa1, 0xd1, 0x41, 0x31, 0x7c, 0x0c, 0x9c, 0xec,
+       },
+       {
+               0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a,
+               0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5,
+               0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, 0x78, 0x09,
+               0xec, 0x9d, 0x0e, 0x7f, 0x35, 0x44, 0xd7, 0xa6,
+               0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc,
+               0x29, 0x58, 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63,
+               0xc5, 0xb4, 0x27, 0x56, 0x1c, 0x6d, 0xfe, 0x8f,
+               0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, 0x51, 0x20,
+               0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b,
+               0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4,
+               0x52, 0x23, 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18,
+               0xfd, 0x8c, 0x1f, 0x6e, 0x24, 0x55, 0xc6, 0xb7,
+               0x97, 0xe6, 0x75, 0x04, 0x4e, 0x3f, 0xac, 0xdd,
+               0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x03, 0x72,
+               0xd4, 0xa5, 0x36, 0x47, 0x0d, 0x7c, 0xef, 0x9e,
+               0x7b, 0x0a, 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31,
+               0x22, 0x53, 0xc0, 0xb1, 0xfb, 0x8a, 0x19, 0x68,
+               0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, 0xb6, 0xc7,
+               0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b,
+               0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84,
+               0xa4, 0xd5, 0x46, 0x37, 0x7d, 0x0c, 0x9f, 0xee,
+               0x0b, 0x7a, 0xe9, 0x98, 0xd2, 0xa3, 0x30, 0x41,
+               0xe7, 0x96, 0x05, 0x74, 0x3e, 0x4f, 0xdc, 0xad,
+               0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x02,
+               0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x08, 0x79,
+               0x9c, 0xed, 0x7e, 0x0f, 0x45, 0x34, 0xa7, 0xd6,
+               0x70, 0x01, 0x92, 0xe3, 0xa9, 0xd8, 0x4b, 0x3a,
+               0xdf, 0xae, 0x3d, 0x4c, 0x06, 0x77, 0xe4, 0x95,
+               0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff,
+               0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50,
+               0xf6, 0x87, 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc,
+               0x59, 0x28, 0xbb, 0xca, 0x80, 0xf1, 0x62, 0x13,
+       },
+       {
+               0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43,
+               0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4,
+               0x73, 0x01, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30,
+               0xc4, 0xb6, 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87,
+               0xe6, 0x94, 0x02, 0x70, 0x33, 0x41, 0xd7, 0xa5,
+               0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, 0x60, 0x12,
+               0x95, 0xe7, 0x71, 0x03, 0x40, 0x32, 0xa4, 0xd6,
+               0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61,
+               0xd1, 0xa3, 0x35, 0x47, 0x04, 0x76, 0xe0, 0x92,
+               0x66, 0x14, 0x82, 0xf0, 0xb3, 0xc1, 0x57, 0x25,
+               0xa2, 0xd0, 0x46, 0x34, 0x77, 0x05, 0x93, 0xe1,
+               0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56,
+               0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x06, 0x74,
+               0x80, 0xf2, 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3,
+               0x44, 0x36, 0xa0, 0xd2, 0x91, 0xe3, 0x75, 0x07,
+               0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, 0xc2, 0xb0,
+               0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc,
+               0x08, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b,
+               0xcc, 0xbe, 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f,
+               0x7b, 0x09, 0x9f, 0xed, 0xae, 0xdc, 0x4a, 0x38,
+               0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, 0x68, 0x1a,
+               0xee, 0x9c, 0x0a, 0x78, 0x3b, 0x49, 0xdf, 0xad,
+               0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69,
+               0x9d, 0xef, 0x79, 0x0b, 0x48, 0x3a, 0xac, 0xde,
+               0x6e, 0x1c, 0x8a, 0xf8, 0xbb, 0xc9, 0x5f, 0x2d,
+               0xd9, 0xab, 0x3d, 0x4f, 0x0c, 0x7e, 0xe8, 0x9a,
+               0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e,
+               0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0x0d, 0x9b, 0xe9,
+               0x88, 0xfa, 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb,
+               0x3f, 0x4d, 0xdb, 0xa9, 0xea, 0x98, 0x0e, 0x7c,
+               0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, 0xca, 0xb8,
+               0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0x0f,
+       },
+       {
+               0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44,
+               0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb,
+               0x63, 0x10, 0x85, 0xf6, 0xb2, 0xc1, 0x54, 0x27,
+               0xdc, 0xaf, 0x3a, 0x49, 0x0d, 0x7e, 0xeb, 0x98,
+               0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82,
+               0x79, 0x0a, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d,
+               0xa5, 0xd6, 0x43, 0x30, 0x74, 0x07, 0x92, 0xe1,
+               0x1a, 0x69, 0xfc, 0x8f, 0xcb, 0xb8, 0x2d, 0x5e,
+               0x91, 0xe2, 0x77, 0x04, 0x40, 0x33, 0xa6, 0xd5,
+               0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a,
+               0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6,
+               0x4d, 0x3e, 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x09,
+               0x57, 0x24, 0xb1, 0xc2, 0x86, 0xf5, 0x60, 0x13,
+               0xe8, 0x9b, 0x0e, 0x7d, 0x39, 0x4a, 0xdf, 0xac,
+               0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x03, 0x70,
+               0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf,
+               0x3f, 0x4c, 0xd9, 0xaa, 0xee, 0x9d, 0x08, 0x7b,
+               0x80, 0xf3, 0x66, 0x15, 0x51, 0x22, 0xb7, 0xc4,
+               0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, 0x6b, 0x18,
+               0xe3, 0x90, 0x05, 0x76, 0x32, 0x41, 0xd4, 0xa7,
+               0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd,
+               0x46, 0x35, 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x02,
+               0x9a, 0xe9, 0x7c, 0x0f, 0x4b, 0x38, 0xad, 0xde,
+               0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, 0x12, 0x61,
+               0xae, 0xdd, 0x48, 0x3b, 0x7f, 0x0c, 0x99, 0xea,
+               0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55,
+               0xcd, 0xbe, 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89,
+               0x72, 0x01, 0x94, 0xe7, 0xa3, 0xd0, 0x45, 0x36,
+               0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, 0x5f, 0x2c,
+               0xd7, 0xa4, 0x31, 0x42, 0x06, 0x75, 0xe0, 0x93,
+               0x0b, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f,
+               0xb4, 0xc7, 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0,
+       },
+       {
+               0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+               0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6,
+               0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42,
+               0x94, 0xe0, 0x7c, 0x08, 0x59, 0x2d, 0xb1, 0xc5,
+               0x26, 0x52, 0xce, 0xba, 0xeb, 0x9f, 0x03, 0x77,
+               0xa1, 0xd5, 0x49, 0x3d, 0x6c, 0x18, 0x84, 0xf0,
+               0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, 0x10, 0x64,
+               0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0x0b, 0x97, 0xe3,
+               0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d,
+               0xcb, 0xbf, 0x23, 0x57, 0x06, 0x72, 0xee, 0x9a,
+               0x5f, 0x2b, 0xb7, 0xc3, 0x92, 0xe6, 0x7a, 0x0e,
+               0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, 0xfd, 0x89,
+               0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b,
+               0xed, 0x99, 0x05, 0x71, 0x20, 0x54, 0xc8, 0xbc,
+               0x79, 0x0d, 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28,
+               0xfe, 0x8a, 0x16, 0x62, 0x33, 0x47, 0xdb, 0xaf,
+               0x98, 0xec, 0x70, 0x04, 0x55, 0x21, 0xbd, 0xc9,
+               0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e,
+               0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda,
+               0x0c, 0x78, 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d,
+               0xbe, 0xca, 0x56, 0x22, 0x73, 0x07, 0x9b, 0xef,
+               0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, 0x1c, 0x68,
+               0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc,
+               0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0x0f, 0x7b,
+               0xd4, 0xa0, 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85,
+               0x53, 0x27, 0xbb, 0xcf, 0x9e, 0xea, 0x76, 0x02,
+               0xc7, 0xb3, 0x2f, 0x5b, 0x0a, 0x7e, 0xe2, 0x96,
+               0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11,
+               0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3,
+               0x75, 0x01, 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24,
+               0xe1, 0x95, 0x09, 0x7d, 0x2c, 0x58, 0xc4, 0xb0,
+               0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, 0x43, 0x37,
+       },
+       {
+               0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56,
+               0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9,
+               0x03, 0x76, 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55,
+               0x8c, 0xf9, 0x66, 0x13, 0x45, 0x30, 0xaf, 0xda,
+               0x06, 0x73, 0xec, 0x99, 0xcf, 0xba, 0x25, 0x50,
+               0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf,
+               0x05, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53,
+               0x8a, 0xff, 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc,
+               0x0c, 0x79, 0xe6, 0x93, 0xc5, 0xb0, 0x2f, 0x5a,
+               0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, 0xa0, 0xd5,
+               0x0f, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59,
+               0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6,
+               0x0a, 0x7f, 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c,
+               0x85, 0xf0, 0x6f, 0x1a, 0x4c, 0x39, 0xa6, 0xd3,
+               0x09, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, 0x2a, 0x5f,
+               0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0,
+               0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e,
+               0x97, 0xe2, 0x7d, 0x08, 0x5e, 0x2b, 0xb4, 0xc1,
+               0x1b, 0x6e, 0xf1, 0x84, 0xd2, 0xa7, 0x38, 0x4d,
+               0x94, 0xe1, 0x7e, 0x0b, 0x5d, 0x28, 0xb7, 0xc2,
+               0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48,
+               0x91, 0xe4, 0x7b, 0x0e, 0x58, 0x2d, 0xb2, 0xc7,
+               0x1d, 0x68, 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b,
+               0x92, 0xe7, 0x78, 0x0d, 0x5b, 0x2e, 0xb1, 0xc4,
+               0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, 0x37, 0x42,
+               0x9b, 0xee, 0x71, 0x04, 0x52, 0x27, 0xb8, 0xcd,
+               0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41,
+               0x98, 0xed, 0x72, 0x07, 0x51, 0x24, 0xbb, 0xce,
+               0x12, 0x67, 0xf8, 0x8d, 0xdb, 0xae, 0x31, 0x44,
+               0x9d, 0xe8, 0x77, 0x02, 0x54, 0x21, 0xbe, 0xcb,
+               0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47,
+               0x9e, 0xeb, 0x74, 0x01, 0x57, 0x22, 0xbd, 0xc8,
+       },
+       {
+               0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f,
+               0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8,
+               0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80, 0x1a, 0x6c,
+               0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb,
+               0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39,
+               0xf1, 0x87, 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae,
+               0x55, 0x23, 0xb9, 0xcf, 0x90, 0xe6, 0x7c, 0x0a,
+               0xc2, 0xb4, 0x2e, 0x58, 0x07, 0x71, 0xeb, 0x9d,
+               0xcc, 0xba, 0x20, 0x56, 0x09, 0x7f, 0xe5, 0x93,
+               0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x04,
+               0xff, 0x89, 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0,
+               0x68, 0x1e, 0x84, 0xf2, 0xad, 0xdb, 0x41, 0x37,
+               0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, 0x83, 0xf5,
+               0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62,
+               0x99, 0xef, 0x75, 0x03, 0x5c, 0x2a, 0xb0, 0xc6,
+               0x0e, 0x78, 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51,
+               0x85, 0xf3, 0x69, 0x1f, 0x40, 0x36, 0xac, 0xda,
+               0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, 0x3b, 0x4d,
+               0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x05, 0x9f, 0xe9,
+               0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x08, 0x7e,
+               0xe3, 0x95, 0x0f, 0x79, 0x26, 0x50, 0xca, 0xbc,
+               0x74, 0x02, 0x98, 0xee, 0xb1, 0xc7, 0x5d, 0x2b,
+               0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, 0xf9, 0x8f,
+               0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18,
+               0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16,
+               0xde, 0xa8, 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81,
+               0x7a, 0x0c, 0x96, 0xe0, 0xbf, 0xc9, 0x53, 0x25,
+               0xed, 0x9b, 0x01, 0x77, 0x28, 0x5e, 0xc4, 0xb2,
+               0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x06, 0x70,
+               0xb8, 0xce, 0x54, 0x22, 0x7d, 0x0b, 0x91, 0xe7,
+               0x1c, 0x6a, 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43,
+               0x8b, 0xfd, 0x67, 0x11, 0x4e, 0x38, 0xa2, 0xd4,
+       },
+       {
+               0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58,
+               0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7,
+               0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0x0c, 0x7b,
+               0xbc, 0xcb, 0x52, 0x25, 0x7d, 0x0a, 0x93, 0xe4,
+               0x46, 0x31, 0xa8, 0xdf, 0x87, 0xf0, 0x69, 0x1e,
+               0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, 0xf6, 0x81,
+               0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d,
+               0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2,
+               0x8c, 0xfb, 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4,
+               0x13, 0x64, 0xfd, 0x8a, 0xd2, 0xa5, 0x3c, 0x4b,
+               0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, 0x80, 0xf7,
+               0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68,
+               0xca, 0xbd, 0x24, 0x53, 0x0b, 0x7c, 0xe5, 0x92,
+               0x55, 0x22, 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0x0d,
+               0xe9, 0x9e, 0x07, 0x70, 0x28, 0x5f, 0xc6, 0xb1,
+               0x76, 0x01, 0x98, 0xef, 0xb7, 0xc0, 0x59, 0x2e,
+               0x05, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d,
+               0x9a, 0xed, 0x74, 0x03, 0x5b, 0x2c, 0xb5, 0xc2,
+               0x26, 0x51, 0xc8, 0xbf, 0xe7, 0x90, 0x09, 0x7e,
+               0xb9, 0xce, 0x57, 0x20, 0x78, 0x0f, 0x96, 0xe1,
+               0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, 0x6c, 0x1b,
+               0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84,
+               0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38,
+               0xff, 0x88, 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7,
+               0x89, 0xfe, 0x67, 0x10, 0x48, 0x3f, 0xa6, 0xd1,
+               0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, 0x39, 0x4e,
+               0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2,
+               0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d,
+               0xcf, 0xb8, 0x21, 0x56, 0x0e, 0x79, 0xe0, 0x97,
+               0x50, 0x27, 0xbe, 0xc9, 0x91, 0xe6, 0x7f, 0x08,
+               0xec, 0x9b, 0x02, 0x75, 0x2d, 0x5a, 0xc3, 0xb4,
+               0x73, 0x04, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b,
+       },
+       {
+               0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75,
+               0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92,
+               0xd3, 0xab, 0x23, 0x5b, 0x2e, 0x56, 0xde, 0xa6,
+               0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, 0x39, 0x41,
+               0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce,
+               0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29,
+               0x68, 0x10, 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d,
+               0x8f, 0xf7, 0x7f, 0x07, 0x72, 0x0a, 0x82, 0xfa,
+               0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, 0x66, 0x1e,
+               0x8c, 0xf4, 0x7c, 0x04, 0x71, 0x09, 0x81, 0xf9,
+               0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd,
+               0x5f, 0x27, 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a,
+               0xd0, 0xa8, 0x20, 0x58, 0x2d, 0x55, 0xdd, 0xa5,
+               0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, 0x3a, 0x42,
+               0x03, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0x0e, 0x76,
+               0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91,
+               0xd6, 0xae, 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3,
+               0x31, 0x49, 0xc1, 0xb9, 0xcc, 0xb4, 0x3c, 0x44,
+               0x05, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, 0x08, 0x70,
+               0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97,
+               0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18,
+               0x8a, 0xf2, 0x7a, 0x02, 0x77, 0x0f, 0x87, 0xff,
+               0xbe, 0xc6, 0x4e, 0x36, 0x43, 0x3b, 0xb3, 0xcb,
+               0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, 0x54, 0x2c,
+               0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8,
+               0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f,
+               0x6e, 0x16, 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b,
+               0x89, 0xf1, 0x79, 0x01, 0x74, 0x0c, 0x84, 0xfc,
+               0x06, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, 0x0b, 0x73,
+               0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94,
+               0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0,
+               0x32, 0x4a, 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47,
+       },
+       {
+               0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72,
+               0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d,
+               0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1,
+               0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e,
+               0x9b, 0xe2, 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9,
+               0x74, 0x0d, 0x86, 0xff, 0x8d, 0xf4, 0x7f, 0x06,
+               0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, 0x53, 0x2a,
+               0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5,
+               0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59,
+               0xc4, 0xbd, 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6,
+               0xe8, 0x91, 0x1a, 0x63, 0x11, 0x68, 0xe3, 0x9a,
+               0x07, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, 0x0c, 0x75,
+               0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2,
+               0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d,
+               0x73, 0x0a, 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x01,
+               0x9c, 0xe5, 0x6e, 0x17, 0x65, 0x1c, 0x97, 0xee,
+               0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, 0x5d, 0x24,
+               0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb,
+               0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7,
+               0x7a, 0x03, 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x08,
+               0xcd, 0xb4, 0x3f, 0x46, 0x34, 0x4d, 0xc6, 0xbf,
+               0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, 0x29, 0x50,
+               0x0e, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x05, 0x7c,
+               0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93,
+               0x7d, 0x04, 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0x0f,
+               0x92, 0xeb, 0x60, 0x19, 0x6b, 0x12, 0x99, 0xe0,
+               0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, 0xb5, 0xcc,
+               0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23,
+               0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94,
+               0x09, 0x70, 0xfb, 0x82, 0xf0, 0x89, 0x02, 0x7b,
+               0x25, 0x5c, 0xd7, 0xae, 0xdc, 0xa5, 0x2e, 0x57,
+               0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, 0xc1, 0xb8,
+       },
+       {
+               0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b,
+               0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c,
+               0xf3, 0x89, 0x07, 0x7d, 0x06, 0x7c, 0xf2, 0x88,
+               0x04, 0x7e, 0xf0, 0x8a, 0xf1, 0x8b, 0x05, 0x7f,
+               0xfb, 0x81, 0x0f, 0x75, 0x0e, 0x74, 0xfa, 0x80,
+               0x0c, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0x0d, 0x77,
+               0x08, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x09, 0x73,
+               0xff, 0x85, 0x0b, 0x71, 0x0a, 0x70, 0xfe, 0x84,
+               0xeb, 0x91, 0x1f, 0x65, 0x1e, 0x64, 0xea, 0x90,
+               0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, 0x1d, 0x67,
+               0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63,
+               0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94,
+               0x10, 0x6a, 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b,
+               0xe7, 0x9d, 0x13, 0x69, 0x12, 0x68, 0xe6, 0x9c,
+               0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, 0xe2, 0x98,
+               0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f,
+               0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0,
+               0x3c, 0x46, 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47,
+               0x38, 0x42, 0xcc, 0xb6, 0xcd, 0xb7, 0x39, 0x43,
+               0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, 0xce, 0xb4,
+               0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b,
+               0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc,
+               0xc3, 0xb9, 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8,
+               0x34, 0x4e, 0xc0, 0xba, 0xc1, 0xbb, 0x35, 0x4f,
+               0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, 0x21, 0x5b,
+               0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac,
+               0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8,
+               0x24, 0x5e, 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f,
+               0xdb, 0xa1, 0x2f, 0x55, 0x2e, 0x54, 0xda, 0xa0,
+               0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, 0x2d, 0x57,
+               0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53,
+               0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4,
+       },
+       {
+               0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c,
+               0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83,
+               0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, 0xe4, 0x9f,
+               0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60,
+               0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7,
+               0x24, 0x5f, 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58,
+               0x38, 0x43, 0xce, 0xb5, 0xc9, 0xb2, 0x3f, 0x44,
+               0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, 0xc0, 0xbb,
+               0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7,
+               0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28,
+               0x48, 0x33, 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34,
+               0xb7, 0xcc, 0x41, 0x3a, 0x46, 0x3d, 0xb0, 0xcb,
+               0x70, 0x0b, 0x86, 0xfd, 0x81, 0xfa, 0x77, 0x0c,
+               0x8f, 0xf4, 0x79, 0x02, 0x7e, 0x05, 0x88, 0xf3,
+               0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef,
+               0x6c, 0x17, 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10,
+               0x4b, 0x30, 0xbd, 0xc6, 0xba, 0xc1, 0x4c, 0x37,
+               0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, 0xb3, 0xc8,
+               0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4,
+               0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b,
+               0x90, 0xeb, 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec,
+               0x6f, 0x14, 0x99, 0xe2, 0x9e, 0xe5, 0x68, 0x13,
+               0x73, 0x08, 0x85, 0xfe, 0x82, 0xf9, 0x74, 0x0f,
+               0x8c, 0xf7, 0x7a, 0x01, 0x7d, 0x06, 0x8b, 0xf0,
+               0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c,
+               0x1f, 0x64, 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63,
+               0x03, 0x78, 0xf5, 0x8e, 0xf2, 0x89, 0x04, 0x7f,
+               0xfc, 0x87, 0x0a, 0x71, 0x0d, 0x76, 0xfb, 0x80,
+               0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47,
+               0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8,
+               0xd8, 0xa3, 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4,
+               0x27, 0x5c, 0xd1, 0xaa, 0xd6, 0xad, 0x20, 0x5b,
+       },
+       {
+               0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69,
+               0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae,
+               0x93, 0xef, 0x6b, 0x17, 0x7e, 0x02, 0x86, 0xfa,
+               0x54, 0x28, 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d,
+               0x3b, 0x47, 0xc3, 0xbf, 0xd6, 0xaa, 0x2e, 0x52,
+               0xfc, 0x80, 0x04, 0x78, 0x11, 0x6d, 0xe9, 0x95,
+               0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1,
+               0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x06,
+               0x76, 0x0a, 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f,
+               0xb1, 0xcd, 0x49, 0x35, 0x5c, 0x20, 0xa4, 0xd8,
+               0xe5, 0x99, 0x1d, 0x61, 0x08, 0x74, 0xf0, 0x8c,
+               0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b,
+               0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24,
+               0x8a, 0xf6, 0x72, 0x0e, 0x67, 0x1b, 0x9f, 0xe3,
+               0xde, 0xa2, 0x26, 0x5a, 0x33, 0x4f, 0xcb, 0xb7,
+               0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, 0x0c, 0x70,
+               0xec, 0x90, 0x14, 0x68, 0x01, 0x7d, 0xf9, 0x85,
+               0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42,
+               0x7f, 0x03, 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16,
+               0xb8, 0xc4, 0x40, 0x3c, 0x55, 0x29, 0xad, 0xd1,
+               0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, 0xc2, 0xbe,
+               0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x05, 0x79,
+               0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d,
+               0x83, 0xff, 0x7b, 0x07, 0x6e, 0x12, 0x96, 0xea,
+               0x9a, 0xe6, 0x62, 0x1e, 0x77, 0x0b, 0x8f, 0xf3,
+               0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, 0x48, 0x34,
+               0x09, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60,
+               0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7,
+               0xa1, 0xdd, 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8,
+               0x66, 0x1a, 0x9e, 0xe2, 0x8b, 0xf7, 0x73, 0x0f,
+               0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, 0x27, 0x5b,
+               0xf5, 0x89, 0x0d, 0x71, 0x18, 0x64, 0xe0, 0x9c,
+       },
+       {
+               0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e,
+               0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1,
+               0x83, 0xfe, 0x79, 0x04, 0x6a, 0x17, 0x90, 0xed,
+               0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, 0x5f, 0x22,
+               0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x08, 0x75,
+               0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba,
+               0x98, 0xe5, 0x62, 0x1f, 0x71, 0x0c, 0x8b, 0xf6,
+               0x57, 0x2a, 0xad, 0xd0, 0xbe, 0xc3, 0x44, 0x39,
+               0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, 0x25, 0x58,
+               0xf9, 0x84, 0x03, 0x7e, 0x10, 0x6d, 0xea, 0x97,
+               0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb,
+               0x7a, 0x07, 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14,
+               0x2d, 0x50, 0xd7, 0xaa, 0xc4, 0xb9, 0x3e, 0x43,
+               0xe2, 0x9f, 0x18, 0x65, 0x0b, 0x76, 0xf1, 0x8c,
+               0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0,
+               0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0x0f,
+               0x6c, 0x11, 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x02,
+               0xa3, 0xde, 0x59, 0x24, 0x4a, 0x37, 0xb0, 0xcd,
+               0xef, 0x92, 0x15, 0x68, 0x06, 0x7b, 0xfc, 0x81,
+               0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e,
+               0x77, 0x0a, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19,
+               0xb8, 0xc5, 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6,
+               0xf4, 0x89, 0x0e, 0x73, 0x1d, 0x60, 0xe7, 0x9a,
+               0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, 0x28, 0x55,
+               0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34,
+               0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x01, 0x86, 0xfb,
+               0xd9, 0xa4, 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7,
+               0x16, 0x6b, 0xec, 0x91, 0xff, 0x82, 0x05, 0x78,
+               0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, 0x52, 0x2f,
+               0x8e, 0xf3, 0x74, 0x09, 0x67, 0x1a, 0x9d, 0xe0,
+               0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac,
+               0x0d, 0x70, 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63,
+       },
+       {
+               0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67,
+               0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0,
+               0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4,
+               0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x03,
+               0x7b, 0x05, 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c,
+               0xac, 0xd2, 0x50, 0x2e, 0x49, 0x37, 0xb5, 0xcb,
+               0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, 0xd1, 0xaf,
+               0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x06, 0x78,
+               0xf6, 0x88, 0x0a, 0x74, 0x13, 0x6d, 0xef, 0x91,
+               0x21, 0x5f, 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46,
+               0x45, 0x3b, 0xb9, 0xc7, 0xa0, 0xde, 0x5c, 0x22,
+               0x92, 0xec, 0x6e, 0x10, 0x77, 0x09, 0x8b, 0xf5,
+               0x8d, 0xf3, 0x71, 0x0f, 0x68, 0x16, 0x94, 0xea,
+               0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d,
+               0x3e, 0x40, 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59,
+               0xe9, 0x97, 0x15, 0x6b, 0x0c, 0x72, 0xf0, 0x8e,
+               0xf1, 0x8f, 0x0d, 0x73, 0x14, 0x6a, 0xe8, 0x96,
+               0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41,
+               0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25,
+               0x95, 0xeb, 0x69, 0x17, 0x70, 0x0e, 0x8c, 0xf2,
+               0x8a, 0xf4, 0x76, 0x08, 0x6f, 0x11, 0x93, 0xed,
+               0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, 0x44, 0x3a,
+               0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e,
+               0xee, 0x90, 0x12, 0x6c, 0x0b, 0x75, 0xf7, 0x89,
+               0x07, 0x79, 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60,
+               0xd0, 0xae, 0x2c, 0x52, 0x35, 0x4b, 0xc9, 0xb7,
+               0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, 0xad, 0xd3,
+               0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x04,
+               0x7c, 0x02, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b,
+               0xab, 0xd5, 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc,
+               0xcf, 0xb1, 0x33, 0x4d, 0x2a, 0x54, 0xd6, 0xa8,
+               0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, 0x01, 0x7f,
+       },
+       {
+               0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60,
+               0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf,
+               0xa3, 0xdc, 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3,
+               0x7c, 0x03, 0x82, 0xfd, 0x9d, 0xe2, 0x63, 0x1c,
+               0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, 0x44, 0x3b,
+               0x84, 0xfb, 0x7a, 0x05, 0x65, 0x1a, 0x9b, 0xe4,
+               0xf8, 0x87, 0x06, 0x79, 0x19, 0x66, 0xe7, 0x98,
+               0x27, 0x58, 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47,
+               0xb6, 0xc9, 0x48, 0x37, 0x57, 0x28, 0xa9, 0xd6,
+               0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, 0x76, 0x09,
+               0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0x0a, 0x75,
+               0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa,
+               0xed, 0x92, 0x13, 0x6c, 0x0c, 0x73, 0xf2, 0x8d,
+               0x32, 0x4d, 0xcc, 0xb3, 0xd3, 0xac, 0x2d, 0x52,
+               0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, 0x51, 0x2e,
+               0x91, 0xee, 0x6f, 0x10, 0x70, 0x0f, 0x8e, 0xf1,
+               0x71, 0x0e, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11,
+               0xae, 0xd1, 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce,
+               0xd2, 0xad, 0x2c, 0x53, 0x33, 0x4c, 0xcd, 0xb2,
+               0x0d, 0x72, 0xf3, 0x8c, 0xec, 0x93, 0x12, 0x6d,
+               0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a,
+               0xf5, 0x8a, 0x0b, 0x74, 0x14, 0x6b, 0xea, 0x95,
+               0x89, 0xf6, 0x77, 0x08, 0x68, 0x17, 0x96, 0xe9,
+               0x56, 0x29, 0xa8, 0xd7, 0xb7, 0xc8, 0x49, 0x36,
+               0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, 0xd8, 0xa7,
+               0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x07, 0x78,
+               0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x04,
+               0xbb, 0xc4, 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb,
+               0x9c, 0xe3, 0x62, 0x1d, 0x7d, 0x02, 0x83, 0xfc,
+               0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, 0x5c, 0x23,
+               0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f,
+               0xe0, 0x9f, 0x1e, 0x61, 0x01, 0x7e, 0xff, 0x80,
+       },
+       {
+               0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7,
+               0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3,
+               0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, 0xcf, 0x4f,
+               0x9c, 0x1c, 0x81, 0x01, 0xa6, 0x26, 0xbb, 0x3b,
+               0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a,
+               0xb9, 0x39, 0xa4, 0x24, 0x83, 0x03, 0x9e, 0x1e,
+               0x25, 0xa5, 0x38, 0xb8, 0x1f, 0x9f, 0x02, 0x82,
+               0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, 0x76, 0xf6,
+               0x87, 0x07, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20,
+               0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54,
+               0x6f, 0xef, 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8,
+               0x1b, 0x9b, 0x06, 0x86, 0x21, 0xa1, 0x3c, 0xbc,
+               0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, 0x6d, 0xed,
+               0x3e, 0xbe, 0x23, 0xa3, 0x04, 0x84, 0x19, 0x99,
+               0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x05,
+               0xd6, 0x56, 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71,
+               0x13, 0x93, 0x0e, 0x8e, 0x29, 0xa9, 0x34, 0xb4,
+               0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, 0x40, 0xc0,
+               0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c,
+               0x8f, 0x0f, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28,
+               0xde, 0x5e, 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79,
+               0xaa, 0x2a, 0xb7, 0x37, 0x90, 0x10, 0x8d, 0x0d,
+               0x36, 0xb6, 0x2b, 0xab, 0x0c, 0x8c, 0x11, 0x91,
+               0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5,
+               0x94, 0x14, 0x89, 0x09, 0xae, 0x2e, 0xb3, 0x33,
+               0xe0, 0x60, 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47,
+               0x7c, 0xfc, 0x61, 0xe1, 0x46, 0xc6, 0x5b, 0xdb,
+               0x08, 0x88, 0x15, 0x95, 0x32, 0xb2, 0x2f, 0xaf,
+               0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe,
+               0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0x0a, 0x8a,
+               0xb1, 0x31, 0xac, 0x2c, 0x8b, 0x0b, 0x96, 0x16,
+               0xc5, 0x45, 0xd8, 0x58, 0xff, 0x7f, 0xe2, 0x62,
+       },
+       {
+               0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+               0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc,
+               0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58,
+               0x84, 0x05, 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24,
+               0xed, 0x6c, 0xf2, 0x73, 0xd3, 0x52, 0xcc, 0x4d,
+               0x91, 0x10, 0x8e, 0x0f, 0xaf, 0x2e, 0xb0, 0x31,
+               0x15, 0x94, 0x0a, 0x8b, 0x2b, 0xaa, 0x34, 0xb5,
+               0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9,
+               0xc7, 0x46, 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67,
+               0xbb, 0x3a, 0xa4, 0x25, 0x85, 0x04, 0x9a, 0x1b,
+               0x3f, 0xbe, 0x20, 0xa1, 0x01, 0x80, 0x1e, 0x9f,
+               0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3,
+               0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 0x0b, 0x8a,
+               0x56, 0xd7, 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6,
+               0xd2, 0x53, 0xcd, 0x4c, 0xec, 0x6d, 0xf3, 0x72,
+               0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, 0x8f, 0x0e,
+               0x93, 0x12, 0x8c, 0x0d, 0xad, 0x2c, 0xb2, 0x33,
+               0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f,
+               0x6b, 0xea, 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb,
+               0x17, 0x96, 0x08, 0x89, 0x29, 0xa8, 0x36, 0xb7,
+               0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, 0x5f, 0xde,
+               0x02, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2,
+               0x86, 0x07, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26,
+               0xfa, 0x7b, 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a,
+               0x54, 0xd5, 0x4b, 0xca, 0x6a, 0xeb, 0x75, 0xf4,
+               0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, 0x09, 0x88,
+               0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0x0c,
+               0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70,
+               0xb9, 0x38, 0xa6, 0x27, 0x87, 0x06, 0x98, 0x19,
+               0xc5, 0x44, 0xda, 0x5b, 0xfb, 0x7a, 0xe4, 0x65,
+               0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, 0x60, 0xe1,
+               0x3d, 0xbc, 0x22, 0xa3, 0x03, 0x82, 0x1c, 0x9d,
+       },
+       {
+               0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9,
+               0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd,
+               0xc8, 0x4a, 0xd1, 0x53, 0xfa, 0x78, 0xe3, 0x61,
+               0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, 0x87, 0x05,
+               0x8d, 0x0f, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24,
+               0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40,
+               0x45, 0xc7, 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec,
+               0x21, 0xa3, 0x38, 0xba, 0x13, 0x91, 0x0a, 0x88,
+               0x07, 0x85, 0x1e, 0x9c, 0x35, 0xb7, 0x2c, 0xae,
+               0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca,
+               0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66,
+               0xab, 0x29, 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x02,
+               0x8a, 0x08, 0x93, 0x11, 0xb8, 0x3a, 0xa1, 0x23,
+               0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, 0xc5, 0x47,
+               0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb,
+               0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0x0d, 0x8f,
+               0x0e, 0x8c, 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7,
+               0x6a, 0xe8, 0x73, 0xf1, 0x58, 0xda, 0x41, 0xc3,
+               0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, 0xed, 0x6f,
+               0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0x0b,
+               0x83, 0x01, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a,
+               0xe7, 0x65, 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e,
+               0x4b, 0xc9, 0x52, 0xd0, 0x79, 0xfb, 0x60, 0xe2,
+               0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, 0x04, 0x86,
+               0x09, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0,
+               0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4,
+               0xc1, 0x43, 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68,
+               0xa5, 0x27, 0xbc, 0x3e, 0x97, 0x15, 0x8e, 0x0c,
+               0x84, 0x06, 0x9d, 0x1f, 0xb6, 0x34, 0xaf, 0x2d,
+               0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49,
+               0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5,
+               0x28, 0xaa, 0x31, 0xb3, 0x1a, 0x98, 0x03, 0x81,
+       },
+       {
+               0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae,
+               0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2,
+               0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76,
+               0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x01, 0x99, 0x1a,
+               0xad, 0x2e, 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x03,
+               0xc1, 0x42, 0xda, 0x59, 0xf7, 0x74, 0xec, 0x6f,
+               0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, 0x58, 0xdb,
+               0x19, 0x9a, 0x02, 0x81, 0x2f, 0xac, 0x34, 0xb7,
+               0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9,
+               0x2b, 0xa8, 0x30, 0xb3, 0x1d, 0x9e, 0x06, 0x85,
+               0x9f, 0x1c, 0x84, 0x07, 0xa9, 0x2a, 0xb2, 0x31,
+               0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, 0xde, 0x5d,
+               0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44,
+               0x86, 0x05, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28,
+               0x32, 0xb1, 0x29, 0xaa, 0x04, 0x87, 0x1f, 0x9c,
+               0x5e, 0xdd, 0x45, 0xc6, 0x68, 0xeb, 0x73, 0xf0,
+               0x8e, 0x0d, 0x95, 0x16, 0xb8, 0x3b, 0xa3, 0x20,
+               0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c,
+               0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8,
+               0x3a, 0xb9, 0x21, 0xa2, 0x0c, 0x8f, 0x17, 0x94,
+               0x23, 0xa0, 0x38, 0xbb, 0x15, 0x96, 0x0e, 0x8d,
+               0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, 0x62, 0xe1,
+               0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55,
+               0x97, 0x14, 0x8c, 0x0f, 0xa1, 0x22, 0xba, 0x39,
+               0xc9, 0x4a, 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67,
+               0xa5, 0x26, 0xbe, 0x3d, 0x93, 0x10, 0x88, 0x0b,
+               0x11, 0x92, 0x0a, 0x89, 0x27, 0xa4, 0x3c, 0xbf,
+               0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3,
+               0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca,
+               0x08, 0x8b, 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6,
+               0xbc, 0x3f, 0xa7, 0x24, 0x8a, 0x09, 0x91, 0x12,
+               0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, 0xfd, 0x7e,
+       },
+       {
+               0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb,
+               0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef,
+               0xa8, 0x2c, 0xbd, 0x39, 0x82, 0x06, 0x97, 0x13,
+               0xfc, 0x78, 0xe9, 0x6d, 0xd6, 0x52, 0xc3, 0x47,
+               0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, 0x72, 0xf6,
+               0x19, 0x9d, 0x0c, 0x88, 0x33, 0xb7, 0x26, 0xa2,
+               0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e,
+               0xb1, 0x35, 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0x0a,
+               0x9a, 0x1e, 0x8f, 0x0b, 0xb0, 0x34, 0xa5, 0x21,
+               0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, 0xf1, 0x75,
+               0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0x0d, 0x89,
+               0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd,
+               0xd7, 0x53, 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c,
+               0x83, 0x07, 0x96, 0x12, 0xa9, 0x2d, 0xbc, 0x38,
+               0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, 0x40, 0xc4,
+               0x2b, 0xaf, 0x3e, 0xba, 0x01, 0x85, 0x14, 0x90,
+               0x29, 0xad, 0x3c, 0xb8, 0x03, 0x87, 0x16, 0x92,
+               0x7d, 0xf9, 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6,
+               0x81, 0x05, 0x94, 0x10, 0xab, 0x2f, 0xbe, 0x3a,
+               0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, 0xea, 0x6e,
+               0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf,
+               0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0x0f, 0x8b,
+               0xcc, 0x48, 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77,
+               0x98, 0x1c, 0x8d, 0x09, 0xb2, 0x36, 0xa7, 0x23,
+               0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, 0x8c, 0x08,
+               0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c,
+               0x1b, 0x9f, 0x0e, 0x8a, 0x31, 0xb5, 0x24, 0xa0,
+               0x4f, 0xcb, 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4,
+               0xfe, 0x7a, 0xeb, 0x6f, 0xd4, 0x50, 0xc1, 0x45,
+               0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x04, 0x95, 0x11,
+               0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed,
+               0x02, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9,
+       },
+       {
+               0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc,
+               0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0,
+               0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, 0x81, 0x04,
+               0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58,
+               0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1,
+               0x31, 0xb4, 0x26, 0xa3, 0x1f, 0x9a, 0x08, 0x8d,
+               0xd5, 0x50, 0xc2, 0x47, 0xfb, 0x7e, 0xec, 0x69,
+               0x89, 0x0c, 0x9e, 0x1b, 0xa7, 0x22, 0xb0, 0x35,
+               0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66,
+               0x86, 0x03, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a,
+               0x62, 0xe7, 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde,
+               0x3e, 0xbb, 0x29, 0xac, 0x10, 0x95, 0x07, 0x82,
+               0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, 0x8e, 0x0b,
+               0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57,
+               0x0f, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3,
+               0x53, 0xd6, 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef,
+               0xa9, 0x2c, 0xbe, 0x3b, 0x87, 0x02, 0x90, 0x15,
+               0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, 0xcc, 0x49,
+               0x11, 0x94, 0x06, 0x83, 0x3f, 0xba, 0x28, 0xad,
+               0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1,
+               0xc4, 0x41, 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78,
+               0x98, 0x1d, 0x8f, 0x0a, 0xb6, 0x33, 0xa1, 0x24,
+               0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, 0x45, 0xc0,
+               0x20, 0xa5, 0x37, 0xb2, 0x0e, 0x8b, 0x19, 0x9c,
+               0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf,
+               0x2f, 0xaa, 0x38, 0xbd, 0x01, 0x84, 0x16, 0x93,
+               0xcb, 0x4e, 0xdc, 0x59, 0xe5, 0x60, 0xf2, 0x77,
+               0x97, 0x12, 0x80, 0x05, 0xb9, 0x3c, 0xae, 0x2b,
+               0x1e, 0x9b, 0x09, 0x8c, 0x30, 0xb5, 0x27, 0xa2,
+               0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe,
+               0xa6, 0x23, 0xb1, 0x34, 0x88, 0x0d, 0x9f, 0x1a,
+               0xfa, 0x7f, 0xed, 0x68, 0xd4, 0x51, 0xc3, 0x46,
+       },
+       {
+               0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5,
+               0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1,
+               0x88, 0x0e, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d,
+               0xcc, 0x4a, 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79,
+               0x0d, 0x8b, 0x1c, 0x9a, 0x2f, 0xa9, 0x3e, 0xb8,
+               0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, 0x7a, 0xfc,
+               0x85, 0x03, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30,
+               0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74,
+               0x1a, 0x9c, 0x0b, 0x8d, 0x38, 0xbe, 0x29, 0xaf,
+               0x5e, 0xd8, 0x4f, 0xc9, 0x7c, 0xfa, 0x6d, 0xeb,
+               0x92, 0x14, 0x83, 0x05, 0xb0, 0x36, 0xa1, 0x27,
+               0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63,
+               0x17, 0x91, 0x06, 0x80, 0x35, 0xb3, 0x24, 0xa2,
+               0x53, 0xd5, 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6,
+               0x9f, 0x19, 0x8e, 0x08, 0xbd, 0x3b, 0xac, 0x2a,
+               0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, 0xe8, 0x6e,
+               0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x07, 0x81,
+               0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5,
+               0xbc, 0x3a, 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x09,
+               0xf8, 0x7e, 0xe9, 0x6f, 0xda, 0x5c, 0xcb, 0x4d,
+               0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d, 0x0a, 0x8c,
+               0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8,
+               0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x04,
+               0xf5, 0x73, 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40,
+               0x2e, 0xa8, 0x3f, 0xb9, 0x0c, 0x8a, 0x1d, 0x9b,
+               0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, 0x59, 0xdf,
+               0xa6, 0x20, 0xb7, 0x31, 0x84, 0x02, 0x95, 0x13,
+               0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57,
+               0x23, 0xa5, 0x32, 0xb4, 0x01, 0x87, 0x10, 0x96,
+               0x67, 0xe1, 0x76, 0xf0, 0x45, 0xc3, 0x54, 0xd2,
+               0xab, 0x2d, 0xba, 0x3c, 0x89, 0x0f, 0x98, 0x1e,
+               0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a,
+       },
+       {
+               0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2,
+               0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe,
+               0x98, 0x1f, 0x8b, 0x0c, 0xbe, 0x39, 0xad, 0x2a,
+               0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, 0xe1, 0x66,
+               0x2d, 0xaa, 0x3e, 0xb9, 0x0b, 0x8c, 0x18, 0x9f,
+               0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3,
+               0xb5, 0x32, 0xa6, 0x21, 0x93, 0x14, 0x80, 0x07,
+               0xf9, 0x7e, 0xea, 0x6d, 0xdf, 0x58, 0xcc, 0x4b,
+               0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, 0x6f, 0xe8,
+               0x16, 0x91, 0x05, 0x82, 0x30, 0xb7, 0x23, 0xa4,
+               0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70,
+               0x8e, 0x09, 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c,
+               0x77, 0xf0, 0x64, 0xe3, 0x51, 0xd6, 0x42, 0xc5,
+               0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, 0x0e, 0x89,
+               0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d,
+               0xa3, 0x24, 0xb0, 0x37, 0x85, 0x02, 0x96, 0x11,
+               0xb4, 0x33, 0xa7, 0x20, 0x92, 0x15, 0x81, 0x06,
+               0xf8, 0x7f, 0xeb, 0x6c, 0xde, 0x59, 0xcd, 0x4a,
+               0x2c, 0xab, 0x3f, 0xb8, 0x0a, 0x8d, 0x19, 0x9e,
+               0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2,
+               0x99, 0x1e, 0x8a, 0x0d, 0xbf, 0x38, 0xac, 0x2b,
+               0xd5, 0x52, 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67,
+               0x01, 0x86, 0x12, 0x95, 0x27, 0xa0, 0x34, 0xb3,
+               0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, 0x78, 0xff,
+               0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c,
+               0xa2, 0x25, 0xb1, 0x36, 0x84, 0x03, 0x97, 0x10,
+               0x76, 0xf1, 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4,
+               0x3a, 0xbd, 0x29, 0xae, 0x1c, 0x9b, 0x0f, 0x88,
+               0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, 0xf6, 0x71,
+               0x8f, 0x08, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d,
+               0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9,
+               0x17, 0x90, 0x04, 0x83, 0x31, 0xb6, 0x22, 0xa5,
+       },
+       {
+               0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f,
+               0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab,
+               0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7,
+               0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3,
+               0xd0, 0x58, 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f,
+               0xe4, 0x6c, 0xe9, 0x61, 0xfe, 0x76, 0xf3, 0x7b,
+               0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, 0xaf, 0x27,
+               0x8c, 0x04, 0x81, 0x09, 0x96, 0x1e, 0x9b, 0x13,
+               0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22,
+               0x89, 0x01, 0x84, 0x0c, 0x93, 0x1b, 0x9e, 0x16,
+               0xd5, 0x5d, 0xd8, 0x50, 0xcf, 0x47, 0xc2, 0x4a,
+               0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, 0xf6, 0x7e,
+               0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2,
+               0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6,
+               0x05, 0x8d, 0x08, 0x80, 0x1f, 0x97, 0x12, 0x9a,
+               0x31, 0xb9, 0x3c, 0xb4, 0x2b, 0xa3, 0x26, 0xae,
+               0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, 0x70, 0xf8,
+               0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc,
+               0x0f, 0x87, 0x02, 0x8a, 0x15, 0x9d, 0x18, 0x90,
+               0x3b, 0xb3, 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4,
+               0xb7, 0x3f, 0xba, 0x32, 0xad, 0x25, 0xa0, 0x28,
+               0x83, 0x0b, 0x8e, 0x06, 0x99, 0x11, 0x94, 0x1c,
+               0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40,
+               0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74,
+               0xda, 0x52, 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45,
+               0xee, 0x66, 0xe3, 0x6b, 0xf4, 0x7c, 0xf9, 0x71,
+               0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, 0xa5, 0x2d,
+               0x86, 0x0e, 0x8b, 0x03, 0x9c, 0x14, 0x91, 0x19,
+               0x0a, 0x82, 0x07, 0x8f, 0x10, 0x98, 0x1d, 0x95,
+               0x3e, 0xb6, 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1,
+               0x62, 0xea, 0x6f, 0xe7, 0x78, 0xf0, 0x75, 0xfd,
+               0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, 0x41, 0xc9,
+       },
+       {
+               0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98,
+               0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4,
+               0x78, 0xf1, 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0,
+               0x44, 0xcd, 0x4b, 0xc2, 0x5a, 0xd3, 0x55, 0xdc,
+               0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, 0xe1, 0x68,
+               0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54,
+               0x88, 0x01, 0x87, 0x0e, 0x96, 0x1f, 0x99, 0x10,
+               0xb4, 0x3d, 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c,
+               0xfd, 0x74, 0xf2, 0x7b, 0xe3, 0x6a, 0xec, 0x65,
+               0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, 0xd0, 0x59,
+               0x85, 0x0c, 0x8a, 0x03, 0x9b, 0x12, 0x94, 0x1d,
+               0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21,
+               0x0d, 0x84, 0x02, 0x8b, 0x13, 0x9a, 0x1c, 0x95,
+               0x31, 0xb8, 0x3e, 0xb7, 0x2f, 0xa6, 0x20, 0xa9,
+               0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, 0x64, 0xed,
+               0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1,
+               0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f,
+               0xdb, 0x52, 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43,
+               0x9f, 0x16, 0x90, 0x19, 0x81, 0x08, 0x8e, 0x07,
+               0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, 0xb2, 0x3b,
+               0x17, 0x9e, 0x18, 0x91, 0x09, 0x80, 0x06, 0x8f,
+               0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3,
+               0x6f, 0xe6, 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7,
+               0x53, 0xda, 0x5c, 0xd5, 0x4d, 0xc4, 0x42, 0xcb,
+               0x1a, 0x93, 0x15, 0x9c, 0x04, 0x8d, 0x0b, 0x82,
+               0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe,
+               0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa,
+               0x5e, 0xd7, 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6,
+               0xea, 0x63, 0xe5, 0x6c, 0xf4, 0x7d, 0xfb, 0x72,
+               0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, 0xc7, 0x4e,
+               0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x05, 0x83, 0x0a,
+               0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36,
+       },
+       {
+               0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91,
+               0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5,
+               0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, 0x53, 0xd9,
+               0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd,
+               0x90, 0x1a, 0x99, 0x13, 0x82, 0x08, 0x8b, 0x01,
+               0xb4, 0x3e, 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25,
+               0xd8, 0x52, 0xd1, 0x5b, 0xca, 0x40, 0xc3, 0x49,
+               0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, 0xe7, 0x6d,
+               0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac,
+               0x19, 0x93, 0x10, 0x9a, 0x0b, 0x81, 0x02, 0x88,
+               0x75, 0xff, 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4,
+               0x51, 0xdb, 0x58, 0xd2, 0x43, 0xc9, 0x4a, 0xc0,
+               0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, 0xb6, 0x3c,
+               0x89, 0x03, 0x80, 0x0a, 0x9b, 0x11, 0x92, 0x18,
+               0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74,
+               0xc1, 0x4b, 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50,
+               0x7a, 0xf0, 0x73, 0xf9, 0x68, 0xe2, 0x61, 0xeb,
+               0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, 0x45, 0xcf,
+               0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3,
+               0x16, 0x9c, 0x1f, 0x95, 0x04, 0x8e, 0x0d, 0x87,
+               0xea, 0x60, 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b,
+               0xce, 0x44, 0xc7, 0x4d, 0xdc, 0x56, 0xd5, 0x5f,
+               0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, 0xb9, 0x33,
+               0x86, 0x0c, 0x8f, 0x05, 0x94, 0x1e, 0x9d, 0x17,
+               0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6,
+               0x63, 0xe9, 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2,
+               0x0f, 0x85, 0x06, 0x8c, 0x1d, 0x97, 0x14, 0x9e,
+               0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, 0x30, 0xba,
+               0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46,
+               0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62,
+               0x9f, 0x15, 0x96, 0x1c, 0x8d, 0x07, 0x84, 0x0e,
+               0xbb, 0x31, 0xb2, 0x38, 0xa9, 0x23, 0xa0, 0x2a,
+       },
+       {
+               0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96,
+               0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba,
+               0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce,
+               0x74, 0xff, 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2,
+               0xb0, 0x3b, 0xbb, 0x30, 0xa6, 0x2d, 0xad, 0x26,
+               0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x01, 0x81, 0x0a,
+               0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e,
+               0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52,
+               0x7d, 0xf6, 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb,
+               0x51, 0xda, 0x5a, 0xd1, 0x47, 0xcc, 0x4c, 0xc7,
+               0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, 0x38, 0xb3,
+               0x09, 0x82, 0x02, 0x89, 0x1f, 0x94, 0x14, 0x9f,
+               0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b,
+               0xe1, 0x6a, 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77,
+               0x95, 0x1e, 0x9e, 0x15, 0x83, 0x08, 0x88, 0x03,
+               0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, 0xa4, 0x2f,
+               0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c,
+               0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40,
+               0xa2, 0x29, 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34,
+               0x8e, 0x05, 0x85, 0x0e, 0x98, 0x13, 0x93, 0x18,
+               0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, 0x57, 0xdc,
+               0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0,
+               0x12, 0x99, 0x19, 0x92, 0x04, 0x8f, 0x0f, 0x84,
+               0x3e, 0xb5, 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8,
+               0x87, 0x0c, 0x8c, 0x07, 0x91, 0x1a, 0x9a, 0x11,
+               0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, 0xb6, 0x3d,
+               0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49,
+               0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65,
+               0x37, 0xbc, 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1,
+               0x1b, 0x90, 0x10, 0x9b, 0x0d, 0x86, 0x06, 0x8d,
+               0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, 0x72, 0xf9,
+               0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5,
+       },
+       {
+               0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83,
+               0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97,
+               0x28, 0xa4, 0x2d, 0xa1, 0x22, 0xae, 0x27, 0xab,
+               0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, 0x33, 0xbf,
+               0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3,
+               0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7,
+               0x78, 0xf4, 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb,
+               0x6c, 0xe0, 0x69, 0xe5, 0x66, 0xea, 0x63, 0xef,
+               0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, 0xaf, 0x23,
+               0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37,
+               0x88, 0x04, 0x8d, 0x01, 0x82, 0x0e, 0x87, 0x0b,
+               0x9c, 0x10, 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f,
+               0xf0, 0x7c, 0xf5, 0x79, 0xfa, 0x76, 0xff, 0x73,
+               0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, 0xeb, 0x67,
+               0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b,
+               0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f,
+               0x5d, 0xd1, 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde,
+               0x49, 0xc5, 0x4c, 0xc0, 0x43, 0xcf, 0x46, 0xca,
+               0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, 0x7a, 0xf6,
+               0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2,
+               0x0d, 0x81, 0x08, 0x84, 0x07, 0x8b, 0x02, 0x8e,
+               0x19, 0x95, 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a,
+               0x25, 0xa9, 0x20, 0xac, 0x2f, 0xa3, 0x2a, 0xa6,
+               0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, 0x3e, 0xb2,
+               0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e,
+               0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a,
+               0xd5, 0x59, 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56,
+               0xc1, 0x4d, 0xc4, 0x48, 0xcb, 0x47, 0xce, 0x42,
+               0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, 0xa2, 0x2e,
+               0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a,
+               0x85, 0x09, 0x80, 0x0c, 0x8f, 0x03, 0x8a, 0x06,
+               0x91, 0x1d, 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12,
+       },
+       {
+               0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84,
+               0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98,
+               0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc,
+               0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0,
+               0x70, 0xfd, 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4,
+               0x6c, 0xe1, 0x6b, 0xe6, 0x62, 0xef, 0x65, 0xe8,
+               0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb, 0x41, 0xcc,
+               0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0,
+               0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64,
+               0xfc, 0x71, 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78,
+               0xd8, 0x55, 0xdf, 0x52, 0xd6, 0x5b, 0xd1, 0x5c,
+               0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, 0xcd, 0x40,
+               0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14,
+               0x8c, 0x01, 0x8b, 0x06, 0x82, 0x0f, 0x85, 0x08,
+               0xa8, 0x25, 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c,
+               0xb4, 0x39, 0xb3, 0x3e, 0xba, 0x37, 0xbd, 0x30,
+               0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, 0xd4, 0x59,
+               0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45,
+               0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61,
+               0xf9, 0x74, 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d,
+               0xad, 0x20, 0xaa, 0x27, 0xa3, 0x2e, 0xa4, 0x29,
+               0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, 0xb8, 0x35,
+               0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11,
+               0x89, 0x04, 0x8e, 0x03, 0x87, 0x0a, 0x80, 0x0d,
+               0x3d, 0xb0, 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9,
+               0x21, 0xac, 0x26, 0xab, 0x2f, 0xa2, 0x28, 0xa5,
+               0x05, 0x88, 0x02, 0x8f, 0x0b, 0x86, 0x0c, 0x81,
+               0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d,
+               0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9,
+               0x51, 0xdc, 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5,
+               0x75, 0xf8, 0x72, 0xff, 0x7b, 0xf6, 0x7c, 0xf1,
+               0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea, 0x60, 0xed,
+       },
+       {
+               0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d,
+               0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89,
+               0x08, 0x86, 0x09, 0x87, 0x0a, 0x84, 0x0b, 0x85,
+               0x0c, 0x82, 0x0d, 0x83, 0x0e, 0x80, 0x0f, 0x81,
+               0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, 0x13, 0x9d,
+               0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99,
+               0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95,
+               0x1c, 0x92, 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91,
+               0x20, 0xae, 0x21, 0xaf, 0x22, 0xac, 0x23, 0xad,
+               0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, 0x27, 0xa9,
+               0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5,
+               0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1,
+               0x30, 0xbe, 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd,
+               0x34, 0xba, 0x35, 0xbb, 0x36, 0xb8, 0x37, 0xb9,
+               0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, 0x3b, 0xb5,
+               0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1,
+               0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd,
+               0x44, 0xca, 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9,
+               0x48, 0xc6, 0x49, 0xc7, 0x4a, 0xc4, 0x4b, 0xc5,
+               0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, 0x4f, 0xc1,
+               0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd,
+               0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9,
+               0x58, 0xd6, 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5,
+               0x5c, 0xd2, 0x5d, 0xd3, 0x5e, 0xd0, 0x5f, 0xd1,
+               0x60, 0xee, 0x61, 0xef, 0x62, 0xec, 0x63, 0xed,
+               0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9,
+               0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5,
+               0x6c, 0xe2, 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1,
+               0x70, 0xfe, 0x71, 0xff, 0x72, 0xfc, 0x73, 0xfd,
+               0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, 0x77, 0xf9,
+               0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5,
+               0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1,
+       },
+       {
+               0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a,
+               0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86,
+               0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, 0x1d, 0x92,
+               0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e,
+               0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba,
+               0x3c, 0xb3, 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6,
+               0x28, 0xa7, 0x2b, 0xa4, 0x2e, 0xa1, 0x2d, 0xa2,
+               0x24, 0xab, 0x27, 0xa8, 0x22, 0xad, 0x21, 0xae,
+               0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea,
+               0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6,
+               0x78, 0xf7, 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2,
+               0x74, 0xfb, 0x77, 0xf8, 0x72, 0xfd, 0x71, 0xfe,
+               0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, 0x55, 0xda,
+               0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6,
+               0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2,
+               0x44, 0xcb, 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce,
+               0xc0, 0x4f, 0xc3, 0x4c, 0xc6, 0x49, 0xc5, 0x4a,
+               0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, 0xc9, 0x46,
+               0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52,
+               0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e,
+               0xf0, 0x7f, 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a,
+               0xfc, 0x73, 0xff, 0x70, 0xfa, 0x75, 0xf9, 0x76,
+               0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, 0xed, 0x62,
+               0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e,
+               0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a,
+               0xac, 0x23, 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26,
+               0xb8, 0x37, 0xbb, 0x34, 0xbe, 0x31, 0xbd, 0x32,
+               0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, 0xb1, 0x3e,
+               0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a,
+               0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16,
+               0x88, 0x07, 0x8b, 0x04, 0x8e, 0x01, 0x8d, 0x02,
+               0x84, 0x0b, 0x87, 0x08, 0x82, 0x0d, 0x81, 0x0e,
+       },
+       {
+               0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7,
+               0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23,
+               0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22,
+               0x01, 0x91, 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6,
+               0xf7, 0x67, 0xca, 0x5a, 0x8d, 0x1d, 0xb0, 0x20,
+               0x03, 0x93, 0x3e, 0xae, 0x79, 0xe9, 0x44, 0xd4,
+               0x02, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5,
+               0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21,
+               0xf3, 0x63, 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24,
+               0x07, 0x97, 0x3a, 0xaa, 0x7d, 0xed, 0x40, 0xd0,
+               0x06, 0x96, 0x3b, 0xab, 0x7c, 0xec, 0x41, 0xd1,
+               0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25,
+               0x04, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3,
+               0xf0, 0x60, 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27,
+               0xf1, 0x61, 0xcc, 0x5c, 0x8b, 0x1b, 0xb6, 0x26,
+               0x05, 0x95, 0x38, 0xa8, 0x7f, 0xef, 0x42, 0xd2,
+               0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c,
+               0x0f, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8,
+               0x0e, 0x9e, 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9,
+               0xfa, 0x6a, 0xc7, 0x57, 0x80, 0x10, 0xbd, 0x2d,
+               0x0c, 0x9c, 0x31, 0xa1, 0x76, 0xe6, 0x4b, 0xdb,
+               0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f,
+               0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e,
+               0x0d, 0x9d, 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda,
+               0x08, 0x98, 0x35, 0xa5, 0x72, 0xe2, 0x4f, 0xdf,
+               0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, 0xbb, 0x2b,
+               0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a,
+               0x09, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde,
+               0xff, 0x6f, 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28,
+               0x0b, 0x9b, 0x36, 0xa6, 0x71, 0xe1, 0x4c, 0xdc,
+               0x0a, 0x9a, 0x37, 0xa7, 0x70, 0xe0, 0x4d, 0xdd,
+               0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29,
+       },
+       {
+               0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0,
+               0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c,
+               0xe5, 0x74, 0xda, 0x4b, 0x9b, 0x0a, 0xa4, 0x35,
+               0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, 0x58, 0xc9,
+               0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x07,
+               0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb,
+               0x32, 0xa3, 0x0d, 0x9c, 0x4c, 0xdd, 0x73, 0xe2,
+               0xce, 0x5f, 0xf1, 0x60, 0xb0, 0x21, 0x8f, 0x1e,
+               0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, 0xf2, 0x63,
+               0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0x0e, 0x9f,
+               0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86,
+               0xaa, 0x3b, 0x95, 0x04, 0xd4, 0x45, 0xeb, 0x7a,
+               0x64, 0xf5, 0x5b, 0xca, 0x1a, 0x8b, 0x25, 0xb4,
+               0x98, 0x09, 0xa7, 0x36, 0xe6, 0x77, 0xd9, 0x48,
+               0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51,
+               0x7d, 0xec, 0x42, 0xd3, 0x03, 0x92, 0x3c, 0xad,
+               0x7b, 0xea, 0x44, 0xd5, 0x05, 0x94, 0x3a, 0xab,
+               0x87, 0x16, 0xb8, 0x29, 0xf9, 0x68, 0xc6, 0x57,
+               0x9e, 0x0f, 0xa1, 0x30, 0xe0, 0x71, 0xdf, 0x4e,
+               0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2,
+               0xac, 0x3d, 0x93, 0x02, 0xd2, 0x43, 0xed, 0x7c,
+               0x50, 0xc1, 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80,
+               0x49, 0xd8, 0x76, 0xe7, 0x37, 0xa6, 0x08, 0x99,
+               0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, 0xf4, 0x65,
+               0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18,
+               0x34, 0xa5, 0x0b, 0x9a, 0x4a, 0xdb, 0x75, 0xe4,
+               0x2d, 0xbc, 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd,
+               0xd1, 0x40, 0xee, 0x7f, 0xaf, 0x3e, 0x90, 0x01,
+               0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, 0x5e, 0xcf,
+               0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0x0c, 0xa2, 0x33,
+               0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a,
+               0x06, 0x97, 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6,
+       },
+       {
+               0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9,
+               0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d,
+               0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0x0c,
+               0x31, 0xa3, 0x08, 0x9a, 0x43, 0xd1, 0x7a, 0xe8,
+               0xb7, 0x25, 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e,
+               0x53, 0xc1, 0x6a, 0xf8, 0x21, 0xb3, 0x18, 0x8a,
+               0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, 0x29, 0xbb,
+               0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f,
+               0x73, 0xe1, 0x4a, 0xd8, 0x01, 0x93, 0x38, 0xaa,
+               0x97, 0x05, 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e,
+               0xa6, 0x34, 0x9f, 0x0d, 0xd4, 0x46, 0xed, 0x7f,
+               0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, 0x09, 0x9b,
+               0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d,
+               0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9,
+               0x11, 0x83, 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8,
+               0xf5, 0x67, 0xcc, 0x5e, 0x87, 0x15, 0xbe, 0x2c,
+               0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x06, 0xad, 0x3f,
+               0x02, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb,
+               0x33, 0xa1, 0x0a, 0x98, 0x41, 0xd3, 0x78, 0xea,
+               0xd7, 0x45, 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0x0e,
+               0x51, 0xc3, 0x68, 0xfa, 0x23, 0xb1, 0x1a, 0x88,
+               0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, 0xfe, 0x6c,
+               0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d,
+               0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9,
+               0x95, 0x07, 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c,
+               0x71, 0xe3, 0x48, 0xda, 0x03, 0x91, 0x3a, 0xa8,
+               0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, 0x0b, 0x99,
+               0xa4, 0x36, 0x9d, 0x0f, 0xd6, 0x44, 0xef, 0x7d,
+               0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb,
+               0xc6, 0x54, 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f,
+               0xf7, 0x65, 0xce, 0x5c, 0x85, 0x17, 0xbc, 0x2e,
+               0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, 0x58, 0xca,
+       },
+       {
+               0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde,
+               0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32,
+               0xc5, 0x56, 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b,
+               0x29, 0xba, 0x12, 0x81, 0x5f, 0xcc, 0x64, 0xf7,
+               0x97, 0x04, 0xac, 0x3f, 0xe1, 0x72, 0xda, 0x49,
+               0x7b, 0xe8, 0x40, 0xd3, 0x0d, 0x9e, 0x36, 0xa5,
+               0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c,
+               0xbe, 0x2d, 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60,
+               0x33, 0xa0, 0x08, 0x9b, 0x45, 0xd6, 0x7e, 0xed,
+               0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, 0x92, 0x01,
+               0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28,
+               0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4,
+               0xa4, 0x37, 0x9f, 0x0c, 0xd2, 0x41, 0xe9, 0x7a,
+               0x48, 0xdb, 0x73, 0xe0, 0x3e, 0xad, 0x05, 0x96,
+               0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, 0x2c, 0xbf,
+               0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53,
+               0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8,
+               0x8a, 0x19, 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54,
+               0xa3, 0x30, 0x98, 0x0b, 0xd5, 0x46, 0xee, 0x7d,
+               0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, 0x02, 0x91,
+               0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f,
+               0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3,
+               0x34, 0xa7, 0x0f, 0x9c, 0x42, 0xd1, 0x79, 0xea,
+               0xd8, 0x4b, 0xe3, 0x70, 0xae, 0x3d, 0x95, 0x06,
+               0x55, 0xc6, 0x6e, 0xfd, 0x23, 0xb0, 0x18, 0x8b,
+               0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67,
+               0x90, 0x03, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e,
+               0x7c, 0xef, 0x47, 0xd4, 0x0a, 0x99, 0x31, 0xa2,
+               0xc2, 0x51, 0xf9, 0x6a, 0xb4, 0x27, 0x8f, 0x1c,
+               0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, 0x63, 0xf0,
+               0x07, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9,
+               0xeb, 0x78, 0xd0, 0x43, 0x9d, 0x0e, 0xa6, 0x35,
+       },
+       {
+               0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb,
+               0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f,
+               0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, 0xea, 0x7e,
+               0x61, 0xf5, 0x54, 0xc0, 0x0b, 0x9f, 0x3e, 0xaa,
+               0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc,
+               0xa3, 0x37, 0x96, 0x02, 0xc9, 0x5d, 0xfc, 0x68,
+               0xc2, 0x56, 0xf7, 0x63, 0xa8, 0x3c, 0x9d, 0x09,
+               0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, 0x49, 0xdd,
+               0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25,
+               0x3a, 0xae, 0x0f, 0x9b, 0x50, 0xc4, 0x65, 0xf1,
+               0x5b, 0xcf, 0x6e, 0xfa, 0x31, 0xa5, 0x04, 0x90,
+               0x8f, 0x1b, 0xba, 0x2e, 0xe5, 0x71, 0xd0, 0x44,
+               0x99, 0x0d, 0xac, 0x38, 0xf3, 0x67, 0xc6, 0x52,
+               0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86,
+               0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7,
+               0xf8, 0x6c, 0xcd, 0x59, 0x92, 0x06, 0xa7, 0x33,
+               0xc1, 0x55, 0xf4, 0x60, 0xab, 0x3f, 0x9e, 0x0a,
+               0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, 0x4a, 0xde,
+               0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf,
+               0xa0, 0x34, 0x95, 0x01, 0xca, 0x5e, 0xff, 0x6b,
+               0xb6, 0x22, 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d,
+               0x62, 0xf6, 0x57, 0xc3, 0x08, 0x9c, 0x3d, 0xa9,
+               0x03, 0x97, 0x36, 0xa2, 0x69, 0xfd, 0x5c, 0xc8,
+               0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c,
+               0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4,
+               0xfb, 0x6f, 0xce, 0x5a, 0x91, 0x05, 0xa4, 0x30,
+               0x9a, 0x0e, 0xaf, 0x3b, 0xf0, 0x64, 0xc5, 0x51,
+               0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, 0x11, 0x85,
+               0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x07, 0x93,
+               0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47,
+               0xed, 0x79, 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26,
+               0x39, 0xad, 0x0c, 0x98, 0x53, 0xc7, 0x66, 0xf2,
+       },
+       {
+               0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc,
+               0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10,
+               0xa5, 0x30, 0x92, 0x07, 0xcb, 0x5e, 0xfc, 0x69,
+               0x79, 0xec, 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5,
+               0x57, 0xc2, 0x60, 0xf5, 0x39, 0xac, 0x0e, 0x9b,
+               0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, 0xd2, 0x47,
+               0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x09, 0xab, 0x3e,
+               0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2,
+               0xae, 0x3b, 0x99, 0x0c, 0xc0, 0x55, 0xf7, 0x62,
+               0x72, 0xe7, 0x45, 0xd0, 0x1c, 0x89, 0x2b, 0xbe,
+               0x0b, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, 0x52, 0xc7,
+               0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b,
+               0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x02, 0xa0, 0x35,
+               0x25, 0xb0, 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9,
+               0x5c, 0xc9, 0x6b, 0xfe, 0x32, 0xa7, 0x05, 0x90,
+               0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, 0xd9, 0x4c,
+               0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d,
+               0x9d, 0x08, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51,
+               0xe4, 0x71, 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28,
+               0x38, 0xad, 0x0f, 0x9a, 0x56, 0xc3, 0x61, 0xf4,
+               0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, 0x4f, 0xda,
+               0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x06,
+               0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f,
+               0x6f, 0xfa, 0x58, 0xcd, 0x01, 0x94, 0x36, 0xa3,
+               0xef, 0x7a, 0xd8, 0x4d, 0x81, 0x14, 0xb6, 0x23,
+               0x33, 0xa6, 0x04, 0x91, 0x5d, 0xc8, 0x6a, 0xff,
+               0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86,
+               0x96, 0x03, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a,
+               0xb8, 0x2d, 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74,
+               0x64, 0xf1, 0x53, 0xc6, 0x0a, 0x9f, 0x3d, 0xa8,
+               0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, 0x44, 0xd1,
+               0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0x0d,
+       },
+       {
+               0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5,
+               0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01,
+               0x95, 0x03, 0xa4, 0x32, 0xf7, 0x61, 0xc6, 0x50,
+               0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, 0x02, 0x94,
+               0x37, 0xa1, 0x06, 0x90, 0x55, 0xc3, 0x64, 0xf2,
+               0xf3, 0x65, 0xc2, 0x54, 0x91, 0x07, 0xa0, 0x36,
+               0xa2, 0x34, 0x93, 0x05, 0xc0, 0x56, 0xf1, 0x67,
+               0x66, 0xf0, 0x57, 0xc1, 0x04, 0x92, 0x35, 0xa3,
+               0x6e, 0xf8, 0x5f, 0xc9, 0x0c, 0x9a, 0x3d, 0xab,
+               0xaa, 0x3c, 0x9b, 0x0d, 0xc8, 0x5e, 0xf9, 0x6f,
+               0xfb, 0x6d, 0xca, 0x5c, 0x99, 0x0f, 0xa8, 0x3e,
+               0x3f, 0xa9, 0x0e, 0x98, 0x5d, 0xcb, 0x6c, 0xfa,
+               0x59, 0xcf, 0x68, 0xfe, 0x3b, 0xad, 0x0a, 0x9c,
+               0x9d, 0x0b, 0xac, 0x3a, 0xff, 0x69, 0xce, 0x58,
+               0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x09,
+               0x08, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd,
+               0xdc, 0x4a, 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19,
+               0x18, 0x8e, 0x29, 0xbf, 0x7a, 0xec, 0x4b, 0xdd,
+               0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, 0x1a, 0x8c,
+               0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48,
+               0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e,
+               0x2f, 0xb9, 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea,
+               0x7e, 0xe8, 0x4f, 0xd9, 0x1c, 0x8a, 0x2d, 0xbb,
+               0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, 0xe9, 0x7f,
+               0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77,
+               0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3,
+               0x27, 0xb1, 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2,
+               0xe3, 0x75, 0xd2, 0x44, 0x81, 0x17, 0xb0, 0x26,
+               0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, 0xd6, 0x40,
+               0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84,
+               0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5,
+               0xd4, 0x42, 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11,
+       },
+       {
+               0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2,
+               0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e,
+               0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47,
+               0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b,
+               0x17, 0x80, 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5,
+               0xdb, 0x4c, 0xe8, 0x7f, 0xbd, 0x2a, 0x8e, 0x19,
+               0x92, 0x05, 0xa1, 0x36, 0xf4, 0x63, 0xc7, 0x50,
+               0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0x0b, 0x9c,
+               0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec,
+               0xe2, 0x75, 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20,
+               0xab, 0x3c, 0x98, 0x0f, 0xcd, 0x5a, 0xfe, 0x69,
+               0x67, 0xf0, 0x54, 0xc3, 0x01, 0x96, 0x32, 0xa5,
+               0x39, 0xae, 0x0a, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb,
+               0xf5, 0x62, 0xc6, 0x51, 0x93, 0x04, 0xa0, 0x37,
+               0xbc, 0x2b, 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e,
+               0x70, 0xe7, 0x43, 0xd4, 0x16, 0x81, 0x25, 0xb2,
+               0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, 0x09, 0x9e,
+               0x90, 0x07, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52,
+               0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b,
+               0x15, 0x82, 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7,
+               0x4b, 0xdc, 0x78, 0xef, 0x2d, 0xba, 0x1e, 0x89,
+               0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, 0xd2, 0x45,
+               0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0x0c,
+               0x02, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0,
+               0x72, 0xe5, 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0,
+               0xbe, 0x29, 0x8d, 0x1a, 0xd8, 0x4f, 0xeb, 0x7c,
+               0xf7, 0x60, 0xc4, 0x53, 0x91, 0x06, 0xa2, 0x35,
+               0x3b, 0xac, 0x08, 0x9f, 0x5d, 0xca, 0x6e, 0xf9,
+               0x65, 0xf2, 0x56, 0xc1, 0x03, 0x94, 0x30, 0xa7,
+               0xa9, 0x3e, 0x9a, 0x0d, 0xcf, 0x58, 0xfc, 0x6b,
+               0xe0, 0x77, 0xd3, 0x44, 0x86, 0x11, 0xb5, 0x22,
+               0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, 0x79, 0xee,
+       },
+       {
+               0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef,
+               0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b,
+               0x75, 0xed, 0x58, 0xc0, 0x2f, 0xb7, 0x02, 0x9a,
+               0xc1, 0x59, 0xec, 0x74, 0x9b, 0x03, 0xb6, 0x2e,
+               0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, 0x9d, 0x05,
+               0x5e, 0xc6, 0x73, 0xeb, 0x04, 0x9c, 0x29, 0xb1,
+               0x9f, 0x07, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70,
+               0x2b, 0xb3, 0x06, 0x9e, 0x71, 0xe9, 0x5c, 0xc4,
+               0xc9, 0x51, 0xe4, 0x7c, 0x93, 0x0b, 0xbe, 0x26,
+               0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, 0x0a, 0x92,
+               0xbc, 0x24, 0x91, 0x09, 0xe6, 0x7e, 0xcb, 0x53,
+               0x08, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7,
+               0x23, 0xbb, 0x0e, 0x96, 0x79, 0xe1, 0x54, 0xcc,
+               0x97, 0x0f, 0xba, 0x22, 0xcd, 0x55, 0xe0, 0x78,
+               0x56, 0xce, 0x7b, 0xe3, 0x0c, 0x94, 0x21, 0xb9,
+               0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0x0d,
+               0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60,
+               0x3b, 0xa3, 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4,
+               0xfa, 0x62, 0xd7, 0x4f, 0xa0, 0x38, 0x8d, 0x15,
+               0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, 0x39, 0xa1,
+               0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a,
+               0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e,
+               0x10, 0x88, 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff,
+               0xa4, 0x3c, 0x89, 0x11, 0xfe, 0x66, 0xd3, 0x4b,
+               0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, 0x31, 0xa9,
+               0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d,
+               0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc,
+               0x87, 0x1f, 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68,
+               0xac, 0x34, 0x81, 0x19, 0xf6, 0x6e, 0xdb, 0x43,
+               0x18, 0x80, 0x35, 0xad, 0x42, 0xda, 0x6f, 0xf7,
+               0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36,
+               0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82,
+       },
+       {
+               0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8,
+               0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54,
+               0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, 0x14, 0x8d,
+               0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31,
+               0xca, 0x53, 0xe5, 0x7c, 0x94, 0x0d, 0xbb, 0x22,
+               0x76, 0xef, 0x59, 0xc0, 0x28, 0xb1, 0x07, 0x9e,
+               0xaf, 0x36, 0x80, 0x19, 0xf1, 0x68, 0xde, 0x47,
+               0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, 0x62, 0xfb,
+               0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61,
+               0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd,
+               0xec, 0x75, 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x04,
+               0x50, 0xc9, 0x7f, 0xe6, 0x0e, 0x97, 0x21, 0xb8,
+               0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, 0x32, 0xab,
+               0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17,
+               0x26, 0xbf, 0x09, 0x90, 0x78, 0xe1, 0x57, 0xce,
+               0x9a, 0x03, 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72,
+               0x0f, 0x96, 0x20, 0xb9, 0x51, 0xc8, 0x7e, 0xe7,
+               0xb3, 0x2a, 0x9c, 0x05, 0xed, 0x74, 0xc2, 0x5b,
+               0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82,
+               0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e,
+               0xc5, 0x5c, 0xea, 0x73, 0x9b, 0x02, 0xb4, 0x2d,
+               0x79, 0xe0, 0x56, 0xcf, 0x27, 0xbe, 0x08, 0x91,
+               0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, 0xd1, 0x48,
+               0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4,
+               0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e,
+               0x3a, 0xa3, 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2,
+               0xe3, 0x7a, 0xcc, 0x55, 0xbd, 0x24, 0x92, 0x0b,
+               0x5f, 0xc6, 0x70, 0xe9, 0x01, 0x98, 0x2e, 0xb7,
+               0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4,
+               0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18,
+               0x29, 0xb0, 0x06, 0x9f, 0x77, 0xee, 0x58, 0xc1,
+               0x95, 0x0c, 0xba, 0x23, 0xcb, 0x52, 0xe4, 0x7d,
+       },
+       {
+               0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1,
+               0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45,
+               0x55, 0xcf, 0x7c, 0xe6, 0x07, 0x9d, 0x2e, 0xb4,
+               0xf1, 0x6b, 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10,
+               0xaa, 0x30, 0x83, 0x19, 0xf8, 0x62, 0xd1, 0x4b,
+               0x0e, 0x94, 0x27, 0xbd, 0x5c, 0xc6, 0x75, 0xef,
+               0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e,
+               0x5b, 0xc1, 0x72, 0xe8, 0x09, 0x93, 0x20, 0xba,
+               0x49, 0xd3, 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8,
+               0xed, 0x77, 0xc4, 0x5e, 0xbf, 0x25, 0x96, 0x0c,
+               0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, 0x67, 0xfd,
+               0xb8, 0x22, 0x91, 0x0b, 0xea, 0x70, 0xc3, 0x59,
+               0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x02,
+               0x47, 0xdd, 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6,
+               0xb6, 0x2c, 0x9f, 0x05, 0xe4, 0x7e, 0xcd, 0x57,
+               0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda, 0x69, 0xf3,
+               0x92, 0x08, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73,
+               0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7,
+               0xc7, 0x5d, 0xee, 0x74, 0x95, 0x0f, 0xbc, 0x26,
+               0x63, 0xf9, 0x4a, 0xd0, 0x31, 0xab, 0x18, 0x82,
+               0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, 0x43, 0xd9,
+               0x9c, 0x06, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d,
+               0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c,
+               0xc9, 0x53, 0xe0, 0x7a, 0x9b, 0x01, 0xb2, 0x28,
+               0xdb, 0x41, 0xf2, 0x68, 0x89, 0x13, 0xa0, 0x3a,
+               0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, 0x04, 0x9e,
+               0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f,
+               0x2a, 0xb0, 0x03, 0x99, 0x78, 0xe2, 0x51, 0xcb,
+               0x71, 0xeb, 0x58, 0xc2, 0x23, 0xb9, 0x0a, 0x90,
+               0xd5, 0x4f, 0xfc, 0x66, 0x87, 0x1d, 0xae, 0x34,
+               0x24, 0xbe, 0x0d, 0x97, 0x76, 0xec, 0x5f, 0xc5,
+               0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61,
+       },
+       {
+               0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6,
+               0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a,
+               0x45, 0xde, 0x6e, 0xf5, 0x13, 0x88, 0x38, 0xa3,
+               0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, 0x94, 0x0f,
+               0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c,
+               0x26, 0xbd, 0x0d, 0x96, 0x70, 0xeb, 0x5b, 0xc0,
+               0xcf, 0x54, 0xe4, 0x7f, 0x99, 0x02, 0xb2, 0x29,
+               0x63, 0xf8, 0x48, 0xd3, 0x35, 0xae, 0x1e, 0x85,
+               0x09, 0x92, 0x22, 0xb9, 0x5f, 0xc4, 0x74, 0xef,
+               0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43,
+               0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa,
+               0xe0, 0x7b, 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x06,
+               0x83, 0x18, 0xa8, 0x33, 0xd5, 0x4e, 0xfe, 0x65,
+               0x2f, 0xb4, 0x04, 0x9f, 0x79, 0xe2, 0x52, 0xc9,
+               0xc6, 0x5d, 0xed, 0x76, 0x90, 0x0b, 0xbb, 0x20,
+               0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c,
+               0x12, 0x89, 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4,
+               0xbe, 0x25, 0x95, 0x0e, 0xe8, 0x73, 0xc3, 0x58,
+               0x57, 0xcc, 0x7c, 0xe7, 0x01, 0x9a, 0x2a, 0xb1,
+               0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d,
+               0x98, 0x03, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e,
+               0x34, 0xaf, 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2,
+               0xdd, 0x46, 0xf6, 0x6d, 0x8b, 0x10, 0xa0, 0x3b,
+               0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, 0x0c, 0x97,
+               0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd,
+               0xb7, 0x2c, 0x9c, 0x07, 0xe1, 0x7a, 0xca, 0x51,
+               0x5e, 0xc5, 0x75, 0xee, 0x08, 0x93, 0x23, 0xb8,
+               0xf2, 0x69, 0xd9, 0x42, 0xa4, 0x3f, 0x8f, 0x14,
+               0x91, 0x0a, 0xba, 0x21, 0xc7, 0x5c, 0xec, 0x77,
+               0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb,
+               0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32,
+               0x78, 0xe3, 0x53, 0xc8, 0x2e, 0xb5, 0x05, 0x9e,
+       },
+       {
+               0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+               0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67,
+               0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6,
+               0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52,
+               0x6a, 0xf6, 0x4f, 0xd3, 0x20, 0xbc, 0x05, 0x99,
+               0xfe, 0x62, 0xdb, 0x47, 0xb4, 0x28, 0x91, 0x0d,
+               0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, 0x30, 0xac,
+               0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38,
+               0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x02, 0xbb, 0x27,
+               0x40, 0xdc, 0x65, 0xf9, 0x0a, 0x96, 0x2f, 0xb3,
+               0xe1, 0x7d, 0xc4, 0x58, 0xab, 0x37, 0x8e, 0x12,
+               0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, 0x1a, 0x86,
+               0xbe, 0x22, 0x9b, 0x07, 0xf4, 0x68, 0xd1, 0x4d,
+               0x2a, 0xb6, 0x0f, 0x93, 0x60, 0xfc, 0x45, 0xd9,
+               0x8b, 0x17, 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78,
+               0x1f, 0x83, 0x3a, 0xa6, 0x55, 0xc9, 0x70, 0xec,
+               0xb5, 0x29, 0x90, 0x0c, 0xff, 0x63, 0xda, 0x46,
+               0x21, 0xbd, 0x04, 0x98, 0x6b, 0xf7, 0x4e, 0xd2,
+               0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73,
+               0x14, 0x88, 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7,
+               0xdf, 0x43, 0xfa, 0x66, 0x95, 0x09, 0xb0, 0x2c,
+               0x4b, 0xd7, 0x6e, 0xf2, 0x01, 0x9d, 0x24, 0xb8,
+               0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19,
+               0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d,
+               0x61, 0xfd, 0x44, 0xd8, 0x2b, 0xb7, 0x0e, 0x92,
+               0xf5, 0x69, 0xd0, 0x4c, 0xbf, 0x23, 0x9a, 0x06,
+               0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, 0x3b, 0xa7,
+               0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33,
+               0x0b, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8,
+               0x9f, 0x03, 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c,
+               0x3e, 0xa2, 0x1b, 0x87, 0x74, 0xe8, 0x51, 0xcd,
+               0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, 0xc5, 0x59,
+       },
+       {
+               0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4,
+               0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68,
+               0x25, 0xb8, 0x02, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1,
+               0xb9, 0x24, 0x9e, 0x03, 0xf7, 0x6a, 0xd0, 0x4d,
+               0x4a, 0xd7, 0x6d, 0xf0, 0x04, 0x99, 0x23, 0xbe,
+               0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x05, 0xbf, 0x22,
+               0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x06, 0x9b,
+               0xf3, 0x6e, 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x07,
+               0x94, 0x09, 0xb3, 0x2e, 0xda, 0x47, 0xfd, 0x60,
+               0x08, 0x95, 0x2f, 0xb2, 0x46, 0xdb, 0x61, 0xfc,
+               0xb1, 0x2c, 0x96, 0x0b, 0xff, 0x62, 0xd8, 0x45,
+               0x2d, 0xb0, 0x0a, 0x97, 0x63, 0xfe, 0x44, 0xd9,
+               0xde, 0x43, 0xf9, 0x64, 0x90, 0x0d, 0xb7, 0x2a,
+               0x42, 0xdf, 0x65, 0xf8, 0x0c, 0x91, 0x2b, 0xb6,
+               0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, 0x92, 0x0f,
+               0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0x0e, 0x93,
+               0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1,
+               0xa9, 0x34, 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d,
+               0x10, 0x8d, 0x37, 0xaa, 0x5e, 0xc3, 0x79, 0xe4,
+               0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, 0xe5, 0x78,
+               0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b,
+               0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17,
+               0x5a, 0xc7, 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae,
+               0xc6, 0x5b, 0xe1, 0x7c, 0x88, 0x15, 0xaf, 0x32,
+               0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, 0xc8, 0x55,
+               0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9,
+               0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70,
+               0x18, 0x85, 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec,
+               0xeb, 0x76, 0xcc, 0x51, 0xa5, 0x38, 0x82, 0x1f,
+               0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, 0x1e, 0x83,
+               0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a,
+               0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6,
+       },
+       {
+               0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd,
+               0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79,
+               0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, 0x76, 0xe8,
+               0x91, 0x0f, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c,
+               0x2a, 0xb4, 0x0b, 0x95, 0x68, 0xf6, 0x49, 0xd7,
+               0xae, 0x30, 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53,
+               0x3f, 0xa1, 0x1e, 0x80, 0x7d, 0xe3, 0x5c, 0xc2,
+               0xbb, 0x25, 0x9a, 0x04, 0xf9, 0x67, 0xd8, 0x46,
+               0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9,
+               0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0x0c, 0xb3, 0x2d,
+               0x41, 0xdf, 0x60, 0xfe, 0x03, 0x9d, 0x22, 0xbc,
+               0xc5, 0x5b, 0xe4, 0x7a, 0x87, 0x19, 0xa6, 0x38,
+               0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, 0x1d, 0x83,
+               0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x07,
+               0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x08, 0x96,
+               0xef, 0x71, 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12,
+               0xa8, 0x36, 0x89, 0x17, 0xea, 0x74, 0xcb, 0x55,
+               0x2c, 0xb2, 0x0d, 0x93, 0x6e, 0xf0, 0x4f, 0xd1,
+               0xbd, 0x23, 0x9c, 0x02, 0xff, 0x61, 0xde, 0x40,
+               0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4,
+               0x82, 0x1c, 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f,
+               0x06, 0x98, 0x27, 0xb9, 0x44, 0xda, 0x65, 0xfb,
+               0x97, 0x09, 0xb6, 0x28, 0xd5, 0x4b, 0xf4, 0x6a,
+               0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee,
+               0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x01,
+               0x78, 0xe6, 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85,
+               0xe9, 0x77, 0xc8, 0x56, 0xab, 0x35, 0x8a, 0x14,
+               0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, 0x0e, 0x90,
+               0xd6, 0x48, 0xf7, 0x69, 0x94, 0x0a, 0xb5, 0x2b,
+               0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf,
+               0xc3, 0x5d, 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e,
+               0x47, 0xd9, 0x66, 0xf8, 0x05, 0x9b, 0x24, 0xba,
+       },
+       {
+               0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa,
+               0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76,
+               0x05, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff,
+               0x89, 0x16, 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73,
+               0x0a, 0x95, 0x29, 0xb6, 0x4c, 0xd3, 0x6f, 0xf0,
+               0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, 0xe3, 0x7c,
+               0x0f, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5,
+               0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79,
+               0x14, 0x8b, 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee,
+               0x98, 0x07, 0xbb, 0x24, 0xde, 0x41, 0xfd, 0x62,
+               0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, 0x74, 0xeb,
+               0x9d, 0x02, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67,
+               0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4,
+               0x92, 0x0d, 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68,
+               0x1b, 0x84, 0x38, 0xa7, 0x5d, 0xc2, 0x7e, 0xe1,
+               0x97, 0x08, 0xb4, 0x2b, 0xd1, 0x4e, 0xf2, 0x6d,
+               0x28, 0xb7, 0x0b, 0x94, 0x6e, 0xf1, 0x4d, 0xd2,
+               0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e,
+               0x2d, 0xb2, 0x0e, 0x91, 0x6b, 0xf4, 0x48, 0xd7,
+               0xa1, 0x3e, 0x82, 0x1d, 0xe7, 0x78, 0xc4, 0x5b,
+               0x22, 0xbd, 0x01, 0x9e, 0x64, 0xfb, 0x47, 0xd8,
+               0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54,
+               0x27, 0xb8, 0x04, 0x9b, 0x61, 0xfe, 0x42, 0xdd,
+               0xab, 0x34, 0x88, 0x17, 0xed, 0x72, 0xce, 0x51,
+               0x3c, 0xa3, 0x1f, 0x80, 0x7a, 0xe5, 0x59, 0xc6,
+               0xb0, 0x2f, 0x93, 0x0c, 0xf6, 0x69, 0xd5, 0x4a,
+               0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3,
+               0xb5, 0x2a, 0x96, 0x09, 0xf3, 0x6c, 0xd0, 0x4f,
+               0x36, 0xa9, 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc,
+               0xba, 0x25, 0x99, 0x06, 0xfc, 0x63, 0xdf, 0x40,
+               0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, 0x56, 0xc9,
+               0xbf, 0x20, 0x9c, 0x03, 0xf9, 0x66, 0xda, 0x45,
+       },
+       {
+               0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47,
+               0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e,
+               0xd2, 0x72, 0x8f, 0x2f, 0x68, 0xc8, 0x35, 0x95,
+               0xbb, 0x1b, 0xe6, 0x46, 0x01, 0xa1, 0x5c, 0xfc,
+               0xb9, 0x19, 0xe4, 0x44, 0x03, 0xa3, 0x5e, 0xfe,
+               0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97,
+               0x6b, 0xcb, 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c,
+               0x02, 0xa2, 0x5f, 0xff, 0xb8, 0x18, 0xe5, 0x45,
+               0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, 0x88, 0x28,
+               0x06, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41,
+               0xbd, 0x1d, 0xe0, 0x40, 0x07, 0xa7, 0x5a, 0xfa,
+               0xd4, 0x74, 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93,
+               0xd6, 0x76, 0x8b, 0x2b, 0x6c, 0xcc, 0x31, 0x91,
+               0xbf, 0x1f, 0xe2, 0x42, 0x05, 0xa5, 0x58, 0xf8,
+               0x04, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43,
+               0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a,
+               0xde, 0x7e, 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99,
+               0xb7, 0x17, 0xea, 0x4a, 0x0d, 0xad, 0x50, 0xf0,
+               0x0c, 0xac, 0x51, 0xf1, 0xb6, 0x16, 0xeb, 0x4b,
+               0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22,
+               0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20,
+               0x0e, 0xae, 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49,
+               0xb5, 0x15, 0xe8, 0x48, 0x0f, 0xaf, 0x52, 0xf2,
+               0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, 0x3b, 0x9b,
+               0xb1, 0x11, 0xec, 0x4c, 0x0b, 0xab, 0x56, 0xf6,
+               0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f,
+               0x63, 0xc3, 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24,
+               0x0a, 0xaa, 0x57, 0xf7, 0xb0, 0x10, 0xed, 0x4d,
+               0x08, 0xa8, 0x55, 0xf5, 0xb2, 0x12, 0xef, 0x4f,
+               0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26,
+               0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d,
+               0xb3, 0x13, 0xee, 0x4e, 0x09, 0xa9, 0x54, 0xf4,
+       },
+       {
+               0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40,
+               0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21,
+               0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82,
+               0xa3, 0x02, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3,
+               0x99, 0x38, 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9,
+               0xf8, 0x59, 0xa7, 0x06, 0x46, 0xe7, 0x19, 0xb8,
+               0x5b, 0xfa, 0x04, 0xa5, 0xe5, 0x44, 0xba, 0x1b,
+               0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a,
+               0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f,
+               0x4e, 0xef, 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0x0e,
+               0xed, 0x4c, 0xb2, 0x13, 0x53, 0xf2, 0x0c, 0xad,
+               0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, 0x6d, 0xcc,
+               0xb6, 0x17, 0xe9, 0x48, 0x08, 0xa9, 0x57, 0xf6,
+               0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97,
+               0x74, 0xd5, 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34,
+               0x15, 0xb4, 0x4a, 0xeb, 0xab, 0x0a, 0xf4, 0x55,
+               0x5e, 0xff, 0x01, 0xa0, 0xe0, 0x41, 0xbf, 0x1e,
+               0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f,
+               0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc,
+               0xfd, 0x5c, 0xa2, 0x03, 0x43, 0xe2, 0x1c, 0xbd,
+               0xc7, 0x66, 0x98, 0x39, 0x79, 0xd8, 0x26, 0x87,
+               0xa6, 0x07, 0xf9, 0x58, 0x18, 0xb9, 0x47, 0xe6,
+               0x05, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45,
+               0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24,
+               0x71, 0xd0, 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31,
+               0x10, 0xb1, 0x4f, 0xee, 0xae, 0x0f, 0xf1, 0x50,
+               0xb3, 0x12, 0xec, 0x4d, 0x0d, 0xac, 0x52, 0xf3,
+               0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92,
+               0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x09, 0xa8,
+               0x89, 0x28, 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9,
+               0x2a, 0x8b, 0x75, 0xd4, 0x94, 0x35, 0xcb, 0x6a,
+               0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, 0xaa, 0x0b,
+       },
+       {
+               0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49,
+               0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30,
+               0xf2, 0x50, 0xab, 0x09, 0x40, 0xe2, 0x19, 0xbb,
+               0x8b, 0x29, 0xd2, 0x70, 0x39, 0x9b, 0x60, 0xc2,
+               0xf9, 0x5b, 0xa0, 0x02, 0x4b, 0xe9, 0x12, 0xb0,
+               0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9,
+               0x0b, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42,
+               0x72, 0xd0, 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b,
+               0xef, 0x4d, 0xb6, 0x14, 0x5d, 0xff, 0x04, 0xa6,
+               0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, 0x7d, 0xdf,
+               0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0x0d, 0xf6, 0x54,
+               0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d,
+               0x16, 0xb4, 0x4f, 0xed, 0xa4, 0x06, 0xfd, 0x5f,
+               0x6f, 0xcd, 0x36, 0x94, 0xdd, 0x7f, 0x84, 0x26,
+               0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, 0x0f, 0xad,
+               0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4,
+               0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a,
+               0xba, 0x18, 0xe3, 0x41, 0x08, 0xaa, 0x51, 0xf3,
+               0x31, 0x93, 0x68, 0xca, 0x83, 0x21, 0xda, 0x78,
+               0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, 0xa3, 0x01,
+               0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73,
+               0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0x0a,
+               0xc8, 0x6a, 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81,
+               0xb1, 0x13, 0xe8, 0x4a, 0x03, 0xa1, 0x5a, 0xf8,
+               0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, 0xc7, 0x65,
+               0x55, 0xf7, 0x0c, 0xae, 0xe7, 0x45, 0xbe, 0x1c,
+               0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97,
+               0xa7, 0x05, 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee,
+               0xd5, 0x77, 0x8c, 0x2e, 0x67, 0xc5, 0x3e, 0x9c,
+               0xac, 0x0e, 0xf5, 0x57, 0x1e, 0xbc, 0x47, 0xe5,
+               0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e,
+               0x5e, 0xfc, 0x07, 0xa5, 0xec, 0x4e, 0xb5, 0x17,
+       },
+       {
+               0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e,
+               0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f,
+               0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, 0x0f, 0xac,
+               0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd,
+               0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97,
+               0xa8, 0x0b, 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6,
+               0x3b, 0x98, 0x60, 0xc3, 0x8d, 0x2e, 0xd6, 0x75,
+               0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, 0xa7, 0x04,
+               0xaf, 0x0c, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1,
+               0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90,
+               0x4d, 0xee, 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x03,
+               0x3c, 0x9f, 0x67, 0xc4, 0x8a, 0x29, 0xd1, 0x72,
+               0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, 0x9b, 0x38,
+               0x07, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49,
+               0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda,
+               0xe5, 0x46, 0xbe, 0x1d, 0x53, 0xf0, 0x08, 0xab,
+               0x43, 0xe0, 0x18, 0xbb, 0xf5, 0x56, 0xae, 0x0d,
+               0x32, 0x91, 0x69, 0xca, 0x84, 0x27, 0xdf, 0x7c,
+               0xa1, 0x02, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef,
+               0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e,
+               0x9a, 0x39, 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4,
+               0xeb, 0x48, 0xb0, 0x13, 0x5d, 0xfe, 0x06, 0xa5,
+               0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, 0x95, 0x36,
+               0x09, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47,
+               0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x01, 0xa2,
+               0x9d, 0x3e, 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3,
+               0x0e, 0xad, 0x55, 0xf6, 0xb8, 0x1b, 0xe3, 0x40,
+               0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, 0x92, 0x31,
+               0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b,
+               0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0x0a,
+               0xd7, 0x74, 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99,
+               0xa6, 0x05, 0xfd, 0x5e, 0x10, 0xb3, 0x4b, 0xe8,
+       },
+       {
+               0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b,
+               0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12,
+               0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9,
+               0xdb, 0x7f, 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80,
+               0x39, 0x9d, 0x6c, 0xc8, 0x93, 0x37, 0xc6, 0x62,
+               0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, 0x8f, 0x2b,
+               0xab, 0x0f, 0xfe, 0x5a, 0x01, 0xa5, 0x54, 0xf0,
+               0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9,
+               0x72, 0xd6, 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29,
+               0x3b, 0x9f, 0x6e, 0xca, 0x91, 0x35, 0xc4, 0x60,
+               0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, 0x1f, 0xbb,
+               0xa9, 0x0d, 0xfc, 0x58, 0x03, 0xa7, 0x56, 0xf2,
+               0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10,
+               0x02, 0xa6, 0x57, 0xf3, 0xa8, 0x0c, 0xfd, 0x59,
+               0xd9, 0x7d, 0x8c, 0x28, 0x73, 0xd7, 0x26, 0x82,
+               0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, 0x6f, 0xcb,
+               0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf,
+               0xad, 0x09, 0xf8, 0x5c, 0x07, 0xa3, 0x52, 0xf6,
+               0x76, 0xd2, 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d,
+               0x3f, 0x9b, 0x6a, 0xce, 0x95, 0x31, 0xc0, 0x64,
+               0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, 0x22, 0x86,
+               0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf,
+               0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14,
+               0x06, 0xa2, 0x53, 0xf7, 0xac, 0x08, 0xf9, 0x5d,
+               0x96, 0x32, 0xc3, 0x67, 0x3c, 0x98, 0x69, 0xcd,
+               0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, 0x20, 0x84,
+               0x04, 0xa0, 0x51, 0xf5, 0xae, 0x0a, 0xfb, 0x5f,
+               0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16,
+               0xaf, 0x0b, 0xfa, 0x5e, 0x05, 0xa1, 0x50, 0xf4,
+               0xe6, 0x42, 0xb3, 0x17, 0x4c, 0xe8, 0x19, 0xbd,
+               0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, 0xc2, 0x66,
+               0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f,
+       },
+       {
+               0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c,
+               0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d,
+               0x82, 0x27, 0xd5, 0x70, 0x2c, 0x89, 0x7b, 0xde,
+               0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, 0x3a, 0x9f,
+               0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45,
+               0x58, 0xfd, 0x0f, 0xaa, 0xf6, 0x53, 0xa1, 0x04,
+               0x9b, 0x3e, 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7,
+               0xda, 0x7f, 0x8d, 0x28, 0x74, 0xd1, 0x23, 0x86,
+               0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, 0xcb, 0x6e,
+               0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f,
+               0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec,
+               0xf1, 0x54, 0xa6, 0x03, 0x5f, 0xfa, 0x08, 0xad,
+               0x2b, 0x8e, 0x7c, 0xd9, 0x85, 0x20, 0xd2, 0x77,
+               0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, 0x93, 0x36,
+               0xa9, 0x0c, 0xfe, 0x5b, 0x07, 0xa2, 0x50, 0xf5,
+               0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4,
+               0x64, 0xc1, 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38,
+               0x25, 0x80, 0x72, 0xd7, 0x8b, 0x2e, 0xdc, 0x79,
+               0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, 0x1f, 0xba,
+               0xa7, 0x02, 0xf0, 0x55, 0x09, 0xac, 0x5e, 0xfb,
+               0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21,
+               0x3c, 0x99, 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60,
+               0xff, 0x5a, 0xa8, 0x0d, 0x51, 0xf4, 0x06, 0xa3,
+               0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, 0x47, 0xe2,
+               0x56, 0xf3, 0x01, 0xa4, 0xf8, 0x5d, 0xaf, 0x0a,
+               0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b,
+               0xd4, 0x71, 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88,
+               0x95, 0x30, 0xc2, 0x67, 0x3b, 0x9e, 0x6c, 0xc9,
+               0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, 0xb6, 0x13,
+               0x0e, 0xab, 0x59, 0xfc, 0xa0, 0x05, 0xf7, 0x52,
+               0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91,
+               0x8c, 0x29, 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0,
+       },
+       {
+               0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+               0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c,
+               0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7,
+               0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe,
+               0x79, 0xdf, 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c,
+               0x20, 0x86, 0x71, 0xd7, 0x82, 0x24, 0xd3, 0x75,
+               0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, 0x38, 0x9e,
+               0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7,
+               0xf2, 0x54, 0xa3, 0x05, 0x50, 0xf6, 0x01, 0xa7,
+               0xab, 0x0d, 0xfa, 0x5c, 0x09, 0xaf, 0x58, 0xfe,
+               0x40, 0xe6, 0x11, 0xb7, 0xe2, 0x44, 0xb3, 0x15,
+               0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, 0xea, 0x4c,
+               0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde,
+               0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87,
+               0x39, 0x9f, 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c,
+               0x60, 0xc6, 0x31, 0x97, 0xc2, 0x64, 0x93, 0x35,
+               0xf9, 0x5f, 0xa8, 0x0e, 0x5b, 0xfd, 0x0a, 0xac,
+               0xa0, 0x06, 0xf1, 0x57, 0x02, 0xa4, 0x53, 0xf5,
+               0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e,
+               0x12, 0xb4, 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47,
+               0x80, 0x26, 0xd1, 0x77, 0x22, 0x84, 0x73, 0xd5,
+               0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, 0x2a, 0x8c,
+               0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67,
+               0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e,
+               0x0b, 0xad, 0x5a, 0xfc, 0xa9, 0x0f, 0xf8, 0x5e,
+               0x52, 0xf4, 0x03, 0xa5, 0xf0, 0x56, 0xa1, 0x07,
+               0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, 0x4a, 0xec,
+               0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5,
+               0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27,
+               0x2b, 0x8d, 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e,
+               0xc0, 0x66, 0x91, 0x37, 0x62, 0xc4, 0x33, 0x95,
+               0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, 0x6a, 0xcc,
+       },
+       {
+               0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52,
+               0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03,
+               0xa2, 0x05, 0xf1, 0x56, 0x04, 0xa3, 0x57, 0xf0,
+               0xf3, 0x54, 0xa0, 0x07, 0x55, 0xf2, 0x06, 0xa1,
+               0x59, 0xfe, 0x0a, 0xad, 0xff, 0x58, 0xac, 0x0b,
+               0x08, 0xaf, 0x5b, 0xfc, 0xae, 0x09, 0xfd, 0x5a,
+               0xfb, 0x5c, 0xa8, 0x0f, 0x5d, 0xfa, 0x0e, 0xa9,
+               0xaa, 0x0d, 0xf9, 0x5e, 0x0c, 0xab, 0x5f, 0xf8,
+               0xb2, 0x15, 0xe1, 0x46, 0x14, 0xb3, 0x47, 0xe0,
+               0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, 0x16, 0xb1,
+               0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42,
+               0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13,
+               0xeb, 0x4c, 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9,
+               0xba, 0x1d, 0xe9, 0x4e, 0x1c, 0xbb, 0x4f, 0xe8,
+               0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, 0xbc, 0x1b,
+               0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a,
+               0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b,
+               0x28, 0x8f, 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a,
+               0xdb, 0x7c, 0x88, 0x2f, 0x7d, 0xda, 0x2e, 0x89,
+               0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, 0x7f, 0xd8,
+               0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72,
+               0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23,
+               0x82, 0x25, 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0,
+               0xd3, 0x74, 0x80, 0x27, 0x75, 0xd2, 0x26, 0x81,
+               0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, 0x3e, 0x99,
+               0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8,
+               0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b,
+               0x38, 0x9f, 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a,
+               0x92, 0x35, 0xc1, 0x66, 0x34, 0x93, 0x67, 0xc0,
+               0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, 0x36, 0x91,
+               0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62,
+               0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33,
+       },
+       {
+               0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f,
+               0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56,
+               0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, 0x85, 0x2d,
+               0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x04,
+               0xa4, 0x0c, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb,
+               0x8d, 0x25, 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2,
+               0xf6, 0x5e, 0xbb, 0x13, 0x6c, 0xc4, 0x21, 0x89,
+               0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, 0x08, 0xa0,
+               0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a,
+               0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x03,
+               0x07, 0xaf, 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78,
+               0x2e, 0x86, 0x63, 0xcb, 0xb4, 0x1c, 0xf9, 0x51,
+               0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3, 0x26, 0x8e,
+               0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0x0f, 0xa7,
+               0xa3, 0x0b, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc,
+               0x8a, 0x22, 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5,
+               0xaa, 0x02, 0xe7, 0x4f, 0x30, 0x98, 0x7d, 0xd5,
+               0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, 0x54, 0xfc,
+               0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87,
+               0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x06, 0xae,
+               0x0e, 0xa6, 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71,
+               0x27, 0x8f, 0x6a, 0xc2, 0xbd, 0x15, 0xf0, 0x58,
+               0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, 0x8b, 0x23,
+               0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0x0a,
+               0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80,
+               0xd6, 0x7e, 0x9b, 0x33, 0x4c, 0xe4, 0x01, 0xa9,
+               0xad, 0x05, 0xe0, 0x48, 0x37, 0x9f, 0x7a, 0xd2,
+               0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, 0x53, 0xfb,
+               0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24,
+               0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0x0d,
+               0x09, 0xa1, 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76,
+               0x20, 0x88, 0x6d, 0xc5, 0xba, 0x12, 0xf7, 0x5f,
+       },
+       {
+               0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78,
+               0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59,
+               0x42, 0xeb, 0x0d, 0xa4, 0xdc, 0x75, 0x93, 0x3a,
+               0x63, 0xca, 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b,
+               0x84, 0x2d, 0xcb, 0x62, 0x1a, 0xb3, 0x55, 0xfc,
+               0xa5, 0x0c, 0xea, 0x43, 0x3b, 0x92, 0x74, 0xdd,
+               0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe,
+               0xe7, 0x4e, 0xa8, 0x01, 0x79, 0xd0, 0x36, 0x9f,
+               0x15, 0xbc, 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d,
+               0x34, 0x9d, 0x7b, 0xd2, 0xaa, 0x03, 0xe5, 0x4c,
+               0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, 0x86, 0x2f,
+               0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0x0e,
+               0x91, 0x38, 0xde, 0x77, 0x0f, 0xa6, 0x40, 0xe9,
+               0xb0, 0x19, 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8,
+               0xd3, 0x7a, 0x9c, 0x35, 0x4d, 0xe4, 0x02, 0xab,
+               0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, 0x23, 0x8a,
+               0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52,
+               0x0b, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73,
+               0x68, 0xc1, 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10,
+               0x49, 0xe0, 0x06, 0xaf, 0xd7, 0x7e, 0x98, 0x31,
+               0xae, 0x07, 0xe1, 0x48, 0x30, 0x99, 0x7f, 0xd6,
+               0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7,
+               0xec, 0x45, 0xa3, 0x0a, 0x72, 0xdb, 0x3d, 0x94,
+               0xcd, 0x64, 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5,
+               0x3f, 0x96, 0x70, 0xd9, 0xa1, 0x08, 0xee, 0x47,
+               0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, 0xcf, 0x66,
+               0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x05,
+               0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24,
+               0xbb, 0x12, 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3,
+               0x9a, 0x33, 0xd5, 0x7c, 0x04, 0xad, 0x4b, 0xe2,
+               0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, 0x28, 0x81,
+               0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x09, 0xa0,
+       },
+       {
+               0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71,
+               0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48,
+               0x72, 0xd8, 0x3b, 0x91, 0xe0, 0x4a, 0xa9, 0x03,
+               0x4b, 0xe1, 0x02, 0xa8, 0xd9, 0x73, 0x90, 0x3a,
+               0xe4, 0x4e, 0xad, 0x07, 0x76, 0xdc, 0x3f, 0x95,
+               0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x06, 0xac,
+               0x96, 0x3c, 0xdf, 0x75, 0x04, 0xae, 0x4d, 0xe7,
+               0xaf, 0x05, 0xe6, 0x4c, 0x3d, 0x97, 0x74, 0xde,
+               0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, 0x0e, 0xa4,
+               0xec, 0x46, 0xa5, 0x0f, 0x7e, 0xd4, 0x37, 0x9d,
+               0xa7, 0x0d, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6,
+               0x9e, 0x34, 0xd7, 0x7d, 0x0c, 0xa6, 0x45, 0xef,
+               0x31, 0x9b, 0x78, 0xd2, 0xa3, 0x09, 0xea, 0x40,
+               0x08, 0xa2, 0x41, 0xeb, 0x9a, 0x30, 0xd3, 0x79,
+               0x43, 0xe9, 0x0a, 0xa0, 0xd1, 0x7b, 0x98, 0x32,
+               0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0x0b,
+               0xb7, 0x1d, 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6,
+               0x8e, 0x24, 0xc7, 0x6d, 0x1c, 0xb6, 0x55, 0xff,
+               0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, 0x1e, 0xb4,
+               0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d,
+               0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22,
+               0x6a, 0xc0, 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b,
+               0x21, 0x8b, 0x68, 0xc2, 0xb3, 0x19, 0xfa, 0x50,
+               0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, 0xc3, 0x69,
+               0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13,
+               0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a,
+               0x10, 0xba, 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61,
+               0x29, 0x83, 0x60, 0xca, 0xbb, 0x11, 0xf2, 0x58,
+               0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, 0x5d, 0xf7,
+               0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce,
+               0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85,
+               0xcd, 0x67, 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc,
+       },
+       {
+               0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76,
+               0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47,
+               0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14,
+               0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25,
+               0xc4, 0x6f, 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2,
+               0xf5, 0x5e, 0xbe, 0x15, 0x63, 0xc8, 0x28, 0x83,
+               0xa6, 0x0d, 0xed, 0x46, 0x30, 0x9b, 0x7b, 0xd0,
+               0x97, 0x3c, 0xdc, 0x77, 0x01, 0xaa, 0x4a, 0xe1,
+               0x95, 0x3e, 0xde, 0x75, 0x03, 0xa8, 0x48, 0xe3,
+               0xa4, 0x0f, 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2,
+               0xf7, 0x5c, 0xbc, 0x17, 0x61, 0xca, 0x2a, 0x81,
+               0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb, 0x1b, 0xb0,
+               0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27,
+               0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16,
+               0x33, 0x98, 0x78, 0xd3, 0xa5, 0x0e, 0xee, 0x45,
+               0x02, 0xa9, 0x49, 0xe2, 0x94, 0x3f, 0xdf, 0x74,
+               0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0x0a, 0xea, 0x41,
+               0x06, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70,
+               0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23,
+               0x64, 0xcf, 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12,
+               0xf3, 0x58, 0xb8, 0x13, 0x65, 0xce, 0x2e, 0x85,
+               0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, 0x1f, 0xb4,
+               0x91, 0x3a, 0xda, 0x71, 0x07, 0xac, 0x4c, 0xe7,
+               0xa0, 0x0b, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6,
+               0xa2, 0x09, 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4,
+               0x93, 0x38, 0xd8, 0x73, 0x05, 0xae, 0x4e, 0xe5,
+               0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, 0x1d, 0xb6,
+               0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87,
+               0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10,
+               0x57, 0xfc, 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21,
+               0x04, 0xaf, 0x4f, 0xe4, 0x92, 0x39, 0xd9, 0x72,
+               0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x08, 0xe8, 0x43,
+       },
+       {
+               0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63,
+               0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a,
+               0x12, 0xbe, 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71,
+               0x1b, 0xb7, 0x5e, 0xf2, 0x91, 0x3d, 0xd4, 0x78,
+               0x24, 0x88, 0x61, 0xcd, 0xae, 0x02, 0xeb, 0x47,
+               0x2d, 0x81, 0x68, 0xc4, 0xa7, 0x0b, 0xe2, 0x4e,
+               0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55,
+               0x3f, 0x93, 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c,
+               0x48, 0xe4, 0x0d, 0xa1, 0xc2, 0x6e, 0x87, 0x2b,
+               0x41, 0xed, 0x04, 0xa8, 0xcb, 0x67, 0x8e, 0x22,
+               0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39,
+               0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30,
+               0x6c, 0xc0, 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0x0f,
+               0x65, 0xc9, 0x20, 0x8c, 0xef, 0x43, 0xaa, 0x06,
+               0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, 0xb1, 0x1d,
+               0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14,
+               0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3,
+               0x99, 0x35, 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa,
+               0x82, 0x2e, 0xc7, 0x6b, 0x08, 0xa4, 0x4d, 0xe1,
+               0x8b, 0x27, 0xce, 0x62, 0x01, 0xad, 0x44, 0xe8,
+               0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7,
+               0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde,
+               0xa6, 0x0a, 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5,
+               0xaf, 0x03, 0xea, 0x46, 0x25, 0x89, 0x60, 0xcc,
+               0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, 0x17, 0xbb,
+               0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2,
+               0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x05, 0xa9,
+               0xc3, 0x6f, 0x86, 0x2a, 0x49, 0xe5, 0x0c, 0xa0,
+               0xfc, 0x50, 0xb9, 0x15, 0x76, 0xda, 0x33, 0x9f,
+               0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, 0x3a, 0x96,
+               0xee, 0x42, 0xab, 0x07, 0x64, 0xc8, 0x21, 0x8d,
+               0xe7, 0x4b, 0xa2, 0x0e, 0x6d, 0xc1, 0x28, 0x84,
+       },
+       {
+               0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64,
+               0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65,
+               0x02, 0xaf, 0x45, 0xe8, 0x8c, 0x21, 0xcb, 0x66,
+               0x03, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67,
+               0x04, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60,
+               0x05, 0xa8, 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61,
+               0x06, 0xab, 0x41, 0xec, 0x88, 0x25, 0xcf, 0x62,
+               0x07, 0xaa, 0x40, 0xed, 0x89, 0x24, 0xce, 0x63,
+               0x08, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c,
+               0x09, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d,
+               0x0a, 0xa7, 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e,
+               0x0b, 0xa6, 0x4c, 0xe1, 0x85, 0x28, 0xc2, 0x6f,
+               0x0c, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, 0xc5, 0x68,
+               0x0d, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69,
+               0x0e, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a,
+               0x0f, 0xa2, 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b,
+               0x10, 0xbd, 0x57, 0xfa, 0x9e, 0x33, 0xd9, 0x74,
+               0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, 0xd8, 0x75,
+               0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76,
+               0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77,
+               0x14, 0xb9, 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70,
+               0x15, 0xb8, 0x52, 0xff, 0x9b, 0x36, 0xdc, 0x71,
+               0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, 0xdf, 0x72,
+               0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73,
+               0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c,
+               0x19, 0xb4, 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d,
+               0x1a, 0xb7, 0x5d, 0xf0, 0x94, 0x39, 0xd3, 0x7e,
+               0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, 0xd2, 0x7f,
+               0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78,
+               0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79,
+               0x1e, 0xb3, 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a,
+               0x1f, 0xb2, 0x58, 0xf5, 0x91, 0x3c, 0xd6, 0x7b,
+       },
+       {
+               0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d,
+               0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74,
+               0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f,
+               0x2b, 0x85, 0x6a, 0xc4, 0xa9, 0x07, 0xe8, 0x46,
+               0x64, 0xca, 0x25, 0x8b, 0xe6, 0x48, 0xa7, 0x09,
+               0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, 0xbe, 0x10,
+               0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b,
+               0x4f, 0xe1, 0x0e, 0xa0, 0xcd, 0x63, 0x8c, 0x22,
+               0xc8, 0x66, 0x89, 0x27, 0x4a, 0xe4, 0x0b, 0xa5,
+               0xd1, 0x7f, 0x90, 0x3e, 0x53, 0xfd, 0x12, 0xbc,
+               0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, 0x39, 0x97,
+               0xe3, 0x4d, 0xa2, 0x0c, 0x61, 0xcf, 0x20, 0x8e,
+               0xac, 0x02, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1,
+               0xb5, 0x1b, 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8,
+               0x9e, 0x30, 0xdf, 0x71, 0x1c, 0xb2, 0x5d, 0xf3,
+               0x87, 0x29, 0xc6, 0x68, 0x05, 0xab, 0x44, 0xea,
+               0x8d, 0x23, 0xcc, 0x62, 0x0f, 0xa1, 0x4e, 0xe0,
+               0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9,
+               0xbf, 0x11, 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2,
+               0xa6, 0x08, 0xe7, 0x49, 0x24, 0x8a, 0x65, 0xcb,
+               0xe9, 0x47, 0xa8, 0x06, 0x6b, 0xc5, 0x2a, 0x84,
+               0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d,
+               0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6,
+               0xc2, 0x6c, 0x83, 0x2d, 0x40, 0xee, 0x01, 0xaf,
+               0x45, 0xeb, 0x04, 0xaa, 0xc7, 0x69, 0x86, 0x28,
+               0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, 0x9f, 0x31,
+               0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a,
+               0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x03,
+               0x21, 0x8f, 0x60, 0xce, 0xa3, 0x0d, 0xe2, 0x4c,
+               0x38, 0x96, 0x79, 0xd7, 0xba, 0x14, 0xfb, 0x55,
+               0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, 0xd0, 0x7e,
+               0x0a, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67,
+       },
+       {
+               0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a,
+               0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b,
+               0x22, 0x8d, 0x61, 0xce, 0xa4, 0x0b, 0xe7, 0x48,
+               0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, 0xf6, 0x59,
+               0x44, 0xeb, 0x07, 0xa8, 0xc2, 0x6d, 0x81, 0x2e,
+               0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f,
+               0x66, 0xc9, 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0x0c,
+               0x77, 0xd8, 0x34, 0x9b, 0xf1, 0x5e, 0xb2, 0x1d,
+               0x88, 0x27, 0xcb, 0x64, 0x0e, 0xa1, 0x4d, 0xe2,
+               0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3,
+               0xaa, 0x05, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0,
+               0xbb, 0x14, 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1,
+               0xcc, 0x63, 0x8f, 0x20, 0x4a, 0xe5, 0x09, 0xa6,
+               0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, 0x18, 0xb7,
+               0xee, 0x41, 0xad, 0x02, 0x68, 0xc7, 0x2b, 0x84,
+               0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95,
+               0x0d, 0xa2, 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67,
+               0x1c, 0xb3, 0x5f, 0xf0, 0x9a, 0x35, 0xd9, 0x76,
+               0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x06, 0xea, 0x45,
+               0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54,
+               0x49, 0xe6, 0x0a, 0xa5, 0xcf, 0x60, 0x8c, 0x23,
+               0x58, 0xf7, 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32,
+               0x6b, 0xc4, 0x28, 0x87, 0xed, 0x42, 0xae, 0x01,
+               0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, 0xbf, 0x10,
+               0x85, 0x2a, 0xc6, 0x69, 0x03, 0xac, 0x40, 0xef,
+               0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe,
+               0xa7, 0x08, 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd,
+               0xb6, 0x19, 0xf5, 0x5a, 0x30, 0x9f, 0x73, 0xdc,
+               0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, 0x04, 0xab,
+               0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba,
+               0xe3, 0x4c, 0xa0, 0x0f, 0x65, 0xca, 0x26, 0x89,
+               0xf2, 0x5d, 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98,
+       },
+       {
+               0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37,
+               0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde,
+               0xcf, 0x7f, 0xb2, 0x02, 0x35, 0x85, 0x48, 0xf8,
+               0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11,
+               0x83, 0x33, 0xfe, 0x4e, 0x79, 0xc9, 0x04, 0xb4,
+               0x6a, 0xda, 0x17, 0xa7, 0x90, 0x20, 0xed, 0x5d,
+               0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x06, 0xcb, 0x7b,
+               0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92,
+               0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c,
+               0xf2, 0x42, 0x8f, 0x3f, 0x08, 0xb8, 0x75, 0xc5,
+               0xd4, 0x64, 0xa9, 0x19, 0x2e, 0x9e, 0x53, 0xe3,
+               0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, 0xba, 0x0a,
+               0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf,
+               0x71, 0xc1, 0x0c, 0xbc, 0x8b, 0x3b, 0xf6, 0x46,
+               0x57, 0xe7, 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60,
+               0xbe, 0x0e, 0xc3, 0x73, 0x44, 0xf4, 0x39, 0x89,
+               0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, 0xb1, 0x01,
+               0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8,
+               0xf9, 0x49, 0x84, 0x34, 0x03, 0xb3, 0x7e, 0xce,
+               0x10, 0xa0, 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27,
+               0xb5, 0x05, 0xc8, 0x78, 0x4f, 0xff, 0x32, 0x82,
+               0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, 0xdb, 0x6b,
+               0x7a, 0xca, 0x07, 0xb7, 0x80, 0x30, 0xfd, 0x4d,
+               0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4,
+               0x2d, 0x9d, 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a,
+               0xc4, 0x74, 0xb9, 0x09, 0x3e, 0x8e, 0x43, 0xf3,
+               0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, 0x65, 0xd5,
+               0x0b, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c,
+               0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99,
+               0x47, 0xf7, 0x3a, 0x8a, 0xbd, 0x0d, 0xc0, 0x70,
+               0x61, 0xd1, 0x1c, 0xac, 0x9b, 0x2b, 0xe6, 0x56,
+               0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, 0x0f, 0xbf,
+       },
+       {
+               0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30,
+               0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1,
+               0xdf, 0x6e, 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef,
+               0x3e, 0x8f, 0x41, 0xf0, 0xc0, 0x71, 0xbf, 0x0e,
+               0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, 0x22, 0x93,
+               0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0x0d, 0xc3, 0x72,
+               0x7c, 0xcd, 0x03, 0xb2, 0x82, 0x33, 0xfd, 0x4c,
+               0x9d, 0x2c, 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad,
+               0x5b, 0xea, 0x24, 0x95, 0xa5, 0x14, 0xda, 0x6b,
+               0xba, 0x0b, 0xc5, 0x74, 0x44, 0xf5, 0x3b, 0x8a,
+               0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x05, 0xb4,
+               0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55,
+               0xf8, 0x49, 0x87, 0x36, 0x06, 0xb7, 0x79, 0xc8,
+               0x19, 0xa8, 0x66, 0xd7, 0xe7, 0x56, 0x98, 0x29,
+               0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, 0xa6, 0x17,
+               0xc6, 0x77, 0xb9, 0x08, 0x38, 0x89, 0x47, 0xf6,
+               0xb6, 0x07, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86,
+               0x57, 0xe6, 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67,
+               0x69, 0xd8, 0x16, 0xa7, 0x97, 0x26, 0xe8, 0x59,
+               0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, 0x09, 0xb8,
+               0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25,
+               0xf4, 0x45, 0x8b, 0x3a, 0x0a, 0xbb, 0x75, 0xc4,
+               0xca, 0x7b, 0xb5, 0x04, 0x34, 0x85, 0x4b, 0xfa,
+               0x2b, 0x9a, 0x54, 0xe5, 0xd5, 0x64, 0xaa, 0x1b,
+               0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, 0x6c, 0xdd,
+               0x0c, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c,
+               0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x02,
+               0xd3, 0x62, 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3,
+               0x4e, 0xff, 0x31, 0x80, 0xb0, 0x01, 0xcf, 0x7e,
+               0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0, 0x2e, 0x9f,
+               0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1,
+               0x70, 0xc1, 0x0f, 0xbe, 0x8e, 0x3f, 0xf1, 0x40,
+       },
+       {
+               0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39,
+               0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0,
+               0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, 0x64, 0xd6,
+               0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f,
+               0xc3, 0x71, 0xba, 0x08, 0x31, 0x83, 0x48, 0xfa,
+               0x3a, 0x88, 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x03,
+               0x2c, 0x9e, 0x55, 0xe7, 0xde, 0x6c, 0xa7, 0x15,
+               0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, 0x5e, 0xec,
+               0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2,
+               0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b,
+               0x74, 0xc6, 0x0d, 0xbf, 0x86, 0x34, 0xff, 0x4d,
+               0x8d, 0x3f, 0xf4, 0x46, 0x7f, 0xcd, 0x06, 0xb4,
+               0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, 0xd3, 0x61,
+               0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98,
+               0xb7, 0x05, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e,
+               0x4e, 0xfc, 0x37, 0x85, 0xbc, 0x0e, 0xc5, 0x77,
+               0x2b, 0x99, 0x52, 0xe0, 0xd9, 0x6b, 0xa0, 0x12,
+               0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, 0x59, 0xeb,
+               0xc4, 0x76, 0xbd, 0x0f, 0x36, 0x84, 0x4f, 0xfd,
+               0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x04,
+               0xe8, 0x5a, 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1,
+               0x11, 0xa3, 0x68, 0xda, 0xe3, 0x51, 0x9a, 0x28,
+               0x07, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, 0x8c, 0x3e,
+               0xfe, 0x4c, 0x87, 0x35, 0x0c, 0xbe, 0x75, 0xc7,
+               0xb0, 0x02, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89,
+               0x49, 0xfb, 0x30, 0x82, 0xbb, 0x09, 0xc2, 0x70,
+               0x5f, 0xed, 0x26, 0x94, 0xad, 0x1f, 0xd4, 0x66,
+               0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, 0x2d, 0x9f,
+               0x73, 0xc1, 0x0a, 0xb8, 0x81, 0x33, 0xf8, 0x4a,
+               0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x01, 0xb3,
+               0x9c, 0x2e, 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5,
+               0x65, 0xd7, 0x1c, 0xae, 0x97, 0x25, 0xee, 0x5c,
+       },
+       {
+               0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e,
+               0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf,
+               0xff, 0x4c, 0x84, 0x37, 0x09, 0xba, 0x72, 0xc1,
+               0x0e, 0xbd, 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30,
+               0xe3, 0x50, 0x98, 0x2b, 0x15, 0xa6, 0x6e, 0xdd,
+               0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, 0x9f, 0x2c,
+               0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22,
+               0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3,
+               0xdb, 0x68, 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5,
+               0x2a, 0x99, 0x51, 0xe2, 0xdc, 0x6f, 0xa7, 0x14,
+               0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, 0xa9, 0x1a,
+               0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb,
+               0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x06,
+               0xc9, 0x7a, 0xb2, 0x01, 0x3f, 0x8c, 0x44, 0xf7,
+               0xc7, 0x74, 0xbc, 0x0f, 0x31, 0x82, 0x4a, 0xf9,
+               0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, 0xbb, 0x08,
+               0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95,
+               0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64,
+               0x54, 0xe7, 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a,
+               0xa5, 0x16, 0xde, 0x6d, 0x53, 0xe0, 0x28, 0x9b,
+               0x48, 0xfb, 0x33, 0x80, 0xbe, 0x0d, 0xc5, 0x76,
+               0xb9, 0x0a, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87,
+               0xb7, 0x04, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89,
+               0x46, 0xf5, 0x3d, 0x8e, 0xb0, 0x03, 0xcb, 0x78,
+               0x70, 0xc3, 0x0b, 0xb8, 0x86, 0x35, 0xfd, 0x4e,
+               0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, 0x0c, 0xbf,
+               0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x02, 0xb1,
+               0x7e, 0xcd, 0x05, 0xb6, 0x88, 0x3b, 0xf3, 0x40,
+               0x93, 0x20, 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad,
+               0x62, 0xd1, 0x19, 0xaa, 0x94, 0x27, 0xef, 0x5c,
+               0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, 0xe1, 0x52,
+               0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3,
+       },
+       {
+               0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b,
+               0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2,
+               0x8f, 0x3b, 0xfa, 0x4e, 0x65, 0xd1, 0x10, 0xa4,
+               0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, 0xd9, 0x6d,
+               0x03, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28,
+               0xca, 0x7e, 0xbf, 0x0b, 0x20, 0x94, 0x55, 0xe1,
+               0x8c, 0x38, 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7,
+               0x45, 0xf1, 0x30, 0x84, 0xaf, 0x1b, 0xda, 0x6e,
+               0x06, 0xb2, 0x73, 0xc7, 0xec, 0x58, 0x99, 0x2d,
+               0xcf, 0x7b, 0xba, 0x0e, 0x25, 0x91, 0x50, 0xe4,
+               0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2,
+               0x40, 0xf4, 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b,
+               0x05, 0xb1, 0x70, 0xc4, 0xef, 0x5b, 0x9a, 0x2e,
+               0xcc, 0x78, 0xb9, 0x0d, 0x26, 0x92, 0x53, 0xe7,
+               0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1,
+               0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68,
+               0x0c, 0xb8, 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27,
+               0xc5, 0x71, 0xb0, 0x04, 0x2f, 0x9b, 0x5a, 0xee,
+               0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, 0x1c, 0xa8,
+               0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61,
+               0x0f, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24,
+               0xc6, 0x72, 0xb3, 0x07, 0x2c, 0x98, 0x59, 0xed,
+               0x80, 0x34, 0xf5, 0x41, 0x6a, 0xde, 0x1f, 0xab,
+               0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, 0xd6, 0x62,
+               0x0a, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21,
+               0xc3, 0x77, 0xb6, 0x02, 0x29, 0x9d, 0x5c, 0xe8,
+               0x85, 0x31, 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae,
+               0x4c, 0xf8, 0x39, 0x8d, 0xa6, 0x12, 0xd3, 0x67,
+               0x09, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, 0x96, 0x22,
+               0xc0, 0x74, 0xb5, 0x01, 0x2a, 0x9e, 0x5f, 0xeb,
+               0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad,
+               0x4f, 0xfb, 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64,
+       },
+       {
+               0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c,
+               0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed,
+               0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x06, 0xb3,
+               0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x05, 0xc7, 0x72,
+               0x23, 0x96, 0x54, 0xe1, 0xcd, 0x78, 0xba, 0x0f,
+               0xe2, 0x57, 0x95, 0x20, 0x0c, 0xb9, 0x7b, 0xce,
+               0xbc, 0x09, 0xcb, 0x7e, 0x52, 0xe7, 0x25, 0x90,
+               0x7d, 0xc8, 0x0a, 0xbf, 0x93, 0x26, 0xe4, 0x51,
+               0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a,
+               0x87, 0x32, 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab,
+               0xd9, 0x6c, 0xae, 0x1b, 0x37, 0x82, 0x40, 0xf5,
+               0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, 0x81, 0x34,
+               0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49,
+               0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88,
+               0xfa, 0x4f, 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6,
+               0x3b, 0x8e, 0x4c, 0xf9, 0xd5, 0x60, 0xa2, 0x17,
+               0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, 0x15, 0xa0,
+               0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61,
+               0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f,
+               0xd2, 0x67, 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe,
+               0xaf, 0x1a, 0xd8, 0x6d, 0x41, 0xf4, 0x36, 0x83,
+               0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, 0xf7, 0x42,
+               0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c,
+               0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd,
+               0xca, 0x7f, 0xbd, 0x08, 0x24, 0x91, 0x53, 0xe6,
+               0x0b, 0xbe, 0x7c, 0xc9, 0xe5, 0x50, 0x92, 0x27,
+               0x55, 0xe0, 0x22, 0x97, 0xbb, 0x0e, 0xcc, 0x79,
+               0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0x0d, 0xb8,
+               0xe9, 0x5c, 0x9e, 0x2b, 0x07, 0xb2, 0x70, 0xc5,
+               0x28, 0x9d, 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x04,
+               0x76, 0xc3, 0x01, 0xb4, 0x98, 0x2d, 0xef, 0x5a,
+               0xb7, 0x02, 0xc0, 0x75, 0x59, 0xec, 0x2e, 0x9b,
+       },
+       {
+               0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25,
+               0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc,
+               0xaf, 0x19, 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a,
+               0x76, 0xc0, 0x07, 0xb1, 0x94, 0x22, 0xe5, 0x53,
+               0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, 0xd0, 0x66,
+               0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x09, 0xbf,
+               0xec, 0x5a, 0x9d, 0x2b, 0x0e, 0xb8, 0x7f, 0xc9,
+               0x35, 0x83, 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10,
+               0x86, 0x30, 0xf7, 0x41, 0x64, 0xd2, 0x15, 0xa3,
+               0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0x0b, 0xcc, 0x7a,
+               0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0x0c,
+               0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5,
+               0xc5, 0x73, 0xb4, 0x02, 0x27, 0x91, 0x56, 0xe0,
+               0x1c, 0xaa, 0x6d, 0xdb, 0xfe, 0x48, 0x8f, 0x39,
+               0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, 0xf9, 0x4f,
+               0xb3, 0x05, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96,
+               0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34,
+               0xc8, 0x7e, 0xb9, 0x0f, 0x2a, 0x9c, 0x5b, 0xed,
+               0xbe, 0x08, 0xcf, 0x79, 0x5c, 0xea, 0x2d, 0x9b,
+               0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, 0xf4, 0x42,
+               0x52, 0xe4, 0x23, 0x95, 0xb0, 0x06, 0xc1, 0x77,
+               0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae,
+               0xfd, 0x4b, 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8,
+               0x24, 0x92, 0x55, 0xe3, 0xc6, 0x70, 0xb7, 0x01,
+               0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, 0x04, 0xb2,
+               0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b,
+               0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d,
+               0xe1, 0x57, 0x90, 0x26, 0x03, 0xb5, 0x72, 0xc4,
+               0xd4, 0x62, 0xa5, 0x13, 0x36, 0x80, 0x47, 0xf1,
+               0x0d, 0xbb, 0x7c, 0xca, 0xef, 0x59, 0x9e, 0x28,
+               0x7b, 0xcd, 0x0a, 0xbc, 0x99, 0x2f, 0xe8, 0x5e,
+               0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87,
+       },
+       {
+               0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22,
+               0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3,
+               0xbf, 0x08, 0xcc, 0x7b, 0x59, 0xee, 0x2a, 0x9d,
+               0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c,
+               0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41,
+               0xb2, 0x05, 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90,
+               0xdc, 0x6b, 0xaf, 0x18, 0x3a, 0x8d, 0x49, 0xfe,
+               0x0d, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, 0x98, 0x2f,
+               0xc6, 0x71, 0xb5, 0x02, 0x20, 0x97, 0x53, 0xe4,
+               0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35,
+               0x79, 0xce, 0x0a, 0xbd, 0x9f, 0x28, 0xec, 0x5b,
+               0xa8, 0x1f, 0xdb, 0x6c, 0x4e, 0xf9, 0x3d, 0x8a,
+               0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, 0x30, 0x87,
+               0x74, 0xc3, 0x07, 0xb0, 0x92, 0x25, 0xe1, 0x56,
+               0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38,
+               0xcb, 0x7c, 0xb8, 0x0f, 0x2d, 0x9a, 0x5e, 0xe9,
+               0x91, 0x26, 0xe2, 0x55, 0x77, 0xc0, 0x04, 0xb3,
+               0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, 0xd5, 0x62,
+               0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0x0c,
+               0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd,
+               0xf2, 0x45, 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0,
+               0x23, 0x94, 0x50, 0xe7, 0xc5, 0x72, 0xb6, 0x01,
+               0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, 0xd8, 0x6f,
+               0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x09, 0xbe,
+               0x57, 0xe0, 0x24, 0x93, 0xb1, 0x06, 0xc2, 0x75,
+               0x86, 0x31, 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4,
+               0xe8, 0x5f, 0x9b, 0x2c, 0x0e, 0xb9, 0x7d, 0xca,
+               0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, 0xac, 0x1b,
+               0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16,
+               0xe5, 0x52, 0x96, 0x21, 0x03, 0xb4, 0x70, 0xc7,
+               0x8b, 0x3c, 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9,
+               0x5a, 0xed, 0x29, 0x9e, 0xbc, 0x0b, 0xcf, 0x78,
+       },
+       {
+               0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f,
+               0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6,
+               0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40,
+               0xe6, 0x5e, 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9,
+               0x9e, 0x26, 0xf3, 0x4b, 0x44, 0xfc, 0x29, 0x91,
+               0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, 0x80, 0x38,
+               0xd1, 0x69, 0xbc, 0x04, 0x0b, 0xb3, 0x66, 0xde,
+               0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77,
+               0x21, 0x99, 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e,
+               0x88, 0x30, 0xe5, 0x5d, 0x52, 0xea, 0x3f, 0x87,
+               0x6e, 0xd6, 0x03, 0xbb, 0xb4, 0x0c, 0xd9, 0x61,
+               0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8,
+               0xbf, 0x07, 0xd2, 0x6a, 0x65, 0xdd, 0x08, 0xb0,
+               0x16, 0xae, 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19,
+               0xf0, 0x48, 0x9d, 0x25, 0x2a, 0x92, 0x47, 0xff,
+               0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, 0xee, 0x56,
+               0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d,
+               0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4,
+               0x0d, 0xb5, 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x02,
+               0xa4, 0x1c, 0xc9, 0x71, 0x7e, 0xc6, 0x13, 0xab,
+               0xdc, 0x64, 0xb1, 0x09, 0x06, 0xbe, 0x6b, 0xd3,
+               0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a,
+               0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c,
+               0x3a, 0x82, 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35,
+               0x63, 0xdb, 0x0e, 0xb6, 0xb9, 0x01, 0xd4, 0x6c,
+               0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, 0x7d, 0xc5,
+               0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23,
+               0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a,
+               0xfd, 0x45, 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2,
+               0x54, 0xec, 0x39, 0x81, 0x8e, 0x36, 0xe3, 0x5b,
+               0xb2, 0x0a, 0xdf, 0x67, 0x68, 0xd0, 0x05, 0xbd,
+               0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14,
+       },
+       {
+               0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08,
+               0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9,
+               0x5f, 0xe6, 0x30, 0x89, 0x81, 0x38, 0xee, 0x57,
+               0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, 0x4f, 0xf6,
+               0xbe, 0x07, 0xd1, 0x68, 0x60, 0xd9, 0x0f, 0xb6,
+               0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17,
+               0xe1, 0x58, 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9,
+               0x40, 0xf9, 0x2f, 0x96, 0x9e, 0x27, 0xf1, 0x48,
+               0x61, 0xd8, 0x0e, 0xb7, 0xbf, 0x06, 0xd0, 0x69,
+               0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8,
+               0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36,
+               0x9f, 0x26, 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97,
+               0xdf, 0x66, 0xb0, 0x09, 0x01, 0xb8, 0x6e, 0xd7,
+               0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, 0xcf, 0x76,
+               0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88,
+               0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29,
+               0xc2, 0x7b, 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca,
+               0x63, 0xda, 0x0c, 0xb5, 0xbd, 0x04, 0xd2, 0x6b,
+               0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, 0x2c, 0x95,
+               0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34,
+               0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74,
+               0xdd, 0x64, 0xb2, 0x0b, 0x03, 0xba, 0x6c, 0xd5,
+               0x23, 0x9a, 0x4c, 0xf5, 0xfd, 0x44, 0x92, 0x2b,
+               0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, 0x33, 0x8a,
+               0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab,
+               0x02, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0x0a,
+               0xfc, 0x45, 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4,
+               0x5d, 0xe4, 0x32, 0x8b, 0x83, 0x3a, 0xec, 0x55,
+               0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, 0xac, 0x15,
+               0xbc, 0x05, 0xd3, 0x6a, 0x62, 0xdb, 0x0d, 0xb4,
+               0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a,
+               0xe3, 0x5a, 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb,
+       },
+       {
+               0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01,
+               0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8,
+               0x6f, 0xd5, 0x06, 0xbc, 0xbd, 0x07, 0xd4, 0x6e,
+               0xd6, 0x6c, 0xbf, 0x05, 0x04, 0xbe, 0x6d, 0xd7,
+               0xde, 0x64, 0xb7, 0x0d, 0x0c, 0xb6, 0x65, 0xdf,
+               0x67, 0xdd, 0x0e, 0xb4, 0xb5, 0x0f, 0xdc, 0x66,
+               0xb1, 0x0b, 0xd8, 0x62, 0x63, 0xd9, 0x0a, 0xb0,
+               0x08, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x09,
+               0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0,
+               0x18, 0xa2, 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19,
+               0xce, 0x74, 0xa7, 0x1d, 0x1c, 0xa6, 0x75, 0xcf,
+               0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, 0xcc, 0x76,
+               0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e,
+               0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7,
+               0x10, 0xaa, 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11,
+               0xa9, 0x13, 0xc0, 0x7a, 0x7b, 0xc1, 0x12, 0xa8,
+               0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, 0xe4, 0x5e,
+               0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7,
+               0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31,
+               0x89, 0x33, 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88,
+               0x81, 0x3b, 0xe8, 0x52, 0x53, 0xe9, 0x3a, 0x80,
+               0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, 0x83, 0x39,
+               0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef,
+               0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56,
+               0xfe, 0x44, 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff,
+               0x47, 0xfd, 0x2e, 0x94, 0x95, 0x2f, 0xfc, 0x46,
+               0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, 0x2a, 0x90,
+               0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29,
+               0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21,
+               0x99, 0x23, 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98,
+               0x4f, 0xf5, 0x26, 0x9c, 0x9d, 0x27, 0xf4, 0x4e,
+               0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, 0x4d, 0xf7,
+       },
+       {
+               0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+               0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7,
+               0x7f, 0xc4, 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79,
+               0xce, 0x75, 0xa5, 0x1e, 0x18, 0xa3, 0x73, 0xc8,
+               0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, 0x43, 0xf8,
+               0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49,
+               0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87,
+               0x30, 0x8b, 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36,
+               0xe1, 0x5a, 0x8a, 0x31, 0x37, 0x8c, 0x5c, 0xe7,
+               0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, 0xed, 0x56,
+               0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98,
+               0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29,
+               0x1f, 0xa4, 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19,
+               0xae, 0x15, 0xc5, 0x7e, 0x78, 0xc3, 0x13, 0xa8,
+               0x60, 0xdb, 0x0b, 0xb0, 0xb6, 0x0d, 0xdd, 0x66,
+               0xd1, 0x6a, 0xba, 0x01, 0x07, 0xbc, 0x6c, 0xd7,
+               0xdf, 0x64, 0xb4, 0x0f, 0x09, 0xb2, 0x62, 0xd9,
+               0x6e, 0xd5, 0x05, 0xbe, 0xb8, 0x03, 0xd3, 0x68,
+               0xa0, 0x1b, 0xcb, 0x70, 0x76, 0xcd, 0x1d, 0xa6,
+               0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, 0xac, 0x17,
+               0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27,
+               0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96,
+               0x5e, 0xe5, 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58,
+               0xef, 0x54, 0x84, 0x3f, 0x39, 0x82, 0x52, 0xe9,
+               0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, 0x83, 0x38,
+               0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89,
+               0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47,
+               0xf0, 0x4b, 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6,
+               0xc0, 0x7b, 0xab, 0x10, 0x16, 0xad, 0x7d, 0xc6,
+               0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, 0xcc, 0x77,
+               0xbf, 0x04, 0xd4, 0x6f, 0x69, 0xd2, 0x02, 0xb9,
+               0x0e, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x08,
+       },
+       {
+               0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13,
+               0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a,
+               0x0f, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, 0xa0, 0x1c,
+               0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95,
+               0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0x0d,
+               0x97, 0x2b, 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84,
+               0x11, 0xad, 0x74, 0xc8, 0xdb, 0x67, 0xbe, 0x02,
+               0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, 0x37, 0x8b,
+               0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f,
+               0xb5, 0x09, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6,
+               0x33, 0x8f, 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20,
+               0xba, 0x06, 0xdf, 0x63, 0x70, 0xcc, 0x15, 0xa9,
+               0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, 0x8d, 0x31,
+               0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x04, 0xb8,
+               0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e,
+               0xa4, 0x18, 0xc1, 0x7d, 0x6e, 0xd2, 0x0b, 0xb7,
+               0x78, 0xc4, 0x1d, 0xa1, 0xb2, 0x0e, 0xd7, 0x6b,
+               0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, 0x5e, 0xe2,
+               0x77, 0xcb, 0x12, 0xae, 0xbd, 0x01, 0xd8, 0x64,
+               0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed,
+               0x66, 0xda, 0x03, 0xbf, 0xac, 0x10, 0xc9, 0x75,
+               0xef, 0x53, 0x8a, 0x36, 0x25, 0x99, 0x40, 0xfc,
+               0x69, 0xd5, 0x0c, 0xb0, 0xa3, 0x1f, 0xc6, 0x7a,
+               0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3,
+               0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57,
+               0xcd, 0x71, 0xa8, 0x14, 0x07, 0xbb, 0x62, 0xde,
+               0x4b, 0xf7, 0x2e, 0x92, 0x81, 0x3d, 0xe4, 0x58,
+               0xc2, 0x7e, 0xa7, 0x1b, 0x08, 0xb4, 0x6d, 0xd1,
+               0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49,
+               0xd3, 0x6f, 0xb6, 0x0a, 0x19, 0xa5, 0x7c, 0xc0,
+               0x55, 0xe9, 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46,
+               0xdc, 0x60, 0xb9, 0x05, 0x16, 0xaa, 0x73, 0xcf,
+       },
+       {
+               0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14,
+               0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95,
+               0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0x0b,
+               0x9e, 0x23, 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a,
+               0x3e, 0x83, 0x59, 0xe4, 0xf0, 0x4d, 0x97, 0x2a,
+               0xbf, 0x02, 0xd8, 0x65, 0x71, 0xcc, 0x16, 0xab,
+               0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35,
+               0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x09, 0xb4,
+               0x7c, 0xc1, 0x1b, 0xa6, 0xb2, 0x0f, 0xd5, 0x68,
+               0xfd, 0x40, 0x9a, 0x27, 0x33, 0x8e, 0x54, 0xe9,
+               0x63, 0xde, 0x04, 0xb9, 0xad, 0x10, 0xca, 0x77,
+               0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6,
+               0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56,
+               0xc3, 0x7e, 0xa4, 0x19, 0x0d, 0xb0, 0x6a, 0xd7,
+               0x5d, 0xe0, 0x3a, 0x87, 0x93, 0x2e, 0xf4, 0x49,
+               0xdc, 0x61, 0xbb, 0x06, 0x12, 0xaf, 0x75, 0xc8,
+               0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec,
+               0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0x0a, 0xd0, 0x6d,
+               0xe7, 0x5a, 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3,
+               0x66, 0xdb, 0x01, 0xbc, 0xa8, 0x15, 0xcf, 0x72,
+               0xc6, 0x7b, 0xa1, 0x1c, 0x08, 0xb5, 0x6f, 0xd2,
+               0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53,
+               0xd9, 0x64, 0xbe, 0x03, 0x17, 0xaa, 0x70, 0xcd,
+               0x58, 0xe5, 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c,
+               0x84, 0x39, 0xe3, 0x5e, 0x4a, 0xf7, 0x2d, 0x90,
+               0x05, 0xb8, 0x62, 0xdf, 0xcb, 0x76, 0xac, 0x11,
+               0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f,
+               0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0x0e,
+               0xba, 0x07, 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae,
+               0x3b, 0x86, 0x5c, 0xe1, 0xf5, 0x48, 0x92, 0x2f,
+               0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, 0x0c, 0xb1,
+               0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30,
+       },
+       {
+               0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d,
+               0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84,
+               0x2f, 0x91, 0x4e, 0xf0, 0xed, 0x53, 0x8c, 0x32,
+               0xb6, 0x08, 0xd7, 0x69, 0x74, 0xca, 0x15, 0xab,
+               0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43,
+               0xc7, 0x79, 0xa6, 0x18, 0x05, 0xbb, 0x64, 0xda,
+               0x71, 0xcf, 0x10, 0xae, 0xb3, 0x0d, 0xd2, 0x6c,
+               0xe8, 0x56, 0x89, 0x37, 0x2a, 0x94, 0x4b, 0xf5,
+               0xbc, 0x02, 0xdd, 0x63, 0x7e, 0xc0, 0x1f, 0xa1,
+               0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38,
+               0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e,
+               0x0a, 0xb4, 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17,
+               0xe2, 0x5c, 0x83, 0x3d, 0x20, 0x9e, 0x41, 0xff,
+               0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x07, 0xd8, 0x66,
+               0xcd, 0x73, 0xac, 0x12, 0x0f, 0xb1, 0x6e, 0xd0,
+               0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49,
+               0x65, 0xdb, 0x04, 0xba, 0xa7, 0x19, 0xc6, 0x78,
+               0xfc, 0x42, 0x9d, 0x23, 0x3e, 0x80, 0x5f, 0xe1,
+               0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, 0xe9, 0x57,
+               0xd3, 0x6d, 0xb2, 0x0c, 0x11, 0xaf, 0x70, 0xce,
+               0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26,
+               0xa2, 0x1c, 0xc3, 0x7d, 0x60, 0xde, 0x01, 0xbf,
+               0x14, 0xaa, 0x75, 0xcb, 0xd6, 0x68, 0xb7, 0x09,
+               0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, 0x2e, 0x90,
+               0xd9, 0x67, 0xb8, 0x06, 0x1b, 0xa5, 0x7a, 0xc4,
+               0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d,
+               0xf6, 0x48, 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb,
+               0x6f, 0xd1, 0x0e, 0xb0, 0xad, 0x13, 0xcc, 0x72,
+               0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, 0x24, 0x9a,
+               0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x03,
+               0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0x0b, 0xb5,
+               0x31, 0x8f, 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c,
+       },
+       {
+               0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a,
+               0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b,
+               0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25,
+               0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0x0b, 0xb4,
+               0x7e, 0xc1, 0x1d, 0xa2, 0xb8, 0x07, 0xdb, 0x64,
+               0xef, 0x50, 0x8c, 0x33, 0x29, 0x96, 0x4a, 0xf5,
+               0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, 0xe4, 0x5b,
+               0xd0, 0x6f, 0xb3, 0x0c, 0x16, 0xa9, 0x75, 0xca,
+               0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6,
+               0x6d, 0xd2, 0x0e, 0xb1, 0xab, 0x14, 0xc8, 0x77,
+               0xc3, 0x7c, 0xa0, 0x1f, 0x05, 0xba, 0x66, 0xd9,
+               0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, 0xf7, 0x48,
+               0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98,
+               0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x09,
+               0xbd, 0x02, 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7,
+               0x2c, 0x93, 0x4f, 0xf0, 0xea, 0x55, 0x89, 0x36,
+               0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, 0x40, 0xff,
+               0x74, 0xcb, 0x17, 0xa8, 0xb2, 0x0d, 0xd1, 0x6e,
+               0xda, 0x65, 0xb9, 0x06, 0x1c, 0xa3, 0x7f, 0xc0,
+               0x4b, 0xf4, 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51,
+               0x9b, 0x24, 0xf8, 0x47, 0x5d, 0xe2, 0x3e, 0x81,
+               0x0a, 0xb5, 0x69, 0xd6, 0xcc, 0x73, 0xaf, 0x10,
+               0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x01, 0xbe,
+               0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f,
+               0x19, 0xa6, 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x03,
+               0x88, 0x37, 0xeb, 0x54, 0x4e, 0xf1, 0x2d, 0x92,
+               0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, 0x83, 0x3c,
+               0xb7, 0x08, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad,
+               0x67, 0xd8, 0x04, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d,
+               0xf6, 0x49, 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec,
+               0x58, 0xe7, 0x3b, 0x84, 0x9e, 0x21, 0xfd, 0x42,
+               0xc9, 0x76, 0xaa, 0x15, 0x0f, 0xb0, 0x6c, 0xd3,
+       },
+       {
+               0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a,
+               0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34,
+               0x9c, 0x5c, 0x01, 0xc1, 0xbb, 0x7b, 0x26, 0xe6,
+               0xd2, 0x12, 0x4f, 0x8f, 0xf5, 0x35, 0x68, 0xa8,
+               0x25, 0xe5, 0xb8, 0x78, 0x02, 0xc2, 0x9f, 0x5f,
+               0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11,
+               0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x03, 0xc3,
+               0xf7, 0x37, 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d,
+               0x4a, 0x8a, 0xd7, 0x17, 0x6d, 0xad, 0xf0, 0x30,
+               0x04, 0xc4, 0x99, 0x59, 0x23, 0xe3, 0xbe, 0x7e,
+               0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac,
+               0x98, 0x58, 0x05, 0xc5, 0xbf, 0x7f, 0x22, 0xe2,
+               0x6f, 0xaf, 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15,
+               0x21, 0xe1, 0xbc, 0x7c, 0x06, 0xc6, 0x9b, 0x5b,
+               0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, 0x49, 0x89,
+               0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x07, 0xc7,
+               0x94, 0x54, 0x09, 0xc9, 0xb3, 0x73, 0x2e, 0xee,
+               0xda, 0x1a, 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0,
+               0x08, 0xc8, 0x95, 0x55, 0x2f, 0xef, 0xb2, 0x72,
+               0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, 0xfc, 0x3c,
+               0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0x0b, 0xcb,
+               0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85,
+               0x2d, 0xed, 0xb0, 0x70, 0x0a, 0xca, 0x97, 0x57,
+               0x63, 0xa3, 0xfe, 0x3e, 0x44, 0x84, 0xd9, 0x19,
+               0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, 0x64, 0xa4,
+               0x90, 0x50, 0x0d, 0xcd, 0xb7, 0x77, 0x2a, 0xea,
+               0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38,
+               0x0c, 0xcc, 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76,
+               0xfb, 0x3b, 0x66, 0xa6, 0xdc, 0x1c, 0x41, 0x81,
+               0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, 0x0f, 0xcf,
+               0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d,
+               0x29, 0xe9, 0xb4, 0x74, 0x0e, 0xce, 0x93, 0x53,
+       },
+       {
+               0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d,
+               0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b,
+               0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, 0x30, 0xf1,
+               0xca, 0x0b, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7,
+               0x05, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78,
+               0x43, 0x82, 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e,
+               0x89, 0x48, 0x16, 0xd7, 0xaa, 0x6b, 0x35, 0xf4,
+               0xcf, 0x0e, 0x50, 0x91, 0xec, 0x2d, 0x73, 0xb2,
+               0x0a, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77,
+               0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31,
+               0x86, 0x47, 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb,
+               0xc0, 0x01, 0x5f, 0x9e, 0xe3, 0x22, 0x7c, 0xbd,
+               0x0f, 0xce, 0x90, 0x51, 0x2c, 0xed, 0xb3, 0x72,
+               0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34,
+               0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe,
+               0xc5, 0x04, 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8,
+               0x14, 0xd5, 0x8b, 0x4a, 0x37, 0xf6, 0xa8, 0x69,
+               0x52, 0x93, 0xcd, 0x0c, 0x71, 0xb0, 0xee, 0x2f,
+               0x98, 0x59, 0x07, 0xc6, 0xbb, 0x7a, 0x24, 0xe5,
+               0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3,
+               0x11, 0xd0, 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c,
+               0x57, 0x96, 0xc8, 0x09, 0x74, 0xb5, 0xeb, 0x2a,
+               0x9d, 0x5c, 0x02, 0xc3, 0xbe, 0x7f, 0x21, 0xe0,
+               0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6,
+               0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63,
+               0x58, 0x99, 0xc7, 0x06, 0x7b, 0xba, 0xe4, 0x25,
+               0x92, 0x53, 0x0d, 0xcc, 0xb1, 0x70, 0x2e, 0xef,
+               0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, 0x68, 0xa9,
+               0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66,
+               0x5d, 0x9c, 0xc2, 0x03, 0x7e, 0xbf, 0xe1, 0x20,
+               0x97, 0x56, 0x08, 0xc9, 0xb4, 0x75, 0x2b, 0xea,
+               0xd1, 0x10, 0x4e, 0x8f, 0xf2, 0x33, 0x6d, 0xac,
+       },
+       {
+               0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74,
+               0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a,
+               0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0x0a, 0xc8,
+               0xe2, 0x20, 0x7b, 0xb9, 0xcd, 0x0f, 0x54, 0x96,
+               0x65, 0xa7, 0xfc, 0x3e, 0x4a, 0x88, 0xd3, 0x11,
+               0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, 0x8d, 0x4f,
+               0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad,
+               0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3,
+               0xca, 0x08, 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe,
+               0x94, 0x56, 0x0d, 0xcf, 0xbb, 0x79, 0x22, 0xe0,
+               0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, 0xc0, 0x02,
+               0x28, 0xea, 0xb1, 0x73, 0x07, 0xc5, 0x9e, 0x5c,
+               0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb,
+               0xf1, 0x33, 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85,
+               0x13, 0xd1, 0x8a, 0x48, 0x3c, 0xfe, 0xa5, 0x67,
+               0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, 0xfb, 0x39,
+               0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd,
+               0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3,
+               0x35, 0xf7, 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41,
+               0x6b, 0xa9, 0xf2, 0x30, 0x44, 0x86, 0xdd, 0x1f,
+               0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x01, 0x5a, 0x98,
+               0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x04, 0xc6,
+               0x50, 0x92, 0xc9, 0x0b, 0x7f, 0xbd, 0xe6, 0x24,
+               0x0e, 0xcc, 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a,
+               0x43, 0x81, 0xda, 0x18, 0x6c, 0xae, 0xf5, 0x37,
+               0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, 0xab, 0x69,
+               0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b,
+               0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5,
+               0x26, 0xe4, 0xbf, 0x7d, 0x09, 0xcb, 0x90, 0x52,
+               0x78, 0xba, 0xe1, 0x23, 0x57, 0x95, 0xce, 0x0c,
+               0x9a, 0x58, 0x03, 0xc1, 0xb5, 0x77, 0x2c, 0xee,
+               0xc4, 0x06, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0,
+       },
+       {
+               0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73,
+               0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25,
+               0xac, 0x6f, 0x37, 0xf4, 0x87, 0x44, 0x1c, 0xdf,
+               0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, 0x4a, 0x89,
+               0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36,
+               0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60,
+               0xe9, 0x2a, 0x72, 0xb1, 0xc2, 0x01, 0x59, 0x9a,
+               0xbf, 0x7c, 0x24, 0xe7, 0x94, 0x57, 0x0f, 0xcc,
+               0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, 0x3a, 0xf9,
+               0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf,
+               0x26, 0xe5, 0xbd, 0x7e, 0x0d, 0xce, 0x96, 0x55,
+               0x70, 0xb3, 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x03,
+               0xcf, 0x0c, 0x54, 0x97, 0xe4, 0x27, 0x7f, 0xbc,
+               0x99, 0x5a, 0x02, 0xc1, 0xb2, 0x71, 0x29, 0xea,
+               0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10,
+               0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46,
+               0x09, 0xca, 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a,
+               0x5f, 0x9c, 0xc4, 0x07, 0x74, 0xb7, 0xef, 0x2c,
+               0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, 0x15, 0xd6,
+               0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80,
+               0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f,
+               0x1a, 0xd9, 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69,
+               0xe0, 0x23, 0x7b, 0xb8, 0xcb, 0x08, 0x50, 0x93,
+               0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, 0x06, 0xc5,
+               0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0,
+               0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6,
+               0x2f, 0xec, 0xb4, 0x77, 0x04, 0xc7, 0x9f, 0x5c,
+               0x79, 0xba, 0xe2, 0x21, 0x52, 0x91, 0xc9, 0x0a,
+               0xc6, 0x05, 0x5d, 0x9e, 0xed, 0x2e, 0x76, 0xb5,
+               0x90, 0x53, 0x0b, 0xc8, 0xbb, 0x78, 0x20, 0xe3,
+               0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19,
+               0x3c, 0xff, 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f,
+       },
+       {
+               0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66,
+               0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08,
+               0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba,
+               0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4,
+               0xa5, 0x61, 0x30, 0xf4, 0x92, 0x56, 0x07, 0xc3,
+               0xcb, 0x0f, 0x5e, 0x9a, 0xfc, 0x38, 0x69, 0xad,
+               0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, 0xdb, 0x1f,
+               0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71,
+               0x57, 0x93, 0xc2, 0x06, 0x60, 0xa4, 0xf5, 0x31,
+               0x39, 0xfd, 0xac, 0x68, 0x0e, 0xca, 0x9b, 0x5f,
+               0x8b, 0x4f, 0x1e, 0xda, 0xbc, 0x78, 0x29, 0xed,
+               0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, 0x47, 0x83,
+               0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x01, 0x50, 0x94,
+               0x9c, 0x58, 0x09, 0xcd, 0xab, 0x6f, 0x3e, 0xfa,
+               0x2e, 0xea, 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48,
+               0x40, 0x84, 0xd5, 0x11, 0x77, 0xb3, 0xe2, 0x26,
+               0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, 0x0c, 0xc8,
+               0xc0, 0x04, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6,
+               0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14,
+               0x1c, 0xd8, 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a,
+               0x0b, 0xcf, 0x9e, 0x5a, 0x3c, 0xf8, 0xa9, 0x6d,
+               0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, 0xc7, 0x03,
+               0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1,
+               0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf,
+               0xf9, 0x3d, 0x6c, 0xa8, 0xce, 0x0a, 0x5b, 0x9f,
+               0x97, 0x53, 0x02, 0xc6, 0xa0, 0x64, 0x35, 0xf1,
+               0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, 0x87, 0x43,
+               0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d,
+               0x5c, 0x98, 0xc9, 0x0d, 0x6b, 0xaf, 0xfe, 0x3a,
+               0x32, 0xf6, 0xa7, 0x63, 0x05, 0xc1, 0x90, 0x54,
+               0x80, 0x44, 0x15, 0xd1, 0xb7, 0x73, 0x22, 0xe6,
+               0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, 0x4c, 0x88,
+       },
+       {
+               0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61,
+               0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07,
+               0xcc, 0x09, 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad,
+               0xaa, 0x6f, 0x3d, 0xf8, 0x99, 0x5c, 0x0e, 0xcb,
+               0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, 0x21, 0xe4,
+               0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82,
+               0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28,
+               0x2f, 0xea, 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e,
+               0x17, 0xd2, 0x80, 0x45, 0x24, 0xe1, 0xb3, 0x76,
+               0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, 0xd5, 0x10,
+               0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba,
+               0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc,
+               0x92, 0x57, 0x05, 0xc0, 0xa1, 0x64, 0x36, 0xf3,
+               0xf4, 0x31, 0x63, 0xa6, 0xc7, 0x02, 0x50, 0x95,
+               0x5e, 0x9b, 0xc9, 0x0c, 0x6d, 0xa8, 0xfa, 0x3f,
+               0x38, 0xfd, 0xaf, 0x6a, 0x0b, 0xce, 0x9c, 0x59,
+               0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f,
+               0x48, 0x8d, 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29,
+               0xe2, 0x27, 0x75, 0xb0, 0xd1, 0x14, 0x46, 0x83,
+               0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, 0x20, 0xe5,
+               0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0x0f, 0xca,
+               0xcd, 0x08, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac,
+               0x67, 0xa2, 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x06,
+               0x01, 0xc4, 0x96, 0x53, 0x32, 0xf7, 0xa5, 0x60,
+               0x39, 0xfc, 0xae, 0x6b, 0x0a, 0xcf, 0x9d, 0x58,
+               0x5f, 0x9a, 0xc8, 0x0d, 0x6c, 0xa9, 0xfb, 0x3e,
+               0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x03, 0x51, 0x94,
+               0x93, 0x56, 0x04, 0xc1, 0xa0, 0x65, 0x37, 0xf2,
+               0xbc, 0x79, 0x2b, 0xee, 0x8f, 0x4a, 0x18, 0xdd,
+               0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, 0x7e, 0xbb,
+               0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11,
+               0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77,
+       },
+       {
+               0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68,
+               0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16,
+               0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x05, 0x52, 0x94,
+               0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea,
+               0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d,
+               0x9b, 0x5d, 0x0a, 0xcc, 0xa4, 0x62, 0x35, 0xf3,
+               0x19, 0xdf, 0x88, 0x4e, 0x26, 0xe0, 0xb7, 0x71,
+               0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, 0xc9, 0x0f,
+               0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf,
+               0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x07, 0xc1,
+               0x2b, 0xed, 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43,
+               0x55, 0x93, 0xc4, 0x02, 0x6a, 0xac, 0xfb, 0x3d,
+               0x32, 0xf4, 0xa3, 0x65, 0x0d, 0xcb, 0x9c, 0x5a,
+               0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24,
+               0xce, 0x08, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6,
+               0xb0, 0x76, 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8,
+               0xb3, 0x75, 0x22, 0xe4, 0x8c, 0x4a, 0x1d, 0xdb,
+               0xcd, 0x0b, 0x5c, 0x9a, 0xf2, 0x34, 0x63, 0xa5,
+               0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27,
+               0x31, 0xf7, 0xa0, 0x66, 0x0e, 0xc8, 0x9f, 0x59,
+               0x56, 0x90, 0xc7, 0x01, 0x69, 0xaf, 0xf8, 0x3e,
+               0x28, 0xee, 0xb9, 0x7f, 0x17, 0xd1, 0x86, 0x40,
+               0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, 0x04, 0xc2,
+               0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc,
+               0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0x0c,
+               0x1a, 0xdc, 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72,
+               0x98, 0x5e, 0x09, 0xcf, 0xa7, 0x61, 0x36, 0xf0,
+               0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, 0x48, 0x8e,
+               0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9,
+               0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x06, 0x51, 0x97,
+               0x7d, 0xbb, 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15,
+               0x03, 0xc5, 0x92, 0x54, 0x3c, 0xfa, 0xad, 0x6b,
+       },
+       {
+               0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f,
+               0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19,
+               0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83,
+               0x9a, 0x5d, 0x09, 0xce, 0xa1, 0x66, 0x32, 0xf5,
+               0xc5, 0x02, 0x56, 0x91, 0xfe, 0x39, 0x6d, 0xaa,
+               0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, 0x1b, 0xdc,
+               0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46,
+               0x5f, 0x98, 0xcc, 0x0b, 0x64, 0xa3, 0xf7, 0x30,
+               0x97, 0x50, 0x04, 0xc3, 0xac, 0x6b, 0x3f, 0xf8,
+               0xe1, 0x26, 0x72, 0xb5, 0xda, 0x1d, 0x49, 0x8e,
+               0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, 0xd3, 0x14,
+               0x0d, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62,
+               0x52, 0x95, 0xc1, 0x06, 0x69, 0xae, 0xfa, 0x3d,
+               0x24, 0xe3, 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b,
+               0xbe, 0x79, 0x2d, 0xea, 0x85, 0x42, 0x16, 0xd1,
+               0xc8, 0x0f, 0x5b, 0x9c, 0xf3, 0x34, 0x60, 0xa7,
+               0x33, 0xf4, 0xa0, 0x67, 0x08, 0xcf, 0x9b, 0x5c,
+               0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a,
+               0xdf, 0x18, 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0,
+               0xa9, 0x6e, 0x3a, 0xfd, 0x92, 0x55, 0x01, 0xc6,
+               0xf6, 0x31, 0x65, 0xa2, 0xcd, 0x0a, 0x5e, 0x99,
+               0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef,
+               0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75,
+               0x6c, 0xab, 0xff, 0x38, 0x57, 0x90, 0xc4, 0x03,
+               0xa4, 0x63, 0x37, 0xf0, 0x9f, 0x58, 0x0c, 0xcb,
+               0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, 0x7a, 0xbd,
+               0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27,
+               0x3e, 0xf9, 0xad, 0x6a, 0x05, 0xc2, 0x96, 0x51,
+               0x61, 0xa6, 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0x0e,
+               0x17, 0xd0, 0x84, 0x43, 0x2c, 0xeb, 0xbf, 0x78,
+               0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, 0x25, 0xe2,
+               0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x07, 0x53, 0x94,
+       },
+       {
+               0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42,
+               0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c,
+               0x1c, 0xd4, 0x91, 0x59, 0x1b, 0xd3, 0x96, 0x5e,
+               0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, 0x98, 0x50,
+               0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a,
+               0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74,
+               0x24, 0xec, 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66,
+               0x2a, 0xe2, 0xa7, 0x6f, 0x2d, 0xe5, 0xa0, 0x68,
+               0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, 0xfa, 0x32,
+               0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c,
+               0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e,
+               0x62, 0xaa, 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20,
+               0x48, 0x80, 0xc5, 0x0d, 0x4f, 0x87, 0xc2, 0x0a,
+               0x46, 0x8e, 0xcb, 0x03, 0x41, 0x89, 0xcc, 0x04,
+               0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16,
+               0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18,
+               0xe0, 0x28, 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2,
+               0xee, 0x26, 0x63, 0xab, 0xe9, 0x21, 0x64, 0xac,
+               0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, 0x76, 0xbe,
+               0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0,
+               0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a,
+               0xd6, 0x1e, 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94,
+               0xc4, 0x0c, 0x49, 0x81, 0xc3, 0x0b, 0x4e, 0x86,
+               0xca, 0x02, 0x47, 0x8f, 0xcd, 0x05, 0x40, 0x88,
+               0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2,
+               0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc,
+               0x8c, 0x44, 0x01, 0xc9, 0x8b, 0x43, 0x06, 0xce,
+               0x82, 0x4a, 0x0f, 0xc7, 0x85, 0x4d, 0x08, 0xc0,
+               0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, 0x22, 0xea,
+               0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4,
+               0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6,
+               0xba, 0x72, 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8,
+       },
+       {
+               0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45,
+               0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43,
+               0x0c, 0xc5, 0x83, 0x4a, 0x0f, 0xc6, 0x80, 0x49,
+               0x0a, 0xc3, 0x85, 0x4c, 0x09, 0xc0, 0x86, 0x4f,
+               0x18, 0xd1, 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d,
+               0x1e, 0xd7, 0x91, 0x58, 0x1d, 0xd4, 0x92, 0x5b,
+               0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, 0x98, 0x51,
+               0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57,
+               0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75,
+               0x36, 0xff, 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73,
+               0x3c, 0xf5, 0xb3, 0x7a, 0x3f, 0xf6, 0xb0, 0x79,
+               0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, 0xb6, 0x7f,
+               0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d,
+               0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b,
+               0x24, 0xed, 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61,
+               0x22, 0xeb, 0xad, 0x64, 0x21, 0xe8, 0xae, 0x67,
+               0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa, 0xec, 0x25,
+               0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23,
+               0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29,
+               0x6a, 0xa3, 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f,
+               0x78, 0xb1, 0xf7, 0x3e, 0x7b, 0xb2, 0xf4, 0x3d,
+               0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, 0xf2, 0x3b,
+               0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31,
+               0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37,
+               0x50, 0x99, 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15,
+               0x56, 0x9f, 0xd9, 0x10, 0x55, 0x9c, 0xda, 0x13,
+               0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, 0xd0, 0x19,
+               0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f,
+               0x48, 0x81, 0xc7, 0x0e, 0x4b, 0x82, 0xc4, 0x0d,
+               0x4e, 0x87, 0xc1, 0x08, 0x4d, 0x84, 0xc2, 0x0b,
+               0x44, 0x8d, 0xcb, 0x02, 0x47, 0x8e, 0xc8, 0x01,
+               0x42, 0x8b, 0xcd, 0x04, 0x41, 0x88, 0xce, 0x07,
+       },
+       {
+               0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c,
+               0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52,
+               0x3c, 0xf6, 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70,
+               0x22, 0xe8, 0xab, 0x61, 0x2d, 0xe7, 0xa4, 0x6e,
+               0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, 0xfe, 0x34,
+               0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a,
+               0x44, 0x8e, 0xcd, 0x07, 0x4b, 0x81, 0xc2, 0x08,
+               0x5a, 0x90, 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16,
+               0xf0, 0x3a, 0x79, 0xb3, 0xff, 0x35, 0x76, 0xbc,
+               0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, 0x68, 0xa2,
+               0xcc, 0x06, 0x45, 0x8f, 0xc3, 0x09, 0x4a, 0x80,
+               0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e,
+               0x88, 0x42, 0x01, 0xcb, 0x87, 0x4d, 0x0e, 0xc4,
+               0x96, 0x5c, 0x1f, 0xd5, 0x99, 0x53, 0x10, 0xda,
+               0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, 0x32, 0xf8,
+               0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6,
+               0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1,
+               0xe3, 0x29, 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf,
+               0xc1, 0x0b, 0x48, 0x82, 0xce, 0x04, 0x47, 0x8d,
+               0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, 0x59, 0x93,
+               0x85, 0x4f, 0x0c, 0xc6, 0x8a, 0x40, 0x03, 0xc9,
+               0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7,
+               0xb9, 0x73, 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5,
+               0xa7, 0x6d, 0x2e, 0xe4, 0xa8, 0x62, 0x21, 0xeb,
+               0x0d, 0xc7, 0x84, 0x4e, 0x02, 0xc8, 0x8b, 0x41,
+               0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f,
+               0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d,
+               0x2f, 0xe5, 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63,
+               0x75, 0xbf, 0xfc, 0x36, 0x7a, 0xb0, 0xf3, 0x39,
+               0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, 0xed, 0x27,
+               0x49, 0x83, 0xc0, 0x0a, 0x46, 0x8c, 0xcf, 0x05,
+               0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b,
+       },
+       {
+               0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b,
+               0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d,
+               0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, 0xac, 0x67,
+               0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71,
+               0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13,
+               0x4e, 0x85, 0xc5, 0x0e, 0x45, 0x8e, 0xce, 0x05,
+               0x74, 0xbf, 0xff, 0x34, 0x7f, 0xb4, 0xf4, 0x3f,
+               0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, 0xe2, 0x29,
+               0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb,
+               0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed,
+               0x9c, 0x57, 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7,
+               0x8a, 0x41, 0x01, 0xca, 0x81, 0x4a, 0x0a, 0xc1,
+               0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, 0x68, 0xa3,
+               0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5,
+               0xc4, 0x0f, 0x4f, 0x84, 0xcf, 0x04, 0x44, 0x8f,
+               0xd2, 0x19, 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99,
+               0x7d, 0xb6, 0xf6, 0x3d, 0x76, 0xbd, 0xfd, 0x36,
+               0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, 0xeb, 0x20,
+               0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a,
+               0x47, 0x8c, 0xcc, 0x07, 0x4c, 0x87, 0xc7, 0x0c,
+               0x25, 0xee, 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e,
+               0x33, 0xf8, 0xb8, 0x73, 0x38, 0xf3, 0xb3, 0x78,
+               0x09, 0xc2, 0x82, 0x49, 0x02, 0xc9, 0x89, 0x42,
+               0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54,
+               0xcd, 0x06, 0x46, 0x8d, 0xc6, 0x0d, 0x4d, 0x86,
+               0xdb, 0x10, 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90,
+               0xe1, 0x2a, 0x6a, 0xa1, 0xea, 0x21, 0x61, 0xaa,
+               0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, 0x77, 0xbc,
+               0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde,
+               0x83, 0x48, 0x08, 0xc3, 0x88, 0x43, 0x03, 0xc8,
+               0xb9, 0x72, 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2,
+               0xaf, 0x64, 0x24, 0xef, 0xa4, 0x6f, 0x2f, 0xe4,
+       },
+       {
+               0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e,
+               0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70,
+               0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x02,
+               0x72, 0xbe, 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c,
+               0xb8, 0x74, 0x3d, 0xf1, 0xaf, 0x63, 0x2a, 0xe6,
+               0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, 0x04, 0xc8,
+               0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba,
+               0xca, 0x06, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94,
+               0x6d, 0xa1, 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33,
+               0x43, 0x8f, 0xc6, 0x0a, 0x54, 0x98, 0xd1, 0x1d,
+               0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, 0xa3, 0x6f,
+               0x1f, 0xd3, 0x9a, 0x56, 0x08, 0xc4, 0x8d, 0x41,
+               0xd5, 0x19, 0x50, 0x9c, 0xc2, 0x0e, 0x47, 0x8b,
+               0xfb, 0x37, 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5,
+               0x89, 0x45, 0x0c, 0xc0, 0x9e, 0x52, 0x1b, 0xd7,
+               0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, 0x35, 0xf9,
+               0xda, 0x16, 0x5f, 0x93, 0xcd, 0x01, 0x48, 0x84,
+               0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa,
+               0x86, 0x4a, 0x03, 0xcf, 0x91, 0x5d, 0x14, 0xd8,
+               0xa8, 0x64, 0x2d, 0xe1, 0xbf, 0x73, 0x3a, 0xf6,
+               0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, 0xf0, 0x3c,
+               0x4c, 0x80, 0xc9, 0x05, 0x5b, 0x97, 0xde, 0x12,
+               0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60,
+               0x10, 0xdc, 0x95, 0x59, 0x07, 0xcb, 0x82, 0x4e,
+               0xb7, 0x7b, 0x32, 0xfe, 0xa0, 0x6c, 0x25, 0xe9,
+               0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, 0x0b, 0xc7,
+               0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5,
+               0xc5, 0x09, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b,
+               0x0f, 0xc3, 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51,
+               0x21, 0xed, 0xa4, 0x68, 0x36, 0xfa, 0xb3, 0x7f,
+               0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, 0xc1, 0x0d,
+               0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23,
+       },
+       {
+               0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59,
+               0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f,
+               0x4c, 0x81, 0xcb, 0x06, 0x5f, 0x92, 0xd8, 0x15,
+               0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, 0xfe, 0x33,
+               0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0x0c, 0xc1,
+               0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7,
+               0xd4, 0x19, 0x53, 0x9e, 0xc7, 0x0a, 0x40, 0x8d,
+               0xf2, 0x3f, 0x75, 0xb8, 0xe1, 0x2c, 0x66, 0xab,
+               0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, 0xb9, 0x74,
+               0x0b, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52,
+               0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38,
+               0x47, 0x8a, 0xc0, 0x0d, 0x54, 0x99, 0xd3, 0x1e,
+               0xb5, 0x78, 0x32, 0xff, 0xa6, 0x6b, 0x21, 0xec,
+               0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, 0x07, 0xca,
+               0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0,
+               0xdf, 0x12, 0x58, 0x95, 0xcc, 0x01, 0x4b, 0x86,
+               0x5a, 0x97, 0xdd, 0x10, 0x49, 0x84, 0xce, 0x03,
+               0x7c, 0xb1, 0xfb, 0x36, 0x6f, 0xa2, 0xe8, 0x25,
+               0x16, 0xdb, 0x91, 0x5c, 0x05, 0xc8, 0x82, 0x4f,
+               0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69,
+               0xc2, 0x0f, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b,
+               0xe4, 0x29, 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd,
+               0x8e, 0x43, 0x09, 0xc4, 0x9d, 0x50, 0x1a, 0xd7,
+               0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, 0x3c, 0xf1,
+               0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e,
+               0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x08,
+               0x3b, 0xf6, 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62,
+               0x1d, 0xd0, 0x9a, 0x57, 0x0e, 0xc3, 0x89, 0x44,
+               0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, 0x7b, 0xb6,
+               0xc9, 0x04, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90,
+               0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa,
+               0x85, 0x48, 0x02, 0xcf, 0x96, 0x5b, 0x11, 0xdc,
+       },
+       {
+               0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50,
+               0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e,
+               0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c,
+               0x42, 0x8c, 0xc3, 0x0d, 0x5d, 0x93, 0xdc, 0x12,
+               0xf8, 0x36, 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8,
+               0xc6, 0x08, 0x47, 0x89, 0xd9, 0x17, 0x58, 0x96,
+               0x84, 0x4a, 0x05, 0xcb, 0x9b, 0x55, 0x1a, 0xd4,
+               0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea,
+               0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd,
+               0xd3, 0x1d, 0x52, 0x9c, 0xcc, 0x02, 0x4d, 0x83,
+               0x91, 0x5f, 0x10, 0xde, 0x8e, 0x40, 0x0f, 0xc1,
+               0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, 0x31, 0xff,
+               0x15, 0xdb, 0x94, 0x5a, 0x0a, 0xc4, 0x8b, 0x45,
+               0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b,
+               0x69, 0xa7, 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39,
+               0x57, 0x99, 0xd6, 0x18, 0x48, 0x86, 0xc9, 0x07,
+               0xc7, 0x09, 0x46, 0x88, 0xd8, 0x16, 0x59, 0x97,
+               0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9,
+               0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb,
+               0x85, 0x4b, 0x04, 0xca, 0x9a, 0x54, 0x1b, 0xd5,
+               0x3f, 0xf1, 0xbe, 0x70, 0x20, 0xee, 0xa1, 0x6f,
+               0x01, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, 0x9f, 0x51,
+               0x43, 0x8d, 0xc2, 0x0c, 0x5c, 0x92, 0xdd, 0x13,
+               0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d,
+               0x2a, 0xe4, 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a,
+               0x14, 0xda, 0x95, 0x5b, 0x0b, 0xc5, 0x8a, 0x44,
+               0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, 0xc8, 0x06,
+               0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38,
+               0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x03, 0x4c, 0x82,
+               0xec, 0x22, 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc,
+               0xae, 0x60, 0x2f, 0xe1, 0xb1, 0x7f, 0x30, 0xfe,
+               0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, 0x0e, 0xc0,
+       },
+       {
+               0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+               0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61,
+               0x6c, 0xa3, 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b,
+               0x5a, 0x95, 0xd9, 0x16, 0x41, 0x8e, 0xc2, 0x0d,
+               0xd8, 0x17, 0x5b, 0x94, 0xc3, 0x0c, 0x40, 0x8f,
+               0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9,
+               0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3,
+               0x82, 0x4d, 0x01, 0xce, 0x99, 0x56, 0x1a, 0xd5,
+               0xad, 0x62, 0x2e, 0xe1, 0xb6, 0x79, 0x35, 0xfa,
+               0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, 0x03, 0xcc,
+               0xc1, 0x0e, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96,
+               0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0,
+               0x75, 0xba, 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22,
+               0x43, 0x8c, 0xc0, 0x0f, 0x58, 0x97, 0xdb, 0x14,
+               0x19, 0xd6, 0x9a, 0x55, 0x02, 0xcd, 0x81, 0x4e,
+               0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78,
+               0x47, 0x88, 0xc4, 0x0b, 0x5c, 0x93, 0xdf, 0x10,
+               0x71, 0xbe, 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26,
+               0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c,
+               0x1d, 0xd2, 0x9e, 0x51, 0x06, 0xc9, 0x85, 0x4a,
+               0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x07, 0xc8,
+               0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe,
+               0xf3, 0x3c, 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4,
+               0xc5, 0x0a, 0x46, 0x89, 0xde, 0x11, 0x5d, 0x92,
+               0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, 0x72, 0xbd,
+               0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x08, 0x44, 0x8b,
+               0x86, 0x49, 0x05, 0xca, 0x9d, 0x52, 0x1e, 0xd1,
+               0xb0, 0x7f, 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7,
+               0x32, 0xfd, 0xb1, 0x7e, 0x29, 0xe6, 0xaa, 0x65,
+               0x04, 0xcb, 0x87, 0x48, 0x1f, 0xd0, 0x9c, 0x53,
+               0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x09,
+               0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f,
+       },
+       {
+               0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a,
+               0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4,
+               0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36, 0x5b, 0x8b,
+               0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45,
+               0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15,
+               0xd1, 0x01, 0x6c, 0xbc, 0xb6, 0x66, 0x0b, 0xdb,
+               0x9e, 0x4e, 0x23, 0xf3, 0xf9, 0x29, 0x44, 0x94,
+               0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, 0x8a, 0x5a,
+               0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34,
+               0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa,
+               0xbf, 0x6f, 0x02, 0xd2, 0xd8, 0x08, 0x65, 0xb5,
+               0x71, 0xa1, 0xcc, 0x1c, 0x16, 0xc6, 0xab, 0x7b,
+               0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, 0xfb, 0x2b,
+               0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5,
+               0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa,
+               0x6e, 0xbe, 0xd3, 0x03, 0x09, 0xd9, 0xb4, 0x64,
+               0x7c, 0xac, 0xc1, 0x11, 0x1b, 0xcb, 0xa6, 0x76,
+               0xb2, 0x62, 0x0f, 0xdf, 0xd5, 0x05, 0x68, 0xb8,
+               0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7,
+               0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39,
+               0x63, 0xb3, 0xde, 0x0e, 0x04, 0xd4, 0xb9, 0x69,
+               0xad, 0x7d, 0x10, 0xc0, 0xca, 0x1a, 0x77, 0xa7,
+               0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, 0x38, 0xe8,
+               0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26,
+               0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48,
+               0x8c, 0x5c, 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86,
+               0xc3, 0x13, 0x7e, 0xae, 0xa4, 0x74, 0x19, 0xc9,
+               0x0d, 0xdd, 0xb0, 0x60, 0x6a, 0xba, 0xd7, 0x07,
+               0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57,
+               0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99,
+               0xdc, 0x0c, 0x61, 0xb1, 0xbb, 0x6b, 0x06, 0xd6,
+               0x12, 0xc2, 0xaf, 0x7f, 0x75, 0xa5, 0xc8, 0x18,
+       },
+       {
+               0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d,
+               0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb,
+               0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c,
+               0x57, 0x86, 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a,
+               0x3f, 0xee, 0x80, 0x51, 0x5c, 0x8d, 0xe3, 0x32,
+               0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, 0x25, 0xf4,
+               0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3,
+               0x68, 0xb9, 0xd7, 0x06, 0x0b, 0xda, 0xb4, 0x65,
+               0x7e, 0xaf, 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73,
+               0xb8, 0x69, 0x07, 0xd6, 0xdb, 0x0a, 0x64, 0xb5,
+               0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, 0x33, 0xe2,
+               0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24,
+               0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c,
+               0x87, 0x56, 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a,
+               0xd0, 0x01, 0x6f, 0xbe, 0xb3, 0x62, 0x0c, 0xdd,
+               0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, 0xca, 0x1b,
+               0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1,
+               0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37,
+               0x6d, 0xbc, 0xd2, 0x03, 0x0e, 0xdf, 0xb1, 0x60,
+               0xab, 0x7a, 0x14, 0xc5, 0xc8, 0x19, 0x77, 0xa6,
+               0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, 0x1f, 0xce,
+               0x05, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x08,
+               0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f,
+               0x94, 0x45, 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99,
+               0x82, 0x53, 0x3d, 0xec, 0xe1, 0x30, 0x5e, 0x8f,
+               0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, 0x98, 0x49,
+               0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e,
+               0xd5, 0x04, 0x6a, 0xbb, 0xb6, 0x67, 0x09, 0xd8,
+               0xbd, 0x6c, 0x02, 0xd3, 0xde, 0x0f, 0x61, 0xb0,
+               0x7b, 0xaa, 0xc4, 0x15, 0x18, 0xc9, 0xa7, 0x76,
+               0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, 0xf0, 0x21,
+               0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7,
+       },
+       {
+               0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+               0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda,
+               0xa1, 0x73, 0x18, 0xca, 0xce, 0x1c, 0x77, 0xa5,
+               0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, 0xa9, 0x7b,
+               0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b,
+               0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85,
+               0xfe, 0x2c, 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa,
+               0x20, 0xf2, 0x99, 0x4b, 0x4f, 0x9d, 0xf6, 0x24,
+               0xbe, 0x6c, 0x07, 0xd5, 0xd1, 0x03, 0x68, 0xba,
+               0x60, 0xb2, 0xd9, 0x0b, 0x0f, 0xdd, 0xb6, 0x64,
+               0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b,
+               0xc1, 0x13, 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5,
+               0xe1, 0x33, 0x58, 0x8a, 0x8e, 0x5c, 0x37, 0xe5,
+               0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, 0xe9, 0x3b,
+               0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44,
+               0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a,
+               0x61, 0xb3, 0xd8, 0x0a, 0x0e, 0xdc, 0xb7, 0x65,
+               0xbf, 0x6d, 0x06, 0xd4, 0xd0, 0x02, 0x69, 0xbb,
+               0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, 0x16, 0xc4,
+               0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a,
+               0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a,
+               0xe0, 0x32, 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4,
+               0x9f, 0x4d, 0x26, 0xf4, 0xf0, 0x22, 0x49, 0x9b,
+               0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, 0x97, 0x45,
+               0xdf, 0x0d, 0x66, 0xb4, 0xb0, 0x62, 0x09, 0xdb,
+               0x01, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x05,
+               0x7e, 0xac, 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a,
+               0xa0, 0x72, 0x19, 0xcb, 0xcf, 0x1d, 0x76, 0xa4,
+               0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, 0x56, 0x84,
+               0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a,
+               0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25,
+               0xff, 0x2d, 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb,
+       },
+       {
+               0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03,
+               0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5,
+               0xb1, 0x62, 0x0a, 0xd9, 0xda, 0x09, 0x61, 0xb2,
+               0x67, 0xb4, 0xdc, 0x0f, 0x0c, 0xdf, 0xb7, 0x64,
+               0x7f, 0xac, 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c,
+               0xa9, 0x7a, 0x12, 0xc1, 0xc2, 0x11, 0x79, 0xaa,
+               0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, 0x1e, 0xcd,
+               0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b,
+               0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd,
+               0x28, 0xfb, 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b,
+               0x4f, 0x9c, 0xf4, 0x27, 0x24, 0xf7, 0x9f, 0x4c,
+               0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, 0x49, 0x9a,
+               0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82,
+               0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54,
+               0x30, 0xe3, 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33,
+               0xe6, 0x35, 0x5d, 0x8e, 0x8d, 0x5e, 0x36, 0xe5,
+               0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, 0x31, 0xe2,
+               0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34,
+               0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53,
+               0x86, 0x55, 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85,
+               0x9e, 0x4d, 0x25, 0xf6, 0xf5, 0x26, 0x4e, 0x9d,
+               0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, 0x98, 0x4b,
+               0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c,
+               0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa,
+               0x1f, 0xcc, 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c,
+               0xc9, 0x1a, 0x72, 0xa1, 0xa2, 0x71, 0x19, 0xca,
+               0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, 0x7e, 0xad,
+               0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b,
+               0x60, 0xb3, 0xdb, 0x08, 0x0b, 0xd8, 0xb0, 0x63,
+               0xb6, 0x65, 0x0d, 0xde, 0xdd, 0x0e, 0x66, 0xb5,
+               0xd1, 0x02, 0x6a, 0xb9, 0xba, 0x69, 0x01, 0xd2,
+               0x07, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, 0xd7, 0x04,
+       },
+       {
+               0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16,
+               0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8,
+               0xc1, 0x15, 0x74, 0xa0, 0xb6, 0x62, 0x03, 0xd7,
+               0x2f, 0xfb, 0x9a, 0x4e, 0x58, 0x8c, 0xed, 0x39,
+               0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, 0x5d, 0x89,
+               0x71, 0xa5, 0xc4, 0x10, 0x06, 0xd2, 0xb3, 0x67,
+               0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48,
+               0xb0, 0x64, 0x05, 0xd1, 0xc7, 0x13, 0x72, 0xa6,
+               0x23, 0xf7, 0x96, 0x42, 0x54, 0x80, 0xe1, 0x35,
+               0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, 0x0f, 0xdb,
+               0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4,
+               0x0c, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a,
+               0xbc, 0x68, 0x09, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa,
+               0x52, 0x86, 0xe7, 0x33, 0x25, 0xf1, 0x90, 0x44,
+               0x7d, 0xa9, 0xc8, 0x1c, 0x0a, 0xde, 0xbf, 0x6b,
+               0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85,
+               0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50,
+               0xa8, 0x7c, 0x1d, 0xc9, 0xdf, 0x0b, 0x6a, 0xbe,
+               0x87, 0x53, 0x32, 0xe6, 0xf0, 0x24, 0x45, 0x91,
+               0x69, 0xbd, 0xdc, 0x08, 0x1e, 0xca, 0xab, 0x7f,
+               0xd9, 0x0d, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf,
+               0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21,
+               0x18, 0xcc, 0xad, 0x79, 0x6f, 0xbb, 0xda, 0x0e,
+               0xf6, 0x22, 0x43, 0x97, 0x81, 0x55, 0x34, 0xe0,
+               0x65, 0xb1, 0xd0, 0x04, 0x12, 0xc6, 0xa7, 0x73,
+               0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d,
+               0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x07, 0x66, 0xb2,
+               0x4a, 0x9e, 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c,
+               0xfa, 0x2e, 0x4f, 0x9b, 0x8d, 0x59, 0x38, 0xec,
+               0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, 0xd6, 0x02,
+               0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d,
+               0xd5, 0x01, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3,
+       },
+       {
+               0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11,
+               0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7,
+               0xd1, 0x04, 0x66, 0xb3, 0xa2, 0x77, 0x15, 0xc0,
+               0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26,
+               0xbf, 0x6a, 0x08, 0xdd, 0xcc, 0x19, 0x7b, 0xae,
+               0x59, 0x8c, 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48,
+               0x6e, 0xbb, 0xd9, 0x0c, 0x1d, 0xc8, 0xaa, 0x7f,
+               0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, 0x4c, 0x99,
+               0x63, 0xb6, 0xd4, 0x01, 0x10, 0xc5, 0xa7, 0x72,
+               0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94,
+               0xb2, 0x67, 0x05, 0xd0, 0xc1, 0x14, 0x76, 0xa3,
+               0x54, 0x81, 0xe3, 0x36, 0x27, 0xf2, 0x90, 0x45,
+               0xdc, 0x09, 0x6b, 0xbe, 0xaf, 0x7a, 0x18, 0xcd,
+               0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b,
+               0x0d, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c,
+               0xeb, 0x3e, 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa,
+               0xc6, 0x13, 0x71, 0xa4, 0xb5, 0x60, 0x02, 0xd7,
+               0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, 0xe4, 0x31,
+               0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x06,
+               0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0,
+               0x79, 0xac, 0xce, 0x1b, 0x0a, 0xdf, 0xbd, 0x68,
+               0x9f, 0x4a, 0x28, 0xfd, 0xec, 0x39, 0x5b, 0x8e,
+               0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0x0e, 0x6c, 0xb9,
+               0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f,
+               0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x03, 0x61, 0xb4,
+               0x43, 0x96, 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52,
+               0x74, 0xa1, 0xc3, 0x16, 0x07, 0xd2, 0xb0, 0x65,
+               0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, 0x56, 0x83,
+               0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0x0b,
+               0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed,
+               0xcb, 0x1e, 0x7c, 0xa9, 0xb8, 0x6d, 0x0f, 0xda,
+               0x2d, 0xf8, 0x9a, 0x4f, 0x5e, 0x8b, 0xe9, 0x3c,
+       },
+       {
+               0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18,
+               0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6,
+               0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9,
+               0x1f, 0xc9, 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x07,
+               0xdf, 0x09, 0x6e, 0xb8, 0xa0, 0x76, 0x11, 0xc7,
+               0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, 0xef, 0x39,
+               0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26,
+               0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0x0e, 0xd8,
+               0xa3, 0x75, 0x12, 0xc4, 0xdc, 0x0a, 0x6d, 0xbb,
+               0x5d, 0x8b, 0xec, 0x3a, 0x22, 0xf4, 0x93, 0x45,
+               0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, 0x8c, 0x5a,
+               0xbc, 0x6a, 0x0d, 0xdb, 0xc3, 0x15, 0x72, 0xa4,
+               0x7c, 0xaa, 0xcd, 0x1b, 0x03, 0xd5, 0xb2, 0x64,
+               0x82, 0x54, 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a,
+               0x9d, 0x4b, 0x2c, 0xfa, 0xe2, 0x34, 0x53, 0x85,
+               0x63, 0xb5, 0xd2, 0x04, 0x1c, 0xca, 0xad, 0x7b,
+               0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43,
+               0xa5, 0x73, 0x14, 0xc2, 0xda, 0x0c, 0x6b, 0xbd,
+               0xba, 0x6c, 0x0b, 0xdd, 0xc5, 0x13, 0x74, 0xa2,
+               0x44, 0x92, 0xf5, 0x23, 0x3b, 0xed, 0x8a, 0x5c,
+               0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, 0x4a, 0x9c,
+               0x7a, 0xac, 0xcb, 0x1d, 0x05, 0xd3, 0xb4, 0x62,
+               0x65, 0xb3, 0xd4, 0x02, 0x1a, 0xcc, 0xab, 0x7d,
+               0x9b, 0x4d, 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83,
+               0xf8, 0x2e, 0x49, 0x9f, 0x87, 0x51, 0x36, 0xe0,
+               0x06, 0xd0, 0xb7, 0x61, 0x79, 0xaf, 0xc8, 0x1e,
+               0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x01,
+               0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff,
+               0x27, 0xf1, 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f,
+               0xd9, 0x0f, 0x68, 0xbe, 0xa6, 0x70, 0x17, 0xc1,
+               0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, 0x08, 0xde,
+               0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20,
+       },
+       {
+               0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f,
+               0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9,
+               0xf1, 0x26, 0x42, 0x95, 0x8a, 0x5d, 0x39, 0xee,
+               0x07, 0xd0, 0xb4, 0x63, 0x7c, 0xab, 0xcf, 0x18,
+               0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0,
+               0x09, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16,
+               0x0e, 0xd9, 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11,
+               0xf8, 0x2f, 0x4b, 0x9c, 0x83, 0x54, 0x30, 0xe7,
+               0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, 0x2b, 0xfc,
+               0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0x0a,
+               0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0x0d,
+               0xe4, 0x33, 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb,
+               0x1c, 0xcb, 0xaf, 0x78, 0x67, 0xb0, 0xd4, 0x03,
+               0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, 0x22, 0xf5,
+               0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2,
+               0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x04,
+               0xdb, 0x0c, 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4,
+               0x2d, 0xfa, 0x9e, 0x49, 0x56, 0x81, 0xe5, 0x32,
+               0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, 0xe2, 0x35,
+               0xdc, 0x0b, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3,
+               0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b,
+               0xd2, 0x05, 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd,
+               0xd5, 0x02, 0x66, 0xb1, 0xae, 0x79, 0x1d, 0xca,
+               0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, 0xeb, 0x3c,
+               0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27,
+               0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x06, 0xd1,
+               0xc9, 0x1e, 0x7a, 0xad, 0xb2, 0x65, 0x01, 0xd6,
+               0x3f, 0xe8, 0x8c, 0x5b, 0x44, 0x93, 0xf7, 0x20,
+               0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, 0x0f, 0xd8,
+               0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e,
+               0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29,
+               0xc0, 0x17, 0x73, 0xa4, 0xbb, 0x6c, 0x08, 0xdf,
+       },
+       {
+               0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32,
+               0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc,
+               0x01, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33,
+               0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd,
+               0x02, 0xda, 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30,
+               0x8c, 0x54, 0x21, 0xf9, 0xcb, 0x13, 0x66, 0xbe,
+               0x03, 0xdb, 0xae, 0x76, 0x44, 0x9c, 0xe9, 0x31,
+               0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf,
+               0x04, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36,
+               0x8a, 0x52, 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8,
+               0x05, 0xdd, 0xa8, 0x70, 0x42, 0x9a, 0xef, 0x37,
+               0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, 0x61, 0xb9,
+               0x06, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34,
+               0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba,
+               0x07, 0xdf, 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35,
+               0x89, 0x51, 0x24, 0xfc, 0xce, 0x16, 0x63, 0xbb,
+               0x08, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, 0xe2, 0x3a,
+               0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4,
+               0x09, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b,
+               0x87, 0x5f, 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5,
+               0x0a, 0xd2, 0xa7, 0x7f, 0x4d, 0x95, 0xe0, 0x38,
+               0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, 0x6e, 0xb6,
+               0x0b, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39,
+               0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7,
+               0x0c, 0xd4, 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e,
+               0x82, 0x5a, 0x2f, 0xf7, 0xc5, 0x1d, 0x68, 0xb0,
+               0x0d, 0xd5, 0xa0, 0x78, 0x4a, 0x92, 0xe7, 0x3f,
+               0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1,
+               0x0e, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c,
+               0x80, 0x58, 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2,
+               0x0f, 0xd7, 0xa2, 0x7a, 0x48, 0x90, 0xe5, 0x3d,
+               0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, 0x6b, 0xb3,
+       },
+       {
+               0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35,
+               0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3,
+               0x11, 0xc8, 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24,
+               0x97, 0x4e, 0x38, 0xe1, 0xd4, 0x0d, 0x7b, 0xa2,
+               0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8, 0xce, 0x17,
+               0xa4, 0x7d, 0x0b, 0xd2, 0xe7, 0x3e, 0x48, 0x91,
+               0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x06,
+               0xb5, 0x6c, 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80,
+               0x44, 0x9d, 0xeb, 0x32, 0x07, 0xde, 0xa8, 0x71,
+               0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, 0x2e, 0xf7,
+               0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60,
+               0xd3, 0x0a, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6,
+               0x66, 0xbf, 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53,
+               0xe0, 0x39, 0x4f, 0x96, 0xa3, 0x7a, 0x0c, 0xd5,
+               0x77, 0xae, 0xd8, 0x01, 0x34, 0xed, 0x9b, 0x42,
+               0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4,
+               0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd,
+               0x0e, 0xd7, 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b,
+               0x99, 0x40, 0x36, 0xef, 0xda, 0x03, 0x75, 0xac,
+               0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, 0xf3, 0x2a,
+               0xaa, 0x73, 0x05, 0xdc, 0xe9, 0x30, 0x46, 0x9f,
+               0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19,
+               0xbb, 0x62, 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e,
+               0x3d, 0xe4, 0x92, 0x4b, 0x7e, 0xa7, 0xd1, 0x08,
+               0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, 0x20, 0xf9,
+               0x4a, 0x93, 0xe5, 0x3c, 0x09, 0xd0, 0xa6, 0x7f,
+               0xdd, 0x04, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8,
+               0x5b, 0x82, 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e,
+               0xee, 0x37, 0x41, 0x98, 0xad, 0x74, 0x02, 0xdb,
+               0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, 0x84, 0x5d,
+               0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca,
+               0x79, 0xa0, 0xd6, 0x0f, 0x3a, 0xe3, 0x95, 0x4c,
+       },
+       {
+               0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c,
+               0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2,
+               0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, 0xc7, 0x1d,
+               0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83,
+               0x42, 0x98, 0xeb, 0x31, 0x0d, 0xd7, 0xa4, 0x7e,
+               0xdc, 0x06, 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0,
+               0x63, 0xb9, 0xca, 0x10, 0x2c, 0xf6, 0x85, 0x5f,
+               0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, 0x1b, 0xc1,
+               0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8,
+               0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26,
+               0xa5, 0x7f, 0x0c, 0xd6, 0xea, 0x30, 0x43, 0x99,
+               0x3b, 0xe1, 0x92, 0x48, 0x74, 0xae, 0xdd, 0x07,
+               0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, 0x20, 0xfa,
+               0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64,
+               0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x01, 0xdb,
+               0x79, 0xa3, 0xd0, 0x0a, 0x36, 0xec, 0x9f, 0x45,
+               0x15, 0xcf, 0xbc, 0x66, 0x5a, 0x80, 0xf3, 0x29,
+               0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, 0x6d, 0xb7,
+               0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x08,
+               0xaa, 0x70, 0x03, 0xd9, 0xe5, 0x3f, 0x4c, 0x96,
+               0x57, 0x8d, 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b,
+               0xc9, 0x13, 0x60, 0xba, 0x86, 0x5c, 0x2f, 0xf5,
+               0x76, 0xac, 0xdf, 0x05, 0x39, 0xe3, 0x90, 0x4a,
+               0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0x0e, 0xd4,
+               0x91, 0x4b, 0x38, 0xe2, 0xde, 0x04, 0x77, 0xad,
+               0x0f, 0xd5, 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33,
+               0xb0, 0x6a, 0x19, 0xc3, 0xff, 0x25, 0x56, 0x8c,
+               0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, 0xc8, 0x12,
+               0xd3, 0x09, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef,
+               0x4d, 0x97, 0xe4, 0x3e, 0x02, 0xd8, 0xab, 0x71,
+               0xf2, 0x28, 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce,
+               0x6c, 0xb6, 0xc5, 0x1f, 0x23, 0xf9, 0x8a, 0x50,
+       },
+       {
+               0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b,
+               0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad,
+               0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0x0a,
+               0xa7, 0x7c, 0x0c, 0xd7, 0xec, 0x37, 0x47, 0x9c,
+               0x62, 0xb9, 0xc9, 0x12, 0x29, 0xf2, 0x82, 0x59,
+               0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, 0x14, 0xcf,
+               0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68,
+               0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe,
+               0xc4, 0x1f, 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff,
+               0x52, 0x89, 0xf9, 0x22, 0x19, 0xc2, 0xb2, 0x69,
+               0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, 0x15, 0xce,
+               0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58,
+               0xa6, 0x7d, 0x0d, 0xd6, 0xed, 0x36, 0x46, 0x9d,
+               0x30, 0xeb, 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0x0b,
+               0x97, 0x4c, 0x3c, 0xe7, 0xdc, 0x07, 0x77, 0xac,
+               0x01, 0xda, 0xaa, 0x71, 0x4a, 0x91, 0xe1, 0x3a,
+               0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x05, 0x75, 0xae,
+               0x03, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38,
+               0xa4, 0x7f, 0x0f, 0xd4, 0xef, 0x34, 0x44, 0x9f,
+               0x32, 0xe9, 0x99, 0x42, 0x79, 0xa2, 0xd2, 0x09,
+               0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, 0x17, 0xcc,
+               0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a,
+               0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd,
+               0x50, 0x8b, 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b,
+               0x51, 0x8a, 0xfa, 0x21, 0x1a, 0xc1, 0xb1, 0x6a,
+               0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, 0x27, 0xfc,
+               0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b,
+               0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd,
+               0x33, 0xe8, 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x08,
+               0xa5, 0x7e, 0x0e, 0xd5, 0xee, 0x35, 0x45, 0x9e,
+               0x02, 0xd9, 0xa9, 0x72, 0x49, 0x92, 0xe2, 0x39,
+               0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x04, 0x74, 0xaf,
+       },
+       {
+               0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e,
+               0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80,
+               0x41, 0x9d, 0xe4, 0x38, 0x16, 0xca, 0xb3, 0x6f,
+               0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, 0x1d, 0xc1,
+               0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x09, 0x70, 0xac,
+               0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x02,
+               0xc3, 0x1f, 0x66, 0xba, 0x94, 0x48, 0x31, 0xed,
+               0x6d, 0xb1, 0xc8, 0x14, 0x3a, 0xe6, 0x9f, 0x43,
+               0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, 0xeb, 0x37,
+               0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99,
+               0x58, 0x84, 0xfd, 0x21, 0x0f, 0xd3, 0xaa, 0x76,
+               0xf6, 0x2a, 0x53, 0x8f, 0xa1, 0x7d, 0x04, 0xd8,
+               0x9b, 0x47, 0x3e, 0xe2, 0xcc, 0x10, 0x69, 0xb5,
+               0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, 0xc7, 0x1b,
+               0xda, 0x06, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4,
+               0x74, 0xa8, 0xd1, 0x0d, 0x23, 0xff, 0x86, 0x5a,
+               0x32, 0xee, 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c,
+               0x9c, 0x40, 0x39, 0xe5, 0xcb, 0x17, 0x6e, 0xb2,
+               0x73, 0xaf, 0xd6, 0x0a, 0x24, 0xf8, 0x81, 0x5d,
+               0xdd, 0x01, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3,
+               0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e,
+               0x1e, 0xc2, 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30,
+               0xf1, 0x2d, 0x54, 0x88, 0xa6, 0x7a, 0x03, 0xdf,
+               0x5f, 0x83, 0xfa, 0x26, 0x08, 0xd4, 0xad, 0x71,
+               0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x05,
+               0x85, 0x59, 0x20, 0xfc, 0xd2, 0x0e, 0x77, 0xab,
+               0x6a, 0xb6, 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44,
+               0xc4, 0x18, 0x61, 0xbd, 0x93, 0x4f, 0x36, 0xea,
+               0xa9, 0x75, 0x0c, 0xd0, 0xfe, 0x22, 0x5b, 0x87,
+               0x07, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29,
+               0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6,
+               0x46, 0x9a, 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68,
+       },
+       {
+               0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29,
+               0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f,
+               0x51, 0x8c, 0xf6, 0x2b, 0x02, 0xdf, 0xa5, 0x78,
+               0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x03, 0xde,
+               0xa2, 0x7f, 0x05, 0xd8, 0xf1, 0x2c, 0x56, 0x8b,
+               0x04, 0xd9, 0xa3, 0x7e, 0x57, 0x8a, 0xf0, 0x2d,
+               0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, 0x07, 0xda,
+               0x55, 0x88, 0xf2, 0x2f, 0x06, 0xdb, 0xa1, 0x7c,
+               0x59, 0x84, 0xfe, 0x23, 0x0a, 0xd7, 0xad, 0x70,
+               0xff, 0x22, 0x58, 0x85, 0xac, 0x71, 0x0b, 0xd6,
+               0x08, 0xd5, 0xaf, 0x72, 0x5b, 0x86, 0xfc, 0x21,
+               0xae, 0x73, 0x09, 0xd4, 0xfd, 0x20, 0x5a, 0x87,
+               0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0x0f, 0xd2,
+               0x5d, 0x80, 0xfa, 0x27, 0x0e, 0xd3, 0xa9, 0x74,
+               0xaa, 0x77, 0x0d, 0xd0, 0xf9, 0x24, 0x5e, 0x83,
+               0x0c, 0xd1, 0xab, 0x76, 0x5f, 0x82, 0xf8, 0x25,
+               0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, 0x46, 0x9b,
+               0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d,
+               0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca,
+               0x45, 0x98, 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c,
+               0x10, 0xcd, 0xb7, 0x6a, 0x43, 0x9e, 0xe4, 0x39,
+               0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, 0x42, 0x9f,
+               0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68,
+               0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce,
+               0xeb, 0x36, 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2,
+               0x4d, 0x90, 0xea, 0x37, 0x1e, 0xc3, 0xb9, 0x64,
+               0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, 0x4e, 0x93,
+               0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35,
+               0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60,
+               0xef, 0x32, 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6,
+               0x18, 0xc5, 0xbf, 0x62, 0x4b, 0x96, 0xec, 0x31,
+               0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, 0x4a, 0x97,
+       },
+       {
+               0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20,
+               0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e,
+               0x61, 0xbf, 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41,
+               0xdf, 0x01, 0x7e, 0xa0, 0x80, 0x5e, 0x21, 0xff,
+               0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, 0x3c, 0xe2,
+               0x7c, 0xa2, 0xdd, 0x03, 0x23, 0xfd, 0x82, 0x5c,
+               0xa3, 0x7d, 0x02, 0xdc, 0xfc, 0x22, 0x5d, 0x83,
+               0x1d, 0xc3, 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d,
+               0x99, 0x47, 0x38, 0xe6, 0xc6, 0x18, 0x67, 0xb9,
+               0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, 0xd9, 0x07,
+               0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x06, 0xd8,
+               0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66,
+               0x5b, 0x85, 0xfa, 0x24, 0x04, 0xda, 0xa5, 0x7b,
+               0xe5, 0x3b, 0x44, 0x9a, 0xba, 0x64, 0x1b, 0xc5,
+               0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, 0xc4, 0x1a,
+               0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x05, 0x7a, 0xa4,
+               0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0x0f,
+               0x91, 0x4f, 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1,
+               0x4e, 0x90, 0xef, 0x31, 0x11, 0xcf, 0xb0, 0x6e,
+               0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, 0x0e, 0xd0,
+               0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd,
+               0x53, 0x8d, 0xf2, 0x2c, 0x0c, 0xd2, 0xad, 0x73,
+               0x8c, 0x52, 0x2d, 0xf3, 0xd3, 0x0d, 0x72, 0xac,
+               0x32, 0xec, 0x93, 0x4d, 0x6d, 0xb3, 0xcc, 0x12,
+               0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, 0x48, 0x96,
+               0x08, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28,
+               0xd7, 0x09, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7,
+               0x69, 0xb7, 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49,
+               0x74, 0xaa, 0xd5, 0x0b, 0x2b, 0xf5, 0x8a, 0x54,
+               0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, 0x34, 0xea,
+               0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35,
+               0xab, 0x75, 0x0a, 0xd4, 0xf4, 0x2a, 0x55, 0x8b,
+       },
+       {
+               0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27,
+               0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91,
+               0x71, 0xae, 0xd2, 0x0d, 0x2a, 0xf5, 0x89, 0x56,
+               0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0,
+               0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5,
+               0x54, 0x8b, 0xf7, 0x28, 0x0f, 0xd0, 0xac, 0x73,
+               0x93, 0x4c, 0x30, 0xef, 0xc8, 0x17, 0x6b, 0xb4,
+               0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, 0xdd, 0x02,
+               0xd9, 0x06, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe,
+               0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48,
+               0xa8, 0x77, 0x0b, 0xd4, 0xf3, 0x2c, 0x50, 0x8f,
+               0x1e, 0xc1, 0xbd, 0x62, 0x45, 0x9a, 0xe6, 0x39,
+               0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, 0xc3, 0x1c,
+               0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x09, 0x75, 0xaa,
+               0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d,
+               0xfc, 0x23, 0x5f, 0x80, 0xa7, 0x78, 0x04, 0xdb,
+               0xaf, 0x70, 0x0c, 0xd3, 0xf4, 0x2b, 0x57, 0x88,
+               0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, 0xe1, 0x3e,
+               0xde, 0x01, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9,
+               0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f,
+               0x4d, 0x92, 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a,
+               0xfb, 0x24, 0x58, 0x87, 0xa0, 0x7f, 0x03, 0xdc,
+               0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, 0xc4, 0x1b,
+               0x8a, 0x55, 0x29, 0xf6, 0xd1, 0x0e, 0x72, 0xad,
+               0x76, 0xa9, 0xd5, 0x0a, 0x2d, 0xf2, 0x8e, 0x51,
+               0xc0, 0x1f, 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7,
+               0x07, 0xd8, 0xa4, 0x7b, 0x5c, 0x83, 0xff, 0x20,
+               0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35, 0x49, 0x96,
+               0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3,
+               0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x05,
+               0xe5, 0x3a, 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2,
+               0x53, 0x8c, 0xf0, 0x2f, 0x08, 0xd7, 0xab, 0x74,
+       },
+       {
+               0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a,
+               0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9,
+               0xa6, 0x46, 0x7b, 0x9b, 0x01, 0xe1, 0xdc, 0x3c,
+               0xf5, 0x15, 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f,
+               0x51, 0xb1, 0x8c, 0x6c, 0xf6, 0x16, 0x2b, 0xcb,
+               0x02, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, 0x78, 0x98,
+               0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d,
+               0xa4, 0x44, 0x79, 0x99, 0x03, 0xe3, 0xde, 0x3e,
+               0xa2, 0x42, 0x7f, 0x9f, 0x05, 0xe5, 0xd8, 0x38,
+               0xf1, 0x11, 0x2c, 0xcc, 0x56, 0xb6, 0x8b, 0x6b,
+               0x04, 0xe4, 0xd9, 0x39, 0xa3, 0x43, 0x7e, 0x9e,
+               0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd,
+               0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69,
+               0xa0, 0x40, 0x7d, 0x9d, 0x07, 0xe7, 0xda, 0x3a,
+               0x55, 0xb5, 0x88, 0x68, 0xf2, 0x12, 0x2f, 0xcf,
+               0x06, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, 0x7c, 0x9c,
+               0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3,
+               0x0a, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90,
+               0xff, 0x1f, 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65,
+               0xac, 0x4c, 0x71, 0x91, 0x0b, 0xeb, 0xd6, 0x36,
+               0x08, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, 0x72, 0x92,
+               0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1,
+               0xae, 0x4e, 0x73, 0x93, 0x09, 0xe9, 0xd4, 0x34,
+               0xfd, 0x1d, 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67,
+               0xfb, 0x1b, 0x26, 0xc6, 0x5c, 0xbc, 0x81, 0x61,
+               0xa8, 0x48, 0x75, 0x95, 0x0f, 0xef, 0xd2, 0x32,
+               0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7,
+               0x0e, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94,
+               0xaa, 0x4a, 0x77, 0x97, 0x0d, 0xed, 0xd0, 0x30,
+               0xf9, 0x19, 0x24, 0xc4, 0x5e, 0xbe, 0x83, 0x63,
+               0x0c, 0xec, 0xd1, 0x31, 0xab, 0x4b, 0x76, 0x96,
+               0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5,
+       },
+       {
+               0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d,
+               0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6,
+               0xb6, 0x57, 0x69, 0x88, 0x15, 0xf4, 0xca, 0x2b,
+               0xed, 0x0c, 0x32, 0xd3, 0x4e, 0xaf, 0x91, 0x70,
+               0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0x0d, 0xec,
+               0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7,
+               0xc7, 0x26, 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a,
+               0x9c, 0x7d, 0x43, 0xa2, 0x3f, 0xde, 0xe0, 0x01,
+               0xe2, 0x03, 0x3d, 0xdc, 0x41, 0xa0, 0x9e, 0x7f,
+               0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24,
+               0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9,
+               0x0f, 0xee, 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92,
+               0x93, 0x72, 0x4c, 0xad, 0x30, 0xd1, 0xef, 0x0e,
+               0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, 0xb4, 0x55,
+               0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8,
+               0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x02, 0xe3,
+               0xd9, 0x38, 0x06, 0xe7, 0x7a, 0x9b, 0xa5, 0x44,
+               0x82, 0x63, 0x5d, 0xbc, 0x21, 0xc0, 0xfe, 0x1f,
+               0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, 0x13, 0xf2,
+               0x34, 0xd5, 0xeb, 0x0a, 0x97, 0x76, 0x48, 0xa9,
+               0xa8, 0x49, 0x77, 0x96, 0x0b, 0xea, 0xd4, 0x35,
+               0xf3, 0x12, 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e,
+               0x1e, 0xff, 0xc1, 0x20, 0xbd, 0x5c, 0x62, 0x83,
+               0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x07, 0x39, 0xd8,
+               0x3b, 0xda, 0xe4, 0x05, 0x98, 0x79, 0x47, 0xa6,
+               0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd,
+               0x8d, 0x6c, 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10,
+               0xd6, 0x37, 0x09, 0xe8, 0x75, 0x94, 0xaa, 0x4b,
+               0x4a, 0xab, 0x95, 0x74, 0xe9, 0x08, 0x36, 0xd7,
+               0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c,
+               0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61,
+               0xa7, 0x46, 0x78, 0x99, 0x04, 0xe5, 0xdb, 0x3a,
+       },
+       {
+               0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94,
+               0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7,
+               0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12,
+               0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51,
+               0x11, 0xf3, 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85,
+               0x52, 0xb0, 0x8b, 0x69, 0xfd, 0x1f, 0x24, 0xc6,
+               0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, 0xe1, 0x03,
+               0xd4, 0x36, 0x0d, 0xef, 0x7b, 0x99, 0xa2, 0x40,
+               0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6,
+               0x61, 0x83, 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5,
+               0xa4, 0x46, 0x7d, 0x9f, 0x0b, 0xe9, 0xd2, 0x30,
+               0xe7, 0x05, 0x3e, 0xdc, 0x48, 0xaa, 0x91, 0x73,
+               0x33, 0xd1, 0xea, 0x08, 0x9c, 0x7e, 0x45, 0xa7,
+               0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x06, 0xe4,
+               0xb5, 0x57, 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21,
+               0xf6, 0x14, 0x2f, 0xcd, 0x59, 0xbb, 0x80, 0x62,
+               0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x09, 0x32, 0xd0,
+               0x07, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93,
+               0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56,
+               0x81, 0x63, 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15,
+               0x55, 0xb7, 0x8c, 0x6e, 0xfa, 0x18, 0x23, 0xc1,
+               0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, 0x60, 0x82,
+               0xd3, 0x31, 0x0a, 0xe8, 0x7c, 0x9e, 0xa5, 0x47,
+               0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x04,
+               0x66, 0x84, 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2,
+               0x25, 0xc7, 0xfc, 0x1e, 0x8a, 0x68, 0x53, 0xb1,
+               0xe0, 0x02, 0x39, 0xdb, 0x4f, 0xad, 0x96, 0x74,
+               0xa3, 0x41, 0x7a, 0x98, 0x0c, 0xee, 0xd5, 0x37,
+               0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x01, 0xe3,
+               0x34, 0xd6, 0xed, 0x0f, 0x9b, 0x79, 0x42, 0xa0,
+               0xf1, 0x13, 0x28, 0xca, 0x5e, 0xbc, 0x87, 0x65,
+               0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, 0xc4, 0x26,
+       },
+       {
+               0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93,
+               0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8,
+               0x96, 0x75, 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x05,
+               0xdd, 0x3e, 0x06, 0xe5, 0x76, 0x95, 0xad, 0x4e,
+               0x31, 0xd2, 0xea, 0x09, 0x9a, 0x79, 0x41, 0xa2,
+               0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0x0a, 0xe9,
+               0xa7, 0x44, 0x7c, 0x9f, 0x0c, 0xef, 0xd7, 0x34,
+               0xec, 0x0f, 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f,
+               0x62, 0x81, 0xb9, 0x5a, 0xc9, 0x2a, 0x12, 0xf1,
+               0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, 0x59, 0xba,
+               0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67,
+               0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c,
+               0x53, 0xb0, 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0,
+               0x18, 0xfb, 0xc3, 0x20, 0xb3, 0x50, 0x68, 0x8b,
+               0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, 0xb5, 0x56,
+               0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d,
+               0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57,
+               0x8f, 0x6c, 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c,
+               0x52, 0xb1, 0x89, 0x6a, 0xf9, 0x1a, 0x22, 0xc1,
+               0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, 0x69, 0x8a,
+               0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66,
+               0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d,
+               0x63, 0x80, 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0,
+               0x28, 0xcb, 0xf3, 0x10, 0x83, 0x60, 0x58, 0xbb,
+               0xa6, 0x45, 0x7d, 0x9e, 0x0d, 0xee, 0xd6, 0x35,
+               0xed, 0x0e, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e,
+               0x30, 0xd3, 0xeb, 0x08, 0x9b, 0x78, 0x40, 0xa3,
+               0x7b, 0x98, 0xa0, 0x43, 0xd0, 0x33, 0x0b, 0xe8,
+               0x97, 0x74, 0x4c, 0xaf, 0x3c, 0xdf, 0xe7, 0x04,
+               0xdc, 0x3f, 0x07, 0xe4, 0x77, 0x94, 0xac, 0x4f,
+               0x01, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92,
+               0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x02, 0x3a, 0xd9,
+       },
+       {
+               0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86,
+               0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5,
+               0xe6, 0x02, 0x33, 0xd7, 0x51, 0xb5, 0x84, 0x60,
+               0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13,
+               0xd1, 0x35, 0x04, 0xe0, 0x66, 0x82, 0xb3, 0x57,
+               0xa2, 0x46, 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24,
+               0x37, 0xd3, 0xe2, 0x06, 0x80, 0x64, 0x55, 0xb1,
+               0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, 0x26, 0xc2,
+               0xbf, 0x5b, 0x6a, 0x8e, 0x08, 0xec, 0xdd, 0x39,
+               0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a,
+               0x59, 0xbd, 0x8c, 0x68, 0xee, 0x0a, 0x3b, 0xdf,
+               0x2a, 0xce, 0xff, 0x1b, 0x9d, 0x79, 0x48, 0xac,
+               0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, 0x0c, 0xe8,
+               0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b,
+               0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0x0e,
+               0xfb, 0x1f, 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d,
+               0x63, 0x87, 0xb6, 0x52, 0xd4, 0x30, 0x01, 0xe5,
+               0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, 0x72, 0x96,
+               0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x03,
+               0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70,
+               0xb2, 0x56, 0x67, 0x83, 0x05, 0xe1, 0xd0, 0x34,
+               0xc1, 0x25, 0x14, 0xf0, 0x76, 0x92, 0xa3, 0x47,
+               0x54, 0xb0, 0x81, 0x65, 0xe3, 0x07, 0x36, 0xd2,
+               0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1,
+               0xdc, 0x38, 0x09, 0xed, 0x6b, 0x8f, 0xbe, 0x5a,
+               0xaf, 0x4b, 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29,
+               0x3a, 0xde, 0xef, 0x0b, 0x8d, 0x69, 0x58, 0xbc,
+               0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, 0x2b, 0xcf,
+               0x0d, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b,
+               0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8,
+               0xeb, 0x0f, 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d,
+               0x98, 0x7c, 0x4d, 0xa9, 0x2f, 0xcb, 0xfa, 0x1e,
+       },
+       {
+               0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81,
+               0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa,
+               0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77,
+               0x8d, 0x68, 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0x0c,
+               0xf1, 0x14, 0x26, 0xc3, 0x42, 0xa7, 0x95, 0x70,
+               0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, 0xee, 0x0b,
+               0x07, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86,
+               0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd,
+               0xff, 0x1a, 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e,
+               0x84, 0x61, 0x53, 0xb6, 0x37, 0xd2, 0xe0, 0x05,
+               0x09, 0xec, 0xde, 0x3b, 0xba, 0x5f, 0x6d, 0x88,
+               0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3,
+               0x0e, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f,
+               0x75, 0x90, 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4,
+               0xf8, 0x1d, 0x2f, 0xca, 0x4b, 0xae, 0x9c, 0x79,
+               0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, 0xe7, 0x02,
+               0xe3, 0x06, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62,
+               0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19,
+               0x15, 0xf0, 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94,
+               0x6e, 0x8b, 0xb9, 0x5c, 0xdd, 0x38, 0x0a, 0xef,
+               0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, 0x76, 0x93,
+               0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0x0d, 0xe8,
+               0xe4, 0x01, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65,
+               0x9f, 0x7a, 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e,
+               0x1c, 0xf9, 0xcb, 0x2e, 0xaf, 0x4a, 0x78, 0x9d,
+               0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, 0x03, 0xe6,
+               0xea, 0x0f, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b,
+               0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10,
+               0xed, 0x08, 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c,
+               0x96, 0x73, 0x41, 0xa4, 0x25, 0xc0, 0xf2, 0x17,
+               0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, 0x7f, 0x9a,
+               0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x04, 0xe1,
+       },
+       {
+               0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88,
+               0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb,
+               0xc6, 0x20, 0x17, 0xf1, 0x79, 0x9f, 0xa8, 0x4e,
+               0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, 0xcb, 0x2d,
+               0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19,
+               0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a,
+               0x57, 0xb1, 0x86, 0x60, 0xe8, 0x0e, 0x39, 0xdf,
+               0x34, 0xd2, 0xe5, 0x03, 0x8b, 0x6d, 0x5a, 0xbc,
+               0x3f, 0xd9, 0xee, 0x08, 0x80, 0x66, 0x51, 0xb7,
+               0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x05, 0x32, 0xd4,
+               0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71,
+               0x9a, 0x7c, 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12,
+               0xae, 0x48, 0x7f, 0x99, 0x11, 0xf7, 0xc0, 0x26,
+               0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, 0xa3, 0x45,
+               0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x06, 0xe0,
+               0x0b, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83,
+               0x7e, 0x98, 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6,
+               0x1d, 0xfb, 0xcc, 0x2a, 0xa2, 0x44, 0x73, 0x95,
+               0xb8, 0x5e, 0x69, 0x8f, 0x07, 0xe1, 0xd6, 0x30,
+               0xdb, 0x3d, 0x0a, 0xec, 0x64, 0x82, 0xb5, 0x53,
+               0xef, 0x09, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67,
+               0x8c, 0x6a, 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x04,
+               0x29, 0xcf, 0xf8, 0x1e, 0x96, 0x70, 0x47, 0xa1,
+               0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, 0x24, 0xc2,
+               0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9,
+               0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa,
+               0x87, 0x61, 0x56, 0xb0, 0x38, 0xde, 0xe9, 0x0f,
+               0xe4, 0x02, 0x35, 0xd3, 0x5b, 0xbd, 0x8a, 0x6c,
+               0xd0, 0x36, 0x01, 0xe7, 0x6f, 0x89, 0xbe, 0x58,
+               0xb3, 0x55, 0x62, 0x84, 0x0c, 0xea, 0xdd, 0x3b,
+               0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e,
+               0x75, 0x93, 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd,
+       },
+       {
+               0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f,
+               0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4,
+               0xd6, 0x31, 0x05, 0xe2, 0x6d, 0x8a, 0xbe, 0x59,
+               0xbd, 0x5a, 0x6e, 0x89, 0x06, 0xe1, 0xd5, 0x32,
+               0xb1, 0x56, 0x62, 0x85, 0x0a, 0xed, 0xd9, 0x3e,
+               0xda, 0x3d, 0x09, 0xee, 0x61, 0x86, 0xb2, 0x55,
+               0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, 0x0f, 0xe8,
+               0x0c, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83,
+               0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0,
+               0x14, 0xf3, 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b,
+               0xa9, 0x4e, 0x7a, 0x9d, 0x12, 0xf5, 0xc1, 0x26,
+               0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, 0xaa, 0x4d,
+               0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41,
+               0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a,
+               0x18, 0xff, 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97,
+               0x73, 0x94, 0xa0, 0x47, 0xc8, 0x2f, 0x1b, 0xfc,
+               0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, 0x96, 0x71,
+               0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a,
+               0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7,
+               0x43, 0xa4, 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc,
+               0x4f, 0xa8, 0x9c, 0x7b, 0xf4, 0x13, 0x27, 0xc0,
+               0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78, 0x4c, 0xab,
+               0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16,
+               0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d,
+               0x81, 0x66, 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0x0e,
+               0xea, 0x0d, 0x39, 0xde, 0x51, 0xb6, 0x82, 0x65,
+               0x57, 0xb0, 0x84, 0x63, 0xec, 0x0b, 0x3f, 0xd8,
+               0x3c, 0xdb, 0xef, 0x08, 0x87, 0x60, 0x54, 0xb3,
+               0x30, 0xd7, 0xe3, 0x04, 0x8b, 0x6c, 0x58, 0xbf,
+               0x5b, 0xbc, 0x88, 0x6f, 0xe0, 0x07, 0x33, 0xd4,
+               0xe6, 0x01, 0x35, 0xd2, 0x5d, 0xba, 0x8e, 0x69,
+               0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, 0xe5, 0x02,
+       },
+       {
+               0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+               0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1,
+               0x26, 0xce, 0xeb, 0x03, 0xa1, 0x49, 0x6c, 0x84,
+               0x35, 0xdd, 0xf8, 0x10, 0xb2, 0x5a, 0x7f, 0x97,
+               0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, 0x06, 0xee,
+               0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd,
+               0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x05, 0x20, 0xc8,
+               0x79, 0x91, 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb,
+               0x98, 0x70, 0x55, 0xbd, 0x1f, 0xf7, 0xd2, 0x3a,
+               0x8b, 0x63, 0x46, 0xae, 0x0c, 0xe4, 0xc1, 0x29,
+               0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c,
+               0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0x0f,
+               0xd4, 0x3c, 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76,
+               0xc7, 0x2f, 0x0a, 0xe2, 0x40, 0xa8, 0x8d, 0x65,
+               0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, 0xb8, 0x50,
+               0xe1, 0x09, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43,
+               0x2d, 0xc5, 0xe0, 0x08, 0xaa, 0x42, 0x67, 0x8f,
+               0x3e, 0xd6, 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c,
+               0x0b, 0xe3, 0xc6, 0x2e, 0x8c, 0x64, 0x41, 0xa9,
+               0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, 0x52, 0xba,
+               0x61, 0x89, 0xac, 0x44, 0xe6, 0x0e, 0x2b, 0xc3,
+               0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0,
+               0x47, 0xaf, 0x8a, 0x62, 0xc0, 0x28, 0x0d, 0xe5,
+               0x54, 0xbc, 0x99, 0x71, 0xd3, 0x3b, 0x1e, 0xf6,
+               0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, 0xff, 0x17,
+               0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x04,
+               0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31,
+               0x80, 0x68, 0x4d, 0xa5, 0x07, 0xef, 0xca, 0x22,
+               0xf9, 0x11, 0x34, 0xdc, 0x7e, 0x96, 0xb3, 0x5b,
+               0xea, 0x02, 0x27, 0xcf, 0x6d, 0x85, 0xa0, 0x48,
+               0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d,
+               0xcc, 0x24, 0x01, 0xe9, 0x4b, 0xa3, 0x86, 0x6e,
+       },
+       {
+               0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5,
+               0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe,
+               0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, 0x7a, 0x93,
+               0x2d, 0xc4, 0xe2, 0x0b, 0xae, 0x47, 0x61, 0x88,
+               0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x06, 0x20, 0xc9,
+               0x77, 0x9e, 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2,
+               0x5a, 0xb3, 0x95, 0x7c, 0xd9, 0x30, 0x16, 0xff,
+               0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, 0x0d, 0xe4,
+               0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d,
+               0xc3, 0x2a, 0x0c, 0xe5, 0x40, 0xa9, 0x8f, 0x66,
+               0xee, 0x07, 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b,
+               0xf5, 0x1c, 0x3a, 0xd3, 0x76, 0x9f, 0xb9, 0x50,
+               0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, 0xf8, 0x11,
+               0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0x0a,
+               0x82, 0x6b, 0x4d, 0xa4, 0x01, 0xe8, 0xce, 0x27,
+               0x99, 0x70, 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c,
+               0xad, 0x44, 0x62, 0x8b, 0x2e, 0xc7, 0xe1, 0x08,
+               0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, 0xfa, 0x13,
+               0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e,
+               0x80, 0x69, 0x4f, 0xa6, 0x03, 0xea, 0xcc, 0x25,
+               0xc1, 0x28, 0x0e, 0xe7, 0x42, 0xab, 0x8d, 0x64,
+               0xda, 0x33, 0x15, 0xfc, 0x59, 0xb0, 0x96, 0x7f,
+               0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, 0xbb, 0x52,
+               0xec, 0x05, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49,
+               0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0,
+               0x6e, 0x87, 0xa1, 0x48, 0xed, 0x04, 0x22, 0xcb,
+               0x43, 0xaa, 0x8c, 0x65, 0xc0, 0x29, 0x0f, 0xe6,
+               0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, 0x14, 0xfd,
+               0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc,
+               0x02, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7,
+               0x2f, 0xc6, 0xe0, 0x09, 0xac, 0x45, 0x63, 0x8a,
+               0x34, 0xdd, 0xfb, 0x12, 0xb7, 0x5e, 0x78, 0x91,
+       },
+       {
+               0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac,
+               0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf,
+               0x06, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa,
+               0x05, 0xef, 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9,
+               0x0c, 0xe6, 0xc5, 0x2f, 0x83, 0x69, 0x4a, 0xa0,
+               0x0f, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, 0x49, 0xa3,
+               0x0a, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6,
+               0x09, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5,
+               0x18, 0xf2, 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4,
+               0x1b, 0xf1, 0xd2, 0x38, 0x94, 0x7e, 0x5d, 0xb7,
+               0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, 0x58, 0xb2,
+               0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1,
+               0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8,
+               0x17, 0xfd, 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb,
+               0x12, 0xf8, 0xdb, 0x31, 0x9d, 0x77, 0x54, 0xbe,
+               0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, 0x57, 0xbd,
+               0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c,
+               0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f,
+               0x36, 0xdc, 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a,
+               0x35, 0xdf, 0xfc, 0x16, 0xba, 0x50, 0x73, 0x99,
+               0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, 0x7a, 0x90,
+               0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93,
+               0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96,
+               0x39, 0xd3, 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95,
+               0x28, 0xc2, 0xe1, 0x0b, 0xa7, 0x4d, 0x6e, 0x84,
+               0x2b, 0xc1, 0xe2, 0x08, 0xa4, 0x4e, 0x6d, 0x87,
+               0x2e, 0xc4, 0xe7, 0x0d, 0xa1, 0x4b, 0x68, 0x82,
+               0x2d, 0xc7, 0xe4, 0x0e, 0xa2, 0x48, 0x6b, 0x81,
+               0x24, 0xce, 0xed, 0x07, 0xab, 0x41, 0x62, 0x88,
+               0x27, 0xcd, 0xee, 0x04, 0xa8, 0x42, 0x61, 0x8b,
+               0x22, 0xc8, 0xeb, 0x01, 0xad, 0x47, 0x64, 0x8e,
+               0x21, 0xcb, 0xe8, 0x02, 0xae, 0x44, 0x67, 0x8d,
+       },
+       {
+               0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab,
+               0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0,
+               0x16, 0xfd, 0xdd, 0x36, 0x9d, 0x76, 0x56, 0xbd,
+               0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, 0x5d, 0xb6,
+               0x2c, 0xc7, 0xe7, 0x0c, 0xa7, 0x4c, 0x6c, 0x87,
+               0x27, 0xcc, 0xec, 0x07, 0xac, 0x47, 0x67, 0x8c,
+               0x3a, 0xd1, 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91,
+               0x31, 0xda, 0xfa, 0x11, 0xba, 0x51, 0x71, 0x9a,
+               0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, 0x18, 0xf3,
+               0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8,
+               0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0x0e, 0xe5,
+               0x45, 0xae, 0x8e, 0x65, 0xce, 0x25, 0x05, 0xee,
+               0x74, 0x9f, 0xbf, 0x54, 0xff, 0x14, 0x34, 0xdf,
+               0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, 0x3f, 0xd4,
+               0x62, 0x89, 0xa9, 0x42, 0xe9, 0x02, 0x22, 0xc9,
+               0x69, 0x82, 0xa2, 0x49, 0xe2, 0x09, 0x29, 0xc2,
+               0xb0, 0x5b, 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b,
+               0xbb, 0x50, 0x70, 0x9b, 0x30, 0xdb, 0xfb, 0x10,
+               0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, 0xe6, 0x0d,
+               0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x06,
+               0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37,
+               0x97, 0x7c, 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c,
+               0x8a, 0x61, 0x41, 0xaa, 0x01, 0xea, 0xca, 0x21,
+               0x81, 0x6a, 0x4a, 0xa1, 0x0a, 0xe1, 0xc1, 0x2a,
+               0xe8, 0x03, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43,
+               0xe3, 0x08, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48,
+               0xfe, 0x15, 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55,
+               0xf5, 0x1e, 0x3e, 0xd5, 0x7e, 0x95, 0xb5, 0x5e,
+               0xc4, 0x2f, 0x0f, 0xe4, 0x4f, 0xa4, 0x84, 0x6f,
+               0xcf, 0x24, 0x04, 0xef, 0x44, 0xaf, 0x8f, 0x64,
+               0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79,
+               0xd9, 0x32, 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72,
+       },
+       {
+               0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe,
+               0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d,
+               0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8,
+               0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x07, 0xeb,
+               0xcc, 0x20, 0x09, 0xe5, 0x5b, 0xb7, 0x9e, 0x72,
+               0xff, 0x13, 0x3a, 0xd6, 0x68, 0x84, 0xad, 0x41,
+               0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, 0xf8, 0x14,
+               0x99, 0x75, 0x5c, 0xb0, 0x0e, 0xe2, 0xcb, 0x27,
+               0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b,
+               0xb6, 0x5a, 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x08,
+               0xe3, 0x0f, 0x26, 0xca, 0x74, 0x98, 0xb1, 0x5d,
+               0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, 0x82, 0x6e,
+               0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7,
+               0x7a, 0x96, 0xbf, 0x53, 0xed, 0x01, 0x28, 0xc4,
+               0x2f, 0xc3, 0xea, 0x06, 0xb8, 0x54, 0x7d, 0x91,
+               0x1c, 0xf0, 0xd9, 0x35, 0x8b, 0x67, 0x4e, 0xa2,
+               0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, 0x45, 0xa9,
+               0x24, 0xc8, 0xe1, 0x0d, 0xb3, 0x5f, 0x76, 0x9a,
+               0x71, 0x9d, 0xb4, 0x58, 0xe6, 0x0a, 0x23, 0xcf,
+               0x42, 0xae, 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc,
+               0xdb, 0x37, 0x1e, 0xf2, 0x4c, 0xa0, 0x89, 0x65,
+               0xe8, 0x04, 0x2d, 0xc1, 0x7f, 0x93, 0xba, 0x56,
+               0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x03,
+               0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30,
+               0x92, 0x7e, 0x57, 0xbb, 0x05, 0xe9, 0xc0, 0x2c,
+               0xa1, 0x4d, 0x64, 0x88, 0x36, 0xda, 0xf3, 0x1f,
+               0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, 0xa6, 0x4a,
+               0xc7, 0x2b, 0x02, 0xee, 0x50, 0xbc, 0x95, 0x79,
+               0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0x0c, 0xe0,
+               0x6d, 0x81, 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3,
+               0x38, 0xd4, 0xfd, 0x11, 0xaf, 0x43, 0x6a, 0x86,
+               0x0b, 0xe7, 0xce, 0x22, 0x9c, 0x70, 0x59, 0xb5,
+       },
+       {
+               0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9,
+               0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82,
+               0x76, 0x9b, 0xb1, 0x5c, 0xe5, 0x08, 0x22, 0xcf,
+               0x4d, 0xa0, 0x8a, 0x67, 0xde, 0x33, 0x19, 0xf4,
+               0xec, 0x01, 0x2b, 0xc6, 0x7f, 0x92, 0xb8, 0x55,
+               0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e,
+               0x9a, 0x77, 0x5d, 0xb0, 0x09, 0xe4, 0xce, 0x23,
+               0xa1, 0x4c, 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18,
+               0xc5, 0x28, 0x02, 0xef, 0x56, 0xbb, 0x91, 0x7c,
+               0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80, 0xaa, 0x47,
+               0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0x0a,
+               0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31,
+               0x29, 0xc4, 0xee, 0x03, 0xba, 0x57, 0x7d, 0x90,
+               0x12, 0xff, 0xd5, 0x38, 0x81, 0x6c, 0x46, 0xab,
+               0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, 0x0b, 0xe6,
+               0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd,
+               0x97, 0x7a, 0x50, 0xbd, 0x04, 0xe9, 0xc3, 0x2e,
+               0xac, 0x41, 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15,
+               0xe1, 0x0c, 0x26, 0xcb, 0x72, 0x9f, 0xb5, 0x58,
+               0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, 0x8e, 0x63,
+               0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x05, 0x2f, 0xc2,
+               0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9,
+               0x0d, 0xe0, 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4,
+               0x36, 0xdb, 0xf1, 0x1c, 0xa5, 0x48, 0x62, 0x8f,
+               0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, 0x06, 0xeb,
+               0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0,
+               0x24, 0xc9, 0xe3, 0x0e, 0xb7, 0x5a, 0x70, 0x9d,
+               0x1f, 0xf2, 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6,
+               0xbe, 0x53, 0x79, 0x94, 0x2d, 0xc0, 0xea, 0x07,
+               0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, 0xd1, 0x3c,
+               0xc8, 0x25, 0x0f, 0xe2, 0x5b, 0xb6, 0x9c, 0x71,
+               0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a,
+       },
+       {
+               0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0,
+               0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93,
+               0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, 0x18, 0xf6,
+               0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5,
+               0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c,
+               0xaf, 0x41, 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f,
+               0xca, 0x24, 0x0b, 0xe5, 0x55, 0xbb, 0x94, 0x7a,
+               0xe9, 0x07, 0x28, 0xc6, 0x76, 0x98, 0xb7, 0x59,
+               0x05, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5,
+               0x26, 0xc8, 0xe7, 0x09, 0xb9, 0x57, 0x78, 0x96,
+               0x43, 0xad, 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3,
+               0x60, 0x8e, 0xa1, 0x4f, 0xff, 0x11, 0x3e, 0xd0,
+               0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, 0xd7, 0x39,
+               0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a,
+               0xcf, 0x21, 0x0e, 0xe0, 0x50, 0xbe, 0x91, 0x7f,
+               0xec, 0x02, 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c,
+               0x0a, 0xe4, 0xcb, 0x25, 0x95, 0x7b, 0x54, 0xba,
+               0x29, 0xc7, 0xe8, 0x06, 0xb6, 0x58, 0x77, 0x99,
+               0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc,
+               0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf,
+               0x86, 0x68, 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36,
+               0xa5, 0x4b, 0x64, 0x8a, 0x3a, 0xd4, 0xfb, 0x15,
+               0xc0, 0x2e, 0x01, 0xef, 0x5f, 0xb1, 0x9e, 0x70,
+               0xe3, 0x0d, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53,
+               0x0f, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf,
+               0x2c, 0xc2, 0xed, 0x03, 0xb3, 0x5d, 0x72, 0x9c,
+               0x49, 0xa7, 0x88, 0x66, 0xd6, 0x38, 0x17, 0xf9,
+               0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, 0x34, 0xda,
+               0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33,
+               0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10,
+               0xc5, 0x2b, 0x04, 0xea, 0x5a, 0xb4, 0x9b, 0x75,
+               0xe6, 0x08, 0x27, 0xc9, 0x79, 0x97, 0xb8, 0x56,
+       },
+       {
+               0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7,
+               0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c,
+               0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0x0e, 0xe1,
+               0x7d, 0x92, 0xbe, 0x51, 0xe6, 0x09, 0x25, 0xca,
+               0xac, 0x43, 0x6f, 0x80, 0x37, 0xd8, 0xf4, 0x1b,
+               0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, 0xdf, 0x30,
+               0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d,
+               0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66,
+               0x45, 0xaa, 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2,
+               0x6e, 0x81, 0xad, 0x42, 0xf5, 0x1a, 0x36, 0xd9,
+               0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, 0x4b, 0xa4,
+               0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f,
+               0xe9, 0x06, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e,
+               0xc2, 0x2d, 0x01, 0xee, 0x59, 0xb6, 0x9a, 0x75,
+               0xbf, 0x50, 0x7c, 0x93, 0x24, 0xcb, 0xe7, 0x08,
+               0x94, 0x7b, 0x57, 0xb8, 0x0f, 0xe0, 0xcc, 0x23,
+               0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d,
+               0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16,
+               0xdc, 0x33, 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b,
+               0xf7, 0x18, 0x34, 0xdb, 0x6c, 0x83, 0xaf, 0x40,
+               0x26, 0xc9, 0xe5, 0x0a, 0xbd, 0x52, 0x7e, 0x91,
+               0x0d, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba,
+               0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x04, 0x28, 0xc7,
+               0x5b, 0xb4, 0x98, 0x77, 0xc0, 0x2f, 0x03, 0xec,
+               0xcf, 0x20, 0x0c, 0xe3, 0x54, 0xbb, 0x97, 0x78,
+               0xe4, 0x0b, 0x27, 0xc8, 0x7f, 0x90, 0xbc, 0x53,
+               0x99, 0x76, 0x5a, 0xb5, 0x02, 0xed, 0xc1, 0x2e,
+               0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x05,
+               0x63, 0x8c, 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4,
+               0x48, 0xa7, 0x8b, 0x64, 0xd3, 0x3c, 0x10, 0xff,
+               0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, 0x6d, 0x82,
+               0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9,
+       },
+       {
+               0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea,
+               0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39,
+               0xbb, 0x4b, 0x46, 0xb6, 0x5c, 0xac, 0xa1, 0x51,
+               0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, 0x72, 0x82,
+               0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81,
+               0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52,
+               0xd0, 0x20, 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a,
+               0x03, 0xf3, 0xfe, 0x0e, 0xe4, 0x14, 0x19, 0xe9,
+               0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, 0xcc, 0x3c,
+               0x05, 0xf5, 0xf8, 0x08, 0xe2, 0x12, 0x1f, 0xef,
+               0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87,
+               0xbe, 0x4e, 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54,
+               0xbd, 0x4d, 0x40, 0xb0, 0x5a, 0xaa, 0xa7, 0x57,
+               0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, 0x74, 0x84,
+               0x06, 0xf6, 0xfb, 0x0b, 0xe1, 0x11, 0x1c, 0xec,
+               0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f,
+               0xb1, 0x41, 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b,
+               0x62, 0x92, 0x9f, 0x6f, 0x85, 0x75, 0x78, 0x88,
+               0x0a, 0xfa, 0xf7, 0x07, 0xed, 0x1d, 0x10, 0xe0,
+               0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33,
+               0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30,
+               0x09, 0xf9, 0xf4, 0x04, 0xee, 0x1e, 0x13, 0xe3,
+               0x61, 0x91, 0x9c, 0x6c, 0x86, 0x76, 0x7b, 0x8b,
+               0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, 0xa8, 0x58,
+               0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d,
+               0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e,
+               0xdc, 0x2c, 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36,
+               0x0f, 0xff, 0xf2, 0x02, 0xe8, 0x18, 0x15, 0xe5,
+               0x0c, 0xfc, 0xf1, 0x01, 0xeb, 0x1b, 0x16, 0xe6,
+               0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35,
+               0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d,
+               0x64, 0x94, 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e,
+       },
+       {
+               0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed,
+               0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36,
+               0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46,
+               0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d,
+               0x4b, 0xba, 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6,
+               0x90, 0x61, 0x6f, 0x9e, 0x73, 0x82, 0x8c, 0x7d,
+               0xe0, 0x11, 0x1f, 0xee, 0x03, 0xf2, 0xfc, 0x0d,
+               0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6,
+               0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b,
+               0x4d, 0xbc, 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0,
+               0x3d, 0xcc, 0xc2, 0x33, 0xde, 0x2f, 0x21, 0xd0,
+               0xe6, 0x17, 0x19, 0xe8, 0x05, 0xf4, 0xfa, 0x0b,
+               0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30,
+               0x06, 0xf7, 0xf9, 0x08, 0xe5, 0x14, 0x1a, 0xeb,
+               0x76, 0x87, 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b,
+               0xad, 0x5c, 0x52, 0xa3, 0x4e, 0xbf, 0xb1, 0x40,
+               0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, 0x2d, 0xdc,
+               0xea, 0x1b, 0x15, 0xe4, 0x09, 0xf8, 0xf6, 0x07,
+               0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77,
+               0x41, 0xb0, 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac,
+               0x7a, 0x8b, 0x85, 0x74, 0x99, 0x68, 0x66, 0x97,
+               0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, 0xbd, 0x4c,
+               0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c,
+               0x0a, 0xfb, 0xf5, 0x04, 0xe9, 0x18, 0x16, 0xe7,
+               0xa7, 0x56, 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a,
+               0x7c, 0x8d, 0x83, 0x72, 0x9f, 0x6e, 0x60, 0x91,
+               0x0c, 0xfd, 0xf3, 0x02, 0xef, 0x1e, 0x10, 0xe1,
+               0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a,
+               0xec, 0x1d, 0x13, 0xe2, 0x0f, 0xfe, 0xf0, 0x01,
+               0x37, 0xc6, 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda,
+               0x47, 0xb6, 0xb8, 0x49, 0xa4, 0x55, 0x5b, 0xaa,
+               0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, 0x80, 0x71,
+       },
+       {
+               0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4,
+               0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27,
+               0x9b, 0x69, 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f,
+               0x58, 0xaa, 0xa1, 0x53, 0xb7, 0x45, 0x4e, 0xbc,
+               0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, 0x3d, 0xcf,
+               0xe8, 0x1a, 0x11, 0xe3, 0x07, 0xf5, 0xfe, 0x0c,
+               0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54,
+               0x73, 0x81, 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97,
+               0x56, 0xa4, 0xaf, 0x5d, 0xb9, 0x4b, 0x40, 0xb2,
+               0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, 0x83, 0x71,
+               0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29,
+               0x0e, 0xfc, 0xf7, 0x05, 0xe1, 0x13, 0x18, 0xea,
+               0x7d, 0x8f, 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99,
+               0xbe, 0x4c, 0x47, 0xb5, 0x51, 0xa3, 0xa8, 0x5a,
+               0xe6, 0x14, 0x1f, 0xed, 0x09, 0xfb, 0xf0, 0x02,
+               0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1,
+               0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48,
+               0x6f, 0x9d, 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b,
+               0x37, 0xc5, 0xce, 0x3c, 0xd8, 0x2a, 0x21, 0xd3,
+               0xf4, 0x06, 0x0d, 0xff, 0x1b, 0xe9, 0xe2, 0x10,
+               0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63,
+               0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0,
+               0x1c, 0xee, 0xe5, 0x17, 0xf3, 0x01, 0x0a, 0xf8,
+               0xdf, 0x2d, 0x26, 0xd4, 0x30, 0xc2, 0xc9, 0x3b,
+               0xfa, 0x08, 0x03, 0xf1, 0x15, 0xe7, 0xec, 0x1e,
+               0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd,
+               0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85,
+               0xa2, 0x50, 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46,
+               0xd1, 0x23, 0x28, 0xda, 0x3e, 0xcc, 0xc7, 0x35,
+               0x12, 0xe0, 0xeb, 0x19, 0xfd, 0x0f, 0x04, 0xf6,
+               0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae,
+               0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d,
+       },
+       {
+               0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3,
+               0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28,
+               0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, 0x9b, 0x68,
+               0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3,
+               0x0b, 0xf8, 0xf0, 0x03, 0xe0, 0x13, 0x1b, 0xe8,
+               0xc0, 0x33, 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23,
+               0x80, 0x73, 0x7b, 0x88, 0x6b, 0x98, 0x90, 0x63,
+               0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, 0x5b, 0xa8,
+               0x16, 0xe5, 0xed, 0x1e, 0xfd, 0x0e, 0x06, 0xf5,
+               0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e,
+               0x9d, 0x6e, 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e,
+               0x56, 0xa5, 0xad, 0x5e, 0xbd, 0x4e, 0x46, 0xb5,
+               0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x05, 0x0d, 0xfe,
+               0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35,
+               0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75,
+               0x5d, 0xae, 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe,
+               0x2c, 0xdf, 0xd7, 0x24, 0xc7, 0x34, 0x3c, 0xcf,
+               0xe7, 0x14, 0x1c, 0xef, 0x0c, 0xff, 0xf7, 0x04,
+               0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44,
+               0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f,
+               0x27, 0xd4, 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4,
+               0xec, 0x1f, 0x17, 0xe4, 0x07, 0xf4, 0xfc, 0x0f,
+               0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, 0xbc, 0x4f,
+               0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84,
+               0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9,
+               0xf1, 0x02, 0x0a, 0xf9, 0x1a, 0xe9, 0xe1, 0x12,
+               0xb1, 0x42, 0x4a, 0xb9, 0x5a, 0xa9, 0xa1, 0x52,
+               0x7a, 0x89, 0x81, 0x72, 0x91, 0x62, 0x6a, 0x99,
+               0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2,
+               0xfa, 0x09, 0x01, 0xf2, 0x11, 0xe2, 0xea, 0x19,
+               0xba, 0x49, 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59,
+               0x71, 0x82, 0x8a, 0x79, 0x9a, 0x69, 0x61, 0x92,
+       },
+       {
+               0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6,
+               0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05,
+               0xfb, 0x0f, 0x0e, 0xfa, 0x0c, 0xf8, 0xf9, 0x0d,
+               0x08, 0xfc, 0xfd, 0x09, 0xff, 0x0b, 0x0a, 0xfe,
+               0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d,
+               0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee,
+               0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6,
+               0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15,
+               0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d,
+               0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce,
+               0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6,
+               0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35,
+               0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6,
+               0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25,
+               0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d,
+               0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde,
+               0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d,
+               0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e,
+               0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86,
+               0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75,
+               0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96,
+               0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65,
+               0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d,
+               0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e,
+               0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6,
+               0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45,
+               0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d,
+               0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe,
+               0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d,
+               0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae,
+               0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6,
+               0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55,
+       },
+       {
+               0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+               0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a,
+               0xeb, 0x1e, 0x1c, 0xe9, 0x18, 0xed, 0xef, 0x1a,
+               0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, 0x14, 0xe1,
+               0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a,
+               0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1,
+               0x20, 0xd5, 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1,
+               0xdb, 0x2e, 0x2c, 0xd9, 0x28, 0xdd, 0xdf, 0x2a,
+               0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, 0x8f, 0x7a,
+               0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81,
+               0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91,
+               0x9b, 0x6e, 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a,
+               0x40, 0xb5, 0xb7, 0x42, 0xb3, 0x46, 0x44, 0xb1,
+               0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, 0xbf, 0x4a,
+               0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a,
+               0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1,
+               0x0b, 0xfe, 0xfc, 0x09, 0xf8, 0x0d, 0x0f, 0xfa,
+               0xf0, 0x05, 0x07, 0xf2, 0x03, 0xf6, 0xf4, 0x01,
+               0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, 0xe4, 0x11,
+               0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea,
+               0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31,
+               0x3b, 0xce, 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca,
+               0x2b, 0xde, 0xdc, 0x29, 0xd8, 0x2d, 0x2f, 0xda,
+               0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, 0xd4, 0x21,
+               0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71,
+               0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a,
+               0x6b, 0x9e, 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a,
+               0x90, 0x65, 0x67, 0x92, 0x63, 0x96, 0x94, 0x61,
+               0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, 0x4f, 0xba,
+               0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41,
+               0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51,
+               0x5b, 0xae, 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa,
+       },
+       {
+               0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8,
+               0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b,
+               0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23,
+               0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0,
+               0xab, 0x5d, 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53,
+               0x48, 0xbe, 0xb9, 0x4f, 0xb7, 0x41, 0x46, 0xb0,
+               0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, 0x7e, 0x88,
+               0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b,
+               0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3,
+               0xa8, 0x5e, 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50,
+               0x90, 0x66, 0x61, 0x97, 0x6f, 0x99, 0x9e, 0x68,
+               0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, 0x7d, 0x8b,
+               0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18,
+               0x03, 0xf5, 0xf2, 0x04, 0xfc, 0x0a, 0x0d, 0xfb,
+               0x3b, 0xcd, 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3,
+               0xd8, 0x2e, 0x29, 0xdf, 0x27, 0xd1, 0xd6, 0x20,
+               0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, 0x98, 0x6e,
+               0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d,
+               0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5,
+               0xae, 0x58, 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56,
+               0x3d, 0xcb, 0xcc, 0x3a, 0xc2, 0x34, 0x33, 0xc5,
+               0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, 0xd0, 0x26,
+               0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e,
+               0x05, 0xf3, 0xf4, 0x02, 0xfa, 0x0c, 0x0b, 0xfd,
+               0xdd, 0x2b, 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25,
+               0x3e, 0xc8, 0xcf, 0x39, 0xc1, 0x37, 0x30, 0xc6,
+               0x06, 0xf0, 0xf7, 0x01, 0xf9, 0x0f, 0x08, 0xfe,
+               0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d,
+               0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e,
+               0x95, 0x63, 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d,
+               0xad, 0x5b, 0x5c, 0xaa, 0x52, 0xa4, 0xa3, 0x55,
+               0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, 0x40, 0xb6,
+       },
+       {
+               0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff,
+               0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14,
+               0xcb, 0x3c, 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34,
+               0x20, 0xd7, 0xd3, 0x24, 0xdb, 0x2c, 0x28, 0xdf,
+               0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, 0x83, 0x74,
+               0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f,
+               0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf,
+               0xab, 0x5c, 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54,
+               0x0b, 0xfc, 0xf8, 0x0f, 0xf0, 0x07, 0x03, 0xf4,
+               0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, 0xe8, 0x1f,
+               0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f,
+               0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4,
+               0x80, 0x77, 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f,
+               0x6b, 0x9c, 0x98, 0x6f, 0x90, 0x67, 0x63, 0x94,
+               0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, 0x43, 0xb4,
+               0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f,
+               0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9,
+               0xfd, 0x0a, 0x0e, 0xf9, 0x06, 0xf1, 0xf5, 0x02,
+               0xdd, 0x2a, 0x2e, 0xd9, 0x26, 0xd1, 0xd5, 0x22,
+               0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, 0x3e, 0xc9,
+               0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62,
+               0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89,
+               0x56, 0xa1, 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9,
+               0xbd, 0x4a, 0x4e, 0xb9, 0x46, 0xb1, 0xb5, 0x42,
+               0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, 0x15, 0xe2,
+               0xf6, 0x01, 0x05, 0xf2, 0x0d, 0xfa, 0xfe, 0x09,
+               0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29,
+               0x3d, 0xca, 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2,
+               0x96, 0x61, 0x65, 0x92, 0x6d, 0x9a, 0x9e, 0x69,
+               0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, 0x75, 0x82,
+               0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2,
+               0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49,
+       },
+       {
+               0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2,
+               0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41,
+               0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x04, 0x11, 0xe9,
+               0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a,
+               0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4,
+               0xe5, 0x1d, 0x08, 0xf0, 0x22, 0xda, 0xcf, 0x37,
+               0x4d, 0xb5, 0xa0, 0x58, 0x8a, 0x72, 0x67, 0x9f,
+               0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, 0xf4, 0x0c,
+               0xec, 0x14, 0x01, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e,
+               0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad,
+               0xd7, 0x2f, 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x05,
+               0x44, 0xbc, 0xa9, 0x51, 0x83, 0x7b, 0x6e, 0x96,
+               0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, 0xb0, 0x48,
+               0x09, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb,
+               0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73,
+               0x32, 0xca, 0xdf, 0x27, 0xf5, 0x0d, 0x18, 0xe0,
+               0xc5, 0x3d, 0x28, 0xd0, 0x02, 0xfa, 0xef, 0x17,
+               0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, 0x7c, 0x84,
+               0xfe, 0x06, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c,
+               0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf,
+               0xb3, 0x4b, 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61,
+               0x20, 0xd8, 0xcd, 0x35, 0xe7, 0x1f, 0x0a, 0xf2,
+               0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, 0xa2, 0x5a,
+               0x1b, 0xe3, 0xf6, 0x0e, 0xdc, 0x24, 0x31, 0xc9,
+               0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x03, 0xfb,
+               0xba, 0x42, 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68,
+               0x12, 0xea, 0xff, 0x07, 0xd5, 0x2d, 0x38, 0xc0,
+               0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, 0xab, 0x53,
+               0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d,
+               0xcc, 0x34, 0x21, 0xd9, 0x0b, 0xf3, 0xe6, 0x1e,
+               0x64, 0x9c, 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6,
+               0xf7, 0x0f, 0x1a, 0xe2, 0x30, 0xc8, 0xdd, 0x25,
+       },
+       {
+               0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5,
+               0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e,
+               0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x07, 0xfe,
+               0xb0, 0x49, 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65,
+               0x56, 0xaf, 0xb9, 0x40, 0x95, 0x6c, 0x7a, 0x83,
+               0xcd, 0x34, 0x22, 0xdb, 0x0e, 0xf7, 0xe1, 0x18,
+               0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8,
+               0xe6, 0x1f, 0x09, 0xf0, 0x25, 0xdc, 0xca, 0x33,
+               0xac, 0x55, 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79,
+               0x37, 0xce, 0xd8, 0x21, 0xf4, 0x0d, 0x1b, 0xe2,
+               0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, 0xab, 0x52,
+               0x1c, 0xe5, 0xf3, 0x0a, 0xdf, 0x26, 0x30, 0xc9,
+               0xfa, 0x03, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f,
+               0x61, 0x98, 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4,
+               0xd1, 0x28, 0x3e, 0xc7, 0x12, 0xeb, 0xfd, 0x04,
+               0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, 0x66, 0x9f,
+               0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90,
+               0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0x0b,
+               0x6e, 0x97, 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb,
+               0xf5, 0x0c, 0x1a, 0xe3, 0x36, 0xcf, 0xd9, 0x20,
+               0x13, 0xea, 0xfc, 0x05, 0xd0, 0x29, 0x3f, 0xc6,
+               0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d,
+               0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x02, 0x14, 0xed,
+               0xa3, 0x5a, 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76,
+               0xe9, 0x10, 0x06, 0xff, 0x2a, 0xd3, 0xc5, 0x3c,
+               0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, 0x5e, 0xa7,
+               0xc2, 0x3b, 0x2d, 0xd4, 0x01, 0xf8, 0xee, 0x17,
+               0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c,
+               0xbf, 0x46, 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a,
+               0x24, 0xdd, 0xcb, 0x32, 0xe7, 0x1e, 0x08, 0xf1,
+               0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, 0xb8, 0x41,
+               0x0f, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda,
+       },
+       {
+               0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc,
+               0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f,
+               0x1b, 0xe1, 0xf2, 0x08, 0xd4, 0x2e, 0x3d, 0xc7,
+               0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, 0xbe, 0x44,
+               0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x03, 0x10, 0xea,
+               0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69,
+               0x2d, 0xd7, 0xc4, 0x3e, 0xe2, 0x18, 0x0b, 0xf1,
+               0xae, 0x54, 0x47, 0xbd, 0x61, 0x9b, 0x88, 0x72,
+               0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, 0x4a, 0xb0,
+               0xef, 0x15, 0x06, 0xfc, 0x20, 0xda, 0xc9, 0x33,
+               0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab,
+               0xf4, 0x0e, 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28,
+               0x5a, 0xa0, 0xb3, 0x49, 0x95, 0x6f, 0x7c, 0x86,
+               0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, 0xff, 0x05,
+               0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d,
+               0xc2, 0x38, 0x2b, 0xd1, 0x0d, 0xf7, 0xe4, 0x1e,
+               0xd8, 0x22, 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x04,
+               0x5b, 0xa1, 0xb2, 0x48, 0x94, 0x6e, 0x7d, 0x87,
+               0xc3, 0x39, 0x2a, 0xd0, 0x0c, 0xf6, 0xe5, 0x1f,
+               0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c,
+               0xee, 0x14, 0x07, 0xfd, 0x21, 0xdb, 0xc8, 0x32,
+               0x6d, 0x97, 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1,
+               0xf5, 0x0f, 0x1c, 0xe6, 0x3a, 0xc0, 0xd3, 0x29,
+               0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, 0x50, 0xaa,
+               0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68,
+               0x37, 0xcd, 0xde, 0x24, 0xf8, 0x02, 0x11, 0xeb,
+               0xaf, 0x55, 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73,
+               0x2c, 0xd6, 0xc5, 0x3f, 0xe3, 0x19, 0x0a, 0xf0,
+               0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, 0xa4, 0x5e,
+               0x01, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd,
+               0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45,
+               0x1a, 0xe0, 0xf3, 0x09, 0xd5, 0x2f, 0x3c, 0xc6,
+       },
+       {
+               0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb,
+               0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50,
+               0x0b, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0,
+               0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b,
+               0x16, 0xed, 0xfd, 0x06, 0xdd, 0x26, 0x36, 0xcd,
+               0x9d, 0x66, 0x76, 0x8d, 0x56, 0xad, 0xbd, 0x46,
+               0x1d, 0xe6, 0xf6, 0x0d, 0xd6, 0x2d, 0x3d, 0xc6,
+               0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d,
+               0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0x0c, 0xf7,
+               0xa7, 0x5c, 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c,
+               0x27, 0xdc, 0xcc, 0x37, 0xec, 0x17, 0x07, 0xfc,
+               0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, 0x8c, 0x77,
+               0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0x0a, 0x1a, 0xe1,
+               0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a,
+               0x31, 0xca, 0xda, 0x21, 0xfa, 0x01, 0x11, 0xea,
+               0xba, 0x41, 0x51, 0xaa, 0x71, 0x8a, 0x9a, 0x61,
+               0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, 0x78, 0x83,
+               0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x08,
+               0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88,
+               0xd8, 0x23, 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x03,
+               0x4e, 0xb5, 0xa5, 0x5e, 0x85, 0x7e, 0x6e, 0x95,
+               0xc5, 0x3e, 0x2e, 0xd5, 0x0e, 0xf5, 0xe5, 0x1e,
+               0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e,
+               0xce, 0x35, 0x25, 0xde, 0x05, 0xfe, 0xee, 0x15,
+               0x74, 0x8f, 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf,
+               0xff, 0x04, 0x14, 0xef, 0x34, 0xcf, 0xdf, 0x24,
+               0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, 0x5f, 0xa4,
+               0xf4, 0x0f, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f,
+               0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9,
+               0xe9, 0x12, 0x02, 0xf9, 0x22, 0xd9, 0xc9, 0x32,
+               0x69, 0x92, 0x82, 0x79, 0xa2, 0x59, 0x49, 0xb2,
+               0xe2, 0x19, 0x09, 0xf2, 0x29, 0xd2, 0xc2, 0x39,
+       },
+       {
+               0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce,
+               0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d,
+               0x7b, 0x87, 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5,
+               0xc8, 0x34, 0x2d, 0xd1, 0x1f, 0xe3, 0xfa, 0x06,
+               0xf6, 0x0a, 0x13, 0xef, 0x21, 0xdd, 0xc4, 0x38,
+               0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b,
+               0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43,
+               0x3e, 0xc2, 0xdb, 0x27, 0xe9, 0x15, 0x0c, 0xf0,
+               0xf1, 0x0d, 0x14, 0xe8, 0x26, 0xda, 0xc3, 0x3f,
+               0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, 0x70, 0x8c,
+               0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44,
+               0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0x0b, 0xf7,
+               0x07, 0xfb, 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9,
+               0xb4, 0x48, 0x51, 0xad, 0x63, 0x9f, 0x86, 0x7a,
+               0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, 0x4e, 0xb2,
+               0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x01,
+               0xff, 0x03, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31,
+               0x4c, 0xb0, 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82,
+               0x84, 0x78, 0x61, 0x9d, 0x53, 0xaf, 0xb6, 0x4a,
+               0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, 0x05, 0xf9,
+               0x09, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7,
+               0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74,
+               0x72, 0x8e, 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc,
+               0xc1, 0x3d, 0x24, 0xd8, 0x16, 0xea, 0xf3, 0x0f,
+               0x0e, 0xf2, 0xeb, 0x17, 0xd9, 0x25, 0x3c, 0xc0,
+               0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73,
+               0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb,
+               0xc6, 0x3a, 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x08,
+               0xf8, 0x04, 0x1d, 0xe1, 0x2f, 0xd3, 0xca, 0x36,
+               0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, 0x79, 0x85,
+               0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d,
+               0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x02, 0xfe,
+       },
+       {
+               0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9,
+               0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72,
+               0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, 0x5f, 0xa2,
+               0xd0, 0x2d, 0x37, 0xca, 0x03, 0xfe, 0xe4, 0x19,
+               0xd6, 0x2b, 0x31, 0xcc, 0x05, 0xf8, 0xe2, 0x1f,
+               0x6d, 0x90, 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4,
+               0xbd, 0x40, 0x5a, 0xa7, 0x6e, 0x93, 0x89, 0x74,
+               0x06, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, 0x32, 0xcf,
+               0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78,
+               0x0a, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3,
+               0xda, 0x27, 0x3d, 0xc0, 0x09, 0xf4, 0xee, 0x13,
+               0x61, 0x9c, 0x86, 0x7b, 0xb2, 0x4f, 0x55, 0xa8,
+               0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, 0x53, 0xae,
+               0xdc, 0x21, 0x3b, 0xc6, 0x0f, 0xf2, 0xe8, 0x15,
+               0x0c, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5,
+               0xb7, 0x4a, 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e,
+               0x7f, 0x82, 0x98, 0x65, 0xac, 0x51, 0x4b, 0xb6,
+               0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, 0xf0, 0x0d,
+               0x14, 0xe9, 0xf3, 0x0e, 0xc7, 0x3a, 0x20, 0xdd,
+               0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66,
+               0xa9, 0x54, 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60,
+               0x12, 0xef, 0xf5, 0x08, 0xc1, 0x3c, 0x26, 0xdb,
+               0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, 0xf6, 0x0b,
+               0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0,
+               0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x07,
+               0x75, 0x88, 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc,
+               0xa5, 0x58, 0x42, 0xbf, 0x76, 0x8b, 0x91, 0x6c,
+               0x1e, 0xe3, 0xf9, 0x04, 0xcd, 0x30, 0x2a, 0xd7,
+               0x18, 0xe5, 0xff, 0x02, 0xcb, 0x36, 0x2c, 0xd1,
+               0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a,
+               0x73, 0x8e, 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba,
+               0xc8, 0x35, 0x2f, 0xd2, 0x1b, 0xe6, 0xfc, 0x01,
+       },
+       {
+               0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0,
+               0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63,
+               0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b,
+               0xf8, 0x06, 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38,
+               0xb6, 0x48, 0x57, 0xa9, 0x69, 0x97, 0x88, 0x76,
+               0x15, 0xeb, 0xf4, 0x0a, 0xca, 0x34, 0x2b, 0xd5,
+               0xed, 0x13, 0x0c, 0xf2, 0x32, 0xcc, 0xd3, 0x2d,
+               0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e,
+               0x71, 0x8f, 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1,
+               0xd2, 0x2c, 0x33, 0xcd, 0x0d, 0xf3, 0xec, 0x12,
+               0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0x0b, 0x14, 0xea,
+               0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49,
+               0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x07,
+               0x64, 0x9a, 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4,
+               0x9c, 0x62, 0x7d, 0x83, 0x43, 0xbd, 0xa2, 0x5c,
+               0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, 0x01, 0xff,
+               0xe2, 0x1c, 0x03, 0xfd, 0x3d, 0xc3, 0xdc, 0x22,
+               0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81,
+               0xb9, 0x47, 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79,
+               0x1a, 0xe4, 0xfb, 0x05, 0xc5, 0x3b, 0x24, 0xda,
+               0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, 0x6a, 0x94,
+               0xf7, 0x09, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37,
+               0x0f, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf,
+               0xac, 0x52, 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c,
+               0x93, 0x6d, 0x72, 0x8c, 0x4c, 0xb2, 0xad, 0x53,
+               0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, 0x0e, 0xf0,
+               0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x08,
+               0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab,
+               0x25, 0xdb, 0xc4, 0x3a, 0xfa, 0x04, 0x1b, 0xe5,
+               0x86, 0x78, 0x67, 0x99, 0x59, 0xa7, 0xb8, 0x46,
+               0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, 0x40, 0xbe,
+               0xdd, 0x23, 0x3c, 0xc2, 0x02, 0xfc, 0xe3, 0x1d,
+       },
+       {
+               0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7,
+               0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c,
+               0x4b, 0xb4, 0xa8, 0x57, 0x90, 0x6f, 0x73, 0x8c,
+               0xe0, 0x1f, 0x03, 0xfc, 0x3b, 0xc4, 0xd8, 0x27,
+               0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51,
+               0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x05, 0xfa,
+               0xdd, 0x22, 0x3e, 0xc1, 0x06, 0xf9, 0xe5, 0x1a,
+               0x76, 0x89, 0x95, 0x6a, 0xad, 0x52, 0x4e, 0xb1,
+               0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15, 0x09, 0xf6,
+               0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d,
+               0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd,
+               0xd1, 0x2e, 0x32, 0xcd, 0x0a, 0xf5, 0xe9, 0x16,
+               0xa7, 0x58, 0x44, 0xbb, 0x7c, 0x83, 0x9f, 0x60,
+               0x0c, 0xf3, 0xef, 0x10, 0xd7, 0x28, 0x34, 0xcb,
+               0xec, 0x13, 0x0f, 0xf0, 0x37, 0xc8, 0xd4, 0x2b,
+               0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80,
+               0x62, 0x9d, 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5,
+               0xc9, 0x36, 0x2a, 0xd5, 0x12, 0xed, 0xf1, 0x0e,
+               0x29, 0xd6, 0xca, 0x35, 0xf2, 0x0d, 0x11, 0xee,
+               0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45,
+               0xf4, 0x0b, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33,
+               0x5f, 0xa0, 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98,
+               0xbf, 0x40, 0x5c, 0xa3, 0x64, 0x9b, 0x87, 0x78,
+               0x14, 0xeb, 0xf7, 0x08, 0xcf, 0x30, 0x2c, 0xd3,
+               0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94,
+               0xf8, 0x07, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f,
+               0x18, 0xe7, 0xfb, 0x04, 0xc3, 0x3c, 0x20, 0xdf,
+               0xb3, 0x4c, 0x50, 0xaf, 0x68, 0x97, 0x8b, 0x74,
+               0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, 0xfd, 0x02,
+               0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9,
+               0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49,
+               0x25, 0xda, 0xc6, 0x39, 0xfe, 0x01, 0x1d, 0xe2,
+       },
+};
+
+const uint8_t __aligned(256) raid_gfexp[256] =
+{
+       0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+       0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+       0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+       0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+       0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+       0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+       0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+       0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+       0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+       0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+       0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+       0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+       0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+       0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+       0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+       0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+       0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+       0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+       0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+       0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+       0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+       0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+       0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+       0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+       0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+       0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+       0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+       0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+       0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+       0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+       0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+       0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01,
+};
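+
+/*
+ * An illustrative derivation (not part of the upstream file) of the table
+ * above: raid_gfexp[i] is 2^i in GF(2^8) with the RAID-6 polynomial 0x11d,
+ * so the table can be rebuilt by repeated multiplication by the generator 2.
+ * gfexp_build() is a hypothetical helper shown only as a sketch.
+ */
+static void gfexp_build(uint8_t exp[256])
+{
+	uint8_t v = 1;
+	int i;
+
+	for (i = 0; i < 256; i++) {
+		exp[i] = v;
+		/* multiply by 2: shift left, reduce by 0x1d on overflow */
+		v = (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
+	}
+}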
+
+const uint8_t __aligned(256) raid_gfinv[256] =
+{
+	/* note that the first element is not meaningful, since 0 has no inverse */
+       0x00, 0x01, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba,
+       0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96,
+       0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66,
+       0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a,
+       0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a,
+       0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e,
+       0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e,
+       0x50, 0x22, 0xcf, 0xa9, 0xab, 0x0c, 0x15, 0xe1,
+       0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x04,
+       0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93,
+       0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61,
+       0x13, 0xc1, 0xcb, 0x63, 0x97, 0x0e, 0x37, 0x41,
+       0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d,
+       0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32,
+       0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79,
+       0xdb, 0x77, 0x06, 0xbb, 0x84, 0xcd, 0xfe, 0xfc,
+       0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0,
+       0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x02, 0xf5,
+       0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0x0f, 0x5c,
+       0x0b, 0xdc, 0xbd, 0x94, 0xac, 0x09, 0xc7, 0xa2,
+       0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x05,
+       0xce, 0x3b, 0x0d, 0x3c, 0x9c, 0x08, 0xbe, 0xb7,
+       0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf,
+       0xc5, 0x64, 0x07, 0x7b, 0x95, 0x9a, 0xae, 0xb6,
+       0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e,
+       0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a,
+       0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6,
+       0x10, 0x73, 0x76, 0x78, 0x99, 0x0a, 0x19, 0x91,
+       0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1,
+       0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a,
+       0xe3, 0xe7, 0xb5, 0xea, 0x03, 0x8f, 0xd3, 0xc9,
+       0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd,
+};
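+
+/*
+ * An illustrative self-check (not part of the upstream file): every nonzero
+ * x multiplied by raid_gfinv[x] in GF(2^8) with polynomial 0x11d must give 1;
+ * index 0 is skipped, matching the note above. gfinv_check() is a
+ * hypothetical helper shown only as a sketch.
+ */
+static int gfinv_check(void)
+{
+	int x;
+
+	for (x = 1; x < 256; x++) {
+		uint8_t a = x, b = raid_gfinv[x], r = 0;
+
+		while (b) {		/* shift-and-add GF(2^8) multiply */
+			if (b & 1)
+				r ^= a;
+			a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
+			b >>= 1;
+		}
+		if (r != 1)
+			return -1;
+	}
+	return 0;
+}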
+
+/**
+ * Power matrix used to generate parity.
+ * This matrix is valid for up to 3 parity disks with 251 data disks.
+ *
+ * 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ * 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 4c 98 2d 5a b4 75 ea c9 8f 03 06 0c 18 30 60 c0 9d 27 4e 9c 25 4a 94 35 6a d4 b5 77 ee c1 9f 23 46 8c 05 0a 14 28 50 a0 5d ba 69 d2 b9 6f de a1 5f be 61 c2 99 2f 5e bc 65 ca 89 0f 1e 3c 78 f0 fd e7 d3 bb 6b d6 b1 7f fe e1 df a3 5b b6 71 e2 d9 af 43 86 11 22 44 88 0d 1a 34 68 d0 bd 67 ce 81 1f 3e 7c f8 ed c7 93 3b 76 ec c5 97 33 66 cc 85 17 2e 5c b8 6d da a9 4f 9e 21 42 84 15 2a 54 a8 4d 9a 29 52 a4 55 aa 49 92 39 72 e4 d5 b7 73 e6 d1 bf 63 c6 91 3f 7e fc e5 d7 b3 7b f6 f1 ff e3 db ab 4b 96 31 62 c4 95 37 6e dc a5 57 ae 41 82 19 32 64 c8 8d 07 0e 1c 38 70 e0 dd a7 53 a6 51 a2 59 b2 79 f2 f9 ef c3 9b 2b 56 ac 45 8a 09 12 24 48 90 3d 7a f4 f5 f7 f3 fb eb cb 8b 0b 16 2c 58 b0 7d fa e9 cf 83 1b 36 6c
+ * 01 8e 47 ad d8 6c 36 1b 83 cf e9 fa 7d b0 58 2c 16 0b 8b cb eb fb f3 f7 f5 f4 7a 3d 90 48 24 12 09 8a 45 ac 56 2b 9b c3 ef f9 f2 79 b2 59 a2 51 a6 53 a7 dd e0 70 38 1c 0e 07 8d c8 64 32 19 82 41 ae 57 a5 dc 6e 37 95 c4 62 31 96 4b ab db e3 ff f1 f6 7b b3 d7 e5 fc 7e 3f 91 c6 63 bf d1 e6 73 b7 d5 e4 72 39 92 49 aa 55 a4 52 29 9a 4d a8 54 2a 15 84 42 21 9e 4f a9 da 6d b8 5c 2e 17 85 cc 66 33 97 c5 ec 76 3b 93 c7 ed f8 7c 3e 1f 81 ce 67 bd d0 68 34 1a 0d 88 44 22 11 86 43 af d9 e2 71 b6 5b a3 df e1 fe 7f b1 d6 6b bb d3 e7 fd f0 78 3c 1e 0f 89 ca 65 bc 5e 2f 99 c2 61 be 5f a1 de 6f b9 d2 69 ba 5d a0 50 28 14 0a 05 8c 46 23 9f c1 ee 77 b5 d4 6a 35 94 4a 25 9c 4e 27 9d c0 60 30 18 0c 06 03 8f c9 ea 75 b4 5a 2d 98 4c 26 13 87 cd e8 74 3a 1d 80 40 20
+ */
+const uint8_t __aligned(256) raid_gfvandermonde[3][256] =
+{
+       {
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01,
+       },
+       {
+               0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+               0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+               0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+               0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+               0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+               0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+               0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+               0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+               0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+               0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+               0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+               0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+               0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+               0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+               0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+               0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+               0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+               0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+               0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+               0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+               0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+               0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+               0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+               0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+               0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+               0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+               0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+               0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+               0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+               0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+               0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+               0x1b, 0x36, 0x6c,
+       },
+       {
+               0x01, 0x8e, 0x47, 0xad, 0xd8, 0x6c, 0x36, 0x1b,
+               0x83, 0xcf, 0xe9, 0xfa, 0x7d, 0xb0, 0x58, 0x2c,
+               0x16, 0x0b, 0x8b, 0xcb, 0xeb, 0xfb, 0xf3, 0xf7,
+               0xf5, 0xf4, 0x7a, 0x3d, 0x90, 0x48, 0x24, 0x12,
+               0x09, 0x8a, 0x45, 0xac, 0x56, 0x2b, 0x9b, 0xc3,
+               0xef, 0xf9, 0xf2, 0x79, 0xb2, 0x59, 0xa2, 0x51,
+               0xa6, 0x53, 0xa7, 0xdd, 0xe0, 0x70, 0x38, 0x1c,
+               0x0e, 0x07, 0x8d, 0xc8, 0x64, 0x32, 0x19, 0x82,
+               0x41, 0xae, 0x57, 0xa5, 0xdc, 0x6e, 0x37, 0x95,
+               0xc4, 0x62, 0x31, 0x96, 0x4b, 0xab, 0xdb, 0xe3,
+               0xff, 0xf1, 0xf6, 0x7b, 0xb3, 0xd7, 0xe5, 0xfc,
+               0x7e, 0x3f, 0x91, 0xc6, 0x63, 0xbf, 0xd1, 0xe6,
+               0x73, 0xb7, 0xd5, 0xe4, 0x72, 0x39, 0x92, 0x49,
+               0xaa, 0x55, 0xa4, 0x52, 0x29, 0x9a, 0x4d, 0xa8,
+               0x54, 0x2a, 0x15, 0x84, 0x42, 0x21, 0x9e, 0x4f,
+               0xa9, 0xda, 0x6d, 0xb8, 0x5c, 0x2e, 0x17, 0x85,
+               0xcc, 0x66, 0x33, 0x97, 0xc5, 0xec, 0x76, 0x3b,
+               0x93, 0xc7, 0xed, 0xf8, 0x7c, 0x3e, 0x1f, 0x81,
+               0xce, 0x67, 0xbd, 0xd0, 0x68, 0x34, 0x1a, 0x0d,
+               0x88, 0x44, 0x22, 0x11, 0x86, 0x43, 0xaf, 0xd9,
+               0xe2, 0x71, 0xb6, 0x5b, 0xa3, 0xdf, 0xe1, 0xfe,
+               0x7f, 0xb1, 0xd6, 0x6b, 0xbb, 0xd3, 0xe7, 0xfd,
+               0xf0, 0x78, 0x3c, 0x1e, 0x0f, 0x89, 0xca, 0x65,
+               0xbc, 0x5e, 0x2f, 0x99, 0xc2, 0x61, 0xbe, 0x5f,
+               0xa1, 0xde, 0x6f, 0xb9, 0xd2, 0x69, 0xba, 0x5d,
+               0xa0, 0x50, 0x28, 0x14, 0x0a, 0x05, 0x8c, 0x46,
+               0x23, 0x9f, 0xc1, 0xee, 0x77, 0xb5, 0xd4, 0x6a,
+               0x35, 0x94, 0x4a, 0x25, 0x9c, 0x4e, 0x27, 0x9d,
+               0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x8f,
+               0xc9, 0xea, 0x75, 0xb4, 0x5a, 0x2d, 0x98, 0x4c,
+               0x26, 0x13, 0x87, 0xcd, 0xe8, 0x74, 0x3a, 0x1d,
+               0x80, 0x40, 0x20,
+       },
+};
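+
+/*
+ * A minimal sketch (illustrative, not the optimized upstream routines) of
+ * how a parity row is generated from the matrix above: byte i of parity p
+ * is the GF(2^8) sum over data disks d of raid_gfvandermonde[p][d] *
+ * data[d][i]. gfmul8() and gen_parity() are hypothetical helpers; gfmul8()
+ * is a plain shift-and-add multiply with polynomial 0x11d, where the
+ * in-tree code would use the 256x256 multiplication table above instead.
+ */
+static uint8_t gfmul8(uint8_t a, uint8_t b)
+{
+	uint8_t r = 0;
+
+	while (b) {
+		if (b & 1)
+			r ^= a;
+		a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
+		b >>= 1;
+	}
+	return r;
+}
+
+static void gen_parity(int nparity, int ndata, uint8_t **data,
+		       uint8_t **parity, size_t size)
+{
+	int p, d;
+	size_t i;
+
+	for (p = 0; p < nparity; p++)
+		for (i = 0; i < size; i++) {
+			uint8_t v = 0;
+
+			for (d = 0; d < ndata; d++)
+				v ^= gfmul8(raid_gfvandermonde[p][d], data[d][i]);
+			parity[p][i] = v;
+		}
+}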
+
+/**
+ * Cauchy matrix used to generate parity.
+ * This matrix is valid for up to 6 parity disks with 251 data disks.
+ *
+ * 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ * 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 4c 98 2d 5a b4 75 ea c9 8f 03 06 0c 18 30 60 c0 9d 27 4e 9c 25 4a 94 35 6a d4 b5 77 ee c1 9f 23 46 8c 05 0a 14 28 50 a0 5d ba 69 d2 b9 6f de a1 5f be 61 c2 99 2f 5e bc 65 ca 89 0f 1e 3c 78 f0 fd e7 d3 bb 6b d6 b1 7f fe e1 df a3 5b b6 71 e2 d9 af 43 86 11 22 44 88 0d 1a 34 68 d0 bd 67 ce 81 1f 3e 7c f8 ed c7 93 3b 76 ec c5 97 33 66 cc 85 17 2e 5c b8 6d da a9 4f 9e 21 42 84 15 2a 54 a8 4d 9a 29 52 a4 55 aa 49 92 39 72 e4 d5 b7 73 e6 d1 bf 63 c6 91 3f 7e fc e5 d7 b3 7b f6 f1 ff e3 db ab 4b 96 31 62 c4 95 37 6e dc a5 57 ae 41 82 19 32 64 c8 8d 07 0e 1c 38 70 e0 dd a7 53 a6 51 a2 59 b2 79 f2 f9 ef c3 9b 2b 56 ac 45 8a 09 12 24 48 90 3d 7a f4 f5 f7 f3 fb eb cb 8b 0b 16 2c 58 b0 7d fa e9 cf 83 1b 36 6c
+ * 01 f5 d2 c4 9a 71 f1 7f fc 87 c1 c6 19 2f 40 55 3d ba 53 04 9c 61 34 8c 46 68 70 3e cc 7d 74 75 b5 db 0c df 9e 6d 79 eb 63 9f 38 d0 94 a5 24 89 5c 65 5b ae 37 33 4c dd 47 f4 02 a6 39 d8 9d 2d 62 b9 2e 0f 2b 60 58 e4 f8 6c 72 b0 85 4d 95 41 1c 23 05 99 32 c5 0e 82 91 14 d1 af f9 b3 07 97 6e 0b 67 3b 78 e6 28 22 4f a3 ca 48 de 1d a8 17 6f 90 aa 31 5a f3 e9 a9 44 30 56 09 59 6a 42 cd e5 d6 86 d9 bf cb 26 66 7c d5 be 25 1f e0 98 27 92 51 c7 45 2c c0 ad a7 69 f7 b4 e8 84 e1 18 88 3c 76 20 5e 9b 1e 0d 81 4a bd 16 8a ac 93 ce 1a c2 0a 3f fd e3 77 6b d7 ef a4 80 a1 36 ed a2 12 57 b6 29 8d 7b c8 52 c3 bc b8 21 d4 ea d3 06 ab 2a 1b 5f b7 10 ec 64 f6 e2 11 50 83 54 3a fa fb f2 43 b1 ff e7 c9 03 bb ee 13 8b dc 35 b2 da cf a0 96 49 4e 08 73 f0 7e fe 15 4b
+ * 01 bb a6 d7 c7 07 ce 82 4a 2f a5 9b b6 60 f1 ad e7 f4 06 d2 df 2e ca 65 5c 48 21 aa cd 4e c1 61 38 0a 3e d1 d5 cb 10 dc 5e 24 b8 de 79 36 43 72 d9 f8 f9 a2 a4 6a 3d ea 8e 03 f5 ab b4 5d b5 53 6b 39 86 b0 50 74 96 84 5a 4b e8 49 e5 51 ef 12 bc 89 5b 2b 29 09 c3 57 1e 37 76 0b 64 8a 52 59 80 da a8 44 95 3c 33 e6 7c af 6c b1 9d fc 92 d6 d8 ff a7 77 04 13 73 66 28 7d 83 fb 5f 63 25 19 bd c5 3b 6e 20 35 55 42 31 e1 b9 9e 90 d4 ba db f7 2a e9 3a a0 75 7a d3 02 ee 9c c6 1f 14 cc 22 4d 30 71 58 11 85 4f 6f 6d 1d cf fa 54 a9 17 a3 0f ae 0d 1c c2 d0 32 16 f6 c0 7f 2d 15 f3 1b f2 ed b3 45 c8 ac 7b 2c e2 e4 bf be 9f 34 05 70 3f 98 fe 62 18 9a 56 8d 93 97 78 4c 7e 27 87 08 8b ec 67 0e 1a 23 8c 68 99 94 40 b2 a1 eb b7 26 f0 dd e3 69 0c c4 88 41 81 91 e0 fd
+ * 01 97 7f 9c 7c 18 bd a2 58 1a da 74 70 a3 e5 47 29 07 f5 80 23 e9 fa 46 54 a0 99 95 53 9b 0b c7 09 c0 78 89 92 e3 0d b0 2a 8c fb 17 3f 26 65 87 27 5c 66 61 79 4d 32 b3 8d 52 e2 82 3d f9 c5 02 bc 4c 73 48 62 af ba 41 d9 c4 2f b1 33 b8 15 7d cf 3a a9 5f 84 6d 34 1b 44 94 72 81 42 be cc 4b 0a 6f 5a 22 36 b5 3c 9d 13 7e 08 dd d6 5e 04 fc 5b ec ef f1 6e 1e 77 24 e6 c6 aa cb fd 51 67 06 6a 4a 88 db b2 c2 5d 43 40 f7 50 a8 f2 7a 71 a4 d2 bf 31 90 19 9a 8e f6 c3 a6 e7 60 12 ee 2d de 38 e8 b7 98 c1 28 f3 05 96 63 d1 b9 14 9f 1d 83 68 75 ed 16 03 ce e4 df e0 10 ae 69 55 91 2e 4e fe 21 1f 9e e1 d5 cd ca f0 8b 2b c9 8a 93 bb 57 20 86 1c a1 4f 3e 25 d4 6c a5 6b a7 37 ff 39 35 0c f8 ea 56 45 8f 2c 59 ab 85 eb 49 0f dc d8 76 b6 f4 0e 11 b4 d0 30 d3 3b ad d7
+ * 01 2b 3f cf 73 2c d6 ed cb 74 15 78 8a c1 17 c9 89 68 21 ab 76 3b 4b 5a 6e 0e b9 d3 b6 3e 36 86 bf a2 a7 30 14 eb c7 2d 96 67 20 b5 9a e0 a8 c6 80 04 8d fe 75 5e 23 ca 8f 48 99 0d df 8e b8 70 29 9c 44 69 3d a5 c2 90 d2 1c 9b 02 1d 98 93 ec 84 e8 64 4c 3a 8b 97 f3 e5 c0 7d 26 c8 08 a0 62 82 55 f7 33 f6 51 63 4d 77 da fd c3 38 6d ee 09 47 a3 05 de a6 f1 22 25 6a 0c 81 b2 6b 58 d5 b3 fc fb 28 7f 07 dc 7a 9e d0 37 b4 e1 1a 24 03 ae 94 ba 88 2f ea 2e 8c 5b bb 79 d1 11 ff a4 19 3c 2a 4e 52 e3 95 bd 31 5d 35 4a 41 c4 db 42 c5 0b 49 1b 7c e4 b0 9d 45 f0 a9 61 57 06 d4 40 91 56 13 fa 87 ac 27 54 dd 59 1f 71 39 43 6c f9 be 4f f4 1e 32 cd e9 7e 7b 66 5f ef e7 6f 0a 60 d7 b7 83 92 e2 af 72 f8 b1 50 10 ce 18 53 a1 cc ad 12 34 0f f5 aa 16 e6 f2 d8 85 9f bc
+ */
+const uint8_t __aligned(256) raid_gfcauchy[6][256] =
+{
+       {
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+               0x01, 0x01, 0x01,
+       },
+       {
+               0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+               0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+               0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+               0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+               0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+               0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+               0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+               0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+               0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+               0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+               0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+               0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+               0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+               0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+               0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+               0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+               0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+               0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+               0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+               0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+               0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+               0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+               0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+               0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+               0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+               0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+               0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+               0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+               0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+               0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+               0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+               0x1b, 0x36, 0x6c,
+       },
+       {
+               0x01, 0xf5, 0xd2, 0xc4, 0x9a, 0x71, 0xf1, 0x7f,
+               0xfc, 0x87, 0xc1, 0xc6, 0x19, 0x2f, 0x40, 0x55,
+               0x3d, 0xba, 0x53, 0x04, 0x9c, 0x61, 0x34, 0x8c,
+               0x46, 0x68, 0x70, 0x3e, 0xcc, 0x7d, 0x74, 0x75,
+               0xb5, 0xdb, 0x0c, 0xdf, 0x9e, 0x6d, 0x79, 0xeb,
+               0x63, 0x9f, 0x38, 0xd0, 0x94, 0xa5, 0x24, 0x89,
+               0x5c, 0x65, 0x5b, 0xae, 0x37, 0x33, 0x4c, 0xdd,
+               0x47, 0xf4, 0x02, 0xa6, 0x39, 0xd8, 0x9d, 0x2d,
+               0x62, 0xb9, 0x2e, 0x0f, 0x2b, 0x60, 0x58, 0xe4,
+               0xf8, 0x6c, 0x72, 0xb0, 0x85, 0x4d, 0x95, 0x41,
+               0x1c, 0x23, 0x05, 0x99, 0x32, 0xc5, 0x0e, 0x82,
+               0x91, 0x14, 0xd1, 0xaf, 0xf9, 0xb3, 0x07, 0x97,
+               0x6e, 0x0b, 0x67, 0x3b, 0x78, 0xe6, 0x28, 0x22,
+               0x4f, 0xa3, 0xca, 0x48, 0xde, 0x1d, 0xa8, 0x17,
+               0x6f, 0x90, 0xaa, 0x31, 0x5a, 0xf3, 0xe9, 0xa9,
+               0x44, 0x30, 0x56, 0x09, 0x59, 0x6a, 0x42, 0xcd,
+               0xe5, 0xd6, 0x86, 0xd9, 0xbf, 0xcb, 0x26, 0x66,
+               0x7c, 0xd5, 0xbe, 0x25, 0x1f, 0xe0, 0x98, 0x27,
+               0x92, 0x51, 0xc7, 0x45, 0x2c, 0xc0, 0xad, 0xa7,
+               0x69, 0xf7, 0xb4, 0xe8, 0x84, 0xe1, 0x18, 0x88,
+               0x3c, 0x76, 0x20, 0x5e, 0x9b, 0x1e, 0x0d, 0x81,
+               0x4a, 0xbd, 0x16, 0x8a, 0xac, 0x93, 0xce, 0x1a,
+               0xc2, 0x0a, 0x3f, 0xfd, 0xe3, 0x77, 0x6b, 0xd7,
+               0xef, 0xa4, 0x80, 0xa1, 0x36, 0xed, 0xa2, 0x12,
+               0x57, 0xb6, 0x29, 0x8d, 0x7b, 0xc8, 0x52, 0xc3,
+               0xbc, 0xb8, 0x21, 0xd4, 0xea, 0xd3, 0x06, 0xab,
+               0x2a, 0x1b, 0x5f, 0xb7, 0x10, 0xec, 0x64, 0xf6,
+               0xe2, 0x11, 0x50, 0x83, 0x54, 0x3a, 0xfa, 0xfb,
+               0xf2, 0x43, 0xb1, 0xff, 0xe7, 0xc9, 0x03, 0xbb,
+               0xee, 0x13, 0x8b, 0xdc, 0x35, 0xb2, 0xda, 0xcf,
+               0xa0, 0x96, 0x49, 0x4e, 0x08, 0x73, 0xf0, 0x7e,
+               0xfe, 0x15, 0x4b,
+       },
+       {
+               0x01, 0xbb, 0xa6, 0xd7, 0xc7, 0x07, 0xce, 0x82,
+               0x4a, 0x2f, 0xa5, 0x9b, 0xb6, 0x60, 0xf1, 0xad,
+               0xe7, 0xf4, 0x06, 0xd2, 0xdf, 0x2e, 0xca, 0x65,
+               0x5c, 0x48, 0x21, 0xaa, 0xcd, 0x4e, 0xc1, 0x61,
+               0x38, 0x0a, 0x3e, 0xd1, 0xd5, 0xcb, 0x10, 0xdc,
+               0x5e, 0x24, 0xb8, 0xde, 0x79, 0x36, 0x43, 0x72,
+               0xd9, 0xf8, 0xf9, 0xa2, 0xa4, 0x6a, 0x3d, 0xea,
+               0x8e, 0x03, 0xf5, 0xab, 0xb4, 0x5d, 0xb5, 0x53,
+               0x6b, 0x39, 0x86, 0xb0, 0x50, 0x74, 0x96, 0x84,
+               0x5a, 0x4b, 0xe8, 0x49, 0xe5, 0x51, 0xef, 0x12,
+               0xbc, 0x89, 0x5b, 0x2b, 0x29, 0x09, 0xc3, 0x57,
+               0x1e, 0x37, 0x76, 0x0b, 0x64, 0x8a, 0x52, 0x59,
+               0x80, 0xda, 0xa8, 0x44, 0x95, 0x3c, 0x33, 0xe6,
+               0x7c, 0xaf, 0x6c, 0xb1, 0x9d, 0xfc, 0x92, 0xd6,
+               0xd8, 0xff, 0xa7, 0x77, 0x04, 0x13, 0x73, 0x66,
+               0x28, 0x7d, 0x83, 0xfb, 0x5f, 0x63, 0x25, 0x19,
+               0xbd, 0xc5, 0x3b, 0x6e, 0x20, 0x35, 0x55, 0x42,
+               0x31, 0xe1, 0xb9, 0x9e, 0x90, 0xd4, 0xba, 0xdb,
+               0xf7, 0x2a, 0xe9, 0x3a, 0xa0, 0x75, 0x7a, 0xd3,
+               0x02, 0xee, 0x9c, 0xc6, 0x1f, 0x14, 0xcc, 0x22,
+               0x4d, 0x30, 0x71, 0x58, 0x11, 0x85, 0x4f, 0x6f,
+               0x6d, 0x1d, 0xcf, 0xfa, 0x54, 0xa9, 0x17, 0xa3,
+               0x0f, 0xae, 0x0d, 0x1c, 0xc2, 0xd0, 0x32, 0x16,
+               0xf6, 0xc0, 0x7f, 0x2d, 0x15, 0xf3, 0x1b, 0xf2,
+               0xed, 0xb3, 0x45, 0xc8, 0xac, 0x7b, 0x2c, 0xe2,
+               0xe4, 0xbf, 0xbe, 0x9f, 0x34, 0x05, 0x70, 0x3f,
+               0x98, 0xfe, 0x62, 0x18, 0x9a, 0x56, 0x8d, 0x93,
+               0x97, 0x78, 0x4c, 0x7e, 0x27, 0x87, 0x08, 0x8b,
+               0xec, 0x67, 0x0e, 0x1a, 0x23, 0x8c, 0x68, 0x99,
+               0x94, 0x40, 0xb2, 0xa1, 0xeb, 0xb7, 0x26, 0xf0,
+               0xdd, 0xe3, 0x69, 0x0c, 0xc4, 0x88, 0x41, 0x81,
+               0x91, 0xe0, 0xfd,
+       },
+       {
+               0x01, 0x97, 0x7f, 0x9c, 0x7c, 0x18, 0xbd, 0xa2,
+               0x58, 0x1a, 0xda, 0x74, 0x70, 0xa3, 0xe5, 0x47,
+               0x29, 0x07, 0xf5, 0x80, 0x23, 0xe9, 0xfa, 0x46,
+               0x54, 0xa0, 0x99, 0x95, 0x53, 0x9b, 0x0b, 0xc7,
+               0x09, 0xc0, 0x78, 0x89, 0x92, 0xe3, 0x0d, 0xb0,
+               0x2a, 0x8c, 0xfb, 0x17, 0x3f, 0x26, 0x65, 0x87,
+               0x27, 0x5c, 0x66, 0x61, 0x79, 0x4d, 0x32, 0xb3,
+               0x8d, 0x52, 0xe2, 0x82, 0x3d, 0xf9, 0xc5, 0x02,
+               0xbc, 0x4c, 0x73, 0x48, 0x62, 0xaf, 0xba, 0x41,
+               0xd9, 0xc4, 0x2f, 0xb1, 0x33, 0xb8, 0x15, 0x7d,
+               0xcf, 0x3a, 0xa9, 0x5f, 0x84, 0x6d, 0x34, 0x1b,
+               0x44, 0x94, 0x72, 0x81, 0x42, 0xbe, 0xcc, 0x4b,
+               0x0a, 0x6f, 0x5a, 0x22, 0x36, 0xb5, 0x3c, 0x9d,
+               0x13, 0x7e, 0x08, 0xdd, 0xd6, 0x5e, 0x04, 0xfc,
+               0x5b, 0xec, 0xef, 0xf1, 0x6e, 0x1e, 0x77, 0x24,
+               0xe6, 0xc6, 0xaa, 0xcb, 0xfd, 0x51, 0x67, 0x06,
+               0x6a, 0x4a, 0x88, 0xdb, 0xb2, 0xc2, 0x5d, 0x43,
+               0x40, 0xf7, 0x50, 0xa8, 0xf2, 0x7a, 0x71, 0xa4,
+               0xd2, 0xbf, 0x31, 0x90, 0x19, 0x9a, 0x8e, 0xf6,
+               0xc3, 0xa6, 0xe7, 0x60, 0x12, 0xee, 0x2d, 0xde,
+               0x38, 0xe8, 0xb7, 0x98, 0xc1, 0x28, 0xf3, 0x05,
+               0x96, 0x63, 0xd1, 0xb9, 0x14, 0x9f, 0x1d, 0x83,
+               0x68, 0x75, 0xed, 0x16, 0x03, 0xce, 0xe4, 0xdf,
+               0xe0, 0x10, 0xae, 0x69, 0x55, 0x91, 0x2e, 0x4e,
+               0xfe, 0x21, 0x1f, 0x9e, 0xe1, 0xd5, 0xcd, 0xca,
+               0xf0, 0x8b, 0x2b, 0xc9, 0x8a, 0x93, 0xbb, 0x57,
+               0x20, 0x86, 0x1c, 0xa1, 0x4f, 0x3e, 0x25, 0xd4,
+               0x6c, 0xa5, 0x6b, 0xa7, 0x37, 0xff, 0x39, 0x35,
+               0x0c, 0xf8, 0xea, 0x56, 0x45, 0x8f, 0x2c, 0x59,
+               0xab, 0x85, 0xeb, 0x49, 0x0f, 0xdc, 0xd8, 0x76,
+               0xb6, 0xf4, 0x0e, 0x11, 0xb4, 0xd0, 0x30, 0xd3,
+               0x3b, 0xad, 0xd7,
+       },
+       {
+               0x01, 0x2b, 0x3f, 0xcf, 0x73, 0x2c, 0xd6, 0xed,
+               0xcb, 0x74, 0x15, 0x78, 0x8a, 0xc1, 0x17, 0xc9,
+               0x89, 0x68, 0x21, 0xab, 0x76, 0x3b, 0x4b, 0x5a,
+               0x6e, 0x0e, 0xb9, 0xd3, 0xb6, 0x3e, 0x36, 0x86,
+               0xbf, 0xa2, 0xa7, 0x30, 0x14, 0xeb, 0xc7, 0x2d,
+               0x96, 0x67, 0x20, 0xb5, 0x9a, 0xe0, 0xa8, 0xc6,
+               0x80, 0x04, 0x8d, 0xfe, 0x75, 0x5e, 0x23, 0xca,
+               0x8f, 0x48, 0x99, 0x0d, 0xdf, 0x8e, 0xb8, 0x70,
+               0x29, 0x9c, 0x44, 0x69, 0x3d, 0xa5, 0xc2, 0x90,
+               0xd2, 0x1c, 0x9b, 0x02, 0x1d, 0x98, 0x93, 0xec,
+               0x84, 0xe8, 0x64, 0x4c, 0x3a, 0x8b, 0x97, 0xf3,
+               0xe5, 0xc0, 0x7d, 0x26, 0xc8, 0x08, 0xa0, 0x62,
+               0x82, 0x55, 0xf7, 0x33, 0xf6, 0x51, 0x63, 0x4d,
+               0x77, 0xda, 0xfd, 0xc3, 0x38, 0x6d, 0xee, 0x09,
+               0x47, 0xa3, 0x05, 0xde, 0xa6, 0xf1, 0x22, 0x25,
+               0x6a, 0x0c, 0x81, 0xb2, 0x6b, 0x58, 0xd5, 0xb3,
+               0xfc, 0xfb, 0x28, 0x7f, 0x07, 0xdc, 0x7a, 0x9e,
+               0xd0, 0x37, 0xb4, 0xe1, 0x1a, 0x24, 0x03, 0xae,
+               0x94, 0xba, 0x88, 0x2f, 0xea, 0x2e, 0x8c, 0x5b,
+               0xbb, 0x79, 0xd1, 0x11, 0xff, 0xa4, 0x19, 0x3c,
+               0x2a, 0x4e, 0x52, 0xe3, 0x95, 0xbd, 0x31, 0x5d,
+               0x35, 0x4a, 0x41, 0xc4, 0xdb, 0x42, 0xc5, 0x0b,
+               0x49, 0x1b, 0x7c, 0xe4, 0xb0, 0x9d, 0x45, 0xf0,
+               0xa9, 0x61, 0x57, 0x06, 0xd4, 0x40, 0x91, 0x56,
+               0x13, 0xfa, 0x87, 0xac, 0x27, 0x54, 0xdd, 0x59,
+               0x1f, 0x71, 0x39, 0x43, 0x6c, 0xf9, 0xbe, 0x4f,
+               0xf4, 0x1e, 0x32, 0xcd, 0xe9, 0x7e, 0x7b, 0x66,
+               0x5f, 0xef, 0xe7, 0x6f, 0x0a, 0x60, 0xd7, 0xb7,
+               0x83, 0x92, 0xe2, 0xaf, 0x72, 0xf8, 0xb1, 0x50,
+               0x10, 0xce, 0x18, 0x53, 0xa1, 0xcc, 0xad, 0x12,
+               0x34, 0x0f, 0xf5, 0xaa, 0x16, 0xe6, 0xf2, 0xd8,
+               0x85, 0x9f, 0xbc,
+       },
+};
+
+#ifdef CONFIG_X86
+/**
+ * PSHUFB tables for the Cauchy matrix.
+ *
+ * Indexes are [DISK][PARITY - 2][LH], where DISK ranges from 0 to 250,
+ * PARITY from 2 to 5, and LH from 0 to 1 (the low- and high-nibble
+ * lookup tables, respectively).
+ */
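+/*
+ * A minimal sketch of how these entries are consumed (the helper below
+ * is illustrative only, not a function defined in this file): entry
+ * [LH] == 0 tabulates the product of the disk's Cauchy coefficient with
+ * every low-nibble value, and [LH] == 1 with every high-nibble value,
+ * so one GF(2^8) multiply reduces to two lookups and an XOR.  PSHUFB
+ * applies the same 16-entry lookup to 16 bytes per instruction.
+ *
+ *	static inline uint8_t gfcauchy_mul_sketch(unsigned disk, unsigned parity, uint8_t b)
+ *	{
+ *		const uint8_t *lo = raid_gfcauchypshufb[disk][parity - 2][0];
+ *		const uint8_t *hi = raid_gfcauchypshufb[disk][parity - 2][1];
+ *
+ *		return lo[b & 0x0f] ^ hi[b >> 4];
+ *	}
+ *
+ * Note that disk 0 holds the identity mapping (coefficient 1), which
+ * can be checked directly against the first table below.
+ */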
+const uint8_t __aligned(256) raid_gfcauchypshufb[251][4][2][16] =
+{
+       {
+               {
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+               },
+               {
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+               },
+               {
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+               },
+               {
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+               },
+               {
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+               },
+               {
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+               },
+               {
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+               },
+               {
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+               },
+               {
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+               },
+               {
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+               },
+               {
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+               },
+               {
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+               },
+               {
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+               },
+               {
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+               },
+               {
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+               },
+               {
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+               },
+               {
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+               },
+               {
+                       { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+               },
+               {
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+               },
+               {
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+               },
+               {
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+               },
+               {
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+               },
+               {
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+                       { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+               },
+               {
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+               },
+               {
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+                       { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+               },
+               {
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+               },
+               {
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+               },
+               {
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+               },
+               {
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+               },
+               {
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+               },
+               {
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+               },
+               {
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+               },
+               {
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+               },
+               {
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+               },
+               {
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+               },
+               {
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+               },
+               {
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+               },
+               {
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+               },
+               {
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+               },
+               {
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+               },
+               {
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+               },
+               {
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+               },
+               {
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+               },
+               {
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+               },
+               {
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+               },
+               {
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+               },
+               {
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+               },
+               {
+                       { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+                       { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+               },
+               {
+                       { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+                       { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+               },
+               {
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+               },
+               {
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+               },
+               {
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+               },
+               {
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+               },
+               {
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+               },
+               {
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+               },
+               {
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+               },
+               {
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+               },
+               {
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+               },
+               {
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+               },
+               {
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+               },
+               {
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+               },
+               {
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+               },
+               {
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+               },
+               {
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+               },
+               {
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+               },
+               {
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+               },
+               {
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+               },
+               {
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+               },
+               {
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+               },
+               {
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+               },
+               {
+                       { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+                       { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+               },
+               {
+                       { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+                       { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+               },
+               {
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+                       { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+                       { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+               },
+               {
+                       { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+                       { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+               },
+               {
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+               },
+               {
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+               },
+               {
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+               },
+               {
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+               },
+               {
+                       { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+               },
+               {
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+               },
+               {
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+                       { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+               },
+               {
+                       { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+               },
+               {
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+               },
+               {
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+               },
+               {
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+                       { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+               },
+               {
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+               },
+               {
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+               },
+               {
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+               },
+               {
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+               },
+               {
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+               },
+               {
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+               },
+               {
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+               },
+               {
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+               },
+               {
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+                       { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+               },
+               {
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+               },
+               {
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+               },
+               {
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+               },
+               {
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+               },
+               {
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+               },
+               {
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+               },
+               {
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+               },
+               {
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+               },
+               {
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+               },
+               {
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+               },
+               {
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+               },
+               {
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+               },
+               {
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+               },
+               {
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+               },
+               {
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+               },
+               {
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+               },
+               {
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+               },
+               {
+                       { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+                       { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+               },
+               {
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+               },
+               {
+                       { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+               },
+               {
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+               },
+               {
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+               },
+               {
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+               },
+               {
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+               },
+               {
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+               },
+               {
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+               },
+               {
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+               },
+               {
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+               },
+               {
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+               },
+               {
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+               },
+               {
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+                       { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+               },
+               {
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+               },
+               {
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+               },
+               {
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+               },
+               {
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+               },
+               {
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+               },
+               {
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+               },
+               {
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+               },
+               {
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+               },
+               {
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+               },
+               {
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+               },
+               {
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+               },
+               {
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+                       { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+               },
+               {
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+               },
+               {
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+                       { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+               },
+               {
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+               },
+               {
+                       { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+                       { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+               },
+               {
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+               },
+               {
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+               },
+               {
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+               },
+               {
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+                       { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+               },
+               {
+                       { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+                       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+               },
+               {
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+               },
+               {
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+                       { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+               },
+               {
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+               },
+               {
+                       { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+                       { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+               },
+               {
+                       { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+                       { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+               },
+               {
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+               },
+               {
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+               },
+               {
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+               },
+               {
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+               },
+               {
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+               },
+               {
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+               },
+               {
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+               },
+               {
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+               },
+               {
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+               },
+               {
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+               },
+               {
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+               },
+               {
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+               },
+               {
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+               },
+               {
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+               },
+               {
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+               },
+               {
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+               },
+               {
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+               },
+               {
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+                       { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+               },
+               {
+                       { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+                       { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+               },
+               {
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+               },
+               {
+                       { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+                       { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+               },
+               {
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+               },
+               {
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+               },
+               {
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+               },
+               {
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+               },
+               {
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+               },
+               {
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+                       { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+               },
+               {
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+               },
+               {
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+                       { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+               },
+               {
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+               },
+               {
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+                       { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+               },
+               {
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+               },
+               {
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+               },
+               {
+                       { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+                       { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+               },
+               {
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+               },
+               {
+                       { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+                       { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+               },
+               {
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+               },
+               {
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+               },
+               {
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+               },
+               {
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+               },
+               {
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+               },
+               {
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+               },
+               {
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+               },
+               {
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+               },
+               {
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+               },
+               {
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+               },
+               {
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+               },
+               {
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+               },
+               {
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+               },
+               {
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+               },
+               {
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+               },
+               {
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+               },
+               {
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+               },
+               {
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+               },
+               {
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+               },
+               {
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+               },
+               {
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+               },
+               {
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+               },
+               {
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+               },
+               {
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+               },
+               {
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+               },
+               {
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+               },
+               {
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+               },
+               {
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+               },
+               {
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+                       { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+               },
+               {
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+               },
+               {
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+                       { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+               },
+               {
+                       { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+                       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+               },
+               {
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+                       { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+               },
+               {
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+               },
+               {
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+               },
+               {
+                       { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+               },
+               {
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+               },
+               {
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+               },
+               {
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+               },
+               {
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+               },
+               {
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+               },
+               {
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+               },
+               {
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+               },
+               {
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+               },
+               {
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+               },
+               {
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+               },
+               {
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+               },
+               {
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+               },
+               {
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+               },
+               {
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+               },
+               {
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+                       { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+               },
+               {
+                       { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+               },
+               {
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+               },
+               {
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+               },
+               {
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+               },
+               {
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+               },
+               {
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+               },
+               {
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+               },
+               {
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+               },
+               {
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+               },
+               {
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+                       { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+               },
+               {
+                       { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+               },
+               {
+                       { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+                       { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+                       { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+               },
+               {
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+               },
+               {
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+               },
+               {
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+               },
+               {
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+               },
+               {
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+               },
+               {
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+               },
+               {
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+               },
+               {
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+               },
+               {
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+               },
+               {
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+                       { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+               },
+               {
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+               },
+               {
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+                       { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+               },
+               {
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+               },
+               {
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+               },
+               {
+                       { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+               },
+               {
+                       { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+               },
+               {
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+               },
+               {
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+                       { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+               },
+               {
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+               },
+               {
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+               },
+               {
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+               },
+               {
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+               },
+               {
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+               },
+               {
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+               },
+               {
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+               },
+               {
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+               },
+               {
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+               },
+               {
+                       { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+                       { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+               },
+               {
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+               },
+               {
+                       { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+                       { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+               },
+               {
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+               },
+               {
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+               },
+               {
+                       { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+               },
+               {
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+               },
+               {
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+                       { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+               },
+               {
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+               },
+               {
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+               },
+               {
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+               },
+               {
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+               },
+               {
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+               },
+               {
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+               },
+               {
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+                       { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+               },
+               {
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+               },
+               {
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+               },
+               {
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+               },
+               {
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+               },
+               {
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+                       { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+               },
+               {
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+               },
+               {
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+               },
+               {
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+               },
+               {
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+               },
+               {
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+               },
+               {
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+                       { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+               },
+               {
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+               },
+               {
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+               },
+               {
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+               },
+               {
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+               },
+               {
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+               },
+               {
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+               },
+               {
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+               },
+               {
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+               },
+               {
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+               },
+               {
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+               },
+               {
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+               },
+               {
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+               },
+               {
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+               },
+               {
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+               },
+               {
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+               },
+               {
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+               },
+               {
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+                       { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+               },
+               {
+                       { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+               },
+               {
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+               },
+               {
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+               },
+               {
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+               },
+               {
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+                       { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+               },
+               {
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+               },
+               {
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+               },
+               {
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+               },
+               {
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+                       { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+               },
+               {
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+               },
+               {
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+               },
+               {
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+               },
+               {
+                       { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+               },
+               {
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+                       { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+               },
+               {
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+               },
+               {
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+                       { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+               },
+               {
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+               },
+               {
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+               },
+               {
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+                       { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+               },
+               {
+                       { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+                       { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+               },
+               {
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+               },
+               {
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+               },
+               {
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+               },
+               {
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+               },
+               {
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+               },
+               {
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+               },
+               {
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+               },
+               {
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+               },
+               {
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+                       { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+               },
+               {
+                       { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+               },
+               {
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+               },
+               {
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+               },
+               {
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+               },
+               {
+                       { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+               },
+               {
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+               },
+               {
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+               },
+               {
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+               },
+               {
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+               },
+               {
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+               },
+               {
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+                       { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+               },
+               {
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+               },
+               {
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+               },
+               {
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+               },
+               {
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+               },
+               {
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+               },
+               {
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+               },
+               {
+                       { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+               },
+               {
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+               },
+               {
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+               },
+               {
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+               },
+               {
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+               },
+               {
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+               },
+               {
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+               },
+               {
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+               },
+               {
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+               },
+               {
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+               },
+               {
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+               },
+               {
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+               },
+               {
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+               },
+               {
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+               },
+               {
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+               },
+               {
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+               },
+               {
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+               },
+               {
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+               },
+               {
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+               },
+               {
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+               },
+               {
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+               },
+               {
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+                       { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+               },
+               {
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+               },
+               {
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+               },
+               {
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+               },
+               {
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+               },
+               {
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+               },
+               {
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+                       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+               },
+               {
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+               },
+               {
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+               },
+               {
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+               },
+               {
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+               },
+               {
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+               },
+               {
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+               },
+               {
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+               },
+               {
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+               },
+               {
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+               },
+               {
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+               },
+               {
+                       { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+                       { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+               },
+               {
+                       { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+                       { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+               },
+               {
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+               },
+               {
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+               },
+               {
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+               },
+               {
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+               },
+               {
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+               },
+               {
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+               },
+               {
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+               },
+               {
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+               },
+               {
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+               },
+               {
+                       { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+               },
+               {
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+               },
+               {
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+                       { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+               },
+               {
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+               },
+               {
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+               },
+               {
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+               },
+               {
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+               },
+               {
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+               },
+               {
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+               },
+               {
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+               },
+               {
+                       { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+                       { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+               },
+               {
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+                       { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+               },
+               {
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+               },
+               {
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+               },
+               {
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+               },
+               {
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+               },
+               {
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+               },
+               {
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+               },
+               {
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+               },
+               {
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+               },
+               {
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+               },
+               {
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+               },
+               {
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+               },
+               {
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+               },
+               {
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+               },
+               {
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+               },
+               {
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+               },
+               {
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+               },
+               {
+                       { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+               },
+               {
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+               },
+               {
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+               },
+               {
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+               },
+               {
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+               },
+               {
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+               },
+               {
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+               },
+               {
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+               },
+               {
+                       { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+                       { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+               },
+               {
+                       { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+                       { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+               },
+               {
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+               },
+               {
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+               },
+               {
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+               },
+               {
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+                       { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+               },
+               {
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+               },
+               {
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+                       { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+               },
+               {
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+                       { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+               },
+               {
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+                       { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+               },
+               {
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+               },
+               {
+                       { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+                       { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+               },
+               {
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+               },
+               {
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+               },
+               {
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+               },
+               {
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+                       { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+               },
+               {
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+               },
+               {
+                       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+                       { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+               },
+               {
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+               },
+               {
+                       { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+               },
+               {
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+               },
+               {
+                       { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+                       { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+               },
+               {
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+                       { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+               },
+               {
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+               },
+               {
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+                       { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+               },
+               {
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+               },
+               {
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+               },
+               {
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+               },
+               {
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+               },
+               {
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+               },
+               {
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+               },
+               {
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+               },
+               {
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+               },
+               {
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+               },
+               {
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+               },
+               {
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+               },
+               {
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+               },
+               {
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+               },
+               {
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+                       { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+               },
+               {
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+               },
+               {
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+               },
+               {
+                       { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+               },
+               {
+                       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+               },
+               {
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+               },
+               {
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+               },
+               {
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+               },
+               {
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+                       { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+               },
+               {
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+               },
+               {
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+               },
+               {
+                       { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+               },
+               {
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+               },
+               {
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+               },
+               {
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+               },
+               {
+                       { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+                       { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+               },
+               {
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+               },
+               {
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+               },
+               {
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+               },
+               {
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+               },
+               {
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+               },
+               {
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+               },
+               {
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+               },
+               {
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+               },
+               {
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+                       { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+               },
+               {
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+               },
+               {
+                       { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+               },
+               {
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+                       { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+               },
+               {
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+               },
+               {
+                       { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+                       { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+               },
+               {
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+                       { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+               },
+               {
+                       { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+               },
+               {
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+               },
+               {
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+               },
+               {
+                       { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+                       { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+               },
+               {
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+               },
+               {
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+               },
+               {
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+               },
+               {
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+               },
+               {
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+               },
+               {
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+               },
+               {
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+               },
+               {
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+               },
+               {
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+               },
+               {
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+               },
+               {
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+               },
+               {
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+               },
+               {
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+               },
+               {
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+               },
+               {
+                       { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+               },
+               {
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+               },
+               {
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+               },
+               {
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+               },
+               {
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+               },
+               {
+                       { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+               },
+               {
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+               },
+               {
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+               },
+               {
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+               },
+               {
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+               },
+               {
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+               },
+               {
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+               },
+               {
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+               },
+               {
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+               },
+               {
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+               },
+               {
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+               },
+               {
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+               },
+               {
+                       { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+                       { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+               },
+               {
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+                       { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+               },
+               {
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+               },
+               {
+                       { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+                       { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+                       { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+               },
+               {
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+                       { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+               },
+               {
+                       { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+               },
+               {
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+               },
+               {
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+               },
+               {
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+               },
+               {
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+                       { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+               },
+               {
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+               },
+               {
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+               },
+               {
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+               },
+               {
+                       { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+               },
+               {
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+               },
+               {
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+               },
+               {
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+               },
+               {
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+               },
+               {
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+               },
+               {
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+               },
+               {
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+               },
+               {
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+               },
+               {
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+               },
+               {
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+               },
+               {
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+               },
+               {
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+               },
+               {
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+               },
+               {
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+               },
+               {
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+               },
+               {
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+               },
+               {
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+               },
+               {
+                       { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+                       { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+               },
+               {
+                       { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+               },
+               {
+                       { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+                       { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+                       { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+               },
+               {
+                       { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+                       { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+               },
+               {
+                       { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+               },
+               {
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+                       { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+               },
+               {
+                       { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+                       { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+               },
+               {
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+                       { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+               },
+               {
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+               },
+               {
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+               },
+               {
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+               },
+               {
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+                       { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+               },
+               {
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+               },
+               {
+                       { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+                       { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+               },
+               {
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+               },
+               {
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+               },
+               {
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+               },
+               {
+                       { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+                       { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+               },
+               {
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+               },
+               {
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+               },
+               {
+                       { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+               },
+               {
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+               },
+               {
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+               },
+               {
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+               },
+               {
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+               },
+               {
+                       { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+                       { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+               },
+               {
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+               },
+               {
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+               },
+               {
+                       { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+                       { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+               },
+               {
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+                       { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+               },
+               {
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+               },
+               {
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+               },
+               {
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+                       { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+               },
+               {
+                       { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+               },
+               {
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+               },
+               {
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+                       { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+               },
+               {
+                       { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+               },
+               {
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+                       { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+               },
+               {
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+               },
+               {
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+                       { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+               },
+               {
+                       { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+               },
+               {
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+               },
+               {
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+               },
+               {
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+               },
+               {
+                       { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+               },
+               {
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+                       { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+               },
+               {
+                       { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+               },
+               {
+                       { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+                       { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+               },
+               {
+                       { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+               },
+               {
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+               },
+               {
+                       { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+               },
+               {
+                       { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+                       { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+               },
+               {
+                       { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+                       { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+               },
+               {
+                       { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+                       { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+               },
+               {
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+               },
+               {
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+                       { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+               },
+               {
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+                       { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+               },
+               {
+                       { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+                       { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+               },
+               {
+                       { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+                       { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+               },
+               {
+                       { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+                       { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+                       { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+               },
+               {
+                       { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+                       { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+               },
+               {
+                       { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+                       { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+               },
+               {
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+                       { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+               },
+               {
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+               },
+               {
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+                       { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+               },
+               {
+                       { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+               },
+               {
+                       { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+               },
+               {
+                       { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+                       { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+               },
+               {
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+               },
+               {
+                       { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+               },
+               {
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+               },
+               {
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+               },
+               {
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+               },
+               {
+                       { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+                       { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+               },
+               {
+                       { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+                       { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+                       { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+               },
+               {
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+                       { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+               },
+               {
+                       { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+                       { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+               },
+               {
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+               },
+               {
+                       { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+                       { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+               },
+               {
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+               },
+               {
+                       { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+                       { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+               },
+               {
+                       { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+               },
+               {
+                       { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+               },
+               {
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+                       { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+               },
+               {
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+               },
+               {
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+               },
+               {
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+               },
+               {
+                       { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+                       { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+               },
+               {
+                       { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+               },
+               {
+                       { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+                       { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+               },
+               {
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+                       { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+               },
+               {
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+               },
+               {
+                       { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+                       { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+               },
+               {
+                       { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+               },
+               {
+                       { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+                       { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+               },
+               {
+                       { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+                       { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+               },
+               {
+                       { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+                       { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+               },
+               {
+                       { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+                       { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+               },
+               {
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+                       { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+                       { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+               },
+               {
+                       { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+                       { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+               },
+               {
+                       { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+                       { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+               },
+               {
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+                       { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+               },
+               {
+                       { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+                       { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+               },
+               {
+                       { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+                       { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+               },
+               {
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+               },
+               {
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+               },
+               {
+                       { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+                       { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+               },
+               {
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+                       { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+                       { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+               },
+               {
+                       { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+                       { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+               },
+               {
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+                       { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+               },
+               {
+                       { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+                       { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+               },
+               {
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+                       { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+               },
+               {
+                       { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+                       { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+               },
+               {
+                       { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+                       { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+                       { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+               },
+               {
+                       { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+                       { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+               },
+               {
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+               },
+               {
+                       { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+                       { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+                       { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+               },
+               {
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+               },
+               {
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+               },
+               {
+                       { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+                       { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+               },
+               {
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+               },
+               {
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+               },
+               {
+                       { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+                       { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+                       { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+               },
+               {
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+               },
+               {
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+               },
+               {
+                       { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+                       { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+                       { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+               },
+               {
+                       { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+                       { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+               },
+               {
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+               },
+               {
+                       { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+                       { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+                       { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+               },
+               {
+                       { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+                       { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+               },
+               {
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+               },
+               {
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+                       { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+               },
+               {
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+               },
+               {
+                       { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+                       { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+               },
+               {
+                       { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+                       { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+                       { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+               },
+               {
+                       { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+                       { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+               },
+               {
+                       { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+                       { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+               },
+               {
+                       { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+                       { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+                       { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+               },
+               {
+                       { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+                       { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+               },
+               {
+                       { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+                       { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+               },
+               {
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+                       { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+               },
+               {
+                       { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+                       { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+               },
+               {
+                       { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+               },
+               {
+                       { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+                       { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+                       { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+               },
+               {
+                       { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+                       { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+               },
+               {
+                       { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+                       { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+               },
+               {
+                       { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+                       { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+                       { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+               },
+               {
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+                       { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+               },
+               {
+                       { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+                       { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+               },
+               {
+                       { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+                       { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+                       { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+               },
+               {
+                       { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+                       { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+               },
+               {
+                       { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+               },
+               {
+                       { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+                       { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+                       { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+               },
+               {
+                       { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+                       { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+               },
+               {
+                       { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+                       { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+               },
+               {
+                       { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+                       { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+                       { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+               },
+               {
+                       { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+                       { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+               },
+               {
+                       { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+                       { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+               },
+               {
+                       { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+                       { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+               },
+       },
+       {
+               {
+                       { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+                       { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+               },
+               {
+                       { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+                       { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+               },
+               {
+                       { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+                       { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+               },
+               {
+                       { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+                       { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+                       { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+               },
+               {
+                       { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+                       { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+               },
+               {
+                       { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+                       { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+               },
+               {
+                       { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+                       { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+               },
+       },
+       {
+               {
+                       { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+                       { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+               },
+               {
+                       { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+                       { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+               },
+               {
+                       { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+                       { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+               },
+               {
+                       { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+                       { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+               },
+       },
+};
+#endif
+
+#ifdef CONFIG_X86
+/**
+ * PSHUFB tables for generic multiplication.
+ *
+ * Indexes are [MULTIPLIER][LH],
+ * where MULTIPLIER ranges from 0 to 255 and LH from 0 to 1
+ * (0 selects the low-nibble table, 1 the high-nibble table).
+ */
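+/*
+ * A minimal usage sketch (an editor's illustration, not part of the
+ * upstream sources; the helper name gf_mul_byte is hypothetical):
+ * multiplying a byte x by the constant mul looks up its low and high
+ * nibbles in the two halves of the table and XORs the results:
+ *
+ *	static inline uint8_t gf_mul_byte(uint8_t mul, uint8_t x)
+ *	{
+ *		return raid_gfmulpshufb[mul][0][x & 0x0f]
+ *			^ raid_gfmulpshufb[mul][1][x >> 4];
+ *	}
+ *
+ * The SSSE3 PSHUFB instruction performs the same 16-entry nibble
+ * lookup on sixteen bytes in parallel, which is why each half-table
+ * holds exactly sixteen entries.
+ */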
+const uint8_t __aligned(256) raid_gfmulpshufb[256][2][16] =
+{
+       {
+               { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+               { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+       },
+       {
+               { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+               { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+       },
+       {
+               { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+               { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+       },
+       {
+               { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+               { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+       },
+       {
+               { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+               { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+       },
+       {
+               { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+               { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+       },
+       {
+               { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+               { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+       },
+       {
+               { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+               { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+       },
+       {
+               { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+               { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+       },
+       {
+               { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+               { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+       },
+       {
+               { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+               { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+       },
+       {
+               { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+               { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+       },
+       {
+               { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+               { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+       },
+       {
+               { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+               { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+       },
+       {
+               { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+               { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+       },
+       {
+               { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+               { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+       },
+       {
+               { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+               { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+       },
+       {
+               { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+               { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+       },
+       {
+               { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+               { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+       },
+       {
+               { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+               { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+       },
+       {
+               { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+               { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+       },
+       {
+               { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+               { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+       },
+       {
+               { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+               { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+       },
+       {
+               { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+               { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+       },
+       {
+               { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+               { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+       },
+       {
+               { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+               { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+       },
+       {
+               { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+               { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+       },
+       {
+               { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+               { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+       },
+       {
+               { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+               { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+       },
+       {
+               { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+               { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+       },
+       {
+               { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+               { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+       },
+       {
+               { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+               { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+       },
+       {
+               { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+               { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+       },
+       {
+               { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+               { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+       },
+       {
+               { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+               { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+       },
+       {
+               { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+               { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+       },
+       {
+               { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+               { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+       },
+       {
+               { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+               { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+       },
+       {
+               { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+               { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+       },
+       {
+               { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+               { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+       },
+       {
+               { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+               { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+       },
+       {
+               { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+               { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+       },
+       {
+               { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+               { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+       },
+       {
+               { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+               { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+       },
+       {
+               { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+               { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+       },
+       {
+               { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+               { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+       },
+       {
+               { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+               { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+       },
+       {
+               { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+               { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+       },
+       {
+               { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+               { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+       },
+       {
+               { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+               { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+       },
+       {
+               { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+               { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+       },
+       {
+               { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+               { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+       },
+       {
+               { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+               { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+       },
+       {
+               { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+               { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+       },
+       {
+               { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+               { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+       },
+       {
+               { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+               { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+       },
+       {
+               { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+               { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+       },
+       {
+               { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+               { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+       },
+       {
+               { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+               { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+       },
+       {
+               { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+               { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+       },
+       {
+               { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+               { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+       },
+       {
+               { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+               { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+       },
+       {
+               { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+               { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+       },
+       {
+               { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+               { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+       },
+       {
+               { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+               { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+       },
+       {
+               { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+               { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+       },
+       {
+               { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+               { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+       },
+       {
+               { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+               { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+       },
+       {
+               { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+               { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+       },
+       {
+               { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+               { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+       },
+       {
+               { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+               { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+       },
+       {
+               { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+               { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+       },
+       {
+               { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+               { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+       },
+       {
+               { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+               { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+       },
+       {
+               { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+               { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+       },
+       {
+               { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+               { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+       },
+       {
+               { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+               { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+       },
+       {
+               { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+               { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+       },
+       {
+               { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+               { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+       },
+       {
+               { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+               { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+       },
+       {
+               { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+               { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+       },
+       {
+               { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+               { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+       },
+       {
+               { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+               { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+       },
+       {
+               { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+               { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+       },
+       {
+               { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+               { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+       },
+       {
+               { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+               { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+       },
+       {
+               { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+               { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+       },
+       {
+               { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+               { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+       },
+       {
+               { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+               { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+       },
+       {
+               { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+               { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+       },
+       {
+               { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+               { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+       },
+       {
+               { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+               { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+       },
+       {
+               { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+               { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+       },
+       {
+               { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+               { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+       },
+       {
+               { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+               { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+       },
+       {
+               { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+               { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+       },
+       {
+               { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+               { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+       },
+       {
+               { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+               { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+       },
+       {
+               { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+               { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+       },
+       {
+               { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+               { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+       },
+       {
+               { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+               { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+       },
+       {
+               { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+               { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+       },
+       {
+               { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+               { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+       },
+       {
+               { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+               { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+       },
+       {
+               { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+               { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+       },
+       {
+               { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+               { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+       },
+       {
+               { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+               { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+       },
+       {
+               { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+               { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+       },
+       {
+               { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+               { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+       },
+       {
+               { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+               { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+       },
+       {
+               { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+               { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+       },
+       {
+               { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+               { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+       },
+       {
+               { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+               { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+       },
+       {
+               { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+               { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+       },
+       {
+               { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+               { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+       },
+       {
+               { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+               { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+       },
+       {
+               { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+               { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+       },
+       {
+               { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+               { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+       },
+       {
+               { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+               { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+       },
+       {
+               { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+               { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+       },
+       {
+               { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+               { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+       },
+       {
+               { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+               { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+       },
+       {
+               { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+               { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+       },
+       {
+               { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+               { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+       },
+       {
+               { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+               { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+       },
+       {
+               { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+               { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+       },
+       {
+               { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+               { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+       },
+       {
+               { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+               { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+       },
+       {
+               { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+               { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+       },
+       {
+               { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+               { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+       },
+       {
+               { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+               { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+       },
+       {
+               { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+               { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+       },
+       {
+               { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+               { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+       },
+       {
+               { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+               { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+       },
+       {
+               { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+               { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+       },
+       {
+               { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+               { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+       },
+       {
+               { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+               { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+       },
+       {
+               { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+               { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+       },
+       {
+               { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+               { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+       },
+       {
+               { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+               { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+       },
+       {
+               { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+               { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+       },
+       {
+               { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+               { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+       },
+       {
+               { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+               { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+       },
+       {
+               { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+               { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+       },
+       {
+               { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+               { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+       },
+       {
+               { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+               { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+       },
+       {
+               { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+               { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+       },
+       {
+               { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+               { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+       },
+       {
+               { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+               { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+       },
+       {
+               { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+               { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+       },
+       {
+               { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+               { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+       },
+       {
+               { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+               { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+       },
+       {
+               { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+               { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+       },
+       {
+               { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+               { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+       },
+       {
+               { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+               { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+       },
+       {
+               { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+               { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+       },
+       {
+               { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+               { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+       },
+       {
+               { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+               { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+       },
+       {
+               { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+               { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+       },
+       {
+               { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+               { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+       },
+       {
+               { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+               { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+       },
+       {
+               { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+               { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+       },
+       {
+               { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+               { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+       },
+       {
+               { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+               { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+       },
+       {
+               { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+               { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+       },
+       {
+               { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+               { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+       },
+       {
+               { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+               { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+       },
+       {
+               { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+               { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+       },
+       {
+               { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+               { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+       },
+       {
+               { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+               { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+       },
+       {
+               { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+               { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+       },
+       {
+               { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+               { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+       },
+       {
+               { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+               { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+       },
+       {
+               { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+               { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+       },
+       {
+               { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+               { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+       },
+       {
+               { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+               { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+       },
+       {
+               { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+               { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+       },
+       {
+               { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+               { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+       },
+       {
+               { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+               { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+       },
+       {
+               { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+               { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+       },
+       {
+               { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+               { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+       },
+       {
+               { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+               { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+       },
+       {
+               { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+               { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+       },
+       {
+               { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+               { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+       },
+       {
+               { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+               { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+       },
+       {
+               { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+               { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+       },
+       {
+               { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+               { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+       },
+       {
+               { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+               { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+       },
+       {
+               { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+               { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+       },
+       {
+               { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+               { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+       },
+       {
+               { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+               { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+       },
+       {
+               { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+               { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+       },
+       {
+               { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+               { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+       },
+       {
+               { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+               { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+       },
+       {
+               { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+               { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+       },
+       {
+               { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+               { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+       },
+       {
+               { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+               { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+       },
+       {
+               { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+               { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+       },
+       {
+               { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+               { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+       },
+       {
+               { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+               { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+       },
+       {
+               { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+               { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+       },
+       {
+               { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+               { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+       },
+       {
+               { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+               { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+       },
+       {
+               { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+               { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+       },
+       {
+               { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+               { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+       },
+       {
+               { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+               { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+       },
+       {
+               { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+               { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+       },
+       {
+               { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+               { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+       },
+       {
+               { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+               { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+       },
+       {
+               { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+               { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+       },
+       {
+               { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+               { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+       },
+       {
+               { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+               { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+       },
+       {
+               { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+               { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+       },
+       {
+               { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+               { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+       },
+       {
+               { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+               { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+       },
+       {
+               { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+               { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+       },
+       {
+               { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+               { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+       },
+       {
+               { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+               { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+       },
+       {
+               { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+               { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+       },
+       {
+               { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+               { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+       },
+       {
+               { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+               { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+       },
+       {
+               { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+               { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+       },
+       {
+               { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+               { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+       },
+       {
+               { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+               { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+       },
+       {
+               { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+               { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+       },
+       {
+               { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+               { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+       },
+       {
+               { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+               { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+       },
+       {
+               { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+               { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+       },
+       {
+               { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+               { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+       },
+       {
+               { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+               { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+       },
+       {
+               { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+               { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+       },
+       {
+               { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+               { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+       },
+       {
+               { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+               { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+       },
+       {
+               { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+               { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+       },
+       {
+               { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+               { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+       },
+       {
+               { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+               { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+       },
+       {
+               { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+               { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+       },
+       {
+               { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+               { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+       },
+       {
+               { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+               { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+       },
+       {
+               { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+               { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+       },
+       {
+               { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+               { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+       },
+       {
+               { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+               { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+       },
+       {
+               { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+               { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+       },
+       {
+               { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+               { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+       },
+       {
+               { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+               { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+       },
+       {
+               { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+               { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+       },
+       {
+               { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+               { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+       },
+       {
+               { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+               { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+       },
+       {
+               { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+               { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+       },
+       {
+               { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+               { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+       },
+       {
+               { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+               { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+       },
+       {
+               { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+               { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+       },
+       {
+               { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+               { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+       },
+       {
+               { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+               { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+       },
+       {
+               { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+               { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+       },
+       {
+               { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+               { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+       },
+};
+#endif
+
diff --git a/raid/tag.c b/raid/tag.c
new file mode 100644
index 0000000..bfeefaa
--- /dev/null
+++ b/raid/tag.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+static struct raid_func {
+       const char *name;
+       void (*p)();
+} RAID_FUNC[] = {
+       { "int8", raid_gen3_int8 },
+       { "int8", raid_gen4_int8 },
+       { "int8", raid_gen5_int8 },
+       { "int8", raid_gen6_int8 },
+       { "int32", raid_gen1_int32 },
+       { "int64", raid_gen1_int64 },
+       { "int32", raid_gen2_int32 },
+       { "int64", raid_gen2_int64 },
+       { "int32", raid_genz_int32 },
+       { "int64", raid_genz_int64 },
+       { "int8", raid_rec1_int8 },
+       { "int8", raid_rec2_int8 },
+       { "int8", raid_recX_int8 },
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+       { "sse2", raid_gen1_sse2 },
+       { "sse2", raid_gen2_sse2 },
+       { "sse2", raid_genz_sse2 },
+#endif
+#ifdef CONFIG_SSSE3
+       { "ssse3", raid_gen3_ssse3 },
+       { "ssse3", raid_gen4_ssse3 },
+       { "ssse3", raid_gen5_ssse3 },
+       { "ssse3", raid_gen6_ssse3 },
+       { "ssse3", raid_rec1_ssse3 },
+       { "ssse3", raid_rec2_ssse3 },
+       { "ssse3", raid_recX_ssse3 },
+#endif
+#ifdef CONFIG_AVX2
+       { "avx2", raid_gen1_avx2 },
+       { "avx2", raid_gen2_avx2 },
+       { "avx2", raid_rec1_avx2 },
+       { "avx2", raid_rec2_avx2 },
+       { "avx2", raid_recX_avx2 },
+#endif
+#endif
+
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_SSE2
+       { "sse2e", raid_gen2_sse2ext },
+       { "sse2e", raid_genz_sse2ext },
+#endif
+#ifdef CONFIG_SSSE3
+       { "ssse3e", raid_gen3_ssse3ext },
+       { "ssse3e", raid_gen4_ssse3ext },
+       { "ssse3e", raid_gen5_ssse3ext },
+       { "ssse3e", raid_gen6_ssse3ext },
+#endif
+#ifdef CONFIG_AVX2
+       { "avx2e", raid_gen3_avx2ext },
+       { "avx2e", raid_genz_avx2ext },
+       { "avx2e", raid_gen4_avx2ext },
+       { "avx2e", raid_gen5_avx2ext },
+       { "avx2e", raid_gen6_avx2ext },
+#endif
+#endif
+       { 0, 0 }
+};
+
+static const char *raid_tag(void (*func)())
+{
+       struct raid_func *i = RAID_FUNC;
+
+       while (i->name != 0) {
+               if (i->p == func)
+                       return i->name;
+               ++i;
+       }
+
+       /* LCOV_EXCL_START */
+       return "unknown";
+       /* LCOV_EXCL_STOP */
+}
+
+const char *raid_gen1_tag(void)
+{
+       return raid_tag(raid_gen_ptr[0]);
+}
+
+const char *raid_gen2_tag(void)
+{
+       return raid_tag(raid_gen_ptr[1]);
+}
+
+const char *raid_genz_tag(void)
+{
+       return raid_tag(raid_genz_ptr);
+}
+
+const char *raid_gen3_tag(void)
+{
+       return raid_tag(raid_gen_ptr[2]);
+}
+
+const char *raid_gen4_tag(void)
+{
+       return raid_tag(raid_gen_ptr[3]);
+}
+
+const char *raid_gen5_tag(void)
+{
+       return raid_tag(raid_gen_ptr[4]);
+}
+
+const char *raid_gen6_tag(void)
+{
+       return raid_tag(raid_gen_ptr[5]);
+}
+
+const char *raid_rec1_tag(void)
+{
+       return raid_tag(raid_rec_ptr[0]);
+}
+
+const char *raid_rec2_tag(void)
+{
+       return raid_tag(raid_rec_ptr[1]);
+}
+
+const char *raid_recX_tag(void)
+{
+       return raid_tag(raid_rec_ptr[2]);
+}
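+
+/*
+ * Usage sketch (not part of the upstream file): once raid_init() has
+ * selected the fastest kernels for the running CPU, the *_tag() helpers
+ * report which implementation was picked, e.g.:
+ *
+ *     raid_init();
+ *     printf("gen1:%s gen2:%s genz:%s\n",
+ *             raid_gen1_tag(), raid_gen2_tag(), raid_genz_tag());
+ */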
+
diff --git a/raid/test.c b/raid/test.c
new file mode 100644
index 0000000..feb8a41
--- /dev/null
+++ b/raid/test.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "cpu.h"
+#include "combo.h"
+#include "memory.h"
+
+/**
+ * Binomial coefficient of n over r.
+ */
+static int ibc(int n, int r)
+{
+       if (r == 0 || n == r)
+               return 1;
+       else
+               return ibc(n - 1, r - 1) + ibc(n - 1, r);
+}
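+/* e.g. with n == 6 (RAID_PARITY_MAX in this library) ibc(6, 2) == 15;
+ * raid_test_combo() below checks the combination enumerators against
+ * exactly these counts.
+ */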
+
+/**
+ * Power n ^ r.
+ */
+static int ipow(int n, int r)
+{
+       int v = 1;
+
+       while (r) {
+               v *= n;
+               --r;
+       }
+       return v;
+}
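+/* e.g. ipow(6, 2) == 36, the number of r-tuples with repetition that
+ * the permutation enumerators checked in raid_test_combo() must yield.
+ */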
+
+int raid_test_combo(void)
+{
+       int r;
+       int count;
+       int p[RAID_PARITY_MAX];
+
+       for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+               /* count combination (r of RAID_PARITY_MAX) elements */
+               count = 0;
+               combination_first(r, RAID_PARITY_MAX, p);
+
+               do {
+                       ++count;
+               } while (combination_next(r, RAID_PARITY_MAX, p));
+
+               if (count != ibc(RAID_PARITY_MAX, r)) {
+                       /* LCOV_EXCL_START */
+                       return -1;
+                       /* LCOV_EXCL_STOP */
+               }
+       }
+
+       for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+               /* count permutation (r of RAID_PARITY_MAX) elements */
+               count = 0;
+               permutation_first(r, RAID_PARITY_MAX, p);
+
+               do {
+                       ++count;
+               } while (permutation_next(r, RAID_PARITY_MAX, p));
+
+               if (count != ipow(RAID_PARITY_MAX, r)) {
+                       /* LCOV_EXCL_START */
+                       return -1;
+                       /* LCOV_EXCL_STOP */
+               }
+       }
+
+       return 0;
+}
+
+int raid_test_insert(void)
+{
+       int p[RAID_PARITY_MAX];
+       int r;
+
+       for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+               permutation_first(r, RAID_PARITY_MAX, p);
+               do {
+                       int i[RAID_PARITY_MAX];
+                       int j;
+
+                       /* insert in order */
+                       for (j = 0; j < r; ++j)
+                               raid_insert(j, i, p[j]);
+
+                       /* check order */
+                       for (j = 1; j < r; ++j) {
+                               if (i[j - 1] > i[j]) {
+                                       /* LCOV_EXCL_START */
+                                       return -1;
+                                       /* LCOV_EXCL_STOP */
+                               }
+                       }
+               } while (permutation_next(r, RAID_PARITY_MAX, p));
+       }
+
+       return 0;
+}
+
+int raid_test_sort(void)
+{
+       int p[RAID_PARITY_MAX];
+       int r;
+
+       for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+               permutation_first(r, RAID_PARITY_MAX, p);
+               do {
+                       int i[RAID_PARITY_MAX];
+                       int j;
+
+                       /* make a copy */
+                       for (j = 0; j < r; ++j)
+                               i[j] = p[j];
+
+                       raid_sort(r, i);
+
+                       /* check order */
+                       for (j = 1; j < r; ++j) {
+                               if (i[j - 1] > i[j]) {
+                                       /* LCOV_EXCL_START */
+                                       return -1;
+                                       /* LCOV_EXCL_STOP */
+                               }
+                       }
+               } while (permutation_next(r, RAID_PARITY_MAX, p));
+       }
+
+       return 0;
+}
+
+int raid_test_rec(int mode, int nd, size_t size)
+{
+       void (*f[RAID_PARITY_MAX][4])(
+               int nr, int *id, int *ip, int nd, size_t size, void **vbuf);
+       void *v_alloc;
+       void **v;
+       void **data;
+       void **parity;
+       void **test;
+       void *data_save[RAID_PARITY_MAX];
+       void *parity_save[RAID_PARITY_MAX];
+       void *waste;
+       int nv;
+       int id[RAID_PARITY_MAX];
+       int ip[RAID_PARITY_MAX];
+       int i;
+       int j;
+       int nr;
+       int nf[RAID_PARITY_MAX];
+       int np;
+
+       raid_mode(mode);
+       if (mode == RAID_MODE_CAUCHY)
+               np = RAID_PARITY_MAX;
+       else
+               np = 3;
+
+       nv = nd + np * 2 + 2;
+
+       v = raid_malloc_vector(nd, nv, size, &v_alloc);
+       if (!v) {
+               /* LCOV_EXCL_START */
+               return -1;
+               /* LCOV_EXCL_STOP */
+       }
+
+       data = v;
+       parity = v + nd;
+       test = v + nd + np;
+
+       for (i = 0; i < np; ++i)
+               parity_save[i] = parity[i];
+
+       memset(v[nv - 2], 0, size);
+       raid_zero(v[nv - 2]);
+
+       waste = v[nv - 1];
+
+       /* fill with pseudo-random data with the arbitrary seed "1" */
+       raid_mrand_vector(1, nd, size, v);
+
+       /* setup recov functions */
+       for (i = 0; i < np; ++i) {
+               nf[i] = 0;
+               if (i == 0) {
+                       f[i][nf[i]++] = raid_rec1_int8;
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+                       if (raid_cpu_has_ssse3())
+                               f[i][nf[i]++] = raid_rec1_ssse3;
+#endif
+#ifdef CONFIG_AVX2
+                       if (raid_cpu_has_avx2())
+                               f[i][nf[i]++] = raid_rec1_avx2;
+#endif
+#endif
+               } else if (i == 1) {
+                       f[i][nf[i]++] = raid_rec2_int8;
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+                       if (raid_cpu_has_ssse3())
+                               f[i][nf[i]++] = raid_rec2_ssse3;
+#endif
+#ifdef CONFIG_AVX2
+                       if (raid_cpu_has_avx2())
+                               f[i][nf[i]++] = raid_rec2_avx2;
+#endif
+#endif
+               } else {
+                       f[i][nf[i]++] = raid_recX_int8;
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+                       if (raid_cpu_has_ssse3())
+                               f[i][nf[i]++] = raid_recX_ssse3;
+#endif
+#ifdef CONFIG_AVX2
+                       if (raid_cpu_has_avx2())
+                               f[i][nf[i]++] = raid_recX_avx2;
+#endif
+#endif
+               }
+       }
+
+       /* compute the parity */
+       raid_gen_ref(nd, np, size, v);
+
+       /* set all the parity to the waste v */
+       for (i = 0; i < np; ++i)
+               parity[i] = waste;
+
+       /* all parity levels */
+       for (nr = 1; nr <= np; ++nr) {
+               /* all combinations (nr of nd) disks */
+               combination_first(nr, nd, id);
+               do {
+                       /* all combinations (nr of np) parities */
+                       combination_first(nr, np, ip);
+                       do {
+                               /* for each recover function */
+                               for (j = 0; j < nf[nr - 1]; ++j) {
+                                       /* set */
+                                       for (i = 0; i < nr; ++i) {
+                                               /* remove the missing data */
+                                               data_save[i] = data[id[i]];
+                                               data[id[i]] = test[i];
+                                               /* set the parity to use */
+                                               parity[ip[i]] = parity_save[ip[i]];
+                                       }
+
+                                       /* recover */
+                                       f[nr - 1][j](nr, id, ip, nd, size, v);
+
+                                       /* check */
+                                       for (i = 0; i < nr; ++i) {
+                                               if (memcmp(test[i], data_save[i], size) != 0) {
+                                                       /* LCOV_EXCL_START */
+                                                       goto bail;
+                                                       /* LCOV_EXCL_STOP */
+                                               }
+                                       }
+
+                                       /* restore */
+                                       for (i = 0; i < nr; ++i) {
+                                               /* restore the data */
+                                               data[id[i]] = data_save[i];
+                                               /* restore the parity */
+                                               parity[ip[i]] = waste;
+                                       }
+                               }
+                       } while (combination_next(nr, np, ip));
+               } while (combination_next(nr, nd, id));
+       }
+
+       free(v_alloc);
+       free(v);
+       return 0;
+
+bail:
+       /* LCOV_EXCL_START */
+       free(v_alloc);
+       free(v);
+       return -1;
+       /* LCOV_EXCL_STOP */
+}
+
+int raid_test_par(int mode, int nd, size_t size)
+{
+       void (*f[64])(int nd, size_t size, void **vbuf);
+       void *v_alloc;
+       void **v;
+       int nv;
+       int i, j;
+       int nf;
+       int np;
+
+       raid_mode(mode);
+       if (mode == RAID_MODE_CAUCHY)
+               np = RAID_PARITY_MAX;
+       else
+               np = 3;
+
+       nv = nd + np * 2;
+
+       v = raid_malloc_vector(nd, nv, size, &v_alloc);
+       if (!v) {
+               /* LCOV_EXCL_START */
+               return -1;
+               /* LCOV_EXCL_STOP */
+       }
+
+       /* check memory */
+       if (raid_mtest_vector(nv, size, v) != 0) {
+               /* LCOV_EXCL_START */
+               goto bail;
+               /* LCOV_EXCL_STOP */
+       }
+
+       /* fill with pseudo-random data with the arbitrary seed "2" */
+       raid_mrand_vector(2, nv, size, v);
+
+       /* compute the parity */
+       raid_gen_ref(nd, np, size, v);
+
+       /* copy in back buffers */
+       for (i = 0; i < np; ++i)
+               memcpy(v[nd + np + i], v[nd + i], size);
+
+       /* load all the available functions */
+       nf = 0;
+
+       f[nf++] = raid_gen1_int32;
+       f[nf++] = raid_gen1_int64;
+       f[nf++] = raid_gen2_int32;
+       f[nf++] = raid_gen2_int64;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+       if (raid_cpu_has_sse2()) {
+               f[nf++] = raid_gen1_sse2;
+               f[nf++] = raid_gen2_sse2;
+#ifdef CONFIG_X86_64
+               f[nf++] = raid_gen2_sse2ext;
+#endif
+       }
+#endif
+
+#ifdef CONFIG_AVX2
+       if (raid_cpu_has_avx2()) {
+               f[nf++] = raid_gen1_avx2;
+               f[nf++] = raid_gen2_avx2;
+       }
+#endif
+#endif /* CONFIG_X86 */
+
+       if (mode == RAID_MODE_CAUCHY) {
+               f[nf++] = raid_gen3_int8;
+               f[nf++] = raid_gen4_int8;
+               f[nf++] = raid_gen5_int8;
+               f[nf++] = raid_gen6_int8;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+               if (raid_cpu_has_ssse3()) {
+                       f[nf++] = raid_gen3_ssse3;
+                       f[nf++] = raid_gen4_ssse3;
+                       f[nf++] = raid_gen5_ssse3;
+                       f[nf++] = raid_gen6_ssse3;
+#ifdef CONFIG_X86_64
+                       f[nf++] = raid_gen3_ssse3ext;
+                       f[nf++] = raid_gen4_ssse3ext;
+                       f[nf++] = raid_gen5_ssse3ext;
+                       f[nf++] = raid_gen6_ssse3ext;
+#endif
+               }
+#endif
+
+#ifdef CONFIG_AVX2
+#ifdef CONFIG_X86_64
+               if (raid_cpu_has_avx2()) {
+                       f[nf++] = raid_gen3_avx2ext;
+                       f[nf++] = raid_gen4_avx2ext;
+                       f[nf++] = raid_gen5_avx2ext;
+                       f[nf++] = raid_gen6_avx2ext;
+               }
+#endif
+#endif
+#endif /* CONFIG_X86 */
+       } else {
+               f[nf++] = raid_genz_int32;
+               f[nf++] = raid_genz_int64;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+               if (raid_cpu_has_sse2()) {
+                       f[nf++] = raid_genz_sse2;
+#ifdef CONFIG_X86_64
+                       f[nf++] = raid_genz_sse2ext;
+#endif
+               }
+#endif
+
+#ifdef CONFIG_AVX2
+#ifdef CONFIG_X86_64
+               if (raid_cpu_has_avx2())
+                       f[nf++] = raid_genz_avx2ext;
+#endif
+#endif
+#endif /* CONFIG_X86 */
+       }
+
+       /* check all the functions */
+       for (j = 0; j < nf; ++j) {
+               /* compute parity */
+               f[j](nd, size, v);
+
+               /* check it */
+               for (i = 0; i < np; ++i) {
+                       if (memcmp(v[nd + np + i], v[nd + i], size) != 0) {
+                               /* LCOV_EXCL_START */
+                               goto bail;
+                               /* LCOV_EXCL_STOP */
+                       }
+               }
+       }
+
+       free(v_alloc);
+       free(v);
+       return 0;
+
+bail:
+       /* LCOV_EXCL_START */
+       free(v_alloc);
+       free(v);
+       return -1;
+       /* LCOV_EXCL_STOP */
+}
+
diff --git a/raid/test.h b/raid/test.h
new file mode 100644
index 0000000..6d902c7
--- /dev/null
+++ b/raid/test.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_TEST_H
+#define __RAID_TEST_H
+
+/**
+ * Tests insertion function.
+ *
+ * Test raid_insert() with all the possible combinations of elements to insert.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_insert(void);
+
+/**
+ * Tests sorting function.
+ *
+ * Test raid_sort() with all the possible combinations of elements to sort.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_sort(void);
+
+/**
+ * Tests combination functions.
+ *
+ * Tests combination_first() and combination_next() for all the parity levels.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_combo(void);
+
+/**
+ * Tests recovering functions.
+ *
+ * All the recovering functions are tested with all the combinations
+ * of failing disks and recovering parities.
+ *
+ * Note that the test time grows exponentially with the number of disks.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_rec(int mode, int nd, size_t size);
+
+/**
+ * Tests parity generation functions.
+ *
+ * All the parity generation functions are tested with the specified
+ * number of disks.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_par(int mode, int nd, size_t size);
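+
+/*
+ * A minimal self-test driver might look like this sketch (illustrative
+ * only; RAID_MODE_CAUCHY comes from raid.h, and nd/size are arbitrary
+ * small test values):
+ *
+ *     if (raid_test_combo() != 0 || raid_test_insert() != 0 ||
+ *         raid_test_sort() != 0 ||
+ *         raid_test_par(RAID_MODE_CAUCHY, 8, 256) != 0 ||
+ *         raid_test_rec(RAID_MODE_CAUCHY, 8, 256) != 0)
+ *             abort();
+ */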
+
+#endif
+
diff --git a/raid/x86.c b/raid/x86.c
new file mode 100644
index 0000000..84b12c1
--- /dev/null
+++ b/raid/x86.c
@@ -0,0 +1,2452 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * For x86 optimizations, see:
+ *
+ * Software optimization resources
+ * http://www.agner.org/optimize/
+ *
+ * x86, x64 Instruction Latency, Memory Latency and CPUID dumps
+ * http://users.atw.hu/instlatx64/
+ */
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+/*
+ * GEN1 (RAID5 with xor) SSE2 implementation
+ *
+ * Intentionally processes no more than 64 bytes per iteration, because 64 is
+ * the typical cache block size; processing 128 bytes doesn't increase
+ * performance, and in some cases even decreases it.
+ */
+void raid_gen1_sse2(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+
+       raid_sse_begin();
+
+       for (i = 0; i < size; i += 64) {
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+               asm volatile ("movdqa %0,%%xmm1" : : "m" (v[l][i + 16]));
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (v[l][i + 32]));
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (v[l][i + 48]));
+               for (d = l - 1; d >= 0; --d) {
+                       asm volatile ("pxor %0,%%xmm0" : : "m" (v[d][i]));
+                       asm volatile ("pxor %0,%%xmm1" : : "m" (v[d][i + 16]));
+                       asm volatile ("pxor %0,%%xmm2" : : "m" (v[d][i + 32]));
+                       asm volatile ("pxor %0,%%xmm3" : : "m" (v[d][i + 48]));
+               }
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * GEN1 (RAID5 with xor) AVX2 implementation
+ *
+ * Intentionally processes no more than 64 bytes per iteration, because 64 is
+ * the typical cache block size; processing 128 bytes doesn't increase
+ * performance, and in some cases even decreases it.
+ */
+void raid_gen1_avx2(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+
+       raid_avx_begin();
+
+       for (i = 0; i < size; i += 64) {
+               asm volatile ("vmovdqa %0,%%ymm0" : : "m" (v[l][i]));
+               asm volatile ("vmovdqa %0,%%ymm1" : : "m" (v[l][i + 32]));
+               for (d = l - 1; d >= 0; --d) {
+                       asm volatile ("vpxor %0,%%ymm0,%%ymm0" : : "m" (v[d][i]));
+                       asm volatile ("vpxor %0,%%ymm1,%%ymm1" : : "m" (v[d][i + 32]));
+               }
+               asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+               asm volatile ("vmovntdq %%ymm1,%0" : "=m" (p[i + 32]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
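+/*
+ * For reference, the GEN1 kernels above compute plain RAID5 parity; a
+ * scalar sketch of the same operation (the portable implementations are
+ * raid_gen1_int32/int64) is:
+ *
+ *     for (i = 0; i < size; ++i) {
+ *             uint8_t b = v[0][i];
+ *             for (d = 1; d < nd; ++d)
+ *                     b ^= v[d][i];
+ *             p[i] = b;
+ *     }
+ */
+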
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
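+/*
+ * Constants shared by the SSE/AVX kernels: "poly" replicates the GF(2^8)
+ * reduction byte 0x1d (from the generator polynomial x^8+x^4+x^3+x^2+1)
+ * and "low4" replicates the 0x0f mask used to split bytes into nibbles
+ * for the pshufb lookups.
+ */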
+static const struct gfconst16 {
+       uint8_t poly[16];
+       uint8_t low4[16];
+} gfconst16 __aligned(32) = {
+       {
+               0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+               0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d
+       },
+       {
+               0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+               0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
+       },
+};
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+/*
+ * GEN2 (RAID6 with powers of 2) SSE2 implementation
+ */
+void raid_gen2_sse2(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+
+       raid_sse_begin();
+
+       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+       for (i = 0; i < size; i += 32) {
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+               asm volatile ("movdqa %0,%%xmm1" : : "m" (v[l][i + 16]));
+               asm volatile ("movdqa %xmm0,%xmm2");
+               asm volatile ("movdqa %xmm1,%xmm3");
+               for (d = l - 1; d >= 0; --d) {
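+                       /*
+                        * Multiply the running q parity by 2 in GF(2^8):
+                        * pcmpgtb against zero yields 0xff in each byte
+                        * whose top bit is set, paddb shifts every byte
+                        * left by one, and the masked pxor with the 0x1d
+                        * polynomial performs the modular reduction.
+                        */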
+                       asm volatile ("pxor %xmm4,%xmm4");
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("pcmpgtb %xmm2,%xmm4");
+                       asm volatile ("pcmpgtb %xmm3,%xmm5");
+                       asm volatile ("paddb %xmm2,%xmm2");
+                       asm volatile ("paddb %xmm3,%xmm3");
+                       asm volatile ("pand %xmm7,%xmm4");
+                       asm volatile ("pand %xmm7,%xmm5");
+                       asm volatile ("pxor %xmm4,%xmm2");
+                       asm volatile ("pxor %xmm5,%xmm3");
+
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+                       asm volatile ("movdqa %0,%%xmm5" : : "m" (v[d][i + 16]));
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm5,%xmm1");
+                       asm volatile ("pxor %xmm4,%xmm2");
+                       asm volatile ("pxor %xmm5,%xmm3");
+               }
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (q[i + 16]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * GEN2 (RAID6 with powers of 2) AVX2 implementation
+ */
+void raid_gen2_avx2(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+
+       raid_avx_begin();
+
+       asm volatile ("vbroadcasti128 %0, %%ymm7" : : "m" (gfconst16.poly[0]));
+       asm volatile ("vpxor %ymm6,%ymm6,%ymm6");
+
+       for (i = 0; i < size; i += 64) {
+               asm volatile ("vmovdqa %0,%%ymm0" : : "m" (v[l][i]));
+               asm volatile ("vmovdqa %0,%%ymm1" : : "m" (v[l][i + 32]));
+               asm volatile ("vmovdqa %ymm0,%ymm2");
+               asm volatile ("vmovdqa %ymm1,%ymm3");
+               for (d = l - 1; d >= 0; --d) {
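+                       /*
+                        * Same GF(2^8) doubling as in the SSE2 kernel,
+                        * with vpcmpgtb against the zeroed ymm6 flagging
+                        * the bytes whose top bit is set.
+                        */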
+                       asm volatile ("vpcmpgtb %ymm2,%ymm6,%ymm4");
+                       asm volatile ("vpcmpgtb %ymm3,%ymm6,%ymm5");
+                       asm volatile ("vpaddb %ymm2,%ymm2,%ymm2");
+                       asm volatile ("vpaddb %ymm3,%ymm3,%ymm3");
+                       asm volatile ("vpand %ymm7,%ymm4,%ymm4");
+                       asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+                       asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+                       asm volatile ("vpxor %ymm5,%ymm3,%ymm3");
+
+                       asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+                       asm volatile ("vmovdqa %0,%%ymm5" : : "m" (v[d][i + 32]));
+                       asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+                       asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+                       asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+                       asm volatile ("vpxor %ymm5,%ymm3,%ymm3");
+               }
+               asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+               asm volatile ("vmovntdq %%ymm1,%0" : "=m" (p[i + 32]));
+               asm volatile ("vmovntdq %%ymm2,%0" : "=m" (q[i]));
+               asm volatile ("vmovntdq %%ymm3,%0" : "=m" (q[i + 32]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSE2)
+/*
+ * GEN2 (RAID6 with powers of 2) SSE2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen2_sse2ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+
+       raid_sse_begin();
+
+       asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.poly[0]));
+
+       for (i = 0; i < size; i += 64) {
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+               asm volatile ("movdqa %0,%%xmm1" : : "m" (v[l][i + 16]));
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (v[l][i + 32]));
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (v[l][i + 48]));
+               asm volatile ("movdqa %xmm0,%xmm4");
+               asm volatile ("movdqa %xmm1,%xmm5");
+               asm volatile ("movdqa %xmm2,%xmm6");
+               asm volatile ("movdqa %xmm3,%xmm7");
+               for (d = l - 1; d >= 0; --d) {
+                       asm volatile ("pxor %xmm8,%xmm8");
+                       asm volatile ("pxor %xmm9,%xmm9");
+                       asm volatile ("pxor %xmm10,%xmm10");
+                       asm volatile ("pxor %xmm11,%xmm11");
+                       asm volatile ("pcmpgtb %xmm4,%xmm8");
+                       asm volatile ("pcmpgtb %xmm5,%xmm9");
+                       asm volatile ("pcmpgtb %xmm6,%xmm10");
+                       asm volatile ("pcmpgtb %xmm7,%xmm11");
+                       asm volatile ("paddb %xmm4,%xmm4");
+                       asm volatile ("paddb %xmm5,%xmm5");
+                       asm volatile ("paddb %xmm6,%xmm6");
+                       asm volatile ("paddb %xmm7,%xmm7");
+                       asm volatile ("pand %xmm15,%xmm8");
+                       asm volatile ("pand %xmm15,%xmm9");
+                       asm volatile ("pand %xmm15,%xmm10");
+                       asm volatile ("pand %xmm15,%xmm11");
+                       asm volatile ("pxor %xmm8,%xmm4");
+                       asm volatile ("pxor %xmm9,%xmm5");
+                       asm volatile ("pxor %xmm10,%xmm6");
+                       asm volatile ("pxor %xmm11,%xmm7");
+
+                       asm volatile ("movdqa %0,%%xmm8" : : "m" (v[d][i]));
+                       asm volatile ("movdqa %0,%%xmm9" : : "m" (v[d][i + 16]));
+                       asm volatile ("movdqa %0,%%xmm10" : : "m" (v[d][i + 32]));
+                       asm volatile ("movdqa %0,%%xmm11" : : "m" (v[d][i + 48]));
+                       asm volatile ("pxor %xmm8,%xmm0");
+                       asm volatile ("pxor %xmm9,%xmm1");
+                       asm volatile ("pxor %xmm10,%xmm2");
+                       asm volatile ("pxor %xmm11,%xmm3");
+                       asm volatile ("pxor %xmm8,%xmm4");
+                       asm volatile ("pxor %xmm9,%xmm5");
+                       asm volatile ("pxor %xmm10,%xmm6");
+                       asm volatile ("pxor %xmm11,%xmm7");
+               }
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+               asm volatile ("movntdq %%xmm4,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm5,%0" : "=m" (q[i + 16]));
+               asm volatile ("movntdq %%xmm6,%0" : "=m" (q[i + 32]));
+               asm volatile ("movntdq %%xmm7,%0" : "=m" (q[i + 48]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN3 (triple parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen3_ssse3(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 3; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
+       /* generic case with at least two data disks */
+       asm volatile ("movdqa %0,%%xmm3" : : "m" (gfconst16.poly[0]));
+       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 16) {
+               /* last disk without the by two multiplication */
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+               asm volatile ("movdqa %xmm4,%xmm0");
+               asm volatile ("movdqa %xmm4,%xmm1");
+
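+               /*
+                * Multiply the source bytes by the Cauchy coefficient via
+                * pshufb: split each byte into low and high nibbles, use
+                * each nibble to index a 16-entry partial-product table
+                * (gfgenpshufb), and xor the two halves together.
+                */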
+               asm volatile ("movdqa %xmm4,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm6");
+               asm volatile ("pxor   %xmm6,%xmm2");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("pcmpgtb %xmm1,%xmm5");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("pand %xmm3,%xmm5");
+                       asm volatile ("pxor %xmm5,%xmm1");
+
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm4,%xmm1");
+
+                       asm volatile ("movdqa %xmm4,%xmm5");
+                       asm volatile ("psrlw  $4,%xmm5");
+                       asm volatile ("pand   %xmm7,%xmm4");
+                       asm volatile ("pand   %xmm7,%xmm5");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pxor   %xmm6,%xmm2");
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("pshufb %xmm5,%xmm6");
+                       asm volatile ("pxor   %xmm6,%xmm2");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+
+               asm volatile ("pxor %xmm5,%xmm5");
+               asm volatile ("pcmpgtb %xmm1,%xmm5");
+               asm volatile ("paddb %xmm1,%xmm1");
+               asm volatile ("pand %xmm3,%xmm5");
+               asm volatile ("pxor %xmm5,%xmm1");
+
+               asm volatile ("pxor %xmm4,%xmm0");
+               asm volatile ("pxor %xmm4,%xmm1");
+               asm volatile ("pxor %xmm4,%xmm2");
+
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN3 (triple parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen3_ssse3ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 3; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
+       /* generic case with at least two data disks */
+       asm volatile ("movdqa %0,%%xmm3" : : "m" (gfconst16.poly[0]));
+       asm volatile ("movdqa %0,%%xmm11" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 32) {
+               /* last disk without the by two multiplication */
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+               asm volatile ("movdqa %0,%%xmm12" : : "m" (v[l][i + 16]));
+
+               asm volatile ("movdqa %xmm4,%xmm0");
+               asm volatile ("movdqa %xmm4,%xmm1");
+               asm volatile ("movdqa %xmm12,%xmm8");
+               asm volatile ("movdqa %xmm12,%xmm9");
+
+               asm volatile ("movdqa %xmm4,%xmm5");
+               asm volatile ("movdqa %xmm12,%xmm13");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("psrlw  $4,%xmm13");
+               asm volatile ("pand   %xmm11,%xmm4");
+               asm volatile ("pand   %xmm11,%xmm12");
+               asm volatile ("pand   %xmm11,%xmm5");
+               asm volatile ("pand   %xmm11,%xmm13");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("movdqa %xmm2,%xmm10");
+               asm volatile ("movdqa %xmm7,%xmm15");
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm12,%xmm10");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pshufb %xmm13,%xmm15");
+               asm volatile ("pxor   %xmm7,%xmm2");
+               asm volatile ("pxor   %xmm15,%xmm10");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (v[d][i + 16]));
+
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("pxor %xmm13,%xmm13");
+                       asm volatile ("pcmpgtb %xmm1,%xmm5");
+                       asm volatile ("pcmpgtb %xmm9,%xmm13");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("paddb %xmm9,%xmm9");
+                       asm volatile ("pand %xmm3,%xmm5");
+                       asm volatile ("pand %xmm3,%xmm13");
+                       asm volatile ("pxor %xmm5,%xmm1");
+                       asm volatile ("pxor %xmm13,%xmm9");
+
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm4,%xmm1");
+                       asm volatile ("pxor %xmm12,%xmm8");
+                       asm volatile ("pxor %xmm12,%xmm9");
+
+                       asm volatile ("movdqa %xmm4,%xmm5");
+                       asm volatile ("movdqa %xmm12,%xmm13");
+                       asm volatile ("psrlw  $4,%xmm5");
+                       asm volatile ("psrlw  $4,%xmm13");
+                       asm volatile ("pand   %xmm11,%xmm4");
+                       asm volatile ("pand   %xmm11,%xmm12");
+                       asm volatile ("pand   %xmm11,%xmm5");
+                       asm volatile ("pand   %xmm11,%xmm13");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("movdqa %xmm6,%xmm14");
+                       asm volatile ("movdqa %xmm7,%xmm15");
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm12,%xmm14");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pshufb %xmm13,%xmm15");
+                       asm volatile ("pxor   %xmm6,%xmm2");
+                       asm volatile ("pxor   %xmm14,%xmm10");
+                       asm volatile ("pxor   %xmm7,%xmm2");
+                       asm volatile ("pxor   %xmm15,%xmm10");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+               asm volatile ("movdqa %0,%%xmm12" : : "m" (v[0][i + 16]));
+
+               asm volatile ("pxor %xmm5,%xmm5");
+               asm volatile ("pxor %xmm13,%xmm13");
+               asm volatile ("pcmpgtb %xmm1,%xmm5");
+               asm volatile ("pcmpgtb %xmm9,%xmm13");
+               asm volatile ("paddb %xmm1,%xmm1");
+               asm volatile ("paddb %xmm9,%xmm9");
+               asm volatile ("pand %xmm3,%xmm5");
+               asm volatile ("pand %xmm3,%xmm13");
+               asm volatile ("pxor %xmm5,%xmm1");
+               asm volatile ("pxor %xmm13,%xmm9");
+
+               asm volatile ("pxor %xmm4,%xmm0");
+               asm volatile ("pxor %xmm4,%xmm1");
+               asm volatile ("pxor %xmm4,%xmm2");
+               asm volatile ("pxor %xmm12,%xmm8");
+               asm volatile ("pxor %xmm12,%xmm9");
+               asm volatile ("pxor %xmm12,%xmm10");
+
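+               /*
+                * Stream the finished parity blocks out with non-temporal
+                * stores, bypassing the cache since nothing reads them back
+                * soon.
+                */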
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm8,%0" : "=m" (p[i + 16]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm9,%0" : "=m" (q[i + 16]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm10,%0" : "=m" (r[i + 16]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN3 (triple parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses all 16 SIMD registers, so x86-64 is required.
+ */
+void raid_gen3_avx2ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       /* special case with only one data disk */
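+       /* (with a single data disk every parity coefficient is 1, so each
+        * parity block is simply a copy of the data block)
+        */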
+       if (l == 0) {
+               for (i = 0; i < 3; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_avx_begin();
+
+       /* generic case with at least two data disks */
+       asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfconst16.poly[0]));
+       asm volatile ("vbroadcasti128 %0,%%ymm11" : : "m" (gfconst16.low4[0]));
+
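+       /*
+        * Each iteration covers 64 bytes as two 32-byte halves.  vpshufb
+        * looks nibbles up independently in each 128-bit lane, so the 16-byte
+        * tables are replicated into both lanes with vbroadcasti128.
+        */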
+       for (i = 0; i < size; i += 64) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[l][i]));
+               asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[l][i + 32]));
+
+               asm volatile ("vmovdqa %ymm4,%ymm0");
+               asm volatile ("vmovdqa %ymm4,%ymm1");
+               asm volatile ("vmovdqa %ymm12,%ymm8");
+               asm volatile ("vmovdqa %ymm12,%ymm9");
+
+               asm volatile ("vpsrlw  $4,%ymm4,%ymm5");
+               asm volatile ("vpsrlw  $4,%ymm12,%ymm13");
+               asm volatile ("vpand   %ymm11,%ymm4,%ymm4");
+               asm volatile ("vpand   %ymm11,%ymm12,%ymm12");
+               asm volatile ("vpand   %ymm11,%ymm5,%ymm5");
+               asm volatile ("vpand   %ymm11,%ymm13,%ymm13");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm10" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("vpshufb %ymm4,%ymm10,%ymm2");
+               asm volatile ("vpshufb %ymm12,%ymm10,%ymm10");
+               asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+               asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+               asm volatile ("vpxor   %ymm7,%ymm2,%ymm2");
+               asm volatile ("vpxor   %ymm15,%ymm10,%ymm10");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+                       asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[d][i + 32]));
+
+                       asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+                       asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+                       asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+                       asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+                       asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+                       asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+                       asm volatile ("vpand %ymm3,%ymm5,%ymm5");
+                       asm volatile ("vpand %ymm3,%ymm13,%ymm13");
+                       asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+                       asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+                       asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+                       asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+                       asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+                       asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+
+                       asm volatile ("vpsrlw  $4,%ymm4,%ymm5");
+                       asm volatile ("vpsrlw  $4,%ymm12,%ymm13");
+                       asm volatile ("vpand   %ymm11,%ymm4,%ymm4");
+                       asm volatile ("vpand   %ymm11,%ymm12,%ymm12");
+                       asm volatile ("vpand   %ymm11,%ymm5,%ymm5");
+                       asm volatile ("vpand   %ymm11,%ymm13,%ymm13");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("vpshufb %ymm4,%ymm14,%ymm6");
+                       asm volatile ("vpshufb %ymm12,%ymm14,%ymm14");
+                       asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+                       asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+                       asm volatile ("vpxor   %ymm6,%ymm2,%ymm2");
+                       asm volatile ("vpxor   %ymm14,%ymm10,%ymm10");
+                       asm volatile ("vpxor   %ymm7,%ymm2,%ymm2");
+                       asm volatile ("vpxor   %ymm15,%ymm10,%ymm10");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[0][i]));
+               asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[0][i + 32]));
+
+               asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+               asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+               asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+               asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+               asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+               asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+               asm volatile ("vpand %ymm3,%ymm5,%ymm5");
+               asm volatile ("vpand %ymm3,%ymm13,%ymm13");
+               asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+               asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+               asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+               asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+               asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+               asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+               asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+               asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
+
+               asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+               asm volatile ("vmovntdq %%ymm8,%0" : "=m" (p[i + 32]));
+               asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+               asm volatile ("vmovntdq %%ymm9,%0" : "=m" (q[i + 32]));
+               asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+               asm volatile ("vmovntdq %%ymm10,%0" : "=m" (r[i + 32]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN4 (quad parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen4_ssse3(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 4; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
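+       /*
+        * This variant sticks to the eight xmm registers available in 32-bit
+        * mode, so the polynomial and nibble-mask constants cannot stay
+        * resident and are reloaded from memory inside the loop.
+        */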
+       /* generic case with at least two data disks */
+       for (i = 0; i < size; i += 16) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+               asm volatile ("movdqa %xmm4,%xmm0");
+               asm volatile ("movdqa %xmm4,%xmm1");
+
+               asm volatile ("movdqa %xmm4,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm2");
+
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm3");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm3");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("pcmpgtb %xmm1,%xmm5");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("pand %xmm7,%xmm5");
+                       asm volatile ("pxor %xmm5,%xmm1");
+
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm4,%xmm1");
+
+                       asm volatile ("movdqa %xmm4,%xmm5");
+                       asm volatile ("psrlw  $4,%xmm5");
+                       asm volatile ("pand   %xmm7,%xmm4");
+                       asm volatile ("pand   %xmm7,%xmm5");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm2");
+                       asm volatile ("pxor   %xmm7,%xmm2");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm3");
+                       asm volatile ("pxor   %xmm7,%xmm3");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+
+               asm volatile ("pxor %xmm5,%xmm5");
+               asm volatile ("pcmpgtb %xmm1,%xmm5");
+               asm volatile ("paddb %xmm1,%xmm1");
+               asm volatile ("pand %xmm7,%xmm5");
+               asm volatile ("pxor %xmm5,%xmm1");
+
+               asm volatile ("pxor %xmm4,%xmm0");
+               asm volatile ("pxor %xmm4,%xmm1");
+               asm volatile ("pxor %xmm4,%xmm2");
+               asm volatile ("pxor %xmm4,%xmm3");
+
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN4 (quad parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses all 16 SIMD registers, so x86-64 is required.
+ */
+void raid_gen4_ssse3ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 4; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
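+       /*
+        * The "ext" variant uses the full 16-register file: xmm0-xmm7 process
+        * one 16-byte chunk while xmm8-xmm15 mirror the same computation on a
+        * second chunk, so each iteration covers 32 bytes.
+        */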
+       /* generic case with at least two data disks */
+       for (i = 0; i < size; i += 32) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+               asm volatile ("movdqa %0,%%xmm12" : : "m" (v[l][i + 16]));
+
+               asm volatile ("movdqa %xmm4,%xmm0");
+               asm volatile ("movdqa %xmm4,%xmm1");
+               asm volatile ("movdqa %xmm12,%xmm8");
+               asm volatile ("movdqa %xmm12,%xmm9");
+
+               asm volatile ("movdqa %xmm4,%xmm5");
+               asm volatile ("movdqa %xmm12,%xmm13");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("psrlw  $4,%xmm13");
+               asm volatile ("pand   %xmm15,%xmm4");
+               asm volatile ("pand   %xmm15,%xmm12");
+               asm volatile ("pand   %xmm15,%xmm5");
+               asm volatile ("pand   %xmm15,%xmm13");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("movdqa %xmm2,%xmm10");
+               asm volatile ("movdqa %xmm7,%xmm15");
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm12,%xmm10");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pshufb %xmm13,%xmm15");
+               asm volatile ("pxor   %xmm7,%xmm2");
+               asm volatile ("pxor   %xmm15,%xmm10");
+
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("movdqa %xmm3,%xmm11");
+               asm volatile ("movdqa %xmm7,%xmm15");
+               asm volatile ("pshufb %xmm4,%xmm3");
+               asm volatile ("pshufb %xmm12,%xmm11");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pshufb %xmm13,%xmm15");
+               asm volatile ("pxor   %xmm7,%xmm3");
+               asm volatile ("pxor   %xmm15,%xmm11");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+                       asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (v[d][i + 16]));
+
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("pxor %xmm13,%xmm13");
+                       asm volatile ("pcmpgtb %xmm1,%xmm5");
+                       asm volatile ("pcmpgtb %xmm9,%xmm13");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("paddb %xmm9,%xmm9");
+                       asm volatile ("pand %xmm7,%xmm5");
+                       asm volatile ("pand %xmm7,%xmm13");
+                       asm volatile ("pxor %xmm5,%xmm1");
+                       asm volatile ("pxor %xmm13,%xmm9");
+
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm4,%xmm1");
+                       asm volatile ("pxor %xmm12,%xmm8");
+                       asm volatile ("pxor %xmm12,%xmm9");
+
+                       asm volatile ("movdqa %xmm4,%xmm5");
+                       asm volatile ("movdqa %xmm12,%xmm13");
+                       asm volatile ("psrlw  $4,%xmm5");
+                       asm volatile ("psrlw  $4,%xmm13");
+                       asm volatile ("pand   %xmm15,%xmm4");
+                       asm volatile ("pand   %xmm15,%xmm12");
+                       asm volatile ("pand   %xmm15,%xmm5");
+                       asm volatile ("pand   %xmm15,%xmm13");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("movdqa %xmm6,%xmm14");
+                       asm volatile ("movdqa %xmm7,%xmm15");
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm12,%xmm14");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pshufb %xmm13,%xmm15");
+                       asm volatile ("pxor   %xmm6,%xmm2");
+                       asm volatile ("pxor   %xmm14,%xmm10");
+                       asm volatile ("pxor   %xmm7,%xmm2");
+                       asm volatile ("pxor   %xmm15,%xmm10");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("movdqa %xmm6,%xmm14");
+                       asm volatile ("movdqa %xmm7,%xmm15");
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm12,%xmm14");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pshufb %xmm13,%xmm15");
+                       asm volatile ("pxor   %xmm6,%xmm3");
+                       asm volatile ("pxor   %xmm14,%xmm11");
+                       asm volatile ("pxor   %xmm7,%xmm3");
+                       asm volatile ("pxor   %xmm15,%xmm11");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+               asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+               asm volatile ("movdqa %0,%%xmm12" : : "m" (v[0][i + 16]));
+
+               asm volatile ("pxor %xmm5,%xmm5");
+               asm volatile ("pxor %xmm13,%xmm13");
+               asm volatile ("pcmpgtb %xmm1,%xmm5");
+               asm volatile ("pcmpgtb %xmm9,%xmm13");
+               asm volatile ("paddb %xmm1,%xmm1");
+               asm volatile ("paddb %xmm9,%xmm9");
+               asm volatile ("pand %xmm7,%xmm5");
+               asm volatile ("pand %xmm7,%xmm13");
+               asm volatile ("pxor %xmm5,%xmm1");
+               asm volatile ("pxor %xmm13,%xmm9");
+
+               asm volatile ("pxor %xmm4,%xmm0");
+               asm volatile ("pxor %xmm4,%xmm1");
+               asm volatile ("pxor %xmm4,%xmm2");
+               asm volatile ("pxor %xmm4,%xmm3");
+               asm volatile ("pxor %xmm12,%xmm8");
+               asm volatile ("pxor %xmm12,%xmm9");
+               asm volatile ("pxor %xmm12,%xmm10");
+               asm volatile ("pxor %xmm12,%xmm11");
+
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm8,%0" : "=m" (p[i + 16]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm9,%0" : "=m" (q[i + 16]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm10,%0" : "=m" (r[i + 16]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+               asm volatile ("movntdq %%xmm11,%0" : "=m" (s[i + 16]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN4 (quad parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses all 16 SIMD registers, so x86-64 is required.
+ */
+void raid_gen4_avx2ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 4; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_avx_begin();
+
+       /* generic case with at least two data disks */
+       for (i = 0; i < size; i += 64) {
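+               /*
+                * ymm15 doubles as the nibble mask and a shuffle-table
+                * register here, so the mask is rebroadcast whenever a table
+                * load has clobbered it.
+                */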
+               /* last disk, without the multiplication by two */
+               asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+               asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[l][i]));
+               asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[l][i + 32]));
+
+               asm volatile ("vmovdqa %ymm4,%ymm0");
+               asm volatile ("vmovdqa %ymm4,%ymm1");
+               asm volatile ("vmovdqa %ymm12,%ymm8");
+               asm volatile ("vmovdqa %ymm12,%ymm9");
+
+               asm volatile ("vpsrlw  $4,%ymm4,%ymm5");
+               asm volatile ("vpsrlw  $4,%ymm12,%ymm13");
+               asm volatile ("vpand   %ymm15,%ymm4,%ymm4");
+               asm volatile ("vpand   %ymm15,%ymm12,%ymm12");
+               asm volatile ("vpand   %ymm15,%ymm5,%ymm5");
+               asm volatile ("vpand   %ymm15,%ymm13,%ymm13");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm10" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("vpshufb %ymm4,%ymm10,%ymm2");
+               asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+               asm volatile ("vpshufb %ymm12,%ymm10,%ymm10");
+               asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+               asm volatile ("vpxor   %ymm7,%ymm2,%ymm2");
+               asm volatile ("vpxor   %ymm15,%ymm10,%ymm10");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm11" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("vpshufb %ymm4,%ymm11,%ymm3");
+               asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+               asm volatile ("vpshufb %ymm12,%ymm11,%ymm11");
+               asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+               asm volatile ("vpxor   %ymm7,%ymm3,%ymm3");
+               asm volatile ("vpxor   %ymm15,%ymm11,%ymm11");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.poly[0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+                       asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+                       asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[d][i + 32]));
+
+                       asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+                       asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+                       asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+                       asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+                       asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+                       asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+                       asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+                       asm volatile ("vpand %ymm7,%ymm13,%ymm13");
+                       asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+                       asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+                       asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+                       asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+                       asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+                       asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+
+                       asm volatile ("vpsrlw  $4,%ymm4,%ymm5");
+                       asm volatile ("vpsrlw  $4,%ymm12,%ymm13");
+                       asm volatile ("vpand   %ymm15,%ymm4,%ymm4");
+                       asm volatile ("vpand   %ymm15,%ymm12,%ymm12");
+                       asm volatile ("vpand   %ymm15,%ymm5,%ymm5");
+                       asm volatile ("vpand   %ymm15,%ymm13,%ymm13");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("vpshufb %ymm4,%ymm14,%ymm6");
+                       asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+                       asm volatile ("vpshufb %ymm12,%ymm14,%ymm14");
+                       asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+                       asm volatile ("vpxor   %ymm6,%ymm2,%ymm2");
+                       asm volatile ("vpxor   %ymm14,%ymm10,%ymm10");
+                       asm volatile ("vpxor   %ymm7,%ymm2,%ymm2");
+                       asm volatile ("vpxor   %ymm15,%ymm10,%ymm10");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("vpshufb %ymm4,%ymm14,%ymm6");
+                       asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+                       asm volatile ("vpshufb %ymm12,%ymm14,%ymm14");
+                       asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+                       asm volatile ("vpxor   %ymm6,%ymm3,%ymm3");
+                       asm volatile ("vpxor   %ymm14,%ymm11,%ymm11");
+                       asm volatile ("vpxor   %ymm7,%ymm3,%ymm3");
+                       asm volatile ("vpxor   %ymm15,%ymm11,%ymm11");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.poly[0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+               asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[0][i]));
+               asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[0][i + 32]));
+
+               asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+               asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+               asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+               asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+               asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+               asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+               asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+               asm volatile ("vpand %ymm7,%ymm13,%ymm13");
+               asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+               asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+               asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+               asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+               asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+               asm volatile ("vpxor %ymm4,%ymm3,%ymm3");
+               asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+               asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+               asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
+               asm volatile ("vpxor %ymm12,%ymm11,%ymm11");
+
+               asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+               asm volatile ("vmovntdq %%ymm8,%0" : "=m" (p[i + 32]));
+               asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+               asm volatile ("vmovntdq %%ymm9,%0" : "=m" (q[i + 32]));
+               asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+               asm volatile ("vmovntdq %%ymm10,%0" : "=m" (r[i + 32]));
+               asm volatile ("vmovntdq %%ymm3,%0" : "=m" (s[i]));
+               asm volatile ("vmovntdq %%ymm11,%0" : "=m" (s[i + 32]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN5 (penta parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen5_ssse3(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       int d, l;
+       size_t i;
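+       /*
+        * Limited to the eight xmm registers of 32-bit mode, this variant
+        * cannot keep all five parity accumulators resident, so P is spilled
+        * to a 16-byte-aligned scratch buffer and folded in one disk at a
+        * time.
+        */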
+       uint8_t buffer[16+16];
+       uint8_t *pd = __align_ptr(buffer, 16);
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 5; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
+       /* generic case with at least two data disks */
+       for (i = 0; i < size; i += 16) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+               asm volatile ("movdqa %xmm4,%xmm0");
+               asm volatile ("movdqa %%xmm4,%0" : "=m" (pd[0]));
+
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+               asm volatile ("movdqa %xmm4,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+
+               asm volatile ("movdqa %0,%%xmm1" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm1");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm1");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm2");
+
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][2][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][2][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm3");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm3");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("pcmpgtb %xmm0,%xmm5");
+                       asm volatile ("paddb %xmm0,%xmm0");
+                       asm volatile ("pand %xmm7,%xmm5");
+                       asm volatile ("pxor %xmm5,%xmm0");
+
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm4,%xmm6");
+                       asm volatile ("movdqa %%xmm6,%0" : "=m" (pd[0]));
+
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+                       asm volatile ("movdqa %xmm4,%xmm5");
+                       asm volatile ("psrlw  $4,%xmm5");
+                       asm volatile ("pand   %xmm7,%xmm4");
+                       asm volatile ("pand   %xmm7,%xmm5");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm1");
+                       asm volatile ("pxor   %xmm7,%xmm1");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm2");
+                       asm volatile ("pxor   %xmm7,%xmm2");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][2][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][2][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm3");
+                       asm volatile ("pxor   %xmm7,%xmm3");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+               asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+               asm volatile ("pxor %xmm5,%xmm5");
+               asm volatile ("pcmpgtb %xmm0,%xmm5");
+               asm volatile ("paddb %xmm0,%xmm0");
+               asm volatile ("pand %xmm7,%xmm5");
+               asm volatile ("pxor %xmm5,%xmm0");
+
+               asm volatile ("pxor %xmm4,%xmm0");
+               asm volatile ("pxor %xmm4,%xmm1");
+               asm volatile ("pxor %xmm4,%xmm2");
+               asm volatile ("pxor %xmm4,%xmm3");
+               asm volatile ("pxor %xmm4,%xmm6");
+
+               asm volatile ("movntdq %%xmm6,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (s[i]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (t[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN5 (penta parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses all 16 SIMD registers, so x86-64 is required.
+ */
+void raid_gen5_ssse3ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 5; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
+       /* generic case with at least two data disks */
+       asm volatile ("movdqa %0,%%xmm14" : : "m" (gfconst16.poly[0]));
+       asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
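+       /*
+        * With 16 registers available, the polynomial and nibble-mask
+        * constants stay resident in xmm14/xmm15 for the whole loop instead
+        * of being reloaded per disk.
+        */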
+
+       for (i = 0; i < size; i += 16) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("movdqa %0,%%xmm10" : : "m" (v[l][i]));
+
+               asm volatile ("movdqa %xmm10,%xmm0");
+               asm volatile ("movdqa %xmm10,%xmm1");
+
+               asm volatile ("movdqa %xmm10,%xmm11");
+               asm volatile ("psrlw  $4,%xmm11");
+               asm volatile ("pand   %xmm15,%xmm10");
+               asm volatile ("pand   %xmm15,%xmm11");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("pshufb %xmm10,%xmm2");
+               asm volatile ("pshufb %xmm11,%xmm13");
+               asm volatile ("pxor   %xmm13,%xmm2");
+
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("pshufb %xmm10,%xmm3");
+               asm volatile ("pshufb %xmm11,%xmm13");
+               asm volatile ("pxor   %xmm13,%xmm3");
+
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (gfgenpshufb[l][2][0][0]));
+               asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][2][1][0]));
+               asm volatile ("pshufb %xmm10,%xmm4");
+               asm volatile ("pshufb %xmm11,%xmm13");
+               asm volatile ("pxor   %xmm13,%xmm4");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm10" : : "m" (v[d][i]));
+
+                       asm volatile ("pxor %xmm11,%xmm11");
+                       asm volatile ("pcmpgtb %xmm1,%xmm11");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("pand %xmm14,%xmm11");
+                       asm volatile ("pxor %xmm11,%xmm1");
+
+                       asm volatile ("pxor %xmm10,%xmm0");
+                       asm volatile ("pxor %xmm10,%xmm1");
+
+                       asm volatile ("movdqa %xmm10,%xmm11");
+                       asm volatile ("psrlw  $4,%xmm11");
+                       asm volatile ("pand   %xmm15,%xmm10");
+                       asm volatile ("pand   %xmm15,%xmm11");
+
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("pshufb %xmm10,%xmm12");
+                       asm volatile ("pshufb %xmm11,%xmm13");
+                       asm volatile ("pxor   %xmm12,%xmm2");
+                       asm volatile ("pxor   %xmm13,%xmm2");
+
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("pshufb %xmm10,%xmm12");
+                       asm volatile ("pshufb %xmm11,%xmm13");
+                       asm volatile ("pxor   %xmm12,%xmm3");
+                       asm volatile ("pxor   %xmm13,%xmm3");
+
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][2][0][0]));
+                       asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][2][1][0]));
+                       asm volatile ("pshufb %xmm10,%xmm12");
+                       asm volatile ("pshufb %xmm11,%xmm13");
+                       asm volatile ("pxor   %xmm12,%xmm4");
+                       asm volatile ("pxor   %xmm13,%xmm4");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm10" : : "m" (v[0][i]));
+
+               asm volatile ("pxor %xmm11,%xmm11");
+               asm volatile ("pcmpgtb %xmm1,%xmm11");
+               asm volatile ("paddb %xmm1,%xmm1");
+               asm volatile ("pand %xmm14,%xmm11");
+               asm volatile ("pxor %xmm11,%xmm1");
+
+               asm volatile ("pxor %xmm10,%xmm0");
+               asm volatile ("pxor %xmm10,%xmm1");
+               asm volatile ("pxor %xmm10,%xmm2");
+               asm volatile ("pxor %xmm10,%xmm3");
+               asm volatile ("pxor %xmm10,%xmm4");
+
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+               asm volatile ("movntdq %%xmm4,%0" : "=m" (t[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN5 (penta parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses all 16 SIMD registers, so x86-64 is required.
+ */
+void raid_gen5_avx2ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 5; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_avx_begin();
+
+       /* generic case with at least two data disks */
+       asm volatile ("vpxor %ymm8,%ymm8,%ymm8");
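+       /*
+        * ymm8 is kept zero for the whole loop; AVX2's non-destructive
+        * three-operand encoding lets vpcmpgtb compare against it without
+        * clobbering it.
+        */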
+       asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfconst16.poly[0]));
+       asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 32) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[l][i]));
+
+               asm volatile ("vmovdqa %ymm10,%ymm0");
+               asm volatile ("vmovdqa %ymm10,%ymm1");
+
+               asm volatile ("vpsrlw  $4,%ymm10,%ymm11");
+               asm volatile ("vpand   %ymm15,%ymm10,%ymm10");
+               asm volatile ("vpand   %ymm15,%ymm11,%ymm11");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("vpshufb %ymm10,%ymm2,%ymm2");
+               asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+               asm volatile ("vpxor   %ymm13,%ymm2,%ymm2");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("vpshufb %ymm10,%ymm3,%ymm3");
+               asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+               asm volatile ("vpxor   %ymm13,%ymm3,%ymm3");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm4" : : "m" (gfgenpshufb[l][2][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][2][1][0]));
+               asm volatile ("vpshufb %ymm10,%ymm4,%ymm4");
+               asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+               asm volatile ("vpxor   %ymm13,%ymm4,%ymm4");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[d][i]));
+
+                       asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+                       asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+                       asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+                       asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+                       asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+                       asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+
+                       asm volatile ("vpsrlw  $4,%ymm10,%ymm11");
+                       asm volatile ("vpand   %ymm15,%ymm10,%ymm10");
+                       asm volatile ("vpand   %ymm15,%ymm11,%ymm11");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+                       asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+                       asm volatile ("vpxor   %ymm12,%ymm2,%ymm2");
+                       asm volatile ("vpxor   %ymm13,%ymm2,%ymm2");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+                       asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+                       asm volatile ("vpxor   %ymm12,%ymm3,%ymm3");
+                       asm volatile ("vpxor   %ymm13,%ymm3,%ymm3");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][2][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][2][1][0]));
+                       asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+                       asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+                       asm volatile ("vpxor   %ymm12,%ymm4,%ymm4");
+                       asm volatile ("vpxor   %ymm13,%ymm4,%ymm4");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[0][i]));
+
+               asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+               asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+               asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+               asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+               asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+               asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+               asm volatile ("vpxor %ymm10,%ymm2,%ymm2");
+               asm volatile ("vpxor %ymm10,%ymm3,%ymm3");
+               asm volatile ("vpxor %ymm10,%ymm4,%ymm4");
+
+               asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+               asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+               asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+               asm volatile ("vmovntdq %%ymm3,%0" : "=m" (s[i]));
+               asm volatile ("vmovntdq %%ymm4,%0" : "=m" (t[i]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN6 (hexa parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen6_ssse3(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       uint8_t *u;
+       int d, l;
+       size_t i;
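+       /*
+        * As in the GEN5 case, 32-bit register pressure forces a spill: both
+        * the P and Q accumulators live in a 32-byte scratch buffer, at pd[0]
+        * and pd[16] respectively.
+        */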
+       uint8_t buffer[2*16+16];
+       uint8_t *pd = __align_ptr(buffer, 16);
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+       u = v[nd + 5];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 6; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
+       /* generic case with at least two data disks */
+       for (i = 0; i < size; i += 16) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+               asm volatile ("movdqa %%xmm4,%0" : "=m" (pd[0]));
+               asm volatile ("movdqa %%xmm4,%0" : "=m" (pd[16]));
+
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+               asm volatile ("movdqa %xmm4,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm0");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm0");
+
+               asm volatile ("movdqa %0,%%xmm1" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm1");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm1");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][2][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][2][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm2");
+
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][3][0][0]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][3][1][0]));
+               asm volatile ("pshufb %xmm4,%xmm3");
+               asm volatile ("pshufb %xmm5,%xmm7");
+               asm volatile ("pxor   %xmm7,%xmm3");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm5" : : "m" (pd[0]));
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[16]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+                       asm volatile ("pxor %xmm4,%xmm4");
+                       asm volatile ("pcmpgtb %xmm6,%xmm4");
+                       asm volatile ("paddb %xmm6,%xmm6");
+                       asm volatile ("pand %xmm7,%xmm4");
+                       asm volatile ("pxor %xmm4,%xmm6");
+
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+
+                       asm volatile ("pxor %xmm4,%xmm5");
+                       asm volatile ("pxor %xmm4,%xmm6");
+                       asm volatile ("movdqa %%xmm5,%0" : "=m" (pd[0]));
+                       asm volatile ("movdqa %%xmm6,%0" : "=m" (pd[16]));
+
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+                       asm volatile ("movdqa %xmm4,%xmm5");
+                       asm volatile ("psrlw  $4,%xmm5");
+                       asm volatile ("pand   %xmm7,%xmm4");
+                       asm volatile ("pand   %xmm7,%xmm5");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm0");
+                       asm volatile ("pxor   %xmm7,%xmm0");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm1");
+                       asm volatile ("pxor   %xmm7,%xmm1");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][2][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][2][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm2");
+                       asm volatile ("pxor   %xmm7,%xmm2");
+
+                       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][3][0][0]));
+                       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][3][1][0]));
+                       asm volatile ("pshufb %xmm4,%xmm6");
+                       asm volatile ("pshufb %xmm5,%xmm7");
+                       asm volatile ("pxor   %xmm6,%xmm3");
+                       asm volatile ("pxor   %xmm7,%xmm3");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm5" : : "m" (pd[0]));
+               asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[16]));
+               asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+               asm volatile ("pxor %xmm4,%xmm4");
+               asm volatile ("pcmpgtb %xmm6,%xmm4");
+               asm volatile ("paddb %xmm6,%xmm6");
+               asm volatile ("pand %xmm7,%xmm4");
+               asm volatile ("pxor %xmm4,%xmm6");
+
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+               asm volatile ("pxor %xmm4,%xmm0");
+               asm volatile ("pxor %xmm4,%xmm1");
+               asm volatile ("pxor %xmm4,%xmm2");
+               asm volatile ("pxor %xmm4,%xmm3");
+               asm volatile ("pxor %xmm4,%xmm5");
+               asm volatile ("pxor %xmm4,%xmm6");
+
+               asm volatile ("movntdq %%xmm5,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm6,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (s[i]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (t[i]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (u[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN6 (hexa parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses all 16 SIMD registers, so x86-64 is required.
+ */
+void raid_gen6_ssse3ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       uint8_t *u;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+       u = v[nd + 5];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 6; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_sse_begin();
+
+       /* generic case with at least two data disks */
+       asm volatile ("movdqa %0,%%xmm14" : : "m" (gfconst16.poly[0]));
+       asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 16) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("movdqa %0,%%xmm10" : : "m" (v[l][i]));
+
+               asm volatile ("movdqa %xmm10,%xmm0");
+               asm volatile ("movdqa %xmm10,%xmm1");
+
+               asm volatile ("movdqa %xmm10,%xmm11");
+               asm volatile ("psrlw  $4,%xmm11");
+               asm volatile ("pand   %xmm15,%xmm10");
+               asm volatile ("pand   %xmm15,%xmm11");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("pshufb %xmm10,%xmm2");
+               asm volatile ("pshufb %xmm11,%xmm13");
+               asm volatile ("pxor   %xmm13,%xmm2");
+
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("pshufb %xmm10,%xmm3");
+               asm volatile ("pshufb %xmm11,%xmm13");
+               asm volatile ("pxor   %xmm13,%xmm3");
+
+               asm volatile ("movdqa %0,%%xmm4" : : "m" (gfgenpshufb[l][2][0][0]));
+               asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][2][1][0]));
+               asm volatile ("pshufb %xmm10,%xmm4");
+               asm volatile ("pshufb %xmm11,%xmm13");
+               asm volatile ("pxor   %xmm13,%xmm4");
+
+               asm volatile ("movdqa %0,%%xmm5" : : "m" (gfgenpshufb[l][3][0][0]));
+               asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][3][1][0]));
+               asm volatile ("pshufb %xmm10,%xmm5");
+               asm volatile ("pshufb %xmm11,%xmm13");
+               asm volatile ("pxor   %xmm13,%xmm5");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("movdqa %0,%%xmm10" : : "m" (v[d][i]));
+
+                       asm volatile ("pxor %xmm11,%xmm11");
+                       asm volatile ("pcmpgtb %xmm1,%xmm11");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("pand %xmm14,%xmm11");
+                       asm volatile ("pxor %xmm11,%xmm1");
+
+                       asm volatile ("pxor %xmm10,%xmm0");
+                       asm volatile ("pxor %xmm10,%xmm1");
+
+                       asm volatile ("movdqa %xmm10,%xmm11");
+                       asm volatile ("psrlw  $4,%xmm11");
+                       asm volatile ("pand   %xmm15,%xmm10");
+                       asm volatile ("pand   %xmm15,%xmm11");
+
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("pshufb %xmm10,%xmm12");
+                       asm volatile ("pshufb %xmm11,%xmm13");
+                       asm volatile ("pxor   %xmm12,%xmm2");
+                       asm volatile ("pxor   %xmm13,%xmm2");
+
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("pshufb %xmm10,%xmm12");
+                       asm volatile ("pshufb %xmm11,%xmm13");
+                       asm volatile ("pxor   %xmm12,%xmm3");
+                       asm volatile ("pxor   %xmm13,%xmm3");
+
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][2][0][0]));
+                       asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][2][1][0]));
+                       asm volatile ("pshufb %xmm10,%xmm12");
+                       asm volatile ("pshufb %xmm11,%xmm13");
+                       asm volatile ("pxor   %xmm12,%xmm4");
+                       asm volatile ("pxor   %xmm13,%xmm4");
+
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][3][0][0]));
+                       asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][3][1][0]));
+                       asm volatile ("pshufb %xmm10,%xmm12");
+                       asm volatile ("pshufb %xmm11,%xmm13");
+                       asm volatile ("pxor   %xmm12,%xmm5");
+                       asm volatile ("pxor   %xmm13,%xmm5");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("movdqa %0,%%xmm10" : : "m" (v[0][i]));
+
+               asm volatile ("pxor %xmm11,%xmm11");
+               asm volatile ("pcmpgtb %xmm1,%xmm11");
+               asm volatile ("paddb %xmm1,%xmm1");
+               asm volatile ("pand %xmm14,%xmm11");
+               asm volatile ("pxor %xmm11,%xmm1");
+
+               asm volatile ("pxor %xmm10,%xmm0");
+               asm volatile ("pxor %xmm10,%xmm1");
+               asm volatile ("pxor %xmm10,%xmm2");
+               asm volatile ("pxor %xmm10,%xmm3");
+               asm volatile ("pxor %xmm10,%xmm4");
+               asm volatile ("pxor %xmm10,%xmm5");
+
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+               asm volatile ("movntdq %%xmm4,%0" : "=m" (t[i]));
+               asm volatile ("movntdq %%xmm5,%0" : "=m" (u[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
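+
+/*
+ * All of the pshufb kernels above rely on the same trick: multiplying a
+ * byte by a fixed GF(2^8) coefficient is done with two 16-entry table
+ * lookups, one on the low nibble and one on the high nibble, whose partial
+ * products are XORed together.  Below is a minimal portable sketch of that
+ * technique; the helper names are illustrative, not upstream symbols, and
+ * the only assumption is the 0x11d reduction polynomial replicated in
+ * gfconst16.poly.
+ */
+static uint8_t gf_mul_byte(uint8_t a, uint8_t b)
+{
+       uint8_t r = 0;
+
+       /* schoolbook multiply with on-the-fly reduction by 0x11d */
+       while (b) {
+               if (b & 1)
+                       r ^= a;
+               a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
+               b >>= 1;
+       }
+
+       return r;
+}
+
+static uint8_t gf_mul_nibble_sketch(uint8_t c, uint8_t x)
+{
+       uint8_t lo[16], hi[16];
+       int n;
+
+       /* what the gfgenpshufb[]/gfmulpshufb[] tables precompute per coefficient */
+       for (n = 0; n < 16; ++n) {
+               lo[n] = gf_mul_byte(c, n);
+               hi[n] = gf_mul_byte(c, n << 4);
+       }
+
+       /* the two pshufb lookups and the pxor that combines them */
+       return lo[x & 0x0f] ^ hi[x >> 4];
+}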
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN6 (hexa parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen6_avx2ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       uint8_t *s;
+       uint8_t *t;
+       uint8_t *u;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+       s = v[nd + 3];
+       t = v[nd + 4];
+       u = v[nd + 5];
+
+       /* special case with only one data disk */
+       if (l == 0) {
+               for (i = 0; i < 6; ++i)
+                       memcpy(v[1 + i], v[0], size);
+               return;
+       }
+
+       raid_avx_begin();
+
+       /* generic case with at least two data disks */
+       asm volatile ("vpxor %ymm8,%ymm8,%ymm8");
+       asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfconst16.poly[0]));
+       asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 32) {
+               /* last disk, without the multiplication by two */
+               asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[l][i]));
+
+               asm volatile ("vmovdqa %ymm10,%ymm0");
+               asm volatile ("vmovdqa %ymm10,%ymm1");
+
+               asm volatile ("vpsrlw  $4,%ymm10,%ymm11");
+               asm volatile ("vpand   %ymm15,%ymm10,%ymm10");
+               asm volatile ("vpand   %ymm15,%ymm11,%ymm11");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfgenpshufb[l][0][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][0][1][0]));
+               asm volatile ("vpshufb %ymm10,%ymm2,%ymm2");
+               asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+               asm volatile ("vpxor   %ymm13,%ymm2,%ymm2");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfgenpshufb[l][1][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][1][1][0]));
+               asm volatile ("vpshufb %ymm10,%ymm3,%ymm3");
+               asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+               asm volatile ("vpxor   %ymm13,%ymm3,%ymm3");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm4" : : "m" (gfgenpshufb[l][2][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][2][1][0]));
+               asm volatile ("vpshufb %ymm10,%ymm4,%ymm4");
+               asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+               asm volatile ("vpxor   %ymm13,%ymm4,%ymm4");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm5" : : "m" (gfgenpshufb[l][3][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][3][1][0]));
+               asm volatile ("vpshufb %ymm10,%ymm5,%ymm5");
+               asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+               asm volatile ("vpxor   %ymm13,%ymm5,%ymm5");
+
+               /* intermediate disks */
+               for (d = l - 1; d > 0; --d) {
+                       asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[d][i]));
+
+                       asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+                       asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+                       asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+                       asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+                       asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+                       asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+
+                       asm volatile ("vpsrlw  $4,%ymm10,%ymm11");
+                       asm volatile ("vpand   %ymm15,%ymm10,%ymm10");
+                       asm volatile ("vpand   %ymm15,%ymm11,%ymm11");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][0][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][0][1][0]));
+                       asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+                       asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+                       asm volatile ("vpxor   %ymm12,%ymm2,%ymm2");
+                       asm volatile ("vpxor   %ymm13,%ymm2,%ymm2");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][1][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][1][1][0]));
+                       asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+                       asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+                       asm volatile ("vpxor   %ymm12,%ymm3,%ymm3");
+                       asm volatile ("vpxor   %ymm13,%ymm3,%ymm3");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][2][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][2][1][0]));
+                       asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+                       asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+                       asm volatile ("vpxor   %ymm12,%ymm4,%ymm4");
+                       asm volatile ("vpxor   %ymm13,%ymm4,%ymm4");
+
+                       asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][3][0][0]));
+                       asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][3][1][0]));
+                       asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+                       asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+                       asm volatile ("vpxor   %ymm12,%ymm5,%ymm5");
+                       asm volatile ("vpxor   %ymm13,%ymm5,%ymm5");
+               }
+
+               /* first disk with all coefficients at 1 */
+               asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[0][i]));
+
+               asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+               asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+               asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+               asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+               asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+               asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+               asm volatile ("vpxor %ymm10,%ymm2,%ymm2");
+               asm volatile ("vpxor %ymm10,%ymm3,%ymm3");
+               asm volatile ("vpxor %ymm10,%ymm4,%ymm4");
+               asm volatile ("vpxor %ymm10,%ymm5,%ymm5");
+
+               asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+               asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+               asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+               asm volatile ("vmovntdq %%ymm3,%0" : "=m" (s[i]));
+               asm volatile ("vmovntdq %%ymm4,%0" : "=m" (t[i]));
+               asm volatile ("vmovntdq %%ymm5,%0" : "=m" (u[i]));
+       }
+
+       raid_avx_end();
+}
+#endif
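+
+/*
+ * The pcmpgtb/paddb/pand/pxor (and matching vpcmpgtb/vpaddb/...) sequences
+ * that update the q parity above are a branchless per-byte multiplication
+ * by 2 in GF(2^8): a signed compare against zero turns each byte's top bit
+ * into a 0x00/0xff mask, paddb doubles every byte, and the mask selects
+ * where the 0x1d reduction is XORed back in.  A scalar sketch of that one
+ * step (illustrative name, not an upstream symbol):
+ */
+static uint8_t gf_mul2_sketch(uint8_t x)
+{
+       uint8_t mask = (int8_t)x < 0 ? 0xff : 0x00;     /* pcmpgtb against zero */
+
+       x += x;                                 /* paddb: x << 1 in every byte */
+
+       return x ^ (mask & 0x1d);               /* pand with poly, then pxor */
+}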
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * RAID recovery of one disk, SSSE3 implementation
+ */
+void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *pa;
+       uint8_t G;
+       uint8_t V;
+       size_t i;
+
+       (void)nr; /* unused, it's always 1 */
+
+       /* if it's RAID5, use the faster function */
+       if (ip[0] == 0) {
+               raid_rec1of1(id, nd, size, vv);
+               return;
+       }
+
+       /* setup the coefficients matrix */
+       G = A(ip[0], id[0]);
+
+       /* invert it to solve the system of linear equations */
+       V = inv(G);
+
+       /* compute delta parity */
+       raid_delta_gen(1, id, ip, nd, size, vv);
+
+       p = v[nd + ip[0]];
+       pa = v[id[0]];
+
+       raid_sse_begin();
+
+       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+       asm volatile ("movdqa %0,%%xmm4" : : "m" (gfmulpshufb[V][0][0]));
+       asm volatile ("movdqa %0,%%xmm5" : : "m" (gfmulpshufb[V][1][0]));
+
+       for (i = 0; i < size; i += 16) {
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (p[i]));
+               asm volatile ("movdqa %0,%%xmm1" : : "m" (pa[i]));
+               asm volatile ("movdqa %xmm4,%xmm2");
+               asm volatile ("movdqa %xmm5,%xmm3");
+               asm volatile ("pxor   %xmm0,%xmm1");
+               asm volatile ("movdqa %xmm1,%xmm0");
+               asm volatile ("psrlw  $4,%xmm1");
+               asm volatile ("pand   %xmm7,%xmm0");
+               asm volatile ("pand   %xmm7,%xmm1");
+               asm volatile ("pshufb %xmm0,%xmm2");
+               asm volatile ("pshufb %xmm1,%xmm3");
+               asm volatile ("pxor   %xmm3,%xmm2");
+               asm volatile ("movdqa %%xmm2,%0" : "=m" (pa[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
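+
+/*
+ * A scalar sketch of the recovery loop above, under the same assumptions:
+ * raid_delta_gen() leaves the lost disk's buffer holding the parity
+ * recomputed with that block zeroed, so each missing byte is the stored
+ * parity XOR that delta, scaled by V = inv(A(ip[0], id[0])).
+ * gf_mul_byte() is the illustrative helper sketched earlier in this file.
+ */
+static void raid_rec1_scalar_sketch(int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t V = inv(A(ip[0], id[0]));
+       uint8_t *p = v[nd + ip[0]];
+       uint8_t *pa = v[id[0]];
+       size_t i;
+
+       raid_delta_gen(1, id, ip, nd, size, vv);
+
+       for (i = 0; i < size; ++i)
+               pa[i] = gf_mul_byte(V, p[i] ^ pa[i]);
+}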
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * RAID recovery of two disks, SSSE3 implementation
+ */
+void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       const int N = 2;
+       uint8_t *p[N];
+       uint8_t *pa[N];
+       uint8_t G[N * N];
+       uint8_t V[N * N];
+       size_t i;
+       int j, k;
+
+       (void)nr; /* unused, it's always 2 */
+
+       /* setup the coefficients matrix */
+       for (j = 0; j < N; ++j)
+               for (k = 0; k < N; ++k)
+                       G[j * N + k] = A(ip[j], id[k]);
+
+       /* invert it to solve the system of linear equations */
+       raid_invert(G, V, N);
+
+       /* compute delta parity */
+       raid_delta_gen(N, id, ip, nd, size, vv);
+
+       for (j = 0; j < N; ++j) {
+               p[j] = v[nd + ip[j]];
+               pa[j] = v[id[j]];
+       }
+
+       raid_sse_begin();
+
+       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 16) {
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (p[0][i]));
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (pa[0][i]));
+               asm volatile ("movdqa %0,%%xmm1" : : "m" (p[1][i]));
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (pa[1][i]));
+               asm volatile ("pxor   %xmm2,%xmm0");
+               asm volatile ("pxor   %xmm3,%xmm1");
+
+               asm volatile ("pxor %xmm6,%xmm6");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[0]][0][0]));
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[0]][1][0]));
+               asm volatile ("movdqa %xmm0,%xmm4");
+               asm volatile ("movdqa %xmm0,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm3");
+               asm volatile ("pxor   %xmm2,%xmm6");
+               asm volatile ("pxor   %xmm3,%xmm6");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[1]][0][0]));
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[1]][1][0]));
+               asm volatile ("movdqa %xmm1,%xmm4");
+               asm volatile ("movdqa %xmm1,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm3");
+               asm volatile ("pxor   %xmm2,%xmm6");
+               asm volatile ("pxor   %xmm3,%xmm6");
+
+               asm volatile ("movdqa %%xmm6,%0" : "=m" (pa[0][i]));
+
+               asm volatile ("pxor %xmm6,%xmm6");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[2]][0][0]));
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[2]][1][0]));
+               asm volatile ("movdqa %xmm0,%xmm4");
+               asm volatile ("movdqa %xmm0,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm3");
+               asm volatile ("pxor   %xmm2,%xmm6");
+               asm volatile ("pxor   %xmm3,%xmm6");
+
+               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[3]][0][0]));
+               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[3]][1][0]));
+               asm volatile ("movdqa %xmm1,%xmm4");
+               asm volatile ("movdqa %xmm1,%xmm5");
+               asm volatile ("psrlw  $4,%xmm5");
+               asm volatile ("pand   %xmm7,%xmm4");
+               asm volatile ("pand   %xmm7,%xmm5");
+               asm volatile ("pshufb %xmm4,%xmm2");
+               asm volatile ("pshufb %xmm5,%xmm3");
+               asm volatile ("pxor   %xmm2,%xmm6");
+               asm volatile ("pxor   %xmm3,%xmm6");
+
+               asm volatile ("movdqa %%xmm6,%0" : "=m" (pa[1][i]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * Generic RAID recovery, SSSE3 implementation
+ */
+void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       int N = nr;
+       uint8_t *p[RAID_PARITY_MAX];
+       uint8_t *pa[RAID_PARITY_MAX];
+       uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       uint8_t buffer[RAID_PARITY_MAX*16+16];
+       uint8_t *pd = __align_ptr(buffer, 16);
+       size_t i;
+       int j, k;
+
+       /* setup the coefficients matrix */
+       for (j = 0; j < N; ++j)
+               for (k = 0; k < N; ++k)
+                       G[j * N + k] = A(ip[j], id[k]);
+
+       /* invert it to solve the system of linear equations */
+       raid_invert(G, V, N);
+
+       /* compute delta parity */
+       raid_delta_gen(N, id, ip, nd, size, vv);
+
+       for (j = 0; j < N; ++j) {
+               p[j] = v[nd + ip[j]];
+               pa[j] = v[id[j]];
+       }
+
+       raid_sse_begin();
+
+       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 16) {
+               /* delta */
+               for (j = 0; j < N; ++j) {
+                       asm volatile ("movdqa %0,%%xmm0" : : "m" (p[j][i]));
+                       asm volatile ("movdqa %0,%%xmm1" : : "m" (pa[j][i]));
+                       asm volatile ("pxor   %xmm1,%xmm0");
+                       asm volatile ("movdqa %%xmm0,%0" : "=m" (pd[j*16]));
+               }
+
+               /* reconstruct */
+               for (j = 0; j < N; ++j) {
+                       asm volatile ("pxor %xmm0,%xmm0");
+                       asm volatile ("pxor %xmm1,%xmm1");
+
+                       for (k = 0; k < N; ++k) {
+                               uint8_t m = V[j * N + k];
+
+                               asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[m][0][0]));
+                               asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[m][1][0]));
+                               asm volatile ("movdqa %0,%%xmm4" : : "m" (pd[k*16]));
+                               asm volatile ("movdqa %xmm4,%xmm5");
+                               asm volatile ("psrlw  $4,%xmm5");
+                               asm volatile ("pand   %xmm7,%xmm4");
+                               asm volatile ("pand   %xmm7,%xmm5");
+                               asm volatile ("pshufb %xmm4,%xmm2");
+                               asm volatile ("pshufb %xmm5,%xmm3");
+                               asm volatile ("pxor   %xmm2,%xmm0");
+                               asm volatile ("pxor   %xmm3,%xmm1");
+                       }
+
+                       asm volatile ("pxor %xmm1,%xmm0");
+                       asm volatile ("movdqa %%xmm0,%0" : "=m" (pa[j][i]));
+               }
+       }
+
+       raid_sse_end();
+}
+#endif
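+
+/*
+ * Scalar sketch of the generic reconstruction above: once raid_invert()
+ * has produced V = G^-1 and raid_delta_gen() has turned every lost disk's
+ * buffer into a delta parity, each missing byte j is the GF(2^8) dot
+ * product of row j of V with the deltas.  gf_mul_byte() is the
+ * illustrative helper sketched earlier in this file; d[] plays the role
+ * of the aligned pd[] staging buffer.
+ */
+static void raid_recX_scalar_sketch(int nr, uint8_t *V, uint8_t **p, uint8_t **pa, size_t size)
+{
+       uint8_t d[RAID_PARITY_MAX];
+       size_t i;
+       int j, k;
+
+       for (i = 0; i < size; ++i) {
+               /* delta: stored parity XOR parity with the lost disks zeroed */
+               for (j = 0; j < nr; ++j)
+                       d[j] = p[j][i] ^ pa[j][i];
+
+               /* reconstruct: missing byte j is the sum over k of V[j][k] * d[k] */
+               for (j = 0; j < nr; ++j) {
+                       uint8_t b = 0;
+
+                       for (k = 0; k < nr; ++k)
+                               b ^= gf_mul_byte(V[j * nr + k], d[k]);
+
+                       pa[j][i] = b;
+               }
+       }
+}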
+
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * RAID recovery of one disk, AVX2 implementation
+ */
+void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       uint8_t *p;
+       uint8_t *pa;
+       uint8_t G;
+       uint8_t V;
+       size_t i;
+
+       (void)nr; /* unused, it's always 1 */
+
+       /* if it's RAID5, use the faster function */
+       if (ip[0] == 0) {
+               raid_rec1of1(id, nd, size, vv);
+               return;
+       }
+
+       /* setup the coefficients matrix */
+       G = A(ip[0], id[0]);
+
+       /* invert it to solve the system of linear equations */
+       V = inv(G);
+
+       /* compute delta parity */
+       raid_delta_gen(1, id, ip, nd, size, vv);
+
+       p = v[nd + ip[0]];
+       pa = v[id[0]];
+
+       raid_avx_begin();
+
+       asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.low4[0]));
+       asm volatile ("vbroadcasti128 %0,%%ymm4" : : "m" (gfmulpshufb[V][0][0]));
+       asm volatile ("vbroadcasti128 %0,%%ymm5" : : "m" (gfmulpshufb[V][1][0]));
+
+       for (i = 0; i < size; i += 32) {
+               asm volatile ("vmovdqa %0,%%ymm0" : : "m" (p[i]));
+               asm volatile ("vmovdqa %0,%%ymm1" : : "m" (pa[i]));
+               asm volatile ("vpxor   %ymm1,%ymm0,%ymm0");
+               asm volatile ("vpsrlw  $4,%ymm0,%ymm1");
+               asm volatile ("vpand   %ymm7,%ymm0,%ymm0");
+               asm volatile ("vpand   %ymm7,%ymm1,%ymm1");
+               asm volatile ("vpshufb %ymm0,%ymm4,%ymm2");
+               asm volatile ("vpshufb %ymm1,%ymm5,%ymm3");
+               asm volatile ("vpxor   %ymm3,%ymm2,%ymm2");
+               asm volatile ("vmovdqa %%ymm2,%0" : "=m" (pa[i]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * RAID recovery of two disks, AVX2 implementation
+ */
+void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       const int N = 2;
+       uint8_t *p[N];
+       uint8_t *pa[N];
+       uint8_t G[N * N];
+       uint8_t V[N * N];
+       size_t i;
+       int j, k;
+
+       (void)nr; /* unused, it's always 2 */
+
+       /* setup the coefficients matrix */
+       for (j = 0; j < N; ++j)
+               for (k = 0; k < N; ++k)
+                       G[j * N + k] = A(ip[j], id[k]);
+
+       /* invert it to solve the system of linear equations */
+       raid_invert(G, V, N);
+
+       /* compute delta parity */
+       raid_delta_gen(N, id, ip, nd, size, vv);
+
+       for (j = 0; j < N; ++j) {
+               p[j] = v[nd + ip[j]];
+               pa[j] = v[id[j]];
+       }
+
+       raid_avx_begin();
+
+       asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 32) {
+               asm volatile ("vmovdqa %0,%%ymm0" : : "m" (p[0][i]));
+               asm volatile ("vmovdqa %0,%%ymm2" : : "m" (pa[0][i]));
+               asm volatile ("vmovdqa %0,%%ymm1" : : "m" (p[1][i]));
+               asm volatile ("vmovdqa %0,%%ymm3" : : "m" (pa[1][i]));
+               asm volatile ("vpxor   %ymm2,%ymm0,%ymm0");
+               asm volatile ("vpxor   %ymm3,%ymm1,%ymm1");
+
+               asm volatile ("vpxor %ymm6,%ymm6,%ymm6");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[0]][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[0]][1][0]));
+               asm volatile ("vpsrlw  $4,%ymm0,%ymm5");
+               asm volatile ("vpand   %ymm7,%ymm0,%ymm4");
+               asm volatile ("vpand   %ymm7,%ymm5,%ymm5");
+               asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+               asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+               asm volatile ("vpxor   %ymm2,%ymm6,%ymm6");
+               asm volatile ("vpxor   %ymm3,%ymm6,%ymm6");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[1]][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[1]][1][0]));
+               asm volatile ("vpsrlw  $4,%ymm1,%ymm5");
+               asm volatile ("vpand   %ymm7,%ymm1,%ymm4");
+               asm volatile ("vpand   %ymm7,%ymm5,%ymm5");
+               asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+               asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+               asm volatile ("vpxor   %ymm2,%ymm6,%ymm6");
+               asm volatile ("vpxor   %ymm3,%ymm6,%ymm6");
+
+               asm volatile ("vmovdqa %%ymm6,%0" : "=m" (pa[0][i]));
+
+               asm volatile ("vpxor %ymm6,%ymm6,%ymm6");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[2]][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[2]][1][0]));
+               asm volatile ("vpsrlw  $4,%ymm0,%ymm5");
+               asm volatile ("vpand   %ymm7,%ymm0,%ymm4");
+               asm volatile ("vpand   %ymm7,%ymm5,%ymm5");
+               asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+               asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+               asm volatile ("vpxor   %ymm2,%ymm6,%ymm6");
+               asm volatile ("vpxor   %ymm3,%ymm6,%ymm6");
+
+               asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[3]][0][0]));
+               asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[3]][1][0]));
+               asm volatile ("vpsrlw  $4,%ymm1,%ymm5");
+               asm volatile ("vpand   %ymm7,%ymm1,%ymm4");
+               asm volatile ("vpand   %ymm7,%ymm5,%ymm5");
+               asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+               asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+               asm volatile ("vpxor   %ymm2,%ymm6,%ymm6");
+               asm volatile ("vpxor   %ymm3,%ymm6,%ymm6");
+
+               asm volatile ("vmovdqa %%ymm6,%0" : "=m" (pa[1][i]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * Generic RAID recovery, AVX2 implementation
+ */
+void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t **)vv;
+       int N = nr;
+       uint8_t *p[RAID_PARITY_MAX];
+       uint8_t *pa[RAID_PARITY_MAX];
+       uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+       uint8_t buffer[RAID_PARITY_MAX*32+32];
+       uint8_t *pd = __align_ptr(buffer, 32);
+       size_t i;
+       int j, k;
+
+       /* setup the coefficients matrix */
+       for (j = 0; j < N; ++j)
+               for (k = 0; k < N; ++k)
+                       G[j * N + k] = A(ip[j], id[k]);
+
+       /* invert it to solve the system of linear equations */
+       raid_invert(G, V, N);
+
+       /* compute delta parity */
+       raid_delta_gen(N, id, ip, nd, size, vv);
+
+       for (j = 0; j < N; ++j) {
+               p[j] = v[nd + ip[j]];
+               pa[j] = v[id[j]];
+       }
+
+       raid_avx_begin();
+
+       asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.low4[0]));
+
+       for (i = 0; i < size; i += 32) {
+               /* delta */
+               for (j = 0; j < N; ++j) {
+                       asm volatile ("vmovdqa %0,%%ymm0" : : "m" (p[j][i]));
+                       asm volatile ("vmovdqa %0,%%ymm1" : : "m" (pa[j][i]));
+                       asm volatile ("vpxor   %ymm1,%ymm0,%ymm0");
+                       asm volatile ("vmovdqa %%ymm0,%0" : "=m" (pd[j*32]));
+               }
+
+               /* reconstruct */
+               for (j = 0; j < N; ++j) {
+                       asm volatile ("vpxor %ymm0,%ymm0,%ymm0");
+                       asm volatile ("vpxor %ymm1,%ymm1,%ymm1");
+
+                       for (k = 0; k < N; ++k) {
+                               uint8_t m = V[j * N + k];
+
+                               asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[m][0][0]));
+                               asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[m][1][0]));
+                               asm volatile ("vmovdqa %0,%%ymm4" : : "m" (pd[k*32]));
+                               asm volatile ("vpsrlw  $4,%ymm4,%ymm5");
+                               asm volatile ("vpand   %ymm7,%ymm4,%ymm4");
+                               asm volatile ("vpand   %ymm7,%ymm5,%ymm5");
+                               asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+                               asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+                               asm volatile ("vpxor   %ymm2,%ymm0,%ymm0");
+                               asm volatile ("vpxor   %ymm3,%ymm1,%ymm1");
+                       }
+
+                       asm volatile ("vpxor %ymm1,%ymm0,%ymm0");
+                       asm volatile ("vmovdqa %%ymm0,%0" : "=m" (pa[j][i]));
+               }
+       }
+
+       raid_avx_end();
+}
+#endif
+
diff --git a/raid/x86z.c b/raid/x86z.c
new file mode 100644 (file)
index 0000000..1e3fe89
--- /dev/null
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+static const struct gfzconst16 {
+       uint8_t poly[16];
+       uint8_t half[16];
+       uint8_t low7[16];
+} gfzconst16 __aligned(64) =
+{
+       {
+               0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+               0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d
+       },
+       {
+               0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e,
+               0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e
+       },
+       {
+               0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+               0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
+       }
+};
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+/*
+ * GENz (triple parity with powers of 2^-1) SSE2 implementation
+ */
+void raid_genz_sse2(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t**)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       raid_sse_begin();
+
+       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0]));
+       asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0]));
+       asm volatile ("movdqa %0,%%xmm6" : : "m" (gfzconst16.low7[0]));
+
+       for (i = 0; i < size; i += 16) {
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+               asm volatile ("movdqa %xmm0,%xmm1");
+               asm volatile ("movdqa %xmm0,%xmm2");
+               for (d = l - 1; d >= 0; --d) {
+                       asm volatile ("pxor %xmm4,%xmm4");
+                       asm volatile ("pcmpgtb %xmm1,%xmm4");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("pand %xmm7,%xmm4");
+                       asm volatile ("pxor %xmm4,%xmm1");
+
+                       asm volatile ("movdqa %xmm2,%xmm4");
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("psllw $7,%xmm4");
+                       asm volatile ("psrlw $1,%xmm2");
+                       asm volatile ("pcmpgtb %xmm4,%xmm5");
+                       asm volatile ("pand %xmm6,%xmm2");
+                       asm volatile ("pand %xmm3,%xmm5");
+                       asm volatile ("pxor %xmm5,%xmm2");
+
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm4,%xmm1");
+                       asm volatile ("pxor %xmm4,%xmm2");
+               }
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+       }
+
+       raid_sse_end();
+}
+#endif
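+
+/*
+ * The gfzconst16 constants used above encode a branchless per-byte
+ * division by 2 in GF(2^8): shifting a byte right by one drops its low
+ * bit, and when that bit was set the half constant 0x8e (the 0x11d
+ * polynomial shifted right once) is XORed back in; low7 masks off the
+ * bits that the word-wide psrlw leaks across byte lanes.  A scalar sketch
+ * of the step applied once per data disk (illustrative name, not an
+ * upstream symbol):
+ */
+static uint8_t gf_div2_sketch(uint8_t x)
+{
+       uint8_t mask = (x & 1) ? 0xff : 0x00;   /* sign of x << 7, via pcmpgtb */
+
+       x = (x >> 1) & 0x7f;                    /* psrlw $1, then pand with low7 */
+
+       return x ^ (mask & 0x8e);               /* pand with half, then pxor */
+}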
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSE2)
+/*
+ * GENz (triple parity with powers of 2^-1) SSE2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_genz_sse2ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t**)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       raid_sse_begin();
+
+       asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0]));
+       asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0]));
+       asm volatile ("movdqa %0,%%xmm11" : : "m" (gfzconst16.low7[0]));
+
+       for (i = 0; i < size; i += 32) {
+               asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+               asm volatile ("movdqa %0,%%xmm8" : : "m" (v[l][i + 16]));
+               asm volatile ("movdqa %xmm0,%xmm1");
+               asm volatile ("movdqa %xmm8,%xmm9");
+               asm volatile ("movdqa %xmm0,%xmm2");
+               asm volatile ("movdqa %xmm8,%xmm10");
+               for (d = l - 1; d >= 0; --d) {
+                       asm volatile ("movdqa %xmm2,%xmm6");
+                       asm volatile ("movdqa %xmm10,%xmm14");
+                       asm volatile ("pxor %xmm4,%xmm4");
+                       asm volatile ("pxor %xmm12,%xmm12");
+                       asm volatile ("pxor %xmm5,%xmm5");
+                       asm volatile ("pxor %xmm13,%xmm13");
+                       asm volatile ("psllw $7,%xmm6");
+                       asm volatile ("psllw $7,%xmm14");
+                       asm volatile ("psrlw $1,%xmm2");
+                       asm volatile ("psrlw $1,%xmm10");
+                       asm volatile ("pcmpgtb %xmm1,%xmm4");
+                       asm volatile ("pcmpgtb %xmm9,%xmm12");
+                       asm volatile ("pcmpgtb %xmm6,%xmm5");
+                       asm volatile ("pcmpgtb %xmm14,%xmm13");
+                       asm volatile ("paddb %xmm1,%xmm1");
+                       asm volatile ("paddb %xmm9,%xmm9");
+                       asm volatile ("pand %xmm11,%xmm2");
+                       asm volatile ("pand %xmm11,%xmm10");
+                       asm volatile ("pand %xmm7,%xmm4");
+                       asm volatile ("pand %xmm7,%xmm12");
+                       asm volatile ("pand %xmm3,%xmm5");
+                       asm volatile ("pand %xmm3,%xmm13");
+                       asm volatile ("pxor %xmm4,%xmm1");
+                       asm volatile ("pxor %xmm12,%xmm9");
+                       asm volatile ("pxor %xmm5,%xmm2");
+                       asm volatile ("pxor %xmm13,%xmm10");
+
+                       asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+                       asm volatile ("movdqa %0,%%xmm12" : : "m" (v[d][i + 16]));
+                       asm volatile ("pxor %xmm4,%xmm0");
+                       asm volatile ("pxor %xmm4,%xmm1");
+                       asm volatile ("pxor %xmm4,%xmm2");
+                       asm volatile ("pxor %xmm12,%xmm8");
+                       asm volatile ("pxor %xmm12,%xmm9");
+                       asm volatile ("pxor %xmm12,%xmm10");
+               }
+               asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+               asm volatile ("movntdq %%xmm8,%0" : "=m" (p[i + 16]));
+               asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+               asm volatile ("movntdq %%xmm9,%0" : "=m" (q[i + 16]));
+               asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+               asm volatile ("movntdq %%xmm10,%0" : "=m" (r[i + 16]));
+       }
+
+       raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GENz (triple parity with powers of 2^-1) AVX2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_genz_avx2ext(int nd, size_t size, void **vv)
+{
+       uint8_t **v = (uint8_t**)vv;
+       uint8_t *p;
+       uint8_t *q;
+       uint8_t *r;
+       int d, l;
+       size_t i;
+
+       l = nd - 1;
+       p = v[nd];
+       q = v[nd + 1];
+       r = v[nd + 2];
+
+       raid_avx_begin();
+
+       asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfzconst16.poly[0]));
+       asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfzconst16.half[0]));
+       asm volatile ("vbroadcasti128 %0,%%ymm11" : : "m" (gfzconst16.low7[0]));
+       asm volatile ("vpxor %ymm15,%ymm15,%ymm15");
+
+       for (i = 0; i < size; i += 64) {
+               asm volatile ("vmovdqa %0,%%ymm0" : : "m" (v[l][i]));
+               asm volatile ("vmovdqa %0,%%ymm8" : : "m" (v[l][i + 32]));
+               asm volatile ("vmovdqa %ymm0,%ymm1");
+               asm volatile ("vmovdqa %ymm8,%ymm9");
+               asm volatile ("vmovdqa %ymm0,%ymm2");
+               asm volatile ("vmovdqa %ymm8,%ymm10");
+               for (d = l - 1; d >= 0; --d) {
+                       asm volatile ("vpsllw $7,%ymm2,%ymm6");
+                       asm volatile ("vpsllw $7,%ymm10,%ymm14");
+                       asm volatile ("vpsrlw $1,%ymm2,%ymm2");
+                       asm volatile ("vpsrlw $1,%ymm10,%ymm10");
+                       asm volatile ("vpcmpgtb %ymm1,%ymm15,%ymm4");
+                       asm volatile ("vpcmpgtb %ymm9,%ymm15,%ymm12");
+                       asm volatile ("vpcmpgtb %ymm6,%ymm15,%ymm5");
+                       asm volatile ("vpcmpgtb %ymm14,%ymm15,%ymm13");
+                       asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+                       asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+                       asm volatile ("vpand %ymm11,%ymm2,%ymm2");
+                       asm volatile ("vpand %ymm11,%ymm10,%ymm10");
+                       asm volatile ("vpand %ymm7,%ymm4,%ymm4");
+                       asm volatile ("vpand %ymm7,%ymm12,%ymm12");
+                       asm volatile ("vpand %ymm3,%ymm5,%ymm5");
+                       asm volatile ("vpand %ymm3,%ymm13,%ymm13");
+                       asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+                       asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+                       asm volatile ("vpxor %ymm5,%ymm2,%ymm2");
+                       asm volatile ("vpxor %ymm13,%ymm10,%ymm10");
+
+                       asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+                       asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[d][i + 32]));
+                       asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+                       asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+                       asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+                       asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+                       asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+                       asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
+               }
+               asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+               asm volatile ("vmovntdq %%ymm8,%0" : "=m" (p[i + 32]));
+               asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+               asm volatile ("vmovntdq %%ymm9,%0" : "=m" (q[i + 32]));
+               asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+               asm volatile ("vmovntdq %%ymm10,%0" : "=m" (r[i + 32]));
+       }
+
+       raid_avx_end();
+}
+#endif
+
diff --git a/smoke_test b/smoke_test
new file mode 100755 (executable)
index 0000000..076806d
--- /dev/null
@@ -0,0 +1,82 @@
+#!/bin/bash
+#
+# This is a smoke test of bcachefs-tools.
+#
+# It builds the source with multiple options (debug, release, valgrind, FUSE)
+# and runs the test suite.
+#
+# Returns 0 on success, nonzero on any failure.
+#
+# Dependencies:
+#
+# valgrind, python3-pytest, python3-pytest-xdist
+#
+# On debian/ubuntu based systems, install with:
+#
+#   apt install valgrind python3-pytest python3-pytest-xdist
+#
+# You also currently need FUSE 3.7 or later.  FUSE 3.7 unfortunately requires
+# Debian sid or bullseye at this time, so you may need to install from source.
+
+set -e
+
+spam=$(mktemp)
+unset BCACHEFS_FUSE BCACHEFS_TEST_USE_VALGRIND D
+
+trap "set +x; cat ${spam}; rm -f ${spam} ; echo; echo FAILED." EXIT
+
+echo -- Verify dependencies --
+pkg-config --atleast-version 3.7.0 fuse3
+python3 -c "import pytest"
+python3 -c "import xdist"
+which valgrind > /dev/null
+echo OK
+
+JOBS=$(nproc)
+function build() {
+    echo Building.
+    make -j ${JOBS} clean          > ${spam} 2>&1
+    make -j ${JOBS} tests bcachefs > ${spam} 2>&1
+    truncate -s0 ${spam}
+}
+
+function test() {
+    echo Running tests.
+    (
+        cd tests
+        pytest-3 -n${JOBS}
+    ) > ${spam} 2>&1
+}
+
+function test_vg() {
+    echo Running tests with valgrind.
+    (
+        export BCACHEFS_TEST_USE_VALGRIND=yes
+        cd tests
+        pytest-3 -n${JOBS}
+    ) > ${spam} 2>&1
+}
+
+echo -- Test: default --
+build
+test
+
+echo -- Test: debug --
+export D=1
+build
+test
+
+echo -- Test: debug with valgrind --
+test_vg
+
+echo -- Test: fuse debug --
+export BCACHEFS_FUSE=1
+build
+test
+
+echo -- Test: fuse debug with valgrind --
+test_vg
+
+rm -f ${spam}
+trap "set +x; echo; echo SUCCESS." EXIT
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644 (file)
index 0000000..45c6273
--- /dev/null
@@ -0,0 +1,18 @@
+#!/usr/bin/python3
+#
+# pytest fixture definitions.
+
+import pytest
+import util
+
+@pytest.fixture
+def bfuse(tmpdir):
+    '''A test requesting a "bfuse" is given one via this fixture.'''
+
+    dev = util.format_1g(tmpdir)
+    mnt = util.mountpoint(tmpdir)
+    bf = util.BFuse(dev, mnt)
+
+    yield bf
+
+    bf.unmount(timeout=5.0)
diff --git a/tests/test_basic.py b/tests/test_basic.py
new file mode 100644 (file)
index 0000000..47b621c
--- /dev/null
@@ -0,0 +1,64 @@
+#!/usr/bin/python3
+#
+# Basic bcachefs functionality tests.
+
+import re
+import util
+
+def test_help():
+    ret = util.run_bch(valgrind=True)
+
+    assert ret.returncode == 1
+    assert "missing command" in ret.stdout
+    assert len(ret.stderr) == 0
+
+def test_format(tmpdir):
+    dev = util.device_1g(tmpdir)
+    ret = util.run_bch('format', dev, valgrind=True)
+
+    assert ret.returncode == 0
+    assert len(ret.stdout) > 0
+    assert len(ret.stderr) == 0
+
+def test_fsck(tmpdir):
+    dev = util.format_1g(tmpdir)
+
+    ret = util.run_bch('fsck', dev, valgrind=True)
+
+    assert ret.returncode == 0
+    assert len(ret.stdout) > 0
+    assert len(ret.stderr) == 0
+
+def test_list(tmpdir):
+    dev = util.format_1g(tmpdir)
+
+    ret = util.run_bch('list', dev, valgrind=True)
+
+    assert ret.returncode == 0
+    assert len(ret.stderr) == 0
+    assert "recovering from clean shutdown" in ret.stdout
+    assert len(ret.stdout.splitlines()) == 95
+
+def test_list_inodes(tmpdir):
+    dev = util.format_1g(tmpdir)
+
+    ret = util.run_bch('list', '-b', 'inodes', dev, valgrind=True)
+
+    assert ret.returncode == 0
+    assert len(ret.stderr) == 0
+    assert len(ret.stdout.splitlines()) == (2 + 2) # 2 inodes on clean format
+
+def test_list_dirent(tmpdir):
+    dev = util.format_1g(tmpdir)
+
+    ret = util.run_bch('list', '-b', 'dirents', dev, valgrind=True)
+
+    assert ret.returncode == 0
+    assert len(ret.stderr) == 0
+    assert len(ret.stdout.splitlines()) == (2 + 1) # 1 dirent
+
+    # Example:
+    # u64s 8 type dirent 4096:2449855786607753081
+    # snap 0 len 0 ver 0: lost+found -> 4097
+    last = ret.stdout.splitlines()[-1]
+    assert re.match(r'^.*type dirent.*: lost\+found ->.*$', last)
diff --git a/tests/test_fixture.py b/tests/test_fixture.py
new file mode 100644 (file)
index 0000000..d8d3819
--- /dev/null
@@ -0,0 +1,75 @@
+#!/usr/bin/python3
+#
+# Tests of the functions in util.py
+
+import pytest
+import signal
+import subprocess
+import time
+
+import util
+from pathlib import Path
+
+helper = './test_helper'
+
+def test_sparse_file(tmpdir):
+    dev = util.sparse_file(tmpdir / '1k', 1024)
+    assert dev.stat().st_size == 1024
+
+def test_device_1g(tmpdir):
+    dev = util.device_1g(tmpdir)
+    assert dev.stat().st_size == 1024**3
+
+def test_abort():
+    ret = util.run(helper, 'abort')
+    assert ret.returncode == -signal.SIGABRT
+
+def test_segfault():
+    ret = util.run(helper, 'segfault')
+    assert ret.returncode == -signal.SIGSEGV
+
+@pytest.mark.skipif(not util.ENABLE_VALGRIND, reason="no valgrind")
+def test_check():
+    with pytest.raises(subprocess.CalledProcessError):
+        ret = util.run(helper, 'abort', check=True)
+
+@pytest.mark.skipif(not util.ENABLE_VALGRIND, reason="no valgrind")
+def test_leak():
+    with pytest.raises(util.ValgrindFailedError):
+        ret = util.run(helper, 'leak', valgrind=True)
+
+@pytest.mark.skipif(not util.ENABLE_VALGRIND, reason="no valgrind")
+def test_undefined():
+    with pytest.raises(util.ValgrindFailedError):
+        ret = util.run(helper, 'undefined', valgrind=True)
+
+@pytest.mark.skipif(not util.ENABLE_VALGRIND, reason="no valgrind")
+def test_undefined_branch():
+    with pytest.raises(util.ValgrindFailedError):
+        ret = util.run(helper, 'undefined_branch', valgrind=True)
+
+@pytest.mark.skipif(not util.ENABLE_VALGRIND, reason="no valgrind")
+def test_read_after_free():
+    with pytest.raises(util.ValgrindFailedError):
+        ret = util.run(helper, 'read_after_free', valgrind=True)
+
+@pytest.mark.skipif(not util.ENABLE_VALGRIND, reason="no valgrind")
+def test_write_after_free():
+    with pytest.raises(util.ValgrindFailedError):
+        ret = util.run(helper, 'write_after_free', valgrind=True)
+
+def test_mountpoint(tmpdir):
+    path = util.mountpoint(tmpdir)
+    assert str(path)[-4:] == '/mnt'
+    assert path.is_dir()
+
+def test_timestamp():
+    t1 = time.clock_gettime(time.CLOCK_REALTIME)
+    with util.Timestamp() as ts:
+        t2 = time.clock_gettime(time.CLOCK_REALTIME)
+    t3 = time.clock_gettime(time.CLOCK_REALTIME)
+
+    assert not ts.contains(t1)
+    assert ts.contains(t2)
+    assert not ts.contains(t3)
diff --git a/tests/test_fuse.py b/tests/test_fuse.py
new file mode 100644 (file)
index 0000000..660d92d
--- /dev/null
@@ -0,0 +1,235 @@
+#!/usr/bin/python3
+#
+# Tests of the fuse mount functionality.
+
+import pytest
+import os
+import util
+
+pytestmark = pytest.mark.skipif(
+    not util.have_fuse(), reason="bcachefs not built with fuse support.")
+
+def test_mount(bfuse):
+    bfuse.mount()
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_remount(bfuse):
+    bfuse.mount()
+    bfuse.unmount()
+    bfuse.mount()
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_lostfound(bfuse):
+    bfuse.mount()
+
+    lf = bfuse.mnt / "lost+found"
+    assert lf.is_dir()
+
+    st = lf.stat()
+    assert st.st_mode == 0o40700
+
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_create(bfuse):
+    bfuse.mount()
+
+    path = bfuse.mnt / "file"
+
+    with util.Timestamp() as ts:
+        fd = os.open(path, os.O_CREAT, 0o700)
+
+    assert fd >= 0
+
+    os.close(fd)
+    assert path.is_file()
+
+    # Verify file.
+    st = path.stat()
+    assert st.st_mode == 0o100700
+    assert st.st_mtime == st.st_ctime
+    assert st.st_mtime == st.st_atime
+    assert ts.contains(st.st_mtime)
+
+    # Verify dir.
+    dst = bfuse.mnt.stat()
+    assert dst.st_mtime == dst.st_ctime
+    assert ts.contains(dst.st_mtime)
+
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_mkdir(bfuse):
+    bfuse.mount()
+
+    path = bfuse.mnt / "dir"
+
+    with util.Timestamp() as ts:
+        os.mkdir(path, 0o700)
+
+    assert path.is_dir()
+
+    # Verify child.
+    st = path.stat()
+    assert st.st_mode == 0o40700
+    assert st.st_mtime == st.st_ctime
+    assert st.st_mtime == st.st_atime
+    assert ts.contains(st.st_mtime)
+
+    # Verify parent.
+    dst = bfuse.mnt.stat()
+    assert dst.st_mtime == dst.st_ctime
+    assert ts.contains(dst.st_mtime)
+
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_unlink(bfuse):
+    bfuse.mount()
+
+    path = bfuse.mnt / "file"
+    path.touch(mode=0o600, exist_ok=False)
+
+    with util.Timestamp() as ts:
+        os.unlink(path)
+
+    assert not path.exists()
+
+    # Verify dir.
+    dst = bfuse.mnt.stat()
+    assert dst.st_mtime == dst.st_ctime
+    assert ts.contains(dst.st_mtime)
+
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_rmdir(bfuse):
+    bfuse.mount()
+
+    path = bfuse.mnt / "dir"
+    path.mkdir(mode=0o700, exist_ok=False)
+
+    with util.Timestamp() as ts:
+        os.rmdir(path)
+
+    assert not path.exists()
+
+    # Verify dir.
+    dst = bfuse.mnt.stat()
+    assert dst.st_mtime == dst.st_ctime
+    assert ts.contains(dst.st_mtime)
+
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_rename(bfuse):
+    bfuse.mount()
+
+    srcdir = bfuse.mnt
+
+    path = srcdir / "file"
+    path.touch(mode=0o600, exist_ok=False)
+
+    destdir = srcdir / "dir"
+    destdir.mkdir(mode=0o700, exist_ok=False)
+
+    destpath = destdir / "file"
+
+    path_pre_st = path.stat()
+
+    with util.Timestamp() as ts:
+        os.rename(path, destpath)
+
+    assert not path.exists()
+    assert destpath.is_file()
+
+    # Verify dirs.
+    src_st = srcdir.stat()
+    assert src_st.st_mtime == src_st.st_ctime
+    assert ts.contains(src_st.st_mtime)
+
+    dest_st = destdir.stat()
+    assert dest_st.st_mtime == dest_st.st_ctime
+    assert ts.contains(dest_st.st_mtime)
+
+    # Verify file.
+    path_post_st = destpath.stat()
+    assert path_post_st.st_mtime == path_pre_st.st_mtime
+    assert path_post_st.st_atime == path_pre_st.st_atime
+    assert ts.contains(path_post_st.st_ctime)
+
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_link(bfuse):
+    bfuse.mount()
+
+    srcdir = bfuse.mnt
+
+    path = srcdir / "file"
+    path.touch(mode=0o600, exist_ok=False)
+
+    destdir = srcdir / "dir"
+    destdir.mkdir(mode=0o700, exist_ok=False)
+
+    destpath = destdir / "file"
+
+    path_pre_st = path.stat()
+    srcdir_pre_st = srcdir.stat()
+
+    with util.Timestamp() as ts:
+        os.link(path, destpath)
+
+    assert path.exists()
+    assert destpath.is_file()
+
+    # Verify source dir is unchanged.
+    srcdir_post_st = srcdir.stat()
+    assert srcdir_pre_st == srcdir_post_st
+
+    # Verify dest dir.
+    destdir_st = destdir.stat()
+    assert destdir_st.st_mtime == destdir_st.st_ctime
+    assert ts.contains(destdir_st.st_mtime)
+
+    # Verify file.
+    path_post_st = path.stat()
+    destpath_post_st = destpath.stat()
+    assert path_post_st == destpath_post_st
+
+    assert path_post_st.st_mtime == path_pre_st.st_mtime
+    assert path_post_st.st_atime == path_pre_st.st_atime
+    assert ts.contains(path_post_st.st_ctime)
+
+    bfuse.unmount()
+    bfuse.verify()
+
+def test_write(bfuse):
+    bfuse.mount()
+
+    path = bfuse.mnt / "file"
+    path.touch(mode=0o600, exist_ok=False)
+
+    pre_st = path.stat()
+
+    fd = os.open(path, os.O_WRONLY)
+    assert fd >= 0
+
+    with util.Timestamp() as ts:
+        written = os.write(fd, b'test')
+
+    os.close(fd)
+
+    assert written == 4
+
+    post_st = path.stat()
+    assert post_st.st_atime == pre_st.st_atime
+    assert post_st.st_mtime == post_st.st_ctime
+    assert ts.contains(post_st.st_mtime)
+
+    assert path.read_bytes() == b'test'
+
+    bfuse.unmount()
+    bfuse.verify()
diff --git a/tests/test_helper.c b/tests/test_helper.c
new file mode 100644 (file)
index 0000000..c7604f0
--- /dev/null
@@ -0,0 +1,103 @@
+#include <assert.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void trick_compiler(int *x);
+
+static void test_abort(void)
+{
+       abort();
+}
+
+static void test_segfault(void)
+{
+       raise(SIGSEGV);
+}
+
+static void test_leak(void)
+{
+       int *p = malloc(sizeof *p);
+       trick_compiler(p);
+}
+
+static void test_undefined(void)
+{
+       int *p = malloc(sizeof *p);
+       printf("%d\n", *p);
+}
+
+static void test_undefined_branch(void)
+{
+       int x;
+       trick_compiler(&x);
+
+       if (x)
+               printf("1\n");
+       else
+               printf("0\n");
+}
+
+static void test_read_after_free(void)
+{
+       int *p = malloc(sizeof *p);
+       free(p);
+
+       printf("%d\n", *p);
+}
+
+static void test_write_after_free(void)
+{
+       int *p = malloc(sizeof *p);
+       free(p);
+
+       *p = 1; /* invalid write to freed memory, matching the test's name */
+}
+
+typedef void (*test_fun)(void);
+
+struct test {
+       const char      *name;
+       test_fun        fun;
+};
+
+#define TEST(f) { .name = #f, .fun = test_##f, }
+static struct test tests[] = {
+       TEST(abort),
+       TEST(segfault),
+       TEST(leak),
+       TEST(undefined),
+       TEST(undefined_branch),
+       TEST(read_after_free),
+       TEST(write_after_free),
+};
+#define ntests (sizeof tests / sizeof *tests)
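+
+/*
+ * Usage sketch: the suite in tests/test_fixture.py invokes this helper by
+ * test name, e.g. "./test_helper abort", and asserts on the resulting exit
+ * status or valgrind report (SIGABRT for "abort", a leak report for
+ * "leak", and so on).
+ */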
+
+int main(int argc, char *argv[])
+{
+       int i;
+
+       if (argc != 2) {
+               fprintf(stderr, "Usage: test_helper <test>\n");
+               exit(1);
+       }
+
+       bool found = false;
+       for (i = 0; i < ntests; ++i)
+               if (!strcmp(argv[1], tests[i].name)) {
+                       found = true;
+                       printf("Running test: %s\n", tests[i].name);
+                       tests[i].fun();
+                       break;
+               }
+
+       if (!found) {
+               fprintf(stderr, "Unable to find test: %s\n", argv[1]);
+               exit(1);
+       }
+
+       return 0;
+}
diff --git a/tests/test_helper_trick.c b/tests/test_helper_trick.c
new file mode 100644 (file)
index 0000000..8bc11fd
--- /dev/null
@@ -0,0 +1,8 @@
+/*
+ * Prevent compiler from optimizing away a variable by referencing it from
+ * another compilation unit.
+ */
+void
+trick_compiler(int *x)
+{
+}
diff --git a/tests/util.py b/tests/util.py
new file mode 100644 (file)
index 0000000..c4d7431
--- /dev/null
@@ -0,0 +1,238 @@
+#!/usr/bin/python3
+
+import os
+import pytest
+import re
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+
+from pathlib import Path
+
+DIR = Path('..')
+BCH_PATH = DIR / 'bcachefs'
+
+VPAT = re.compile(r'ERROR SUMMARY: (\d+) errors from (\d+) contexts')
+
+ENABLE_VALGRIND = os.getenv('BCACHEFS_TEST_USE_VALGRIND', 'no') == 'yes'
+
+class ValgrindFailedError(Exception):
+    def __init__(self, log):
+        self.log = log
+
+def check_valgrind(log):
+    m = VPAT.search(log)
+    if m is None:
+        print('Internal error: valgrind log did not match.')
+        print('-- valgrind log:')
+        print(log)
+        print('-- end log --')
+        assert False
+
+    errors = int(m.group(1))
+    if errors > 0:
+        raise ValgrindFailedError(log)
+
+def run(cmd, *args, valgrind=False, check=False):
+    """Run an external program via subprocess, optionally with valgrind.
+
+    This subprocess wrapper will capture the stdout and stderr. If valgrind is
+    requested, it will be checked for errors and raise a
+    ValgrindFailedError if there's a problem.
+    """
+    cmds = [cmd] + list(args)
+    valgrind = valgrind and ENABLE_VALGRIND
+
+    if valgrind:
+        vout = tempfile.NamedTemporaryFile()
+        vcmd = ['valgrind',
+               '--leak-check=full',
+               '--log-file={}'.format(vout.name)]
+        cmds = vcmd + cmds
+
+    print("Running '{}'".format(cmds))
+    res = subprocess.run(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                         encoding='utf-8', check=check)
+
+    if valgrind:
+        check_valgrind(vout.read().decode('utf-8'))
+
+    return res
+
+def run_bch(*args, **kwargs):
+    """Wrapper to run the bcachefs binary specifically."""
+    cmds = [BCH_PATH] + list(args)
+    return run(*cmds, **kwargs)
+
+def sparse_file(lpath, size):
+    """Construct a sparse file of the specified size.
+
+    This is typically used to create device files for bcachefs.
+    """
+    path = Path(lpath)
+    path.touch(mode=0o600, exist_ok=False)
+    os.truncate(path, size)
+
+    return path
+
+def device_1g(tmpdir):
+    """Default 1g sparse file for use with bcachefs."""
+    path = tmpdir / 'dev-1g'
+    return sparse_file(path, 1024**3)
+
+def format_1g(tmpdir):
+    """Format a default filesystem on a 1g device."""
+    dev = device_1g(tmpdir)
+    run_bch('format', dev, check=True)
+    return dev
+
+def mountpoint(tmpdir):
+    """Construct a mountpoint "mnt" for tests."""
+    path = Path(tmpdir) / 'mnt'
+    path.mkdir(mode = 0o700)
+    return path
+
+class Timestamp:
+    '''Context manager to assist in verifying timestamps.
+
+    Records the range of times which would be valid for an encoded operation to
+    use.
+
+    FIXME: The kernel code is currently using CLOCK_REALTIME_COARSE, but Python
+    doesn't expose this clock (yet).  Probably the kernel shouldn't be using
+    _COARSE anyway, but the mismatch might lead to occasional errors.
+
+    To make sure this doesn't happen, we sleep a fraction of a second in an
+    attempt to guarantee containment.
+
+    N.B. this might be better tested by overriding the clock used in bcachefs.
+
+    '''
+    def __init__(self):
+        self.start = None
+        self.end = None
+
+    def __enter__(self):
+        self.start = time.clock_gettime(time.CLOCK_REALTIME)
+        time.sleep(0.1)
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        time.sleep(0.1)
+        self.end = time.clock_gettime(time.CLOCK_REALTIME)
+
+    def contains(self, test):
+        '''True iff the test time is within the range.'''
+        return self.start <= test <= self.end
+
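+# Timestamp usage sketch (hypothetical): bracket the operation whose encoded
+# time we want to check, then verify the recorded time falls in the window.
+#
+#   with Timestamp() as ts:
+#       path.touch()
+#   assert ts.contains(path.stat().st_mtime)
+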
+class FuseError(Exception):
+    def __init__(self, msg):
+        self.msg = msg
+
+class BFuse:
+    '''bcachefs fuse runner.
+
+    This class runs bcachefs in fusemount mode, and waits until the mount has
+    reached a point suitable for testing the filesystem.
+
+    bcachefs is run under valgrind when BCACHEFS_TEST_USE_VALGRIND=yes, and
+    the valgrind log is checked for errors.
+    '''
+
+    def __init__(self, dev, mnt):
+        self.thread = None
+        self.dev = dev
+        self.mnt = mnt
+        self.ready = threading.Event()
+        self.proc = None
+        self.returncode = None
+        self.stdout = None
+        self.stderr = None
+        self.vout = None
+
+    def run(self):
+        """Background thread which runs "bcachefs fusemount" under valgrind"""
+
+        vlog = None
+        cmd = []
+
+        if ENABLE_VALGRIND:
+            vlog = tempfile.NamedTemporaryFile()
+            cmd += [ 'valgrind',
+                     '--leak-check=full',
+                     '--log-file={}'.format(vlog.name) ]
+
+        cmd += [ BCH_PATH,
+                 'fusemount', '-f', self.dev, self.mnt]
+
+        print("Running {}".format(cmd))
+
+        err = tempfile.TemporaryFile()
+        self.proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=err,
+                                     encoding='utf-8')
+
+        out1 = self.expect(self.proc.stdout, r'^Fuse mount initialized\.$')
+        self.ready.set()
+
+        print("Waiting for process.")
+        (out2, _) = self.proc.communicate()
+        print("Process exited.")
+
+        self.stdout = out1 + out2
+        err.seek(0)
+        self.stderr = err.read().decode('utf-8')
+        self.returncode = self.proc.returncode
+        if vlog:
+            self.vout = vlog.read().decode('utf-8')
+
+    def expect(self, pipe, regex):
+        """Wait for the child process to mount."""
+
+        c = re.compile(regex)
+
+        out = ""
+        for line in pipe:
+            print('Expect line "{}"'.format(line.rstrip()))
+            out += line
+            if c.match(line):
+                print("Matched.")
+                return out
+
+        raise FuseError('stdout did not contain regex "{}"'.format(regex))
+
+    def mount(self):
+        print("Starting fuse thread.")
+
+        assert not self.thread
+        self.thread = threading.Thread(target=self.run)
+        self.thread.start()
+
+        self.ready.wait()
+        print("Fuse is mounted.")
+
+    def unmount(self, timeout=None):
+        print("Unmounting fuse.")
+        run("fusermount3", "-zu", self.mnt)
+
+        if self.thread:
+            print("Waiting for thread to exit.")
+            self.thread.join(timeout)
+            if self.thread.is_alive():
+                self.proc.kill()
+                self.thread.join()
+        else:
+            print("Thread was already done.")
+
+        self.thread = None
+        self.ready.clear()
+
+        if self.vout:
+            check_valgrind(self.vout)
+
+    def verify(self):
+        assert self.returncode == 0
+        assert len(self.stdout) > 0
+        assert len(self.stderr) == 0
+
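+# Typical BFuse lifecycle (sketch): mount in a background thread, exercise
+# the filesystem under bf.mnt, then unmount and check for a clean exit.
+#
+#   bf = BFuse(dev, mnt)
+#   bf.mount()
+#   ...  # run tests against bf.mnt
+#   bf.unmount()
+#   bf.verify()
+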
+def have_fuse():
+    res = run(BCH_PATH, 'fusemount', valgrind=False)
+    return "Please supply a mountpoint." in res.stdout
diff --git a/tools-util.c b/tools-util.c
new file mode 100644 (file)
index 0000000..88e923c
--- /dev/null
@@ -0,0 +1,665 @@
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <blkid.h>
+#include <uuid/uuid.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "linux/sort.h"
+#include "tools-util.h"
+#include "libbcachefs/util.h"
+
+void die(const char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       vfprintf(stderr, fmt, args);
+       va_end(args);
+       fputc('\n', stderr);
+
+       _exit(EXIT_FAILURE);
+}
+
+char *mprintf(const char *fmt, ...)
+{
+       va_list args;
+       char *str;
+       int ret;
+
+       va_start(args, fmt);
+       ret = vasprintf(&str, fmt, args);
+       va_end(args);
+
+       if (ret < 0)
+               die("insufficient memory");
+
+       return str;
+}
+
+void *xcalloc(size_t count, size_t size)
+{
+       void *p = calloc(count, size);
+
+       if (!p)
+               die("insufficient memory");
+
+       return p;
+}
+
+void *xmalloc(size_t size)
+{
+       void *p = malloc(size);
+
+       if (!p)
+               die("insufficient memory");
+
+       memset(p, 0, size);
+       return p;
+}
+
+void *xrealloc(void *p, size_t size)
+{
+       p = realloc(p, size);
+       if (!p)
+               die("insufficient memory");
+
+       return p;
+}
+
+void xpread(int fd, void *buf, size_t count, off_t offset)
+{
+       while (count) {
+               ssize_t r = pread(fd, buf, count, offset);
+
+               if (r < 0)
+                       die("read error: %m");
+               if (!r)
+                       die("pread error: unexpected eof");
+               buf     += r;
+               count   -= r;
+               offset  += r;
+       }
+}
+
+void xpwrite(int fd, const void *buf, size_t count, off_t offset)
+{
+       ssize_t r = pwrite(fd, buf, count, offset);
+
+       if (r != count)
+               die("write error (ret %zi err %m)", r);
+}
+
+struct stat xfstatat(int dirfd, const char *path, int flags)
+{
+       struct stat stat;
+       if (fstatat(dirfd, path, &stat, flags))
+               die("stat error: %m");
+       return stat;
+}
+
+struct stat xfstat(int fd)
+{
+       struct stat stat;
+       if (fstat(fd, &stat))
+               die("stat error: %m");
+       return stat;
+}
+
+struct stat xstat(const char *path)
+{
+       struct stat statbuf;
+       if (stat(path, &statbuf))
+               die("stat error statting %s: %m", path);
+       return statbuf;
+}
+
+/* Formatting: */
+
+int printf_pad(unsigned pad, const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       ret = vprintf(fmt, args);
+       va_end(args);
+
+       while (ret++ < pad)
+              putchar(' ');
+
+       return ret;
+}
+
+struct units_buf __pr_units(s64 _v, enum units units)
+{
+       struct units_buf ret;
+       char *out = ret.b, *end = out + sizeof(ret.b);
+       u64 v = _v;
+
+       if (_v < 0) {
+               out += scnprintf(out, end - out, "-");
+               v = -_v;
+       }
+
+       switch (units) {
+       case BYTES:
+               snprintf(out, end - out, "%llu", v << 9);
+               break;
+       case SECTORS:
+               snprintf(out, end - out, "%llu", v);
+               break;
+       case HUMAN_READABLE:
+               v <<= 9;
+
+               if (v >= 1024) {
+                       int exp = log(v) / log(1024);
+                       snprintf(out, end - out, "%.1f%c",
+                                v / pow(1024, exp),
+                                "KMGTPE"[exp-1]);
+               } else {
+                       snprintf(out, end - out, "%llu", v);
+               }
+
+               break;
+       }
+
+       return ret;
+}
+
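+/*
+ * Example (sketch): _v = 2048 sectors formats as "2048" (SECTORS),
+ * "1048576" (BYTES) or "1.0M" (HUMAN_READABLE).
+ */
+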
+/* Argument parsing stuff: */
+
+/* File parsing (i.e. sysfs) */
+
+char *read_file_str(int dirfd, const char *path)
+{
+       int fd = xopenat(dirfd, path, O_RDONLY);
+       ssize_t len = xfstat(fd).st_size;
+
+       char *buf = xmalloc(len + 1);
+
+       len = read(fd, buf, len);
+       if (len < 0)
+               die("read error: %m");
+
+       buf[len] = '\0';
+       if (len && buf[len - 1] == '\n')
+               buf[len - 1] = '\0';
+       if (!strlen(buf)) {
+               free(buf);
+               buf = NULL;
+       }
+
+       close(fd);
+
+       return buf;
+}
+
+u64 read_file_u64(int dirfd, const char *path)
+{
+       char *buf = read_file_str(dirfd, path);
+       u64 v;
+       if (kstrtou64(buf, 10, &v))
+               die("read_file_u64: error parsing %s (got %s)", path, buf);
+       free(buf);
+       return v;
+}
+
+/* String list options: */
+
+ssize_t read_string_list_or_die(const char *opt, const char * const list[],
+                               const char *msg)
+{
+       ssize_t v = match_string(list, -1, opt);
+       if (v < 0)
+               die("Bad %s %s", msg, opt);
+
+       return v;
+}
+
+/* Returns size of file or block device: */
+u64 get_size(const char *path, int fd)
+{
+       struct stat statbuf = xfstat(fd);
+
+       if (!S_ISBLK(statbuf.st_mode))
+               return statbuf.st_size;
+
+       u64 ret;
+       xioctl(fd, BLKGETSIZE64, &ret);
+       return ret;
+}
+
+/* Returns blocksize in units of 512 byte sectors: */
+unsigned get_blocksize(const char *path, int fd)
+{
+       struct stat statbuf = xfstat(fd);
+
+       if (!S_ISBLK(statbuf.st_mode))
+               return statbuf.st_blksize >> 9;
+
+       unsigned ret;
+       xioctl(fd, BLKPBSZGET, &ret);
+       return ret >> 9;
+}
+
+/* Open a block device, do magic blkid stuff to probe for existing filesystems: */
+int open_for_format(const char *dev, bool force)
+{
+       blkid_probe pr;
+       const char *fs_type = NULL, *fs_label = NULL;
+       size_t fs_type_len, fs_label_len;
+
+       int fd = xopen(dev, O_RDWR|O_EXCL);
+
+       if (force)
+               return fd;
+
+       if (!(pr = blkid_new_probe()))
+               die("blkid error 1");
+       if (blkid_probe_set_device(pr, fd, 0, 0))
+               die("blkid error 2");
+       if (blkid_probe_enable_partitions(pr, true))
+               die("blkid error 3");
+       if (blkid_do_fullprobe(pr) < 0)
+               die("blkid error 4");
+
+       blkid_probe_lookup_value(pr, "TYPE", &fs_type, &fs_type_len);
+       blkid_probe_lookup_value(pr, "LABEL", &fs_label, &fs_label_len);
+
+       if (fs_type) {
+               if (fs_label)
+                       printf("%s contains a %s filesystem labelled '%s'\n",
+                              dev, fs_type, fs_label);
+               else
+                       printf("%s contains a %s filesystem\n",
+                              dev, fs_type);
+               fputs("Proceed anyway?", stdout);
+               if (!ask_yn())
+                       exit(EXIT_FAILURE);
+       }
+
+       blkid_free_probe(pr);
+       return fd;
+}
+
+bool ask_yn(void)
+{
+       const char *short_yes = "yY";
+       char *buf = NULL;
+       size_t buflen = 0;
+       bool ret;
+
+       fputs(" (y,n) ", stdout);
+       fflush(stdout);
+
+       if (getline(&buf, &buflen, stdin) < 0)
+               die("error reading from standard input");
+
+       ret = buf[0] && strchr(short_yes, buf[0]);
+       free(buf);
+       return ret;
+}
+
+static int range_cmp(const void *_l, const void *_r)
+{
+       const struct range *l = _l, *r = _r;
+
+       if (l->start < r->start)
+               return -1;
+       if (l->start > r->start)
+               return  1;
+       return 0;
+}
+
+void ranges_sort_merge(ranges *r)
+{
+       struct range *t, *i;
+       ranges tmp = { NULL };
+
+       sort(&darray_item(*r, 0), darray_size(*r),
+            sizeof(darray_item(*r, 0)), range_cmp, NULL);
+
+       /* Merge contiguous ranges: */
+       darray_foreach(i, *r) {
+               t = tmp.size ?  &tmp.item[tmp.size - 1] : NULL;
+
+               if (t && t->end >= i->start)
+                       t->end = max(t->end, i->end);
+               else
+                       darray_append(tmp, *i);
+       }
+
+       darray_free(*r);
+       *r = tmp;
+}
+
+void ranges_roundup(ranges *r, unsigned block_size)
+{
+       struct range *i;
+
+       darray_foreach(i, *r) {
+               i->start = round_down(i->start, block_size);
+               i->end  = round_up(i->end, block_size);
+       }
+}
+
+void ranges_rounddown(ranges *r, unsigned block_size)
+{
+       struct range *i;
+
+       darray_foreach(i, *r) {
+               i->start = round_up(i->start, block_size);
+               i->end  = round_down(i->end, block_size);
+               i->end  = max(i->end, i->start);
+       }
+}
+
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *iter)
+{
+       struct fiemap_extent e;
+
+       BUG_ON(iter->idx > iter->f.fm_mapped_extents);
+
+       if (iter->idx == iter->f.fm_mapped_extents) {
+               xioctl(iter->fd, FS_IOC_FIEMAP, &iter->f);
+
+               if (!iter->f.fm_mapped_extents)
+                       return (struct fiemap_extent) { .fe_length = 0 };
+
+               iter->idx = 0;
+       }
+
+       e = iter->f.fm_extents[iter->idx++];
+       BUG_ON(!e.fe_length);
+
+       iter->f.fm_start = e.fe_logical + e.fe_length;
+
+       return e;
+}
+
+char *strcmp_prefix(char *a, const char *a_prefix)
+{
+       while (*a_prefix && *a == *a_prefix) {
+               a++;
+               a_prefix++;
+       }
+       return *a_prefix ? NULL : a;
+}
+
+unsigned hatoi_validate(const char *s, const char *msg)
+{
+       u64 v;
+
+       if (bch2_strtoull_h(s, &v))
+               die("bad %s %s", msg, s);
+
+       v /= 512;
+
+       if (v > USHRT_MAX)
+               die("%s too large\n", msg);
+
+       if (!v)
+               die("%s too small\n", msg);
+
+       return v;
+}
+
+/* crc32c */
+
+static u32 crc32c_default(u32 crc, const void *buf, size_t size)
+{
+       static const u32 crc32c_tab[] = {
+               0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+               0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+               0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+               0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+               0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+               0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+               0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+               0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+               0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+               0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+               0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+               0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+               0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+               0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+               0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+               0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+               0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+               0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+               0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+               0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+               0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+               0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+               0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+               0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+               0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+               0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+               0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+               0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+               0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+               0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+               0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+               0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+               0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+               0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+               0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+               0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+               0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+               0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+               0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+               0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+               0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+               0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+               0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+               0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+               0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+               0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+               0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+               0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+               0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+               0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+               0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+               0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+               0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+               0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+               0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+               0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+               0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+               0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+               0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+               0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+               0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+               0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+               0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+               0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+       };
+       const u8 *p = buf;
+
+       while (size--)
+               crc = crc32c_tab[(crc ^ *p++) & 0xFFL] ^ (crc >> 8);
+
+       return crc;
+}
+
+#include <linux/compiler.h>
+
+#ifdef __x86_64__
+
+/*
+ * The outer __x86_64__ guard means long is 8 bytes here, so always emit the
+ * REX.W prefix to get the 64-bit form of the instruction:
+ */
+#define REX_PRE "0x48, "
+
+static u32 crc32c_sse42(u32 crc, const void *buf, size_t size)
+{
+       while (size >= sizeof(long)) {
+               const unsigned long *d = buf;
+
+               __asm__ __volatile__(
+                       ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+                       :"=S"(crc)
+                       :"0"(crc), "c"(*d)
+               );
+               buf     += sizeof(long);
+               size    -= sizeof(long);
+       }
+
+       while (size) {
+               const u8 *d = buf;
+
+               __asm__ __volatile__(
+                       ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+                       :"=S"(crc)
+                       :"0"(crc), "c"(*d)
+               );
+               buf     += 1;
+               size    -= 1;
+       }
+
+       return crc;
+}
+
+#endif
+
+static void *resolve_crc32c(void)
+{
+#ifdef __x86_64__
+       if (__builtin_cpu_supports("sse4.2"))
+               return crc32c_sse42;
+#endif
+       return crc32c_default;
+}
+
+/*
+ * ifunc is buggy and I don't know what breaks it (LTO?)
+ */
+#ifdef HAVE_WORKING_IFUNC
+
+static void *ifunc_resolve_crc32c(void)
+{
+       __builtin_cpu_init();
+
+       return resolve_crc32c;
+}
+
+u32 crc32c(u32, const void *, size_t)
+       __attribute__((ifunc("ifunc_resolve_crc32c")));
+
+#else
+
+u32 crc32c(u32 crc, const void *buf, size_t size)
+{
+       static u32 (*real_crc32c)(u32, const void *, size_t);
+
+       if (unlikely(!real_crc32c))
+               real_crc32c = resolve_crc32c();
+
+       return real_crc32c(crc, buf, size);
+}
+
+#endif /* HAVE_WORKING_IFUNC */
+
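+/*
+ * Hypothetical usage sketch, following the common crc32c convention of
+ * seeding with all-ones and inverting the result:
+ *
+ *     u32 csum = ~crc32c(~0, buf, len);
+ */
+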
+char *dev_to_name(dev_t dev)
+{
+       char *line = NULL, *name = NULL;
+       size_t n = 0;
+
+       FILE *f = fopen("/proc/partitions", "r");
+       if (!f)
+               die("error opening /proc/partitions: %m");
+
+       while (getline(&line, &n, f) != -1) {
+               unsigned ma, mi;
+               u64 sectors;
+
+               name = xrealloc(name, n + 1);
+
+               if (sscanf(line, " %u %u %llu %s", &ma, &mi, &sectors, name) == 4 &&
+                   ma == major(dev) && mi == minor(dev))
+                       goto found;
+       }
+
+       free(name);
+       name = NULL;
+found:
+       fclose(f);
+       free(line);
+       return name;
+}
+
+char *dev_to_path(dev_t dev)
+{
+       char *name = dev_to_name(dev);
+       if (!name)
+               return NULL;
+
+       char *path = mprintf("/dev/%s", name);
+
+       free(name);
+       return path;
+}
+
+struct mntent *dev_to_mount(char *dev)
+{
+       struct mntent *mnt, *ret = NULL;
+       FILE *f = setmntent("/proc/mounts", "r");
+       if (!f)
+               die("error opening /proc/mounts: %m");
+
+       struct stat d1 = xstat(dev);
+
+       while ((mnt = getmntent(f))) {
+               char *d, *p = mnt->mnt_fsname;
+
+               while ((d = strsep(&p, ":"))) {
+                       struct stat d2;
+
+                       if (stat(d, &d2))
+                               continue;
+
+                       if (S_ISBLK(d1.st_mode) != S_ISBLK(d2.st_mode))
+                               continue;
+
+                       if (S_ISBLK(d1.st_mode)) {
+                               if (d1.st_rdev != d2.st_rdev)
+                                       continue;
+                       } else {
+                               if (d1.st_dev != d2.st_dev ||
+                                   d1.st_ino != d2.st_ino)
+                                       continue;
+                       }
+
+                       ret = mnt;
+                       goto found;
+               }
+       }
+found:
+       fclose(f);
+       return ret;
+}
+
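+/* Returns 0 if @dev is not mounted, 1 if mounted read-only, 2 if read-write: */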
+int dev_mounted(char *dev)
+{
+       struct mntent *mnt = dev_to_mount(dev);
+
+       if (!mnt)
+               return 0;
+       if (hasmntopt(mnt, "ro"))
+               return 1;
+       return 2;
+}
diff --git a/tools-util.h b/tools-util.h
new file mode 100644 (file)
index 0000000..d6814bc
--- /dev/null
@@ -0,0 +1,175 @@
+#ifndef _TOOLS_UTIL_H
+#define _TOOLS_UTIL_H
+
+#include <errno.h>
+#include <mntent.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/bug.h>
+#include <linux/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include "ccan/darray/darray.h"
+
+void die(const char *, ...);
+char *mprintf(const char *, ...)
+       __attribute__ ((format (printf, 1, 2)));
+void *xcalloc(size_t, size_t);
+void *xmalloc(size_t);
+void *xrealloc(void *, size_t);
+void xpread(int, void *, size_t, off_t);
+void xpwrite(int, const void *, size_t, off_t);
+struct stat xfstatat(int, const char *, int);
+struct stat xfstat(int);
+struct stat xstat(const char *);
+
+#define xopenat(_dirfd, _path, ...)                                    \
+({                                                                     \
+       int _fd = openat((_dirfd), (_path), __VA_ARGS__);               \
+       if (_fd < 0)                                                    \
+               die("Error opening %s: %m", (_path));                   \
+       _fd;                                                            \
+})
+
+#define xopen(...)     xopenat(AT_FDCWD, __VA_ARGS__)
+
+#define xioctl(_fd, _nr, ...)                                          \
+({                                                                     \
+       int _ret = ioctl((_fd), (_nr), ##__VA_ARGS__);                  \
+       if (_ret < 0)                                                   \
+               die(#_nr " ioctl error: %m");                           \
+       _ret;                                                           \
+})
+
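+/*
+ * Both wrappers die() on error, so callers can use the result directly.
+ * A hypothetical sketch:
+ *
+ *     int fd = xopen("/dev/sda", O_RDONLY);
+ *     u64 size;
+ *     xioctl(fd, BLKGETSIZE64, &size);
+ */
+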
+int printf_pad(unsigned pad, const char *fmt, ...);
+
+enum units {
+       BYTES,
+       SECTORS,
+       HUMAN_READABLE,
+};
+
+struct units_buf {
+       char    b[20];
+};
+
+struct units_buf __pr_units(s64, enum units);
+
+#define pr_units(_v, _u)       &(__pr_units(_v, _u).b[0])
+
+char *read_file_str(int, const char *);
+u64 read_file_u64(int, const char *);
+
+ssize_t read_string_list_or_die(const char *, const char * const[],
+                               const char *);
+
+u64 get_size(const char *, int);
+unsigned get_blocksize(const char *, int);
+int open_for_format(const char *, bool);
+
+bool ask_yn(void);
+
+struct range {
+       u64             start;
+       u64             end;
+};
+
+typedef darray(struct range) ranges;
+
+static inline void range_add(ranges *data, u64 offset, u64 size)
+{
+       darray_append(*data, (struct range) {
+               .start = offset,
+               .end = offset + size
+       });
+}
+
+void ranges_sort_merge(ranges *);
+void ranges_roundup(ranges *, unsigned);
+void ranges_rounddown(ranges *, unsigned);
+
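+/*
+ * Sketch of the intended flow (hypothetical offsets): collect extents with
+ * range_add(), then canonicalize overlaps before use:
+ *
+ *     ranges r = { NULL };
+ *     range_add(&r, 0, 4096);
+ *     range_add(&r, 2048, 4096);
+ *     ranges_sort_merge(&r);          (now a single range [0, 6144))
+ */
+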
+struct hole_iter {
+       ranges          r;
+       size_t          idx;
+       u64             end;
+};
+
+static inline struct range hole_iter_next(struct hole_iter *iter)
+{
+       struct range r = {
+               .start  = iter->idx ? iter->r.item[iter->idx - 1].end : 0,
+               .end    = iter->idx < iter->r.size
+                       ? iter->r.item[iter->idx].start : iter->end,
+       };
+
+       BUG_ON(r.start > r.end);
+
+       iter->idx++;
+       return r;
+}
+
+#define for_each_hole(_iter, _ranges, _end, _i)                                \
+       for (_iter = (struct hole_iter) { .r = _ranges, .end = _end };  \
+            (_iter.idx <= _iter.r.size &&                              \
+             (_i = hole_iter_next(&_iter), true));)
+
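+/*
+ * for_each_hole() usage sketch (hypothetical): visit the gaps between the
+ * sorted, merged ranges in [0, dev_size):
+ *
+ *     struct hole_iter iter;
+ *     struct range hole;
+ *     for_each_hole(iter, r, dev_size, hole)
+ *             printf("hole at %llu-%llu\n", hole.start, hole.end);
+ */
+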
+#include <linux/fiemap.h>
+
+struct fiemap_iter {
+       struct fiemap           f;
+       struct fiemap_extent    fe[1024];
+       unsigned                idx;
+       int                     fd;
+};
+
+static inline void fiemap_iter_init(struct fiemap_iter *iter, int fd)
+{
+       memset(iter, 0, sizeof(*iter));
+
+       iter->f.fm_extent_count = ARRAY_SIZE(iter->fe);
+       iter->f.fm_length       = FIEMAP_MAX_OFFSET;
+       iter->fd                = fd;
+}
+
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *);
+
+#define fiemap_for_each(fd, iter, extent)                              \
+       for (fiemap_iter_init(&iter, fd);                               \
+            (extent = fiemap_iter_next(&iter)).fe_length;)
+
+char *strcmp_prefix(char *, const char *);
+
+unsigned hatoi_validate(const char *, const char *);
+
+u32 crc32c(u32, const void *, size_t);
+
+char *dev_to_name(dev_t);
+char *dev_to_path(dev_t);
+struct mntent *dev_to_mount(char *);
+int dev_mounted(char *);
+
+#define args_shift(_nr)                                                        \
+do {                                                                   \
+       unsigned _n = min((_nr), argc);                                 \
+       argc -= _n;                                                     \
+       argv += _n;                                                     \
+} while (0)
+
+#define arg_pop()                                                      \
+({                                                                     \
+       char *_ret = argc ? argv[0] : NULL;                             \
+       if (_ret)                                                       \
+               args_shift(1);                                          \
+       _ret;                                                           \
+})
+
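+/*
+ * Hypothetical sketch: arg_pop() consumes argv[0] (if any) and shifts the
+ * remaining arguments, so a subcommand can drain its operands:
+ *
+ *     char *dev;
+ *     while ((dev = arg_pop()))
+ *             process_device(dev);
+ */
+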
+#endif /* _TOOLS_UTIL_H */