X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fsuper-io.c;h=3903b730bba31bd8f0f61433109a0fe76d4e9095;hb=700d013b5280b72a1fb3830d8f70ecce5decb0ab;hp=83523572881a9890ea1bc7a8ac8158e64ca66552;hpb=cc6479303f0672d5e07899009c64eb32821e170b;p=bcachefs-tools-debian
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 8352357..3903b73 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -1,10 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
#include "checksum.h"
#include "disk_groups.h"
+#include "ec.h"
#include "error.h"
#include "io.h"
#include "journal.h"
+#include "journal_io.h"
+#include "journal_seq_blacklist.h"
#include "replicas.h"
#include "quota.h"
#include "super-io.h"
@@ -44,10 +50,11 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;
- BUG_ON(get_order(__vstruct_bytes(struct bch_sb, sb_u64s)) >
- sb->page_order);
+ BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size);
- if (!f) {
+ if (!f && !u64s) {
+ /* nothing to do: */
+ } else if (!f) {
f = vstruct_last(sb->sb);
memset(f, 0, sizeof(u64) * u64s);
f->u64s = cpu_to_le32(u64s);
@@ -93,18 +100,23 @@ void bch2_free_super(struct bch_sb_handle *sb)
if (!IS_ERR_OR_NULL(sb->bdev))
blkdev_put(sb->bdev, sb->mode);
- free_pages((unsigned long) sb->sb, sb->page_order);
+ kfree(sb->sb);
memset(sb, 0, sizeof(*sb));
}
int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
{
size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
- unsigned order = get_order(new_bytes);
+ size_t new_buffer_size;
struct bch_sb *new_sb;
struct bio *bio;
- if (sb->sb && sb->page_order >= order)
+ if (sb->bdev)
+ new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev));
+
+ new_buffer_size = roundup_pow_of_two(new_bytes);
+
+ if (sb->sb && sb->buffer_size >= new_buffer_size)
return 0;
if (sb->have_layout) {
@@ -119,14 +131,15 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
}
}
- if (sb->page_order >= order && sb->sb)
+ if (sb->buffer_size >= new_buffer_size && sb->sb)
return 0;
if (dynamic_fault("bcachefs:add:super_realloc"))
return -ENOMEM;
if (sb->have_bio) {
- bio = bio_kmalloc(GFP_KERNEL, 1 << order);
+ bio = bio_kmalloc(GFP_KERNEL,
+ DIV_ROUND_UP(new_buffer_size, PAGE_SIZE));
if (!bio)
return -ENOMEM;
@@ -135,17 +148,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
sb->bio = bio;
}
- new_sb = (void *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO);
if (!new_sb)
return -ENOMEM;
- if (sb->sb)
- memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order);
-
- free_pages((unsigned long) sb->sb, sb->page_order);
sb->sb = new_sb;
-
- sb->page_order = order;
+ sb->buffer_size = new_buffer_size;
return 0;
}
@@ -180,6 +188,7 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
}
}
+ f = bch2_sb_field_get(sb->sb, type);
f = __bch2_sb_field_resize(sb, f, u64s);
if (f)
f->type = cpu_to_le32(type);
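
The hunks above replace page-order-based superblock buffer sizing (sb->page_order, __get_free_pages) with a plain byte count (sb->buffer_size, krealloc). A rough sketch of the new sizing rule follows, assuming only that __vstruct_bytes() amounts to a fixed header plus u64s 64-bit words; the sketch_* names are illustrative, not bcachefs API:

	#include <stddef.h>
	#include <stdint.h>

	/* round up to the next power of two, as roundup_pow_of_two() does */
	static size_t sketch_roundup_pow_of_two(size_t n)
	{
		size_t r = 1;

		while (r < n)
			r <<= 1;
		return r;
	}

	/*
	 * Mirrors bch2_sb_realloc(): the buffer must hold the header plus
	 * u64s words of field data, must be at least one logical block so
	 * it can be submitted to the device as-is, and is kept power-of-two
	 * sized so repeated field resizes rarely reallocate.
	 */
	static size_t sketch_sb_buffer_size(size_t header_bytes, size_t u64s,
					    size_t logical_block_size)
	{
		size_t bytes = header_bytes + u64s * sizeof(uint64_t);

		if (bytes < logical_block_size)
			bytes = logical_block_size;
		return sketch_roundup_pow_of_two(bytes);
	}

With that in place, the BUG_ON() in __bch2_sb_field_resize() reduces to a byte comparison against sb->buffer_size rather than a page-order comparison.
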
@@ -231,21 +240,25 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
struct bch_sb_field *f;
struct bch_sb_field_members *mi;
const char *err;
+ u32 version, version_min;
u16 block_size;
- if (le64_to_cpu(sb->version) < BCH_SB_VERSION_MIN ||
- le64_to_cpu(sb->version) > BCH_SB_VERSION_MAX)
- return "Unsupported superblock version";
+ version = le16_to_cpu(sb->version);
+ version_min = version >= bcachefs_metadata_version_new_versioning
+ ? le16_to_cpu(sb->version_min)
+ : version;
+
+ if (version >= bcachefs_metadata_version_max ||
+ version_min < bcachefs_metadata_version_min)
+ return "Unsupported superblock version";
+
+ if (version_min > version)
+ return "Bad minimum version";
if (sb->features[1] ||
(le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
return "Filesystem has incompatible features";
- if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) {
- SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7);
- SET_BCH_SB_POSIX_ACL(sb, 1);
- }
-
block_size = le16_to_cpu(sb->block_size);
if (!is_power_of_2(block_size) ||
@@ -264,19 +277,19 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
return "Bad number of member devices";
if (!BCH_SB_META_REPLICAS_WANT(sb) ||
- BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+ BCH_SB_META_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX)
return "Invalid number of metadata replicas";
if (!BCH_SB_META_REPLICAS_REQ(sb) ||
- BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+ BCH_SB_META_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
return "Invalid number of metadata replicas";
if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
- BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+ BCH_SB_DATA_REPLICAS_WANT(sb) > BCH_REPLICAS_MAX)
return "Invalid number of data replicas";
if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
- BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+ BCH_SB_DATA_REPLICAS_REQ(sb) > BCH_REPLICAS_MAX)
return "Invalid number of data replicas";
if (BCH_SB_META_CSUM_TYPE(sb) >= BCH_CSUM_OPT_NR)
@@ -332,13 +345,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
return err;
}
- if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_NONCE_V1 &&
- bch2_sb_get_crypt(sb) &&
- BCH_SB_INITIALIZED(sb))
- return "Incompatible extent nonces";
-
- sb->version = cpu_to_le64(BCH_SB_VERSION_MAX);
-
return NULL;
}
@@ -355,20 +361,28 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.uuid = src->uuid;
c->sb.user_uuid = src->user_uuid;
+ c->sb.version = le16_to_cpu(src->version);
+ c->sb.version_min = le16_to_cpu(src->version_min);
c->sb.nr_devices = src->nr_devices;
c->sb.clean = BCH_SB_CLEAN(src);
c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src);
- c->sb.time_base_lo = le64_to_cpu(src->time_base_lo);
+
+ c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
+ c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;
+
+ /* XXX this is wrong, we need a 96 or 128 bit integer type */
+ c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
+ c->sb.nsec_per_time_unit);
c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
- c->sb.time_precision = le32_to_cpu(src->time_precision);
+
c->sb.features = le64_to_cpu(src->features[0]);
+ c->sb.compat = le64_to_cpu(src->compat[0]);
for_each_member_device(ca, c, i)
ca->mi = bch2_mi_to_cpu(mi->members + i);
}
-/* doesn't copy member info */
static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
{
struct bch_sb_field *src_f, *dst_f;
@@ -376,6 +390,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
unsigned i;
dst->version = src->version;
+ dst->version_min = src->version_min;
dst->seq = src->seq;
dst->uuid = src->uuid;
dst->user_uuid = src->user_uuid;
@@ -424,6 +439,11 @@ int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
__copy_super(&c->disk_sb, src);
+ if (BCH_SB_HAS_ERRORS(c->disk_sb.sb))
+ set_bit(BCH_FS_ERROR, 
&c->flags); + if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) + set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags); + ret = bch2_sb_replicas_to_cpu_replicas(c); if (ret) return ret; @@ -465,9 +485,8 @@ reread: bio_reset(sb->bio); bio_set_dev(sb->bio, sb->bdev); sb->bio->bi_iter.bi_sector = offset; - sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order; bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); - bch2_bio_map(sb->bio, sb->sb); + bch2_bio_map(sb->bio, sb->sb, sb->buffer_size); if (submit_bio_wait(sb->bio)) return "IO error"; @@ -475,8 +494,8 @@ reread: if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) return "Not a bcachefs superblock"; - if (le64_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN || - le64_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX) + if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || + le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) return "Unsupported superblock version"; bytes = vstruct_bytes(sb->sb); @@ -484,7 +503,7 @@ reread: if (bytes > 512 << sb->sb->layout.sb_max_size_bits) return "Bad superblock: too big"; - if (get_order(bytes) > sb->page_order) { + if (bytes > sb->buffer_size) { if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s))) return "cannot allocate memory"; goto reread; @@ -500,6 +519,8 @@ reread: if (bch2_crc_cmp(csum, sb->sb->csum)) return "bad checksum reading superblock"; + sb->seq = le64_to_cpu(sb->sb->seq); + return NULL; } @@ -567,13 +588,12 @@ int bch2_read_super(const char *path, struct bch_opts *opts, bio_reset(sb->bio); bio_set_dev(sb->bio, sb->bdev); sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; - sb->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout); bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META); /* * use sb buffer to read layout, since sb buffer is page aligned but * layout won't be: */ - bch2_bio_map(sb->bio, sb->sb); + bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); err = "IO error"; if (submit_bio_wait(sb->bio)) @@ -606,9 +626,6 @@ got_super: bdev_logical_block_size(sb->bdev)) goto err; - if (sb->mode & FMODE_WRITE) - bdev_get_queue(sb->bdev)->backing_dev_info->capabilities - |= BDI_CAP_STABLE_WRITES; ret = 0; sb->have_layout = true; out: @@ -628,13 +645,34 @@ static void write_super_endio(struct bio *bio) /* XXX: return errors directly */ - if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write")) + if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s", + bch2_blk_status_to_str(bio->bi_status))) ca->sb_write_error = 1; closure_put(&ca->fs->sb_write); percpu_ref_put(&ca->io_ref); } +static void read_back_super(struct bch_fs *c, struct bch_dev *ca) +{ + struct bch_sb *sb = ca->disk_sb.sb; + struct bio *bio = ca->disk_sb.bio; + + bio_reset(bio); + bio_set_dev(bio, ca->disk_sb.bdev); + bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); + bio->bi_end_io = write_super_endio; + bio->bi_private = ca; + bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META); + bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); + + this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], + bio_sectors(bio)); + + percpu_ref_get(&ca->io_ref); + closure_bio_submit(bio, &c->sb_write); +} + static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) { struct bch_sb *sb = ca->disk_sb.sb; @@ -642,29 +680,28 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) sb->offset = sb->layout.sb_offset[idx]; - SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum); + SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, 
false)); sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), null_nonce(), sb); bio_reset(bio); bio_set_dev(bio, ca->disk_sb.bdev); bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); - bio->bi_iter.bi_size = - roundup((size_t) vstruct_bytes(sb), - bdev_logical_block_size(ca->disk_sb.bdev)); bio->bi_end_io = write_super_endio; bio->bi_private = ca; bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META); - bch2_bio_map(bio, sb); + bch2_bio_map(bio, sb, + roundup((size_t) vstruct_bytes(sb), + bdev_logical_block_size(ca->disk_sb.bdev))); - this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_SB], + this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], bio_sectors(bio)); percpu_ref_get(&ca->io_ref); closure_bio_submit(bio, &c->sb_write); } -void bch2_write_super(struct bch_fs *c) +int bch2_write_super(struct bch_fs *c) { struct closure *cl = &c->sb_write; struct bch_dev *ca; @@ -672,6 +709,11 @@ void bch2_write_super(struct bch_fs *c) const char *err; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; + unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; + int ret = 0; + + if (c->opts.very_degraded) + degraded_flags |= BCH_FORCE_IF_LOST; lockdep_assert_held(&c->sb_lock); @@ -680,6 +722,13 @@ void bch2_write_super(struct bch_fs *c) le64_add_cpu(&c->disk_sb.sb->seq, 1); + if (test_bit(BCH_FS_ERROR, &c->flags)) + SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); + if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags)) + SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1); + + SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); + for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); @@ -687,12 +736,12 @@ void bch2_write_super(struct bch_fs *c) err = bch2_sb_validate(&ca->disk_sb); if (err) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err); + ret = -1; goto out; } } - if (c->opts.nochanges || - test_bit(BCH_FS_ERROR, &c->flags)) + if (c->opts.nochanges) goto out; for_each_online_member(ca, c, i) { @@ -700,10 +749,27 @@ void bch2_write_super(struct bch_fs *c) ca->sb_write_error = 0; } + for_each_online_member(ca, c, i) + read_back_super(c, ca); + closure_sync(cl); + + for_each_online_member(ca, c, i) { + if (!ca->sb_write_error && + ca->disk_sb.seq != + le64_to_cpu(ca->sb_read_scratch->seq)) { + bch2_fs_fatal_error(c, + "Superblock modified by another process"); + percpu_ref_put(&ca->io_ref); + ret = -EROFS; + goto out; + } + } + do { wrote = false; for_each_online_member(ca, c, i) - if (sb < ca->disk_sb.sb->layout.nr_superblocks) { + if (!ca->sb_write_error && + sb < ca->disk_sb.sb->layout.nr_superblocks) { write_one_super(c, ca, sb); wrote = true; } @@ -711,22 +777,23 @@ void bch2_write_super(struct bch_fs *c) sb++; } while (wrote); - for_each_online_member(ca, c, i) + for_each_online_member(ca, c, i) { if (ca->sb_write_error) __clear_bit(ca->dev_idx, sb_written.d); + else + ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); + } nr_wrote = dev_mask_nr(&sb_written); can_mount_with_written = - bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), - BCH_FORCE_IF_DEGRADED); + bch2_have_enough_devs(c, sb_written, degraded_flags, false); for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) sb_written.d[i] = ~sb_written.d[i]; can_mount_without_written = - bch2_have_enough_devs(__bch2_replicas_status(c, sb_written), - BCH_FORCE_IF_DEGRADED); + bch2_have_enough_devs(c, sb_written, degraded_flags, false); /* * If we would be able to mount _without_ the devices we successfully @@ -736,13 +803,27 @@ void bch2_write_super(struct bch_fs *c) * written anything (new filesystem), 
we continue if we'd be able to * mount with the devices we did successfully write to: */ - bch2_fs_fatal_err_on(!nr_wrote || - (can_mount_without_written && - !can_mount_with_written), c, - "Unable to write superblock to sufficient devices"); + if (bch2_fs_fatal_err_on(!nr_wrote || + !can_mount_with_written || + (can_mount_without_written && + !can_mount_with_written), c, + "Unable to write superblock to sufficient devices")) + ret = -1; out: /* Make new options visible after they're persistent: */ bch2_sb_update(c); + return ret; +} + +void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) +{ + mutex_lock(&c->sb_lock); + if (!(c->sb.features & (1ULL << feat))) { + c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); + + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); } /* BCH_SB_FIELD_journal: */ @@ -842,12 +923,6 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb, return "bucket size smaller than btree node size"; } - if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) - for (m = mi->members; - m < mi->members + sb->nr_devices; - m++) - SET_BCH_MEMBER_DATA_ALLOWED(m, ~0); - return NULL; } @@ -877,29 +952,170 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { /* BCH_SB_FIELD_clean: */ -void bch2_fs_mark_clean(struct bch_fs *c, bool clean) +int bch2_sb_clean_validate(struct bch_fs *c, struct bch_sb_field_clean *clean, int write) +{ + struct jset_entry *entry; + int ret; + + for (entry = clean->start; + entry < (struct jset_entry *) vstruct_end(&clean->field); + entry = vstruct_next(entry)) { + ret = bch2_journal_entry_validate(c, "superblock", entry, + le16_to_cpu(c->disk_sb.sb->version), + BCH_SB_BIG_ENDIAN(c->disk_sb.sb), + write); + if (ret) + return ret; + } + + return 0; +} + +int bch2_fs_mark_dirty(struct bch_fs *c) +{ + int ret; + + /* + * Unconditionally write superblock, to verify it hasn't changed before + * we go rw: + */ + + mutex_lock(&c->sb_lock); + SET_BCH_SB_CLEAN(c->disk_sb.sb, false); + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); + c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); + ret = bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + return ret; +} + +static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size) +{ + struct jset_entry *entry = *end; + unsigned u64s = DIV_ROUND_UP(size, sizeof(u64)); + + memset(entry, 0, u64s * sizeof(u64)); + /* + * The u64s field counts from the start of data, ignoring the shared + * fields. 
+ */ + entry->u64s = cpu_to_le16(u64s - 1); + + *end = vstruct_next(*end); + return entry; +} + +void bch2_journal_super_entries_add_common(struct bch_fs *c, + struct jset_entry **end, + u64 journal_seq) +{ + struct bch_dev *ca; + unsigned i, dev; + + percpu_down_read(&c->mark_lock); + + if (!journal_seq) { + for (i = 0; i < ARRAY_SIZE(c->usage); i++) + bch2_fs_usage_acc_to_base(c, i); + } else { + bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK); + } + + { + struct jset_entry_usage *u = + container_of(jset_entry_init(end, sizeof(*u)), + struct jset_entry_usage, entry); + + u->entry.type = BCH_JSET_ENTRY_usage; + u->entry.btree_id = FS_USAGE_INODES; + u->v = cpu_to_le64(c->usage_base->nr_inodes); + } + + { + struct jset_entry_usage *u = + container_of(jset_entry_init(end, sizeof(*u)), + struct jset_entry_usage, entry); + + u->entry.type = BCH_JSET_ENTRY_usage; + u->entry.btree_id = FS_USAGE_KEY_VERSION; + u->v = cpu_to_le64(atomic64_read(&c->key_version)); + } + + for (i = 0; i < BCH_REPLICAS_MAX; i++) { + struct jset_entry_usage *u = + container_of(jset_entry_init(end, sizeof(*u)), + struct jset_entry_usage, entry); + + u->entry.type = BCH_JSET_ENTRY_usage; + u->entry.btree_id = FS_USAGE_RESERVED; + u->entry.level = i; + u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]); + } + + for (i = 0; i < c->replicas.nr; i++) { + struct bch_replicas_entry *e = + cpu_replicas_entry(&c->replicas, i); + struct jset_entry_data_usage *u = + container_of(jset_entry_init(end, sizeof(*u) + e->nr_devs), + struct jset_entry_data_usage, entry); + + u->entry.type = BCH_JSET_ENTRY_data_usage; + u->v = cpu_to_le64(c->usage_base->replicas[i]); + memcpy(&u->r, e, replicas_entry_bytes(e)); + } + + for_each_member_device(ca, c, dev) { + unsigned b = sizeof(struct jset_entry_dev_usage) + + sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR; + struct jset_entry_dev_usage *u = + container_of(jset_entry_init(end, b), + struct jset_entry_dev_usage, entry); + + u->entry.type = BCH_JSET_ENTRY_dev_usage; + u->dev = cpu_to_le32(dev); + u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec); + u->buckets_unavailable = cpu_to_le64(ca->usage_base->buckets_unavailable); + + for (i = 0; i < BCH_DATA_NR; i++) { + u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets); + u->d[i].sectors = cpu_to_le64(ca->usage_base->d[i].sectors); + u->d[i].fragmented = cpu_to_le64(ca->usage_base->d[i].fragmented); + } + } + + percpu_up_read(&c->mark_lock); + + for (i = 0; i < 2; i++) { + struct jset_entry_clock *clock = + container_of(jset_entry_init(end, sizeof(*clock)), + struct jset_entry_clock, entry); + + clock->entry.type = BCH_JSET_ENTRY_clock; + clock->rw = i; + clock->time = cpu_to_le64(atomic64_read(&c->io_clock[i].now)); + } +} + +void bch2_fs_mark_clean(struct bch_fs *c) { struct bch_sb_field_clean *sb_clean; - unsigned u64s = sizeof(*sb_clean) / sizeof(u64); struct jset_entry *entry; - struct btree_root *r; + unsigned u64s; + int ret; mutex_lock(&c->sb_lock); - if (clean == BCH_SB_CLEAN(c->disk_sb.sb)) + if (BCH_SB_CLEAN(c->disk_sb.sb)) goto out; - SET_BCH_SB_CLEAN(c->disk_sb.sb, clean); - - if (!clean) - goto write_super; + SET_BCH_SB_CLEAN(c->disk_sb.sb, true); - mutex_lock(&c->btree_root_lock); + c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); + c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata); + c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates)); + c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << 
BCH_FEATURE_btree_updates_journalled)); - for (r = c->btree_roots; - r < c->btree_roots + BTREE_ID_NR; - r++) - if (r->alive) - u64s += jset_u64s(r->key.u64s); + u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s); if (!sb_clean) { @@ -908,31 +1124,29 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean) } sb_clean->flags = 0; - sb_clean->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); - sb_clean->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); - sb_clean->journal_seq = journal_cur_seq(&c->journal) - 1; + sb_clean->journal_seq = cpu_to_le64(journal_cur_seq(&c->journal) - 1); + + /* Trying to catch outstanding bug: */ + BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX); entry = sb_clean->start; + bch2_journal_super_entries_add_common(c, &entry, 0); + entry = bch2_btree_roots_to_journal_entries(c, entry, entry); + BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); + memset(entry, 0, vstruct_end(&sb_clean->field) - (void *) entry); - for (r = c->btree_roots; - r < c->btree_roots + BTREE_ID_NR; - r++) - if (r->alive) { - entry->u64s = r->key.u64s; - entry->btree_id = r - c->btree_roots; - entry->level = r->level; - entry->type = BCH_JSET_ENTRY_btree_root; - bkey_copy(&entry->start[0], &r->key); - entry = vstruct_next(entry); - BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); - } - - BUG_ON(entry != vstruct_end(&sb_clean->field)); + /* + * this should be in the write path, and we should be validating every + * superblock section: + */ + ret = bch2_sb_clean_validate(c, sb_clean, WRITE); + if (ret) { + bch_err(c, "error writing marking filesystem clean: validate error"); + goto out; + } - mutex_unlock(&c->btree_root_lock); -write_super: bch2_write_super(c); out: mutex_unlock(&c->sb_lock);
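
A closing note on the jset_entry_init() helper added above: a journal-set entry's u64s field counts only the payload words, excluding the one-u64 entry header, which is why the helper stores u64s - 1. A self-contained sketch of that arithmetic, using a simplified stand-in layout (the sketch_* names are illustrative, not the exact struct jset_entry definition):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	struct sketch_jset_entry {
		uint16_t u64s;		/* payload words; header excluded */
		uint8_t  btree_id;
		uint8_t  level;
		uint8_t  type;
		uint8_t  pad[3];
		uint64_t data[];	/* payload follows the 8-byte header */
	};

	#define SKETCH_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	static struct sketch_jset_entry *
	sketch_jset_entry_init(void *buf, size_t size)
	{
		struct sketch_jset_entry *entry = buf;
		unsigned u64s = SKETCH_DIV_ROUND_UP(size, sizeof(uint64_t));

		/* zero header and payload, then record payload words only */
		memset(entry, 0, u64s * sizeof(uint64_t));
		entry->u64s = (uint16_t)(u64s - 1);
		return entry;
	}

For example, a 16-byte usage entry is 16/8 = 2 words total, so entry->u64s is stored as 1, and vstruct_next() advances by 1 + u64s = 2 words, landing exactly on the next entry. That convention is what lets bch2_journal_super_entries_add_common() pack the usage, data-usage, dev-usage, and clock entries back to back in the clean section, with bch2_sb_clean_validate() walking the same layout on both read and write.
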