X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fsuper.c;h=1feb7dee2e0c1ca744683cb9158f8596fdcb4f32;hb=e61b61c03bf1f1eedc5e2dbd6887f77e45144a31;hp=2096c76ed81d39de8f8e1461dfbe891d6bd1834a;hpb=e7c4380a892297d2f65e1c317a1b6d4c67378299;p=bcachefs-tools-debian diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 2096c76..1feb7de 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -39,6 +39,7 @@ #include "rebalance.h" #include "recovery.h" #include "replicas.h" +#include "subvolume.h" #include "super.h" #include "super-io.h" #include "sysfs.h" @@ -99,7 +100,7 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned); static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev) +struct bch_fs *bch2_dev_to_fs(dev_t dev) { struct bch_fs *c; struct bch_dev *ca; @@ -110,7 +111,7 @@ struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev) list_for_each_entry(c, &bch_fs_list, list) for_each_member_device_rcu(ca, c, i, NULL) - if (ca->disk_sb.bdev == bdev) { + if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) { closure_get(&c->cl); goto found; } @@ -269,7 +270,7 @@ static void bch2_writes_disabled(struct percpu_ref *writes) void bch2_fs_read_only(struct bch_fs *c) { if (!test_bit(BCH_FS_RW, &c->flags)) { - BUG_ON(c->journal.reclaim_thread); + bch2_journal_reclaim_stop(&c->journal); return; } @@ -286,7 +287,6 @@ void bch2_fs_read_only(struct bch_fs *c) percpu_ref_kill(&c->writes); cancel_work_sync(&c->ec_stripe_delete_work); - cancel_delayed_work(&c->pd_controllers_update); /* * If we're not doing an emergency shutdown, we want to wait on @@ -371,8 +371,6 @@ static int bch2_fs_read_write_late(struct bch_fs *c) return ret; } - schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); - schedule_work(&c->ec_stripe_delete_work); return 0; @@ -384,6 +382,11 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) unsigned i; int ret; + if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) { + bch_err(c, "cannot go rw, unfixed btree errors"); + return -EROFS; + } + if (test_bit(BCH_FS_RW, &c->flags)) return 0; @@ -396,6 +399,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) (!early || c->opts.read_only))) return -EROFS; + bch_info(c, "going read-write"); + ret = bch2_fs_mark_dirty(c); if (ret) goto err; @@ -427,12 +432,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) for_each_rw_member(ca, c, i) bch2_wake_allocator(ca); - ret = bch2_journal_reclaim_start(&c->journal); - if (ret) { - bch_err(c, "error starting journal reclaim: %i", ret); - return ret; - } - if (!early) { ret = bch2_fs_read_write_late(c); if (ret) @@ -441,6 +440,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) percpu_ref_reinit(&c->writes); set_bit(BCH_FS_RW, &c->flags); + set_bit(BCH_FS_WAS_RW, &c->flags); return 0; err: __bch2_fs_read_only(c); @@ -469,6 +469,7 @@ static void __bch2_fs_free(struct bch_fs *c) for (i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); + bch2_fs_snapshots_exit(c); bch2_fs_quota_exit(c); bch2_fs_fsio_exit(c); bch2_fs_ec_exit(c); @@ -478,6 +479,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_fs_btree_iter_exit(c); bch2_fs_btree_key_cache_exit(&c->btree_key_cache); bch2_fs_btree_cache_exit(c); + bch2_fs_replicas_exit(c); bch2_fs_journal_exit(&c->journal); bch2_io_clock_exit(&c->io_clock[WRITE]); bch2_io_clock_exit(&c->io_clock[READ]); @@ -485,36 +487,34 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_journal_keys_free(&c->journal_keys); bch2_journal_entries_free(&c->journal_entries); percpu_free_rwsem(&c->mark_lock); - kfree(c->usage_scratch); - for (i = 0; i < ARRAY_SIZE(c->usage); i++) - free_percpu(c->usage[i]); - kfree(c->usage_base); - if (c->btree_iters_bufs) + if (c->btree_paths_bufs) for_each_possible_cpu(cpu) - kfree(per_cpu_ptr(c->btree_iters_bufs, cpu)->iter); + kfree(per_cpu_ptr(c->btree_paths_bufs, cpu)->path); - free_percpu(c->btree_iters_bufs); + free_percpu(c->online_reserved); + free_percpu(c->btree_paths_bufs); free_percpu(c->pcpu); mempool_exit(&c->large_bkey_pool); mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); mempool_exit(&c->fill_iter); percpu_ref_exit(&c->writes); - kfree(c->replicas.entries); - kfree(c->replicas_gc.entries); kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(c->journal_seq_blacklist_table); kfree(c->unused_inode_hints); free_heap(&c->copygc_heap); + if (c->io_complete_wq ) + destroy_workqueue(c->io_complete_wq ); if (c->copygc_wq) destroy_workqueue(c->copygc_wq); - if (c->wq) - destroy_workqueue(c->wq); + if (c->btree_io_complete_wq) + destroy_workqueue(c->btree_io_complete_wq); + if (c->btree_update_wq) + destroy_workqueue(c->btree_update_wq); - free_pages((unsigned long) c->disk_sb.sb, - c->disk_sb.page_order); + bch2_free_super(&c->disk_sb); kvpfree(c, sizeof(*c)); module_put(THIS_MODULE); } @@ -544,8 +544,7 @@ void __bch2_fs_stop(struct bch_fs *c) for_each_member_device(ca, c, i) if (ca->kobj.state_in_sysfs && ca->disk_sb.bdev) - sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, - "bcachefs"); + sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs"); if (c->kobj.state_in_sysfs) kobject_del(&c->kobj); @@ -563,8 +562,6 @@ void __bch2_fs_stop(struct bch_fs *c) for_each_member_device(ca, c, i) cancel_work_sync(&ca->io_error_work); - cancel_work_sync(&c->btree_write_error_work); - cancel_delayed_work_sync(&c->pd_controllers_update); cancel_work_sync(&c->read_only_work); for (i = 0; i < c->sb.nr_devices; i++) @@ -629,9 +626,11 @@ static const char *bch2_fs_online(struct bch_fs *c) down_write(&c->state_lock); err = "error creating sysfs objects"; - __for_each_member_device(ca, c, i, NULL) - if (bch2_dev_sysfs_online(c, ca)) + for_each_member_device(ca, c, i) + if (bch2_dev_sysfs_online(c, ca)) { + percpu_ref_put(&ca->ref); goto err; + } list_add(&c->list, &bch_fs_list); err = NULL; @@ -689,10 +688,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_init(&c->usage_scratch_lock); mutex_init(&c->bio_bounce_pages_lock); + mutex_init(&c->snapshot_table_lock); - bio_list_init(&c->btree_write_error_list); spin_lock_init(&c->btree_write_error_lock); - INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work); INIT_WORK(&c->journal_seq_blacklist_gc_work, bch2_blacklist_entries_gc); @@ -709,6 +707,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->ec_stripe_new_list); mutex_init(&c->ec_stripe_new_lock); + INIT_LIST_HEAD(&c->data_progress_list); + mutex_init(&c->data_progress_lock); + spin_lock_init(&c->ec_stripes_heap_lock); seqcount_init(&c->gc_pos_lock); @@ -760,10 +761,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus())); - if (!(c->wq = alloc_workqueue("bcachefs", + if (!(c->btree_update_wq = alloc_workqueue("bcachefs", + WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || + !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || + !(c->io_complete_wq = alloc_workqueue("bcachefs_io", + WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) || percpu_ref_init(&c->writes, bch2_writes_disabled, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || @@ -772,7 +777,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) offsetof(struct btree_write_bio, wbio.bio)), BIOSET_NEED_BVECS) || !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || - !(c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf)) || + !(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) || + !(c->online_reserved = alloc_percpu(u64)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || @@ -786,6 +792,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_btree_key_cache_init(&c->btree_key_cache) || bch2_fs_btree_iter_init(c) || bch2_fs_btree_interior_update_init(c) || + bch2_fs_subvolumes_init(c) || bch2_fs_io_init(c) || bch2_fs_encryption_init(c) || bch2_fs_compress_init(c) || @@ -905,9 +912,16 @@ int bch2_fs_start(struct bch_fs *c) /* * Allocator threads don't start filling copygc reserve until after we * set BCH_FS_STARTED - wake them now: + * + * XXX ugly hack: + * Need to set ca->allocator_state here instead of relying on the + * allocator threads to do it to avoid racing with the copygc threads + * checking it and thinking they have no alloc reserve: */ - for_each_online_member(ca, c, i) + for_each_online_member(ca, c, i) { + ca->allocator_state = ALLOCATOR_running; bch2_wake_allocator(ca); + } if (c->opts.read_only || c->opts.nochanges) { bch2_fs_read_only(c); @@ -1010,8 +1024,7 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs && ca->disk_sb.bdev) - sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj, - "bcachefs"); + sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs"); if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); @@ -1047,10 +1060,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca) wait_for_completion(&ca->io_ref_completion); if (ca->kobj.state_in_sysfs) { - struct kobject *block = - &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; - - sysfs_remove_link(block, "bcachefs"); + sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs"); sysfs_remove_link(&ca->kobj, "block"); } @@ -1087,12 +1097,12 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca) } if (ca->disk_sb.bdev) { - struct kobject *block = - &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj; + struct kobject *block = bdev_kobj(ca->disk_sb.bdev); ret = sysfs_create_link(block, &ca->kobj, "bcachefs"); if (ret) return ret; + ret = sysfs_create_link(&ca->kobj, block, "block"); if (ret) return ret; @@ -1433,7 +1443,7 @@ int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, /* Device add/removal: */ -int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) +static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) { struct btree_trans trans; size_t i; @@ -1676,7 +1686,7 @@ have_slot: bch2_dev_usage_journal_reserve(c); err = "error marking superblock"; - ret = bch2_trans_mark_dev_sb(c, NULL, ca); + ret = bch2_trans_mark_dev_sb(c, ca); if (ret) goto err_late; @@ -1736,7 +1746,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) ca = bch_dev_locked(c, dev_idx); - if (bch2_trans_mark_dev_sb(c, NULL, ca)) { + if (bch2_trans_mark_dev_sb(c, ca)) { err = "bch2_trans_mark_dev_sb() error"; goto err; } @@ -1814,6 +1824,11 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) goto err; } + ret = bch2_trans_mark_dev_sb(c, ca); + if (ret) { + goto err; + } + mutex_lock(&c->sb_lock); mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; mi->nbuckets = cpu_to_le64(nbuckets); @@ -1830,20 +1845,23 @@ err: /* return with ref on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path) { - struct block_device *bdev = lookup_bdev(path); struct bch_dev *ca; + dev_t dev; unsigned i; + int ret; - if (IS_ERR(bdev)) - return ERR_CAST(bdev); + ret = lookup_bdev(path, &dev); + if (ret) + return ERR_PTR(ret); - for_each_member_device(ca, c, i) - if (ca->disk_sb.bdev == bdev) + rcu_read_lock(); + for_each_member_device_rcu(ca, c, i, NULL) + if (ca->disk_sb.bdev->bd_dev == dev) goto found; - ca = ERR_PTR(-ENOENT); found: - bdput(bdev); + rcu_read_unlock(); + return ca; }