From 64c325ef483c863c720a7f53c6b3126e583e05a0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 3 Apr 2017 23:05:13 -0800 Subject: [PATCH] Update bcachefs sources to ff95156479 --- .bcachefs_revision | 2 +- Makefile | 5 ++ libbcachefs/alloc.c | 26 ++++---- libbcachefs/alloc.h | 1 + libbcachefs/bcachefs.h | 3 + libbcachefs/bcachefs_format.h | 18 +++--- libbcachefs/bkey_methods.h | 2 +- libbcachefs/btree_gc.c | 116 +++++++++++++++++++++------------- libbcachefs/btree_io.c | 2 +- libbcachefs/btree_locking.h | 3 +- libbcachefs/fifo.h | 29 +++++---- libbcachefs/inode.h | 2 +- libbcachefs/io.c | 36 ++++------- libbcachefs/io_types.h | 3 +- libbcachefs/journal.c | 51 +++++++-------- libbcachefs/opts.c | 6 +- libbcachefs/str_hash.h | 2 +- libbcachefs/super.c | 29 +++++++++ libbcachefs/super.h | 5 ++ libbcachefs/util.c | 2 +- 20 files changed, 207 insertions(+), 136 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index e8975f4..ad735e5 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -c07073eb3b218df0ea107a3e04d2431703f0c07b +83667254ddf04f558c90f32439e36d7a04ac3a39 diff --git a/Makefile b/Makefile index 7e00baa..0822433 100644 --- a/Makefile +++ b/Makefile @@ -121,3 +121,8 @@ update-bcachefs-sources: echo `cd $(LINUX_DIR); git rev-parse HEAD` > .bcachefs_revision cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/ cp $(LINUX_DIR)/include/trace/events/bcachefs.h include/trace/events/ + +.PHONE: update-commit-bcachefs-sources +update-commit-bcachefs-sources: update-bcachefs-sources + git commit -m "Update bcachefs sources to `cut -b1-10 .bcachefs_revision`" \ + .bcachefs_revision libbcachefs/ diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c index fc2a4ab..d5d2679 100644 --- a/libbcachefs/alloc.c +++ b/libbcachefs/alloc.c @@ -256,19 +256,22 @@ static struct nonce prio_nonce(struct prio_set *p) }}; } -static int bch2_prio_write(struct bch_dev *ca) +int bch2_prio_write(struct bch_dev *ca) { struct bch_fs *c = ca->fs; struct 
journal *j = &c->journal; struct journal_res res = { 0 }; bool need_new_journal_entry; - int i, ret; + int i, ret = 0; if (c->opts.nochanges) return 0; + mutex_lock(&ca->prio_write_lock); trace_prio_write_start(ca); + ca->need_prio_write = false; + atomic64_add(ca->mi.bucket_size * prio_buckets(ca), &ca->meta_sectors_written); @@ -322,7 +325,7 @@ static int bch2_prio_write(struct bch_dev *ca) if (bch2_dev_fatal_io_err_on(ret, ca, "prio write to bucket %zu", r) || bch2_meta_write_fault("prio")) - return ret; + goto err; } spin_lock(&j->lock); @@ -340,7 +343,7 @@ static int bch2_prio_write(struct bch_dev *ca) ret = bch2_journal_res_get(j, &res, u64s, u64s); if (ret) - return ret; + goto err; need_new_journal_entry = j->buf[res.idx].nr_prio_buckets < ca->dev_idx + 1; @@ -348,7 +351,7 @@ static int bch2_prio_write(struct bch_dev *ca) ret = bch2_journal_flush_seq(j, res.seq); if (ret) - return ret; + goto err; } while (need_new_journal_entry); /* @@ -369,7 +372,9 @@ static int bch2_prio_write(struct bch_dev *ca) spin_unlock(&ca->prio_buckets_lock); trace_prio_write_end(ca); - return 0; +err: + mutex_unlock(&ca->prio_write_lock); + return ret; } int bch2_prio_read(struct bch_dev *ca) @@ -863,6 +868,7 @@ static int bch2_allocator_thread(void *arg) { struct bch_dev *ca = arg; struct bch_fs *c = ca->fs; + long bucket; int ret; set_freezable(); @@ -877,7 +883,7 @@ static int bch2_allocator_thread(void *arg) */ while (!fifo_empty(&ca->free_inc)) { - long bucket = fifo_peek(&ca->free_inc); + bucket = fifo_peek(&ca->free_inc); /* * Don't remove from free_inc until after it's added @@ -960,12 +966,8 @@ static int bch2_allocator_thread(void *arg) * consistent-ish: */ spin_lock(&ca->freelist_lock); - while (!fifo_empty(&ca->free_inc)) { - long bucket; - - fifo_pop(&ca->free_inc, bucket); + while (fifo_pop(&ca->free_inc, bucket)) bch2_mark_free_bucket(ca, ca->buckets + bucket); - } spin_unlock(&ca->freelist_lock); goto out; } diff --git a/libbcachefs/alloc.h b/libbcachefs/alloc.h 
index 08638b2..c6b57fa 100644 --- a/libbcachefs/alloc.h +++ b/libbcachefs/alloc.h @@ -24,6 +24,7 @@ void bch2_dev_group_remove(struct dev_group *, struct bch_dev *); void bch2_dev_group_add(struct dev_group *, struct bch_dev *); int bch2_prio_read(struct bch_dev *); +int bch2_prio_write(struct bch_dev *); size_t bch2_bucket_alloc(struct bch_dev *, enum alloc_reserve); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 90d4098..cf1c4bd 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -379,6 +379,8 @@ struct bch_dev { spinlock_t prio_buckets_lock; struct bio *bio_prio; bool prio_read_done; + bool need_prio_write; + struct mutex prio_write_lock; /* * free: Buckets that are ready to be used @@ -456,6 +458,7 @@ enum { BCH_FS_BDEV_MOUNTED, BCH_FS_ERROR, BCH_FS_FSCK_FIXED_ERRORS, + BCH_FS_FIXED_GENS, }; struct btree_debug { diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 0a0dc87..8d780d2 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -617,7 +617,7 @@ struct bch_inode { __le32 i_flags; __le16 i_mode; __u8 fields[0]; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); BKEY_VAL_TYPE(inode, BCH_INODE_FS); #define BCH_INODE_FIELDS() \ @@ -714,7 +714,7 @@ struct bch_dirent { __u8 d_type; __u8 d_name[]; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); BKEY_VAL_TYPE(dirent, BCH_DIRENT); /* Xattrs */ @@ -736,7 +736,7 @@ struct bch_xattr { __u8 x_name_len; __le16 x_val_len; __u8 x_name[]; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); BKEY_VAL_TYPE(xattr, BCH_XATTR); /* Superblock */ @@ -811,7 +811,7 @@ struct bch_sb_layout { __u8 nr_superblocks; __u8 pad[5]; __u64 sb_offset[61]; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); #define BCH_SB_LAYOUT_SECTOR 7 @@ -1211,7 +1211,7 @@ struct jset { struct jset_entry start[0]; __u64 _data[0]; }; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); 
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); @@ -1237,7 +1237,7 @@ struct prio_set { __le16 write_prio; __u8 gen; } __attribute__((packed)) data[]; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); LE32_BITMASK(PSET_CSUM_TYPE, struct prio_set, flags, 0, 4); @@ -1295,7 +1295,7 @@ struct bset { struct bkey_packed start[0]; __u64 _data[0]; }; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4); @@ -1325,7 +1325,7 @@ struct btree_node { }; }; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4); LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8); @@ -1342,7 +1342,7 @@ struct btree_node_entry { }; }; -} __attribute__((packed)); +} __attribute__((packed, aligned(8))); #ifdef __cplusplus } diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index d372fa6..f795db6 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -13,7 +13,7 @@ enum bkey_type { /* Type of a key in btree @id at level @level: */ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id) { - return level ? BKEY_TYPE_BTREE : id; + return level ? 
BKEY_TYPE_BTREE : (enum bkey_type) id; } static inline bool btree_type_has_ptrs(enum bkey_type type) diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 3620c29..e07a3f9 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -129,7 +129,7 @@ static u8 bch2_btree_mark_key(struct bch_fs *c, enum bkey_type type, int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type, struct bkey_s_c k) { - int ret; + int ret = 0; switch (k.k->type) { case BCH_EXTENT: @@ -140,12 +140,17 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type, extent_for_each_ptr(e, ptr) { struct bch_dev *ca = c->devs[ptr->dev]; struct bucket *g = PTR_BUCKET(ca, ptr); - - unfixable_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, - "%s ptr gen in the future: %u > %u", - type == BKEY_TYPE_BTREE - ? "btree" : "data", - ptr->gen, g->mark.gen); + struct bucket_mark new; + + if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, + "%s ptr gen in the future: %u > %u", + type == BKEY_TYPE_BTREE + ? 
"btree" : "data", + ptr->gen, g->mark.gen)) { + bucket_cmpxchg(g, new, new.gen = ptr->gen); + set_bit(BCH_FS_FIXED_GENS, &c->flags); + ca->need_prio_write = true; + } } break; @@ -157,7 +162,6 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type, atomic64_read(&c->key_version))); bch2_btree_mark_key(c, type, k); - return 0; fsck_err: return ret; } @@ -382,50 +386,14 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) mutex_unlock(&c->btree_interior_update_lock); } -/** - * bch_gc - recompute bucket marks and oldest_gen, rewrite btree nodes - */ -void bch2_gc(struct bch_fs *c) +void bch2_gc_start(struct bch_fs *c) { struct bch_dev *ca; struct bucket *g; struct bucket_mark new; - u64 start_time = local_clock(); unsigned i; int cpu; - /* - * Walk _all_ references to buckets, and recompute them: - * - * Order matters here: - * - Concurrent GC relies on the fact that we have a total ordering for - * everything that GC walks - see gc_will_visit_node(), - * gc_will_visit_root() - * - * - also, references move around in the course of index updates and - * various other crap: everything needs to agree on the ordering - * references are allowed to move around in - e.g., we're allowed to - * start with a reference owned by an open_bucket (the allocator) and - * move it to the btree, but not the reverse. 
- * This is necessary to ensure that gc doesn't miss references that - * move around - if references move backwards in the ordering GC - * uses, GC could skip past them - */ - - if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) - return; - - trace_gc_start(c); - - /* - * Do this before taking gc_lock - bch2_disk_reservation_get() blocks on - * gc_lock if sectors_available goes to 0: - */ - bch2_recalc_sectors_available(c); - - down_write(&c->gc_lock); - lg_global_lock(&c->usage_lock); /* @@ -466,6 +434,50 @@ void bch2_gc(struct bch_fs *c) })); ca->oldest_gens[g - ca->buckets] = new.gen; } +} + +/** + * bch2_gc - recompute bucket marks and oldest_gen, rewrite btree nodes + */ +void bch2_gc(struct bch_fs *c) +{ + struct bch_dev *ca; + u64 start_time = local_clock(); + unsigned i; + + /* + * Walk _all_ references to buckets, and recompute them: + * + * Order matters here: + * - Concurrent GC relies on the fact that we have a total ordering for + * everything that GC walks - see gc_will_visit_node(), + * gc_will_visit_root() + * + * - also, references move around in the course of index updates and + * various other crap: everything needs to agree on the ordering + * references are allowed to move around in - e.g., we're allowed to + * start with a reference owned by an open_bucket (the allocator) and + * move it to the btree, but not the reverse.
+ * + * This is necessary to ensure that gc doesn't miss references that + * move around - if references move backwards in the ordering GC + * uses, GC could skip past them + */ + + if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) + return; + + trace_gc_start(c); + + /* + * Do this before taking gc_lock - bch2_disk_reservation_get() blocks on + * gc_lock if sectors_available goes to 0: + */ + bch2_recalc_sectors_available(c); + + down_write(&c->gc_lock); + + bch2_gc_start(c); /* Walk allocator's references: */ bch2_mark_allocator_buckets(c); @@ -964,8 +976,11 @@ err: int bch2_initial_gc(struct bch_fs *c, struct list_head *journal) { + unsigned iter = 0; enum btree_id id; int ret; +again: + bch2_gc_start(c); for (id = 0; id < BTREE_ID_NR; id++) { ret = bch2_initial_gc_btree(c, id); @@ -981,6 +996,17 @@ int bch2_initial_gc(struct bch_fs *c, struct list_head *journal) bch2_mark_metadata(c); + if (test_bit(BCH_FS_FIXED_GENS, &c->flags)) { + if (iter++ > 2) { + bch_info(c, "Unable to fix bucket gens, looping"); + return -EINVAL; + } + + bch_info(c, "Fixed gens, restarting initial mark and sweep:"); + clear_bit(BCH_FS_FIXED_GENS, &c->flags); + goto again; + } + /* * Skip past versions that might have possibly been used (as nonces), * but hadn't had their pointers written: diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index bb8cee1..46612c1 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1305,7 +1305,7 @@ static void btree_node_write_endio(struct bio *bio) closure_put(cl); } - if (ca) + if (wbio->have_io_ref) percpu_ref_put(&ca->io_ref); } diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index 27709d1..0945ea8 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -74,8 +74,7 @@ static inline void mark_btree_node_intent_locked(struct btree_iter *iter, mark_btree_node_locked(iter, level, SIX_LOCK_intent); } -static inline enum six_lock_type -btree_lock_want(struct btree_iter *iter, int level) 
+static inline int btree_lock_want(struct btree_iter *iter, int level) { return level < iter->locks_want ? SIX_LOCK_intent diff --git a/libbcachefs/fifo.h b/libbcachefs/fifo.h index 2908ca2..a391277 100644 --- a/libbcachefs/fifo.h +++ b/libbcachefs/fifo.h @@ -71,27 +71,33 @@ do { \ #define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask) -#define fifo_push_back(fifo, i) \ +#define fifo_push_back_ref(f) \ + (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask]) + +#define fifo_push_front_ref(f) \ + (fifo_full((f)) ? NULL : &(f)->data[--(f)->front & (f)->mask]) + +#define fifo_push_back(fifo, new) \ ({ \ - bool _r = !fifo_full((fifo)); \ + typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \ if (_r) \ - (fifo)->data[(fifo)->back++ & (fifo)->mask] = (i); \ - _r; \ + *_r = (new); \ + _r != NULL; \ }) -#define fifo_pop_front(fifo, i) \ +#define fifo_push_front(fifo, new) \ ({ \ - bool _r = !fifo_empty((fifo)); \ + typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \ if (_r) \ - (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \ - _r; \ + *_r = (new); \ + _r != NULL; \ }) -#define fifo_push_front(fifo, i) \ +#define fifo_pop_front(fifo, i) \ ({ \ - bool _r = !fifo_full((fifo)); \ + bool _r = !fifo_empty((fifo)); \ if (_r) \ - (fifo)->data[--(fifo)->front & (fifo)->mask] = (i); \ + (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \ _r; \ }) @@ -103,6 +109,7 @@ do { \ _r; \ }) +#define fifo_push_ref(fifo) fifo_push_back_ref(fifo) #define fifo_push(fifo, i) fifo_push_back(fifo, (i)) #define fifo_pop(fifo, i) fifo_pop_front(fifo, (i)) #define fifo_peek(fifo) fifo_peek_front(fifo) diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 277d4e4..d1d64a7 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -22,7 +22,7 @@ struct bkey_inode_buf { #define BCH_INODE_FIELD(_name, _bits) + 8 + _bits / 8 u8 _pad[0 + BCH_INODE_FIELDS()]; #undef BCH_INODE_FIELD -} __packed; +}; void bch2_inode_pack(struct bkey_inode_buf *, const 
struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index d349461..44082a0 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -85,22 +85,6 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, /* Bios with headers */ -static void bch2_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio, - struct bch_dev *ca, const struct bch_extent_ptr *ptr) -{ - wbio->ca = ca; - wbio->submit_time_us = local_clock_us(); - wbio->bio.bi_iter.bi_sector = ptr->offset; - wbio->bio.bi_bdev = ca ? ca->disk_sb.bdev : NULL; - - if (unlikely(!ca)) { - bcache_io_error(c, &wbio->bio, "device has been removed"); - bio_endio(&wbio->bio); - } else { - generic_make_request(&wbio->bio); - } -} - void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, const struct bkey_i *k) { @@ -116,10 +100,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, extent_for_each_ptr(e, ptr) { ca = c->devs[ptr->dev]; - if (!percpu_ref_tryget(&ca->io_ref)) { - bch2_submit_wbio(c, wbio, NULL, ptr); - break; - } if (ptr + 1 < &extent_entry_last(e)->ptr) { n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO, @@ -132,6 +112,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, n->bounce = false; n->split = true; n->put_bio = true; + n->have_io_ref = true; n->bio.bi_opf = wbio->bio.bi_opf; __bio_inc_remaining(n->orig); } else { @@ -141,7 +122,18 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, if (!journal_flushes_device(ca)) n->bio.bi_opf |= REQ_FUA; - bch2_submit_wbio(c, n, ca, ptr); + n->ca = ca; + n->submit_time_us = local_clock_us(); + n->bio.bi_iter.bi_sector = ptr->offset; + + if (likely(percpu_ref_tryget(&ca->io_ref))) { + n->bio.bi_bdev = ca->disk_sb.bdev; + generic_make_request(&n->bio); + } else { + n->have_io_ref = false; + bcache_io_error(c, &n->bio, "device has been removed"); + 
bio_endio(&n->bio); + } } } @@ -327,7 +319,7 @@ static void bch2_write_endio(struct bio *bio) set_closure_fn(cl, bch2_write_io_error, index_update_wq(op)); } - if (ca) + if (wbio->have_io_ref) percpu_ref_put(&ca->io_ref); if (bio->bi_error && orig) diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h index 07ea67c..d104cb7 100644 --- a/libbcachefs/io_types.h +++ b/libbcachefs/io_types.h @@ -73,7 +73,8 @@ struct bch_write_bio { unsigned submit_time_us; unsigned split:1, bounce:1, - put_bio:1; + put_bio:1, + have_io_ref:1; /* Only for btree writes: */ unsigned used_mempool:1; diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 76a3b46..7d250df 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1122,21 +1122,31 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set) #endif } -static void __bch2_journal_next_entry(struct journal *j) +static struct journal_entry_pin_list * +__journal_entry_new(struct journal *j, int count) { - struct journal_entry_pin_list pin_list, *p; - struct journal_buf *buf; + struct journal_entry_pin_list *p = fifo_push_ref(&j->pin); /* * The fifo_push() needs to happen at the same time as j->seq is * incremented for last_seq() to be calculated correctly */ atomic64_inc(&j->seq); - BUG_ON(!fifo_push(&j->pin, pin_list)); - p = &fifo_peek_back(&j->pin); + + BUG_ON(journal_pin_seq(j, p) != atomic64_read(&j->seq)); INIT_LIST_HEAD(&p->list); - atomic_set(&p->count, 1); + atomic_set(&p->count, count); + + return p; +} + +static void __bch2_journal_next_entry(struct journal *j) +{ + struct journal_entry_pin_list *p; + struct journal_buf *buf; + + p = __journal_entry_new(j, 1); if (test_bit(JOURNAL_REPLAY_DONE, &j->flags)) { smp_wmb(); @@ -1149,8 +1159,6 @@ static void __bch2_journal_next_entry(struct journal *j) memset(buf->data, 0, sizeof(*buf->data)); buf->data->seq = cpu_to_le64(atomic64_read(&j->seq)); buf->data->u64s = 0; - - BUG_ON(journal_pin_seq(j, p) != atomic64_read(&j->seq)); } 
static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf) @@ -1423,16 +1431,8 @@ void bch2_journal_start(struct bch_fs *c) set_bit(JOURNAL_STARTED, &j->flags); - while (atomic64_read(&j->seq) < new_seq) { - struct journal_entry_pin_list pin_list, *p; - - BUG_ON(!fifo_push(&j->pin, pin_list)); - p = &fifo_peek_back(&j->pin); - - INIT_LIST_HEAD(&p->list); - atomic_set(&p->count, 0); - atomic64_inc(&j->seq); - } + while (atomic64_read(&j->seq) < new_seq) + __journal_entry_new(j, 0); /* * journal_buf_switch() only inits the next journal entry when it @@ -1494,8 +1494,11 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) BTREE_INSERT_JOURNAL_REPLAY); bch2_disk_reservation_put(c, &disk_res); - if (ret) + if (ret) { + bch_err(c, "journal replay: error %d while replaying key", + ret); goto err; + } cond_resched(); keys++; @@ -1517,8 +1520,10 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) * entry on disk, if we crash before writing the next journal entry: */ ret = bch2_journal_meta(&c->journal); - if (ret) + if (ret) { + bch_err(c, "journal replay: error %d flushing journal", ret); goto err; + } } bch_info(c, "journal replay done, %i keys in %i entries, seq %llu", @@ -1526,11 +1531,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) bch2_journal_set_replay_done(&c->journal); err: - if (ret) - bch_err(c, "journal replay error: %d", ret); - bch2_journal_entries_free(list); - return ret; } @@ -2372,7 +2373,7 @@ retry: switch (journal_buf_switch(j, false)) { case JOURNAL_ENTRY_ERROR: spin_unlock(&j->lock); - return -EIO; + return -EROFS; case JOURNAL_ENTRY_INUSE: /* haven't finished writing out the previous one: */ spin_unlock(&j->lock); diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 7c4cf80..1eb27ae 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -72,7 +72,7 @@ const struct bch_option bch2_opt_table[] = { #undef BCH_OPT }; -static enum bch_opt_id bch2_opt_lookup(const char *name) 
+static int bch2_opt_lookup(const char *name) { const struct bch_option *i; @@ -209,7 +209,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options) enum bch_opt_id bch2_parse_sysfs_opt(const char *name, const char *val, u64 *res) { - enum bch_opt_id id = bch2_opt_lookup(name); + int id = bch2_opt_lookup(name); int ret; if (id < 0) @@ -225,7 +225,7 @@ enum bch_opt_id bch2_parse_sysfs_opt(const char *name, const char *val, ssize_t bch2_opt_show(struct bch_opts *opts, const char *name, char *buf, size_t size) { - enum bch_opt_id id = bch2_opt_lookup(name); + int id = bch2_opt_lookup(name); const struct bch_option *opt; u64 v; diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index f70fc1a..6eac6fc 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -25,7 +25,7 @@ bch2_hash_info_init(struct bch_fs *c, /* XXX ick */ struct bch_hash_info info = { .type = (bi->i_flags >> INODE_STR_HASH_OFFSET) & - ~(~0 << INODE_STR_HASH_BITS) + ~(~0U << INODE_STR_HASH_BITS) }; switch (info.type) { diff --git a/libbcachefs/super.c b/libbcachefs/super.c index b813974..19f9692 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -768,6 +768,15 @@ static const char *__bch2_fs_start(struct bch_fs *c) if (ret) goto err; + for_each_rw_member(ca, c, i) + if (ca->need_prio_write) { + ret = bch2_prio_write(ca); + if (ret) { + percpu_ref_put(&ca->io_ref); + goto err; + } + } + bch_verbose(c, "fsck done"); } else { struct bch_inode_unpacked inode; @@ -1092,6 +1101,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) spin_lock_init(&ca->freelist_lock); spin_lock_init(&ca->prio_buckets_lock); mutex_init(&ca->heap_lock); + mutex_init(&ca->prio_write_lock); bch2_dev_moving_gc_init(ca); INIT_WORK(&ca->io_error_work, bch2_nonfatal_io_error_work); @@ -1265,6 +1275,15 @@ bool bch2_fs_may_start(struct bch_fs *c, int flags) return true; } +/* + * Note: this function is also used by the error paths - when a particular + * device sees an error, we 
call it to determine whether we can just set the + * device RO, or - if this function returns false - we'll set the whole + * filesystem RO: + * + * XXX: maybe we should be more explicit about whether we're changing state + * because we got an error or what have you? + */ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { @@ -1273,6 +1292,16 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, if (new_state == BCH_MEMBER_STATE_RW) return true; + if (ca->mi.state == BCH_MEMBER_STATE_FAILED) + return true; + + /* + * If the device is already offline - whatever is going on with it can't + * possibly make the FS need to go RO: + */ + if (!bch2_dev_is_online(ca)) + return true; + if (ca->mi.has_data && !(flags & BCH_FORCE_IF_DATA_DEGRADED)) return false; diff --git a/libbcachefs/super.h b/libbcachefs/super.h index 700344a..e4bb583 100644 --- a/libbcachefs/super.h +++ b/libbcachefs/super.h @@ -32,6 +32,11 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter) return ca; } +static inline bool bch2_dev_is_online(struct bch_dev *ca) +{ + return !percpu_ref_is_zero(&ca->io_ref); +} + #define __for_each_member_device(ca, c, iter) \ for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter))); (iter)++) diff --git a/libbcachefs/util.c b/libbcachefs/util.c index f2e6ec4..f57224a 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -85,7 +85,7 @@ ssize_t bch2_hprint(char *buf, s64 v) int u, t = 0; for (u = 0; v >= 1024 || v <= -1024; u++) { - t = v & ~(~0 << 10); + t = v & ~(~0U << 10); v >>= 10; } -- 2.39.2