git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to a5c0e1bb30 bcachefs: Clean up bch2_btree_and_journal_walk()
author: Kent Overstreet <kent.overstreet@gmail.com>
Fri, 30 Apr 2021 20:48:21 +0000 (16:48 -0400)
committer: Kent Overstreet <kent.overstreet@gmail.com>
Fri, 30 Apr 2021 22:23:38 +0000 (18:23 -0400)
34 files changed:
.bcachefs_revision
include/crypto/sha2.h [moved from include/crypto/sha.h with 100% similarity]
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/freezer.h
include/linux/poison.h
include/trace/events/bcachefs.h
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/bcachefs.h
libbcachefs/btree_cache.c
libbcachefs/btree_iter.c
libbcachefs/btree_key_cache.c
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/ec.c
libbcachefs/ec.h
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/io.c
libbcachefs/journal.c
libbcachefs/journal_reclaim.c
libbcachefs/journal_reclaim.h
libbcachefs/journal_types.h
libbcachefs/movinggc.c
libbcachefs/quota.c
libbcachefs/recovery.c
libbcachefs/recovery.h
libbcachefs/s128.h [new file with mode: 0644]
libbcachefs/str_hash.h
libbcachefs/super-io.c
libbcachefs/super.c
libbcachefs/super.h
linux/blkdev.c

index d1024536aa74e22a37fdb81c6484008f59167ac0..7ceb5bb5e8502c06881eda0ffab8eb29e125b0d1 100644 (file)
@@ -1 +1 @@
-8d3093bd9b9254957badce4a4ff178baeb3632ed
+a5c0e1bb306e79b40b2432a22f164697c8b22110
similarity index 100%
rename from include/crypto/sha.h
rename to include/crypto/sha2.h
index 42cd003227d430ad4102f50ce1f40c8fb880a7d6..8aef4bb8e661b39dc75f73fc2ac061eb02a8b767 100644 (file)
@@ -8,12 +8,43 @@
 #include <linux/atomic.h>
 #include <linux/types.h>
 #include <linux/bvec.h>
+#include <linux/kobject.h>
 
 struct bio_set;
 struct bio;
-struct block_device;
 typedef void (bio_end_io_t) (struct bio *);
 
+#define BDEVNAME_SIZE  32
+
+struct request_queue {
+       struct backing_dev_info *backing_dev_info;
+};
+
+struct gendisk {
+};
+
+struct hd_struct {
+       struct kobject          kobj;
+};
+
+struct block_device {
+       struct kobject          kobj;
+       dev_t                   bd_dev;
+       char                    name[BDEVNAME_SIZE];
+       struct inode            *bd_inode;
+       struct request_queue    queue;
+       void                    *bd_holder;
+       struct gendisk *        bd_disk;
+       struct gendisk          __bd_disk;
+       int                     bd_fd;
+       int                     bd_sync_fd;
+
+       struct backing_dev_info *bd_bdi;
+       struct backing_dev_info __bd_bdi;
+};
+
+#define bdev_kobj(_bdev) (&((_bdev)->kobj))
+
 /*
  * Block error status values.  See block/blk-core:blk_errors for the details.
  */
index 318bcfaffc50043c1488fcd40103c64de1378846..4300c4da3d1253d0ff3a1a065cdaec60534257dd 100644 (file)
@@ -59,36 +59,8 @@ static inline struct inode *file_inode(const struct file *f)
        return f->f_inode;
 }
 
-#define BDEVNAME_SIZE  32
-
-struct request_queue {
-       struct backing_dev_info *backing_dev_info;
-};
-
-struct gendisk {
-};
-
-struct hd_struct {
-       struct kobject          kobj;
-};
-
 #define part_to_dev(part)      (part)
 
-struct block_device {
-       char                    name[BDEVNAME_SIZE];
-       struct inode            *bd_inode;
-       struct request_queue    queue;
-       void                    *bd_holder;
-       struct hd_struct        *bd_part;
-       struct gendisk          *bd_disk;
-       struct gendisk          __bd_disk;
-       int                     bd_fd;
-       int                     bd_sync_fd;
-
-       struct backing_dev_info *bd_bdi;
-       struct backing_dev_info __bd_bdi;
-};
-
 void generic_make_request(struct bio *);
 int submit_bio_wait(struct bio *);
 
@@ -111,7 +83,7 @@ sector_t get_capacity(struct gendisk *disk);
 void blkdev_put(struct block_device *bdev, fmode_t mode);
 void bdput(struct block_device *bdev);
 struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder);
-struct block_device *lookup_bdev(const char *path);
+int lookup_bdev(const char *path, dev_t *);
 
 struct super_block {
        void                    *s_fs_info;
index 1af94d5b6a07fe113bc7cd172682695b02176d57..a29d1565cfc5e2763e98b52e50b536d922300a81 100644 (file)
@@ -4,5 +4,6 @@
 #define try_to_freeze()
 #define set_freezable()
 #define freezing(task)         false
+#define freezable_schedule_timeout(_t) schedule_timeout(_t);
 
 #endif /* __TOOLS_LINUX_FREEZER_H */
index dc8ae5d8db036edb22360544fbb1b7c7db5b1eaf..aff1c9250c8216e4f14a99a6844c00988d7dbe8b 100644 (file)
 #define TIMER_ENTRY_STATIC     ((void *) 0x300 + POISON_POINTER_DELTA)
 
 /********** mm/page_poison.c **********/
-#ifdef CONFIG_PAGE_POISONING_ZERO
-#define PAGE_POISON 0x00
-#else
 #define PAGE_POISON 0xaa
-#endif
 
 /********** mm/page_alloc.c ************/
 
index e6c3e17a4c0593c15f0a5451aaa747f2de93c0e5..c79338c8ebf7cbfd44cc6a26de1c8fcc7ce29efb 100644 (file)
@@ -528,6 +528,62 @@ TRACE_EVENT(copygc,
                __entry->buckets_moved, __entry->buckets_not_moved)
 );
 
+TRACE_EVENT(trans_get_iter,
+       TP_PROTO(unsigned long caller, unsigned long ip,
+                enum btree_id btree_id,
+                struct bpos *pos_want,
+                unsigned locks_want,
+                struct bpos *pos_found,
+                unsigned locks_found,
+                unsigned uptodate),
+       TP_ARGS(caller, ip, btree_id,
+               pos_want, locks_want,
+               pos_found, locks_found,
+               uptodate),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,  caller                  )
+               __field(unsigned long,  ip                      )
+               __field(u8,             btree_id                )
+               __field(u8,             uptodate                )
+               __field(u8,             locks_want              )
+               __field(u8,             locks_found             )
+               __field(u64,            pos_want_inode          )
+               __field(u64,            pos_want_offset         )
+               __field(u32,            pos_want_snapshot       )
+               __field(u64,            pos_found_inode         )
+               __field(u64,            pos_found_offset        )
+               __field(u32,            pos_found_snapshot      )
+       ),
+
+       TP_fast_assign(
+               __entry->caller                 = caller;
+               __entry->ip                     = ip;
+               __entry->btree_id               = btree_id;
+               __entry->uptodate               = uptodate;
+               __entry->pos_want_inode         = pos_want->inode;
+               __entry->pos_want_offset        = pos_want->offset;
+               __entry->pos_want_snapshot      = pos_want->snapshot;
+               __entry->pos_found_inode        = pos_found->inode;
+               __entry->pos_found_offset       = pos_found->offset;
+               __entry->pos_found_snapshot     = pos_found->snapshot;
+       ),
+
+       TP_printk("%ps %pS btree %u uptodate %u want %llu:%llu:%u locks %u found %llu:%llu:%u locks %u",
+                 (void *) __entry->caller,
+                 (void *) __entry->ip,
+                 __entry->btree_id,
+                 __entry->uptodate,
+                 __entry->pos_want_inode,
+                 __entry->pos_want_offset,
+                 __entry->pos_want_snapshot,
+                 __entry->locks_want,
+                 __entry->pos_found_inode,
+                 __entry->pos_found_offset,
+                 __entry->pos_found_snapshot,
+                 __entry->locks_found)
+);
+
 TRACE_EVENT(transaction_restart_ip,
        TP_PROTO(unsigned long caller, unsigned long ip),
        TP_ARGS(caller, ip),
@@ -565,6 +621,11 @@ DEFINE_EVENT(transaction_restart,  trans_restart_btree_node_reused,
        TP_ARGS(ip)
 );
 
+DEFINE_EVENT(transaction_restart,      trans_blocked_journal_reclaim,
+       TP_PROTO(unsigned long ip),
+       TP_ARGS(ip)
+);
+
 TRACE_EVENT(trans_restart_would_deadlock,
        TP_PROTO(unsigned long  trans_ip,
                 unsigned long  caller_ip,
index 912020e628b5593d24968ec01019d73a04125708..3ac8b03029f8b6391b9a538d14672d3c7cd249e8 100644 (file)
@@ -261,16 +261,14 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 #undef  x
 }
 
-static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
-                             unsigned level, struct bkey_s_c k)
+static int bch2_alloc_read_fn(struct bch_fs *c, struct bkey_s_c k)
 {
        struct bch_dev *ca;
        struct bucket *g;
        struct bkey_alloc_unpacked u;
 
-       if (level ||
-           (k.k->type != KEY_TYPE_alloc &&
-            k.k->type != KEY_TYPE_alloc_v2))
+       if (k.k->type != KEY_TYPE_alloc &&
+           k.k->type != KEY_TYPE_alloc_v2)
                return 0;
 
        ca = bch_dev_bkey_exists(c, k.k->p.inode);
@@ -289,13 +287,12 @@ static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
        return 0;
 }
 
-int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
+int bch2_alloc_read(struct bch_fs *c)
 {
        int ret;
 
        down_read(&c->gc_lock);
-       ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_alloc,
-                                         NULL, bch2_alloc_read_fn);
+       ret = bch2_btree_and_journal_walk(c, BTREE_ID_alloc, bch2_alloc_read_fn);
        up_read(&c->gc_lock);
        if (ret) {
                bch_err(c, "error reading alloc info: %i", ret);
index ad15a80602c0c53d3d3205f7f0f1bfd62984694e..9cadfdb5b83df30211082a06851131c25d5b197b 100644 (file)
@@ -91,8 +91,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
        .val_to_text    = bch2_alloc_to_text,           \
 }
 
-struct journal_keys;
-int bch2_alloc_read(struct bch_fs *, struct journal_keys *);
+int bch2_alloc_read(struct bch_fs *);
 
 static inline void bch2_wake_allocator(struct bch_dev *ca)
 {
index f2d2c7bbc29be54be0f88b7dcd229e0cb89331f7..24aa2cc7d965f929ebbe951c5b34359f58299fc5 100644 (file)
@@ -605,11 +605,13 @@ struct bch_fs {
 
                u64             time_base_lo;
                u32             time_base_hi;
-               u32             time_precision;
+               unsigned        time_units_per_sec;
+               unsigned        nsec_per_time_unit;
                u64             features;
                u64             compat;
        }                       sb;
 
+
        struct bch_sb_handle    disk_sb;
 
        unsigned short          block_bits;     /* ilog2(block_size) */
@@ -873,19 +875,22 @@ static inline unsigned block_bytes(const struct bch_fs *c)
        return c->opts.block_size << 9;
 }
 
-static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
+static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, s64 time)
 {
-       return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
+       struct timespec64 t;
+       s32 rem;
+
+       time += c->sb.time_base_lo;
+
+       t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem);
+       t.tv_nsec = rem * c->sb.nsec_per_time_unit;
+       return t;
 }
 
 static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
 {
-       s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
-
-       if (c->sb.time_precision == 1)
-               return ns;
-
-       return div_s64(ns, c->sb.time_precision);
+       return (ts.tv_sec * c->sb.time_units_per_sec +
+               (int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo;
 }
 
 static inline s64 bch2_current_time(struct bch_fs *c)
index f8692f792dd40e97afb08b321ffc509ac3b6f649..f6adbe8955d78ed7baffde7c8c07fe3aea67f591 100644 (file)
@@ -581,6 +581,7 @@ out:
        b->sib_u64s[1]          = 0;
        b->whiteout_u64s        = 0;
        bch2_btree_keys_init(b);
+       set_btree_node_accessed(b);
 
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
                               start_time);
@@ -653,9 +654,13 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                return NULL;
        }
 
-       /* Unlock before doing IO: */
-       if (iter && sync)
-               bch2_trans_unlock(iter->trans);
+       /*
+        * Unlock before doing IO:
+        *
+        * XXX: ideally should be dropping all btree node locks here
+        */
+       if (iter && btree_node_read_locked(iter, level + 1))
+               btree_node_unlock(iter, level + 1);
 
        bch2_btree_node_read(c, b, sync);
 
@@ -666,16 +671,6 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
                return NULL;
        }
 
-       /*
-        * XXX: this will probably always fail because btree_iter_relock()
-        * currently fails for iterators that aren't pointed at a valid btree
-        * node
-        */
-       if (iter && !bch2_trans_relock(iter->trans)) {
-               six_unlock_intent(&b->c.lock);
-               return ERR_PTR(-EINTR);
-       }
-
        if (lock_type == SIX_LOCK_read)
                six_lock_downgrade(&b->c.lock);
 
@@ -817,22 +812,9 @@ lock_node:
                }
        }
 
-       if (unlikely(btree_node_read_in_flight(b))) {
-               six_unlock_type(&b->c.lock, lock_type);
-               bch2_trans_unlock(iter->trans);
-
-               wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
-                              TASK_UNINTERRUPTIBLE);
-
-               /*
-                * XXX: check if this always fails - btree_iter_relock()
-                * currently fails for iterators that aren't pointed at a valid
-                * btree node
-                */
-               if (iter && !bch2_trans_relock(iter->trans))
-                       return ERR_PTR(-EINTR);
-               goto retry;
-       }
+       /* XXX: waiting on IO with btree locks held: */
+       wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+                      TASK_UNINTERRUPTIBLE);
 
        prefetch(b->aux_data);
 
index 93194e62ab2a152a3363c67a161647a8bd924bd8..cdec05c86173f468f960d08d9c3fa449d022064c 100644 (file)
@@ -2013,6 +2013,13 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                                         unsigned flags)
 {
        struct btree_iter *iter, *best = NULL;
+       struct bpos real_pos, pos_min = POS_MIN;
+
+       if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
+           btree_node_type_is_extents(btree_id) &&
+           !(flags & BTREE_ITER_NOT_EXTENTS) &&
+           !(flags & BTREE_ITER_ALL_SNAPSHOTS))
+               flags |= BTREE_ITER_IS_EXTENTS;
 
        if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
            !btree_type_has_snapshots(btree_id))
@@ -2022,6 +2029,12 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                pos.snapshot = btree_type_has_snapshots(btree_id)
                        ? U32_MAX : 0;
 
+       real_pos = pos;
+
+       if ((flags & BTREE_ITER_IS_EXTENTS) &&
+           bkey_cmp(pos, POS_MAX))
+               real_pos = bpos_nosnap_successor(pos);
+
        trans_for_each_iter(trans, iter) {
                if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE))
                        continue;
@@ -2030,8 +2043,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                        continue;
 
                if (best) {
-                       int cmp = bkey_cmp(bpos_diff(best->real_pos, pos),
-                                          bpos_diff(iter->real_pos, pos));
+                       int cmp = bkey_cmp(bpos_diff(best->real_pos, real_pos),
+                                          bpos_diff(iter->real_pos, real_pos));
 
                        if (cmp < 0 ||
                            ((cmp == 0 && btree_iter_keep(trans, iter))))
@@ -2041,6 +2054,13 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                best = iter;
        }
 
+       trace_trans_get_iter(_RET_IP_, trans->ip,
+                            btree_id,
+                            &real_pos, locks_want,
+                            best ? &best->real_pos : &pos_min,
+                            best ? best->locks_want : 0,
+                            best ? best->uptodate : BTREE_ITER_NEED_TRAVERSE);
+
        if (!best) {
                iter = btree_trans_iter_alloc(trans);
                bch2_btree_iter_init(trans, iter, btree_id);
@@ -2054,12 +2074,6 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
        trans->iters_live       |= 1ULL << iter->idx;
        trans->iters_touched    |= 1ULL << iter->idx;
 
-       if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
-           btree_node_type_is_extents(btree_id) &&
-           !(flags & BTREE_ITER_NOT_EXTENTS) &&
-           !(flags & BTREE_ITER_ALL_SNAPSHOTS))
-               flags |= BTREE_ITER_IS_EXTENTS;
-
        iter->flags = flags;
 
        iter->snapshot = pos.snapshot;
@@ -2078,19 +2092,20 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
                btree_iter_get_locks(iter, true, false);
        }
 
-       while (iter->level < depth) {
+       while (iter->level != depth) {
                btree_node_unlock(iter, iter->level);
                iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
-               iter->level++;
+               iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+               if (iter->level < depth)
+                       iter->level++;
+               else
+                       iter->level--;
        }
 
-       while (iter->level > depth)
-               iter->l[--iter->level].b = BTREE_ITER_NO_NODE_INIT;
-
        iter->min_depth = depth;
 
        bch2_btree_iter_set_pos(iter, pos);
-       btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
+       btree_iter_set_search_pos(iter, real_pos);
 
        return iter;
 }
index a0ff0c3ceb90c52271f0d73106e125167b711435..dfaf5e6df917c0ede6cc2a8cf345833384fce9e2 100644 (file)
@@ -645,8 +645,10 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
        struct bch_fs *c = container_of(shrink, struct bch_fs,
                                        btree_key_cache.shrink);
        struct btree_key_cache *bc = &c->btree_key_cache;
+       long nr = atomic_long_read(&bc->nr_keys) -
+               atomic_long_read(&bc->nr_dirty);
 
-       return atomic_long_read(&bc->nr_keys);
+       return max(0L, nr);
 }
 
 void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
index b793ab77e452a21bf8a200d2bd9a63a4135bda0e..64b6e86cf90a3f48ab52c559c6154bf2b76d869e 100644 (file)
@@ -725,6 +725,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
        case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
                bch2_trans_unlock(trans);
 
+               trace_trans_blocked_journal_reclaim(trans->ip);
+
                wait_event_freezable(c->journal.reclaim_wait,
                                     (ret = journal_reclaim_wait_done(c)));
                if (ret < 0)
index c3ad0bc85e78a21fd33b62e22b38514bf3d099e1..70008603f047d51aa5e10ab8ae3bbffd635c2b11 100644 (file)
@@ -898,6 +898,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
                spin_unlock(&c->ec_stripes_heap_lock);
                bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
                                    (u64) p.idx);
+               bch2_inconsistent_error(c);
                return -EIO;
        }
 
@@ -1015,6 +1016,7 @@ static int bch2_mark_stripe(struct bch_fs *c,
        if (!m || (old_s && !m->alive)) {
                bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
                                    idx);
+               bch2_inconsistent_error(c);
                return -1;
        }
 
@@ -1499,6 +1501,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                bch2_fs_inconsistent(c,
                        "pointer to nonexistent stripe %llu",
                        (u64) p.ec.idx);
+               bch2_inconsistent_error(c);
                ret = -EIO;
                goto out;
        }
@@ -1739,6 +1742,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
                bch2_fs_inconsistent(c,
                        "%llu:%llu len %u points to nonexistent indirect extent %llu",
                        p.k->p.inode, p.k->p.offset, p.k->size, idx);
+               bch2_inconsistent_error(c);
                ret = -EIO;
                goto err;
        }
index 7062ab9c58f18ad89a7ff893fcd43337dffc4f6e..fa7450d2b2adced23c1a5771c1465cae3396b64a 100644 (file)
@@ -1630,26 +1630,22 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags)
        return ret;
 }
 
-static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
-                             unsigned level, struct bkey_s_c k)
+static int bch2_stripes_read_fn(struct bch_fs *c, struct bkey_s_c k)
 {
        int ret = 0;
 
-       if (k.k->type == KEY_TYPE_stripe) {
+       if (k.k->type == KEY_TYPE_stripe)
                ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
                        bch2_mark_key(c, k, 0, 0, NULL, 0,
                                      BTREE_TRIGGER_NOATOMIC);
-               if (ret)
-                       return ret;
-       }
 
        return ret;
 }
 
-int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
+int bch2_stripes_read(struct bch_fs *c)
 {
-       int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_stripes,
-                                         NULL, bch2_stripes_read_fn);
+       int ret = bch2_btree_and_journal_walk(c, BTREE_ID_stripes,
+                                             bch2_stripes_read_fn);
        if (ret)
                bch_err(c, "error reading stripes: %i", ret);
 
index 744e51eaf327e66577b768479b384d25c07a88db..e79626b59509a3082ecef7f06acba9afbabeb21f 100644 (file)
@@ -215,8 +215,7 @@ void bch2_ec_flush_new_stripes(struct bch_fs *);
 
 void bch2_stripes_heap_start(struct bch_fs *);
 
-struct journal_keys;
-int bch2_stripes_read(struct bch_fs *, struct journal_keys *);
+int bch2_stripes_read(struct bch_fs *);
 int bch2_stripes_write(struct bch_fs *, unsigned);
 
 int bch2_ec_mem_alloc(struct bch_fs *, bool);
index dda3608dc3556b4c93c43d4ecd19f1252ba62214..d707cabd75363acf30842a1f3801e99432cfa47e 100644 (file)
@@ -1931,8 +1931,9 @@ loop:
                        i_size_write(&inode->v, req->ki_pos);
                spin_unlock(&inode->v.i_lock);
 
-               bio_for_each_segment_all(bv, bio, iter)
-                       put_page(bv->bv_page);
+               if (likely(!bio_flagged(bio, BIO_NO_PAGE_REF)))
+                       bio_for_each_segment_all(bv, bio, iter)
+                               put_page(bv->bv_page);
 
                if (dio->op.error) {
                        set_bit(EI_INODE_ERROR, &inode->ei_flags);
@@ -2393,6 +2394,15 @@ err:
 
 /* fallocate: */
 
+static int inode_update_times_fn(struct bch_inode_info *inode,
+                                struct bch_inode_unpacked *bi, void *p)
+{
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+       bi->bi_mtime = bi->bi_ctime = bch2_current_time(c);
+       return 0;
+}
+
 static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
@@ -2430,6 +2440,11 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
                                  &i_sectors_delta);
                i_sectors_acct(c, inode, NULL, i_sectors_delta);
        }
+
+       mutex_lock(&inode->ei_update_lock);
+       ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
+                              ATTR_MTIME|ATTR_CTIME) ?: ret;
+       mutex_unlock(&inode->ei_update_lock);
 err:
        bch2_pagecache_block_put(&inode->ei_pagecache_lock);
        inode_unlock(&inode->v);
index 8034d48c62bb0a9feb273ff4f5bbbf35e8795709..b1bbec3f96bd3c5360b8b3b7eb9bddab10857736 100644 (file)
@@ -143,7 +143,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
        struct bch_inode_unpacked inode_u;
        int ret;
 
-       bch2_trans_init(&trans, c, 0, 0);
+       bch2_trans_init(&trans, c, 0, 256);
 retry:
        bch2_trans_begin(&trans);
 
@@ -998,10 +998,7 @@ static const struct file_operations bch_file_operations = {
        .open           = generic_file_open,
        .fsync          = bch2_fsync,
        .splice_read    = generic_file_splice_read,
-#if 0
-       /* Busted: */
        .splice_write   = iter_file_splice_write,
-#endif
        .fallocate      = bch2_fallocate_dispatch,
        .unlocked_ioctl = bch2_fs_file_ioctl,
 #ifdef CONFIG_COMPAT
@@ -1293,16 +1290,17 @@ static int bch2_sync_fs(struct super_block *sb, int wait)
        return bch2_journal_flush(&c->journal);
 }
 
-static struct bch_fs *bch2_path_to_fs(const char *dev)
+static struct bch_fs *bch2_path_to_fs(const char *path)
 {
        struct bch_fs *c;
-       struct block_device *bdev = lookup_bdev(dev);
+       dev_t dev;
+       int ret;
 
-       if (IS_ERR(bdev))
-               return ERR_CAST(bdev);
+       ret = lookup_bdev(path, &dev);
+       if (ret)
+               return ERR_PTR(ret);
 
-       c = bch2_bdev_to_fs(bdev);
-       bdput(bdev);
+       c = bch2_dev_to_fs(dev);
        if (c)
                closure_put(&c->cl);
        return c ?: ERR_PTR(-ENOENT);
@@ -1554,7 +1552,9 @@ got_sb:
 #endif
        sb->s_xattr             = bch2_xattr_handlers;
        sb->s_magic             = BCACHEFS_STATFS_MAGIC;
-       sb->s_time_gran         = c->sb.time_precision;
+       sb->s_time_gran         = c->sb.nsec_per_time_unit;
+       sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
+       sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
        c->vfs_sb               = sb;
        strlcpy(sb->s_id, c->name, sizeof(sb->s_id));
 
index 36b10cb7ae6285ae3b461cdf1f89024a4303138d..d1a623991bbc7013202179f9f9a969befd3df47a 100644 (file)
@@ -1968,6 +1968,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
            k.k->type != KEY_TYPE_indirect_inline_data) {
                bch_err_inum_ratelimited(trans->c, orig_k->k->k.p.inode,
                                "pointer to nonexistent indirect extent");
+               bch2_inconsistent_error(trans->c);
                ret = -EIO;
                goto err;
        }
index 1b49a1c3b669d77f4bfec2e44c452546162aec75..c2773126a8c6ed52feb5d8a65404a28e7efb8620 100644 (file)
@@ -1187,6 +1187,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               "nr noflush writes:\t%llu\n"
               "nr direct reclaim:\t%llu\n"
               "nr background reclaim:\t%llu\n"
+              "reclaim kicked:\t\t%u\n"
+              "reclaim runs in:\t%u ms\n"
               "current entry sectors:\t%u\n"
               "current entry error:\t%u\n"
               "current entry:\t\t",
@@ -1202,6 +1204,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               j->nr_noflush_writes,
               j->nr_direct_reclaim,
               j->nr_background_reclaim,
+              j->reclaim_kicked,
+              jiffies_to_msecs(j->next_reclaim - jiffies),
               j->cur_entry_sectors,
               j->cur_entry_error);
 
index 24d04e51fb618d5bc5e7e3307ed1050d683f6c49..427be2da1dfccaadfc3c15081af9e23af1375540 100644 (file)
@@ -677,13 +677,15 @@ int bch2_journal_reclaim(struct journal *j)
 static int bch2_journal_reclaim_thread(void *arg)
 {
        struct journal *j = arg;
-       unsigned long next;
+       unsigned long delay, now;
        int ret = 0;
 
        set_freezable();
 
        kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
 
+       j->last_flushed = jiffies;
+
        while (!ret && !kthread_should_stop()) {
                j->reclaim_kicked = false;
 
@@ -691,7 +693,12 @@ static int bch2_journal_reclaim_thread(void *arg)
                ret = __bch2_journal_reclaim(j, false);
                mutex_unlock(&j->reclaim_lock);
 
-               next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);
+               now = jiffies;
+               delay = msecs_to_jiffies(j->reclaim_delay_ms);
+               j->next_reclaim = j->last_flushed + delay;
+
+               if (!time_in_range(j->next_reclaim, now, now + delay))
+                       j->next_reclaim = now + delay;
 
                while (1) {
                        set_current_state(TASK_INTERRUPTIBLE);
@@ -699,10 +706,9 @@ static int bch2_journal_reclaim_thread(void *arg)
                                break;
                        if (j->reclaim_kicked)
                                break;
-                       if (time_after_eq(jiffies, next))
+                       if (time_after_eq(jiffies, j->next_reclaim))
                                break;
-                       schedule_timeout(next - jiffies);
-                       try_to_freeze();
+                       freezable_schedule_timeout(j->next_reclaim - jiffies);
 
                }
                __set_current_state(TASK_RUNNING);
index adf1f5c981cdfa80e227516d7c203306a543eba3..0fd1af120db551746fc5cac54000c8616914a4f3 100644 (file)
@@ -8,11 +8,9 @@ static inline void journal_reclaim_kick(struct journal *j)
 {
        struct task_struct *p = READ_ONCE(j->reclaim_thread);
 
-       if (p && !j->reclaim_kicked) {
-               j->reclaim_kicked = true;
-               if (p)
-                       wake_up_process(p);
-       }
+       j->reclaim_kicked = true;
+       if (p)
+               wake_up_process(p);
 }
 
 unsigned bch2_journal_dev_buckets_available(struct journal *,
index c24bc4aa9af2473ba49154efb63d5b5876c98da1..a7aa12e919e278f8453b37ce3dfd8316bdf4c6d8 100644 (file)
@@ -248,6 +248,7 @@ struct journal {
        wait_queue_head_t       reclaim_wait;
        struct task_struct      *reclaim_thread;
        bool                    reclaim_kicked;
+       unsigned long           next_reclaim;
        u64                     nr_direct_reclaim;
        u64                     nr_background_reclaim;
 
index 4ac7e61fb8413fd04e211222fff1793fa80335cd..61c5901f09802443bd9ab5f3e93385e4f45c3c4f 100644 (file)
@@ -293,17 +293,19 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
 {
        struct bch_dev *ca;
        unsigned dev_idx;
-       u64 fragmented_allowed = 0, fragmented = 0;
+       s64 wait = S64_MAX, fragmented_allowed, fragmented;
 
        for_each_rw_member(ca, c, dev_idx) {
                struct bch_dev_usage usage = bch2_dev_usage_read(ca);
 
-               fragmented_allowed += ((__dev_buckets_reclaimable(ca, usage) *
+               fragmented_allowed = ((__dev_buckets_reclaimable(ca, usage) *
                                        ca->mi.bucket_size) >> 1);
-               fragmented += usage.d[BCH_DATA_user].fragmented;
+               fragmented = usage.d[BCH_DATA_user].fragmented;
+
+               wait = min(wait, max(0LL, fragmented_allowed - fragmented));
        }
 
-       return max_t(s64, 0, fragmented_allowed - fragmented);
+       return wait;
 }
 
 static int bch2_copygc_thread(void *arg)
index 8e272519ce0e38a036b147a2cc60448c56eca0ec..3f78fe7d37f4b8c26a1d8dde39aacd2172478a43 100644 (file)
@@ -372,6 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
                if (ret)
                        break;
        }
+       bch2_trans_iter_put(&trans, iter);
 
        return bch2_trans_exit(&trans) ?: ret;
 }
@@ -449,6 +450,8 @@ int bch2_fs_quota_read(struct bch_fs *c)
                                        KEY_TYPE_QUOTA_NOCHECK);
                }
        }
+       bch2_trans_iter_put(&trans, iter);
+
        return bch2_trans_exit(&trans) ?: ret;
 }
 
index a9ccd14effe7297fc61cf2dc9217719bdc342d90..b35b297d4446891a2aa812e8c31746602e8e3be9 100644 (file)
@@ -323,9 +323,7 @@ static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
 }
 
 static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
-                               struct journal_keys *journal_keys,
                                enum btree_id btree_id,
-                               btree_walk_node_fn node_fn,
                                btree_walk_key_fn key_fn)
 {
        struct btree_and_journal_iter iter;
@@ -338,15 +336,9 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
        bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
 
        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-               ret = key_fn(c, btree_id, b->c.level, k);
-               if (ret)
-                       break;
-
                if (b->c.level) {
                        bch2_bkey_buf_reassemble(&tmp, c, k);
 
-                       bch2_btree_and_journal_iter_advance(&iter);
-
                        child = bch2_btree_node_get_noiter(c, tmp.k,
                                                b->c.btree_id, b->c.level - 1,
                                                false);
@@ -357,16 +349,17 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
 
                        btree_and_journal_iter_prefetch(c, b, iter);
 
-                       ret   = (node_fn ? node_fn(c, b) : 0) ?:
-                               bch2_btree_and_journal_walk_recurse(c, child,
-                                       journal_keys, btree_id, node_fn, key_fn);
+                       ret = bch2_btree_and_journal_walk_recurse(c, child,
+                                       btree_id, key_fn);
                        six_unlock_read(&child->c.lock);
-
-                       if (ret)
-                               break;
                } else {
-                       bch2_btree_and_journal_iter_advance(&iter);
+                       ret = key_fn(c, k);
                }
+
+               if (ret)
+                       break;
+
+               bch2_btree_and_journal_iter_advance(&iter);
        }
 
        bch2_btree_and_journal_iter_exit(&iter);
@@ -374,9 +367,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
        return ret;
 }
 
-int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys,
-                               enum btree_id btree_id,
-                               btree_walk_node_fn node_fn,
+int bch2_btree_and_journal_walk(struct bch_fs *c, enum btree_id btree_id,
                                btree_walk_key_fn key_fn)
 {
        struct btree *b = c->btree_roots[btree_id].b;
@@ -386,10 +377,7 @@ int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_k
                return 0;
 
        six_lock_read(&b->c.lock, NULL, NULL);
-       ret   = (node_fn ? node_fn(c, b) : 0) ?:
-               bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
-                                                   node_fn, key_fn) ?:
-               key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key));
+       ret = bch2_btree_and_journal_walk_recurse(c, b, btree_id, key_fn);
        six_unlock_read(&b->c.lock);
 
        return ret;
@@ -1120,14 +1108,14 @@ use_clean:
 
        bch_verbose(c, "starting alloc read");
        err = "error reading allocation information";
-       ret = bch2_alloc_read(c, &c->journal_keys);
+       ret = bch2_alloc_read(c);
        if (ret)
                goto err;
        bch_verbose(c, "alloc read done");
 
        bch_verbose(c, "starting stripes_read");
        err = "error reading stripes";
-       ret = bch2_stripes_read(c, &c->journal_keys);
+       ret = bch2_stripes_read(c);
        if (ret)
                goto err;
        bch_verbose(c, "stripes_read done");
index fa91851b9ed7a2e890cb498b9012fe451c327813..e5565e4f335a535d27e960ab35571b21ca07171d 100644 (file)
@@ -45,12 +45,9 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
                                                struct bch_fs *,
                                                struct btree *);
 
-typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
-typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id,
-                                unsigned level, struct bkey_s_c k);
+typedef int (*btree_walk_key_fn)(struct bch_fs *c, struct bkey_s_c k);
 
-int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id,
-                               btree_walk_node_fn, btree_walk_key_fn);
+int bch2_btree_and_journal_walk(struct bch_fs *, enum btree_id, btree_walk_key_fn);
 
 void bch2_journal_keys_free(struct journal_keys *);
 void bch2_journal_entries_free(struct list_head *);
diff --git a/libbcachefs/s128.h b/libbcachefs/s128.h
new file mode 100644 (file)
index 0000000..6d77554
--- /dev/null
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_S128_H
+#define _BCACHEFS_S128_H
+
+#include <linux/math64.h>
+
+typedef struct {
+       s64     lo;
+       s64     hi;
+} s128;
+
+typedef struct {
+       s64     lo;
+       s32     hi;
+} s96;
+
+static inline s128 s128_mul(s128 a, s128 b)
+{
+       return a.lo
+
+}
+
+static inline s96 s96_mul(s96 a, s96 b)
+{
+       return a.lo
+
+}
+
+#endif /* _BCACHEFS_S128_H */
index 9f0bd44051991263b33e32eedff96aa07b0690db..b85f895de34642b69a2aba132cb693f4d992460a 100644 (file)
@@ -12,7 +12,7 @@
 
 #include <linux/crc32c.h>
 #include <crypto/hash.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 
 static inline enum bch_str_hash_type
 bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
index 11d7167b01294002d98e6d930b236e233cfe3c4a..74a75ced031e47cb9c901a36b6a4e9874032f712 100644 (file)
@@ -367,9 +367,15 @@ static void bch2_sb_update(struct bch_fs *c)
        c->sb.clean             = BCH_SB_CLEAN(src);
        c->sb.encryption_type   = BCH_SB_ENCRYPTION_TYPE(src);
        c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src);
-       c->sb.time_base_lo      = le64_to_cpu(src->time_base_lo);
+
+       c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
+       c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;
+
+       /* XXX this is wrong, we need a 96 or 128 bit integer type */
+       c->sb.time_base_lo      = div_u64(le64_to_cpu(src->time_base_lo),
+                                         c->sb.nsec_per_time_unit);
        c->sb.time_base_hi      = le32_to_cpu(src->time_base_hi);
-       c->sb.time_precision    = le32_to_cpu(src->time_precision);
+
        c->sb.features          = le64_to_cpu(src->features[0]);
        c->sb.compat            = le64_to_cpu(src->compat[0]);
 
index fd8a29911254a7ff5b6718bc54b42a35a6671b0e..bbd313a4287edfee401e37d16888db761c95776e 100644 (file)
@@ -99,7 +99,7 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned);
 static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
 static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
 
-struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev)
+struct bch_fs *bch2_dev_to_fs(dev_t dev)
 {
        struct bch_fs *c;
        struct bch_dev *ca;
@@ -110,7 +110,7 @@ struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev)
 
        list_for_each_entry(c, &bch_fs_list, list)
                for_each_member_device_rcu(ca, c, i, NULL)
-                       if (ca->disk_sb.bdev == bdev) {
+                       if (ca->disk_sb.bdev->bd_dev == dev) {
                                closure_get(&c->cl);
                                goto found;
                        }
@@ -544,8 +544,7 @@ void __bch2_fs_stop(struct bch_fs *c)
        for_each_member_device(ca, c, i)
                if (ca->kobj.state_in_sysfs &&
                    ca->disk_sb.bdev)
-                       sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
-                                         "bcachefs");
+                       sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
 
        if (c->kobj.state_in_sysfs)
                kobject_del(&c->kobj);
@@ -1017,8 +1016,7 @@ static void bch2_dev_free(struct bch_dev *ca)
 
        if (ca->kobj.state_in_sysfs &&
            ca->disk_sb.bdev)
-               sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
-                                 "bcachefs");
+               sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
 
        if (ca->kobj.state_in_sysfs)
                kobject_del(&ca->kobj);
@@ -1054,10 +1052,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
        wait_for_completion(&ca->io_ref_completion);
 
        if (ca->kobj.state_in_sysfs) {
-               struct kobject *block =
-                       &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
-
-               sysfs_remove_link(block, "bcachefs");
+               sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
                sysfs_remove_link(&ca->kobj, "block");
        }
 
@@ -1094,12 +1089,12 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
        }
 
        if (ca->disk_sb.bdev) {
-               struct kobject *block =
-                       &part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
+               struct kobject *block = bdev_kobj(ca->disk_sb.bdev);
 
                ret = sysfs_create_link(block, &ca->kobj, "bcachefs");
                if (ret)
                        return ret;
+
                ret = sysfs_create_link(&ca->kobj, block, "block");
                if (ret)
                        return ret;
@@ -1837,20 +1832,21 @@ err:
 /* return with ref on ca->ref: */
 struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
 {
-       struct block_device *bdev = lookup_bdev(path);
        struct bch_dev *ca;
+       dev_t dev;
        unsigned i;
+       int ret;
 
-       if (IS_ERR(bdev))
-               return ERR_CAST(bdev);
+       ret = lookup_bdev(path, &dev);
+       if (ret)
+               return ERR_PTR(ret);
 
        for_each_member_device(ca, c, i)
-               if (ca->disk_sb.bdev == bdev)
+               if (ca->disk_sb.bdev->bd_dev == dev)
                        goto found;
 
        ca = ERR_PTR(-ENOENT);
 found:
-       bdput(bdev);
        return ca;
 }
 
index bef27906e4809d5d5a049746be96c84f13d71195..6cab506150a86eebc9603a040e1031188244b089 100644 (file)
@@ -197,7 +197,7 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
        return devs;
 }
 
-struct bch_fs *bch2_bdev_to_fs(struct block_device *);
+struct bch_fs *bch2_dev_to_fs(dev_t);
 struct bch_fs *bch2_uuid_to_fs(uuid_le);
 
 bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
index 709e770345f925e31101a82ea3bd5366525bb848..270d3c899471ec298cface7265cecc5fd5927eaa 100644 (file)
@@ -215,6 +215,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
        strncpy(bdev->name, path, sizeof(bdev->name));
        bdev->name[sizeof(bdev->name) - 1] = '\0';
 
+       bdev->bd_dev            = xfstat(fd).st_rdev;
        bdev->bd_fd             = fd;
        bdev->bd_sync_fd        = sync_fd;
        bdev->bd_holder         = holder;
@@ -230,9 +231,9 @@ void bdput(struct block_device *bdev)
        BUG();
 }
 
-struct block_device *lookup_bdev(const char *path)
+int lookup_bdev(const char *path, dev_t *dev)
 {
-       return ERR_PTR(-EINVAL);
+       return -EINVAL;
 }
 
 static int aio_completion_thread(void *arg)