-e3a7cee5034f0f218f593a0a970e8ccd8bf99565
+69be0dae3162e1651a5d5fcce08562e6e2af971a
#include <linux/kobject.h>
#include <linux/types.h>
-#define BIO_MAX_PAGES 256
+#define BIO_MAX_VECS 256
typedef unsigned fmode_t;
struct inode;
struct dentry;
+struct user_namespace;
/*
* struct xattr_handler: When @name is set, match attributes with exactly that
int (*get)(const struct xattr_handler *, struct dentry *dentry,
struct inode *inode, const char *name, void *buffer,
size_t size);
- int (*set)(const struct xattr_handler *, struct dentry *dentry,
+ int (*set)(const struct xattr_handler *,
+ struct user_namespace *mnt_userns, struct dentry *dentry,
struct inode *inode, const char *name, const void *buffer,
size_t size, int flags);
};
),
TP_fast_assign(
- __entry->dev = bio->bi_disk ? bio_dev(bio) : 0;
+ __entry->dev = bio->bi_bdev ? bio_dev(bio) : 0;
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
),
TP_printk("%d,%d %s %llu + %u",
return ret == -ENOENT ? 0 : ret;
}
-int bch2_set_acl(struct inode *vinode, struct posix_acl *_acl, int type)
+int bch2_set_acl(struct user_namespace *mnt_userns,
+ struct inode *vinode, struct posix_acl *_acl, int type)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
mode = inode_u.bi_mode;
if (type == ACL_TYPE_ACCESS) {
- ret = posix_acl_update_mode(&inode->v, &mode, &acl);
+ ret = posix_acl_update_mode(mnt_userns, &inode->v, &mode, &acl);
if (ret)
goto btree_err;
}
struct bch_inode_unpacked *,
const struct bch_hash_info *,
struct posix_acl *, int);
-int bch2_set_acl(struct inode *, struct posix_acl *, int);
+int bch2_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int);
int bch2_acl_chmod(struct btree_trans *, struct bch_inode_unpacked *,
umode_t, struct posix_acl **);
.val_to_text = key_type_inline_data_to_text, \
}
-static const struct bkey_ops bch2_bkey_ops[] = {
+const struct bkey_ops bch2_bkey_ops[] = {
#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
BCH_BKEY_TYPES()
#undef x
: false;
}
-enum merge_result bch2_bkey_merge(struct bch_fs *c,
- struct bkey_s l, struct bkey_s r)
+bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
{
const struct bkey_ops *ops = &bch2_bkey_ops[l.k->type];
- enum merge_result ret;
- if (bch2_key_merging_disabled ||
- !ops->key_merge ||
- l.k->type != r.k->type ||
- bversion_cmp(l.k->version, r.k->version) ||
- bpos_cmp(l.k->p, bkey_start_pos(r.k)))
- return BCH_MERGE_NOMERGE;
-
- ret = ops->key_merge(c, l, r);
-
- if (ret != BCH_MERGE_NOMERGE)
- l.k->needs_whiteout |= r.k->needs_whiteout;
- return ret;
+ return bch2_bkey_maybe_mergable(l.k, r.k) && ops->key_merge(c, l, r);
}
static const struct old_bkey_type {
extern const char * const bch2_bkey_types[];
-enum merge_result {
- BCH_MERGE_NOMERGE,
-
- /*
- * The keys were mergeable, but would have overflowed size - so instead
- * l was changed to the maximum size, and both keys were modified:
- */
- BCH_MERGE_PARTIAL,
- BCH_MERGE_MERGE,
-};
-
struct bkey_ops {
/* Returns reason for being invalid if invalid, else NULL: */
const char * (*key_invalid)(const struct bch_fs *,
struct bkey_s_c);
void (*swab)(struct bkey_s);
bool (*key_normalize)(struct bch_fs *, struct bkey_s);
- enum merge_result (*key_merge)(struct bch_fs *,
- struct bkey_s, struct bkey_s);
+ bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
void (*compat)(enum btree_id id, unsigned version,
unsigned big_endian, int write,
struct bkey_s);
};
+extern const struct bkey_ops bch2_bkey_ops[];
+
const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c);
const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c,
enum btree_node_type);
bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
-enum merge_result bch2_bkey_merge(struct bch_fs *,
- struct bkey_s, struct bkey_s);
+static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
+{
+ return l->type == r->type &&
+ !bversion_cmp(l->version, r->version) &&
+ !bpos_cmp(l->p, bkey_start_pos(r)) &&
+ (u64) l->size + r->size <= KEY_SIZE_MAX &&
+ bch2_bkey_ops[l->type].key_merge &&
+ !bch2_key_merging_disabled;
+}
+
+bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
return NULL;
}
- /*
- * Unlock before doing IO:
- *
- * XXX: ideally should be dropping all btree node locks here
- */
- if (iter && btree_node_read_locked(iter, level + 1))
- btree_node_unlock(iter, level + 1);
+ /* Unlock before doing IO: */
+ if (iter && sync)
+ bch2_trans_unlock(iter->trans);
bch2_btree_node_read(c, b, sync);
return NULL;
}
+ /*
+ * XXX: this will probably always fail because btree_iter_relock()
+ * currently fails for iterators that aren't pointed at a valid btree
+ * node
+ */
+ if (iter && !bch2_trans_relock(iter->trans)) {
+ six_unlock_intent(&b->c.lock);
+ return ERR_PTR(-EINTR);
+ }
+
if (lock_type == SIX_LOCK_read)
six_lock_downgrade(&b->c.lock);
}
}
- /* XXX: waiting on IO with btree locks held: */
- wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
- TASK_UNINTERRUPTIBLE);
+ if (unlikely(btree_node_read_in_flight(b))) {
+ six_unlock_type(&b->c.lock, lock_type);
+ bch2_trans_unlock(iter->trans);
+
+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+ TASK_UNINTERRUPTIBLE);
+
+ /*
+ * XXX: check if this always fails - btree_iter_relock()
+ * currently fails for iterators that aren't pointed at a valid
+ * btree node
+ */
+ if (iter && !bch2_trans_relock(iter->trans))
+ return ERR_PTR(-EINTR);
+ goto retry;
+ }
prefetch(b->aux_data);
#include <linux/sched/task.h>
#include <trace/events/bcachefs.h>
+#define DROP_THIS_NODE 10
+#define DROP_PREV_NODE 11
+
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
preempt_disable();
(bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&cur->key)), buf2))) {
if (prev &&
bpos_cmp(expected_start, cur->data->min_key) > 0 &&
- BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data))
+ BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
+ if (bkey_cmp(prev->data->min_key,
+ cur->data->min_key) <= 0)
+ return DROP_PREV_NODE;
+
ret = set_node_max(c, prev,
- bpos_predecessor(cur->data->min_key));
- else
+ bpos_predecessor(cur->data->min_key));
+ } else {
+ if (bkey_cmp(expected_start, b->data->max_key) >= 0)
+ return DROP_THIS_NODE;
+
ret = set_node_min(c, cur, expected_start);
+ }
if (ret)
return ret;
}
return ret;
}
-#define DROP_THIS_NODE 10
-
static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
{
struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct bkey_buf tmp;
+ struct bkey_buf prev_k, cur_k;
struct btree *prev = NULL, *cur = NULL;
bool have_child, dropped_children = false;
char buf[200];
return 0;
again:
have_child = dropped_children = false;
- bch2_bkey_buf_init(&tmp);
+ bch2_bkey_buf_init(&prev_k);
+ bch2_bkey_buf_init(&cur_k);
bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_btree_and_journal_iter_advance(&iter);
- bch2_bkey_buf_reassemble(&tmp, c, k);
+ bch2_bkey_buf_reassemble(&cur_k, c, k);
- cur = bch2_btree_node_get_noiter(c, tmp.k,
+ cur = bch2_btree_node_get_noiter(c, cur_k.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(cur);
" %s",
bch2_btree_ids[b->c.btree_id],
b->c.level - 1,
- (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(tmp.k)), buf))) {
- bch2_btree_node_evict(c, tmp.k);
+ (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur_k.k)), buf))) {
+ bch2_btree_node_evict(c, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
- b->c.level, tmp.k->k.p);
+ b->c.level, cur_k.k->k.p);
if (ret)
goto err;
continue;
ret = btree_repair_node_start(c, b, prev, cur);
if (prev)
six_unlock_read(&prev->c.lock);
- prev = cur;
- cur = NULL;
- if (ret)
+ if (ret == DROP_PREV_NODE) {
+ bch2_btree_node_evict(c, prev_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, prev_k.k->k.p);
+ if (ret)
+ goto err;
+ goto again;
+ } else if (ret == DROP_THIS_NODE) {
+ bch2_btree_node_evict(c, cur_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
+ if (ret)
+ goto err;
+ continue;
+ } else if (ret)
break;
+
+ prev = cur;
+ cur = NULL;
+ bch2_bkey_buf_copy(&prev_k, c, cur_k.k);
}
if (!ret && !IS_ERR_OR_NULL(prev)) {
bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
- bch2_bkey_buf_reassemble(&tmp, c, k);
+ bch2_bkey_buf_reassemble(&cur_k, c, k);
bch2_btree_and_journal_iter_advance(&iter);
- cur = bch2_btree_node_get_noiter(c, tmp.k,
+ cur = bch2_btree_node_get_noiter(c, cur_k.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(cur);
cur = NULL;
if (ret == DROP_THIS_NODE) {
- bch2_btree_node_evict(c, tmp.k);
+ bch2_btree_node_evict(c, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
- b->c.level, tmp.k->k.p);
+ b->c.level, cur_k.k->k.p);
dropped_children = true;
}
six_unlock_read(&cur->c.lock);
bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&tmp, c);
+ bch2_bkey_buf_exit(&prev_k, c);
+ bch2_bkey_buf_exit(&cur_k, c);
if (!ret && dropped_children)
goto again;
#include <trace/events/bcachefs.h>
static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
+static struct btree_iter *btree_iter_child_alloc(struct btree_iter *, unsigned long);
+static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *);
+static void btree_iter_copy(struct btree_iter *, struct btree_iter *);
static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
/* peek_all() doesn't skip deleted keys */
static inline struct bkey_s_c btree_iter_level_peek_all(struct btree_iter *iter,
- struct btree_iter_level *l,
- struct bkey *u)
+ struct btree_iter_level *l)
{
- return __btree_iter_unpack(iter, l, u,
+ return __btree_iter_unpack(iter, l, &iter->k,
bch2_btree_node_iter_peek_all(&l->iter, l->b));
}
if (iter->flags & BTREE_ITER_PREFETCH)
btree_iter_prefetch(iter);
+ if (btree_node_read_locked(iter, level + 1))
+ btree_node_unlock(iter, level + 1);
iter->level = level;
+
+ bch2_btree_iter_verify_locks(iter);
err:
bch2_bkey_buf_exit(&tmp, c);
return ret;
return ret;
}
-static struct bkey_i *btree_trans_peek_updates(struct btree_trans *trans,
- enum btree_id btree_id, struct bpos pos)
+static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter,
+ struct bpos pos)
{
struct btree_insert_entry *i;
- trans_for_each_update2(trans, i)
- if ((cmp_int(btree_id, i->iter->btree_id) ?:
- bkey_cmp(pos, i->k->k.p)) <= 0) {
- if (btree_id == i->iter->btree_id)
+ if (!(iter->flags & BTREE_ITER_WITH_UPDATES))
+ return NULL;
+
+ trans_for_each_update(iter->trans, i)
+ if ((cmp_int(iter->btree_id, i->iter->btree_id) ?:
+ bkey_cmp(pos, i->k->k.p)) <= 0) {
+ if (iter->btree_id == i->iter->btree_id)
return i->k;
break;
}
return NULL;
}
-static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool with_updates)
+/**
+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
+ * current position
+ */
+struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
struct bpos search_key = btree_iter_search_key(iter);
struct bkey_i *next_update;
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
start:
- next_update = with_updates
- ? btree_trans_peek_updates(iter->trans, iter->btree_id, search_key)
- : NULL;
+ next_update = btree_trans_peek_updates(iter, search_key);
btree_iter_set_search_pos(iter, search_key);
while (1) {
k = btree_iter_level_peek(iter, &iter->l[0]);
if (next_update &&
- bpos_cmp(next_update->k.p, iter->real_pos) <= 0)
+ bpos_cmp(next_update->k.p, iter->real_pos) <= 0) {
+ iter->k = next_update->k;
k = bkey_i_to_s_c(next_update);
+ }
if (likely(k.k)) {
if (bkey_deleted(k.k)) {
return k;
}
-/**
- * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
- * current position
- */
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
-{
- return __btree_iter_peek(iter, false);
-}
-
/**
* bch2_btree_iter_next: returns first key greater than iterator's current
* position
return bch2_btree_iter_peek(iter);
}
-struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
-{
- return __btree_iter_peek(iter, true);
-}
-
-struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
-{
- if (!bch2_btree_iter_advance(iter))
- return bkey_s_c_null;
-
- return bch2_btree_iter_peek_with_updates(iter);
-}
-
/**
* bch2_btree_iter_peek_prev: returns first key less than or equal to
* iterator's current position
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
+ EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
return bch2_btree_iter_peek_prev(iter);
}
-static inline struct bkey_s_c
-__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
-{
- struct bkey_s_c k;
- struct bpos pos, next_start;
-
- /* keys & holes can't span inode numbers: */
- if (iter->pos.offset == KEY_OFFSET_MAX) {
- if (iter->pos.inode == KEY_INODE_MAX)
- return bkey_s_c_null;
-
- bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos));
- }
-
- pos = iter->pos;
- k = bch2_btree_iter_peek(iter);
- iter->pos = pos;
-
- if (bkey_err(k))
- return k;
-
- if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0)
- return k;
-
- next_start = k.k ? bkey_start_pos(k.k) : POS_MAX;
-
- bkey_init(&iter->k);
- iter->k.p = iter->pos;
- bch2_key_resize(&iter->k,
- min_t(u64, KEY_SIZE_MAX,
- (next_start.inode == iter->pos.inode
- ? next_start.offset
- : KEY_OFFSET_MAX) -
- iter->pos.offset));
-
- EBUG_ON(!iter->k.size);
-
- bch2_btree_iter_verify_entry_exit(iter);
- bch2_btree_iter_verify(iter);
-
- return (struct bkey_s_c) { &iter->k, NULL };
-}
-
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
- struct btree_iter_level *l = &iter->l[0];
+ struct bpos search_key = btree_iter_search_key(iter);
struct bkey_s_c k;
int ret;
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
- btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
+ btree_iter_set_search_pos(iter, search_key);
- if (iter->flags & BTREE_ITER_IS_EXTENTS)
- return __bch2_btree_iter_peek_slot_extents(iter);
+ /* extents can't span inode numbers: */
+ if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ iter->pos.offset == KEY_OFFSET_MAX) {
+ if (iter->pos.inode == KEY_INODE_MAX)
+ return bkey_s_c_null;
+
+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
+ }
ret = btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
- k = btree_iter_level_peek_all(iter, l, &iter->k);
+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) {
+ struct bkey_i *next_update = btree_trans_peek_updates(iter, search_key);
- EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0);
+ k = btree_iter_level_peek_all(iter, &iter->l[0]);
+ EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0);
- if (!k.k || bkey_cmp(iter->pos, k.k->p)) {
- /* hole */
- bkey_init(&iter->k);
- iter->k.p = iter->pos;
- k = (struct bkey_s_c) { &iter->k, NULL };
+ if (next_update &&
+ (!k.k || bpos_cmp(next_update->k.p, k.k->p) <= 0)) {
+ iter->k = next_update->k;
+ k = bkey_i_to_s_c(next_update);
+ }
+ } else {
+ if ((iter->flags & BTREE_ITER_INTENT)) {
+ struct btree_iter *child =
+ btree_iter_child_alloc(iter, _THIS_IP_);
+
+ btree_iter_copy(child, iter);
+ k = bch2_btree_iter_peek(child);
+
+ if (k.k && !bkey_err(k))
+ iter->k = child->k;
+ } else {
+ struct bpos pos = iter->pos;
+
+ k = bch2_btree_iter_peek(iter);
+ iter->pos = pos;
+ }
+
+ if (unlikely(bkey_err(k)))
+ return k;
+ }
+
+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) {
+ if (!k.k ||
+ ((iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
+ ? bpos_cmp(iter->pos, k.k->p)
+ : bkey_cmp(iter->pos, k.k->p))) {
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos;
+ k = (struct bkey_s_c) { &iter->k, NULL };
+ }
+ } else {
+ struct bpos next = k.k ? bkey_start_pos(k.k) : POS_MAX;
+
+ if (bkey_cmp(iter->pos, next) < 0) {
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos;
+ bch2_key_resize(&iter->k,
+ min_t(u64, KEY_SIZE_MAX,
+ (next.inode == iter->pos.inode
+ ? next.offset
+ : KEY_OFFSET_MAX) -
+ iter->pos.offset));
+
+ k = (struct bkey_s_c) { &iter->k, NULL };
+ EBUG_ON(!k.k->size);
+ }
}
bch2_btree_iter_verify_entry_exit(iter);
struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *iter)
{
+ struct bkey_i *next_update;
struct bkey_cached *ck;
int ret;
EBUG_ON(btree_iter_type(iter) != BTREE_ITER_CACHED);
bch2_btree_iter_verify(iter);
+ next_update = btree_trans_peek_updates(iter, iter->pos);
+ if (next_update && !bpos_cmp(next_update->k.p, iter->pos))
+ return bkey_i_to_s_c(next_update);
+
ret = btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
/* new transactional stuff: */
+static void btree_iter_child_free(struct btree_iter *iter)
+{
+ struct btree_iter *child = btree_iter_child(iter);
+
+ if (child) {
+ bch2_trans_iter_free(iter->trans, child);
+ iter->child_idx = U8_MAX;
+ }
+}
+
+static struct btree_iter *btree_iter_child_alloc(struct btree_iter *iter,
+ unsigned long ip)
+{
+ struct btree_trans *trans = iter->trans;
+ struct btree_iter *child = btree_iter_child(iter);
+
+ if (!child) {
+ child = btree_trans_iter_alloc(trans);
+ child->ip_allocated = ip;
+ iter->child_idx = child->idx;
+
+ trans->iters_live |= 1ULL << child->idx;
+ trans->iters_touched |= 1ULL << child->idx;
+ }
+
+ return child;
+}
+
static inline void __bch2_trans_iter_free(struct btree_trans *trans,
unsigned idx)
{
+ btree_iter_child_free(&trans->iters[idx]);
+
__bch2_btree_iter_unlock(&trans->iters[idx]);
trans->iters_linked &= ~(1ULL << idx);
trans->iters_live &= ~(1ULL << idx);
static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
{
+ struct btree_iter *iter;
unsigned idx;
if (unlikely(trans->iters_linked ==
btree_trans_iter_alloc_fail(trans);
idx = __ffs64(~trans->iters_linked);
-
+ iter = &trans->iters[idx];
+
+ iter->trans = trans;
+ iter->idx = idx;
+ iter->child_idx = U8_MAX;
+ iter->flags = 0;
+ iter->nodes_locked = 0;
+ iter->nodes_intent_locked = 0;
trans->iters_linked |= 1ULL << idx;
- trans->iters[idx].idx = idx;
- trans->iters[idx].flags = 0;
- return &trans->iters[idx];
+ return iter;
}
-static inline void btree_iter_copy(struct btree_iter *dst,
- struct btree_iter *src)
+static void btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
{
- unsigned i, idx = dst->idx;
+ unsigned i;
- *dst = *src;
- dst->idx = idx;
- dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT;
+ __bch2_btree_iter_unlock(dst);
+ btree_iter_child_free(dst);
+
+ memcpy(&dst->flags, &src->flags,
+ sizeof(struct btree_iter) - offsetof(struct btree_iter, flags));
for (i = 0; i < BTREE_MAX_DEPTH; i++)
if (btree_node_locked(dst, i))
p = trans->mem + trans->mem_top;
trans->mem_top += size;
+ memset(p, 0, size);
return p;
}
trans->iters_touched &= trans->iters_live;
trans->nr_updates = 0;
- trans->nr_updates2 = 0;
trans->mem_top = 0;
trans->hooks = NULL;
trans->iters = p; p += iters_bytes;
trans->updates = p; p += updates_bytes;
- trans->updates2 = p; p += updates_bytes;
}
void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
bch2_trans_unlock(trans);
#ifdef CONFIG_BCACHEFS_DEBUG
+ if (trans->iters_live) {
+ struct btree_iter *iter;
+
+ trans_for_each_iter(trans, iter)
+ btree_iter_child_free(iter);
+ }
+
if (trans->iters_live) {
struct btree_iter *iter;
return init_srcu_struct(&c->btree_trans_barrier) ?:
mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
sizeof(struct btree_iter) * nr +
- sizeof(struct btree_insert_entry) * nr +
sizeof(struct btree_insert_entry) * nr) ?:
mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
BTREE_TRANS_MEM_MAX);
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
-struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *);
-struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *);
-
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
iter->should_be_locked = false;
}
+static inline struct btree_iter *btree_iter_child(struct btree_iter *iter)
+{
+ return iter->child_idx == U8_MAX ? NULL
+ : iter->trans->iters + iter->child_idx;
+}
+
/* Sort order for locking btree iterators: */
static inline int btree_iter_lock_cmp(const struct btree_iter *l,
const struct btree_iter *r)
* @pos or the first key strictly greater than @pos
*/
#define BTREE_ITER_IS_EXTENTS (1 << 6)
-#define BTREE_ITER_ERROR (1 << 7)
-#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 8)
-#define BTREE_ITER_CACHED_NOFILL (1 << 9)
-#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
-#define BTREE_ITER_NOT_EXTENTS (1 << 11)
-#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
+#define BTREE_ITER_NOT_EXTENTS (1 << 7)
+#define BTREE_ITER_ERROR (1 << 8)
+#define BTREE_ITER_SET_POS_AFTER_COMMIT (1 << 9)
+#define BTREE_ITER_CACHED_NOFILL (1 << 10)
+#define BTREE_ITER_CACHED_NOCREATE (1 << 11)
+#define BTREE_ITER_WITH_UPDATES (1 << 12)
+#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
*/
struct btree_iter {
struct btree_trans *trans;
- struct bpos pos;
- /* what we're searching for/what the iterator actually points to: */
- struct bpos real_pos;
- struct bpos pos_after_commit;
+ unsigned long ip_allocated;
+
+ u8 idx;
+ u8 child_idx;
+
+ /* btree_iter_copy starts here: */
+ u16 flags;
+
/* When we're filtering by snapshot, the snapshot ID we're looking for: */
unsigned snapshot;
- u16 flags;
- u8 idx;
+ struct bpos pos;
+ struct bpos real_pos;
+ struct bpos pos_after_commit;
enum btree_id btree_id:4;
enum btree_iter_uptodate uptodate:3;
* bch2_btree_iter_next_slot() can correctly advance pos.
*/
struct bkey k;
- unsigned long ip_allocated;
};
static inline enum btree_iter_type
enum btree_id btree_id:8;
u8 level;
unsigned trans_triggers_run:1;
- unsigned is_extent:1;
struct bkey_i *k;
struct btree_iter *iter;
};
int srcu_idx;
u8 nr_updates;
- u8 nr_updates2;
unsigned used_mempool:1;
unsigned error:1;
unsigned in_traverse_all:1;
struct btree_iter *iters;
struct btree_insert_entry *updates;
- struct btree_insert_entry *updates2;
/* update path: */
struct btree_trans_commit_hook *hooks;
(_i) < (_trans)->updates + (_trans)->nr_updates; \
(_i)++)
-#define trans_for_each_update2(_trans, _i) \
- for ((_i) = (_trans)->updates2; \
- (_i) < (_trans)->updates2 + (_trans)->nr_updates2; \
- (_i)++)
-
#endif /* _BCACHEFS_BTREE_UPDATE_H */
static inline bool same_leaf_as_prev(struct btree_trans *trans,
struct btree_insert_entry *i)
{
- return i != trans->updates2 &&
+ return i != trans->updates &&
iter_l(i[0].iter)->b == iter_l(i[-1].iter)->b;
}
static inline void btree_insert_entry_checks(struct btree_trans *trans,
struct btree_insert_entry *i)
{
- BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
+ BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
BUG_ON(i->level != i->iter->level);
BUG_ON(i->btree_id != i->iter->btree_id);
}
h = h->next;
}
- trans_for_each_update2(trans, i) {
+ trans_for_each_update(trans, i) {
/* Multiple inserts might go to same leaf: */
if (!same_leaf_as_prev(trans, i))
u64s = 0;
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (bch2_journal_seq_verify)
- trans_for_each_update2(trans, i)
+ trans_for_each_update(trans, i)
i->k->k.version.lo = trans->journal_res.seq;
else if (bch2_inject_invalid_keys)
- trans_for_each_update2(trans, i)
+ trans_for_each_update(trans, i)
i->k->k.version = MAX_VERSION;
}
if (unlikely(c->gc_pos.phase))
bch2_trans_mark_gc(trans);
- trans_for_each_update2(trans, i)
+ trans_for_each_update(trans, i)
do_btree_insert_one(trans, i);
err:
if (marking) {
BUG_ON(iter->level);
- trans_for_each_update2(trans, i) {
+ trans_for_each_update(trans, i) {
if (iter_l(i->iter)->b != b)
continue;
struct btree_iter *iter;
int ret;
- trans_for_each_update2(trans, i) {
+ trans_for_each_update(trans, i) {
struct btree *b;
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
}
}
- trans_for_each_update2(trans, i)
+ trans_for_each_update(trans, i)
BUG_ON(!btree_node_intent_locked(i->iter, i->level));
ret = bch2_journal_preres_get(&c->journal,
}
}
- trans_for_each_update2(trans, i) {
+ trans_for_each_update(trans, i) {
const char *invalid = bch2_bkey_invalid(c,
bkey_i_to_s_c(i->k), i->bkey_type);
if (invalid) {
}
bch2_btree_trans_verify_locks(trans);
- trans_for_each_update2(trans, i)
+ trans_for_each_update(trans, i)
if (!same_leaf_as_prev(trans, i))
bch2_btree_node_lock_for_insert(c,
iter_l(i->iter)->b, i->iter);
ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
- trans_for_each_update2(trans, i)
+ trans_for_each_update(trans, i)
if (!same_leaf_as_prev(trans, i))
bch2_btree_node_unlock_write_inlined(iter_l(i->iter)->b,
i->iter);
return 0;
}
-static void __bch2_trans_update2(struct btree_trans *trans,
- struct btree_insert_entry n)
-{
- struct btree_insert_entry *i;
-
- btree_insert_entry_checks(trans, &n);
-
- EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
-
- n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
-
- trans_for_each_update2(trans, i)
- if (btree_insert_entry_cmp(&n, i) <= 0)
- break;
-
- if (i < trans->updates2 + trans->nr_updates2 &&
- !btree_insert_entry_cmp(&n, i))
- *i = n;
- else
- array_insert_item(trans->updates2, trans->nr_updates2,
- i - trans->updates2, n);
-}
-
-static void bch2_trans_update2(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_i *insert)
-{
- __bch2_trans_update2(trans, (struct btree_insert_entry) {
- .bkey_type = __btree_node_type(iter->level, iter->btree_id),
- .btree_id = iter->btree_id,
- .level = iter->level,
- .iter = iter,
- .k = insert,
- });
-}
-
-static int extent_update_to_keys(struct btree_trans *trans,
- struct btree_insert_entry n)
+static int __btree_delete_at(struct btree_trans *trans, enum btree_id btree_id,
+ struct bpos pos, unsigned trigger_flags)
{
+ struct btree_iter *iter;
+ struct bkey_i *update;
int ret;
- ret = bch2_extent_can_insert(trans, n.iter, n.k);
- if (ret)
+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey));
+ if ((ret = PTR_ERR_OR_ZERO(update)))
return ret;
- if (bkey_deleted(&n.k->k))
- return 0;
-
- n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p,
- BTREE_ITER_INTENT|
- BTREE_ITER_NOT_EXTENTS);
- n.is_extent = false;
+ bkey_init(&update->k);
+ update->k.p = pos;
- __bch2_trans_update2(trans, n);
- bch2_trans_iter_put(trans, n.iter);
+ iter = bch2_trans_get_iter(trans, btree_id, pos,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_INTENT);
+ bch2_trans_update(trans, iter, update, trigger_flags);
+ bch2_trans_iter_put(trans, iter);
return 0;
}
static int extent_handle_overwrites(struct btree_trans *trans,
- enum btree_id btree_id,
- struct bkey_i *insert)
+ struct btree_insert_entry *i)
{
+ struct bch_fs *c = trans->c;
struct btree_iter *iter, *update_iter;
- struct bpos start = bkey_start_pos(&insert->k);
+ struct bpos start = bkey_start_pos(&i->k->k);
struct bkey_i *update;
struct bkey_s_c k;
int ret = 0;
- iter = bch2_trans_get_iter(trans, btree_id, start,
- BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_with_updates(iter);
+ iter = bch2_trans_get_iter(trans, i->btree_id, start,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_WITH_UPDATES|
+ BTREE_ITER_NOT_EXTENTS);
+ k = bch2_btree_iter_peek(iter);
+ if (!k.k || (ret = bkey_err(k)))
+ goto out;
- while (k.k && !(ret = bkey_err(k))) {
- if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
- break;
+ if (bch2_bkey_maybe_mergable(k.k, &i->k->k)) {
+ struct bpos l_pos = k.k->p;
+
+ update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ if ((ret = PTR_ERR_OR_ZERO(update)))
+ goto out;
+
+ bkey_reassemble(update, k);
+ if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(i->k))) {
+ ret = __btree_delete_at(trans, i->btree_id, l_pos,
+ i->trigger_flags);
+ if (ret)
+ goto out;
+
+ i->k = update;
+ goto next;
+ }
+ }
+
+ if (!bkey_cmp(k.k->p, bkey_start_pos(&i->k->k)))
+ goto next;
+
+ while (bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) > 0) {
if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if ((ret = PTR_ERR_OR_ZERO(update)))
- break;
+ goto out;
bkey_reassemble(update, k);
bch2_cut_back(start, update);
- update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
+ update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
- bch2_trans_update2(trans, update_iter, update);
+ bch2_trans_update(trans, update_iter, update, i->trigger_flags);
bch2_trans_iter_put(trans, update_iter);
}
- if (bkey_cmp(k.k->p, insert->k.p) < 0 ||
- (!bkey_cmp(k.k->p, insert->k.p) && bkey_deleted(&insert->k))) {
- update = bch2_trans_kmalloc(trans, sizeof(struct bkey));
- if ((ret = PTR_ERR_OR_ZERO(update)))
- break;
-
- bkey_init(&update->k);
- update->k.p = k.k->p;
-
- update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
- BTREE_ITER_NOT_EXTENTS|
- BTREE_ITER_INTENT);
- bch2_trans_update2(trans, update_iter, update);
- bch2_trans_iter_put(trans, update_iter);
+ if (bkey_cmp(k.k->p, i->k->k.p) <= 0) {
+ ret = __btree_delete_at(trans, i->btree_id, k.k->p,
+ i->trigger_flags);
+ if (ret)
+ goto out;
}
- if (bkey_cmp(k.k->p, insert->k.p) > 0) {
+ if (bkey_cmp(k.k->p, i->k->k.p) > 0) {
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
if ((ret = PTR_ERR_OR_ZERO(update)))
- break;
+ goto out;
bkey_reassemble(update, k);
- bch2_cut_front(insert->k.p, update);
+ bch2_cut_front(i->k->k.p, update);
- update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
+ update_iter = bch2_trans_get_iter(trans, i->btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
- bch2_trans_update2(trans, update_iter, update);
+ bch2_trans_update(trans, update_iter, update,
+ i->trigger_flags);
bch2_trans_iter_put(trans, update_iter);
- break;
+ goto out;
}
-
- k = bch2_btree_iter_next_with_updates(iter);
+next:
+ k = bch2_btree_iter_next(iter);
+ if (!k.k || (ret = bkey_err(k)))
+ goto out;
}
+
+ bch2_bkey_merge(c, bkey_i_to_s(i->k), k);
+out:
bch2_trans_iter_put(trans, iter);
return ret;
}
} while (trans_trigger_run);
- /* Turn extents updates into keys: */
- trans_for_each_update(trans, i)
- if (i->is_extent) {
- ret = extent_handle_overwrites(trans, i->btree_id, i->k);
- if (unlikely(ret))
- goto out;
- }
-
trans_for_each_update(trans, i) {
- ret = i->is_extent
- ? extent_update_to_keys(trans, *i)
- : (__bch2_trans_update2(trans, *i), 0);
- if (unlikely(ret))
- goto out;
- }
-
- trans_for_each_update2(trans, i) {
ret = bch2_btree_iter_traverse(i->iter);
if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip, _RET_IP_,
.bkey_type = __btree_node_type(iter->level, iter->btree_id),
.btree_id = iter->btree_id,
.level = iter->level,
- .is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0,
.iter = iter,
.k = k
};
+ bool is_extent = (iter->flags & BTREE_ITER_IS_EXTENTS) != 0;
+ int ret = 0;
BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
#ifdef CONFIG_BCACHEFS_DEBUG
BUG_ON(bkey_cmp(iter->pos,
- n.is_extent ? bkey_start_pos(&k->k) : k->k.p));
+ is_extent ? bkey_start_pos(&k->k) : k->k.p));
trans_for_each_update(trans, i) {
- BUG_ON(bkey_cmp(i->iter->pos,
- i->is_extent ? bkey_start_pos(&i->k->k) : i->k->k.p));
+ BUG_ON(bkey_cmp(i->iter->pos, i->k->k.p));
BUG_ON(i != trans->updates &&
btree_insert_entry_cmp(i - 1, i) >= 0);
}
#endif
- iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+ if (is_extent) {
+ ret = bch2_extent_can_insert(trans, n.iter, n.k);
+ if (ret)
+ return ret;
+
+ ret = extent_handle_overwrites(trans, &n);
+ if (ret)
+ return ret;
- if (n.is_extent) {
iter->pos_after_commit = k->k.p;
iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
+
+ if (bkey_deleted(&n.k->k))
+ return 0;
+
+ n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_NOT_EXTENTS);
+ bch2_trans_iter_put(trans, n.iter);
}
+ BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS);
+
+ n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
+
/*
* Pending updates are kept sorted: first, find position of new update,
* then delete/trim any updates the new update overwrites:
*/
- if (!n.is_extent) {
- trans_for_each_update(trans, i)
- if (btree_insert_entry_cmp(&n, i) <= 0)
- break;
-
- if (i < trans->updates + trans->nr_updates &&
- !btree_insert_entry_cmp(&n, i))
- *i = n;
- else
- array_insert_item(trans->updates, trans->nr_updates,
- i - trans->updates, n);
- } else {
- trans_for_each_update(trans, i)
- if (btree_insert_entry_cmp(&n, i) < 0)
- break;
-
- while (i > trans->updates &&
- i[-1].btree_id == n.btree_id &&
- bkey_cmp(bkey_start_pos(&n.k->k),
- bkey_start_pos(&i[-1].k->k)) <= 0) {
- --i;
- array_remove_item(trans->updates, trans->nr_updates,
- i - trans->updates);
- }
-
- if (i > trans->updates &&
- i[-1].btree_id == n.btree_id &&
- bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0)
- bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k);
-
- if (i < trans->updates + trans->nr_updates &&
- i->btree_id == n.btree_id &&
- bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) {
- if (bkey_cmp(bkey_start_pos(&n.k->k),
- bkey_start_pos(&i->k->k)) > 0) {
- struct btree_insert_entry split = *i;
- int ret;
-
- BUG_ON(trans->nr_updates + 1 >= BTREE_ITER_MAX);
-
- split.k = bch2_trans_kmalloc(trans, bkey_bytes(&i->k->k));
- ret = PTR_ERR_OR_ZERO(split.k);
- if (ret)
- return ret;
-
- bkey_copy(split.k, i->k);
- bch2_cut_back(bkey_start_pos(&n.k->k), split.k);
-
- split.iter = bch2_trans_get_iter(trans, split.btree_id,
- bkey_start_pos(&split.k->k),
- BTREE_ITER_INTENT);
- split.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
- bch2_trans_iter_put(trans, split.iter);
- array_insert_item(trans->updates, trans->nr_updates,
- i - trans->updates, split);
- i++;
- }
-
- /*
- * When we have an extent that overwrites the start of another
- * update, trimming that extent will mean the iterator's
- * position has to change since the iterator position has to
- * match the extent's start pos - but we don't want to change
- * the iterator pos if some other code is using it, so we may
- * need to clone it:
- */
- if (btree_iter_live(trans, i->iter)) {
- i->iter = bch2_trans_copy_iter(trans, i->iter);
-
- i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
- bch2_trans_iter_put(trans, i->iter);
- }
-
- bch2_cut_front(n.k->k.p, i->k);
- bch2_btree_iter_set_pos(i->iter, n.k->k.p);
- }
+ trans_for_each_update(trans, i)
+ if (btree_insert_entry_cmp(&n, i) <= 0)
+ break;
+ if (i < trans->updates + trans->nr_updates &&
+ !btree_insert_entry_cmp(&n, i)) {
+ BUG_ON(i->trans_triggers_run);
+ *i = n;
+ } else
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n);
- }
return 0;
}
return 0;
}
-static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
- u64 p_start, u64 p_end,
- u64 v_start, u64 v_end)
-{
- if (p_start == p_end)
- return false;
-
- p_start += le64_to_cpu(p.v->idx);
- p_end += le64_to_cpu(p.v->idx);
-
- if (p_end <= v_start)
- return false;
- if (p_start >= v_end)
- return false;
- return true;
-}
-
-static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
- u64 start, u64 end,
- struct bkey_s_c k)
-{
- return __reflink_p_frag_references(p, start, end,
- bkey_start_offset(k.k),
- k.k->p.offset);
-}
-
static int __bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
{
struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
- int frags_referenced;
while (1) {
if (*r_idx >= c->reflink_gc_nr)
(*r_idx)++;
}
- frags_referenced =
- __reflink_p_frag_references(p, 0, front_frag,
- r->offset - r->size, r->offset) +
- __reflink_p_frag_references(p, back_frag, p.k->size,
- r->offset - r->size, r->offset);
-
- if (frags_referenced == 2) {
- BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
- add = -add;
- } else if (frags_referenced == 1) {
- BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
- add = 0;
- }
-
BUG_ON((s64) r->refcount + add < 0);
r->refcount += add;
return NULL;
}
-static int trans_get_key(struct btree_trans *trans,
- enum btree_id btree_id, struct bpos pos,
- struct btree_iter **iter,
- struct bkey_s_c *k)
-{
- unsigned flags = btree_id != BTREE_ID_alloc
- ? BTREE_ITER_SLOTS
- : BTREE_ITER_CACHED;
- int ret;
-
- *iter = trans_get_update(trans, btree_id, pos, k);
- if (*iter)
- return 1;
-
- *iter = bch2_trans_get_iter(trans, btree_id, pos,
- flags|BTREE_ITER_INTENT);
- *k = __bch2_btree_iter_peek(*iter, flags);
- ret = bkey_err(*k);
- if (ret)
- bch2_trans_iter_put(trans, *iter);
- return ret;
-}
-
static struct bkey_alloc_buf *
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter,
const struct bch_extent_ptr *ptr,
struct bch_replicas_padded r;
int ret = 0;
- ret = trans_get_key(trans, BTREE_ID_stripes, POS(0, p.ec.idx), &iter, &k);
- if (ret < 0)
- return ret;
+ iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, POS(0, p.ec.idx),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_WITH_UPDATES);
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
if (k.k->type != KEY_TYPE_stripe) {
bch2_fs_inconsistent(c,
(u64) p.ec.idx);
bch2_inconsistent_error(c);
ret = -EIO;
- goto out;
+ goto err;
}
if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) {
"stripe pointer doesn't match stripe %llu",
(u64) p.ec.idx);
ret = -EIO;
- goto out;
+ goto err;
}
s = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
ret = PTR_ERR_OR_ZERO(s);
if (ret)
- goto out;
+ goto err;
bkey_reassemble(&s->k_i, k);
stripe_blockcount_set(&s->v, p.ec.block,
bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
r.e.data_type = data_type;
update_replicas_list(trans, &r.e, sectors);
-out:
+err:
bch2_trans_iter_put(trans, iter);
return ret;
}
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
u64 idx, unsigned sectors,
- unsigned front_frag,
- unsigned back_frag,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_i *n;
__le64 *refcount;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
- int frags_referenced;
s64 ret;
- ret = trans_get_key(trans, BTREE_ID_reflink,
- POS(0, idx), &iter, &k);
- if (ret < 0)
- return ret;
+ iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, POS(0, idx),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_WITH_UPDATES);
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
sectors = min_t(u64, sectors, k.k->p.offset - idx);
- frags_referenced =
- reflink_p_frag_references(p, 0, front_frag, k) +
- reflink_p_frag_references(p, back_frag, p.k->size, k);
-
- if (frags_referenced == 2) {
- BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
- add = -add;
- } else if (frags_referenced == 1) {
- BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
- goto out;
- }
-
n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
ret = PTR_ERR_OR_ZERO(n);
if (ret)
ret = bch2_trans_update(trans, iter, n, 0);
if (ret)
goto err;
-out:
+
ret = sectors;
err:
bch2_trans_iter_put(trans, iter);
s64 sectors, unsigned flags)
{
u64 idx = le64_to_cpu(p.v->idx) + offset;
- unsigned front_frag, back_frag;
s64 ret = 0;
if (sectors < 0)
sectors = -sectors;
- BUG_ON(offset + sectors > p.k->size);
-
- front_frag = offset;
- back_frag = offset + sectors;
+ BUG_ON(offset || sectors != p.k->size);
while (sectors) {
- ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors,
- front_frag, back_frag, flags);
+ ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags);
if (ret < 0)
return ret;
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
- if (!btree_node_type_is_extents(iter->btree_id)) {
- if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
- old = bch2_btree_iter_peek_slot(iter);
- ret = bkey_err(old);
- if (ret)
- return ret;
- } else {
- struct bkey_cached *ck = (void *) iter->l[0].b;
-
- BUG_ON(!ck->valid);
- old = bkey_i_to_s_c(ck->k);
- }
-
- if (old.k->type == new->k.type) {
- ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
- BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
- } else {
- ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
- BTREE_TRIGGER_INSERT|flags) ?:
- bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
- BTREE_TRIGGER_OVERWRITE|flags);
- }
- } else {
- struct btree_iter *copy;
- struct bkey _old;
-
- EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
-
- bkey_init(&_old);
- old = (struct bkey_s_c) { &_old, NULL };
-
- ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
- 0, new->k.size,
- BTREE_TRIGGER_INSERT);
+ if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
+ old = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(old);
if (ret)
return ret;
+ } else {
+ struct bkey_cached *ck = (void *) iter->l[0].b;
- copy = bch2_trans_copy_iter(trans, iter);
-
- for_each_btree_key_continue(copy, 0, old, ret) {
- unsigned offset = 0;
- s64 sectors = -((s64) old.k->size);
-
- flags |= BTREE_TRIGGER_OVERWRITE;
-
- if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
- break;
-
- switch (bch2_extent_overlap(&new->k, old.k)) {
- case BCH_EXTENT_OVERLAP_ALL:
- offset = 0;
- sectors = -((s64) old.k->size);
- break;
- case BCH_EXTENT_OVERLAP_BACK:
- offset = bkey_start_offset(&new->k) -
- bkey_start_offset(old.k);
- sectors = bkey_start_offset(&new->k) -
- old.k->p.offset;
- break;
- case BCH_EXTENT_OVERLAP_FRONT:
- offset = 0;
- sectors = bkey_start_offset(old.k) -
- new->k.p.offset;
- break;
- case BCH_EXTENT_OVERLAP_MIDDLE:
- offset = bkey_start_offset(&new->k) -
- bkey_start_offset(old.k);
- sectors = -((s64) new->k.size);
- flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
- break;
- }
-
- BUG_ON(sectors >= 0);
+ BUG_ON(!ck->valid);
+ old = bkey_i_to_s_c(ck->k);
+ }
- ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
- offset, sectors, flags);
- if (ret)
- break;
- }
- bch2_trans_iter_put(trans, copy);
+ if (old.k->type == new->k.type &&
+ !btree_node_type_is_extents(iter->btree_id)) {
+ ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+ BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
+ } else {
+ ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, new->k.size,
+ BTREE_TRIGGER_INSERT|flags) ?:
+ bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, -((s64) old.k->size),
+ BTREE_TRIGGER_OVERWRITE|flags);
}
return ret;
this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size);
while (offset < bytes) {
- unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES,
+ unsigned nr_iovecs = min_t(size_t, BIO_MAX_VECS,
DIV_ROUND_UP(bytes, PAGE_SIZE));
unsigned b = min_t(size_t, bytes - offset,
nr_iovecs << PAGE_SHIFT);
bch2_bkey_ptrs_to_text(out, c, k);
}
-enum merge_result bch2_extent_merge(struct bch_fs *c,
- struct bkey_s _l, struct bkey_s _r)
+/*
+ * Try to merge extent @l with the extent immediately following it, @r.
+ * Returns true on success, with @l resized to cover both keys; returns
+ * false if the extents cannot be merged.
+ *
+ * Three passes over the value entries:
+ *  1) entry types must match pairwise (and counts must be equal),
+ *  2) each pointer pair must be contiguous on the same device/gen,
+ *     must not straddle a bucket, and the crc entries must be usable
+ *     (left's, right's, or checksum-mergeable),
+ *  3) actually merge: rewrite each crc entry and, when the right
+ *     extent's crc entry was chosen, take the right extent's pointers.
+ */
+bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
{
- struct bkey_s_extent l = bkey_s_to_extent(_l);
- struct bkey_s_extent r = bkey_s_to_extent(_r);
- union bch_extent_entry *en_l = l.v->start;
- union bch_extent_entry *en_r = r.v->start;
- struct bch_extent_crc_unpacked crc_l, crc_r;
-
- if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k))
- return BCH_MERGE_NOMERGE;
-
- crc_l = bch2_extent_crc_unpack(l.k, NULL);
-
- extent_for_each_entry(l, en_l) {
- en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
+ struct bkey_ptrs l_ptrs = bch2_bkey_ptrs(l);
+ struct bkey_ptrs_c r_ptrs = bch2_bkey_ptrs_c(r);
+ union bch_extent_entry *en_l;
+ const union bch_extent_entry *en_r;
+ struct extent_ptr_decoded lp, rp;
+ bool use_right_ptr;
+ struct bch_dev *ca;
+ /* pass 1: entry types must line up exactly, pairwise: */
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
if (extent_entry_type(en_l) != extent_entry_type(en_r))
- return BCH_MERGE_NOMERGE;
-
- switch (extent_entry_type(en_l)) {
- case BCH_EXTENT_ENTRY_ptr: {
- const struct bch_extent_ptr *lp = &en_l->ptr;
- const struct bch_extent_ptr *rp = &en_r->ptr;
- struct bch_dev *ca;
-
- if (lp->offset + crc_l.compressed_size != rp->offset ||
- lp->dev != rp->dev ||
- lp->gen != rp->gen)
- return BCH_MERGE_NOMERGE;
-
- /* We don't allow extents to straddle buckets: */
- ca = bch_dev_bkey_exists(c, lp->dev);
-
- if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
- return BCH_MERGE_NOMERGE;
-
- break;
- }
- case BCH_EXTENT_ENTRY_stripe_ptr:
- if (en_l->stripe_ptr.block != en_r->stripe_ptr.block ||
- en_l->stripe_ptr.idx != en_r->stripe_ptr.idx)
- return BCH_MERGE_NOMERGE;
- break;
- case BCH_EXTENT_ENTRY_crc32:
- case BCH_EXTENT_ENTRY_crc64:
- case BCH_EXTENT_ENTRY_crc128:
- crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
- crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
-
- if (crc_l.csum_type != crc_r.csum_type ||
- crc_l.compression_type != crc_r.compression_type ||
- crc_l.nonce != crc_r.nonce)
- return BCH_MERGE_NOMERGE;
-
- if (crc_l.offset + crc_l.live_size != crc_l.compressed_size ||
- crc_r.offset)
- return BCH_MERGE_NOMERGE;
-
- if (!bch2_checksum_mergeable(crc_l.csum_type))
- return BCH_MERGE_NOMERGE;
+ return false;
- if (crc_is_compressed(crc_l))
- return BCH_MERGE_NOMERGE;
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
+ }
- if (crc_l.csum_type &&
- crc_l.uncompressed_size +
- crc_r.uncompressed_size > c->sb.encoded_extent_max)
- return BCH_MERGE_NOMERGE;
+ /* both sides must have the same number of entries: */
+ if (en_l < l_ptrs.end || en_r < r_ptrs.end)
+ return false;
- if (crc_l.uncompressed_size + crc_r.uncompressed_size >
+ /* pass 2: check each decoded pointer pair for mergeability: */
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ lp.crc = bch2_extent_crc_unpack(l.k, NULL);
+ rp.crc = bch2_extent_crc_unpack(r.k, NULL);
+
+ while (__bkey_ptr_next_decode(l.k, l_ptrs.end, lp, en_l) &&
+ __bkey_ptr_next_decode(r.k, r_ptrs.end, rp, en_r)) {
+ if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size !=
+ rp.ptr.offset + rp.crc.offset ||
+ lp.ptr.dev != rp.ptr.dev ||
+ lp.ptr.gen != rp.ptr.gen ||
+ lp.has_ec != rp.has_ec)
+ return false;
+
+ /* Extents may not straddle buckets: */
+ ca = bch_dev_bkey_exists(c, lp.ptr.dev);
+ if (PTR_BUCKET_NR(ca, &lp.ptr) != PTR_BUCKET_NR(ca, &rp.ptr))
+ return false;
+
+ /* NOTE(review): has_ec equality was already checked just above;
+ * this repeats it before comparing the erasure-coding fields. */
+ if (lp.has_ec != rp.has_ec ||
+ (lp.has_ec &&
+ (lp.ec.block != rp.ec.block ||
+ lp.ec.redundancy != rp.ec.redundancy ||
+ lp.ec.idx != rp.ec.idx)))
+ return false;
+
+ if (lp.crc.compression_type != rp.crc.compression_type ||
+ lp.crc.nonce != rp.crc.nonce)
+ return false;
+
+ if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
+ lp.crc.uncompressed_size) {
+ /* can use left extent's crc entry */
+ } else if (lp.crc.live_size <= rp.crc.offset ) {
+ /* can use right extent's crc entry */
+ } else {
+ /* check if checksums can be merged: */
+ if (lp.crc.csum_type != rp.crc.csum_type ||
+ lp.crc.nonce != rp.crc.nonce ||
+ crc_is_compressed(lp.crc) ||
+ !bch2_checksum_mergeable(lp.crc.csum_type))
+ return false;
+
+ if (lp.crc.offset + lp.crc.live_size != lp.crc.compressed_size ||
+ rp.crc.offset)
+ return false;
+
+ if (lp.crc.csum_type &&
+ lp.crc.uncompressed_size +
+ rp.crc.uncompressed_size > c->sb.encoded_extent_max)
+ return false;
+
+ if (lp.crc.uncompressed_size + rp.crc.uncompressed_size >
bch2_crc_field_size_max[extent_entry_type(en_l)])
- return BCH_MERGE_NOMERGE;
-
- break;
- default:
- return BCH_MERGE_NOMERGE;
+ return false;
}
- }
-
- extent_for_each_entry(l, en_l) {
- struct bch_extent_crc_unpacked crc_l, crc_r;
-
- en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
-
- if (!extent_entry_is_crc(en_l))
- continue;
- crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
- crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
-
- crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
- crc_l.csum,
- crc_r.csum,
- crc_r.uncompressed_size << 9);
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
+ }
- crc_l.uncompressed_size += crc_r.uncompressed_size;
- crc_l.compressed_size += crc_r.compressed_size;
+ /* pass 3: all checks passed — do the merge in place on @l: */
+ use_right_ptr = false;
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ while (en_l < l_ptrs.end) {
+ if (extent_entry_type(en_l) == BCH_EXTENT_ENTRY_ptr &&
+ use_right_ptr)
+ en_l->ptr = en_r->ptr;
+
+ if (extent_entry_is_crc(en_l)) {
+ struct bch_extent_crc_unpacked crc_l =
+ bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+ struct bch_extent_crc_unpacked crc_r =
+ bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+
+ use_right_ptr = false;
+
+ if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
+ crc_l.uncompressed_size) {
+ /* can use left extent's crc entry */
+ } else if (crc_l.live_size <= crc_r.offset ) {
+ /* can use right extent's crc entry */
+ crc_r.offset -= crc_l.live_size;
+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
+ extent_entry_type(en_l));
+ use_right_ptr = true;
+ } else {
+ /* merge the two checksums into crc_l: */
+ crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
+ crc_l.csum,
+ crc_r.csum,
+ crc_r.uncompressed_size << 9);
+
+ crc_l.uncompressed_size += crc_r.uncompressed_size;
+ crc_l.compressed_size += crc_r.compressed_size;
+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
+ extent_entry_type(en_l));
+ }
+ }
- bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
- extent_entry_type(en_l));
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
}
bch2_key_resize(l.k, l.k->size + r.k->size);
-
- return BCH_MERGE_MERGE;
+ return true;
}
/* KEY_TYPE_reservation: */
r.v->nr_replicas);
}
-enum merge_result bch2_reservation_merge(struct bch_fs *c,
- struct bkey_s _l, struct bkey_s _r)
+/*
+ * Merge for KEY_TYPE_reservation: adjacent reservations merge (true,
+ * with @_l resized to cover both) only when generation and nr_replicas
+ * match.  NOTE(review): the old KEY_SIZE_MAX partial-merge path is
+ * gone and the resize is now unconditional — presumably the generic
+ * merge caller checks the combined size first; confirm in
+ * bch2_bkey_merge().
+ */
+bool bch2_reservation_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
{
struct bkey_s_reservation l = bkey_s_to_reservation(_l);
- struct bkey_s_reservation r = bkey_s_to_reservation(_r);
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(_r);
if (l.v->generation != r.v->generation ||
l.v->nr_replicas != r.v->nr_replicas)
- return BCH_MERGE_NOMERGE;
-
- if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
- bch2_key_resize(l.k, KEY_SIZE_MAX);
- bch2_cut_front_s(l.k->p, r.s);
- return BCH_MERGE_PARTIAL;
- }
+ return false;
bch2_key_resize(l.k, l.k->size + r.k->size);
-
- return BCH_MERGE_MERGE;
+ return true;
}
/* Extent checksum entries: */
const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-enum merge_result bch2_extent_merge(struct bch_fs *,
- struct bkey_s, struct bkey_s);
+bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
#define bch2_bkey_ops_extent (struct bkey_ops) { \
.key_invalid = bch2_extent_invalid, \
const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-enum merge_result bch2_reservation_merge(struct bch_fs *,
- struct bkey_s, struct bkey_s);
+bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
#define bch2_bkey_ops_reservation (struct bkey_ops) { \
.key_invalid = bch2_reservation_invalid, \
unsigned n = min_t(unsigned,
readpages_iter.nr_pages -
readpages_iter.idx,
- BIO_MAX_PAGES);
+ BIO_MAX_VECS);
struct bch_read_bio *rbio =
rbio_init(bio_alloc_bioset(GFP_NOFS, n, &c->bio_read),
opts);
{
struct bch_write_op *op;
- w->io = container_of(bio_alloc_bioset(GFP_NOFS,
- BIO_MAX_PAGES,
+ w->io = container_of(bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS,
&c->writepage_bioset),
struct bch_writepage_io, op.wbio.bio);
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
- (BIO_MAX_PAGES * PAGE_SIZE) ||
+ (BIO_MAX_VECS * PAGE_SIZE) ||
bio_end_sector(&w->io->op.wbio.bio) != sector))
bch2_writepage_do_io(w);
iter->count -= shorten;
bio = bio_alloc_bioset(GFP_KERNEL,
- iov_iter_npages(iter, BIO_MAX_PAGES),
+ iov_iter_npages(iter, BIO_MAX_VECS),
&c->dio_read_bioset);
bio->bi_end_io = bch2_direct_IO_read_endio;
goto start;
while (iter->count) {
bio = bio_alloc_bioset(GFP_KERNEL,
- iov_iter_npages(iter, BIO_MAX_PAGES),
+ iov_iter_npages(iter, BIO_MAX_VECS),
&c->bio_read);
bio->bi_end_io = bch2_direct_IO_read_split_endio;
start:
}
bio = bio_alloc_bioset(GFP_KERNEL,
- iov_iter_npages(iter, BIO_MAX_PAGES),
+ iov_iter_npages(iter, BIO_MAX_VECS),
&c->dio_write_bioset);
dio = container_of(bio, struct dio_write, op.wbio.bio);
init_completion(&dio->done);
return ret;
truncate_setsize(&inode->v, iattr->ia_size);
- setattr_copy(&inode->v, iattr);
+ /* ATTR_MODE will never be set here, ns argument isn't needed: */
+ setattr_copy(NULL, &inode->v, iattr);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, inode->v.i_size,
if (unlikely(ret))
goto err;
- setattr_copy(&inode->v, iattr);
+ /* ATTR_MODE will never be set here, ns argument isn't needed: */
+ setattr_copy(NULL, &inode->v, iattr);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL,
return ret;
inode_lock(&inode->v);
- if (!inode_owner_or_capable(&inode->v)) {
+ if (!inode_owner_or_capable(file_mnt_user_ns(file), &inode->v)) {
ret = -EACCES;
goto setflags_out;
}
return ret;
inode_lock(&inode->v);
- if (!inode_owner_or_capable(&inode->v)) {
+ if (!inode_owner_or_capable(file_mnt_user_ns(file), &inode->v)) {
ret = -EACCES;
goto err;
}
}
static struct bch_inode_info *
-__bch2_create(struct bch_inode_info *dir, struct dentry *dentry,
+__bch2_create(struct user_namespace *mnt_userns,
+ struct bch_inode_info *dir, struct dentry *dentry,
umode_t mode, dev_t rdev, bool tmpfile)
{
struct bch_fs *c = dir->v.i_sb->s_fs_info;
- struct user_namespace *ns = dir->v.i_sb->s_user_ns;
struct btree_trans trans;
struct bch_inode_unpacked dir_u;
struct bch_inode_info *inode, *old;
ret = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u,
!tmpfile ? &dentry->d_name : NULL,
- from_kuid(ns, current_fsuid()),
- from_kgid(ns, current_fsgid()),
+ from_kuid(mnt_userns, current_fsuid()),
+ from_kgid(mnt_userns, current_fsgid()),
mode, rdev,
default_acl, acl) ?:
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
return d_splice_alias(vinode, dentry);
}
-static int bch2_mknod(struct inode *vdir, struct dentry *dentry,
+/*
+ * Common ->mknod() implementation: creates the inode via
+ * __bch2_create() (not a tmpfile) and maps an ERR_PTR result to an
+ * errno.  Also the backend for bch2_create() and bch2_mkdir().
+ */
+static int bch2_mknod(struct user_namespace *mnt_userns,
+ struct inode *vdir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
struct bch_inode_info *inode =
- __bch2_create(to_bch_ei(vdir), dentry, mode, rdev, false);
+ __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, rdev, false);
if (IS_ERR(inode))
return PTR_ERR(inode);
return 0;
}
-static int bch2_create(struct inode *vdir, struct dentry *dentry,
+/* ->create(): a regular file is just a mknod with S_IFREG and rdev 0. */
+static int bch2_create(struct user_namespace *mnt_userns,
+ struct inode *vdir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return bch2_mknod(vdir, dentry, mode|S_IFREG, 0);
+ return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFREG, 0);
}
static int __bch2_link(struct bch_fs *c,
return ret;
}
-static int bch2_symlink(struct inode *vdir, struct dentry *dentry,
+static int bch2_symlink(struct user_namespace *mnt_userns,
+ struct inode *vdir, struct dentry *dentry,
const char *symname)
{
struct bch_fs *c = vdir->i_sb->s_fs_info;
struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
int ret;
- inode = __bch2_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0, true);
+ inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0, true);
if (unlikely(IS_ERR(inode)))
return PTR_ERR(inode);
return ret;
}
-static int bch2_mkdir(struct inode *vdir, struct dentry *dentry, umode_t mode)
+/* ->mkdir(): directories are created via mknod with S_IFDIR and rdev 0. */
+static int bch2_mkdir(struct user_namespace *mnt_userns,
+ struct inode *vdir, struct dentry *dentry, umode_t mode)
{
- return bch2_mknod(vdir, dentry, mode|S_IFDIR, 0);
+ return bch2_mknod(mnt_userns, vdir, dentry, mode|S_IFDIR, 0);
}
-static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry,
+static int bch2_rename2(struct user_namespace *mnt_userns,
+ struct inode *src_vdir, struct dentry *src_dentry,
struct inode *dst_vdir, struct dentry *dst_dentry,
unsigned flags)
{
return ret;
}
-void bch2_setattr_copy(struct bch_inode_info *inode,
+void bch2_setattr_copy(struct user_namespace *mnt_userns,
+ struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
if (ia_valid & ATTR_UID)
- bi->bi_uid = from_kuid(c->vfs_sb->s_user_ns, attr->ia_uid);
+ bi->bi_uid = from_kuid(mnt_userns, attr->ia_uid);
if (ia_valid & ATTR_GID)
- bi->bi_gid = from_kgid(c->vfs_sb->s_user_ns, attr->ia_gid);
+ bi->bi_gid = from_kgid(mnt_userns, attr->ia_gid);
if (ia_valid & ATTR_ATIME)
bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
: inode->v.i_gid;
if (!in_group_p(gid) &&
- !capable_wrt_inode_uidgid(&inode->v, CAP_FSETID))
+ !capable_wrt_inode_uidgid(mnt_userns, &inode->v, CAP_FSETID))
mode &= ~S_ISGID;
bi->bi_mode = mode;
}
}
-static int bch2_setattr_nonsize(struct bch_inode_info *inode,
+static int bch2_setattr_nonsize(struct user_namespace *mnt_userns,
+ struct bch_inode_info *inode,
struct iattr *attr)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
if (ret)
goto btree_err;
- bch2_setattr_copy(inode, &inode_u, attr);
+ bch2_setattr_copy(mnt_userns, inode, &inode_u, attr);
if (attr->ia_valid & ATTR_MODE) {
ret = bch2_acl_chmod(&trans, &inode_u, inode_u.bi_mode, &acl);
return ret;
}
-static int bch2_getattr(const struct path *path, struct kstat *stat,
+static int bch2_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned query_flags)
{
struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
return 0;
}
-static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
+/*
+ * ->setattr(): caller must hold i_rwsem (asserted below).  Validates
+ * the attribute change with setattr_prepare(), then dispatches size
+ * changes to bch2_truncate() and everything else to
+ * bch2_setattr_nonsize().
+ */
+static int bch2_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr)
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
int ret;
lockdep_assert_held(&inode->v.i_rwsem);
- ret = setattr_prepare(dentry, iattr);
+ ret = setattr_prepare(mnt_userns, dentry, iattr);
if (ret)
return ret;
return iattr->ia_valid & ATTR_SIZE
? bch2_truncate(inode, iattr)
- : bch2_setattr_nonsize(inode, iattr);
+ : bch2_setattr_nonsize(mnt_userns, inode, iattr);
}
-static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
+static int bch2_tmpfile(struct user_namespace *mnt_userns,
+ struct inode *vdir, struct dentry *dentry, umode_t mode)
{
struct bch_inode_info *inode =
- __bch2_create(to_bch_ei(vdir), dentry, mode, 0, true);
+ __bch2_create(mnt_userns, to_bch_ei(vdir), dentry, mode, 0, true);
if (IS_ERR(inode))
return PTR_ERR(inode);
pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx));
}
-enum merge_result bch2_reflink_p_merge(struct bch_fs *c,
- struct bkey_s _l, struct bkey_s _r)
+/*
+ * Merge for KEY_TYPE_reflink_p: currently always refuses (early
+ * return below); the merge logic after it is intentionally kept for
+ * when the triggers code supports merging reflink pointers again.
+ */
+bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
{
struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l);
- struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r);
+ struct bkey_s_c_reflink_p r = bkey_s_c_to_reflink_p(_r);
- if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
- return BCH_MERGE_NOMERGE;
+ /*
+ * Disabled for now, the triggers code needs to be reworked for merging
+ * of reflink pointers to work:
+ */
+ return false;
- if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
- bch2_key_resize(l.k, KEY_SIZE_MAX);
- bch2_cut_front_s(l.k->p, _r);
- return BCH_MERGE_PARTIAL;
- }
+ /* unreachable until the early return above is removed: */
+ if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
+ return false;
bch2_key_resize(l.k, l.k->size + r.k->size);
-
- return BCH_MERGE_MERGE;
+ return true;
}
/* indirect extents */
bch2_bkey_ptrs_to_text(out, c, k);
}
+/*
+ * Merge for KEY_TYPE_reflink_v: indirect extents merge only when their
+ * refcounts match and the underlying extent data is itself mergeable
+ * per bch2_extent_merge().
+ */
+bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
+{
+ struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l);
+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r);
+
+ return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r);
+}
+
/* indirect inline data */
const char *bch2_indirect_inline_data_invalid(const struct bch_fs *c,
/* rewind iter to start of hole, if necessary: */
bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k));
- r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_val_bytes(&orig->k));
+ r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
ret = PTR_ERR_OR_ZERO(r_v);
if (ret)
goto err;
if (ret)
goto err;
- r_p = bch2_trans_kmalloc(trans, sizeof(*r_p));
- if (IS_ERR(r_p)) {
- ret = PTR_ERR(r_p);
- goto err;
- }
-
orig->k.type = KEY_TYPE_reflink_p;
r_p = bkey_i_to_reflink_p(orig);
set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c);
void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
-enum merge_result bch2_reflink_p_merge(struct bch_fs *,
- struct bkey_s, struct bkey_s);
+bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \
.key_invalid = bch2_reflink_p_invalid, \
}
static int bch2_xattr_set_handler(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *vinode,
const char *name, const void *value,
size_t size, int flags)
}
static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *vinode,
const char *name, const void *value,
size_t size, int flags)