#include "bcachefs.h"
#include "btree_key_cache.h"
+#include "btree_write_buffer.h"
#include "bkey_methods.h"
#include "btree_update.h"
#include "buckets.h"
+#include "compress.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
#include "inode.h"
#include "str_hash.h"
+#include "snapshot.h"
#include "subvolume.h"
#include "varint.h"
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked;
- int ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i),
- &unpacked);
+ ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked);
BUG_ON(ret);
BUG_ON(unpacked.bi_inum != inode->bi_inum);
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
return bch2_inode_unpack_slowpath(k, unpacked);
}
-int bch2_inode_peek(struct btree_trans *trans,
+static int bch2_inode_peek_nowarn(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
subvol_inum inum, unsigned flags)
return ret;
}
+int bch2_inode_peek(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ subvol_inum inum, unsigned flags)
+{
+ int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
+ bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);
+ return ret;
+}
+
int bch2_inode_write(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode)
return -BCH_ERR_invalid_bkey;
}
- if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) {
- prt_printf(err, "invalid data checksum type (%u >= %u)",
- unpacked.bi_compression, BCH_COMPRESSION_OPT_NR + 1);
+ if (unpacked.bi_compression &&
+ !bch2_compression_opt_valid(unpacked.bi_compression - 1)) {
+ prt_printf(err, "invalid compression opt %u",
+ unpacked.bi_compression - 1);
return -BCH_ERR_invalid_bkey;
}
__bch2_inode_unpacked_to_text(out, &inode);
}
+static inline u64 bkey_inode_flags(struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_inode:
+ return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
+ case KEY_TYPE_inode_v2:
+ return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
+ case KEY_TYPE_inode_v3:
+ return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
+ default:
+ return 0;
+ }
+}
+
+static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
+{
+ return bkey_inode_flags(k) & BCH_INODE_UNLINKED;
+}
+
int bch2_trans_mark_inode(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old,
unsigned flags)
{
int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
+ bool old_deleted = bkey_is_deleted_inode(old);
+ bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new));
if (nr) {
int ret = bch2_replicas_deltas_realloc(trans, 0);
d->nr_inodes += nr;
}
+ if (old_deleted != new_deleted) {
+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_i delete;
+ struct bpos end = POS(inum.inum, U64_MAX);
u32 snapshot;
int ret = 0;
* extent iterator:
*/
bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0),
- BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
+ BTREE_ITER_INTENT);
while (1) {
bch2_trans_begin(trans);
bch2_btree_iter_set_snapshot(&iter, snapshot);
- k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
+ k = bch2_btree_iter_peek_upto(&iter, end);
ret = bkey_err(k);
if (ret)
goto err;
bkey_init(&delete.k);
delete.k.p = iter.pos;
+ if (iter.flags & BTREE_ITER_IS_EXTENTS)
+ bch2_key_resize(&delete.k,
+ bpos_min(end, k.k->p).offset -
+ iter.pos.offset);
+
ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
- struct btree_trans trans;
+ struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct bkey_i_inode_generation delete;
struct bch_inode_unpacked inode_u;
u32 snapshot;
int ret;
- bch2_trans_init(&trans, c, 0, 1024);
-
/*
* If this was a directory, there shouldn't be any real dirents left -
* but there could be whiteouts (from hash collisions) that we should
* XXX: the dirent could ideally would delete whiteouts when they're no
* longer needed
*/
- ret = bch2_inode_delete_keys(&trans, inum, BTREE_ID_extents) ?:
- bch2_inode_delete_keys(&trans, inum, BTREE_ID_xattrs) ?:
- bch2_inode_delete_keys(&trans, inum, BTREE_ID_dirents);
+ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
+ bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
+ bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
if (ret)
goto err;
retry:
- bch2_trans_begin(&trans);
+ bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
- k = bch2_bkey_get_iter(&trans, &iter, BTREE_ID_inodes,
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, inum.inum, snapshot),
BTREE_ITER_INTENT|BTREE_ITER_CACHED);
ret = bkey_err(k);
goto err;
if (!bkey_is_inode(k.k)) {
- bch2_fs_inconsistent(trans.c,
+ bch2_fs_inconsistent(c,
"inode %llu:%u not found when deleting",
inum.inum, snapshot);
ret = -EIO;
delete.k.p = iter.pos;
delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
- ret = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?:
- bch2_trans_commit(&trans, NULL, NULL,
+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
err:
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- bch2_trans_exit(&trans);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
+ subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
+ if (!ret)
+ bch2_trans_iter_exit(trans, &iter);
return ret;
}
struct bch_inode_unpacked *inode)
{
return bch2_trans_do(c, NULL, NULL, 0,
- bch2_inode_find_by_inum_trans(&trans, inum, inode));
+ bch2_inode_find_by_inum_trans(trans, inum, inode));
}
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0;
}
+int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts)
+{
+ struct bch_inode_unpacked inode;
+ int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));
+
+ if (ret)
+ return ret;
+
+ bch2_inode_opts_get(opts, trans->c, &inode);
+ return 0;
+}
+
int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
struct bch_fs *c = trans->c;
return ret ?: -BCH_ERR_transaction_restart_nested;
}
+
+static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_inode_unpacked inode;
+ int ret;
+
+ if (bch2_snapshot_is_internal_node(c, pos.snapshot))
+ return 0;
+
+ if (!fsck_err_on(c->sb.clean, c,
+ "filesystem marked as clean but have deleted inode %llu:%u",
+ pos.offset, pos.snapshot))
+ return 0;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
+ if (fsck_err_on(!bkey_is_inode(k.k), c,
+ "nonexistent inode %llu:%u in deleted_inodes btree",
+ pos.offset, pos.snapshot))
+ goto delete;
+
+ ret = bch2_inode_unpack(k, &inode);
+ if (ret)
+ goto err;
+
+ if (fsck_err_on(S_ISDIR(inode.bi_mode), c,
+ "directory %llu:%u in deleted_inodes btree",
+ pos.offset, pos.snapshot))
+ goto delete;
+
+ if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c,
+ "non-deleted inode %llu:%u in deleted_inodes btree",
+ pos.offset, pos.snapshot))
+ goto delete;
+
+ return 1;
+err:
+fsck_err:
+ return ret;
+delete:
+ return bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+}
+
+int bch2_delete_dead_inodes(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+
+ /*
+ * Weird transaction restart handling here because on successful delete,
+ * bch2_inode_rm_snapshot() will return a nested transaction restart,
+ * but we can't retry because the btree write buffer won't have been
+ * flushed and we'd spin:
+ */
+ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ ret = lockrestart_do(trans, may_delete_deleted_inode(trans, k.k->p));
+ if (ret < 0)
+ break;
+
+ if (ret) {
+ if (!test_bit(BCH_FS_RW, &c->flags)) {
+ bch2_trans_unlock(trans);
+ bch2_fs_lazy_rw(c);
+ }
+
+ ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ bch2_trans_put(trans);
+
+ return ret;
+}