#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
#include "error.h"
#include "extents.h"
+#include "extent_update.h"
#include "fs.h"
#include "fs-io.h"
#include "fsck.h"
#include <linux/migrate.h>
#include <linux/mmu_context.h>
#include <linux/pagevec.h>
+#include <linux/rmap.h>
#include <linux/sched/signal.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/uio.h>
#include <trace/events/bcachefs.h>
#include <trace/events/writeback.h>
+static inline struct address_space *faults_disabled_mapping(void)
+{ /*
+   * Returns current->faults_disabled_mapping with bit 0 masked off. Bit 0
+   * of that field carries the flag written by set_fdm_dropped_locks(), so
+   * it has to be stripped before the value is used as a pointer.
+   */
+ return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
+}
+
+static inline void set_fdm_dropped_locks(void)
+{ /*
+   * Sets bit 0 of current->faults_disabled_mapping and leaves the other
+   * bits unchanged. bch2_page_fault() calls this after it has dropped and
+   * retaken the pagecache lock of the faults-disabled mapping.
+   */
+ current->faults_disabled_mapping =
+ (void *) (((unsigned long) current->faults_disabled_mapping)|1);
+}
+
+static inline bool fdm_dropped_locks(void)
+{ /*
+   * Returns true when bit 0 of current->faults_disabled_mapping is set,
+   * i.e. set_fdm_dropped_locks() ran since the field was last assigned.
+   */
+ return ((unsigned long) current->faults_disabled_mapping) & 1;
+}
+
struct quota_res {
u64 sectors; /* NOTE(review): presumably the number of sectors held by this quota reservation - confirm */
};
sync:1,
free_iov:1;
struct quota_res quota_res;
+ u64 written;
struct iov_iter iter;
struct iovec inline_vecs[2];
/* for newly allocated pages: */
static void __bch2_page_state_release(struct page *page)
{
- struct bch_page_state *s = __bch2_page_state(page);
-
- if (!s)
- return;
-
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
- kfree(s);
+ kfree(detach_page_private(page)); /*
+   * Detaches the page's private state and frees it. NOTE(review): the
+   * removed code returned early when no state was attached; presumably
+   * detach_page_private() yields NULL in that case, which kfree()
+   * accepts - confirm.
+   */
}
static void bch2_page_state_release(struct page *page)
{
- struct bch_page_state *s = bch2_page_state(page);
-
- if (!s)
- return;
-
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
- kfree(s);
+ EBUG_ON(!PageLocked(page)); /* assertion: the caller is expected to hold the page lock */
+ __bch2_page_state_release(page); /* the actual detach-and-free is shared with the unchecked variant */
}
/* for newly allocated pages: */
return NULL;
spin_lock_init(&s->lock);
- /*
- * migrate_page_move_mapping() assumes that pages with private data
- * have their count elevated by 1.
- */
- get_page(page);
- set_page_private(page, (unsigned long) s);
- SetPagePrivate(page);
+ attach_page_private(page, s);
return s;
}
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
struct file *file = vmf->vma->vm_file;
+ struct address_space *mapping = file->f_mapping;
+ struct address_space *fdm = faults_disabled_mapping();
struct bch_inode_info *inode = file_bch_inode(file);
int ret;
+ if (fdm == mapping)
+ return VM_FAULT_SIGBUS;
+
+ /* Lock ordering: */
+ if (fdm > mapping) {
+ struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
+
+ if (bch2_pagecache_add_tryget(&inode->ei_pagecache_lock))
+ goto got_lock;
+
+ bch2_pagecache_block_put(&fdm_host->ei_pagecache_lock);
+
+ bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+ bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+ bch2_pagecache_block_get(&fdm_host->ei_pagecache_lock);
+
+ /* Signal that lock has been dropped: */
+ set_fdm_dropped_locks();
+ return VM_FAULT_SIGBUS;
+ }
+
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+got_lock:
ret = filemap_fault(vmf);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
EBUG_ON(!PageLocked(page));
EBUG_ON(!PageLocked(newpage));
- ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
+ ret = migrate_page_move_mapping(mapping, newpage, page, 0);
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
- if (PagePrivate(page)) {
- ClearPagePrivate(page);
- get_page(newpage);
- set_page_private(newpage, page_private(page));
- set_page_private(page, 0);
- put_page(page);
- SetPagePrivate(newpage);
- }
+ if (PagePrivate(page))
+ attach_page_private(newpage, detach_page_private(page));
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
bio_put(bio);
}
-static inline void page_state_init_for_read(struct page *page)
-{
- SetPagePrivate(page);
- page->private = 0;
-}
-
struct readpages_iter {
struct address_space *mapping; /* mapping the pages belong to (taken from ractl->mapping) */
struct page **pages; /* kmalloc'd array of pages; freed by the caller with kfree() */
unsigned nr_pages; /* number of entries in pages[] (readahead_count() at init time) */
- unsigned nr_added;
unsigned idx; /* index into pages[] of the page readpage_iter_next() returns */
pgoff_t offset; /* page index of pages[0]; pages[i]->index is expected to be offset + i */
};
static int readpages_iter_init(struct readpages_iter *iter,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+ struct readahead_control *ractl)
{
+ unsigned i, nr_pages = readahead_count(ractl);
+
memset(iter, 0, sizeof(*iter));
- iter->mapping = mapping;
- iter->offset = list_last_entry(pages, struct page, lru)->index;
+ iter->mapping = ractl->mapping;
+ iter->offset = readahead_index(ractl);
+ iter->nr_pages = nr_pages;
iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!iter->pages)
return -ENOMEM;
- while (!list_empty(pages)) {
- struct page *page = list_last_entry(pages, struct page, lru);
-
- __bch2_page_state_create(page, __GFP_NOFAIL);
-
- iter->pages[iter->nr_pages++] = page;
- list_del(&page->lru);
+ nr_pages = __readahead_batch(ractl, iter->pages, nr_pages);
+ for (i = 0; i < nr_pages; i++) {
+ __bch2_page_state_create(iter->pages[i], __GFP_NOFAIL);
+ put_page(iter->pages[i]);
}
return 0;
static inline struct page *readpage_iter_next(struct readpages_iter *iter)
{
- struct page *page;
- unsigned i;
- int ret;
-
- BUG_ON(iter->idx > iter->nr_added);
- BUG_ON(iter->nr_added > iter->nr_pages);
-
- if (iter->idx < iter->nr_added)
- goto out;
-
- while (1) {
- if (iter->idx == iter->nr_pages)
- return NULL;
-
- ret = add_to_page_cache_lru_vec(iter->mapping,
- iter->pages + iter->nr_added,
- iter->nr_pages - iter->nr_added,
- iter->offset + iter->nr_added,
- GFP_NOFS);
- if (ret > 0)
- break;
-
- page = iter->pages[iter->nr_added];
- iter->idx++;
- iter->nr_added++;
-
- __bch2_page_state_release(page);
- put_page(page);
- }
-
- iter->nr_added += ret;
+ if (iter->idx >= iter->nr_pages)
+ return NULL;
- for (i = iter->idx; i < iter->nr_added; i++)
- put_page(iter->pages[i]);
-out:
EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx);
return iter->pages[iter->idx];
struct bvec_iter iter;
struct bio_vec bv;
unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
- ? 0 : bch2_bkey_nr_ptrs_allocated(k);
+ ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
unsigned state = k.k->type == KEY_TYPE_reservation
? SECTOR_RESERVED
: SECTOR_ALLOCATED;
}
}
+static bool extent_partial_reads_expensive(struct bkey_s_c k)
+{ /*
+   * Returns true if any crc entry of @k has a nonzero csum_type or
+   * compression_type, false otherwise. The read path passes the result
+   * as the last argument of readpage_bio_extend(). NOTE(review):
+   * presumably a checksummed or compressed extent must be read in full,
+   * which is what makes a partial read expensive - confirm.
+   */
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *i;
+
+ bkey_for_each_crc(k.k, ptrs, crc, i)
+ if (crc.csum_type || crc.compression_type)
+ return true;
+ return false;
+}
+
static void readpage_bio_extend(struct readpages_iter *iter,
struct bio *bio,
unsigned sectors_this_extent,
struct readpages_iter *readpages_iter)
{
struct bch_fs *c = trans->c;
+ struct bkey_on_stack sk;
int flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE;
int ret = 0;
rbio->c = c;
rbio->start_time = local_clock();
+
+ bkey_on_stack_init(&sk);
retry:
while (1) {
- BKEY_PADDED(k) tmp;
struct bkey_s_c k;
unsigned bytes, sectors, offset_into_extent;
if (ret)
break;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
-
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
+ bkey_on_stack_reassemble(&sk, c, k);
+
ret = bch2_read_indirect_extent(trans,
- &offset_into_extent, &tmp.k);
+ &offset_into_extent, &sk);
if (ret)
break;
+ k = bkey_i_to_s_c(sk.k);
+
sectors = min(sectors, k.k->size - offset_into_extent);
bch2_trans_unlock(trans);
- if (readpages_iter) {
- bool want_full_extent = false;
-
- if (bkey_extent_is_data(k.k)) {
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *i;
- struct extent_ptr_decoded p;
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, i)
- want_full_extent |= ((p.crc.csum_type != 0) |
- (p.crc.compression_type != 0));
- }
-
- readpage_bio_extend(readpages_iter, &rbio->bio,
- sectors, want_full_extent);
- }
+ if (readpages_iter)
+ readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
+ extent_partial_reads_expensive(k));
bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
swap(rbio->bio.bi_iter.bi_size, bytes);
if (bkey_extent_is_allocation(k.k))
bch2_add_page_sectors(&rbio->bio, k);
- bch2_read_extent(c, rbio, k, offset_into_extent, flags);
+ bch2_read_extent(trans, rbio, k, offset_into_extent, flags);
if (flags & BCH_READ_LAST_FRAGMENT)
- return;
+ break;
swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
if (ret == -EINTR)
goto retry;
- bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
- bio_endio(&rbio->bio);
+ if (ret) {
+ bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+ bio_endio(&rbio->bio);
+ }
+
+ bkey_on_stack_exit(&sk, c);
}
-int bch2_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+void bch2_readahead(struct readahead_control *ractl)
{
- struct bch_inode_info *inode = to_bch_ei(mapping->host);
+ struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
struct btree_trans trans;
struct readpages_iter readpages_iter;
int ret;
- ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages);
+ ret = readpages_iter_init(&readpages_iter, ractl);
BUG_ON(ret);
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_exit(&trans);
kfree(readpages_iter.pages);
-
- return 0;
}
static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
}
}
+ if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
+ bio_for_each_segment_all(bvec, bio, iter) {
+ struct bch_page_state *s;
+
+ s = __bch2_page_state(bvec->bv_page);
+ spin_lock(&s->lock);
+ for (i = 0; i < PAGE_SECTORS; i++)
+ s->s[i].nr_replicas = 0;
+ spin_unlock(&s->lock);
+ }
+ }
+
/*
* racing with fallocate can cause us to add fewer sectors than
* expected - but we shouldn't add more sectors than expected:
* possible, else allocating a new one:
*/
static void bch2_writepage_io_alloc(struct bch_fs *c,
+ struct writeback_control *wbc,
struct bch_writepage_state *w,
struct bch_inode_info *inode,
u64 sector,
op->write_point = writepoint_hashed(inode->ei_last_dirtied);
op->pos = POS(inode->v.i_ino, sector);
op->wbio.bio.bi_iter.bi_sector = sector;
+ op->wbio.bio.bi_opf = wbc_to_write_flags(wbc);
}
static int __bch2_writepage(struct page *page,
if (w->io &&
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
- bio_full(&w->io->op.wbio.bio) ||
- w->io->op.wbio.bio.bi_iter.bi_size >= (256U << 20) ||
+ bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
+ w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
+ (BIO_MAX_PAGES * PAGE_SIZE) ||
bio_end_sector(&w->io->op.wbio.bio) != sector))
bch2_writepage_do_io(w);
if (!w->io)
- bch2_writepage_io_alloc(c, w, inode, sector,
+ bch2_writepage_io_alloc(c, wbc, w, inode, sector,
nr_replicas_this_write);
atomic_inc(&s->write_count);
w->io->op.i_sectors_delta -= dirty_sectors;
w->io->op.new_i_size = i_size;
- if (wbc->sync_mode == WB_SYNC_ALL)
- w->io->op.wbio.bio.bi_opf |= REQ_SYNC;
-
offset += sectors;
}
if (!pg_copied)
break;
+ if (!PageUptodate(page) &&
+ pg_copied != PAGE_SIZE &&
+ pos + copied + pg_copied < inode->v.i_size) {
+ zero_user(page, 0, PAGE_SIZE);
+ break;
+ }
+
flush_dcache_page(page);
iov_iter_advance(iter, pg_copied);
copied += pg_copied;
+
+ if (pg_copied != pg_len)
+ break;
}
if (!copied)
goto out;
- if (copied < len &&
- ((offset + copied) & (PAGE_SIZE - 1))) {
- struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
-
- if (!PageUptodate(page)) {
- zero_user(page, 0, PAGE_SIZE);
- copied -= (offset + copied) & (PAGE_SIZE - 1);
- }
- }
-
spin_lock(&inode->v.i_lock);
if (pos + copied > inode->v.i_size)
i_size_write(&inode->v, pos + copied);
}
pos += ret;
written += ret;
+ ret = 0;
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(iter));
/* O_DIRECT writes */
+static void bch2_dio_write_loop_async(struct bch_write_op *);
+
static long bch2_dio_write_loop(struct dio_write *dio)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
- struct bch_fs *c = dio->op.c;
struct kiocb *req = dio->req;
struct address_space *mapping = req->ki_filp->f_mapping;
struct bch_inode_info *inode = file_bch_inode(req->ki_filp);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bio *bio = &dio->op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bv;
- unsigned unaligned;
- u64 new_i_size;
- bool sync;
+ unsigned unaligned, iter_count;
+ bool sync = dio->sync, dropped_locks;
long ret;
if (dio->loop)
goto loop;
while (1) {
+ iter_count = dio->iter.count;
+
if (kthread)
- use_mm(dio->mm);
+ kthread_use_mm(dio->mm);
BUG_ON(current->faults_disabled_mapping);
current->faults_disabled_mapping = mapping;
ret = bio_iov_iter_get_pages(bio, &dio->iter);
+ dropped_locks = fdm_dropped_locks();
+
current->faults_disabled_mapping = NULL;
if (kthread)
- unuse_mm(dio->mm);
+ kthread_unuse_mm(dio->mm);
+
+ /*
+ * If the fault handler returned an error but also signalled
+ * that it dropped & retook ei_pagecache_lock, we just need to
+ * re-shoot down the page cache and retry:
+ */
+ if (dropped_locks && ret)
+ ret = 0;
if (unlikely(ret < 0))
goto err;
+ if (unlikely(dropped_locks)) {
+ ret = write_invalidate_inode_pages_range(mapping,
+ req->ki_pos,
+ req->ki_pos + iter_count - 1);
+ if (unlikely(ret))
+ goto err;
+
+ if (!bio->bi_iter.bi_size)
+ continue;
+ }
+
unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
bio->bi_iter.bi_size -= unaligned;
iov_iter_revert(&dio->iter, unaligned);
goto err;
}
- dio->op.pos = POS(inode->v.i_ino,
- (req->ki_pos >> 9) + dio->op.written);
+ bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode));
+ dio->op.end_io = bch2_dio_write_loop_async;
+ dio->op.target = dio->op.opts.foreground_target;
+ op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
+ dio->op.write_point = writepoint_hashed((unsigned long) current);
+ dio->op.nr_replicas = dio->op.opts.data_replicas;
+ dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
+
+ if ((req->ki_flags & IOCB_DSYNC) &&
+ !c->opts.journal_flush_disabled)
+ dio->op.flags |= BCH_WRITE_FLUSH;
+
+ ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
+ dio->op.opts.data_replicas, 0);
+ if (unlikely(ret) &&
+ !bch2_check_range_allocated(c, dio->op.pos,
+ bio_sectors(bio), dio->op.opts.data_replicas))
+ goto err;
task_io_account_write(bio->bi_iter.bi_size);
iov = kmalloc(dio->iter.nr_segs * sizeof(*iov),
GFP_KERNEL);
if (unlikely(!iov)) {
- dio->sync = true;
+ dio->sync = sync = true;
goto do_io;
}
dio->loop = true;
closure_call(&dio->op.cl, bch2_write, NULL, NULL);
- if (dio->sync)
+ if (sync)
wait_for_completion(&dio->done);
else
return -EIOCBQUEUED;
loop:
i_sectors_acct(c, inode, &dio->quota_res,
dio->op.i_sectors_delta);
- dio->op.i_sectors_delta = 0;
-
- new_i_size = req->ki_pos + ((u64) dio->op.written << 9);
+ req->ki_pos += (u64) dio->op.written << 9;
+ dio->written += dio->op.written;
spin_lock(&inode->v.i_lock);
- if (new_i_size > inode->v.i_size)
- i_size_write(&inode->v, new_i_size);
+ if (req->ki_pos > inode->v.i_size)
+ i_size_write(&inode->v, req->ki_pos);
spin_unlock(&inode->v.i_lock);
bio_for_each_segment_all(bv, bio, iter)
reinit_completion(&dio->done);
}
- ret = dio->op.error ?: ((long) dio->op.written << 9);
+ ret = dio->op.error ?: ((long) dio->written << 9);
err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
- bch2_disk_reservation_put(c, &dio->op.res);
bch2_quota_reservation_put(c, inode, &dio->quota_res);
if (dio->free_iov)
kfree(dio->iter.iov);
- sync = dio->sync;
bio_put(bio);
/* inode->i_dio_count is our ref on inode and thus bch_fs */
struct address_space *mapping = file->f_mapping;
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
struct dio_write *dio;
struct bio *bio;
bool locked = true, extending;
dio->sync = is_sync_kiocb(req) || extending;
dio->free_iov = false;
dio->quota_res.sectors = 0;
+ dio->written = 0;
dio->iter = *iter;
- bch2_write_op_init(&dio->op, c, opts);
- dio->op.end_io = bch2_dio_write_loop_async;
- dio->op.target = opts.foreground_target;
- op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
- dio->op.write_point = writepoint_hashed((unsigned long) current);
- dio->op.flags |= BCH_WRITE_NOPUT_RESERVATION;
-
- if ((req->ki_flags & IOCB_DSYNC) &&
- !c->opts.journal_flush_disabled)
- dio->op.flags |= BCH_WRITE_FLUSH;
-
ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
iter->count >> 9, true);
if (unlikely(ret))
goto err_put_bio;
- dio->op.nr_replicas = dio->op.opts.data_replicas;
-
- ret = bch2_disk_reservation_get(c, &dio->op.res, iter->count >> 9,
- dio->op.opts.data_replicas, 0);
- if (unlikely(ret) &&
- !bch2_check_range_allocated(c, POS(inode->v.i_ino,
- req->ki_pos >> 9),
- iter->count >> 9,
- dio->op.opts.data_replicas))
- goto err_put_bio;
-
ret = write_invalidate_inode_pages_range(mapping,
req->ki_pos,
req->ki_pos + iter->count - 1);
err:
if (locked)
inode_unlock(&inode->v);
- if (ret > 0)
- req->ki_pos += ret;
return ret;
err_put_bio:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
- bch2_disk_reservation_put(c, &dio->op.res);
bch2_quota_reservation_put(c, inode, &dio->quota_res);
bio_put(bio);
inode_dio_end(&inode->v);
ret = bch2_get_page_disk_reservation(c, inode, page, false);
BUG_ON(ret);
+ /*
+ * This removes any writeable userspace mappings; we need to force
+ * .page_mkwrite to be called again before any mmapped writes, to
+ * redirty the full page:
+ */
+ page_mkclean(page);
__set_page_dirty_nobuffers(page);
unlock:
unlock_page(page);
ret = PTR_ERR_OR_ZERO(iter);
bch2_trans_exit(&trans);
+ if (ret)
+ goto err;
+
+ /*
+ * check this before next assertion; on filesystem error our normal
+ * invariants are a bit broken (truncate has to truncate the page cache
+ * before the inode).
+ */
+ ret = bch2_journal_error(&c->journal);
if (ret)
goto err;
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
+ struct bkey_on_stack copy;
struct btree_trans trans;
- struct btree_iter *src, *dst, *del = NULL;
+ struct btree_iter *src, *dst;
loff_t shift, new_size;
u64 src_start;
int ret;
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
+ bkey_on_stack_init(&copy);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
/*
while (1) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
- BKEY_PADDED(k) copy;
struct bkey_i delete;
struct bkey_s_c k;
struct bpos next_pos;
struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
struct bpos atomic_end;
- unsigned commit_flags = BTREE_INSERT_NOFAIL|
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_USE_RESERVE;
+ unsigned trigger_flags = 0;
k = insert
? bch2_btree_iter_peek_prev(src)
bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
break;
reassemble:
- bkey_reassemble(&copy.k, k);
+ bkey_on_stack_reassemble(&copy, c, k);
if (insert &&
- bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {
- bch2_cut_front(move_pos, &copy.k);
- bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k));
- }
+ bkey_cmp(bkey_start_pos(k.k), move_pos) < 0)
+ bch2_cut_front(move_pos, copy.k);
- copy.k.k.p.offset += shift >> 9;
- bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k));
+ copy.k->k.p.offset += shift >> 9;
+ bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k->k));
- ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end);
+ ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end);
if (ret)
goto bkey_err;
- if (bkey_cmp(atomic_end, copy.k.k.p)) {
+ if (bkey_cmp(atomic_end, copy.k->k.p)) {
if (insert) {
move_pos = atomic_end;
move_pos.offset -= shift >> 9;
goto reassemble;
} else {
- bch2_cut_back(atomic_end, &copy.k.k);
+ bch2_cut_back(atomic_end, copy.k);
}
}
bkey_init(&delete.k);
- delete.k.p = src->pos;
- bch2_key_resize(&delete.k, copy.k.k.size);
+ delete.k.p = copy.k->k.p;
+ delete.k.size = copy.k->k.size;
+ delete.k.p.offset -= shift >> 9;
next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
- /*
- * If the new and old keys overlap (because we're moving an
- * extent that's bigger than the amount we're collapsing by),
- * we need to trim the delete key here so they don't overlap
- * because overlaps on insertions aren't handled before
- * triggers are run, so the overwrite will get double counted
- * by the triggers machinery:
- */
- if (insert &&
- bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) {
- bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k);
- } else if (!insert &&
- bkey_cmp(copy.k.k.p,
- bkey_start_pos(&delete.k)) > 0) {
- bch2_cut_front(copy.k.k.p, &delete);
-
- del = bch2_trans_copy_iter(&trans, src);
- BUG_ON(IS_ERR_OR_NULL(del));
-
- bch2_btree_iter_set_pos(del,
- bkey_start_pos(&delete.k));
- }
-
- bch2_trans_update(&trans, dst, &copy.k);
- bch2_trans_update(&trans, del ?: src, &delete);
-
- if (copy.k.k.size == k.k->size) {
+ if (copy.k->k.size == k.k->size) {
/*
* If we're moving the entire extent, we can skip
* running triggers:
*/
- commit_flags |= BTREE_INSERT_NOMARK;
+ trigger_flags |= BTREE_TRIGGER_NORUN;
} else {
/* We might end up splitting compressed extents: */
unsigned nr_ptrs =
- bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k));
+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k));
ret = bch2_disk_reservation_get(c, &disk_res,
- copy.k.k.size, nr_ptrs,
+ copy.k->k.size, nr_ptrs,
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
}
- ret = bch2_trans_commit(&trans, &disk_res,
- &inode->ei_journal_seq,
- commit_flags);
+ bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k));
+
+ ret = bch2_trans_update(&trans, src, &delete, trigger_flags) ?:
+ bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?:
+ bch2_trans_commit(&trans, &disk_res,
+ &inode->ei_journal_seq,
+ BTREE_INSERT_NOFAIL);
bch2_disk_reservation_put(c, &disk_res);
bkey_err:
- if (del)
- bch2_trans_iter_put(&trans, del);
- del = NULL;
-
if (!ret)
bch2_btree_iter_set_pos(src, next_pos);
}
err:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&copy, c);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
struct bkey_i_reservation reservation;
struct bkey_s_c k;
+ bch2_trans_begin(&trans);
+
k = bch2_btree_iter_peek_slot(iter);
if ((ret = bkey_err(k)))
goto bkey_err;
reservation.k.p = k.k->p;
reservation.k.size = k.k->size;
- bch2_cut_front(iter->pos, &reservation.k_i);
- bch2_cut_back(end_pos, &reservation.k);
+ bch2_cut_front(iter->pos, &reservation.k_i);
+ bch2_cut_back(end_pos, &reservation.k_i);
sectors = reservation.k.size;
- reservation.v.nr_replicas = bch2_bkey_nr_dirty_ptrs(k);
+ reservation.v.nr_replicas = bch2_bkey_nr_ptrs_allocated(k);
if (!bkey_extent_is_allocation(k.k)) {
ret = bch2_quota_reservation_add(c, inode,
}
if (reservation.v.nr_replicas < replicas ||
- bch2_extent_is_compressed(k)) {
+ bch2_bkey_sectors_compressed(k)) {
ret = bch2_disk_reservation_get(c, &disk_res, sectors,
replicas, 0);
if (unlikely(ret))
reservation.v.nr_replicas = disk_res.nr_replicas;
}
- bch2_trans_begin_updates(&trans);
-
ret = bch2_extent_update(&trans, iter, &reservation.k_i,
&disk_res, &inode->ei_journal_seq,
0, &i_sectors_delta);
u64 aligned_len;
loff_t ret = 0;
+ if (!c->opts.reflink)
+ return -EOPNOTSUPP;
+
if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
return -EINVAL;