// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "btree_iter.h"
#include "extents.h"
#include "fs-io.h"
#include "fs-io-pagecache.h"
#include "subvolume.h"

#include <linux/pagevec.h>
#include <linux/writeback.h>

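/*
 * Get a contiguous run of folios covering [start, end): folios are looked up
 * (and, with FGP_CREAT, created) one by one and pushed onto @folios, stopping
 * at the first gap or failure. FGP_CREAT is dropped after the first 1MB,
 * presumably so a single call can't instantiate an unbounded amount of new
 * pagecache. Returns 0 if at least one folio was obtained.
 */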
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
                                     loff_t start, u64 end,
                                     int fgp_flags, gfp_t gfp,
                                     folios *folios)
{
        struct folio *f;
        u64 pos = start;
        int ret = 0;

        while (pos < end) {
                if ((u64) pos >= (u64) start + (1ULL << 20))
                        fgp_flags &= ~FGP_CREAT;

                ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL);
                if (ret)
                        break;

                f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
                if (IS_ERR_OR_NULL(f))
                        break;

                BUG_ON(folios->nr && folio_pos(f) != pos);

                pos = folio_end_pos(f);
                darray_push(folios, f);
        }

        if (!folios->nr && !ret && (fgp_flags & FGP_CREAT))
                ret = -ENOMEM;

        return folios->nr ? 0 : ret;
}

/* pagecache_block must be held */
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
                                            loff_t start, loff_t end)
{
        int ret;

        /*
         * XXX: the way this is currently implemented, we can spin if a process
         * is continually redirtying a specific page
         */
        do {
                if (!mapping->nrpages)
                        return 0;

                ret = filemap_write_and_wait_range(mapping, start, end);
                if (ret)
                        break;

                if (!mapping->nrpages)
                        return 0;

                ret = invalidate_inode_pages2_range(mapping,
                                start >> PAGE_SHIFT,
                                end >> PAGE_SHIFT);
        } while (ret == -EBUSY);

        return ret;
}

static const char * const bch2_folio_sector_states[] = {
#define x(n)    #n,
        BCH_FOLIO_SECTOR_STATE()
#undef x
        NULL
};

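/*
 * State transitions for a bch_folio_sector: dirtying takes
 * unallocated -> dirty and reserved -> dirty_reserved, undirtying is the
 * inverse, and reserving takes unallocated -> reserved and
 * dirty -> dirty_reserved. Any other state (e.g. allocated) is left unchanged.
 */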
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_unallocated:
                return SECTOR_dirty;
        case SECTOR_reserved:
                return SECTOR_dirty_reserved;
        default:
                return state;
        }
}

static inline enum bch_folio_sector_state
folio_sector_undirty(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_dirty:
                return SECTOR_unallocated;
        case SECTOR_dirty_reserved:
                return SECTOR_reserved;
        default:
                return state;
        }
}

static inline enum bch_folio_sector_state
folio_sector_reserve(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_unallocated:
                return SECTOR_reserved;
        case SECTOR_dirty:
                return SECTOR_dirty_reserved;
        default:
                return state;
        }
}

/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
        struct bch_folio *s;

        s = kzalloc(sizeof(*s) +
                    sizeof(struct bch_folio_sector) *
                    folio_sectors(folio), gfp);
        if (!s)
                return NULL;

        spin_lock_init(&s->lock);
        folio_attach_private(folio, s);
        return s;
}

struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
        return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
}

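/* The bch_folio_sector state implied by an extent bkey: */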
static unsigned bkey_to_sector_state(struct bkey_s_c k)
{
        if (bkey_extent_is_reservation(k))
                return SECTOR_reserved;
        if (bkey_extent_is_allocation(k.k))
                return SECTOR_allocated;
        return SECTOR_unallocated;
}

static void __bch2_folio_set(struct folio *folio,
                             unsigned pg_offset, unsigned pg_len,
                             unsigned nr_ptrs, unsigned state)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, sectors = folio_sectors(folio);

        BUG_ON(pg_offset >= sectors);
        BUG_ON(pg_offset + pg_len > sectors);

        spin_lock(&s->lock);

        for (i = pg_offset; i < pg_offset + pg_len; i++) {
                s->s[i].nr_replicas     = nr_ptrs;
                bch2_folio_sector_set(folio, s, i, state);
        }

        if (i == sectors)
                s->uptodate = true;

        spin_unlock(&s->lock);
}

/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
                   struct folio **folios, unsigned nr_folios)
{
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bch_folio *s;
        u64 offset = folio_sector(folios[0]);
        unsigned folio_idx;
        u32 snapshot;
        bool need_set = false;
        int ret;

        for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
                s = bch2_folio_create(folios[folio_idx], GFP_KERNEL);
                if (!s)
                        return -ENOMEM;

                need_set |= !s->uptodate;
        }

        if (!need_set)
                return 0;

        folio_idx = 0;
        bch2_trans_init(&trans, c, 0, 0);
retry:
        bch2_trans_begin(&trans);

        ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
        if (ret)
                goto err;

        for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
                           SPOS(inum.inum, offset, snapshot),
                           BTREE_ITER_SLOTS, k, ret) {
                unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
                unsigned state = bkey_to_sector_state(k);

                while (folio_idx < nr_folios) {
                        struct folio *folio = folios[folio_idx];
                        u64 folio_start = folio_sector(folio);
                        u64 folio_end   = folio_end_sector(folio);
                        unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
                                folio_start;
                        unsigned folio_len = min(k.k->p.offset, folio_end) -
                                folio_offset - folio_start;

                        BUG_ON(k.k->p.offset < folio_start);
                        BUG_ON(bkey_start_offset(k.k) > folio_end);

                        if (!bch2_folio(folio)->uptodate)
                                __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

                        if (k.k->p.offset < folio_end)
                                break;
                        folio_idx++;
                }

                if (folio_idx == nr_folios)
                        break;
        }

        offset = iter.pos.offset;
        bch2_trans_iter_exit(&trans, &iter);
err:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
        bch2_trans_exit(&trans);

        return ret;
}

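/*
 * Set bch_folio state for every folio in a bio from the given extent key;
 * KEY_TYPE_reflink_v keys are counted as having zero fully allocated
 * pointers.
 */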
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
        struct bvec_iter iter;
        struct folio_vec fv;
        unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
                ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
        unsigned state = bkey_to_sector_state(k);

        bio_for_each_folio(fv, bio, iter)
                __bch2_folio_set(fv.fv_folio,
                                 fv.fv_offset >> 9,
                                 fv.fv_len >> 9,
                                 nr_ptrs, state);
}

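/*
 * Mark cached data in [start, end) (sectors) as no longer allocated on disk:
 * nr_replicas is cleared for every affected sector.
 */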
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
                                     u64 start, u64 end)
{
        pgoff_t index = start >> PAGE_SECTORS_SHIFT;
        pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
        struct folio_batch fbatch;
        unsigned i, j;

        if (end <= start)
                return;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(inode->v.i_mapping,
                                  &index, end_index, &fbatch)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];
                        u64 folio_start = folio_sector(folio);
                        u64 folio_end = folio_end_sector(folio);
                        unsigned folio_offset = max(start, folio_start) - folio_start;
                        unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
                        struct bch_folio *s;

                        BUG_ON(end <= folio_start);

                        folio_lock(folio);
                        s = bch2_folio(folio);

                        if (s) {
                                spin_lock(&s->lock);
                                for (j = folio_offset; j < folio_offset + folio_len; j++)
                                        s->s[j].nr_replicas = 0;
                                spin_unlock(&s->lock);
                        }

                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }
}

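/*
 * Mark cached data in [start, end) (sectors) as covered by an on-disk
 * reservation: each affected sector moves to its _reserved state, and
 * i_sectors accounting for previously-dirty sectors is dropped since the
 * reservation now accounts for them.
 */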
void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
                                  u64 start, u64 end)
{
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        pgoff_t index = start >> PAGE_SECTORS_SHIFT;
        pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
        struct folio_batch fbatch;
        s64 i_sectors_delta = 0;
        unsigned i, j;

        if (end <= start)
                return;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(inode->v.i_mapping,
                                  &index, end_index, &fbatch)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];
                        u64 folio_start = folio_sector(folio);
                        u64 folio_end = folio_end_sector(folio);
                        unsigned folio_offset = max(start, folio_start) - folio_start;
                        unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
                        struct bch_folio *s;

                        BUG_ON(end <= folio_start);

                        folio_lock(folio);
                        s = bch2_folio(folio);

                        if (s) {
                                spin_lock(&s->lock);
                                for (j = folio_offset; j < folio_offset + folio_len; j++) {
                                        i_sectors_delta -= s->s[j].state == SECTOR_dirty;
                                        bch2_folio_sector_set(folio, s, j,
                                                folio_sector_reserve(s->s[j].state));
                                }
                                spin_unlock(&s->lock);
                        }

                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }

        bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
}

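/* Additional replicas that still need reserving for this sector to reach @nr_replicas: */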
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
                                          unsigned nr_replicas)
{
        return max(0, (int) nr_replicas -
                   s->nr_replicas -
                   s->replicas_reserved);
}

int bch2_get_folio_disk_reservation(struct bch_fs *c,
                                struct bch_inode_info *inode,
                                struct folio *folio, bool check_enospc)
{
        struct bch_folio *s = bch2_folio_create(folio, 0);
        unsigned nr_replicas = inode_nr_replicas(c, inode);
        struct disk_reservation disk_res = { 0 };
        unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
        int ret;

        if (!s)
                return -ENOMEM;

        for (i = 0; i < sectors; i++)
                disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);

        if (!disk_res_sectors)
                return 0;

        ret = bch2_disk_reservation_get(c, &disk_res,
                                        disk_res_sectors, 1,
                                        !check_enospc
                                        ? BCH_DISK_RESERVATION_NOFAIL
                                        : 0);
        if (unlikely(ret))
                return ret;

        for (i = 0; i < sectors; i++)
                s->s[i].replicas_reserved +=
                        sectors_to_reserve(&s->s[i], nr_replicas);

        return 0;
}

void bch2_folio_reservation_put(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct bch2_folio_reservation *res)
{
        bch2_disk_reservation_put(c, &res->disk);
        bch2_quota_reservation_put(c, inode, &res->quota);
}

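/*
 * Reserve disk space and quota for dirtying @offset/@len within a folio:
 * disk sectors are reserved for blocks not yet fully allocated or reserved,
 * quota only for blocks currently unallocated. If the quota reservation
 * fails, the disk reservation taken here is released again.
 */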
int bch2_folio_reservation_get(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct folio *folio,
                        struct bch2_folio_reservation *res,
                        unsigned offset, unsigned len)
{
        struct bch_folio *s = bch2_folio_create(folio, 0);
        unsigned i, disk_sectors = 0, quota_sectors = 0;
        int ret;

        if (!s)
                return -ENOMEM;

        BUG_ON(!s->uptodate);

        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
                disk_sectors += sectors_to_reserve(&s->s[i],
                                                res->disk.nr_replicas);
                quota_sectors += s->s[i].state == SECTOR_unallocated;
        }

        if (disk_sectors) {
                ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
                if (unlikely(ret))
                        return ret;
        }

        if (quota_sectors) {
                ret = bch2_quota_reservation_add(c, inode, &res->quota,
                                                 quota_sectors, true);
                if (unlikely(ret)) {
                        struct disk_reservation tmp = {
                                .sectors = disk_sectors
                        };

                        bch2_disk_reservation_put(c, &tmp);
                        res->disk.sectors -= disk_sectors;
                        return ret;
                }
        }

        return 0;
}

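/*
 * Tear down bch_folio state when a folio leaves the pagecache: return any
 * per-sector disk reservations, undo dirty sector accounting, and release
 * the bch_folio itself.
 */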
static void bch2_clear_folio_bits(struct folio *folio)
{
        struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_folio *s = bch2_folio(folio);
        struct disk_reservation disk_res = { 0 };
        int i, sectors = folio_sectors(folio), dirty_sectors = 0;

        if (!s)
                return;

        EBUG_ON(!folio_test_locked(folio));
        EBUG_ON(folio_test_writeback(folio));

        for (i = 0; i < sectors; i++) {
                disk_res.sectors += s->s[i].replicas_reserved;
                s->s[i].replicas_reserved = 0;

                dirty_sectors -= s->s[i].state == SECTOR_dirty;
                bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
        }

        bch2_disk_reservation_put(c, &disk_res);

        bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

        bch2_folio_release(folio);
}

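/*
 * Mark @offset/@len within a folio dirty: transfer the reservation taken by
 * bch2_folio_reservation_get() into the per-sector state and bump i_sectors
 * for sectors that were previously unallocated.
 */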
void bch2_set_folio_dirty(struct bch_fs *c,
                          struct bch_inode_info *inode,
                          struct folio *folio,
                          struct bch2_folio_reservation *res,
                          unsigned offset, unsigned len)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, dirty_sectors = 0;

        WARN_ON((u64) folio_pos(folio) + offset + len >
                round_up((u64) i_size_read(&inode->v), block_bytes(c)));

        BUG_ON(!s->uptodate);

        spin_lock(&s->lock);

        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
                unsigned sectors = sectors_to_reserve(&s->s[i],
                                                res->disk.nr_replicas);

                /*
                 * This can happen if we race with the error path in
                 * bch2_writepage_io_done():
                 */
                sectors = min_t(unsigned, sectors, res->disk.sectors);

                s->s[i].replicas_reserved += sectors;
                res->disk.sectors -= sectors;

                dirty_sectors += s->s[i].state == SECTOR_unallocated;

                bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
        }

        spin_unlock(&s->lock);

        bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

        if (!folio_test_dirty(folio))
                filemap_dirty_folio(inode->v.i_mapping, folio);
}

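/*
 * Page fault: filemap_fault() with the pagecache add lock held. Faults
 * against the mapping the current thread has disabled faults on (the dio
 * write path uses this to avoid deadlocking on its own pagecache lock)
 * return SIGBUS; the fdm > mapping case keeps lock ordering consistent when
 * two different mappings are involved.
 */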
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
        struct file *file = vmf->vma->vm_file;
        struct address_space *mapping = file->f_mapping;
        struct address_space *fdm = faults_disabled_mapping();
        struct bch_inode_info *inode = file_bch_inode(file);
        vm_fault_t ret;

        if (fdm == mapping)
                return VM_FAULT_SIGBUS;

        /* Lock ordering: */
        if (fdm > mapping) {
                struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

                if (bch2_pagecache_add_tryget(inode))
                        goto got_lock;

                bch2_pagecache_block_put(fdm_host);

                bch2_pagecache_add_get(inode);
                bch2_pagecache_add_put(inode);

                bch2_pagecache_block_get(fdm_host);

                /* Signal that lock has been dropped: */
                set_fdm_dropped_locks();
                return VM_FAULT_SIGBUS;
        }

        bch2_pagecache_add_get(inode);
got_lock:
        ret = filemap_fault(vmf);
        bch2_pagecache_add_put(inode);

        return ret;
}

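/*
 * mkwrite: a writable mapping is about to dirty this folio. Take a disk and
 * quota reservation covering the folio so that later writeback can't fail
 * with -ENOSPC, then mark it dirty.
 */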
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
        struct folio *folio = page_folio(vmf->page);
        struct file *file = vmf->vma->vm_file;
        struct bch_inode_info *inode = file_bch_inode(file);
        struct address_space *mapping = file->f_mapping;
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch2_folio_reservation res;
        unsigned len;
        loff_t isize;
        vm_fault_t ret;

        bch2_folio_reservation_init(c, inode, &res);

        sb_start_pagefault(inode->v.i_sb);
        file_update_time(file);

        /*
         * Not strictly necessary, but helps avoid dio writes livelocking in
         * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
         * a bch2_write_invalidate_inode_pages_range() that works without dropping
         * page lock before invalidating page
         */
        bch2_pagecache_add_get(inode);

        folio_lock(folio);
        isize = i_size_read(&inode->v);

        if (folio->mapping != mapping || folio_pos(folio) >= isize) {
                folio_unlock(folio);
                ret = VM_FAULT_NOPAGE;
                goto out;
        }

        len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

        if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
            bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
                folio_unlock(folio);
                ret = VM_FAULT_SIGBUS;
                goto out;
        }

        bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
        bch2_folio_reservation_put(c, inode, &res);

        folio_wait_stable(folio);
        ret = VM_FAULT_LOCKED;
out:
        bch2_pagecache_add_put(inode);
        sb_end_pagefault(inode->v.i_sb);

        return ret;
}

void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
        if (offset || length < folio_size(folio))
                return;

        bch2_clear_folio_bits(folio);
}

bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
        if (folio_test_dirty(folio) || folio_test_writeback(folio))
                return false;

        bch2_clear_folio_bits(folio);
        return true;
}

/* fseek: */

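/*
 * Byte offset within the folio of the first sector at or after @pos that
 * holds data (SECTOR_dirty or better) with at least @min_replicas replicas,
 * or -1 if there is none.
 */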
static int folio_data_offset(struct folio *folio, loff_t pos,
                             unsigned min_replicas)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, sectors = folio_sectors(folio);

        if (s)
                for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
                        if (s->s[i].state >= SECTOR_dirty &&
                            s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
                                return i << SECTOR_SHIFT;

        return -1;
}

loff_t bch2_seek_pagecache_data(struct inode *vinode,
                                loff_t start_offset,
                                loff_t end_offset,
                                unsigned min_replicas,
                                bool nonblock)
{
        struct folio_batch fbatch;
        pgoff_t start_index     = start_offset >> PAGE_SHIFT;
        pgoff_t end_index       = end_offset >> PAGE_SHIFT;
        pgoff_t index           = start_index;
        unsigned i;
        loff_t ret;
        int offset;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(vinode->i_mapping,
                                  &index, end_index, &fbatch)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];

                        if (!nonblock) {
                                folio_lock(folio);
                        } else if (!folio_trylock(folio)) {
                                folio_batch_release(&fbatch);
                                return -EAGAIN;
                        }

                        offset = folio_data_offset(folio,
                                        max(folio_pos(folio), start_offset),
                                        min_replicas);
                        if (offset >= 0) {
                                ret = clamp(folio_pos(folio) + offset,
                                            start_offset, end_offset);
                                folio_unlock(folio);
                                folio_batch_release(&fbatch);
                                return ret;
                        }
                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }

        return end_offset;
}

/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
 * code to indicate a pagecache hole exists at the returned offset. Otherwise
 * return 0 if the folio is filled with data, or an error code. This function
 * can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
                              unsigned min_replicas, bool nonblock)
{
        struct folio *folio;
        struct bch_folio *s;
        unsigned i, sectors;
        int ret = -ENOENT;

        folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
                                    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
        if (IS_ERR(folio))
                return PTR_ERR(folio);

        s = bch2_folio(folio);
        if (!s)
                goto unlock;

        sectors = folio_sectors(folio);
        for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
                if (s->s[i].state < SECTOR_dirty ||
                    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
                        *offset = max(*offset,
                                      folio_pos(folio) + (i << SECTOR_SHIFT));
                        goto unlock;
                }

        *offset = folio_end_pos(folio);
        ret = 0;
unlock:
        folio_unlock(folio);
        folio_put(folio);
        return ret;
}

loff_t bch2_seek_pagecache_hole(struct inode *vinode,
                                loff_t start_offset,
                                loff_t end_offset,
                                unsigned min_replicas,
                                bool nonblock)
{
        struct address_space *mapping = vinode->i_mapping;
        loff_t offset = start_offset;
        loff_t ret = 0;

        while (!ret && offset < end_offset)
                ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);

        if (ret && ret != -ENOENT)
                return ret;
        return min(offset, end_offset);
}

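/*
 * Shrink the hole [*hole_start, *hole_end) (in sectors) so it doesn't overlap
 * cached data: advance the start to the first pagecache hole, then clamp the
 * end at the next cached data.
 */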
int bch2_clamp_data_hole(struct inode *inode,
                         u64 *hole_start,
                         u64 *hole_end,
                         unsigned min_replicas,
                         bool nonblock)
{
        loff_t ret;

        ret = bch2_seek_pagecache_hole(inode,
                *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
        if (ret < 0)
                return ret;

        *hole_start = ret;

        if (*hole_start == *hole_end)
                return 0;

        ret = bch2_seek_pagecache_data(inode,
                *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
        if (ret < 0)
                return ret;

        *hole_end = ret;
        return 0;
}

#endif /* NO_BCACHEFS_FS */