// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "btree_iter.h"
#include "extents.h"
#include "fs-io.h"
#include "fs-io-pagecache.h"
#include "subvolume.h"

#include <linux/pagevec.h>
#include <linux/writeback.h>

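/*
 * Get a contiguous run of folios covering [start, end), pushing them onto
 * *fs. FGP_CREAT is dropped once we're more than 1MB past @start, so a
 * single call can't instantiate an unbounded amount of pagecache; returns 0
 * as long as at least one folio was obtained.
 */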
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
                                     loff_t start, u64 end,
                                     fgf_t fgp_flags, gfp_t gfp,
                                     folios *fs)
{
        struct folio *f;
        u64 pos = start;
        int ret = 0;

        while (pos < end) {
                if ((u64) pos >= (u64) start + (1ULL << 20))
                        fgp_flags &= ~FGP_CREAT;

                ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
                if (ret)
                        break;

                f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
                if (IS_ERR_OR_NULL(f))
                        break;

                BUG_ON(fs->nr && folio_pos(f) != pos);

                pos = folio_end_pos(f);
                darray_push(fs, f);
        }

        if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
                ret = -ENOMEM;

        return fs->nr ? 0 : ret;
}

/* pagecache_block must be held */
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
                                            loff_t start, loff_t end)
{
        int ret;

        /*
         * XXX: the way this is currently implemented, we can spin if a process
         * is continually redirtying a specific page
         */
        do {
                if (!mapping->nrpages)
                        return 0;

                ret = filemap_write_and_wait_range(mapping, start, end);
                if (ret)
                        break;

                if (!mapping->nrpages)
                        return 0;

                ret = invalidate_inode_pages2_range(mapping,
                                start >> PAGE_SHIFT,
                                end >> PAGE_SHIFT);
        } while (ret == -EBUSY);

        return ret;
}

#if 0
/* Useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(n)    #n,
        BCH_FOLIO_SECTOR_STATE()
#undef x
        NULL
};
#endif

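/*
 * State transition helpers for bch_folio_sector: dirtying moves
 * unallocated -> dirty and reserved -> dirty_reserved, undirtying reverses
 * that, and reserving moves unallocated -> reserved and dirty ->
 * dirty_reserved. Any other state is returned unchanged.
 */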
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_unallocated:
                return SECTOR_dirty;
        case SECTOR_reserved:
                return SECTOR_dirty_reserved;
        default:
                return state;
        }
}

static inline enum bch_folio_sector_state
folio_sector_undirty(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_dirty:
                return SECTOR_unallocated;
        case SECTOR_dirty_reserved:
                return SECTOR_reserved;
        default:
                return state;
        }
}

static inline enum bch_folio_sector_state
folio_sector_reserve(enum bch_folio_sector_state state)
{
        switch (state) {
        case SECTOR_unallocated:
                return SECTOR_reserved;
        case SECTOR_dirty:
                return SECTOR_dirty_reserved;
        default:
                return state;
        }
}

/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
        struct bch_folio *s;

        s = kzalloc(sizeof(*s) +
                    sizeof(struct bch_folio_sector) *
                    folio_sectors(folio), gfp);
        if (!s)
                return NULL;

        spin_lock_init(&s->lock);
        folio_attach_private(folio, s);
        return s;
}

struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
        return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
}

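/* Map an extent key to the bch_folio_sector state it implies: */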
static unsigned bkey_to_sector_state(struct bkey_s_c k)
{
        if (bkey_extent_is_reservation(k))
                return SECTOR_reserved;
        if (bkey_extent_is_allocation(k.k))
                return SECTOR_allocated;
        return SECTOR_unallocated;
}

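/*
 * Set nr_replicas and sector state for a subrange of a folio; if the range
 * runs to the end of the folio, the folio's bch_folio state is marked
 * uptodate.
 */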
static void __bch2_folio_set(struct folio *folio,
                             unsigned pg_offset, unsigned pg_len,
                             unsigned nr_ptrs, unsigned state)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, sectors = folio_sectors(folio);

        BUG_ON(pg_offset >= sectors);
        BUG_ON(pg_offset + pg_len > sectors);

        spin_lock(&s->lock);

        for (i = pg_offset; i < pg_offset + pg_len; i++) {
                s->s[i].nr_replicas     = nr_ptrs;
                bch2_folio_sector_set(folio, s, i, state);
        }

        if (i == sectors)
                s->uptodate = true;

        spin_unlock(&s->lock);
}

/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
                   struct folio **fs, unsigned nr_folios)
{
        struct btree_trans *trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bch_folio *s;
        u64 offset = folio_sector(fs[0]);
        unsigned folio_idx;
        u32 snapshot;
        bool need_set = false;
        int ret;

        for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
                s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
                if (!s)
                        return -ENOMEM;

                need_set |= !s->uptodate;
        }

        if (!need_set)
                return 0;

        folio_idx = 0;
        trans = bch2_trans_get(c);
retry:
        bch2_trans_begin(trans);

        ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
        if (ret)
                goto err;

        for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
                           SPOS(inum.inum, offset, snapshot),
                           BTREE_ITER_SLOTS, k, ret) {
                unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
                unsigned state = bkey_to_sector_state(k);

                while (folio_idx < nr_folios) {
                        struct folio *folio = fs[folio_idx];
                        u64 folio_start = folio_sector(folio);
                        u64 folio_end   = folio_end_sector(folio);
                        unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
                                folio_start;
                        unsigned folio_len = min(k.k->p.offset, folio_end) -
                                folio_offset - folio_start;

                        BUG_ON(k.k->p.offset < folio_start);
                        BUG_ON(bkey_start_offset(k.k) > folio_end);

                        if (!bch2_folio(folio)->uptodate)
                                __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

                        if (k.k->p.offset < folio_end)
                                break;
                        folio_idx++;
                }

                if (folio_idx == nr_folios)
                        break;
        }

        offset = iter.pos.offset;
        bch2_trans_iter_exit(trans, &iter);
err:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;
        bch2_trans_put(trans);

        return ret;
}

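/*
 * Set bch_folio sector state for all the folios in a bio, from a single
 * extent key:
 */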
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
        struct bvec_iter iter;
        struct folio_vec fv;
        unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
                ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
        unsigned state = bkey_to_sector_state(k);

        bio_for_each_folio(fv, bio, iter)
                __bch2_folio_set(fv.fv_folio,
                                 fv.fv_offset >> 9,
                                 fv.fv_len >> 9,
                                 nr_ptrs, state);
}

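/*
 * Walk the pagecache over the sector range [start, end) and clear
 * nr_replicas on every tracked sector:
 */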
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
                                     u64 start, u64 end)
{
        pgoff_t index = start >> PAGE_SECTORS_SHIFT;
        pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
        struct folio_batch fbatch;
        unsigned i, j;

        if (end <= start)
                return;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(inode->v.i_mapping,
                                  &index, end_index, &fbatch)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];
                        u64 folio_start = folio_sector(folio);
                        u64 folio_end = folio_end_sector(folio);
                        unsigned folio_offset = max(start, folio_start) - folio_start;
                        unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
                        struct bch_folio *s;

                        BUG_ON(end <= folio_start);

                        folio_lock(folio);
                        s = bch2_folio(folio);

                        if (s) {
                                spin_lock(&s->lock);
                                for (j = folio_offset; j < folio_offset + folio_len; j++)
                                        s->s[j].nr_replicas = 0;
                                spin_unlock(&s->lock);
                        }

                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }
}

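/*
 * Walk the pagecache over [*start, end) (in sectors), moving each tracked
 * sector to its reserved state and subtracting sectors that were
 * SECTOR_dirty from the inode's i_sectors accounting. *start is advanced to
 * the end of each folio processed (clamped to end); with nonblocking set,
 * -EAGAIN is returned if a folio lock can't be taken immediately.
 */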
int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
                                 u64 *start, u64 end,
                                 bool nonblocking)
{
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
        pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
        struct folio_batch fbatch;
        s64 i_sectors_delta = 0;
        int ret = 0;

        if (end <= *start)
                return 0;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(inode->v.i_mapping,
                                  &index, end_index, &fbatch)) {
                for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];

                        if (!nonblocking)
                                folio_lock(folio);
                        else if (!folio_trylock(folio)) {
                                folio_batch_release(&fbatch);
                                ret = -EAGAIN;
                                break;
                        }

                        u64 folio_start = folio_sector(folio);
                        u64 folio_end = folio_end_sector(folio);

                        BUG_ON(end <= folio_start);

                        *start = min(end, folio_end);

                        struct bch_folio *s = bch2_folio(folio);
                        if (s) {
                                unsigned folio_offset = max(*start, folio_start) - folio_start;
                                unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;

                                spin_lock(&s->lock);
                                for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
                                        i_sectors_delta -= s->s[j].state == SECTOR_dirty;
                                        bch2_folio_sector_set(folio, s, j,
                                                folio_sector_reserve(s->s[j].state));
                                }
                                spin_unlock(&s->lock);
                        }

                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }

        bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
        return ret;
}

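/* Additional replicas that still need disk space reserved for this sector: */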
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
                                          unsigned nr_replicas)
{
        return max(0, (int) nr_replicas -
                   s->nr_replicas -
                   s->replicas_reserved);
}

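/*
 * Take a disk reservation covering every sector in the folio that doesn't
 * yet have enough replicas reserved or allocated, and credit it to the
 * per-sector replicas_reserved counts:
 */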
int bch2_get_folio_disk_reservation(struct bch_fs *c,
                                struct bch_inode_info *inode,
                                struct folio *folio, bool check_enospc)
{
        struct bch_folio *s = bch2_folio_create(folio, 0);
        unsigned nr_replicas = inode_nr_replicas(c, inode);
        struct disk_reservation disk_res = { 0 };
        unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
        int ret;

        if (!s)
                return -ENOMEM;

        for (i = 0; i < sectors; i++)
                disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);

        if (!disk_res_sectors)
                return 0;

        ret = bch2_disk_reservation_get(c, &disk_res,
                                        disk_res_sectors, 1,
                                        !check_enospc
                                        ? BCH_DISK_RESERVATION_NOFAIL
                                        : 0);
        if (unlikely(ret))
                return ret;

        for (i = 0; i < sectors; i++)
                s->s[i].replicas_reserved +=
                        sectors_to_reserve(&s->s[i], nr_replicas);

        return 0;
}

void bch2_folio_reservation_put(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct bch2_folio_reservation *res)
{
        bch2_disk_reservation_put(c, &res->disk);
        bch2_quota_reservation_put(c, inode, &res->quota);
}

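/*
 * Reserve disk space and quota for a write to [offset, offset + len) within
 * a folio. Disk space is only reserved for sectors that still need more
 * replicas; quota is only reserved for sectors currently unallocated. If the
 * quota reservation fails, the disk reservation taken here is released again.
 */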
int bch2_folio_reservation_get(struct bch_fs *c,
                        struct bch_inode_info *inode,
                        struct folio *folio,
                        struct bch2_folio_reservation *res,
                        unsigned offset, unsigned len)
{
        struct bch_folio *s = bch2_folio_create(folio, 0);
        unsigned i, disk_sectors = 0, quota_sectors = 0;
        int ret;

        if (!s)
                return -ENOMEM;

        BUG_ON(!s->uptodate);

        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
                disk_sectors += sectors_to_reserve(&s->s[i],
                                                res->disk.nr_replicas);
                quota_sectors += s->s[i].state == SECTOR_unallocated;
        }

        if (disk_sectors) {
                ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
                if (unlikely(ret))
                        return ret;
        }

        if (quota_sectors) {
                ret = bch2_quota_reservation_add(c, inode, &res->quota,
                                                 quota_sectors, true);
                if (unlikely(ret)) {
                        struct disk_reservation tmp = {
                                .sectors = disk_sectors
                        };

                        bch2_disk_reservation_put(c, &tmp);
                        res->disk.sectors -= disk_sectors;
                        return ret;
                }
        }

        return 0;
}

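/*
 * Drop everything bch_folio state holds for a folio: give back per-sector
 * disk reservations, undirty the sectors (adjusting i_sectors accordingly),
 * and free the attached bch_folio. Called from the invalidate/release paths
 * below.
 */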
static void bch2_clear_folio_bits(struct folio *folio)
{
        struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_folio *s = bch2_folio(folio);
        struct disk_reservation disk_res = { 0 };
        int i, sectors = folio_sectors(folio), dirty_sectors = 0;

        if (!s)
                return;

        EBUG_ON(!folio_test_locked(folio));
        EBUG_ON(folio_test_writeback(folio));

        for (i = 0; i < sectors; i++) {
                disk_res.sectors += s->s[i].replicas_reserved;
                s->s[i].replicas_reserved = 0;

                dirty_sectors -= s->s[i].state == SECTOR_dirty;
                bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
        }

        bch2_disk_reservation_put(c, &disk_res);

        bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

        bch2_folio_release(folio);
}

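/*
 * Transfer reservation from @res to the folio's per-sector state, mark the
 * affected sectors dirty, account newly dirtied sectors against i_sectors
 * and the quota reservation, and finally mark the folio itself dirty in the
 * pagecache:
 */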
void bch2_set_folio_dirty(struct bch_fs *c,
                          struct bch_inode_info *inode,
                          struct folio *folio,
                          struct bch2_folio_reservation *res,
                          unsigned offset, unsigned len)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, dirty_sectors = 0;

        WARN_ON((u64) folio_pos(folio) + offset + len >
                round_up((u64) i_size_read(&inode->v), block_bytes(c)));

        BUG_ON(!s->uptodate);

        spin_lock(&s->lock);

        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
                unsigned sectors = sectors_to_reserve(&s->s[i],
                                                res->disk.nr_replicas);

                /*
                 * This can happen if we race with the error path in
                 * bch2_writepage_io_done():
                 */
                sectors = min_t(unsigned, sectors, res->disk.sectors);

                s->s[i].replicas_reserved += sectors;
                res->disk.sectors -= sectors;

                dirty_sectors += s->s[i].state == SECTOR_unallocated;

                bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
        }

        spin_unlock(&s->lock);

        bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

        if (!folio_test_dirty(folio))
                filemap_dirty_folio(inode->v.i_mapping, folio);
}

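/*
 * Page fault handler. A fault against the mapping that currently has faults
 * disabled (faults_disabled_mapping()) would deadlock, so it gets
 * VM_FAULT_SIGBUS. When lock ordering between the two mappings requires it,
 * the pagecache_block lock on the other mapping is dropped and retaken
 * around taking this inode's pagecache_add lock, and the drop is signalled
 * via set_fdm_dropped_locks() before returning SIGBUS.
 */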
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
        struct file *file = vmf->vma->vm_file;
        struct address_space *mapping = file->f_mapping;
        struct address_space *fdm = faults_disabled_mapping();
        struct bch_inode_info *inode = file_bch_inode(file);
        vm_fault_t ret;

        if (fdm == mapping)
                return VM_FAULT_SIGBUS;

        /* Lock ordering: */
        if (fdm > mapping) {
                struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

                if (bch2_pagecache_add_tryget(inode))
                        goto got_lock;

                bch2_pagecache_block_put(fdm_host);

                bch2_pagecache_add_get(inode);
                bch2_pagecache_add_put(inode);

                bch2_pagecache_block_get(fdm_host);

                /* Signal that lock has been dropped: */
                set_fdm_dropped_locks();
                return VM_FAULT_SIGBUS;
        }

        bch2_pagecache_add_get(inode);
got_lock:
        ret = filemap_fault(vmf);
        bch2_pagecache_add_put(inode);

        return ret;
}

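/*
 * mkwrite: a writable mapping is dirtying a folio. bch_folio state must be
 * initialized and a disk/quota reservation taken for the whole folio before
 * it can be marked dirty; if the reservation can't be taken the fault is
 * failed with SIGBUS.
 */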
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
        struct folio *folio = page_folio(vmf->page);
        struct file *file = vmf->vma->vm_file;
        struct bch_inode_info *inode = file_bch_inode(file);
        struct address_space *mapping = file->f_mapping;
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch2_folio_reservation res;
        unsigned len;
        loff_t isize;
        vm_fault_t ret;

        bch2_folio_reservation_init(c, inode, &res);

        sb_start_pagefault(inode->v.i_sb);
        file_update_time(file);

        /*
         * Not strictly necessary, but helps avoid dio writes livelocking in
         * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
         * a bch2_write_invalidate_inode_pages_range() that works without dropping
         * page lock before invalidating page
         */
        bch2_pagecache_add_get(inode);

        folio_lock(folio);
        isize = i_size_read(&inode->v);

        if (folio->mapping != mapping || folio_pos(folio) >= isize) {
                folio_unlock(folio);
                ret = VM_FAULT_NOPAGE;
                goto out;
        }

        len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

        if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
            bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
                folio_unlock(folio);
                ret = VM_FAULT_SIGBUS;
                goto out;
        }

        bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
        bch2_folio_reservation_put(c, inode, &res);

        folio_wait_stable(folio);
        ret = VM_FAULT_LOCKED;
out:
        bch2_pagecache_add_put(inode);
        sb_end_pagefault(inode->v.i_sb);

        return ret;
}

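/*
 * invalidate_folio/release_folio address space ops: bch_folio state is only
 * torn down when the whole folio is going away.
 */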
void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
        if (offset || length < folio_size(folio))
                return;

        bch2_clear_folio_bits(folio);
}

bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
        if (folio_test_dirty(folio) || folio_test_writeback(folio))
                return false;

        bch2_clear_folio_bits(folio);
        return true;
}

/* fseek: */

static int folio_data_offset(struct folio *folio, loff_t pos,
                             unsigned min_replicas)
{
        struct bch_folio *s = bch2_folio(folio);
        unsigned i, sectors = folio_sectors(folio);

        if (s)
                for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
                        if (s->s[i].state >= SECTOR_dirty &&
                            s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
                                return i << SECTOR_SHIFT;

        return -1;
}

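/*
 * Scan the pagecache for data (sectors in a dirty or allocated state with at
 * least min_replicas replicas) in [start_offset, end_offset); returns the
 * offset of the first data found, or end_offset if there is none. With
 * nonblock set, -EAGAIN is returned if a folio lock can't be taken
 * immediately.
 */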
loff_t bch2_seek_pagecache_data(struct inode *vinode,
                                loff_t start_offset,
                                loff_t end_offset,
                                unsigned min_replicas,
                                bool nonblock)
{
        struct folio_batch fbatch;
        pgoff_t start_index     = start_offset >> PAGE_SHIFT;
        pgoff_t end_index       = end_offset >> PAGE_SHIFT;
        pgoff_t index           = start_index;
        unsigned i;
        loff_t ret;
        int offset;

        folio_batch_init(&fbatch);

        while (filemap_get_folios(vinode->i_mapping,
                                  &index, end_index, &fbatch)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        struct folio *folio = fbatch.folios[i];

                        if (!nonblock) {
                                folio_lock(folio);
                        } else if (!folio_trylock(folio)) {
                                folio_batch_release(&fbatch);
                                return -EAGAIN;
                        }

                        offset = folio_data_offset(folio,
                                        max(folio_pos(folio), start_offset),
                                        min_replicas);
                        if (offset >= 0) {
                                ret = clamp(folio_pos(folio) + offset,
                                            start_offset, end_offset);
                                folio_unlock(folio);
                                folio_batch_release(&fbatch);
                                return ret;
                        }
                        folio_unlock(folio);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }

        return end_offset;
}

/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
 * code to indicate a pagecache hole exists at the returned offset. Otherwise
 * return 0 if the folio is filled with data, or an error code. This function
 * can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
                             unsigned min_replicas, bool nonblock)
{
        struct folio *folio;
        struct bch_folio *s;
        unsigned i, sectors;
        int ret = -ENOENT;

        folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
                                    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
        if (IS_ERR(folio))
                return PTR_ERR(folio);

        s = bch2_folio(folio);
        if (!s)
                goto unlock;

        sectors = folio_sectors(folio);
        for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
                if (s->s[i].state < SECTOR_dirty ||
                    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
                        *offset = max(*offset,
                                      folio_pos(folio) + (i << SECTOR_SHIFT));
                        goto unlock;
                }

        *offset = folio_end_pos(folio);
        ret = 0;
unlock:
        folio_unlock(folio);
        folio_put(folio);
        return ret;
}

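/*
 * Walk forward from start_offset, folio by folio, until folio_hole_offset()
 * finds a hole; a missing folio (-ENOENT from the filemap layer) also counts
 * as a hole. The result is clamped to end_offset.
 */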
loff_t bch2_seek_pagecache_hole(struct inode *vinode,
                                loff_t start_offset,
                                loff_t end_offset,
                                unsigned min_replicas,
                                bool nonblock)
{
        struct address_space *mapping = vinode->i_mapping;
        loff_t offset = start_offset;
        loff_t ret = 0;

        while (!ret && offset < end_offset)
                ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);

        if (ret && ret != -ENOENT)
                return ret;
        return min(offset, end_offset);
}

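/*
 * Clamp a candidate hole (in sectors) against the pagecache: advance
 * *hole_start past any cached data, then pull *hole_end back to the first
 * cached data after the new *hole_start.
 */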
int bch2_clamp_data_hole(struct inode *inode,
                         u64 *hole_start,
                         u64 *hole_end,
                         unsigned min_replicas,
                         bool nonblock)
{
        loff_t ret;

        ret = bch2_seek_pagecache_hole(inode,
                *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
        if (ret < 0)
                return ret;

        *hole_start = ret;

        if (*hole_start == *hole_end)
                return 0;

        ret = bch2_seek_pagecache_data(inode,
                *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
        if (ret < 0)
                return ret;

        *hole_end = ret;
        return 0;
}

#endif /* NO_BCACHEFS_FS */