libbcachefs/inode.c

#include "bcachefs.h"
#include "bkey_methods.h"
#include "btree_update.h"
#include "extents.h"
#include "inode.h"
#include "io.h"
#include "keylist.h"

#include <linux/random.h>

#include <asm/unaligned.h>

#define FIELD_BYTES()

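/*
 * Inode fields are stored as variable length integers: the number of leading
 * zero bits in the first byte selects one of eight sizes.  byte_table[] gives
 * the total size in bytes for each prefix, bits_table[] the number of payload
 * bits left once the prefix bits are removed.  For example, a two byte field
 * starts with the prefix 01 and holds 14 payload bits, so the value 1000
 * (0x3e8) is encoded as 0x43 0xe8.
 */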
static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
static const u8 bits_table[8] = {
        1  * 8 - 1,
        2  * 8 - 2,
        3  * 8 - 3,
        4  * 8 - 4,
        6  * 8 - 5,
        8  * 8 - 6,
        10 * 8 - 7,
        13 * 8 - 8,
};

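/*
 * Encode the 128 bit value in[] (in[0] = low 64 bits, in[1] = high 64 bits)
 * as a variable length big endian integer of 1 to 13 bytes at @out, bounds
 * checked against @end.  Returns the number of bytes written.
 */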
static int inode_encode_field(u8 *out, u8 *end, const u64 in[2])
{
        unsigned bytes, bits, shift;

        if (likely(!in[1]))
                bits = fls64(in[0]);
        else
                bits = fls64(in[1]) + 64;

        for (shift = 1; shift <= 8; shift++)
                if (bits < bits_table[shift - 1])
                        goto got_shift;

        BUG();
got_shift:
        bytes = byte_table[shift - 1];

        BUG_ON(out + bytes > end);

        if (likely(bytes <= 8)) {
                u64 b = cpu_to_be64(in[0]);

                memcpy(out, (void *) &b + 8 - bytes, bytes);
        } else {
                u64 b = cpu_to_be64(in[1]);

                memcpy(out, (void *) &b + 16 - bytes, bytes);
                put_unaligned_be64(in[0], out + bytes - 8);
        }

        *out |= (1 << 8) >> shift;

        return bytes;
}

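/*
 * Decode a variable length integer at @in, bounds checked against @end, into
 * out[] (out[0] = low 64 bits, out[1] = high 64 bits), setting *out_bits to
 * the number of significant bits.  Returns the number of bytes consumed, or
 * -1 on a truncated or invalid field.
 */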
static int inode_decode_field(const u8 *in, const u8 *end,
                              u64 out[2], unsigned *out_bits)
{
        unsigned bytes, bits, shift;

        if (in >= end)
                return -1;

        if (!*in)
                return -1;

        /*
         * The position of the highest set bit indicates the number of bytes:
         * shift = number of prefix bits to strip from the high byte:
         */
        shift   = 8 - __fls(*in); /* 1 <= shift <= 8 */
        bytes   = byte_table[shift - 1];
        bits    = bytes * 8 - shift;

        if (in + bytes > end)
                return -1;

        /*
         * We're assuming it's safe to dereference up to 7 bytes before @in;
         * this works because keys always start quite a bit more than 7 bytes
         * after the start of the btree node header:
         */
        if (likely(bytes <= 8)) {
                out[0] = get_unaligned_be64(in + bytes - 8);
                out[0] <<= 64 - bits;
                out[0] >>= 64 - bits;
                out[1] = 0;
        } else {
                out[0] = get_unaligned_be64(in + bytes - 8);
                out[1] = get_unaligned_be64(in + bytes - 16);
                out[1] <<= 128 - bits;
                out[1] >>= 128 - bits;
        }

        *out_bits = out[1] ? 64 + fls64(out[1]) : fls64(out[0]);
        return bytes;
}

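/*
 * Pack the unpacked in-memory representation @inode into @packed: the fixed
 * size fields are copied directly and each field listed in BCH_INODE_FIELDS()
 * is appended as a variable length integer, with trailing zero fields trimmed
 * off.  With CONFIG_BCACHEFS_DEBUG the result is immediately unpacked again
 * and compared against the original.
 */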
void bch2_inode_pack(struct bkey_inode_buf *packed,
                     const struct bch_inode_unpacked *inode)
{
        u8 *out = packed->inode.v.fields;
        u8 *end = (void *) &packed[1];
        u8 *last_nonzero_field = out;
        u64 field[2];
        unsigned nr_fields = 0, last_nonzero_fieldnr = 0;

        bkey_inode_init(&packed->inode.k_i);
        packed->inode.k.p.inode         = inode->inum;
        packed->inode.v.i_hash_seed     = inode->i_hash_seed;
        packed->inode.v.i_flags         = cpu_to_le32(inode->i_flags);
        packed->inode.v.i_mode          = cpu_to_le16(inode->i_mode);

#define BCH_INODE_FIELD(_name, _bits)                                   \
        field[0] = inode->_name;                                        \
        field[1] = 0;                                                   \
        out += inode_encode_field(out, end, field);                     \
        nr_fields++;                                                    \
                                                                        \
        if (field[0] | field[1]) {                                      \
                last_nonzero_field = out;                               \
                last_nonzero_fieldnr = nr_fields;                       \
        }

        BCH_INODE_FIELDS()
#undef  BCH_INODE_FIELD

        out = last_nonzero_field;
        nr_fields = last_nonzero_fieldnr;

        set_bkey_val_bytes(&packed->inode.k, out - (u8 *) &packed->inode.v);
        memset(out, 0,
               (u8 *) &packed->inode.v +
               bkey_val_bytes(&packed->inode.k) - out);

        SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields);

        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
                struct bch_inode_unpacked unpacked;

                int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode),
                                            &unpacked);
                BUG_ON(ret);
                BUG_ON(unpacked.inum            != inode->inum);
                BUG_ON(unpacked.i_hash_seed     != inode->i_hash_seed);
                BUG_ON(unpacked.i_mode          != inode->i_mode);

#define BCH_INODE_FIELD(_name, _bits)   BUG_ON(unpacked._name != inode->_name);
                BCH_INODE_FIELDS()
#undef  BCH_INODE_FIELD
        }
}

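/*
 * Unpack the on disk encoding @inode into @unpacked.  Fields past
 * INODE_NR_FIELDS() are zeroed; returns -1 if a field fails to decode or is
 * too wide for its destination.
 */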
int bch2_inode_unpack(struct bkey_s_c_inode inode,
                      struct bch_inode_unpacked *unpacked)
{
        const u8 *in = inode.v->fields;
        const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k);
        u64 field[2];
        unsigned fieldnr = 0, field_bits;
        int ret;

        unpacked->inum          = inode.k->p.inode;
        unpacked->i_hash_seed   = inode.v->i_hash_seed;
        unpacked->i_flags       = le32_to_cpu(inode.v->i_flags);
        unpacked->i_mode        = le16_to_cpu(inode.v->i_mode);

#define BCH_INODE_FIELD(_name, _bits)                                   \
        if (fieldnr++ == INODE_NR_FIELDS(inode.v)) {                    \
                memset(&unpacked->_name, 0,                             \
                       sizeof(*unpacked) -                              \
                       offsetof(struct bch_inode_unpacked, _name));     \
                return 0;                                               \
        }                                                               \
                                                                        \
        ret = inode_decode_field(in, end, field, &field_bits);          \
        if (ret < 0)                                                    \
                return ret;                                             \
                                                                        \
        if (field_bits > sizeof(unpacked->_name) * 8)                   \
                return -1;                                              \
                                                                        \
        unpacked->_name = field[0];                                     \
        in += ret;

        BCH_INODE_FIELDS()
#undef  BCH_INODE_FIELD

        /* XXX: signal if there were more fields than expected? */

        return 0;
}

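/*
 * Sanity check an inode key before it's used: reject keys with a nonzero
 * offset, the wrong value size, an inode number in the wrong range, or
 * variable length fields that don't unpack.  Returns an error string, or
 * NULL if the key is valid.
 */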
static const char *bch2_inode_invalid(const struct bch_fs *c,
                                      struct bkey_s_c k)
{
        if (k.k->p.offset)
                return "nonzero offset";

        switch (k.k->type) {
        case BCH_INODE_FS: {
                struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
                struct bch_inode_unpacked unpacked;

                if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
                        return "incorrect value size";

                if (k.k->p.inode < BLOCKDEV_INODE_MAX)
                        return "fs inode in blockdev range";

                if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
                        return "invalid str hash type";

                if (bch2_inode_unpack(inode, &unpacked))
                        return "invalid variable length fields";

                return NULL;
        }
        case BCH_INODE_BLOCKDEV:
                if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev))
                        return "incorrect value size";

                if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
                        return "blockdev inode in fs range";

                return NULL;
        default:
                return "invalid type";
        }
}

static void bch2_inode_to_text(struct bch_fs *c, char *buf,
                               size_t size, struct bkey_s_c k)
{
        struct bkey_s_c_inode inode;
        struct bch_inode_unpacked unpacked;

        switch (k.k->type) {
        case BCH_INODE_FS:
                inode = bkey_s_c_to_inode(k);
                if (bch2_inode_unpack(inode, &unpacked)) {
                        scnprintf(buf, size, "(unpack error)");
                        break;
                }

                scnprintf(buf, size, "i_size %llu", unpacked.i_size);
                break;
        }
}

const struct bkey_ops bch2_bkey_inode_ops = {
        .key_invalid    = bch2_inode_invalid,
        .val_to_text    = bch2_inode_to_text,
};

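/*
 * Initialize a new in-memory inode: zero it, pick a random hash seed, stash
 * the filesystem's string hash type in i_flags, and set all four timestamps
 * to the current time.
 */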
void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
                     uid_t uid, gid_t gid, umode_t mode, dev_t rdev)
{
        s64 now = timespec_to_bch2_time(c, CURRENT_TIME);

        memset(inode_u, 0, sizeof(*inode_u));

        /* ick */
        inode_u->i_flags |= c->sb.str_hash_type << INODE_STR_HASH_OFFSET;
        get_random_bytes(&inode_u->i_hash_seed, sizeof(inode_u->i_hash_seed));

        inode_u->i_mode         = mode;
        inode_u->i_uid          = uid;
        inode_u->i_gid          = gid;
        inode_u->i_dev          = rdev;
        inode_u->i_atime        = now;
        inode_u->i_mtime        = now;
        inode_u->i_ctime        = now;
        inode_u->i_otime        = now;
}

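/*
 * Allocate an inode number between @min and @max and insert @inode at it:
 * scan forward from *hint for an unused slot, retrying the insert on -EINTR,
 * and wrap around to @min once if the first pass didn't start from @min.  On
 * success *hint is set to the number just past the one allocated; returns
 * -ENOSPC if no free slot was found.
 */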
int bch2_inode_create(struct bch_fs *c, struct bkey_i *inode,
                      u64 min, u64 max, u64 *hint)
{
        struct btree_iter iter;
        bool searched_from_start = false;
        int ret;

        if (!max)
                max = ULLONG_MAX;

        if (c->opts.inodes_32bit)
                max = min_t(u64, max, U32_MAX);

        if (*hint >= max || *hint < min)
                *hint = min;

        if (*hint == min)
                searched_from_start = true;
again:
        bch2_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(*hint, 0));

        while (1) {
                struct bkey_s_c k = bch2_btree_iter_peek_with_holes(&iter);

                ret = btree_iter_err(k);
                if (ret) {
                        bch2_btree_iter_unlock(&iter);
                        return ret;
                }

                if (k.k->type < BCH_INODE_FS) {
                        inode->k.p = k.k->p;

                        pr_debug("inserting inode %llu (size %u)",
                                 inode->k.p.inode, inode->k.u64s);

                        ret = bch2_btree_insert_at(c, NULL, NULL, NULL,
                                        BTREE_INSERT_ATOMIC,
                                        BTREE_INSERT_ENTRY(&iter, inode));

                        if (ret == -EINTR)
                                continue;

                        bch2_btree_iter_unlock(&iter);
                        if (!ret)
                                *hint = k.k->p.inode + 1;

                        return ret;
                } else {
                        if (iter.pos.inode == max)
                                break;
                        /* slot used */
                        bch2_btree_iter_advance_pos(&iter);
                }
        }
        bch2_btree_iter_unlock(&iter);

        if (!searched_from_start) {
                /* Retry from start */
                *hint = min;
                searched_from_start = true;
                goto again;
        }

        return -ENOSPC;
}

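/*
 * Delete the extents of inode @inode_nr from offset @new_size to the end of
 * the inode.
 */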
int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size,
                        struct extent_insert_hook *hook, u64 *journal_seq)
{
        return bch2_discard(c, POS(inode_nr, new_size), POS(inode_nr + 1, 0),
                            ZERO_VERSION, NULL, hook, journal_seq);
}

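/*
 * Delete inode @inode_nr: drop its extents, xattrs and dirents, then delete
 * the inode key itself.
 */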
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
{
        struct bkey_i delete;
        int ret;

        ret = bch2_inode_truncate(c, inode_nr, 0, NULL, NULL);
        if (ret < 0)
                return ret;

        ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS,
                                      POS(inode_nr, 0),
                                      POS(inode_nr + 1, 0),
                                      ZERO_VERSION, NULL, NULL, NULL);
        if (ret < 0)
                return ret;

        /*
         * If this was a directory, there shouldn't be any real dirents left -
         * but there could be whiteouts (from hash collisions) that we should
         * delete:
         *
         * XXX: the dirent code could ideally delete whiteouts when they're no
         * longer needed
         */
        ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
                                      POS(inode_nr, 0),
                                      POS(inode_nr + 1, 0),
                                      ZERO_VERSION, NULL, NULL, NULL);
        if (ret < 0)
                return ret;

        bkey_init(&delete.k);
        delete.k.p.inode = inode_nr;

        return bch2_btree_insert(c, BTREE_ID_INODES, &delete, NULL,
                                 NULL, NULL, BTREE_INSERT_NOFAIL);
}

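/*
 * Look up inode @inode_nr and unpack it into @inode.  Returns 0 on success,
 * -ENOENT if no inode exists at that number, or an error from the btree
 * lookup or unpack.
 */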
int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr,
                            struct bch_inode_unpacked *inode)
{
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret = -ENOENT;

        for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES,
                                      POS(inode_nr, 0), k) {
                switch (k.k->type) {
                case BCH_INODE_FS:
                        ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
                        break;
                default:
                        /* hole, not found */
                        break;
                }

                break;
        }

        return bch2_btree_iter_unlock(&iter) ?: ret;
}

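/*
 * Scan the blockdev range of the inodes btree for a cached device inode
 * whose uuid matches @uuid, copying the key into @ret.  Returns 0 if found,
 * -ENOENT otherwise.
 */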
int bch2_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid,
                                       struct bkey_i_inode_blockdev *ret)
{
        struct btree_iter iter;
        struct bkey_s_c k;

        for_each_btree_key(&iter, c, BTREE_ID_INODES, POS(0, 0), k) {
                if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
                        break;

                if (k.k->type == BCH_INODE_BLOCKDEV) {
                        struct bkey_s_c_inode_blockdev inode =
                                bkey_s_c_to_inode_blockdev(k);

                        pr_debug("found inode %llu: %pU (u64s %u)",
                                 inode.k->p.inode, inode.v->i_uuid.b,
                                 inode.k->u64s);

                        if (CACHED_DEV(inode.v) &&
                            !memcmp(uuid, &inode.v->i_uuid, 16)) {
                                bkey_reassemble(&ret->k_i, k);
                                bch2_btree_iter_unlock(&iter);
                                return 0;
                        }
                }

                bch2_btree_iter_cond_resched(&iter);
        }
        bch2_btree_iter_unlock(&iter);
        return -ENOENT;
}