verity.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * fs/ext4/verity.c: fs-verity support for ext4
  4. *
  5. * Copyright 2019 Google LLC
  6. */
  7. /*
  8. * Implementation of fsverity_operations for ext4.
  9. *
  10. * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
  11. * the end of the file, starting at the first 64K boundary beyond i_size. This
  12. * approach works because (a) verity files are readonly, and (b) pages fully
  13. * beyond i_size aren't visible to userspace but can be read/written internally
  14. * by ext4 with only some relatively small changes to ext4. This approach
  15. * avoids having to depend on the EA_INODE feature and on rearchitecturing
  16. * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
  17. * to support encrypting xattrs. Note that the verity metadata *must* be
  18. * encrypted when the file is, since it contains hashes of the plaintext data.
  19. *
  20. * Using a 64K boundary rather than a 4K one keeps things ready for
  21. * architectures with 64K pages, and it doesn't necessarily waste space on-disk
  22. * since there can be a hole between i_size and the start of the Merkle tree.
  23. */
  24. #include <linux/quotaops.h>
  25. #include "ext4.h"
  26. #include "ext4_extents.h"
  27. #include "ext4_jbd2.h"
  28. static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
  29. {
  30. return round_up(inode->i_size, 65536);
  31. }
  32. /*
  33. * Read some verity metadata from the inode. __vfs_read() can't be used because
  34. * we need to read beyond i_size.
  35. */
  36. static int pagecache_read(struct inode *inode, void *buf, size_t count,
  37. loff_t pos)
  38. {
  39. while (count) {
  40. struct folio *folio;
  41. size_t n;
  42. folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT,
  43. NULL);
  44. if (IS_ERR(folio))
  45. return PTR_ERR(folio);
  46. n = memcpy_from_file_folio(buf, folio, pos, count);
  47. folio_put(folio);
  48. buf += n;
  49. pos += n;
  50. count -= n;
  51. }
  52. return 0;
  53. }
  54. /*
  55. * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
  56. * kernel_write() can't be used because the file descriptor is readonly.
  57. */
  58. static int pagecache_write(struct inode *inode, const void *buf, size_t count,
  59. loff_t pos)
  60. {
  61. struct address_space *mapping = inode->i_mapping;
  62. const struct address_space_operations *aops = mapping->a_ops;
  63. if (pos + count > inode->i_sb->s_maxbytes)
  64. return -EFBIG;
  65. while (count) {
  66. size_t n = min_t(size_t, count,
  67. PAGE_SIZE - offset_in_page(pos));
  68. struct folio *folio;
  69. void *fsdata = NULL;
  70. int res;
  71. res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata);
  72. if (res)
  73. return res;
  74. memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n);
  75. res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata);
  76. if (res < 0)
  77. return res;
  78. if (res != n)
  79. return -EIO;
  80. buf += n;
  81. pos += n;
  82. count -= n;
  83. }
  84. return 0;
  85. }
  86. static int ext4_begin_enable_verity(struct file *filp)
  87. {
  88. struct inode *inode = file_inode(filp);
  89. const int credits = 2; /* superblock and inode for ext4_orphan_add() */
  90. handle_t *handle;
  91. int err;
  92. if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
  93. return -EINVAL;
  94. if (ext4_verity_in_progress(inode))
  95. return -EBUSY;
  96. /*
  97. * Since the file was opened readonly, we have to initialize the jbd
  98. * inode and quotas here and not rely on ->open() doing it. This must
  99. * be done before evicting the inline data.
  100. */
  101. err = ext4_inode_attach_jinode(inode);
  102. if (err)
  103. return err;
  104. err = dquot_initialize(inode);
  105. if (err)
  106. return err;
  107. err = ext4_convert_inline_data(inode);
  108. if (err)
  109. return err;
  110. if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
  111. ext4_warning_inode(inode,
  112. "verity is only allowed on extent-based files");
  113. return -EOPNOTSUPP;
  114. }
  115. /*
  116. * ext4 uses the last allocated block to find the verity descriptor, so
  117. * we must remove any other blocks past EOF which might confuse things.
  118. */
  119. err = ext4_truncate(inode);
  120. if (err)
  121. return err;
  122. handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
  123. if (IS_ERR(handle))
  124. return PTR_ERR(handle);
  125. err = ext4_orphan_add(handle, inode);
  126. if (err == 0)
  127. ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
  128. ext4_journal_stop(handle);
  129. return err;
  130. }
  131. /*
  132. * ext4 stores the verity descriptor beginning on the next filesystem block
  133. * boundary after the Merkle tree. Then, the descriptor size is stored in the
  134. * last 4 bytes of the last allocated filesystem block --- which is either the
  135. * block in which the descriptor ends, or the next block after that if there
  136. * weren't at least 4 bytes remaining.
  137. *
  138. * We can't simply store the descriptor in an xattr because it *must* be
  139. * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
  140. * xattrs. Also, if the descriptor includes a large signature blob it may be
  141. * too large to store in an xattr without the EA_INODE feature.
  142. */
  143. static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
  144. size_t desc_size, u64 merkle_tree_size)
  145. {
  146. const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
  147. merkle_tree_size, i_blocksize(inode));
  148. const u64 desc_end = desc_pos + desc_size;
  149. const __le32 desc_size_disk = cpu_to_le32(desc_size);
  150. const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
  151. i_blocksize(inode)) -
  152. sizeof(desc_size_disk);
  153. int err;
  154. err = pagecache_write(inode, desc, desc_size, desc_pos);
  155. if (err)
  156. return err;
  157. return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
  158. desc_size_pos);
  159. }
  160. static int ext4_end_enable_verity(struct file *filp, const void *desc,
  161. size_t desc_size, u64 merkle_tree_size)
  162. {
  163. struct inode *inode = file_inode(filp);
  164. const int credits = 2; /* superblock and inode for ext4_orphan_del() */
  165. handle_t *handle;
  166. struct ext4_iloc iloc;
  167. int err = 0;
  168. /*
  169. * If an error already occurred (which fs/verity/ signals by passing
  170. * desc == NULL), then only clean-up is needed.
  171. */
  172. if (desc == NULL)
  173. goto cleanup;
  174. /* Append the verity descriptor. */
  175. err = ext4_write_verity_descriptor(inode, desc, desc_size,
  176. merkle_tree_size);
  177. if (err)
  178. goto cleanup;
  179. /*
  180. * Write all pages (both data and verity metadata). Note that this must
  181. * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
  182. * beyond i_size won't be written properly. For crash consistency, this
  183. * also must happen before the verity inode flag gets persisted.
  184. */
  185. err = filemap_write_and_wait(inode->i_mapping);
  186. if (err)
  187. goto cleanup;
  188. /*
  189. * Finally, set the verity inode flag and remove the inode from the
  190. * orphan list (in a single transaction).
  191. */
  192. handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
  193. if (IS_ERR(handle)) {
  194. err = PTR_ERR(handle);
  195. goto cleanup;
  196. }
  197. ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_VERITY, handle);
  198. err = ext4_orphan_del(handle, inode);
  199. if (err)
  200. goto stop_and_cleanup;
  201. err = ext4_reserve_inode_write(handle, inode, &iloc);
  202. if (err)
  203. goto stop_and_cleanup;
  204. ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
  205. ext4_set_inode_flags(inode, false);
  206. err = ext4_mark_iloc_dirty(handle, inode, &iloc);
  207. if (err)
  208. goto stop_and_cleanup;
  209. ext4_journal_stop(handle);
  210. ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
  211. return 0;
  212. stop_and_cleanup:
  213. ext4_journal_stop(handle);
  214. cleanup:
  215. /*
  216. * Verity failed to be enabled, so clean up by truncating any verity
  217. * metadata that was written beyond i_size (both from cache and from
  218. * disk), removing the inode from the orphan list (if it wasn't done
  219. * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
  220. */
  221. truncate_inode_pages(inode->i_mapping, inode->i_size);
  222. ext4_truncate(inode);
  223. ext4_orphan_del(NULL, inode);
  224. ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
  225. return err;
  226. }
  227. static int ext4_get_verity_descriptor_location(struct inode *inode,
  228. size_t *desc_size_ret,
  229. u64 *desc_pos_ret)
  230. {
  231. struct ext4_ext_path *path;
  232. struct ext4_extent *last_extent;
  233. u32 end_lblk;
  234. u64 desc_size_pos;
  235. __le32 desc_size_disk;
  236. u32 desc_size;
  237. u64 desc_pos;
  238. int err;
  239. /*
  240. * Descriptor size is in last 4 bytes of last allocated block.
  241. * See ext4_write_verity_descriptor().
  242. */
  243. if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
  244. EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
  245. return -EFSCORRUPTED;
  246. }
  247. path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
  248. if (IS_ERR(path))
  249. return PTR_ERR(path);
  250. last_extent = path[path->p_depth].p_ext;
  251. if (!last_extent) {
  252. EXT4_ERROR_INODE(inode, "verity file has no extents");
  253. ext4_free_ext_path(path);
  254. return -EFSCORRUPTED;
  255. }
  256. end_lblk = le32_to_cpu(last_extent->ee_block) +
  257. ext4_ext_get_actual_len(last_extent);
  258. desc_size_pos = EXT4_LBLK_TO_B(inode, end_lblk);
  259. ext4_free_ext_path(path);
  260. if (desc_size_pos < sizeof(desc_size_disk))
  261. goto bad;
  262. desc_size_pos -= sizeof(desc_size_disk);
  263. err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
  264. desc_size_pos);
  265. if (err)
  266. return err;
  267. desc_size = le32_to_cpu(desc_size_disk);
  268. /*
  269. * The descriptor is stored just before the desc_size_disk, but starting
  270. * on a filesystem block boundary.
  271. */
  272. if (desc_size > INT_MAX || desc_size > desc_size_pos)
  273. goto bad;
  274. desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
  275. if (desc_pos < ext4_verity_metadata_pos(inode))
  276. goto bad;
  277. *desc_size_ret = desc_size;
  278. *desc_pos_ret = desc_pos;
  279. return 0;
  280. bad:
  281. EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
  282. return -EFSCORRUPTED;
  283. }
  284. static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
  285. size_t buf_size)
  286. {
  287. size_t desc_size = 0;
  288. u64 desc_pos = 0;
  289. int err;
  290. err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
  291. if (err)
  292. return err;
  293. if (buf_size) {
  294. if (desc_size > buf_size)
  295. return -ERANGE;
  296. err = pagecache_read(inode, buf, desc_size, desc_pos);
  297. if (err)
  298. return err;
  299. }
  300. return desc_size;
  301. }
  302. static struct page *ext4_read_merkle_tree_page(struct inode *inode,
  303. pgoff_t index)
  304. {
  305. index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
  306. return generic_read_merkle_tree_page(inode, index);
  307. }
  308. static void ext4_readahead_merkle_tree(struct inode *inode, pgoff_t index,
  309. unsigned long nr_pages)
  310. {
  311. index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
  312. generic_readahead_merkle_tree(inode, index, nr_pages);
  313. }
  314. static int ext4_write_merkle_tree_block(struct file *file, const void *buf,
  315. u64 pos, unsigned int size)
  316. {
  317. pos += ext4_verity_metadata_pos(file_inode(file));
  318. return pagecache_write(file_inode(file), buf, size, pos);
  319. }
  320. const struct fsverity_operations ext4_verityops = {
  321. .begin_enable_verity = ext4_begin_enable_verity,
  322. .end_enable_verity = ext4_end_enable_verity,
  323. .get_verity_descriptor = ext4_get_verity_descriptor,
  324. .read_merkle_tree_page = ext4_read_merkle_tree_page,
  325. .readahead_merkle_tree = ext4_readahead_merkle_tree,
  326. .write_merkle_tree_block = ext4_write_merkle_tree_block,
  327. };