move_extent.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
  1. // SPDX-License-Identifier: LGPL-2.1
  2. /*
  3. * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
  4. * Written by Takashi Sato <t-sato@yk.jp.nec.com>
  5. * Akira Fujita <a-fujita@rs.jp.nec.com>
  6. */
  7. #include <linux/fs.h>
  8. #include <linux/quotaops.h>
  9. #include <linux/slab.h>
  10. #include <linux/sched/mm.h>
  11. #include "ext4_jbd2.h"
  12. #include "ext4.h"
  13. #include "ext4_extents.h"
  14. #include <trace/events/ext4.h>
/*
 * State shared by the move helpers in this file: the two inodes taking
 * part in the exchange, the origin range currently being processed and
 * the donor block it is exchanged with.
 */
struct mext_data {
	struct inode *orig_inode;	/* Origin file inode */
	struct inode *donor_inode;	/* Donor file inode */
	struct ext4_map_blocks orig_map;/* Origin file's move mapping */
	ext4_lblk_t donor_lblk;		/* Start block of the donor file */
};
  21. /**
  22. * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem
  23. * @first: inode to be locked
  24. * @second: inode to be locked
  25. *
  26. * Acquire write lock of i_data_sem of the two inodes
  27. */
  28. void
  29. ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
  30. {
  31. if (first < second) {
  32. down_write(&EXT4_I(first)->i_data_sem);
  33. down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
  34. } else {
  35. down_write(&EXT4_I(second)->i_data_sem);
  36. down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
  37. }
  38. }
  39. /**
  40. * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
  41. *
  42. * @orig_inode: original inode structure to be released its lock first
  43. * @donor_inode: donor inode structure to be released its lock second
  44. * Release write lock of i_data_sem of two inodes (orig and donor).
  45. */
  46. void
  47. ext4_double_up_write_data_sem(struct inode *orig_inode,
  48. struct inode *donor_inode)
  49. {
  50. up_write(&EXT4_I(orig_inode)->i_data_sem);
  51. up_write(&EXT4_I(donor_inode)->i_data_sem);
  52. }
  53. /* Grab and lock folio on both @inode1 and @inode2 by inode order. */
  54. static int mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
  55. pgoff_t index1, pgoff_t index2, size_t len,
  56. struct folio *folio[2])
  57. {
  58. struct address_space *mapping[2];
  59. unsigned int flags;
  60. fgf_t fgp_flags = FGP_WRITEBEGIN;
  61. BUG_ON(!inode1 || !inode2);
  62. if (inode1 < inode2) {
  63. mapping[0] = inode1->i_mapping;
  64. mapping[1] = inode2->i_mapping;
  65. } else {
  66. swap(index1, index2);
  67. mapping[0] = inode2->i_mapping;
  68. mapping[1] = inode1->i_mapping;
  69. }
  70. flags = memalloc_nofs_save();
  71. fgp_flags |= fgf_set_order(len);
  72. folio[0] = __filemap_get_folio(mapping[0], index1, fgp_flags,
  73. mapping_gfp_mask(mapping[0]));
  74. if (IS_ERR(folio[0])) {
  75. memalloc_nofs_restore(flags);
  76. return PTR_ERR(folio[0]);
  77. }
  78. folio[1] = __filemap_get_folio(mapping[1], index2, fgp_flags,
  79. mapping_gfp_mask(mapping[1]));
  80. memalloc_nofs_restore(flags);
  81. if (IS_ERR(folio[1])) {
  82. folio_unlock(folio[0]);
  83. folio_put(folio[0]);
  84. return PTR_ERR(folio[1]);
  85. }
  86. /*
  87. * __filemap_get_folio() may not wait on folio's writeback if
  88. * BDI not demand that. But it is reasonable to be very conservative
  89. * here and explicitly wait on folio's writeback
  90. */
  91. folio_wait_writeback(folio[0]);
  92. folio_wait_writeback(folio[1]);
  93. if (inode1 > inode2)
  94. swap(folio[0], folio[1]);
  95. return 0;
  96. }
  97. static void mext_folio_double_unlock(struct folio *folio[2])
  98. {
  99. folio_unlock(folio[0]);
  100. folio_put(folio[0]);
  101. folio_unlock(folio[1]);
  102. folio_put(folio[1]);
  103. }
  104. /* Force folio buffers uptodate w/o dropping folio's lock */
  105. static int mext_folio_mkuptodate(struct folio *folio, size_t from, size_t to)
  106. {
  107. struct inode *inode = folio->mapping->host;
  108. sector_t block;
  109. struct buffer_head *bh, *head;
  110. unsigned int blocksize, block_start, block_end;
  111. int nr = 0;
  112. bool partial = false;
  113. BUG_ON(!folio_test_locked(folio));
  114. BUG_ON(folio_test_writeback(folio));
  115. if (folio_test_uptodate(folio))
  116. return 0;
  117. blocksize = i_blocksize(inode);
  118. head = folio_buffers(folio);
  119. if (!head)
  120. head = create_empty_buffers(folio, blocksize, 0);
  121. block = folio_pos(folio) >> inode->i_blkbits;
  122. block_end = 0;
  123. bh = head;
  124. do {
  125. block_start = block_end;
  126. block_end = block_start + blocksize;
  127. if (block_end <= from || block_start >= to) {
  128. if (!buffer_uptodate(bh))
  129. partial = true;
  130. continue;
  131. }
  132. if (buffer_uptodate(bh))
  133. continue;
  134. if (!buffer_mapped(bh)) {
  135. int err = ext4_get_block(inode, block, bh, 0);
  136. if (err)
  137. return err;
  138. if (!buffer_mapped(bh)) {
  139. folio_zero_range(folio, block_start, blocksize);
  140. set_buffer_uptodate(bh);
  141. continue;
  142. }
  143. }
  144. lock_buffer(bh);
  145. if (buffer_uptodate(bh)) {
  146. unlock_buffer(bh);
  147. continue;
  148. }
  149. ext4_read_bh_nowait(bh, 0, NULL, false);
  150. nr++;
  151. } while (block++, (bh = bh->b_this_page) != head);
  152. /* No io required */
  153. if (!nr)
  154. goto out;
  155. bh = head;
  156. do {
  157. if (bh_offset(bh) + blocksize <= from)
  158. continue;
  159. if (bh_offset(bh) >= to)
  160. break;
  161. wait_on_buffer(bh);
  162. if (buffer_uptodate(bh))
  163. continue;
  164. return -EIO;
  165. } while ((bh = bh->b_this_page) != head);
  166. out:
  167. if (!partial)
  168. folio_mark_uptodate(folio);
  169. return 0;
  170. }
  171. enum mext_move_type {MEXT_SKIP_EXTENT, MEXT_MOVE_EXTENT, MEXT_COPY_DATA};
  172. /*
  173. * Start to move extent between the origin inode and the donor inode,
  174. * hold one folio for each inode and check the candidate moving extent
  175. * mapping status again.
  176. */
  177. static int mext_move_begin(struct mext_data *mext, struct folio *folio[2],
  178. enum mext_move_type *move_type)
  179. {
  180. struct inode *orig_inode = mext->orig_inode;
  181. struct inode *donor_inode = mext->donor_inode;
  182. unsigned int blkbits = orig_inode->i_blkbits;
  183. struct ext4_map_blocks donor_map = {0};
  184. loff_t orig_pos, donor_pos;
  185. size_t move_len;
  186. int ret;
  187. orig_pos = ((loff_t)mext->orig_map.m_lblk) << blkbits;
  188. donor_pos = ((loff_t)mext->donor_lblk) << blkbits;
  189. ret = mext_folio_double_lock(orig_inode, donor_inode,
  190. orig_pos >> PAGE_SHIFT, donor_pos >> PAGE_SHIFT,
  191. ((size_t)mext->orig_map.m_len) << blkbits, folio);
  192. if (ret)
  193. return ret;
  194. /*
  195. * Check the origin inode's mapping information again under the
  196. * folio lock, as we do not hold the i_data_sem at all times, and
  197. * it may change during the concurrent write-back operation.
  198. */
  199. if (mext->orig_map.m_seq != READ_ONCE(EXT4_I(orig_inode)->i_es_seq)) {
  200. ret = -ESTALE;
  201. goto error;
  202. }
  203. /* Adjust the moving length according to the length of shorter folio. */
  204. move_len = umin(folio_pos(folio[0]) + folio_size(folio[0]) - orig_pos,
  205. folio_pos(folio[1]) + folio_size(folio[1]) - donor_pos);
  206. move_len >>= blkbits;
  207. if (move_len < mext->orig_map.m_len)
  208. mext->orig_map.m_len = move_len;
  209. donor_map.m_lblk = mext->donor_lblk;
  210. donor_map.m_len = mext->orig_map.m_len;
  211. donor_map.m_flags = 0;
  212. ret = ext4_map_blocks(NULL, donor_inode, &donor_map, 0);
  213. if (ret < 0)
  214. goto error;
  215. /* Adjust the moving length according to the donor mapping length. */
  216. mext->orig_map.m_len = donor_map.m_len;
  217. /* Skip moving if the donor range is a hole or a delalloc extent. */
  218. if (!(donor_map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN)))
  219. *move_type = MEXT_SKIP_EXTENT;
  220. /* If both mapping ranges are unwritten, no need to copy data. */
  221. else if ((mext->orig_map.m_flags & EXT4_MAP_UNWRITTEN) &&
  222. (donor_map.m_flags & EXT4_MAP_UNWRITTEN))
  223. *move_type = MEXT_MOVE_EXTENT;
  224. else
  225. *move_type = MEXT_COPY_DATA;
  226. return 0;
  227. error:
  228. mext_folio_double_unlock(folio);
  229. return ret;
  230. }
  231. /*
  232. * Re-create the new moved mapping buffers of the original inode and commit
  233. * the entire written range.
  234. */
  235. static int mext_folio_mkwrite(struct inode *inode, struct folio *folio,
  236. size_t from, size_t to)
  237. {
  238. unsigned int blocksize = i_blocksize(inode);
  239. struct buffer_head *bh, *head;
  240. size_t block_start, block_end;
  241. sector_t block;
  242. int ret;
  243. head = folio_buffers(folio);
  244. if (!head)
  245. head = create_empty_buffers(folio, blocksize, 0);
  246. block = folio_pos(folio) >> inode->i_blkbits;
  247. block_end = 0;
  248. bh = head;
  249. do {
  250. block_start = block_end;
  251. block_end = block_start + blocksize;
  252. if (block_end <= from || block_start >= to)
  253. continue;
  254. ret = ext4_get_block(inode, block, bh, 0);
  255. if (ret)
  256. return ret;
  257. } while (block++, (bh = bh->b_this_page) != head);
  258. block_commit_write(folio, from, to);
  259. return 0;
  260. }
/*
 * Save the data in original inode extent blocks and replace one folio size
 * aligned original inode extent with one or one partial donor inode extent,
 * and then write out the saved data in new original inode blocks. Pass out
 * the replaced block count through m_len. Return 0 on success, and an error
 * code otherwise.
 *
 * @mext:  move context; mext->orig_map.m_len may be shortened by
 *         mext_move_begin() to the length that is actually processed.
 * @m_len: set to 0 on entry, then to the block count returned by
 *         ext4_swap_extents(); reset to 0 if the swap had to be reverted.
 *
 * A skipped range (MEXT_SKIP_EXTENT) returns 0 with *@m_len == 0.
 * -EBUSY is returned when one of the two folios cannot be released.
 */
static int mext_move_extent(struct mext_data *mext, u64 *m_len)
{
	struct inode *orig_inode = mext->orig_inode;
	struct inode *donor_inode = mext->donor_inode;
	struct ext4_map_blocks *orig_map = &mext->orig_map;
	unsigned int blkbits = orig_inode->i_blkbits;
	struct folio *folio[2] = {NULL, NULL};
	/* from is only assigned, and only read, on the MEXT_COPY_DATA path */
	loff_t from, length;
	enum mext_move_type move_type = 0;
	handle_t *handle;
	u64 r_len = 0;
	unsigned int credits;
	int ret, ret2;

	*m_len = 0;
	trace_ext4_move_extent_enter(orig_inode, orig_map, donor_inode,
				     mext->donor_lblk);
	/* NOTE(review): the "* 2" presumably covers both inodes - confirm */
	credits = ext4_chunk_trans_extent(orig_inode, 0) * 2;
	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}
	ext4_fc_mark_ineligible(orig_inode->i_sb, EXT4_FC_REASON_MOVE_EXT,
				handle);

	/* On success both folios are locked and move_type is set. */
	ret = mext_move_begin(mext, folio, &move_type);
	if (ret)
		goto stop_handle;
	/* Nothing to exchange: ret is 0 here and *m_len stays 0. */
	if (move_type == MEXT_SKIP_EXTENT)
		goto unlock;

	/*
	 * Copy the data. First, read the original inode data into the page
	 * cache. Then, release the existing mapping relationships and swap
	 * the extent. Finally, re-establish the new mapping relationships
	 * and dirty the page cache.
	 */
	if (move_type == MEXT_COPY_DATA) {
		from = offset_in_folio(folio[0],
				((loff_t)orig_map->m_lblk) << blkbits);
		length = ((loff_t)orig_map->m_len) << blkbits;
		ret = mext_folio_mkuptodate(folio[0], from, from + length);
		if (ret)
			goto unlock;
	}

	/* Both folios must be released before the extents are swapped. */
	if (!filemap_release_folio(folio[0], 0) ||
	    !filemap_release_folio(folio[1], 0)) {
		ret = -EBUSY;
		goto unlock;
	}

	/* Move extent */
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	*m_len = ext4_swap_extents(handle, orig_inode, donor_inode,
				   orig_map->m_lblk, mext->donor_lblk,
				   orig_map->m_len, 1, &ret);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);

	/* A short-length swap cannot occur after a successful swap extent. */
	if (WARN_ON_ONCE(!ret && (*m_len != orig_map->m_len)))
		ret = -EIO;
	/* Done if nothing was swapped or if there is no data to carry over. */
	if (!(*m_len) || (move_type == MEXT_MOVE_EXTENT))
		goto unlock;

	/* Copy data */
	length = (*m_len) << blkbits;
	ret2 = mext_folio_mkwrite(orig_inode, folio[0], from, from + length);
	if (ret2) {
		/* Keep an earlier error from the swap, if there was one. */
		if (!ret)
			ret = ret2;
		goto repair_branches;
	}
	/*
	 * Even in case of data=writeback it is reasonable to pin
	 * inode to transaction, to prevent unexpected data loss.
	 */
	ret2 = ext4_jbd2_inode_add_write(handle, orig_inode,
			((loff_t)orig_map->m_lblk) << blkbits, length);
	if (!ret)
		ret = ret2;
unlock:
	mext_folio_double_unlock(folio);
stop_handle:
	ext4_journal_stop(handle);
out:
	trace_ext4_move_extent_exit(orig_inode, orig_map->m_lblk, donor_inode,
				    mext->donor_lblk, orig_map->m_len, *m_len,
				    move_type, ret);
	return ret;

repair_branches:
	/*
	 * Writing the data into the new blocks failed: swap the *m_len blocks
	 * back, donor first, so that the original inode gets its blocks back.
	 */
	ret2 = 0;
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	r_len = ext4_swap_extents(handle, donor_inode, orig_inode,
				  mext->donor_lblk, orig_map->m_lblk,
				  *m_len, 0, &ret2);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (ret2 || r_len != *m_len) {
		ext4_error_inode_block(orig_inode, (sector_t)(orig_map->m_lblk),
				       EIO, "Unable to copy data block, data will be lost!");
		ret = -EIO;
	}
	/* Report no blocks as moved, whether or not the revert succeeded. */
	*m_len = 0;
	goto unlock;
}
  367. /*
  368. * Check the validity of the basic filesystem environment and the
  369. * inodes' support status.
  370. */
  371. static int mext_check_validity(struct inode *orig_inode,
  372. struct inode *donor_inode)
  373. {
  374. struct super_block *sb = orig_inode->i_sb;
  375. /* origin and donor should be different inodes */
  376. if (orig_inode == donor_inode) {
  377. ext4_debug("ext4 move extent: The argument files should not be same inode [ino:orig %lu, donor %lu]\n",
  378. orig_inode->i_ino, donor_inode->i_ino);
  379. return -EINVAL;
  380. }
  381. /* origin and donor should belone to the same filesystem */
  382. if (orig_inode->i_sb != donor_inode->i_sb) {
  383. ext4_debug("ext4 move extent: The argument files should be in same FS [ino:orig %lu, donor %lu]\n",
  384. orig_inode->i_ino, donor_inode->i_ino);
  385. return -EINVAL;
  386. }
  387. /* Regular file check */
  388. if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
  389. ext4_debug("ext4 move extent: The argument files should be regular file [ino:orig %lu, donor %lu]\n",
  390. orig_inode->i_ino, donor_inode->i_ino);
  391. return -EINVAL;
  392. }
  393. if (ext4_has_feature_bigalloc(sb)) {
  394. ext4_msg(sb, KERN_ERR,
  395. "Online defrag not supported with bigalloc");
  396. return -EOPNOTSUPP;
  397. }
  398. if (IS_DAX(orig_inode)) {
  399. ext4_msg(sb, KERN_ERR,
  400. "Online defrag not supported with DAX");
  401. return -EOPNOTSUPP;
  402. }
  403. /*
  404. * TODO: it's not obvious how to swap blocks for inodes with full
  405. * journaling enabled.
  406. */
  407. if (ext4_should_journal_data(orig_inode) ||
  408. ext4_should_journal_data(donor_inode)) {
  409. ext4_msg(sb, KERN_ERR,
  410. "Online defrag not supported with data journaling");
  411. return -EOPNOTSUPP;
  412. }
  413. if (IS_ENCRYPTED(orig_inode) || IS_ENCRYPTED(donor_inode)) {
  414. ext4_msg(sb, KERN_ERR,
  415. "Online defrag not supported for encrypted files");
  416. return -EOPNOTSUPP;
  417. }
  418. /* Ext4 move extent supports only extent based file */
  419. if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS)) ||
  420. !(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
  421. ext4_msg(sb, KERN_ERR,
  422. "Online defrag not supported for non-extent files");
  423. return -EOPNOTSUPP;
  424. }
  425. if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
  426. ext4_debug("ext4 move extent: suid or sgid is set to donor file [ino:orig %lu, donor %lu]\n",
  427. orig_inode->i_ino, donor_inode->i_ino);
  428. return -EINVAL;
  429. }
  430. if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) {
  431. ext4_debug("ext4 move extent: donor should not be immutable or append file [ino:orig %lu, donor %lu]\n",
  432. orig_inode->i_ino, donor_inode->i_ino);
  433. return -EPERM;
  434. }
  435. /* Ext4 move extent does not support swap files */
  436. if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
  437. ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n",
  438. orig_inode->i_ino, donor_inode->i_ino);
  439. return -ETXTBSY;
  440. }
  441. if (ext4_is_quota_file(orig_inode) || ext4_is_quota_file(donor_inode)) {
  442. ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n",
  443. orig_inode->i_ino, donor_inode->i_ino);
  444. return -EOPNOTSUPP;
  445. }
  446. if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
  447. ext4_debug("ext4 move extent: File size is 0 byte\n");
  448. return -EINVAL;
  449. }
  450. return 0;
  451. }
  452. /*
  453. * Check the moving range of ext4_move_extents() whether the files can be
  454. * exchanged with each other, and adjust the length to fit within the file
  455. * size. Return 0 on success, or a negative error value on failure.
  456. */
  457. static int mext_check_adjust_range(struct inode *orig_inode,
  458. struct inode *donor_inode, __u64 orig_start,
  459. __u64 donor_start, __u64 *len)
  460. {
  461. __u64 orig_eof, donor_eof;
  462. /* Start offset should be same */
  463. if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
  464. (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
  465. ext4_debug("ext4 move extent: orig and donor's start offsets are not aligned [ino:orig %lu, donor %lu]\n",
  466. orig_inode->i_ino, donor_inode->i_ino);
  467. return -EINVAL;
  468. }
  469. if ((orig_start >= EXT_MAX_BLOCKS) ||
  470. (donor_start >= EXT_MAX_BLOCKS) ||
  471. (*len > EXT_MAX_BLOCKS) ||
  472. (donor_start + *len >= EXT_MAX_BLOCKS) ||
  473. (orig_start + *len >= EXT_MAX_BLOCKS)) {
  474. ext4_debug("ext4 move extent: Can't handle over [%u] blocks [ino:orig %lu, donor %lu]\n",
  475. EXT_MAX_BLOCKS,
  476. orig_inode->i_ino, donor_inode->i_ino);
  477. return -EINVAL;
  478. }
  479. orig_eof = EXT4_B_TO_LBLK(orig_inode, i_size_read(orig_inode));
  480. donor_eof = EXT4_B_TO_LBLK(donor_inode, i_size_read(donor_inode));
  481. if (orig_eof <= orig_start)
  482. *len = 0;
  483. else if (orig_eof < orig_start + *len - 1)
  484. *len = orig_eof - orig_start;
  485. if (donor_eof <= donor_start)
  486. *len = 0;
  487. else if (donor_eof < donor_start + *len - 1)
  488. *len = donor_eof - donor_start;
  489. if (!*len) {
  490. ext4_debug("ext4 move extent: len should not be 0 [ino:orig %lu, donor %lu]\n",
  491. orig_inode->i_ino, donor_inode->i_ino);
  492. return -EINVAL;
  493. }
  494. return 0;
  495. }
/**
 * ext4_move_extents - Exchange the specified range of a file
 *
 * @o_filp:	file structure of the original file
 * @d_filp:	file structure of the donor file
 * @orig_blk:	start offset in block for orig
 * @donor_blk:	start offset in block for donor
 * @len:	the number of blocks to be moved
 * @moved_len:	moved block length
 *
 * Both inodes are locked with lock_two_nondirectories() for the whole call.
 * The range is processed piece by piece: each piece of the original file is
 * mapped, and pieces that are holes or delalloc extents are skipped.
 *
 * This function returns 0 and moved block length is set in moved_len
 * if succeed, otherwise returns error value. On failure, @moved_len still
 * holds the number of blocks that were moved before the error occurred.
 */
int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
		      __u64 donor_blk, __u64 len, __u64 *moved_len)
{
	struct inode *orig_inode = file_inode(o_filp);
	struct inode *donor_inode = file_inode(d_filp);
	struct mext_data mext;
	struct super_block *sb = orig_inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int retries = 0;
	u64 m_len;
	int ret;

	*moved_len = 0;

	/* Protect orig and donor inodes against a truncate */
	lock_two_nondirectories(orig_inode, donor_inode);

	ret = mext_check_validity(orig_inode, donor_inode);
	if (ret)
		goto out;

	/* Wait for all existing dio workers */
	inode_dio_wait(orig_inode);
	inode_dio_wait(donor_inode);

	/* Check and adjust the specified move_extent range. */
	ret = mext_check_adjust_range(orig_inode, donor_inode, orig_blk,
				      donor_blk, &len);
	if (ret)
		goto out;

	mext.orig_inode = orig_inode;
	mext.donor_inode = donor_inode;
	while (len) {
		/* (Re)map the next piece of the original file. */
		mext.orig_map.m_lblk = orig_blk;
		mext.orig_map.m_len = len;
		mext.orig_map.m_flags = 0;
		mext.donor_lblk = donor_blk;

		ret = ext4_map_blocks(NULL, orig_inode, &mext.orig_map, 0);
		if (ret < 0)
			goto out;

		/* Skip moving if it is a hole or a delalloc extent. */
		if (mext.orig_map.m_flags &
		    (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN)) {
			/* m_len is always written by mext_move_extent(). */
			ret = mext_move_extent(&mext, &m_len);
			*moved_len += m_len;
			if (!ret)
				goto next;

			/* Move failed or partially failed. */
			if (m_len) {
				/* Step over the blocks that did get moved. */
				orig_blk += m_len;
				donor_blk += m_len;
				len -= m_len;
			}
			/* The origin mapping changed under us: map it again. */
			if (ret == -ESTALE)
				continue;
			if (ret == -ENOSPC &&
			    ext4_should_retry_alloc(sb, &retries))
				continue;
			/*
			 * A folio could not be released: force a journal
			 * commit and try again, a limited number of times.
			 */
			if (ret == -EBUSY &&
			    sbi->s_journal && retries++ < 4 &&
			    jbd2_journal_force_commit_nested(sbi->s_journal))
				continue;

			goto out;
		}
next:
		/*
		 * Advance by the length that was processed or skipped; a
		 * successful step also resets the retry counter.
		 */
		orig_blk += mext.orig_map.m_len;
		donor_blk += mext.orig_map.m_len;
		len -= mext.orig_map.m_len;
		retries = 0;
	}

out:
	/* Preallocations are only discarded if some blocks were moved. */
	if (*moved_len) {
		ext4_discard_preallocations(orig_inode);
		ext4_discard_preallocations(donor_inode);
	}

	unlock_two_nondirectories(orig_inode, donor_inode);

	return ret;
}