recovery.c 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * This file is part of UBIFS.
  4. *
  5. * Copyright (C) 2006-2008 Nokia Corporation
  6. *
  7. * Authors: Adrian Hunter
  8. * Artem Bityutskiy (Битюцкий Артём)
  9. */
  10. /*
  11. * This file implements functions needed to recover from unclean un-mounts.
  12. * When UBIFS is mounted, it checks a flag on the master node to determine if
  13. * an un-mount was completed successfully. If not, the process of mounting
  14. * incorporates additional checking and fixing of on-flash data structures.
  15. * UBIFS always cleans away all remnants of an unclean un-mount, so that
  16. * errors do not accumulate. However UBIFS defers recovery if it is mounted
  17. * read-only, and the flash is not modified in that case.
  18. *
  19. * The general UBIFS approach to the recovery is that it recovers from
  20. * corruptions which could be caused by power cuts, but it refuses to recover
  21. * from corruption caused by other reasons. And UBIFS tries to distinguish
  22. * between these 2 reasons of corruptions and silently recover in the former
  23. * case and loudly complain in the latter case.
  24. *
  25. * UBIFS writes only to erased LEBs, so it writes only to the flash space
  26. * containing only 0xFFs. UBIFS also always writes strictly from the beginning
  27. * of the LEB to the end. And UBIFS assumes that the underlying flash media
  28. * writes in @c->max_write_size bytes at a time.
  29. *
  30. * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
  31. * I/O unit corresponding to offset X to contain corrupted data, all the
  32. * following min. I/O units have to contain empty space (all 0xFFs). If this is
  33. * not true, the corruption cannot be the result of a power cut, and UBIFS
  34. * refuses to mount.
  35. */
  36. #include <linux/crc32.h>
  37. #include <linux/slab.h>
  38. #include "ubifs.h"
  /**
   * is_empty - determine whether a buffer is empty (contains all 0xff).
   * @buf: buffer to check (it is only read, never modified)
   * @len: number of bytes of @buf to check
   *
   * This function returns %1 if the first @len bytes of @buf are all 0xff and
   * %0 otherwise. If @len is zero or negative there is nothing to check and %1
   * is returned.
   */
  static int is_empty(const void *buf, int len)
  {
  	const uint8_t *p = buf;
  	int i;

  	for (i = 0; i < len; i++)
  		if (p[i] != 0xff)
  			return 0;
  	return 1;
  }
  /**
   * first_non_ff - locate the first byte of a buffer that is not 0xff.
   * @buf: buffer to search in
   * @len: length of buffer
   *
   * Returns the offset (counted from the start of @buf) of the first byte which
   * differs from 0xff, or %-1 when all @len bytes are 0xff.
   */
  static int first_non_ff(void *buf, int len)
  {
  	uint8_t *bytes = buf;
  	int pos = 0;

  	while (pos < len) {
  		if (bytes[pos] != 0xff)
  			return pos;
  		pos++;
  	}
  	return -1;
  }
  73. /**
  74. * get_master_node - get the last valid master node allowing for corruption.
  75. * @c: UBIFS file-system description object
  76. * @lnum: LEB number
  77. * @pbuf: buffer containing the LEB read, is returned here
  78. * @mst: master node, if found, is returned here
  79. * @cor: corruption, if found, is returned here
  80. *
  81. * This function allocates a buffer, reads the LEB into it, and finds and
  82. * returns the last valid master node allowing for one area of corruption.
  83. * The corrupt area, if there is one, must be consistent with the assumption
  84. * that it is the result of an unclean unmount while the master node was being
  85. * written. Under those circumstances, it is valid to use the previously written
  86. * master node.
  87. *
  88. * This function returns %0 on success and a negative error code on failure.
  89. */
  90. static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
  91. struct ubifs_mst_node **mst, void **cor)
  92. {
  93. const int sz = c->mst_node_alsz;
  94. int err, offs, len;
  95. void *sbuf, *buf;
  96. sbuf = vmalloc(c->leb_size);
  97. if (!sbuf)
  98. return -ENOMEM;
  99. err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
  100. if (err && err != -EBADMSG)
  101. goto out_free;
  102. /* Find the first position that is definitely not a node */
  103. offs = 0;
  104. buf = sbuf;
  105. len = c->leb_size;
  106. while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
  107. struct ubifs_ch *ch = buf;
  108. if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
  109. break;
  110. offs += sz;
  111. buf += sz;
  112. len -= sz;
  113. }
  114. /* See if there was a valid master node before that */
  115. if (offs) {
  116. int ret;
  117. offs -= sz;
  118. buf -= sz;
  119. len += sz;
  120. ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
  121. if (ret != SCANNED_A_NODE && offs) {
  122. /* Could have been corruption so check one place back */
  123. offs -= sz;
  124. buf -= sz;
  125. len += sz;
  126. ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
  127. if (ret != SCANNED_A_NODE)
  128. /*
  129. * We accept only one area of corruption because
  130. * we are assuming that it was caused while
  131. * trying to write a master node.
  132. */
  133. goto out_err;
  134. }
  135. if (ret == SCANNED_A_NODE) {
  136. struct ubifs_ch *ch = buf;
  137. if (ch->node_type != UBIFS_MST_NODE)
  138. goto out_err;
  139. dbg_rcvry("found a master node at %d:%d", lnum, offs);
  140. *mst = buf;
  141. offs += sz;
  142. buf += sz;
  143. len -= sz;
  144. }
  145. }
  146. /* Check for corruption */
  147. if (offs < c->leb_size) {
  148. if (!is_empty(buf, min_t(int, len, sz))) {
  149. *cor = buf;
  150. dbg_rcvry("found corruption at %d:%d", lnum, offs);
  151. }
  152. offs += sz;
  153. buf += sz;
  154. len -= sz;
  155. }
  156. /* Check remaining empty space */
  157. if (offs < c->leb_size)
  158. if (!is_empty(buf, len))
  159. goto out_err;
  160. *pbuf = sbuf;
  161. return 0;
  162. out_err:
  163. err = -EINVAL;
  164. out_free:
  165. vfree(sbuf);
  166. *mst = NULL;
  167. *cor = NULL;
  168. return err;
  169. }
  170. /**
  171. * write_rcvrd_mst_node - write recovered master node.
  172. * @c: UBIFS file-system description object
  173. * @mst: master node
  174. *
  175. * This function returns %0 on success and a negative error code on failure.
  176. */
  177. static int write_rcvrd_mst_node(struct ubifs_info *c,
  178. struct ubifs_mst_node *mst)
  179. {
  180. int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
  181. __le32 save_flags;
  182. dbg_rcvry("recovery");
  183. save_flags = mst->flags;
  184. mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
  185. err = ubifs_prepare_node_hmac(c, mst, UBIFS_MST_NODE_SZ,
  186. offsetof(struct ubifs_mst_node, hmac), 1);
  187. if (err)
  188. goto out;
  189. err = ubifs_leb_change(c, lnum, mst, sz);
  190. if (err)
  191. goto out;
  192. err = ubifs_leb_change(c, lnum + 1, mst, sz);
  193. if (err)
  194. goto out;
  195. out:
  196. mst->flags = save_flags;
  197. return err;
  198. }
  199. /**
  200. * ubifs_recover_master_node - recover the master node.
  201. * @c: UBIFS file-system description object
  202. *
  203. * This function recovers the master node from corruption that may occur due to
  204. * an unclean unmount.
  205. *
  206. * This function returns %0 on success and a negative error code on failure.
  207. */
  208. int ubifs_recover_master_node(struct ubifs_info *c)
  209. {
  210. void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
  211. struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
  212. const int sz = c->mst_node_alsz;
  213. int err, offs1, offs2;
  214. dbg_rcvry("recovery");
  215. err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
  216. if (err)
  217. goto out_free;
  218. err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
  219. if (err)
  220. goto out_free;
  221. if (mst1) {
  222. offs1 = (void *)mst1 - buf1;
  223. if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
  224. (offs1 == 0 && !cor1)) {
  225. /*
  226. * mst1 was written by recovery at offset 0 with no
  227. * corruption.
  228. */
  229. dbg_rcvry("recovery recovery");
  230. mst = mst1;
  231. } else if (mst2) {
  232. offs2 = (void *)mst2 - buf2;
  233. if (offs1 == offs2) {
  234. /* Same offset, so must be the same */
  235. if (ubifs_compare_master_node(c, mst1, mst2))
  236. goto out_err;
  237. mst = mst1;
  238. } else if (offs2 + sz == offs1) {
  239. /* 1st LEB was written, 2nd was not */
  240. if (cor1)
  241. goto out_err;
  242. mst = mst1;
  243. } else if (offs1 == 0 &&
  244. c->leb_size - offs2 - sz < sz) {
  245. /* 1st LEB was unmapped and written, 2nd not */
  246. if (cor1)
  247. goto out_err;
  248. mst = mst1;
  249. } else
  250. goto out_err;
  251. } else {
  252. /*
  253. * 2nd LEB was unmapped and about to be written, so
  254. * there must be only one master node in the first LEB
  255. * and no corruption.
  256. */
  257. if (offs1 != 0 || cor1)
  258. goto out_err;
  259. mst = mst1;
  260. }
  261. } else {
  262. if (!mst2)
  263. goto out_err;
  264. /*
  265. * 1st LEB was unmapped and about to be written, so there must
  266. * be no room left in 2nd LEB.
  267. */
  268. offs2 = (void *)mst2 - buf2;
  269. if (offs2 + sz + sz <= c->leb_size)
  270. goto out_err;
  271. mst = mst2;
  272. }
  273. ubifs_msg(c, "recovered master node from LEB %d",
  274. (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
  275. memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
  276. if (c->ro_mount) {
  277. /* Read-only mode. Keep a copy for switching to rw mode */
  278. c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
  279. if (!c->rcvrd_mst_node) {
  280. err = -ENOMEM;
  281. goto out_free;
  282. }
  283. memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
  284. /*
  285. * We had to recover the master node, which means there was an
  286. * unclean reboot. However, it is possible that the master node
  287. * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
  288. * E.g., consider the following chain of events:
  289. *
  290. * 1. UBIFS was cleanly unmounted, so the master node is clean
  291. * 2. UBIFS is being mounted R/W and starts changing the master
  292. * node in the first (%UBIFS_MST_LNUM). A power cut happens,
  293. * so this LEB ends up with some amount of garbage at the
  294. * end.
  295. * 3. UBIFS is being mounted R/O. We reach this place and
  296. * recover the master node from the second LEB
  297. * (%UBIFS_MST_LNUM + 1). But we cannot update the media
  298. * because we are being mounted R/O. We have to defer the
  299. * operation.
  300. * 4. However, this master node (@c->mst_node) is marked as
  301. * clean (since the step 1). And if we just return, the
  302. * mount code will be confused and won't recover the master
  303. * node when it is re-mounter R/W later.
  304. *
  305. * Thus, to force the recovery by marking the master node as
  306. * dirty.
  307. */
  308. c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
  309. } else {
  310. /* Write the recovered master node */
  311. c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
  312. err = write_rcvrd_mst_node(c, c->mst_node);
  313. if (err)
  314. goto out_free;
  315. }
  316. vfree(buf2);
  317. vfree(buf1);
  318. return 0;
  319. out_err:
  320. err = -EINVAL;
  321. out_free:
  322. ubifs_err(c, "failed to recover master node");
  323. if (mst1) {
  324. ubifs_err(c, "dumping first master node");
  325. ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1));
  326. }
  327. if (mst2) {
  328. ubifs_err(c, "dumping second master node");
  329. ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2));
  330. }
  331. vfree(buf2);
  332. vfree(buf1);
  333. return err;
  334. }
  335. /**
  336. * ubifs_write_rcvrd_mst_node - write the recovered master node.
  337. * @c: UBIFS file-system description object
  338. *
  339. * This function writes the master node that was recovered during mounting in
  340. * read-only mode and must now be written because we are remounting rw.
  341. *
  342. * This function returns %0 on success and a negative error code on failure.
  343. */
  344. int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
  345. {
  346. int err;
  347. if (!c->rcvrd_mst_node)
  348. return 0;
  349. c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
  350. c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
  351. err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
  352. if (err)
  353. return err;
  354. kfree(c->rcvrd_mst_node);
  355. c->rcvrd_mst_node = NULL;
  356. return 0;
  357. }
  358. /**
  359. * is_last_write - determine if an offset was in the last write to a LEB.
  360. * @c: UBIFS file-system description object
  361. * @buf: buffer to check
  362. * @offs: offset to check
  363. *
  364. * This function returns %1 if @offs was in the last write to the LEB whose data
  365. * is in @buf, otherwise %0 is returned. The determination is made by checking
  366. * for subsequent empty space starting from the next @c->max_write_size
  367. * boundary.
  368. */
  369. static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
  370. {
  371. int empty_offs, check_len;
  372. uint8_t *p;
  373. /*
  374. * Round up to the next @c->max_write_size boundary i.e. @offs is in
  375. * the last wbuf written. After that should be empty space.
  376. */
  377. empty_offs = ALIGN(offs + 1, c->max_write_size);
  378. check_len = c->leb_size - empty_offs;
  379. p = buf + empty_offs - offs;
  380. return is_empty(p, check_len);
  381. }
  382. /**
  383. * clean_buf - clean the data from an LEB sitting in a buffer.
  384. * @c: UBIFS file-system description object
  385. * @buf: buffer to clean
  386. * @lnum: LEB number to clean
  387. * @offs: offset from which to clean
  388. * @len: length of buffer
  389. *
  390. * This function pads up to the next min_io_size boundary (if there is one) and
  391. * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
  392. * @c->min_io_size boundary.
  393. */
  394. static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
  395. int *offs, int *len)
  396. {
  397. int empty_offs, pad_len;
  398. dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
  399. ubifs_assert(c, !(*offs & 7));
  400. empty_offs = ALIGN(*offs, c->min_io_size);
  401. pad_len = empty_offs - *offs;
  402. ubifs_pad(c, *buf, pad_len);
  403. *offs += pad_len;
  404. *buf += pad_len;
  405. *len -= pad_len;
  406. memset(*buf, 0xff, c->leb_size - empty_offs);
  407. }
  408. /**
  409. * no_more_nodes - determine if there are no more nodes in a buffer.
  410. * @c: UBIFS file-system description object
  411. * @buf: buffer to check
  412. * @len: length of buffer
  413. * @lnum: LEB number of the LEB from which @buf was read
  414. * @offs: offset from which @buf was read
  415. *
  416. * This function ensures that the corrupted node at @offs is the last thing
  417. * written to a LEB. This function returns %1 if more data is not found and
  418. * %0 if more data is found.
  419. */
  420. static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
  421. int lnum, int offs)
  422. {
  423. struct ubifs_ch *ch = buf;
  424. int skip, dlen = le32_to_cpu(ch->len);
  425. /* Check for empty space after the corrupt node's common header */
  426. skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
  427. if (is_empty(buf + skip, len - skip))
  428. return 1;
  429. /*
  430. * The area after the common header size is not empty, so the common
  431. * header must be intact. Check it.
  432. */
  433. if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) {
  434. dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
  435. return 0;
  436. }
  437. /* Now we know the corrupt node's length we can skip over it */
  438. skip = ALIGN(offs + dlen, c->max_write_size) - offs;
  439. /* After which there should be empty space */
  440. if (is_empty(buf + skip, len - skip))
  441. return 1;
  442. dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
  443. return 0;
  444. }
  445. /**
  446. * fix_unclean_leb - fix an unclean LEB.
  447. * @c: UBIFS file-system description object
  448. * @sleb: scanned LEB information
  449. * @start: offset where scan started
  450. */
  451. static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
  452. int start)
  453. {
  454. int lnum = sleb->lnum, endpt = start;
  455. /* Get the end offset of the last node we are keeping */
  456. if (!list_empty(&sleb->nodes)) {
  457. struct ubifs_scan_node *snod;
  458. snod = list_entry(sleb->nodes.prev,
  459. struct ubifs_scan_node, list);
  460. endpt = snod->offs + snod->len;
  461. }
  462. if (c->ro_mount && !c->remounting_rw) {
  463. /* Add to recovery list */
  464. struct ubifs_unclean_leb *ucleb;
  465. dbg_rcvry("need to fix LEB %d start %d endpt %d",
  466. lnum, start, sleb->endpt);
  467. ucleb = kzalloc_obj(struct ubifs_unclean_leb, GFP_NOFS);
  468. if (!ucleb)
  469. return -ENOMEM;
  470. ucleb->lnum = lnum;
  471. ucleb->endpt = endpt;
  472. list_add_tail(&ucleb->list, &c->unclean_leb_list);
  473. } else {
  474. /* Write the fixed LEB back to flash */
  475. int err;
  476. dbg_rcvry("fixing LEB %d start %d endpt %d",
  477. lnum, start, sleb->endpt);
  478. if (endpt == 0) {
  479. err = ubifs_leb_unmap(c, lnum);
  480. if (err)
  481. return err;
  482. } else {
  483. int len = ALIGN(endpt, c->min_io_size);
  484. if (start) {
  485. err = ubifs_leb_read(c, lnum, sleb->buf, 0,
  486. start, 1);
  487. if (err)
  488. return err;
  489. }
  490. /* Pad to min_io_size */
  491. if (len > endpt) {
  492. int pad_len = len - ALIGN(endpt, 8);
  493. if (pad_len > 0) {
  494. void *buf = sleb->buf + len - pad_len;
  495. ubifs_pad(c, buf, pad_len);
  496. }
  497. }
  498. err = ubifs_leb_change(c, lnum, sleb->buf, len);
  499. if (err)
  500. return err;
  501. }
  502. }
  503. return 0;
  504. }
  505. /**
  506. * drop_last_group - drop the last group of nodes.
  507. * @sleb: scanned LEB information
  508. * @offs: offset of dropped nodes is returned here
  509. *
  510. * This is a helper function for 'ubifs_recover_leb()' which drops the last
  511. * group of nodes of the scanned LEB.
  512. */
  513. static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
  514. {
  515. while (!list_empty(&sleb->nodes)) {
  516. struct ubifs_scan_node *snod;
  517. struct ubifs_ch *ch;
  518. snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
  519. list);
  520. ch = snod->node;
  521. if (ch->group_type != UBIFS_IN_NODE_GROUP)
  522. break;
  523. dbg_rcvry("dropping grouped node at %d:%d",
  524. sleb->lnum, snod->offs);
  525. *offs = snod->offs;
  526. list_del(&snod->list);
  527. kfree(snod);
  528. sleb->nodes_cnt -= 1;
  529. }
  530. }
  531. /**
  532. * drop_last_node - drop the last node.
  533. * @sleb: scanned LEB information
  534. * @offs: offset of dropped nodes is returned here
  535. *
  536. * This is a helper function for 'ubifs_recover_leb()' which drops the last
  537. * node of the scanned LEB.
  538. */
  539. static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
  540. {
  541. struct ubifs_scan_node *snod;
  542. if (!list_empty(&sleb->nodes)) {
  543. snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
  544. list);
  545. dbg_rcvry("dropping last node at %d:%d",
  546. sleb->lnum, snod->offs);
  547. *offs = snod->offs;
  548. list_del(&snod->list);
  549. kfree(snod);
  550. sleb->nodes_cnt -= 1;
  551. }
  552. }
  553. /**
  554. * ubifs_recover_leb - scan and recover a LEB.
  555. * @c: UBIFS file-system description object
  556. * @lnum: LEB number
  557. * @offs: offset
  558. * @sbuf: LEB-sized buffer to use
  559. * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
  560. * belong to any journal head)
  561. *
  562. * This function does a scan of a LEB, but caters for errors that might have
  563. * been caused by the unclean unmount from which we are attempting to recover.
  564. * Returns the scanned information on success and a negative error code on
  565. * failure.
  566. */
  567. struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
  568. int offs, void *sbuf, int jhead)
  569. {
  570. int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
  571. int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
  572. struct ubifs_scan_leb *sleb;
  573. void *buf = sbuf + offs;
  574. dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
  575. sleb = ubifs_start_scan(c, lnum, offs, sbuf);
  576. if (IS_ERR(sleb))
  577. return sleb;
  578. ubifs_assert(c, len >= 8);
  579. while (len >= 8) {
  580. dbg_scan("look at LEB %d:%d (%d bytes left)",
  581. lnum, offs, len);
  582. cond_resched();
  583. /*
  584. * Scan quietly until there is an error from which we cannot
  585. * recover
  586. */
  587. ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
  588. if (ret == SCANNED_A_NODE) {
  589. /* A valid node, and not a padding node */
  590. struct ubifs_ch *ch = buf;
  591. int node_len;
  592. err = ubifs_add_snod(c, sleb, buf, offs);
  593. if (err)
  594. goto error;
  595. node_len = ALIGN(le32_to_cpu(ch->len), 8);
  596. offs += node_len;
  597. buf += node_len;
  598. len -= node_len;
  599. } else if (ret > 0) {
  600. /* Padding bytes or a valid padding node */
  601. offs += ret;
  602. buf += ret;
  603. len -= ret;
  604. } else if (ret == SCANNED_EMPTY_SPACE ||
  605. ret == SCANNED_GARBAGE ||
  606. ret == SCANNED_A_BAD_PAD_NODE ||
  607. ret == SCANNED_A_CORRUPT_NODE) {
  608. dbg_rcvry("found corruption (%d) at %d:%d",
  609. ret, lnum, offs);
  610. break;
  611. } else {
  612. ubifs_err(c, "unexpected return value %d", ret);
  613. err = -EINVAL;
  614. goto error;
  615. }
  616. }
  617. if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
  618. if (!is_last_write(c, buf, offs))
  619. goto corrupted_rescan;
  620. } else if (ret == SCANNED_A_CORRUPT_NODE) {
  621. if (!no_more_nodes(c, buf, len, lnum, offs))
  622. goto corrupted_rescan;
  623. } else if (!is_empty(buf, len)) {
  624. if (!is_last_write(c, buf, offs)) {
  625. int corruption = first_non_ff(buf, len);
  626. /*
  627. * See header comment for this file for more
  628. * explanations about the reasons we have this check.
  629. */
  630. ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d",
  631. lnum, offs, corruption);
  632. /* Make sure we dump interesting non-0xFF data */
  633. offs += corruption;
  634. buf += corruption;
  635. goto corrupted;
  636. }
  637. }
  638. min_io_unit = round_down(offs, c->min_io_size);
  639. if (grouped)
  640. /*
  641. * If nodes are grouped, always drop the incomplete group at
  642. * the end.
  643. */
  644. drop_last_group(sleb, &offs);
  645. if (jhead == GCHD) {
  646. /*
  647. * If this LEB belongs to the GC head then while we are in the
  648. * middle of the same min. I/O unit keep dropping nodes. So
  649. * basically, what we want is to make sure that the last min.
  650. * I/O unit where we saw the corruption is dropped completely
  651. * with all the uncorrupted nodes which may possibly sit there.
  652. *
  653. * In other words, let's name the min. I/O unit where the
  654. * corruption starts B, and the previous min. I/O unit A. The
  655. * below code tries to deal with a situation when half of B
  656. * contains valid nodes or the end of a valid node, and the
  657. * second half of B contains corrupted data or garbage. This
  658. * means that UBIFS had been writing to B just before the power
  659. * cut happened. I do not know how realistic is this scenario
  660. * that half of the min. I/O unit had been written successfully
  661. * and the other half not, but this is possible in our 'failure
  662. * mode emulation' infrastructure at least.
  663. *
  664. * So what is the problem, why we need to drop those nodes? Why
  665. * can't we just clean-up the second half of B by putting a
  666. * padding node there? We can, and this works fine with one
  667. * exception which was reproduced with power cut emulation
  668. * testing and happens extremely rarely.
  669. *
  670. * Imagine the file-system is full, we run GC which starts
  671. * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
  672. * the current GC head LEB). The @c->gc_lnum is -1, which means
  673. * that GC will retain LEB X and will try to continue. Imagine
  674. * that LEB X is currently the dirtiest LEB, and the amount of
  675. * used space in LEB Y is exactly the same as amount of free
  676. * space in LEB X.
  677. *
  678. * And a power cut happens when nodes are moved from LEB X to
  679. * LEB Y. We are here trying to recover LEB Y which is the GC
  680. * head LEB. We find the min. I/O unit B as described above.
  681. * Then we clean-up LEB Y by padding min. I/O unit. And later
  682. * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
  683. * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
  684. * does not match because the amount of valid nodes there does
  685. * not fit the free space in LEB Y any more! And this is
  686. * because of the padding node which we added to LEB Y. The
  687. * user-visible effect of this which I once observed and
  688. * analysed is that we cannot mount the file-system with
  689. * -ENOSPC error.
  690. *
  691. * So obviously, to make sure that situation does not happen we
  692. * should free min. I/O unit B in LEB Y completely and the last
  693. * used min. I/O unit in LEB Y should be A. This is basically
  694. * what the below code tries to do.
  695. */
  696. while (offs > min_io_unit)
  697. drop_last_node(sleb, &offs);
  698. }
  699. buf = sbuf + offs;
  700. len = c->leb_size - offs;
  701. clean_buf(c, &buf, lnum, &offs, &len);
  702. ubifs_end_scan(c, sleb, lnum, offs);
  703. err = fix_unclean_leb(c, sleb, start);
  704. if (err)
  705. goto error;
  706. return sleb;
  707. corrupted_rescan:
  708. /* Re-scan the corrupted data with verbose messages */
  709. ubifs_err(c, "corruption %d", ret);
  710. ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
  711. corrupted:
  712. ubifs_scanned_corruption(c, lnum, offs, buf);
  713. err = -EUCLEAN;
  714. error:
  715. ubifs_err(c, "LEB %d scanning failed", lnum);
  716. ubifs_scan_destroy(sleb);
  717. return ERR_PTR(err);
  718. }
  719. /**
  720. * get_cs_sqnum - get commit start sequence number.
  721. * @c: UBIFS file-system description object
  722. * @lnum: LEB number of commit start node
  723. * @offs: offset of commit start node
  724. * @cs_sqnum: commit start sequence number is returned here
  725. *
  726. * This function returns %0 on success and a negative error code on failure.
  727. */
  728. static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
  729. unsigned long long *cs_sqnum)
  730. {
  731. struct ubifs_cs_node *cs_node = NULL;
  732. int err, ret;
  733. dbg_rcvry("at %d:%d", lnum, offs);
  734. cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
  735. if (!cs_node)
  736. return -ENOMEM;
  737. if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
  738. goto out_err;
  739. err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
  740. UBIFS_CS_NODE_SZ, 0);
  741. if (err && err != -EBADMSG)
  742. goto out_free;
  743. ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
  744. if (ret != SCANNED_A_NODE) {
  745. ubifs_err(c, "Not a valid node");
  746. goto out_err;
  747. }
  748. if (cs_node->ch.node_type != UBIFS_CS_NODE) {
  749. ubifs_err(c, "Not a CS node, type is %d", cs_node->ch.node_type);
  750. goto out_err;
  751. }
  752. if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
  753. ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu",
  754. (unsigned long long)le64_to_cpu(cs_node->cmt_no),
  755. c->cmt_no);
  756. goto out_err;
  757. }
  758. *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
  759. dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
  760. kfree(cs_node);
  761. return 0;
  762. out_err:
  763. err = -EINVAL;
  764. out_free:
  765. ubifs_err(c, "failed to get CS sqnum");
  766. kfree(cs_node);
  767. return err;
  768. }
  769. /**
  770. * ubifs_recover_log_leb - scan and recover a log LEB.
  771. * @c: UBIFS file-system description object
  772. * @lnum: LEB number
  773. * @offs: offset
  774. * @sbuf: LEB-sized buffer to use
  775. *
  776. * This function does a scan of a LEB, but caters for errors that might have
  777. * been caused by unclean reboots from which we are attempting to recover
  778. * (assume that only the last log LEB can be corrupted by an unclean reboot).
  779. *
  780. * This function returns %0 on success and a negative error code on failure.
  781. */
  782. struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
  783. int offs, void *sbuf)
  784. {
  785. struct ubifs_scan_leb *sleb;
  786. int next_lnum;
  787. dbg_rcvry("LEB %d", lnum);
  788. next_lnum = lnum + 1;
  789. if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
  790. next_lnum = UBIFS_LOG_LNUM;
  791. if (next_lnum != c->ltail_lnum) {
  792. /*
  793. * We can only recover at the end of the log, so check that the
  794. * next log LEB is empty or out of date.
  795. */
  796. sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
  797. if (IS_ERR(sleb))
  798. return sleb;
  799. if (sleb->nodes_cnt) {
  800. struct ubifs_scan_node *snod;
  801. unsigned long long cs_sqnum = c->cs_sqnum;
  802. snod = list_entry(sleb->nodes.next,
  803. struct ubifs_scan_node, list);
  804. if (cs_sqnum == 0) {
  805. int err;
  806. err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
  807. if (err) {
  808. ubifs_scan_destroy(sleb);
  809. return ERR_PTR(err);
  810. }
  811. }
  812. if (snod->sqnum > cs_sqnum) {
  813. ubifs_err(c, "unrecoverable log corruption in LEB %d",
  814. lnum);
  815. ubifs_scan_destroy(sleb);
  816. return ERR_PTR(-EUCLEAN);
  817. }
  818. }
  819. ubifs_scan_destroy(sleb);
  820. }
  821. return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
  822. }
  823. /**
  824. * recover_head - recover a head.
  825. * @c: UBIFS file-system description object
  826. * @lnum: LEB number of head to recover
  827. * @offs: offset of head to recover
  828. * @sbuf: LEB-sized buffer to use
  829. *
  830. * This function ensures that there is no data on the flash at a head location.
  831. *
  832. * This function returns %0 on success and a negative error code on failure.
  833. */
  834. static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
  835. {
  836. int len = c->max_write_size, err;
  837. if (offs + len > c->leb_size)
  838. len = c->leb_size - offs;
  839. if (!len)
  840. return 0;
  841. /* Read at the head location and check it is empty flash */
  842. err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
  843. if (err || !is_empty(sbuf, len)) {
  844. dbg_rcvry("cleaning head at %d:%d", lnum, offs);
  845. if (offs == 0)
  846. return ubifs_leb_unmap(c, lnum);
  847. err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
  848. if (err)
  849. return err;
  850. return ubifs_leb_change(c, lnum, sbuf, offs);
  851. }
  852. return 0;
  853. }
  854. /**
  855. * ubifs_recover_inl_heads - recover index and LPT heads.
  856. * @c: UBIFS file-system description object
  857. * @sbuf: LEB-sized buffer to use
  858. *
  859. * This function ensures that there is no data on the flash at the index and
  860. * LPT head locations.
  861. *
  862. * This deals with the recovery of a half-completed journal commit. UBIFS is
  863. * careful never to overwrite the last version of the index or the LPT. Because
  864. * the index and LPT are wandering trees, data from a half-completed commit will
  865. * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
  866. * assumed to be empty and will be unmapped anyway before use, or in the index
  867. * and LPT heads.
  868. *
  869. * This function returns %0 on success and a negative error code on failure.
  870. */
  871. int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
  872. {
  873. int err;
  874. ubifs_assert(c, !c->ro_mount || c->remounting_rw);
  875. dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
  876. err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
  877. if (err)
  878. return err;
  879. dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
  880. return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
  881. }
  882. /**
  883. * clean_an_unclean_leb - read and write a LEB to remove corruption.
  884. * @c: UBIFS file-system description object
  885. * @ucleb: unclean LEB information
  886. * @sbuf: LEB-sized buffer to use
  887. *
  888. * This function reads a LEB up to a point pre-determined by the mount recovery,
  889. * checks the nodes, and writes the result back to the flash, thereby cleaning
  890. * off any following corruption, or non-fatal ECC errors.
  891. *
  892. * This function returns %0 on success and a negative error code on failure.
  893. */
  894. static int clean_an_unclean_leb(struct ubifs_info *c,
  895. struct ubifs_unclean_leb *ucleb, void *sbuf)
  896. {
  897. int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
  898. void *buf = sbuf;
  899. dbg_rcvry("LEB %d len %d", lnum, len);
  900. if (len == 0) {
  901. /* Nothing to read, just unmap it */
  902. return ubifs_leb_unmap(c, lnum);
  903. }
  904. err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
  905. if (err && err != -EBADMSG)
  906. return err;
  907. while (len >= 8) {
  908. int ret;
  909. cond_resched();
  910. /* Scan quietly until there is an error */
  911. ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
  912. if (ret == SCANNED_A_NODE) {
  913. /* A valid node, and not a padding node */
  914. struct ubifs_ch *ch = buf;
  915. int node_len;
  916. node_len = ALIGN(le32_to_cpu(ch->len), 8);
  917. offs += node_len;
  918. buf += node_len;
  919. len -= node_len;
  920. continue;
  921. }
  922. if (ret > 0) {
  923. /* Padding bytes or a valid padding node */
  924. offs += ret;
  925. buf += ret;
  926. len -= ret;
  927. continue;
  928. }
  929. if (ret == SCANNED_EMPTY_SPACE) {
  930. ubifs_err(c, "unexpected empty space at %d:%d",
  931. lnum, offs);
  932. return -EUCLEAN;
  933. }
  934. if (quiet) {
  935. /* Redo the last scan but noisily */
  936. quiet = 0;
  937. continue;
  938. }
  939. ubifs_scanned_corruption(c, lnum, offs, buf);
  940. return -EUCLEAN;
  941. }
  942. /* Pad to min_io_size */
  943. len = ALIGN(ucleb->endpt, c->min_io_size);
  944. if (len > ucleb->endpt) {
  945. int pad_len = len - ALIGN(ucleb->endpt, 8);
  946. if (pad_len > 0) {
  947. buf = c->sbuf + len - pad_len;
  948. ubifs_pad(c, buf, pad_len);
  949. }
  950. }
  951. /* Write back the LEB atomically */
  952. err = ubifs_leb_change(c, lnum, sbuf, len);
  953. if (err)
  954. return err;
  955. dbg_rcvry("cleaned LEB %d", lnum);
  956. return 0;
  957. }
  958. /**
  959. * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
  960. * @c: UBIFS file-system description object
  961. * @sbuf: LEB-sized buffer to use
  962. *
  963. * This function cleans a LEB identified during recovery that needs to be
  964. * written but was not because UBIFS was mounted read-only. This happens when
  965. * remounting to read-write mode.
  966. *
  967. * This function returns %0 on success and a negative error code on failure.
  968. */
  969. int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
  970. {
  971. dbg_rcvry("recovery");
  972. while (!list_empty(&c->unclean_leb_list)) {
  973. struct ubifs_unclean_leb *ucleb;
  974. int err;
  975. ucleb = list_entry(c->unclean_leb_list.next,
  976. struct ubifs_unclean_leb, list);
  977. err = clean_an_unclean_leb(c, ucleb, sbuf);
  978. if (err)
  979. return err;
  980. list_del(&ucleb->list);
  981. kfree(ucleb);
  982. }
  983. return 0;
  984. }
  985. /**
  986. * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
  987. * @c: UBIFS file-system description object
  988. *
  989. * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
  990. * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
  991. * zero in case of success and a negative error code in case of failure.
  992. */
  993. static int grab_empty_leb(struct ubifs_info *c)
  994. {
  995. int lnum, err;
  996. /*
  997. * Note, it is very important to first search for an empty LEB and then
  998. * run the commit, not vice-versa. The reason is that there might be
  999. * only one empty LEB at the moment, the one which has been the
  1000. * @c->gc_lnum just before the power cut happened. During the regular
  1001. * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
  1002. * one but GC can grab it. But at this moment this single empty LEB is
  1003. * not marked as taken, so if we run commit - what happens? Right, the
  1004. * commit will grab it and write the index there. Remember that the
  1005. * index always expands as long as there is free space, and it only
  1006. * starts consolidating when we run out of space.
  1007. *
  1008. * IOW, if we run commit now, we might not be able to find a free LEB
  1009. * after this.
  1010. */
  1011. lnum = ubifs_find_free_leb_for_idx(c);
  1012. if (lnum < 0) {
  1013. ubifs_err(c, "could not find an empty LEB");
  1014. ubifs_dump_lprops(c);
  1015. ubifs_dump_budg(c, &c->bi);
  1016. return lnum;
  1017. }
  1018. /* Reset the index flag */
  1019. err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
  1020. LPROPS_INDEX, 0);
  1021. if (err)
  1022. return err;
  1023. c->gc_lnum = lnum;
  1024. dbg_rcvry("found empty LEB %d, run commit", lnum);
  1025. return ubifs_run_commit(c);
  1026. }
  1027. /**
  1028. * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
  1029. * @c: UBIFS file-system description object
  1030. *
  1031. * Out-of-place garbage collection requires always one empty LEB with which to
  1032. * start garbage collection. The LEB number is recorded in c->gc_lnum and is
  1033. * written to the master node on unmounting. In the case of an unclean unmount
  1034. * the value of gc_lnum recorded in the master node is out of date and cannot
  1035. * be used. Instead, recovery must allocate an empty LEB for this purpose.
  1036. * However, there may not be enough empty space, in which case it must be
  1037. * possible to GC the dirtiest LEB into the GC head LEB.
  1038. *
  1039. * This function also runs the commit which causes the TNC updates from
  1040. * size-recovery and orphans to be written to the flash. That is important to
  1041. * ensure correct replay order for subsequent mounts.
  1042. *
  1043. * This function returns %0 on success and a negative error code on failure.
  1044. */
  1045. int ubifs_rcvry_gc_commit(struct ubifs_info *c)
  1046. {
  1047. struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
  1048. struct ubifs_lprops lp;
  1049. int err;
  1050. dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
  1051. c->gc_lnum = -1;
  1052. if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
  1053. return grab_empty_leb(c);
  1054. err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
  1055. if (err) {
  1056. if (err != -ENOSPC)
  1057. return err;
  1058. dbg_rcvry("could not find a dirty LEB");
  1059. return grab_empty_leb(c);
  1060. }
  1061. ubifs_assert(c, !(lp.flags & LPROPS_INDEX));
  1062. ubifs_assert(c, lp.free + lp.dirty >= wbuf->offs);
  1063. /*
  1064. * We run the commit before garbage collection otherwise subsequent
  1065. * mounts will see the GC and orphan deletion in a different order.
  1066. */
  1067. dbg_rcvry("committing");
  1068. err = ubifs_run_commit(c);
  1069. if (err)
  1070. return err;
  1071. dbg_rcvry("GC'ing LEB %d", lp.lnum);
  1072. mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
  1073. err = ubifs_garbage_collect_leb(c, &lp);
  1074. if (err >= 0) {
  1075. int err2 = ubifs_wbuf_sync_nolock(wbuf);
  1076. if (err2)
  1077. err = err2;
  1078. }
  1079. mutex_unlock(&wbuf->io_mutex);
  1080. if (err < 0) {
  1081. ubifs_err(c, "GC failed, error %d", err);
  1082. if (err == -EAGAIN)
  1083. err = -EINVAL;
  1084. return err;
  1085. }
  1086. ubifs_assert(c, err == LEB_RETAINED);
  1087. if (err != LEB_RETAINED)
  1088. return -EINVAL;
  1089. err = ubifs_leb_unmap(c, c->gc_lnum);
  1090. if (err)
  1091. return err;
  1092. dbg_rcvry("allocated LEB %d for GC", lp.lnum);
  1093. return 0;
  1094. }
  1095. /**
  1096. * struct size_entry - inode size information for recovery.
  1097. * @rb: link in the RB-tree of sizes
  1098. * @inum: inode number
  1099. * @i_size: size on inode
  1100. * @d_size: maximum size based on data nodes
  1101. * @exists: indicates whether the inode exists
  1102. * @inode: inode if pinned in memory awaiting rw mode to fix it
  1103. */
  1104. struct size_entry {
  1105. struct rb_node rb;
  1106. ino_t inum;
  1107. loff_t i_size;
  1108. loff_t d_size;
  1109. int exists;
  1110. struct inode *inode;
  1111. };
  1112. /**
  1113. * add_ino - add an entry to the size tree.
  1114. * @c: UBIFS file-system description object
  1115. * @inum: inode number
  1116. * @i_size: size on inode
  1117. * @d_size: maximum size based on data nodes
  1118. * @exists: indicates whether the inode exists
  1119. */
  1120. static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
  1121. loff_t d_size, int exists)
  1122. {
  1123. struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
  1124. struct size_entry *e;
  1125. while (*p) {
  1126. parent = *p;
  1127. e = rb_entry(parent, struct size_entry, rb);
  1128. if (inum < e->inum)
  1129. p = &(*p)->rb_left;
  1130. else
  1131. p = &(*p)->rb_right;
  1132. }
  1133. e = kzalloc_obj(struct size_entry);
  1134. if (!e)
  1135. return -ENOMEM;
  1136. e->inum = inum;
  1137. e->i_size = i_size;
  1138. e->d_size = d_size;
  1139. e->exists = exists;
  1140. rb_link_node(&e->rb, parent, p);
  1141. rb_insert_color(&e->rb, &c->size_tree);
  1142. return 0;
  1143. }
  1144. /**
  1145. * find_ino - find an entry on the size tree.
  1146. * @c: UBIFS file-system description object
  1147. * @inum: inode number
  1148. */
  1149. static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
  1150. {
  1151. struct rb_node *p = c->size_tree.rb_node;
  1152. struct size_entry *e;
  1153. while (p) {
  1154. e = rb_entry(p, struct size_entry, rb);
  1155. if (inum < e->inum)
  1156. p = p->rb_left;
  1157. else if (inum > e->inum)
  1158. p = p->rb_right;
  1159. else
  1160. return e;
  1161. }
  1162. return NULL;
  1163. }
  1164. /**
  1165. * remove_ino - remove an entry from the size tree.
  1166. * @c: UBIFS file-system description object
  1167. * @inum: inode number
  1168. */
  1169. static void remove_ino(struct ubifs_info *c, ino_t inum)
  1170. {
  1171. struct size_entry *e = find_ino(c, inum);
  1172. if (!e)
  1173. return;
  1174. rb_erase(&e->rb, &c->size_tree);
  1175. kfree(e);
  1176. }
  1177. /**
  1178. * ubifs_destroy_size_tree - free resources related to the size tree.
  1179. * @c: UBIFS file-system description object
  1180. */
  1181. void ubifs_destroy_size_tree(struct ubifs_info *c)
  1182. {
  1183. struct size_entry *e, *n;
  1184. rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) {
  1185. iput(e->inode);
  1186. kfree(e);
  1187. }
  1188. c->size_tree = RB_ROOT;
  1189. }
  1190. /**
  1191. * ubifs_recover_size_accum - accumulate inode sizes for recovery.
  1192. * @c: UBIFS file-system description object
  1193. * @key: node key
  1194. * @deletion: node is for a deletion
  1195. * @new_size: inode size
  1196. *
  1197. * This function has two purposes:
  1198. * 1) to ensure there are no data nodes that fall outside the inode size
  1199. * 2) to ensure there are no data nodes for inodes that do not exist
  1200. * To accomplish those purposes, a rb-tree is constructed containing an entry
  1201. * for each inode number in the journal that has not been deleted, and recording
  1202. * the size from the inode node, the maximum size of any data node (also altered
  1203. * by truncations) and a flag indicating a inode number for which no inode node
  1204. * was present in the journal.
  1205. *
  1206. * Note that there is still the possibility that there are data nodes that have
  1207. * been committed that are beyond the inode size, however the only way to find
  1208. * them would be to scan the entire index. Alternatively, some provision could
  1209. * be made to record the size of inodes at the start of commit, which would seem
  1210. * very cumbersome for a scenario that is quite unlikely and the only negative
  1211. * consequence of which is wasted space.
  1212. *
  1213. * This functions returns %0 on success and a negative error code on failure.
  1214. */
  1215. int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
  1216. int deletion, loff_t new_size)
  1217. {
  1218. ino_t inum = key_inum(c, key);
  1219. struct size_entry *e;
  1220. int err;
  1221. switch (key_type(c, key)) {
  1222. case UBIFS_INO_KEY:
  1223. if (deletion)
  1224. remove_ino(c, inum);
  1225. else {
  1226. e = find_ino(c, inum);
  1227. if (e) {
  1228. e->i_size = new_size;
  1229. e->exists = 1;
  1230. } else {
  1231. err = add_ino(c, inum, new_size, 0, 1);
  1232. if (err)
  1233. return err;
  1234. }
  1235. }
  1236. break;
  1237. case UBIFS_DATA_KEY:
  1238. e = find_ino(c, inum);
  1239. if (e) {
  1240. if (new_size > e->d_size)
  1241. e->d_size = new_size;
  1242. } else {
  1243. err = add_ino(c, inum, 0, new_size, 0);
  1244. if (err)
  1245. return err;
  1246. }
  1247. break;
  1248. case UBIFS_TRUN_KEY:
  1249. e = find_ino(c, inum);
  1250. if (e)
  1251. e->d_size = new_size;
  1252. break;
  1253. }
  1254. return 0;
  1255. }
  1256. /**
  1257. * fix_size_in_place - fix inode size in place on flash.
  1258. * @c: UBIFS file-system description object
  1259. * @e: inode size information for recovery
  1260. */
  1261. static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
  1262. {
  1263. struct ubifs_ino_node *ino = c->sbuf;
  1264. unsigned char *p;
  1265. union ubifs_key key;
  1266. int err, lnum, offs, len;
  1267. loff_t i_size;
  1268. /* Locate the inode node LEB number and offset */
  1269. ino_key_init(c, &key, e->inum);
  1270. err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
  1271. if (err)
  1272. goto out;
  1273. /*
  1274. * If the size recorded on the inode node is greater than the size that
  1275. * was calculated from nodes in the journal then don't change the inode.
  1276. */
  1277. i_size = le64_to_cpu(ino->size);
  1278. if (i_size >= e->d_size)
  1279. return 0;
  1280. /* Read the LEB */
  1281. err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
  1282. if (err)
  1283. goto out;
  1284. /* Change the size field and recalculate the CRC */
  1285. ino = c->sbuf + offs;
  1286. ino->size = cpu_to_le64(e->d_size);
  1287. len = le32_to_cpu(ino->ch.len);
  1288. ubifs_crc_node((void *)ino, len);
  1289. /* Work out where data in the LEB ends and free space begins */
  1290. p = c->sbuf;
  1291. len = c->leb_size - 1;
  1292. while (p[len] == 0xff)
  1293. len -= 1;
  1294. len = ALIGN(len + 1, c->min_io_size);
  1295. /* Atomically write the fixed LEB back again */
  1296. err = ubifs_leb_change(c, lnum, c->sbuf, len);
  1297. if (err)
  1298. goto out;
  1299. dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
  1300. (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
  1301. return 0;
  1302. out:
  1303. ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d",
  1304. (unsigned long)e->inum, e->i_size, e->d_size, err);
  1305. return err;
  1306. }
  1307. /**
  1308. * inode_fix_size - fix inode size
  1309. * @c: UBIFS file-system description object
  1310. * @e: inode size information for recovery
  1311. */
  1312. static int inode_fix_size(struct ubifs_info *c, struct size_entry *e)
  1313. {
  1314. struct inode *inode;
  1315. struct ubifs_inode *ui;
  1316. int err;
  1317. if (c->ro_mount)
  1318. ubifs_assert(c, !e->inode);
  1319. if (e->inode) {
  1320. /* Remounting rw, pick up inode we stored earlier */
  1321. inode = e->inode;
  1322. } else {
  1323. inode = ubifs_iget(c->vfs_sb, e->inum);
  1324. if (IS_ERR(inode))
  1325. return PTR_ERR(inode);
  1326. if (inode->i_size >= e->d_size) {
  1327. /*
  1328. * The original inode in the index already has a size
  1329. * big enough, nothing to do
  1330. */
  1331. iput(inode);
  1332. return 0;
  1333. }
  1334. dbg_rcvry("ino %lu size %lld -> %lld",
  1335. (unsigned long)e->inum,
  1336. inode->i_size, e->d_size);
  1337. ui = ubifs_inode(inode);
  1338. inode->i_size = e->d_size;
  1339. ui->ui_size = e->d_size;
  1340. ui->synced_i_size = e->d_size;
  1341. e->inode = inode;
  1342. }
  1343. /*
  1344. * In readonly mode just keep the inode pinned in memory until we go
  1345. * readwrite. In readwrite mode write the inode to the journal with the
  1346. * fixed size.
  1347. */
  1348. if (c->ro_mount)
  1349. return 0;
  1350. err = ubifs_jnl_write_inode(c, inode);
  1351. iput(inode);
  1352. if (err)
  1353. return err;
  1354. rb_erase(&e->rb, &c->size_tree);
  1355. kfree(e);
  1356. return 0;
  1357. }
  1358. /**
  1359. * ubifs_recover_size - recover inode size.
  1360. * @c: UBIFS file-system description object
  1361. * @in_place: If true, do a in-place size fixup
  1362. *
  1363. * This function attempts to fix inode size discrepancies identified by the
  1364. * 'ubifs_recover_size_accum()' function.
  1365. *
  1366. * This functions returns %0 on success and a negative error code on failure.
  1367. */
  1368. int ubifs_recover_size(struct ubifs_info *c, bool in_place)
  1369. {
  1370. struct rb_node *this = rb_first(&c->size_tree);
  1371. while (this) {
  1372. struct size_entry *e;
  1373. int err;
  1374. e = rb_entry(this, struct size_entry, rb);
  1375. this = rb_next(this);
  1376. if (!e->exists) {
  1377. union ubifs_key key;
  1378. ino_key_init(c, &key, e->inum);
  1379. err = ubifs_tnc_lookup(c, &key, c->sbuf);
  1380. if (err && err != -ENOENT)
  1381. return err;
  1382. if (err == -ENOENT) {
  1383. /* Remove data nodes that have no inode */
  1384. dbg_rcvry("removing ino %lu",
  1385. (unsigned long)e->inum);
  1386. err = ubifs_tnc_remove_ino(c, e->inum);
  1387. if (err)
  1388. return err;
  1389. } else {
  1390. struct ubifs_ino_node *ino = c->sbuf;
  1391. e->exists = 1;
  1392. e->i_size = le64_to_cpu(ino->size);
  1393. }
  1394. }
  1395. if (e->exists && e->i_size < e->d_size) {
  1396. ubifs_assert(c, !(c->ro_mount && in_place));
  1397. /*
  1398. * We found data that is outside the found inode size,
  1399. * fixup the inode size
  1400. */
  1401. if (in_place) {
  1402. err = fix_size_in_place(c, e);
  1403. if (err)
  1404. return err;
  1405. iput(e->inode);
  1406. } else {
  1407. err = inode_fix_size(c, e);
  1408. if (err)
  1409. return err;
  1410. continue;
  1411. }
  1412. }
  1413. rb_erase(&e->rb, &c->size_tree);
  1414. kfree(e);
  1415. }
  1416. return 0;
  1417. }