badblocks.c 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Bad block management
  4. *
  5. * - Heavily based on MD badblocks code from Neil Brown
  6. *
  7. * Copyright (c) 2015, Intel Corporation.
  8. */
  9. #include <linux/badblocks.h>
  10. #include <linux/seqlock.h>
  11. #include <linux/device.h>
  12. #include <linux/kernel.h>
  13. #include <linux/module.h>
  14. #include <linux/stddef.h>
  15. #include <linux/types.h>
  16. #include <linux/slab.h>
  17. /*
  18. * The purpose of badblocks set/clear is to manage bad blocks ranges which are
  19. * identified by LBA addresses.
  20. *
  21. * When the caller of badblocks_set() wants to set a range of bad blocks, the
  22. * setting range can be acked or unacked. And the setting range may merge,
  23. * overwrite, skip the overlapped already set range, depends on who they are
  24. * overlapped or adjacent, and the acknowledgment type of the ranges. It can be
  25. * more complicated when the setting range covers multiple already set bad block
  26. * ranges, with restrictions of maximum length of each bad range and the bad
  27. * table space limitation.
  28. *
  29. * It is difficult and unnecessary to take care of all the possible situations,
  30. * for setting a large range of bad blocks, we can handle it by dividing the
  31. * large range into smaller ones when encounter overlap, max range length or
  32. * bad table full conditions. Every time only a smaller piece of the bad range
  33. * is handled with a limited number of conditions how it is interacted with
  34. * possible overlapped or adjacent already set bad block ranges. Then the hard
  35. * complicated problem can be much simpler to handle in proper way.
  36. *
  37. * When setting a range of bad blocks to the bad table, the simplified situations
  38. * to be considered are, (The already set bad blocks ranges are naming with
  39. * prefix E, and the setting bad blocks range is naming with prefix S)
  40. *
  41. * 1) A setting range is not overlapped or adjacent to any other already set bad
  42. * block range.
  43. * +--------+
  44. * | S |
  45. * +--------+
  46. * +-------------+ +-------------+
  47. * | E1 | | E2 |
  48. * +-------------+ +-------------+
  49. * For this situation if the bad blocks table is not full, just allocate a
  50. * free slot from the bad blocks table to mark the setting range S. The
  51. * result is,
  52. * +-------------+ +--------+ +-------------+
  53. * | E1 | | S | | E2 |
  54. * +-------------+ +--------+ +-------------+
  55. * 2) A setting range starts exactly at a start LBA of an already set bad blocks
  56. * range.
  57. * 2.1) The setting range size < already set range size
  58. * +--------+
  59. * | S |
  60. * +--------+
  61. * +-------------+
  62. * | E |
  63. * +-------------+
  64. * 2.1.1) If S and E are both acked or unacked range, the setting range S can
  65. * be merged into existing bad range E. The result is,
  66. * +-------------+
  67. * | S |
  68. * +-------------+
  69. * 2.1.2) If S is unacked setting and E is acked, the setting will be denied, and
  70. * the result is,
  71. * +-------------+
  72. * | E |
  73. * +-------------+
  74. * 2.1.3) If S is acked setting and E is unacked, range S can overwrite on E.
  75. * An extra slot from the bad blocks table will be allocated for S, and head
  76. * of E will move to end of the inserted range S. The result is,
  77. * +--------+----+
  78. * | S | E |
  79. * +--------+----+
  80. * 2.2) The setting range size == already set range size
  81. * 2.2.1) If S and E are both acked or unacked range, the setting range S can
  82. * be merged into existing bad range E. The result is,
  83. * +-------------+
  84. * | S |
  85. * +-------------+
  86. * 2.2.2) If S is unacked setting and E is acked, the setting will be denied, and
  87. * the result is,
  88. * +-------------+
  89. * | E |
  90. * +-------------+
  91. * 2.2.3) If S is acked setting and E is unacked, range S can overwrite all of
  92. bad blocks range E. The result is,
  93. * +-------------+
  94. * | S |
  95. * +-------------+
  96. * 2.3) The setting range size > already set range size
  97. * +-------------------+
  98. * | S |
  99. * +-------------------+
  100. * +-------------+
  101. * | E |
  102. * +-------------+
  103. * For such situation, the setting range S can be treated as two parts, the
  104. * first part (S1) is as same size as the already set range E, the second
  105. * part (S2) is the rest of setting range.
  106. * +-------------+-----+ +-------------+ +-----+
  107. * | S1 | S2 | | S1 | | S2 |
  108. * +-------------+-----+ ===> +-------------+ +-----+
  109. * +-------------+ +-------------+
  110. * | E | | E |
  111. * +-------------+ +-------------+
  112. * Now we only focus on how to handle the setting range S1 and already set
  113. * range E, which are already explained in 2.2), for the rest S2 it will be
  114. * handled later in next loop.
  115. * 3) A setting range starts before the start LBA of an already set bad blocks
  116. * range.
  117. * +-------------+
  118. * | S |
  119. * +-------------+
  120. * +-------------+
  121. * | E |
  122. * +-------------+
  123. * For this situation, the setting range S can be divided into two parts, the
  124. * first (S1) ends at the start LBA of already set range E, the second part
  125. * (S2) starts exactly at a start LBA of the already set range E.
  126. * +----+---------+ +----+ +---------+
  127. * | S1 | S2 | | S1 | | S2 |
  128. * +----+---------+ ===> +----+ +---------+
  129. * +-------------+ +-------------+
  130. * | E | | E |
  131. * +-------------+ +-------------+
  132. * Now only the first part S1 should be handled in this loop, which is in
  133. * similar condition as 1). The rest part S2 has exact same start LBA address
  134. * of the already set range E, they will be handled in next loop in one of
  135. * situations in 2).
  136. * 4) A setting range starts after the start LBA of an already set bad blocks
  137. * range.
  138. * 4.1) If the setting range S exactly matches the tail part of already set bad
  139. * blocks range E, like the following chart shows,
  140. * +---------+
  141. * | S |
  142. * +---------+
  143. * +-------------+
  144. * | E |
  145. * +-------------+
  146. * 4.1.1) If range S and E have same acknowledge value (both acked or unacked),
  147. * they will be merged into one, the result is,
  148. * +-------------+
  149. * | S |
  150. * +-------------+
  151. * 4.1.2) If range E is acked and the setting range S is unacked, the setting
  152. * request of S will be rejected, the result is,
  153. * +-------------+
  154. * | E |
  155. * +-------------+
  156. * 4.1.3) If range E is unacked, and the setting range S is acked, then S may
  157. * overwrite the overlapped range of E, the result is,
  158. * +---+---------+
  159. * | E | S |
  160. * +---+---------+
  161. * 4.2) If the setting range S stays in middle of an already set range E, like
  162. * the following chart shows,
  163. * +----+
  164. * | S |
  165. * +----+
  166. * +--------------+
  167. * | E |
  168. * +--------------+
  169. * 4.2.1) If range S and E have same acknowledge value (both acked or unacked),
  170. * they will be merged into one, the result is,
  171. * +--------------+
  172. * | S |
  173. * +--------------+
  174. * 4.2.2) If range E is acked and the setting range S is unacked, the setting
  175. * request of S will be rejected, the result is also,
  176. * +--------------+
  177. * | E |
  178. * +--------------+
  179. * 4.2.3) If range E is unacked, and the setting range S is acked, then S will
  180. * inserted into middle of E and split previous range E into two parts (E1
  181. * and E2), the result is,
  182. * +----+----+----+
  183. * | E1 | S | E2 |
  184. * +----+----+----+
  185. * 4.3) If the setting bad blocks range S is overlapped with an already set bad
  186. * blocks range E. The range S starts after the start LBA of range E, and
  187. * ends after the end LBA of range E, as the following chart shows,
  188. * +-------------------+
  189. * | S |
  190. * +-------------------+
  191. * +-------------+
  192. * | E |
  193. * +-------------+
  194. * For this situation the range S can be divided into two parts, the first
  195. * part (S1) ends at end range E, and the second part (S2) has rest range of
  196. * origin S.
  197. * +---------+---------+ +---------+ +---------+
  198. * | S1 | S2 | | S1 | | S2 |
  199. * +---------+---------+ ===> +---------+ +---------+
  200. * +-------------+ +-------------+
  201. * | E | | E |
  202. * +-------------+ +-------------+
  203. * Now in this loop the setting range S1 and already set range E can be
  204. * handled as the situations 4.1), the rest range S2 will be handled in next
  205. * loop and ignored in this loop.
  206. * 5) A setting bad blocks range S is adjacent to one or more already set bad
  207. * blocks range(s), and they are all acked or unacked range.
  208. * 5.1) Front merge: If the already set bad blocks range E is before setting
  209. * range S and they are adjacent,
  210. * +------+
  211. * | S |
  212. * +------+
  213. * +-------+
  214. * | E |
  215. * +-------+
  216. * 5.1.1) When total size of range S and E <= BB_MAX_LEN, and their acknowledge
  217. * values are same, the setting range S can front merges into range E. The
  218. * result is,
  219. * +--------------+
  220. * | S |
  221. * +--------------+
  222. * 5.1.2) Otherwise these two ranges cannot merge, just insert the setting
  223. * range S right after already set range E into the bad blocks table. The
  224. * result is,
  225. * +--------+------+
  226. * | E | S |
  227. * +--------+------+
  228. * 6) Special cases which above conditions cannot handle
  229. * 6.1) Multiple already set ranges may merge into less ones in a full bad table
  230. * +-------------------------------------------------------+
  231. * | S |
  232. * +-------------------------------------------------------+
  233. * |<----- BB_MAX_LEN ----->|
  234. * +-----+ +-----+ +-----+
  235. * | E1 | | E2 | | E3 |
  236. * +-----+ +-----+ +-----+
  237. * In the above example, when the bad blocks table is full, inserting the
  238. * first part of setting range S will fail because no more available slot
  239. * can be allocated from bad blocks table. In this situation a proper
  240. * setting method should be go though all the setting bad blocks range and
  241. * look for chance to merge already set ranges into less ones. When there
  242. * is available slot from bad blocks table, re-try again to handle more
  243. * setting bad blocks ranges as many as possible.
  244. * +------------------------+
  245. * | S3 |
  246. * +------------------------+
  247. * |<----- BB_MAX_LEN ----->|
  248. * +-----+-----+-----+---+-----+--+
  249. * | S1 | S2 |
  250. * +-----+-----+-----+---+-----+--+
  251. * The above chart shows although the first part (S3) cannot be inserted due
  252. * to no-space in bad blocks table, but the following E1, E2 and E3 ranges
  253. * can be merged with rest part of S into less range S1 and S2. Now there is
  254. * 1 free slot in bad blocks table.
  255. * +------------------------+-----+-----+-----+---+-----+--+
  256. * | S3 | S1 | S2 |
  257. * +------------------------+-----+-----+-----+---+-----+--+
  258. * Since the bad blocks table is not full anymore, re-try again for the
  259. * origin setting range S. Now the setting range S3 can be inserted into the
  260. * bad blocks table with previous freed slot from multiple ranges merge.
  261. * 6.2) Front merge after overwrite
  262. * In the following example, in bad blocks table, E1 is an acked bad blocks
  263. * range and E2 is an unacked bad blocks range, therefore they are not able
  264. * to merge into a larger range. The setting bad blocks range S is acked,
  265. * therefore part of E2 can be overwritten by S.
  266. * +--------+
  267. * | S | acknowledged
  268. * +--------+ S: 1
  269. * +-------+-------------+ E1: 1
  270. * | E1 | E2 | E2: 0
  271. * +-------+-------------+
  272. * With previous simplified routines, after overwriting part of E2 with S,
  273. * the bad blocks table should be (E3 is remaining part of E2 which is not
  274. * overwritten by S),
  275. * acknowledged
  276. * +-------+--------+----+ S: 1
  277. * | E1 | S | E3 | E1: 1
  278. * +-------+--------+----+ E3: 0
  279. * The above result is correct but not perfect. Range E1 and S in the bad
  280. * blocks table are all acked, merging them into a larger one range may
  281. * occupy less bad blocks table space and make badblocks_check() faster.
  282. * Therefore in such situation, after overwriting range S, the previous range
  283. * E1 should be checked for possible front combination. Then the ideal
  284. * result can be,
  285. * +----------------+----+ acknowledged
  286. * | E1 | E3 | E1: 1
  287. * +----------------+----+ E3: 0
  288. * 6.3) Behind merge: If the already set bad blocks range E is behind the setting
  289. * range S and they are adjacent. Normally we don't need to care about this
  290. * because front merge handles this while going though range S from head to
  291. * tail, except for the tail part of range S. When the setting range S are
  292. * fully handled, all the above simplified routine doesn't check whether the
  293. * tail LBA of range S is adjacent to the next already set range and not
  294. * merge them even it is possible.
  295. * +------+
  296. * | S |
  297. * +------+
  298. * +-------+
  299. * | E |
  300. * +-------+
  301. * For the above special situation, when the setting range S are all handled
  302. * and the loop ends, an extra check is necessary for whether next already
  303. * set range E is right after S and mergeable.
  304. * 6.3.1) When total size of range E and S <= BB_MAX_LEN, and their acknowledge
  305. * values are same, the setting range S can behind merges into range E. The
  306. * result is,
  307. * +--------------+
  308. * | S |
  309. * +--------------+
  310. * 6.3.2) Otherwise these two ranges cannot merge, just insert the setting range
  311. * S in front of the already set range E in the bad blocks table. The result
  312. * is,
  313. * +------+-------+
  314. * | S | E |
  315. * +------+-------+
  316. *
  317. * All the above 5 simplified situations and 3 special cases may cover 99%+ of
  318. * the bad block range setting conditions. Maybe there is some rare corner case
  319. * is not considered and optimized, it won't hurt if badblocks_set() fails due
  320. * to no space, or some ranges are not merged to save bad blocks table space.
  321. *
  322. * Inside badblocks_set() each loop starts by jumping to re_insert label, every
  323. * time for the new loop prev_badblocks() is called to find an already set range
  324. * which starts before or at current setting range. Since the setting bad blocks
  325. * range is handled from head to tail, most of the cases it is unnecessary to do
  326. * the binary search inside prev_badblocks(), it is possible to provide a hint
  327. * to prev_badblocks() for a fast path, then the expensive binary search can be
  328. * avoided. In my test with the hint to prev_badblocks(), except for the first
  329. * loop, all rested calls to prev_badblocks() can go into the fast path and
  330. * return correct bad blocks table index immediately.
  331. *
  332. *
  333. * Clearing a bad blocks range from the bad block table has similar idea as
  334. * setting does, but much more simpler. The only thing needs to be noticed is
  335. * when the clearing range hits middle of a bad block range, the existing bad
  336. * block range will split into two, and one more item should be added into the
  337. * bad block table. The simplified situations to be considered are, (The already
  338. * set bad blocks ranges in bad block table are naming with prefix E, and the
  339. * clearing bad blocks range is naming with prefix C)
  340. *
  341. * 1) A clearing range is not overlapped to any already set ranges in bad block
  342. * table.
  343. * +-----+ | +-----+ | +-----+
  344. * | C | | | C | | | C |
  345. * +-----+ or +-----+ or +-----+
  346. * +---+ | +----+ +----+ | +---+
  347. * | E | | | E1 | | E2 | | | E |
  348. * +---+ | +----+ +----+ | +---+
  349. * For the above situations, no bad block to be cleared and no failure
  350. * happens, simply returns 0.
  351. * 2) The clearing range hits middle of an already setting bad blocks range in
  352. * the bad block table.
  353. * +---+
  354. * | C |
  355. * +---+
  356. * +-----------------+
  357. * | E |
  358. * +-----------------+
  359. * In this situation if the bad block table is not full, the range E will be
  360. * split into two ranges E1 and E2. The result is,
  361. * +------+ +------+
  362. * | E1 | | E2 |
  363. * +------+ +------+
  364. * 3) The clearing range starts exactly at same LBA as an already set bad block range
  365. * from the bad block table.
  366. * 3.1) Partially covered at head part
  367. * +------------+
  368. * | C |
  369. * +------------+
  370. * +-----------------+
  371. * | E |
  372. * +-----------------+
  373. * For this situation, the overlapped already set range will update the
  374. * start LBA to end of C and shrink the range to BB_LEN(E) - BB_LEN(C). No
  375. * item deleted from bad block table. The result is,
  376. * +----+
  377. * | E1 |
  378. * +----+
  379. * 3.2) Exact fully covered
  380. * +-----------------+
  381. * | C |
  382. * +-----------------+
  383. * +-----------------+
  384. * | E |
  385. * +-----------------+
  386. * For this situation the whole bad blocks range E will be cleared and its
  387. * corresponded item is deleted from the bad block table.
  388. * 4) The clearing range exactly ends at same LBA as an already set bad block
  389. * range.
  390. * +-------+
  391. * | C |
  392. * +-------+
  393. * +-----------------+
  394. * | E |
  395. * +-----------------+
  396. * For the above situation, the already set range E is updated to shrink its
  397. * end to the start of C, and reduce its length to BB_LEN(E) - BB_LEN(C).
  398. * The result is,
  399. * +---------+
  400. * | E |
  401. * +---------+
  402. * 5) The clearing range is partially overlapped with an already set bad block
  403. * range from the bad block table.
  404. * 5.1) The already set bad block range is front overlapped with the clearing
  405. * range.
  406. * +----------+
  407. * | C |
  408. * +----------+
  409. * +------------+
  410. * | E |
  411. * +------------+
  412. * For such situation, the clearing range C can be treated as two parts. The
  413. * first part ends at the start LBA of range E, and the second part starts at
  414. * same LBA of range E.
  415. * +----+-----+ +----+ +-----+
  416. * | C1 | C2 | | C1 | | C2 |
  417. * +----+-----+ ===> +----+ +-----+
  418. * +------------+ +------------+
  419. * | E | | E |
  420. * +------------+ +------------+
  421. * Now the first part C1 can be handled as condition 1), and the second part C2 can be
  422. * handled as condition 3.1) in next loop.
  423. * 5.2) The already set bad block range is behind overlaopped with the clearing
  424. * range.
  425. * +----------+
  426. * | C |
  427. * +----------+
  428. * +------------+
  429. * | E |
  430. * +------------+
  431. * For such situation, the clearing range C can be treated as two parts. The
  432. * first part C1 ends at same end LBA of range E, and the second part starts
  433. * at end LBA of range E.
  434. * +----+-----+ +----+ +-----+
  435. * | C1 | C2 | | C1 | | C2 |
  436. * +----+-----+ ===> +----+ +-----+
  437. * +------------+ +------------+
  438. * | E | | E |
  439. * +------------+ +------------+
  440. * Now the first part clearing range C1 can be handled as condition 4), and
  441. * the second part clearing range C2 can be handled as condition 1) in next
  442. * loop.
  443. *
  444. * All bad blocks range clearing can be simplified into the above 5 situations
  445. * by only handling the head part of the clearing range in each run of the
  446. * while-loop. The idea is similar to bad blocks range setting but much
  447. * simpler.
  448. */
  449. /*
  450. * Find the range starts at-or-before 's' from bad table. The search
  451. * starts from index 'hint' and stops at index 'hint_end' from the bad
  452. * table.
  453. */
  454. static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
  455. {
  456. int hint_end = hint + 2;
  457. u64 *p = bb->page;
  458. int ret = -1;
  459. while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
  460. (BB_OFFSET(p[hint]) <= s)) {
  461. if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
  462. ret = hint;
  463. break;
  464. }
  465. hint++;
  466. }
  467. return ret;
  468. }
  469. /*
  470. * Find the range starts at-or-before bad->start. If 'hint' is provided
  471. * (hint >= 0) then search in the bad table from hint firstly. It is
  472. * very probably the wanted bad range can be found from the hint index,
  473. * then the unnecessary while-loop iteration can be avoided.
  474. */
  475. static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
  476. int hint)
  477. {
  478. sector_t s = bad->start;
  479. int ret = -1;
  480. int lo, hi;
  481. u64 *p;
  482. if (!bb->count)
  483. goto out;
  484. if (hint >= 0) {
  485. ret = prev_by_hint(bb, s, hint);
  486. if (ret >= 0)
  487. goto out;
  488. }
  489. lo = 0;
  490. hi = bb->count;
  491. p = bb->page;
  492. /* The following bisect search might be unnecessary */
  493. if (BB_OFFSET(p[lo]) > s)
  494. return -1;
  495. if (BB_OFFSET(p[hi - 1]) <= s)
  496. return hi - 1;
  497. /* Do bisect search in bad table */
  498. while (hi - lo > 1) {
  499. int mid = (lo + hi)/2;
  500. sector_t a = BB_OFFSET(p[mid]);
  501. if (a == s) {
  502. ret = mid;
  503. goto out;
  504. }
  505. if (a < s)
  506. lo = mid;
  507. else
  508. hi = mid;
  509. }
  510. if (BB_OFFSET(p[lo]) <= s)
  511. ret = lo;
  512. out:
  513. return ret;
  514. }
  515. /*
  516. * Return 'true' if the range indicated by 'bad' can be forward
  517. * merged with the bad range (from the bad table) indexed by 'prev'.
  518. */
  519. static bool can_merge_front(struct badblocks *bb, int prev,
  520. struct badblocks_context *bad)
  521. {
  522. sector_t s = bad->start;
  523. u64 *p = bb->page;
  524. if (BB_ACK(p[prev]) == bad->ack &&
  525. (s < BB_END(p[prev]) ||
  526. (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
  527. return true;
  528. return false;
  529. }
  530. /*
  531. * Do forward merge for range indicated by 'bad' and the bad range
  532. * (from bad table) indexed by 'prev'. The return value is sectors
  533. * merged from bad->len.
  534. */
  535. static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
  536. {
  537. sector_t sectors = bad->len;
  538. sector_t s = bad->start;
  539. u64 *p = bb->page;
  540. int merged = 0;
  541. WARN_ON(s > BB_END(p[prev]));
  542. if (s < BB_END(p[prev])) {
  543. merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
  544. } else {
  545. merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
  546. if ((prev + 1) < bb->count &&
  547. merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
  548. merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
  549. }
  550. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  551. BB_LEN(p[prev]) + merged, bad->ack);
  552. }
  553. return merged;
  554. }
  555. /*
  556. * 'Combine' is a special case which can_merge_front() is not able to
  557. * handle: If a bad range (indexed by 'prev' from bad table) exactly
  558. * starts as bad->start, and the bad range ahead of 'prev' (indexed by
  559. * 'prev - 1' from bad table) exactly ends at where 'prev' starts, and
  560. * the sum of their lengths does not exceed BB_MAX_LEN limitation, then
  561. * these two bad range (from bad table) can be combined.
  562. *
  563. * Return 'true' if bad ranges indexed by 'prev' and 'prev - 1' from bad
  564. * table can be combined.
  565. */
  566. static bool can_combine_front(struct badblocks *bb, int prev,
  567. struct badblocks_context *bad)
  568. {
  569. u64 *p = bb->page;
  570. if ((prev > 0) &&
  571. (BB_OFFSET(p[prev]) == bad->start) &&
  572. (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
  573. (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
  574. (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
  575. return true;
  576. return false;
  577. }
  578. /*
  579. * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from bad
  580. * table) into one larger bad range, and the new range is indexed by
  581. * 'prev - 1'.
  582. * The caller of front_combine() will decrease bb->count, therefore
  583. * it is unnecessary to clear p[perv] after front merge.
  584. */
  585. static void front_combine(struct badblocks *bb, int prev)
  586. {
  587. u64 *p = bb->page;
  588. p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
  589. BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
  590. BB_ACK(p[prev]));
  591. if ((prev + 1) < bb->count)
  592. memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
  593. }
  594. /*
  595. * Return 'true' if the range indicated by 'bad' is exactly forward
  596. * overlapped with the bad range (from bad table) indexed by 'front'.
  597. * Exactly forward overlap means the bad range (from bad table) indexed
  598. * by 'prev' does not cover the whole range indicated by 'bad'.
  599. */
  600. static bool overlap_front(struct badblocks *bb, int front,
  601. struct badblocks_context *bad)
  602. {
  603. u64 *p = bb->page;
  604. if (bad->start >= BB_OFFSET(p[front]) &&
  605. bad->start < BB_END(p[front]))
  606. return true;
  607. return false;
  608. }
  609. /*
  610. * Return 'true' if the range indicated by 'bad' is exactly backward
  611. * overlapped with the bad range (from bad table) indexed by 'behind'.
  612. */
  613. static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
  614. int behind)
  615. {
  616. u64 *p = bb->page;
  617. if (bad->start < BB_OFFSET(p[behind]) &&
  618. (bad->start + bad->len) > BB_OFFSET(p[behind]))
  619. return true;
  620. return false;
  621. }
  622. /*
  623. * Return 'true' if the range indicated by 'bad' can overwrite the bad
  624. * range (from bad table) indexed by 'prev'.
  625. *
  626. * The range indicated by 'bad' can overwrite the bad range indexed by
  627. * 'prev' when,
  628. * 1) The whole range indicated by 'bad' can cover partial or whole bad
  629. * range (from bad table) indexed by 'prev'.
  630. * 2) The ack value of 'bad' is larger or equal to the ack value of bad
  631. * range 'prev'.
  632. *
  633. * If the overwriting doesn't cover the whole bad range (from bad table)
  634. * indexed by 'prev', new range might be split from existing bad range,
  635. * 1) The overwrite covers head or tail part of existing bad range, 1
  636. * extra bad range will be split and added into the bad table.
  637. * 2) The overwrite covers middle of existing bad range, 2 extra bad
  638. * ranges will be split (ahead and after the overwritten range) and
  639. * added into the bad table.
  640. * The number of extra split ranges of the overwriting is stored in
  641. * 'extra' and returned for the caller.
  642. */
  643. static bool can_front_overwrite(struct badblocks *bb, int prev,
  644. struct badblocks_context *bad, int *extra)
  645. {
  646. u64 *p = bb->page;
  647. int len;
  648. WARN_ON(!overlap_front(bb, prev, bad));
  649. if (BB_ACK(p[prev]) >= bad->ack)
  650. return false;
  651. if (BB_END(p[prev]) <= (bad->start + bad->len)) {
  652. len = BB_END(p[prev]) - bad->start;
  653. if (BB_OFFSET(p[prev]) == bad->start)
  654. *extra = 0;
  655. else
  656. *extra = 1;
  657. bad->len = len;
  658. } else {
  659. if (BB_OFFSET(p[prev]) == bad->start)
  660. *extra = 1;
  661. else
  662. /*
  663. * prev range will be split into two, beside the overwritten
  664. * one, an extra slot needed from bad table.
  665. */
  666. *extra = 2;
  667. }
  668. if ((bb->count + (*extra)) > MAX_BADBLOCKS)
  669. return false;
  670. return true;
  671. }
  672. /*
  673. * Do the overwrite from the range indicated by 'bad' to the bad range
  674. * (from bad table) indexed by 'prev'.
  675. * The previously called can_front_overwrite() will provide how many
  676. * extra bad range(s) might be split and added into the bad table. All
  677. * the splitting cases in the bad table will be handled here.
  678. */
  679. static int front_overwrite(struct badblocks *bb, int prev,
  680. struct badblocks_context *bad, int extra)
  681. {
  682. u64 *p = bb->page;
  683. sector_t orig_end = BB_END(p[prev]);
  684. int orig_ack = BB_ACK(p[prev]);
  685. switch (extra) {
  686. case 0:
  687. p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
  688. bad->ack);
  689. break;
  690. case 1:
  691. if (BB_OFFSET(p[prev]) == bad->start) {
  692. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  693. bad->len, bad->ack);
  694. memmove(p + prev + 2, p + prev + 1,
  695. (bb->count - prev - 1) * 8);
  696. p[prev + 1] = BB_MAKE(bad->start + bad->len,
  697. orig_end - BB_END(p[prev]),
  698. orig_ack);
  699. } else {
  700. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  701. bad->start - BB_OFFSET(p[prev]),
  702. orig_ack);
  703. /*
  704. * prev +2 -> prev + 1 + 1, which is for,
  705. * 1) prev + 1: the slot index of the previous one
  706. * 2) + 1: one more slot for extra being 1.
  707. */
  708. memmove(p + prev + 2, p + prev + 1,
  709. (bb->count - prev - 1) * 8);
  710. p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
  711. }
  712. break;
  713. case 2:
  714. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  715. bad->start - BB_OFFSET(p[prev]),
  716. orig_ack);
  717. /*
  718. * prev + 3 -> prev + 1 + 2, which is for,
  719. * 1) prev + 1: the slot index of the previous one
  720. * 2) + 2: two more slots for extra being 2.
  721. */
  722. memmove(p + prev + 3, p + prev + 1,
  723. (bb->count - prev - 1) * 8);
  724. p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
  725. p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
  726. orig_end - BB_END(p[prev + 1]),
  727. orig_ack);
  728. break;
  729. default:
  730. break;
  731. }
  732. return bad->len;
  733. }
  734. /*
  735. * Explicitly insert a range indicated by 'bad' to the bad table, where
  736. * the location is indexed by 'at'.
  737. */
  738. static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
  739. {
  740. u64 *p = bb->page;
  741. int len;
  742. WARN_ON(badblocks_full(bb));
  743. len = min_t(sector_t, bad->len, BB_MAX_LEN);
  744. if (at < bb->count)
  745. memmove(p + at + 1, p + at, (bb->count - at) * 8);
  746. p[at] = BB_MAKE(bad->start, len, bad->ack);
  747. return len;
  748. }
  749. static void badblocks_update_acked(struct badblocks *bb)
  750. {
  751. bool unacked = false;
  752. u64 *p = bb->page;
  753. int i;
  754. if (!bb->unacked_exist)
  755. return;
  756. for (i = 0; i < bb->count ; i++) {
  757. if (!BB_ACK(p[i])) {
  758. unacked = true;
  759. break;
  760. }
  761. }
  762. if (!unacked)
  763. bb->unacked_exist = 0;
  764. }
  765. /*
  766. * Return 'true' if the range indicated by 'bad' is exactly backward
  767. * overlapped with the bad range (from bad table) indexed by 'behind'.
  768. */
  769. static bool try_adjacent_combine(struct badblocks *bb, int prev)
  770. {
  771. u64 *p = bb->page;
  772. if (prev >= 0 && (prev + 1) < bb->count &&
  773. BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
  774. (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
  775. BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
  776. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  777. BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
  778. BB_ACK(p[prev]));
  779. if ((prev + 2) < bb->count)
  780. memmove(p + prev + 1, p + prev + 2,
  781. (bb->count - (prev + 2)) * 8);
  782. bb->count--;
  783. return true;
  784. }
  785. return false;
  786. }
  787. /* Do exact work to set bad block range into the bad block table */
  788. static bool _badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
  789. int acknowledged)
  790. {
  791. int len = 0, added = 0;
  792. struct badblocks_context bad;
  793. int prev = -1, hint = -1;
  794. unsigned long flags;
  795. u64 *p;
  796. if (bb->shift < 0)
  797. /* badblocks are disabled */
  798. return false;
  799. if (sectors == 0)
  800. /* Invalid sectors number */
  801. return false;
  802. if (bb->shift) {
  803. /* round the start down, and the end up */
  804. sector_t next = s + sectors;
  805. rounddown(s, 1 << bb->shift);
  806. roundup(next, 1 << bb->shift);
  807. sectors = next - s;
  808. }
  809. write_seqlock_irqsave(&bb->lock, flags);
  810. bad.ack = acknowledged;
  811. p = bb->page;
  812. re_insert:
  813. bad.start = s;
  814. bad.len = sectors;
  815. len = 0;
  816. if (badblocks_full(bb))
  817. goto out;
  818. if (badblocks_empty(bb)) {
  819. len = insert_at(bb, 0, &bad);
  820. bb->count++;
  821. added++;
  822. goto update_sectors;
  823. }
  824. prev = prev_badblocks(bb, &bad, hint);
  825. /* start before all badblocks */
  826. if (prev < 0) {
  827. /* insert on the first */
  828. if (bad.len > (BB_OFFSET(p[0]) - bad.start))
  829. bad.len = BB_OFFSET(p[0]) - bad.start;
  830. len = insert_at(bb, 0, &bad);
  831. bb->count++;
  832. added++;
  833. hint = ++prev;
  834. goto update_sectors;
  835. }
  836. /* in case p[prev-1] can be merged with p[prev] */
  837. if (can_combine_front(bb, prev, &bad)) {
  838. front_combine(bb, prev);
  839. bb->count--;
  840. added++;
  841. hint = prev;
  842. goto update_sectors;
  843. }
  844. if (can_merge_front(bb, prev, &bad)) {
  845. len = front_merge(bb, prev, &bad);
  846. added++;
  847. hint = prev;
  848. goto update_sectors;
  849. }
  850. if (overlap_front(bb, prev, &bad)) {
  851. int extra = 0;
  852. if (!can_front_overwrite(bb, prev, &bad, &extra)) {
  853. if (extra > 0)
  854. goto out;
  855. len = min_t(sector_t,
  856. BB_END(p[prev]) - s, sectors);
  857. hint = prev;
  858. goto update_sectors;
  859. }
  860. len = front_overwrite(bb, prev, &bad, extra);
  861. added++;
  862. bb->count += extra;
  863. if (can_combine_front(bb, prev, &bad)) {
  864. front_combine(bb, prev);
  865. bb->count--;
  866. }
  867. hint = prev;
  868. goto update_sectors;
  869. }
  870. /* cannot merge and there is space in bad table */
  871. if ((prev + 1) < bb->count &&
  872. overlap_behind(bb, &bad, prev + 1))
  873. bad.len = min_t(sector_t,
  874. bad.len, BB_OFFSET(p[prev + 1]) - bad.start);
  875. len = insert_at(bb, prev + 1, &bad);
  876. bb->count++;
  877. added++;
  878. hint = ++prev;
  879. update_sectors:
  880. s += len;
  881. sectors -= len;
  882. if (sectors > 0)
  883. goto re_insert;
  884. /*
  885. * Check whether the following already set range can be
  886. * merged. (prev < 0) condition is not handled here,
  887. * because it's already complicated enough.
  888. */
  889. try_adjacent_combine(bb, prev);
  890. out:
  891. if (added) {
  892. set_changed(bb);
  893. if (!acknowledged)
  894. bb->unacked_exist = 1;
  895. else
  896. badblocks_update_acked(bb);
  897. }
  898. write_sequnlock_irqrestore(&bb->lock, flags);
  899. return sectors == 0;
  900. }
  901. /*
  902. * Clear the bad block range from bad block table which is front overlapped
  903. * with the clearing range. The return value is how many sectors from an
  904. * already set bad block range are cleared. If the whole bad block range is
  905. * covered by the clearing range and fully cleared, 'delete' is set as 1 for
  906. * the caller to reduce bb->count.
  907. */
  908. static int front_clear(struct badblocks *bb, int prev,
  909. struct badblocks_context *bad, int *deleted)
  910. {
  911. sector_t sectors = bad->len;
  912. sector_t s = bad->start;
  913. u64 *p = bb->page;
  914. int cleared = 0;
  915. *deleted = 0;
  916. if (s == BB_OFFSET(p[prev])) {
  917. if (BB_LEN(p[prev]) > sectors) {
  918. p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
  919. BB_LEN(p[prev]) - sectors,
  920. BB_ACK(p[prev]));
  921. cleared = sectors;
  922. } else {
  923. /* BB_LEN(p[prev]) <= sectors */
  924. cleared = BB_LEN(p[prev]);
  925. if ((prev + 1) < bb->count)
  926. memmove(p + prev, p + prev + 1,
  927. (bb->count - prev - 1) * 8);
  928. *deleted = 1;
  929. }
  930. } else if (s > BB_OFFSET(p[prev])) {
  931. if (BB_END(p[prev]) <= (s + sectors)) {
  932. cleared = BB_END(p[prev]) - s;
  933. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  934. s - BB_OFFSET(p[prev]),
  935. BB_ACK(p[prev]));
  936. } else {
  937. /* Splitting is handled in front_splitting_clear() */
  938. BUG();
  939. }
  940. }
  941. return cleared;
  942. }
  943. /*
  944. * Handle the condition that the clearing range hits middle of an already set
  945. * bad block range from bad block table. In this condition the existing bad
  946. * block range is split into two after the middle part is cleared.
  947. */
  948. static int front_splitting_clear(struct badblocks *bb, int prev,
  949. struct badblocks_context *bad)
  950. {
  951. u64 *p = bb->page;
  952. u64 end = BB_END(p[prev]);
  953. int ack = BB_ACK(p[prev]);
  954. sector_t sectors = bad->len;
  955. sector_t s = bad->start;
  956. p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
  957. s - BB_OFFSET(p[prev]),
  958. ack);
  959. memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
  960. p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
  961. return sectors;
  962. }
  963. /* Do the exact work to clear bad block range from the bad block table */
  964. static bool _badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
  965. {
  966. struct badblocks_context bad;
  967. int prev = -1, hint = -1;
  968. int len = 0, cleared = 0;
  969. u64 *p;
  970. if (bb->shift < 0)
  971. /* badblocks are disabled */
  972. return false;
  973. if (sectors == 0)
  974. /* Invalid sectors number */
  975. return false;
  976. if (bb->shift) {
  977. sector_t target;
  978. /* When clearing we round the start up and the end down.
  979. * This should not matter as the shift should align with
  980. * the block size and no rounding should ever be needed.
  981. * However it is better the think a block is bad when it
  982. * isn't than to think a block is not bad when it is.
  983. */
  984. target = s + sectors;
  985. roundup(s, 1 << bb->shift);
  986. rounddown(target, 1 << bb->shift);
  987. sectors = target - s;
  988. }
  989. write_seqlock_irq(&bb->lock);
  990. bad.ack = true;
  991. p = bb->page;
  992. re_clear:
  993. bad.start = s;
  994. bad.len = sectors;
  995. if (badblocks_empty(bb)) {
  996. len = sectors;
  997. cleared++;
  998. goto update_sectors;
  999. }
  1000. prev = prev_badblocks(bb, &bad, hint);
  1001. /* Start before all badblocks */
  1002. if (prev < 0) {
  1003. if (overlap_behind(bb, &bad, 0)) {
  1004. len = BB_OFFSET(p[0]) - s;
  1005. hint = 0;
  1006. } else {
  1007. len = sectors;
  1008. }
  1009. /*
  1010. * Both situations are to clear non-bad range,
  1011. * should be treated as successful
  1012. */
  1013. cleared++;
  1014. goto update_sectors;
  1015. }
  1016. /* Start after all badblocks */
  1017. if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
  1018. len = sectors;
  1019. cleared++;
  1020. goto update_sectors;
  1021. }
  1022. /* Clear will split a bad record but the table is full */
  1023. if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
  1024. (BB_END(p[prev]) > (bad.start + sectors))) {
  1025. len = sectors;
  1026. goto update_sectors;
  1027. }
  1028. if (overlap_front(bb, prev, &bad)) {
  1029. if ((BB_OFFSET(p[prev]) < bad.start) &&
  1030. (BB_END(p[prev]) > (bad.start + bad.len))) {
  1031. /* Splitting */
  1032. if ((bb->count + 1) <= MAX_BADBLOCKS) {
  1033. len = front_splitting_clear(bb, prev, &bad);
  1034. bb->count += 1;
  1035. cleared++;
  1036. } else {
  1037. /* No space to split, give up */
  1038. len = sectors;
  1039. }
  1040. } else {
  1041. int deleted = 0;
  1042. len = front_clear(bb, prev, &bad, &deleted);
  1043. bb->count -= deleted;
  1044. cleared++;
  1045. hint = prev;
  1046. }
  1047. goto update_sectors;
  1048. }
  1049. /* Not front overlap, but behind overlap */
  1050. if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
  1051. len = BB_OFFSET(p[prev + 1]) - bad.start;
  1052. hint = prev + 1;
  1053. /* Clear non-bad range should be treated as successful */
  1054. cleared++;
  1055. goto update_sectors;
  1056. }
  1057. /* Not cover any badblocks range in the table */
  1058. len = sectors;
  1059. /* Clear non-bad range should be treated as successful */
  1060. cleared++;
  1061. update_sectors:
  1062. s += len;
  1063. sectors -= len;
  1064. if (sectors > 0)
  1065. goto re_clear;
  1066. if (cleared) {
  1067. badblocks_update_acked(bb);
  1068. set_changed(bb);
  1069. }
  1070. write_sequnlock_irq(&bb->lock);
  1071. if (!cleared)
  1072. return false;
  1073. return true;
  1074. }
  1075. /* Do the exact work to check bad blocks range from the bad block table */
  1076. static int _badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
  1077. sector_t *first_bad, sector_t *bad_sectors)
  1078. {
  1079. int prev = -1, hint = -1, set = 0;
  1080. struct badblocks_context bad;
  1081. int unacked_badblocks = 0;
  1082. int acked_badblocks = 0;
  1083. u64 *p = bb->page;
  1084. int len, rv;
  1085. re_check:
  1086. bad.start = s;
  1087. bad.len = sectors;
  1088. if (badblocks_empty(bb)) {
  1089. len = sectors;
  1090. goto update_sectors;
  1091. }
  1092. prev = prev_badblocks(bb, &bad, hint);
  1093. /* start after all badblocks */
  1094. if ((prev >= 0) &&
  1095. ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
  1096. len = sectors;
  1097. goto update_sectors;
  1098. }
  1099. /* Overlapped with front badblocks record */
  1100. if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
  1101. if (BB_ACK(p[prev]))
  1102. acked_badblocks++;
  1103. else
  1104. unacked_badblocks++;
  1105. if (BB_END(p[prev]) >= (s + sectors))
  1106. len = sectors;
  1107. else
  1108. len = BB_END(p[prev]) - s;
  1109. if (set == 0) {
  1110. *first_bad = BB_OFFSET(p[prev]);
  1111. *bad_sectors = BB_LEN(p[prev]);
  1112. set = 1;
  1113. }
  1114. goto update_sectors;
  1115. }
  1116. /* Not front overlap, but behind overlap */
  1117. if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
  1118. len = BB_OFFSET(p[prev + 1]) - bad.start;
  1119. hint = prev + 1;
  1120. goto update_sectors;
  1121. }
  1122. /* not cover any badblocks range in the table */
  1123. len = sectors;
  1124. update_sectors:
  1125. /* This situation should never happen */
  1126. WARN_ON(sectors < len);
  1127. s += len;
  1128. sectors -= len;
  1129. if (sectors > 0)
  1130. goto re_check;
  1131. if (unacked_badblocks > 0)
  1132. rv = -1;
  1133. else if (acked_badblocks > 0)
  1134. rv = 1;
  1135. else
  1136. rv = 0;
  1137. return rv;
  1138. }
  1139. /**
  1140. * badblocks_check() - check a given range for bad sectors
  1141. * @bb: the badblocks structure that holds all badblock information
  1142. * @s: sector (start) at which to check for badblocks
  1143. * @sectors: number of sectors to check for badblocks
  1144. * @first_bad: pointer to store location of the first badblock
  1145. * @bad_sectors: pointer to store number of badblocks after @first_bad
  1146. *
  1147. * We can record which blocks on each device are 'bad' and so just
  1148. * fail those blocks, or that stripe, rather than the whole device.
  1149. * Entries in the bad-block table are 64bits wide. This comprises:
  1150. * Length of bad-range, in sectors: 0-511 for lengths 1-512
  1151. * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
  1152. * A 'shift' can be set so that larger blocks are tracked and
  1153. * consequently larger devices can be covered.
  1154. * 'Acknowledged' flag - 1 bit. - the most significant bit.
  1155. *
  1156. * Locking of the bad-block table uses a seqlock so badblocks_check
  1157. * might need to retry if it is very unlucky.
  1158. * We will sometimes want to check for bad blocks in a bi_end_io function,
  1159. * so we use the write_seqlock_irq variant.
  1160. *
  1161. * When looking for a bad block we specify a range and want to
  1162. * know if any block in the range is bad. So we binary-search
  1163. * to the last range that starts at-or-before the given endpoint,
  1164. * (or "before the sector after the target range")
  1165. * then see if it ends after the given start.
  1166. *
  1167. * Return:
  1168. * 0: there are no known bad blocks in the range
  1169. * 1: there are known bad block which are all acknowledged
  1170. * -1: there are bad blocks which have not yet been acknowledged in metadata.
  1171. * plus the start/length of the first bad section we overlap.
  1172. */
  1173. int badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
  1174. sector_t *first_bad, sector_t *bad_sectors)
  1175. {
  1176. unsigned int seq;
  1177. int rv;
  1178. WARN_ON(bb->shift < 0 || sectors == 0);
  1179. if (bb->shift > 0) {
  1180. /* round the start down, and the end up */
  1181. sector_t target = s + sectors;
  1182. rounddown(s, 1 << bb->shift);
  1183. roundup(target, 1 << bb->shift);
  1184. sectors = target - s;
  1185. }
  1186. retry:
  1187. seq = read_seqbegin(&bb->lock);
  1188. rv = _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
  1189. if (read_seqretry(&bb->lock, seq))
  1190. goto retry;
  1191. return rv;
  1192. }
  1193. EXPORT_SYMBOL_GPL(badblocks_check);
  1194. /**
  1195. * badblocks_set() - Add a range of bad blocks to the table.
  1196. * @bb: the badblocks structure that holds all badblock information
  1197. * @s: first sector to mark as bad
  1198. * @sectors: number of sectors to mark as bad
  1199. * @acknowledged: weather to mark the bad sectors as acknowledged
  1200. *
  1201. * This might extend the table, or might contract it if two adjacent ranges
  1202. * can be merged. We binary-search to find the 'insertion' point, then
  1203. * decide how best to handle it.
  1204. *
  1205. * Return:
  1206. * true: success
  1207. * false: failed to set badblocks (out of space). Parital setting will be
  1208. * treated as failure.
  1209. */
  1210. bool badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
  1211. int acknowledged)
  1212. {
  1213. return _badblocks_set(bb, s, sectors, acknowledged);
  1214. }
  1215. EXPORT_SYMBOL_GPL(badblocks_set);
  1216. /**
  1217. * badblocks_clear() - Remove a range of bad blocks to the table.
  1218. * @bb: the badblocks structure that holds all badblock information
  1219. * @s: first sector to mark as bad
  1220. * @sectors: number of sectors to mark as bad
  1221. *
  1222. * This may involve extending the table if we spilt a region,
  1223. * but it must not fail. So if the table becomes full, we just
  1224. * drop the remove request.
  1225. *
  1226. * Return:
  1227. * true: success
  1228. * false: failed to clear badblocks
  1229. */
  1230. bool badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
  1231. {
  1232. return _badblocks_clear(bb, s, sectors);
  1233. }
  1234. EXPORT_SYMBOL_GPL(badblocks_clear);
  1235. /**
  1236. * ack_all_badblocks() - Acknowledge all bad blocks in a list.
  1237. * @bb: the badblocks structure that holds all badblock information
  1238. *
  1239. * This only succeeds if ->changed is clear. It is used by
  1240. * in-kernel metadata updates
  1241. */
  1242. void ack_all_badblocks(struct badblocks *bb)
  1243. {
  1244. if (bb->page == NULL || bb->changed)
  1245. /* no point even trying */
  1246. return;
  1247. write_seqlock_irq(&bb->lock);
  1248. if (bb->changed == 0 && bb->unacked_exist) {
  1249. u64 *p = bb->page;
  1250. int i;
  1251. for (i = 0; i < bb->count ; i++) {
  1252. if (!BB_ACK(p[i])) {
  1253. sector_t start = BB_OFFSET(p[i]);
  1254. int len = BB_LEN(p[i]);
  1255. p[i] = BB_MAKE(start, len, 1);
  1256. }
  1257. }
  1258. for (i = 0; i < bb->count ; i++)
  1259. while (try_adjacent_combine(bb, i))
  1260. ;
  1261. bb->unacked_exist = 0;
  1262. }
  1263. write_sequnlock_irq(&bb->lock);
  1264. }
  1265. EXPORT_SYMBOL_GPL(ack_all_badblocks);
  1266. /**
  1267. * badblocks_show() - sysfs access to bad-blocks list
  1268. * @bb: the badblocks structure that holds all badblock information
  1269. * @page: buffer received from sysfs
  1270. * @unack: weather to show unacknowledged badblocks
  1271. *
  1272. * Return:
  1273. * Length of returned data
  1274. */
  1275. ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
  1276. {
  1277. size_t len;
  1278. int i;
  1279. u64 *p = bb->page;
  1280. unsigned seq;
  1281. if (bb->shift < 0)
  1282. return 0;
  1283. retry:
  1284. seq = read_seqbegin(&bb->lock);
  1285. len = 0;
  1286. i = 0;
  1287. while (len < PAGE_SIZE && i < bb->count) {
  1288. sector_t s = BB_OFFSET(p[i]);
  1289. unsigned int length = BB_LEN(p[i]);
  1290. int ack = BB_ACK(p[i]);
  1291. i++;
  1292. if (unack && ack)
  1293. continue;
  1294. len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
  1295. (unsigned long long)s << bb->shift,
  1296. length << bb->shift);
  1297. }
  1298. if (unack && len == 0)
  1299. bb->unacked_exist = 0;
  1300. if (read_seqretry(&bb->lock, seq))
  1301. goto retry;
  1302. return len;
  1303. }
  1304. EXPORT_SYMBOL_GPL(badblocks_show);
  1305. /**
  1306. * badblocks_store() - sysfs access to bad-blocks list
  1307. * @bb: the badblocks structure that holds all badblock information
  1308. * @page: buffer received from sysfs
  1309. * @len: length of data received from sysfs
  1310. * @unack: weather to show unacknowledged badblocks
  1311. *
  1312. * Return:
  1313. * Length of the buffer processed or -ve error.
  1314. */
  1315. ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
  1316. int unack)
  1317. {
  1318. unsigned long long sector;
  1319. int length;
  1320. char newline;
  1321. switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
  1322. case 3:
  1323. if (newline != '\n')
  1324. return -EINVAL;
  1325. fallthrough;
  1326. case 2:
  1327. if (length <= 0)
  1328. return -EINVAL;
  1329. break;
  1330. default:
  1331. return -EINVAL;
  1332. }
  1333. if (!badblocks_set(bb, sector, length, !unack))
  1334. return -ENOSPC;
  1335. return len;
  1336. }
  1337. EXPORT_SYMBOL_GPL(badblocks_store);
  1338. static int __badblocks_init(struct device *dev, struct badblocks *bb,
  1339. int enable)
  1340. {
  1341. bb->dev = dev;
  1342. bb->count = 0;
  1343. if (enable)
  1344. bb->shift = 0;
  1345. else
  1346. bb->shift = -1;
  1347. if (dev)
  1348. bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
  1349. else
  1350. bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
  1351. if (!bb->page) {
  1352. bb->shift = -1;
  1353. return -ENOMEM;
  1354. }
  1355. seqlock_init(&bb->lock);
  1356. return 0;
  1357. }
  1358. /**
  1359. * badblocks_init() - initialize the badblocks structure
  1360. * @bb: the badblocks structure that holds all badblock information
  1361. * @enable: weather to enable badblocks accounting
  1362. *
  1363. * Return:
  1364. * 0: success
  1365. * -ve errno: on error
  1366. */
  1367. int badblocks_init(struct badblocks *bb, int enable)
  1368. {
  1369. return __badblocks_init(NULL, bb, enable);
  1370. }
  1371. EXPORT_SYMBOL_GPL(badblocks_init);
  1372. int devm_init_badblocks(struct device *dev, struct badblocks *bb)
  1373. {
  1374. if (!bb)
  1375. return -EINVAL;
  1376. return __badblocks_init(dev, bb, 1);
  1377. }
  1378. EXPORT_SYMBOL_GPL(devm_init_badblocks);
  1379. /**
  1380. * badblocks_exit() - free the badblocks structure
  1381. * @bb: the badblocks structure that holds all badblock information
  1382. */
  1383. void badblocks_exit(struct badblocks *bb)
  1384. {
  1385. if (!bb)
  1386. return;
  1387. if (bb->dev)
  1388. devm_kfree(bb->dev, bb->page);
  1389. else
  1390. kfree(bb->page);
  1391. bb->page = NULL;
  1392. }
  1393. EXPORT_SYMBOL_GPL(badblocks_exit);