jfs_xtree.c 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) International Business Machines Corp., 2000-2005
  4. */
  5. /*
  6. * jfs_xtree.c: extent allocation descriptor B+-tree manager
  7. */
  8. #include <linux/fs.h>
  9. #include <linux/module.h>
  10. #include <linux/quotaops.h>
  11. #include <linux/seq_file.h>
  12. #include "jfs_incore.h"
  13. #include "jfs_filsys.h"
  14. #include "jfs_metapage.h"
  15. #include "jfs_dmap.h"
  16. #include "jfs_dinode.h"
  17. #include "jfs_superblock.h"
  18. #include "jfs_debug.h"
  19. /*
  20. * xtree local flag
  21. */
  22. #define XT_INSERT 0x00000001
  23. /*
  24. * xtree key/entry comparison: extent offset
  25. *
  26. * return:
  27. * -1: k < start of extent
  28. * 0: start_of_extent <= k <= end_of_extent
  29. * 1: k > end_of_extent
  30. */
  31. #define XT_CMP(CMP, K, X, OFFSET64)\
  32. {\
  33. OFFSET64 = offsetXAD(X);\
  34. (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
  35. ((K) < OFFSET64) ? -1 : 0;\
  36. }
  37. /* write a xad entry */
  38. #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
  39. {\
  40. (XAD)->flag = (FLAG);\
  41. XADoffset((XAD), (OFF));\
  42. XADlength((XAD), (LEN));\
  43. XADaddress((XAD), (ADDR));\
  44. }
  45. #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
  46. /* for consistency */
  47. #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
  48. #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
  49. BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
  50. /* xtree entry parameter descriptor */
  51. struct xtsplit {
  52. struct metapage *mp;
  53. s16 index;
  54. u8 flag;
  55. s64 off;
  56. s64 addr;
  57. int len;
  58. struct pxdlist *pxdlist;
  59. };
  60. /*
  61. * statistics
  62. */
  63. #ifdef CONFIG_JFS_STATISTICS
  64. static struct {
  65. uint search;
  66. uint fastSearch;
  67. uint split;
  68. } xtStat;
  69. #endif
  70. /*
  71. * forward references
  72. */
  73. static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
  74. struct btstack * btstack, int flag);
  75. static int xtSplitUp(tid_t tid,
  76. struct inode *ip,
  77. struct xtsplit * split, struct btstack * btstack);
  78. static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
  79. struct metapage ** rmpp, s64 * rbnp);
  80. static int xtSplitRoot(tid_t tid, struct inode *ip,
  81. struct xtsplit * split, struct metapage ** rmpp);
  82. /*
  83. * xt_getpage()
  84. *
  85. * function: get the page buffer for a specified block address.
  86. *
  87. * parameters:
  88. * ip - pointer to the inode
  89. * bn - block number (s64) of the xtree page to be retrieved;
  90. * mp - pointer to a metapage pointer where the page buffer is returned;
  91. *
  92. * returns:
  93. * A pointer to the xtree page (xtpage_t) on success, -EIO on error.
  94. */
  95. static inline xtpage_t *xt_getpage(struct inode *ip, s64 bn, struct metapage **mp)
  96. {
  97. xtpage_t *p;
  98. int rc;
  99. BT_GETPAGE(ip, bn, *mp, xtpage_t, PSIZE, p, rc, i_xtroot);
  100. if (rc)
  101. return ERR_PTR(rc);
  102. if ((le16_to_cpu(p->header.nextindex) < XTENTRYSTART) ||
  103. (le16_to_cpu(p->header.nextindex) >
  104. le16_to_cpu(p->header.maxentry)) ||
  105. (le16_to_cpu(p->header.maxentry) >
  106. ((bn == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) {
  107. jfs_error(ip->i_sb, "xt_getpage: xtree page corrupt\n");
  108. BT_PUTPAGE(*mp);
  109. *mp = NULL;
  110. return ERR_PTR(-EIO);
  111. }
  112. return p;
  113. }
  114. /*
  115. * xtLookup()
  116. *
  117. * function: map a single page into a physical extent;
  118. */
  119. int xtLookup(struct inode *ip, s64 lstart,
  120. s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
  121. {
  122. int rc = 0;
  123. struct btstack btstack;
  124. int cmp;
  125. s64 bn;
  126. struct metapage *mp;
  127. xtpage_t *p;
  128. int index;
  129. xad_t *xad;
  130. s64 next, size, xoff, xend;
  131. int xlen;
  132. s64 xaddr;
  133. *paddr = 0;
  134. *plen = llen;
  135. if (!no_check) {
  136. /* is lookup offset beyond eof ? */
  137. size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
  138. JFS_SBI(ip->i_sb)->l2bsize;
  139. if (lstart >= size)
  140. return 0;
  141. }
  142. /*
  143. * search for the xad entry covering the logical extent
  144. */
  145. //search:
  146. if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
  147. jfs_err("xtLookup: xtSearch returned %d", rc);
  148. return rc;
  149. }
  150. /*
  151. * compute the physical extent covering logical extent
  152. *
  153. * N.B. search may have failed (e.g., hole in sparse file),
  154. * and returned the index of the next entry.
  155. */
  156. /* retrieve search result */
  157. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  158. /* is xad found covering start of logical extent ?
  159. * lstart is a page start address,
  160. * i.e., lstart cannot start in a hole;
  161. */
  162. if (cmp) {
  163. if (next)
  164. *plen = min(next - lstart, llen);
  165. goto out;
  166. }
  167. /*
  168. * lxd covered by xad
  169. */
  170. xad = &p->xad[index];
  171. xoff = offsetXAD(xad);
  172. xlen = lengthXAD(xad);
  173. xend = xoff + xlen;
  174. xaddr = addressXAD(xad);
  175. /* initialize new pxd */
  176. *pflag = xad->flag;
  177. *paddr = xaddr + (lstart - xoff);
  178. /* a page must be fully covered by an xad */
  179. *plen = min(xend - lstart, llen);
  180. out:
  181. XT_PUTPAGE(mp);
  182. return rc;
  183. }
  184. /*
  185. * xtSearch()
  186. *
  187. * function: search for the xad entry covering specified offset.
  188. *
  189. * parameters:
  190. * ip - file object;
  191. * xoff - extent offset;
  192. * nextp - address of next extent (if any) for search miss
  193. * cmpp - comparison result:
  194. * btstack - traverse stack;
  195. * flag - search process flag (XT_INSERT);
  196. *
  197. * returns:
  198. * btstack contains (bn, index) of search path traversed to the entry.
  199. * *cmpp is set to result of comparison with the entry returned.
  200. * the page containing the entry is pinned at exit.
  201. */
  202. static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
  203. int *cmpp, struct btstack * btstack, int flag)
  204. {
  205. struct jfs_inode_info *jfs_ip = JFS_IP(ip);
  206. int cmp = 1; /* init for empty page */
  207. s64 bn; /* block number */
  208. struct metapage *mp; /* page buffer */
  209. xtpage_t *p; /* page */
  210. xad_t *xad;
  211. int base, index, lim, btindex;
  212. struct btframe *btsp;
  213. int nsplit = 0; /* number of pages to split */
  214. s64 t64;
  215. s64 next = 0;
  216. INCREMENT(xtStat.search);
  217. BT_CLR(btstack);
  218. btstack->nsplit = 0;
  219. /*
  220. * search down tree from root:
  221. *
  222. * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
  223. * internal page, child page Pi contains entry with k, Ki <= K < Kj.
  224. *
  225. * if entry with search key K is not found
  226. * internal page search find the entry with largest key Ki
  227. * less than K which point to the child page to search;
  228. * leaf page search find the entry with smallest key Kj
  229. * greater than K so that the returned index is the position of
  230. * the entry to be shifted right for insertion of new entry.
  231. * for empty tree, search key is greater than any key of the tree.
  232. *
  233. * by convention, root bn = 0.
  234. */
  235. for (bn = 0;;) {
  236. /* get/pin the page to search */
  237. p = xt_getpage(ip, bn, &mp);
  238. if (IS_ERR(p))
  239. return PTR_ERR(p);
  240. /* try sequential access heuristics with the previous
  241. * access entry in target leaf page:
  242. * once search narrowed down into the target leaf,
  243. * key must either match an entry in the leaf or
  244. * key entry does not exist in the tree;
  245. */
  246. //fastSearch:
  247. if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
  248. (p->header.flag & BT_LEAF) &&
  249. (index = jfs_ip->btindex) <
  250. le16_to_cpu(p->header.nextindex)) {
  251. xad = &p->xad[index];
  252. t64 = offsetXAD(xad);
  253. if (xoff < t64 + lengthXAD(xad)) {
  254. if (xoff >= t64) {
  255. *cmpp = 0;
  256. goto out;
  257. }
  258. /* stop sequential access heuristics */
  259. goto binarySearch;
  260. } else { /* (t64 + lengthXAD(xad)) <= xoff */
  261. /* try next sequential entry */
  262. index++;
  263. if (index <
  264. le16_to_cpu(p->header.nextindex)) {
  265. xad++;
  266. t64 = offsetXAD(xad);
  267. if (xoff < t64 + lengthXAD(xad)) {
  268. if (xoff >= t64) {
  269. *cmpp = 0;
  270. goto out;
  271. }
  272. /* miss: key falls between
  273. * previous and this entry
  274. */
  275. *cmpp = 1;
  276. next = t64;
  277. goto out;
  278. }
  279. /* (xoff >= t64 + lengthXAD(xad));
  280. * matching entry may be further out:
  281. * stop heuristic search
  282. */
  283. /* stop sequential access heuristics */
  284. goto binarySearch;
  285. }
  286. /* (index == p->header.nextindex);
  287. * miss: key entry does not exist in
  288. * the target leaf/tree
  289. */
  290. *cmpp = 1;
  291. goto out;
  292. }
  293. /*
  294. * if hit, return index of the entry found, and
  295. * if miss, where new entry with search key is
  296. * to be inserted;
  297. */
  298. out:
  299. /* compute number of pages to split */
  300. if (flag & XT_INSERT) {
  301. if (p->header.nextindex == /* little-endian */
  302. p->header.maxentry)
  303. nsplit++;
  304. else
  305. nsplit = 0;
  306. btstack->nsplit = nsplit;
  307. }
  308. /* save search result */
  309. btsp = btstack->top;
  310. btsp->bn = bn;
  311. btsp->index = index;
  312. btsp->mp = mp;
  313. /* update sequential access heuristics */
  314. jfs_ip->btindex = index;
  315. if (nextp)
  316. *nextp = next;
  317. INCREMENT(xtStat.fastSearch);
  318. return 0;
  319. }
  320. /* well, ... full search now */
  321. binarySearch:
  322. lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
  323. /*
  324. * binary search with search key K on the current page
  325. */
  326. for (base = XTENTRYSTART; lim; lim >>= 1) {
  327. index = base + (lim >> 1);
  328. XT_CMP(cmp, xoff, &p->xad[index], t64);
  329. if (cmp == 0) {
  330. /*
  331. * search hit
  332. */
  333. /* search hit - leaf page:
  334. * return the entry found
  335. */
  336. if (p->header.flag & BT_LEAF) {
  337. *cmpp = cmp;
  338. /* compute number of pages to split */
  339. if (flag & XT_INSERT) {
  340. if (p->header.nextindex ==
  341. p->header.maxentry)
  342. nsplit++;
  343. else
  344. nsplit = 0;
  345. btstack->nsplit = nsplit;
  346. }
  347. /* save search result */
  348. btsp = btstack->top;
  349. btsp->bn = bn;
  350. btsp->index = index;
  351. btsp->mp = mp;
  352. /* init sequential access heuristics */
  353. btindex = jfs_ip->btindex;
  354. if (index == btindex ||
  355. index == btindex + 1)
  356. jfs_ip->btorder = BT_SEQUENTIAL;
  357. else
  358. jfs_ip->btorder = BT_RANDOM;
  359. jfs_ip->btindex = index;
  360. return 0;
  361. }
  362. /* search hit - internal page:
  363. * descend/search its child page
  364. */
  365. if (index < le16_to_cpu(p->header.nextindex)-1)
  366. next = offsetXAD(&p->xad[index + 1]);
  367. goto next;
  368. }
  369. if (cmp > 0) {
  370. base = index + 1;
  371. --lim;
  372. }
  373. }
  374. /*
  375. * search miss
  376. *
  377. * base is the smallest index with key (Kj) greater than
  378. * search key (K) and may be zero or maxentry index.
  379. */
  380. if (base < le16_to_cpu(p->header.nextindex))
  381. next = offsetXAD(&p->xad[base]);
  382. /*
  383. * search miss - leaf page:
  384. *
  385. * return location of entry (base) where new entry with
  386. * search key K is to be inserted.
  387. */
  388. if (p->header.flag & BT_LEAF) {
  389. *cmpp = cmp;
  390. /* compute number of pages to split */
  391. if (flag & XT_INSERT) {
  392. if (p->header.nextindex ==
  393. p->header.maxentry)
  394. nsplit++;
  395. else
  396. nsplit = 0;
  397. btstack->nsplit = nsplit;
  398. }
  399. /* save search result */
  400. btsp = btstack->top;
  401. btsp->bn = bn;
  402. btsp->index = base;
  403. btsp->mp = mp;
  404. /* init sequential access heuristics */
  405. btindex = jfs_ip->btindex;
  406. if (base == btindex || base == btindex + 1)
  407. jfs_ip->btorder = BT_SEQUENTIAL;
  408. else
  409. jfs_ip->btorder = BT_RANDOM;
  410. jfs_ip->btindex = base;
  411. if (nextp)
  412. *nextp = next;
  413. return 0;
  414. }
  415. /*
  416. * search miss - non-leaf page:
  417. *
  418. * if base is non-zero, decrement base by one to get the parent
  419. * entry of the child page to search.
  420. */
  421. index = base ? base - 1 : base;
  422. /*
  423. * go down to child page
  424. */
  425. next:
  426. /* update number of pages to split */
  427. if (p->header.nextindex == p->header.maxentry)
  428. nsplit++;
  429. else
  430. nsplit = 0;
  431. /* push (bn, index) of the parent page/entry */
  432. if (BT_STACK_FULL(btstack)) {
  433. jfs_error(ip->i_sb, "stack overrun!\n");
  434. XT_PUTPAGE(mp);
  435. return -EIO;
  436. }
  437. BT_PUSH(btstack, bn, index);
  438. /* get the child page block number */
  439. bn = addressXAD(&p->xad[index]);
  440. /* unpin the parent page */
  441. XT_PUTPAGE(mp);
  442. }
  443. }
  444. /*
  445. * xtInsert()
  446. *
  447. * function:
  448. *
  449. * parameter:
  450. * tid - transaction id;
  451. * ip - file object;
  452. * xflag - extent flag (XAD_NOTRECORDED):
  453. * xoff - extent offset;
  454. * xlen - extent length;
  455. * xaddrp - extent address pointer (in/out):
  456. * if (*xaddrp)
  457. * caller allocated data extent at *xaddrp;
  458. * else
  459. * allocate data extent and return its xaddr;
  460. * flag -
  461. *
  462. * return:
  463. */
  464. int xtInsert(tid_t tid, /* transaction id */
  465. struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
  466. int flag)
  467. {
  468. int rc = 0;
  469. s64 xaddr, hint;
  470. struct metapage *mp; /* meta-page buffer */
  471. xtpage_t *p; /* base B+-tree index page */
  472. s64 bn;
  473. int index, nextindex;
  474. struct btstack btstack; /* traverse stack */
  475. struct xtsplit split; /* split information */
  476. xad_t *xad;
  477. int cmp;
  478. s64 next;
  479. struct tlock *tlck;
  480. struct xtlock *xtlck;
  481. jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
  482. /*
  483. * search for the entry location at which to insert:
  484. *
  485. * xtFastSearch() and xtSearch() both returns (leaf page
  486. * pinned, index at which to insert).
  487. * n.b. xtSearch() may return index of maxentry of
  488. * the full page.
  489. */
  490. if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
  491. return rc;
  492. /* retrieve search result */
  493. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  494. /* This test must follow XT_GETSEARCH since mp must be valid if
  495. * we branch to out: */
  496. if ((cmp == 0) || (next && (xlen > next - xoff))) {
  497. rc = -EEXIST;
  498. goto out;
  499. }
  500. /*
  501. * allocate data extent requested
  502. *
  503. * allocation hint: last xad
  504. */
  505. if ((xaddr = *xaddrp) == 0) {
  506. if (index > XTENTRYSTART) {
  507. xad = &p->xad[index - 1];
  508. hint = addressXAD(xad) + lengthXAD(xad) - 1;
  509. } else
  510. hint = 0;
  511. if ((rc = dquot_alloc_block(ip, xlen)))
  512. goto out;
  513. if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
  514. dquot_free_block(ip, xlen);
  515. goto out;
  516. }
  517. }
  518. /*
  519. * insert entry for new extent
  520. */
  521. xflag |= XAD_NEW;
  522. /*
  523. * if the leaf page is full, split the page and
  524. * propagate up the router entry for the new page from split
  525. *
  526. * The xtSplitUp() will insert the entry and unpin the leaf page.
  527. */
  528. nextindex = le16_to_cpu(p->header.nextindex);
  529. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  530. split.mp = mp;
  531. split.index = index;
  532. split.flag = xflag;
  533. split.off = xoff;
  534. split.len = xlen;
  535. split.addr = xaddr;
  536. split.pxdlist = NULL;
  537. if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
  538. /* undo data extent allocation */
  539. if (*xaddrp == 0) {
  540. dbFree(ip, xaddr, (s64) xlen);
  541. dquot_free_block(ip, xlen);
  542. }
  543. return rc;
  544. }
  545. *xaddrp = xaddr;
  546. return 0;
  547. }
  548. /*
  549. * insert the new entry into the leaf page
  550. */
  551. /*
  552. * acquire a transaction lock on the leaf page;
  553. *
  554. * action: xad insertion/extension;
  555. */
  556. BT_MARK_DIRTY(mp, ip);
  557. /* if insert into middle, shift right remaining entries. */
  558. if (index < nextindex)
  559. memmove(&p->xad[index + 1], &p->xad[index],
  560. (nextindex - index) * sizeof(xad_t));
  561. /* insert the new entry: mark the entry NEW */
  562. xad = &p->xad[index];
  563. XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
  564. /* advance next available entry index */
  565. le16_add_cpu(&p->header.nextindex, 1);
  566. /* Don't log it if there are no links to the file */
  567. if (!test_cflag(COMMIT_Nolink, ip)) {
  568. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  569. xtlck = (struct xtlock *) & tlck->lock;
  570. xtlck->lwm.offset =
  571. (xtlck->lwm.offset) ? min(index,
  572. (int)xtlck->lwm.offset) : index;
  573. xtlck->lwm.length =
  574. le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
  575. }
  576. *xaddrp = xaddr;
  577. out:
  578. /* unpin the leaf page */
  579. XT_PUTPAGE(mp);
  580. return rc;
  581. }
  582. /*
  583. * xtSplitUp()
  584. *
  585. * function:
  586. * split full pages as propagating insertion up the tree
  587. *
  588. * parameter:
  589. * tid - transaction id;
  590. * ip - file object;
  591. * split - entry parameter descriptor;
  592. * btstack - traverse stack from xtSearch()
  593. *
  594. * return:
  595. */
  596. static int
  597. xtSplitUp(tid_t tid,
  598. struct inode *ip, struct xtsplit * split, struct btstack * btstack)
  599. {
  600. int rc = 0;
  601. struct metapage *smp;
  602. xtpage_t *sp; /* split page */
  603. struct metapage *rmp;
  604. s64 rbn; /* new right page block number */
  605. struct metapage *rcmp;
  606. xtpage_t *rcp; /* right child page */
  607. s64 rcbn; /* right child page block number */
  608. int skip; /* index of entry of insertion */
  609. int nextindex; /* next available entry index of p */
  610. struct btframe *parent; /* parent page entry on traverse stack */
  611. xad_t *xad;
  612. s64 xaddr;
  613. int xlen;
  614. int nsplit; /* number of pages split */
  615. struct pxdlist pxdlist;
  616. pxd_t *pxd;
  617. struct tlock *tlck;
  618. struct xtlock *xtlck;
  619. smp = split->mp;
  620. sp = XT_PAGE(ip, smp);
  621. /* is inode xtree root extension/inline EA area free ? */
  622. if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
  623. (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
  624. (JFS_IP(ip)->mode2 & INLINEEA)) {
  625. sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
  626. JFS_IP(ip)->mode2 &= ~INLINEEA;
  627. BT_MARK_DIRTY(smp, ip);
  628. /*
  629. * acquire a transaction lock on the leaf page;
  630. *
  631. * action: xad insertion/extension;
  632. */
  633. /* if insert into middle, shift right remaining entries. */
  634. skip = split->index;
  635. nextindex = le16_to_cpu(sp->header.nextindex);
  636. if (skip < nextindex)
  637. memmove(&sp->xad[skip + 1], &sp->xad[skip],
  638. (nextindex - skip) * sizeof(xad_t));
  639. /* insert the new entry: mark the entry NEW */
  640. xad = &sp->xad[skip];
  641. XT_PUTENTRY(xad, split->flag, split->off, split->len,
  642. split->addr);
  643. /* advance next available entry index */
  644. le16_add_cpu(&sp->header.nextindex, 1);
  645. /* Don't log it if there are no links to the file */
  646. if (!test_cflag(COMMIT_Nolink, ip)) {
  647. tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
  648. xtlck = (struct xtlock *) & tlck->lock;
  649. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  650. min(skip, (int)xtlck->lwm.offset) : skip;
  651. xtlck->lwm.length =
  652. le16_to_cpu(sp->header.nextindex) -
  653. xtlck->lwm.offset;
  654. }
  655. return 0;
  656. }
  657. /*
  658. * allocate new index blocks to cover index page split(s)
  659. *
  660. * allocation hint: ?
  661. */
  662. if (split->pxdlist == NULL) {
  663. nsplit = btstack->nsplit;
  664. split->pxdlist = &pxdlist;
  665. pxdlist.maxnpxd = pxdlist.npxd = 0;
  666. pxd = &pxdlist.pxd[0];
  667. xlen = JFS_SBI(ip->i_sb)->nbperpage;
  668. for (; nsplit > 0; nsplit--, pxd++) {
  669. if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
  670. == 0) {
  671. PXDaddress(pxd, xaddr);
  672. PXDlength(pxd, xlen);
  673. pxdlist.maxnpxd++;
  674. continue;
  675. }
  676. /* undo allocation */
  677. XT_PUTPAGE(smp);
  678. return rc;
  679. }
  680. }
  681. /*
  682. * Split leaf page <sp> into <sp> and a new right page <rp>.
  683. *
  684. * The split routines insert the new entry into the leaf page,
  685. * and acquire txLock as appropriate.
  686. * return <rp> pinned and its block number <rpbn>.
  687. */
  688. rc = (sp->header.flag & BT_ROOT) ?
  689. xtSplitRoot(tid, ip, split, &rmp) :
  690. xtSplitPage(tid, ip, split, &rmp, &rbn);
  691. XT_PUTPAGE(smp);
  692. if (rc)
  693. return -EIO;
  694. /*
  695. * propagate up the router entry for the leaf page just split
  696. *
  697. * insert a router entry for the new page into the parent page,
  698. * propagate the insert/split up the tree by walking back the stack
  699. * of (bn of parent page, index of child page entry in parent page)
  700. * that were traversed during the search for the page that split.
  701. *
  702. * the propagation of insert/split up the tree stops if the root
  703. * splits or the page inserted into doesn't have to split to hold
  704. * the new entry.
  705. *
  706. * the parent entry for the split page remains the same, and
  707. * a new entry is inserted at its right with the first key and
  708. * block number of the new right page.
  709. *
  710. * There are a maximum of 3 pages pinned at any time:
  711. * right child, left parent and right parent (when the parent splits)
  712. * to keep the child page pinned while working on the parent.
  713. * make sure that all pins are released at exit.
  714. */
  715. while ((parent = BT_POP(btstack)) != NULL) {
  716. /* parent page specified by stack frame <parent> */
  717. /* keep current child pages <rcp> pinned */
  718. rcmp = rmp;
  719. rcbn = rbn;
  720. rcp = XT_PAGE(ip, rcmp);
  721. /*
  722. * insert router entry in parent for new right child page <rp>
  723. */
  724. /* get/pin the parent page <sp> */
  725. sp = xt_getpage(ip, parent->bn, &smp);
  726. if (IS_ERR(sp)) {
  727. XT_PUTPAGE(rcmp);
  728. return PTR_ERR(sp);
  729. }
  730. /*
  731. * The new key entry goes ONE AFTER the index of parent entry,
  732. * because the split was to the right.
  733. */
  734. skip = parent->index + 1;
  735. /*
  736. * split or shift right remaining entries of the parent page
  737. */
  738. nextindex = le16_to_cpu(sp->header.nextindex);
  739. /*
  740. * parent page is full - split the parent page
  741. */
  742. if (nextindex == le16_to_cpu(sp->header.maxentry)) {
  743. /* init for parent page split */
  744. split->mp = smp;
  745. split->index = skip; /* index at insert */
  746. split->flag = XAD_NEW;
  747. split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
  748. split->len = JFS_SBI(ip->i_sb)->nbperpage;
  749. split->addr = rcbn;
  750. /* unpin previous right child page */
  751. XT_PUTPAGE(rcmp);
  752. /* The split routines insert the new entry,
  753. * and acquire txLock as appropriate.
  754. * return <rp> pinned and its block number <rpbn>.
  755. */
  756. rc = (sp->header.flag & BT_ROOT) ?
  757. xtSplitRoot(tid, ip, split, &rmp) :
  758. xtSplitPage(tid, ip, split, &rmp, &rbn);
  759. if (rc) {
  760. XT_PUTPAGE(smp);
  761. return rc;
  762. }
  763. XT_PUTPAGE(smp);
  764. /* keep new child page <rp> pinned */
  765. }
  766. /*
  767. * parent page is not full - insert in parent page
  768. */
  769. else {
  770. /*
  771. * insert router entry in parent for the right child
  772. * page from the first entry of the right child page:
  773. */
  774. /*
  775. * acquire a transaction lock on the parent page;
  776. *
  777. * action: router xad insertion;
  778. */
  779. BT_MARK_DIRTY(smp, ip);
  780. /*
  781. * if insert into middle, shift right remaining entries
  782. */
  783. if (skip < nextindex)
  784. memmove(&sp->xad[skip + 1], &sp->xad[skip],
  785. (nextindex -
  786. skip) << L2XTSLOTSIZE);
  787. /* insert the router entry */
  788. xad = &sp->xad[skip];
  789. XT_PUTENTRY(xad, XAD_NEW,
  790. offsetXAD(&rcp->xad[XTENTRYSTART]),
  791. JFS_SBI(ip->i_sb)->nbperpage, rcbn);
  792. /* advance next available entry index. */
  793. le16_add_cpu(&sp->header.nextindex, 1);
  794. /* Don't log it if there are no links to the file */
  795. if (!test_cflag(COMMIT_Nolink, ip)) {
  796. tlck = txLock(tid, ip, smp,
  797. tlckXTREE | tlckGROW);
  798. xtlck = (struct xtlock *) & tlck->lock;
  799. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  800. min(skip, (int)xtlck->lwm.offset) : skip;
  801. xtlck->lwm.length =
  802. le16_to_cpu(sp->header.nextindex) -
  803. xtlck->lwm.offset;
  804. }
  805. /* unpin parent page */
  806. XT_PUTPAGE(smp);
  807. /* exit propagate up */
  808. break;
  809. }
  810. }
  811. /* unpin current right page */
  812. XT_PUTPAGE(rmp);
  813. return 0;
  814. }
  815. /*
  816. * xtSplitPage()
  817. *
  818. * function:
  819. * split a full non-root page into
  820. * original/split/left page and new right page
  821. * i.e., the original/split page remains as left page.
  822. *
  823. * parameter:
  824. * int tid,
  825. * struct inode *ip,
  826. * struct xtsplit *split,
  827. * struct metapage **rmpp,
  828. * u64 *rbnp,
  829. *
  830. * return:
  831. * Pointer to page in which to insert or NULL on error.
  832. */
  833. static int
  834. xtSplitPage(tid_t tid, struct inode *ip,
  835. struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
  836. {
  837. int rc = 0;
  838. struct metapage *smp;
  839. xtpage_t *sp;
  840. struct metapage *rmp;
  841. xtpage_t *rp; /* new right page allocated */
  842. s64 rbn; /* new right page block number */
  843. struct metapage *mp;
  844. xtpage_t *p;
  845. s64 nextbn;
  846. int skip, maxentry, middle, righthalf, n;
  847. xad_t *xad;
  848. struct pxdlist *pxdlist;
  849. pxd_t *pxd;
  850. struct tlock *tlck;
  851. struct xtlock *sxtlck = NULL, *rxtlck = NULL;
  852. int quota_allocation = 0;
  853. smp = split->mp;
  854. sp = XT_PAGE(ip, smp);
  855. INCREMENT(xtStat.split);
  856. pxdlist = split->pxdlist;
  857. pxd = &pxdlist->pxd[pxdlist->npxd];
  858. pxdlist->npxd++;
  859. rbn = addressPXD(pxd);
  860. /* Allocate blocks to quota. */
  861. rc = dquot_alloc_block(ip, lengthPXD(pxd));
  862. if (rc)
  863. goto clean_up;
  864. quota_allocation += lengthPXD(pxd);
  865. /*
  866. * allocate the new right page for the split
  867. */
  868. rmp = get_metapage(ip, rbn, PSIZE, 1);
  869. if (rmp == NULL) {
  870. rc = -EIO;
  871. goto clean_up;
  872. }
  873. jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
  874. BT_MARK_DIRTY(rmp, ip);
  875. /*
  876. * action: new page;
  877. */
  878. rp = (xtpage_t *) rmp->data;
  879. rp->header.self = *pxd;
  880. rp->header.flag = sp->header.flag & BT_TYPE;
  881. rp->header.maxentry = sp->header.maxentry; /* little-endian */
  882. rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
  883. BT_MARK_DIRTY(smp, ip);
  884. /* Don't log it if there are no links to the file */
  885. if (!test_cflag(COMMIT_Nolink, ip)) {
  886. /*
  887. * acquire a transaction lock on the new right page;
  888. */
  889. tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
  890. rxtlck = (struct xtlock *) & tlck->lock;
  891. rxtlck->lwm.offset = XTENTRYSTART;
  892. /*
  893. * acquire a transaction lock on the split page
  894. */
  895. tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
  896. sxtlck = (struct xtlock *) & tlck->lock;
  897. }
  898. /*
  899. * initialize/update sibling pointers of <sp> and <rp>
  900. */
  901. nextbn = le64_to_cpu(sp->header.next);
  902. rp->header.next = cpu_to_le64(nextbn);
  903. rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
  904. sp->header.next = cpu_to_le64(rbn);
  905. skip = split->index;
  906. /*
  907. * sequential append at tail (after last entry of last page)
  908. *
  909. * if splitting the last page on a level because of appending
  910. * a entry to it (skip is maxentry), it's likely that the access is
  911. * sequential. adding an empty page on the side of the level is less
  912. * work and can push the fill factor much higher than normal.
  913. * if we're wrong it's no big deal - we will do the split the right
  914. * way next time.
  915. * (it may look like it's equally easy to do a similar hack for
  916. * reverse sorted data, that is, split the tree left, but it's not.
  917. * Be my guest.)
  918. */
  919. if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
  920. /*
  921. * acquire a transaction lock on the new/right page;
  922. *
  923. * action: xad insertion;
  924. */
  925. /* insert entry at the first entry of the new right page */
  926. xad = &rp->xad[XTENTRYSTART];
  927. XT_PUTENTRY(xad, split->flag, split->off, split->len,
  928. split->addr);
  929. rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
  930. if (!test_cflag(COMMIT_Nolink, ip)) {
  931. /* rxtlck->lwm.offset = XTENTRYSTART; */
  932. rxtlck->lwm.length = 1;
  933. }
  934. *rmpp = rmp;
  935. *rbnp = rbn;
  936. jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
  937. return 0;
  938. }
  939. /*
  940. * non-sequential insert (at possibly middle page)
  941. */
  942. /*
  943. * update previous pointer of old next/right page of <sp>
  944. */
  945. if (nextbn != 0) {
  946. p = xt_getpage(ip, nextbn, &mp);
  947. if (IS_ERR(p)) {
  948. XT_PUTPAGE(rmp);
  949. return PTR_ERR(p);
  950. }
  951. BT_MARK_DIRTY(mp, ip);
  952. /*
  953. * acquire a transaction lock on the next page;
  954. *
  955. * action:sibling pointer update;
  956. */
  957. if (!test_cflag(COMMIT_Nolink, ip))
  958. tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
  959. p->header.prev = cpu_to_le64(rbn);
  960. /* sibling page may have been updated previously, or
  961. * it may be updated later;
  962. */
  963. XT_PUTPAGE(mp);
  964. }
  965. /*
  966. * split the data between the split and new/right pages
  967. */
  968. maxentry = le16_to_cpu(sp->header.maxentry);
  969. middle = maxentry >> 1;
  970. righthalf = maxentry - middle;
  971. /*
  972. * skip index in old split/left page - insert into left page:
  973. */
  974. if (skip <= middle) {
  975. /* move right half of split page to the new right page */
  976. memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
  977. righthalf << L2XTSLOTSIZE);
  978. /* shift right tail of left half to make room for new entry */
  979. if (skip < middle)
  980. memmove(&sp->xad[skip + 1], &sp->xad[skip],
  981. (middle - skip) << L2XTSLOTSIZE);
  982. /* insert new entry */
  983. xad = &sp->xad[skip];
  984. XT_PUTENTRY(xad, split->flag, split->off, split->len,
  985. split->addr);
  986. /* update page header */
  987. sp->header.nextindex = cpu_to_le16(middle + 1);
  988. if (!test_cflag(COMMIT_Nolink, ip)) {
  989. sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
  990. min(skip, (int)sxtlck->lwm.offset) : skip;
  991. }
  992. rp->header.nextindex =
  993. cpu_to_le16(XTENTRYSTART + righthalf);
  994. }
  995. /*
  996. * skip index in new right page - insert into right page:
  997. */
  998. else {
  999. /* move left head of right half to right page */
  1000. n = skip - middle;
  1001. memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
  1002. n << L2XTSLOTSIZE);
  1003. /* insert new entry */
  1004. n += XTENTRYSTART;
  1005. xad = &rp->xad[n];
  1006. XT_PUTENTRY(xad, split->flag, split->off, split->len,
  1007. split->addr);
  1008. /* move right tail of right half to right page */
  1009. if (skip < maxentry)
  1010. memmove(&rp->xad[n + 1], &sp->xad[skip],
  1011. (maxentry - skip) << L2XTSLOTSIZE);
  1012. /* update page header */
  1013. sp->header.nextindex = cpu_to_le16(middle);
  1014. if (!test_cflag(COMMIT_Nolink, ip)) {
  1015. sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
  1016. min(middle, (int)sxtlck->lwm.offset) : middle;
  1017. }
  1018. rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
  1019. righthalf + 1);
  1020. }
  1021. if (!test_cflag(COMMIT_Nolink, ip)) {
  1022. sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
  1023. sxtlck->lwm.offset;
  1024. /* rxtlck->lwm.offset = XTENTRYSTART; */
  1025. rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
  1026. XTENTRYSTART;
  1027. }
  1028. *rmpp = rmp;
  1029. *rbnp = rbn;
  1030. jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
  1031. return rc;
  1032. clean_up:
  1033. /* Rollback quota allocation. */
  1034. if (quota_allocation)
  1035. dquot_free_block(ip, quota_allocation);
  1036. return (rc);
  1037. }
  1038. /*
  1039. * xtSplitRoot()
  1040. *
  1041. * function:
  1042. * split the full root page into original/root/split page and new
  1043. * right page
  1044. * i.e., root remains fixed in tree anchor (inode) and the root is
  1045. * copied to a single new right child page since root page <<
  1046. * non-root page, and the split root page contains a single entry
  1047. * for the new right child page.
  1048. *
  1049. * parameter:
  1050. * int tid,
  1051. * struct inode *ip,
  1052. * struct xtsplit *split,
  1053. * struct metapage **rmpp)
  1054. *
  1055. * return:
  1056. * Pointer to page in which to insert or NULL on error.
  1057. */
  1058. static int
  1059. xtSplitRoot(tid_t tid,
  1060. struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
  1061. {
  1062. xtpage_t *sp;
  1063. struct metapage *rmp;
  1064. xtpage_t *rp;
  1065. s64 rbn;
  1066. int skip, nextindex;
  1067. xad_t *xad;
  1068. pxd_t *pxd;
  1069. struct pxdlist *pxdlist;
  1070. struct tlock *tlck;
  1071. struct xtlock *xtlck;
  1072. int rc;
  1073. sp = (xtpage_t *) &JFS_IP(ip)->i_xtroot;
  1074. INCREMENT(xtStat.split);
  1075. /*
  1076. * allocate a single (right) child page
  1077. */
  1078. pxdlist = split->pxdlist;
  1079. pxd = &pxdlist->pxd[pxdlist->npxd];
  1080. pxdlist->npxd++;
  1081. rbn = addressPXD(pxd);
  1082. rmp = get_metapage(ip, rbn, PSIZE, 1);
  1083. if (rmp == NULL)
  1084. return -EIO;
  1085. /* Allocate blocks to quota. */
  1086. rc = dquot_alloc_block(ip, lengthPXD(pxd));
  1087. if (rc) {
  1088. release_metapage(rmp);
  1089. return rc;
  1090. }
  1091. jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
  1092. /*
  1093. * acquire a transaction lock on the new right page;
  1094. *
  1095. * action: new page;
  1096. */
  1097. BT_MARK_DIRTY(rmp, ip);
  1098. rp = (xtpage_t *) rmp->data;
  1099. rp->header.flag =
  1100. (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
  1101. rp->header.self = *pxd;
  1102. rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
  1103. rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
  1104. /* initialize sibling pointers */
  1105. rp->header.next = 0;
  1106. rp->header.prev = 0;
  1107. /*
  1108. * copy the in-line root page into new right page extent
  1109. */
  1110. nextindex = le16_to_cpu(sp->header.maxentry);
  1111. memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
  1112. (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
  1113. /*
  1114. * insert the new entry into the new right/child page
  1115. * (skip index in the new right page will not change)
  1116. */
  1117. skip = split->index;
  1118. /* if insert into middle, shift right remaining entries */
  1119. if (skip != nextindex)
  1120. memmove(&rp->xad[skip + 1], &rp->xad[skip],
  1121. (nextindex - skip) * sizeof(xad_t));
  1122. xad = &rp->xad[skip];
  1123. XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
  1124. /* update page header */
  1125. rp->header.nextindex = cpu_to_le16(nextindex + 1);
  1126. if (!test_cflag(COMMIT_Nolink, ip)) {
  1127. tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
  1128. xtlck = (struct xtlock *) & tlck->lock;
  1129. xtlck->lwm.offset = XTENTRYSTART;
  1130. xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
  1131. XTENTRYSTART;
  1132. }
  1133. /*
  1134. * reset the root
  1135. *
  1136. * init root with the single entry for the new right page
  1137. * set the 1st entry offset to 0, which force the left-most key
  1138. * at any level of the tree to be less than any search key.
  1139. */
  1140. /*
  1141. * acquire a transaction lock on the root page (in-memory inode);
  1142. *
  1143. * action: root split;
  1144. */
  1145. BT_MARK_DIRTY(split->mp, ip);
  1146. xad = &sp->xad[XTENTRYSTART];
  1147. XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
  1148. /* update page header of root */
  1149. sp->header.flag &= ~BT_LEAF;
  1150. sp->header.flag |= BT_INTERNAL;
  1151. sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
  1152. if (!test_cflag(COMMIT_Nolink, ip)) {
  1153. tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
  1154. xtlck = (struct xtlock *) & tlck->lock;
  1155. xtlck->lwm.offset = XTENTRYSTART;
  1156. xtlck->lwm.length = 1;
  1157. }
  1158. *rmpp = rmp;
  1159. jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
  1160. return 0;
  1161. }
  1162. /*
  1163. * xtExtend()
  1164. *
  1165. * function: extend in-place;
  1166. *
  1167. * note: existing extent may or may not have been committed.
  1168. * caller is responsible for pager buffer cache update, and
  1169. * working block allocation map update;
  1170. * update pmap: alloc whole extended extent;
  1171. */
  1172. int xtExtend(tid_t tid, /* transaction id */
  1173. struct inode *ip, s64 xoff, /* delta extent offset */
  1174. s32 xlen, /* delta extent length */
  1175. int flag)
  1176. {
  1177. int rc = 0;
  1178. int cmp;
  1179. struct metapage *mp; /* meta-page buffer */
  1180. xtpage_t *p; /* base B+-tree index page */
  1181. s64 bn;
  1182. int index, nextindex, len;
  1183. struct btstack btstack; /* traverse stack */
  1184. struct xtsplit split; /* split information */
  1185. xad_t *xad;
  1186. s64 xaddr;
  1187. struct tlock *tlck;
  1188. struct xtlock *xtlck = NULL;
  1189. jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
  1190. /* there must exist extent to be extended */
  1191. if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
  1192. return rc;
  1193. /* retrieve search result */
  1194. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  1195. if (cmp != 0) {
  1196. XT_PUTPAGE(mp);
  1197. jfs_error(ip->i_sb, "xtSearch did not find extent\n");
  1198. return -EIO;
  1199. }
  1200. /* extension must be contiguous */
  1201. xad = &p->xad[index];
  1202. if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
  1203. XT_PUTPAGE(mp);
  1204. jfs_error(ip->i_sb, "extension is not contiguous\n");
  1205. return -EIO;
  1206. }
  1207. /*
  1208. * acquire a transaction lock on the leaf page;
  1209. *
  1210. * action: xad insertion/extension;
  1211. */
  1212. BT_MARK_DIRTY(mp, ip);
  1213. if (!test_cflag(COMMIT_Nolink, ip)) {
  1214. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  1215. xtlck = (struct xtlock *) & tlck->lock;
  1216. }
  1217. /* extend will overflow extent ? */
  1218. xlen = lengthXAD(xad) + xlen;
  1219. if ((len = xlen - MAXXLEN) <= 0)
  1220. goto extendOld;
  1221. /*
  1222. * extent overflow: insert entry for new extent
  1223. */
  1224. //insertNew:
  1225. xoff = offsetXAD(xad) + MAXXLEN;
  1226. xaddr = addressXAD(xad) + MAXXLEN;
  1227. nextindex = le16_to_cpu(p->header.nextindex);
  1228. /*
  1229. * if the leaf page is full, insert the new entry and
  1230. * propagate up the router entry for the new page from split
  1231. *
  1232. * The xtSplitUp() will insert the entry and unpin the leaf page.
  1233. */
  1234. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1235. /* xtSpliUp() unpins leaf pages */
  1236. split.mp = mp;
  1237. split.index = index + 1;
  1238. split.flag = XAD_NEW;
  1239. split.off = xoff; /* split offset */
  1240. split.len = len;
  1241. split.addr = xaddr;
  1242. split.pxdlist = NULL;
  1243. if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
  1244. return rc;
  1245. /* get back old page */
  1246. p = xt_getpage(ip, bn, &mp);
  1247. if (IS_ERR(p))
  1248. return PTR_ERR(p);
  1249. /*
  1250. * if leaf root has been split, original root has been
  1251. * copied to new child page, i.e., original entry now
  1252. * resides on the new child page;
  1253. */
  1254. if (p->header.flag & BT_INTERNAL) {
  1255. ASSERT(p->header.nextindex ==
  1256. cpu_to_le16(XTENTRYSTART + 1));
  1257. xad = &p->xad[XTENTRYSTART];
  1258. bn = addressXAD(xad);
  1259. XT_PUTPAGE(mp);
  1260. /* get new child page */
  1261. p = xt_getpage(ip, bn, &mp);
  1262. if (IS_ERR(p))
  1263. return PTR_ERR(p);
  1264. BT_MARK_DIRTY(mp, ip);
  1265. if (!test_cflag(COMMIT_Nolink, ip)) {
  1266. tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
  1267. xtlck = (struct xtlock *) & tlck->lock;
  1268. }
  1269. }
  1270. }
  1271. /*
  1272. * insert the new entry into the leaf page
  1273. */
  1274. else {
  1275. /* insert the new entry: mark the entry NEW */
  1276. xad = &p->xad[index + 1];
  1277. XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
  1278. /* advance next available entry index */
  1279. le16_add_cpu(&p->header.nextindex, 1);
  1280. }
  1281. /* get back old entry */
  1282. xad = &p->xad[index];
  1283. xlen = MAXXLEN;
  1284. /*
  1285. * extend old extent
  1286. */
  1287. extendOld:
  1288. XADlength(xad, xlen);
  1289. if (!(xad->flag & XAD_NEW))
  1290. xad->flag |= XAD_EXTENDED;
  1291. if (!test_cflag(COMMIT_Nolink, ip)) {
  1292. xtlck->lwm.offset =
  1293. (xtlck->lwm.offset) ? min(index,
  1294. (int)xtlck->lwm.offset) : index;
  1295. xtlck->lwm.length =
  1296. le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
  1297. }
  1298. /* unpin the leaf page */
  1299. XT_PUTPAGE(mp);
  1300. return rc;
  1301. }
  1302. /*
  1303. * xtUpdate()
  1304. *
  1305. * function: update XAD;
  1306. *
  1307. * update extent for allocated_but_not_recorded or
  1308. * compressed extent;
  1309. *
  1310. * parameter:
  1311. * nxad - new XAD;
  1312. * logical extent of the specified XAD must be completely
  1313. * contained by an existing XAD;
  1314. */
  1315. int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
  1316. { /* new XAD */
  1317. int rc = 0;
  1318. int cmp;
  1319. struct metapage *mp; /* meta-page buffer */
  1320. xtpage_t *p; /* base B+-tree index page */
  1321. s64 bn;
  1322. int index0, index, newindex, nextindex;
  1323. struct btstack btstack; /* traverse stack */
  1324. struct xtsplit split; /* split information */
  1325. xad_t *xad, *lxad, *rxad;
  1326. int xflag;
  1327. s64 nxoff, xoff;
  1328. int nxlen, xlen, lxlen, rxlen;
  1329. s64 nxaddr, xaddr;
  1330. struct tlock *tlck;
  1331. struct xtlock *xtlck = NULL;
  1332. int newpage = 0;
  1333. /* there must exist extent to be tailgated */
  1334. nxoff = offsetXAD(nxad);
  1335. nxlen = lengthXAD(nxad);
  1336. nxaddr = addressXAD(nxad);
  1337. if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
  1338. return rc;
  1339. /* retrieve search result */
  1340. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
  1341. if (cmp != 0) {
  1342. XT_PUTPAGE(mp);
  1343. jfs_error(ip->i_sb, "Could not find extent\n");
  1344. return -EIO;
  1345. }
  1346. BT_MARK_DIRTY(mp, ip);
  1347. /*
  1348. * acquire tlock of the leaf page containing original entry
  1349. */
  1350. if (!test_cflag(COMMIT_Nolink, ip)) {
  1351. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  1352. xtlck = (struct xtlock *) & tlck->lock;
  1353. }
  1354. xad = &p->xad[index0];
  1355. xflag = xad->flag;
  1356. xoff = offsetXAD(xad);
  1357. xlen = lengthXAD(xad);
  1358. xaddr = addressXAD(xad);
  1359. /* nXAD must be completely contained within XAD */
  1360. if ((xoff > nxoff) ||
  1361. (nxoff + nxlen > xoff + xlen)) {
  1362. XT_PUTPAGE(mp);
  1363. jfs_error(ip->i_sb,
  1364. "nXAD in not completely contained within XAD\n");
  1365. return -EIO;
  1366. }
  1367. index = index0;
  1368. newindex = index + 1;
  1369. nextindex = le16_to_cpu(p->header.nextindex);
  1370. if (xoff < nxoff)
  1371. goto coalesceRight;
  1372. /*
  1373. * coalesce with left XAD
  1374. */
  1375. /* is XAD first entry of page ? */
  1376. if (index == XTENTRYSTART)
  1377. goto replace;
  1378. /* is nXAD logically and physically contiguous with lXAD ? */
  1379. lxad = &p->xad[index - 1];
  1380. lxlen = lengthXAD(lxad);
  1381. if (!(lxad->flag & XAD_NOTRECORDED) &&
  1382. (nxoff == offsetXAD(lxad) + lxlen) &&
  1383. (nxaddr == addressXAD(lxad) + lxlen) &&
  1384. (lxlen + nxlen < MAXXLEN)) {
  1385. /* extend right lXAD */
  1386. index0 = index - 1;
  1387. XADlength(lxad, lxlen + nxlen);
  1388. /* If we just merged two extents together, need to make sure the
  1389. * right extent gets logged. If the left one is marked XAD_NEW,
  1390. * then we know it will be logged. Otherwise, mark as
  1391. * XAD_EXTENDED
  1392. */
  1393. if (!(lxad->flag & XAD_NEW))
  1394. lxad->flag |= XAD_EXTENDED;
  1395. if (xlen > nxlen) {
  1396. /* truncate XAD */
  1397. XADoffset(xad, xoff + nxlen);
  1398. XADlength(xad, xlen - nxlen);
  1399. XADaddress(xad, xaddr + nxlen);
  1400. goto out;
  1401. } else { /* (xlen == nxlen) */
  1402. /* remove XAD */
  1403. if (index < nextindex - 1)
  1404. memmove(&p->xad[index], &p->xad[index + 1],
  1405. (nextindex - index -
  1406. 1) << L2XTSLOTSIZE);
  1407. p->header.nextindex =
  1408. cpu_to_le16(le16_to_cpu(p->header.nextindex) -
  1409. 1);
  1410. index = index0;
  1411. newindex = index + 1;
  1412. nextindex = le16_to_cpu(p->header.nextindex);
  1413. xoff = nxoff = offsetXAD(lxad);
  1414. xlen = nxlen = lxlen + nxlen;
  1415. xaddr = nxaddr = addressXAD(lxad);
  1416. goto coalesceRight;
  1417. }
  1418. }
  1419. /*
  1420. * replace XAD with nXAD
  1421. */
  1422. replace: /* (nxoff == xoff) */
  1423. if (nxlen == xlen) {
  1424. /* replace XAD with nXAD:recorded */
  1425. *xad = *nxad;
  1426. xad->flag = xflag & ~XAD_NOTRECORDED;
  1427. goto coalesceRight;
  1428. } else /* (nxlen < xlen) */
  1429. goto updateLeft;
  1430. /*
  1431. * coalesce with right XAD
  1432. */
  1433. coalesceRight: /* (xoff <= nxoff) */
  1434. /* is XAD last entry of page ? */
  1435. if (newindex == nextindex) {
  1436. if (xoff == nxoff)
  1437. goto out;
  1438. goto updateRight;
  1439. }
  1440. /* is nXAD logically and physically contiguous with rXAD ? */
  1441. rxad = &p->xad[index + 1];
  1442. rxlen = lengthXAD(rxad);
  1443. if (!(rxad->flag & XAD_NOTRECORDED) &&
  1444. (nxoff + nxlen == offsetXAD(rxad)) &&
  1445. (nxaddr + nxlen == addressXAD(rxad)) &&
  1446. (rxlen + nxlen < MAXXLEN)) {
  1447. /* extend left rXAD */
  1448. XADoffset(rxad, nxoff);
  1449. XADlength(rxad, rxlen + nxlen);
  1450. XADaddress(rxad, nxaddr);
  1451. /* If we just merged two extents together, need to make sure
  1452. * the left extent gets logged. If the right one is marked
  1453. * XAD_NEW, then we know it will be logged. Otherwise, mark as
  1454. * XAD_EXTENDED
  1455. */
  1456. if (!(rxad->flag & XAD_NEW))
  1457. rxad->flag |= XAD_EXTENDED;
  1458. if (xlen > nxlen)
  1459. /* truncate XAD */
  1460. XADlength(xad, xlen - nxlen);
  1461. else { /* (xlen == nxlen) */
  1462. /* remove XAD */
  1463. memmove(&p->xad[index], &p->xad[index + 1],
  1464. (nextindex - index - 1) << L2XTSLOTSIZE);
  1465. p->header.nextindex =
  1466. cpu_to_le16(le16_to_cpu(p->header.nextindex) -
  1467. 1);
  1468. }
  1469. goto out;
  1470. } else if (xoff == nxoff)
  1471. goto out;
  1472. if (xoff >= nxoff) {
  1473. XT_PUTPAGE(mp);
  1474. jfs_error(ip->i_sb, "xoff >= nxoff\n");
  1475. return -EIO;
  1476. }
  1477. /*
  1478. * split XAD into (lXAD, nXAD):
  1479. *
  1480. * |---nXAD--->
  1481. * --|----------XAD----------|--
  1482. * |-lXAD-|
  1483. */
  1484. updateRight: /* (xoff < nxoff) */
  1485. /* truncate old XAD as lXAD:not_recorded */
  1486. xad = &p->xad[index];
  1487. XADlength(xad, nxoff - xoff);
  1488. /* insert nXAD:recorded */
  1489. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1490. /* xtSpliUp() unpins leaf pages */
  1491. split.mp = mp;
  1492. split.index = newindex;
  1493. split.flag = xflag & ~XAD_NOTRECORDED;
  1494. split.off = nxoff;
  1495. split.len = nxlen;
  1496. split.addr = nxaddr;
  1497. split.pxdlist = NULL;
  1498. if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
  1499. return rc;
  1500. /* get back old page */
  1501. p = xt_getpage(ip, bn, &mp);
  1502. if (IS_ERR(p))
  1503. return PTR_ERR(p);
  1504. /*
  1505. * if leaf root has been split, original root has been
  1506. * copied to new child page, i.e., original entry now
  1507. * resides on the new child page;
  1508. */
  1509. if (p->header.flag & BT_INTERNAL) {
  1510. ASSERT(p->header.nextindex ==
  1511. cpu_to_le16(XTENTRYSTART + 1));
  1512. xad = &p->xad[XTENTRYSTART];
  1513. bn = addressXAD(xad);
  1514. XT_PUTPAGE(mp);
  1515. /* get new child page */
  1516. p = xt_getpage(ip, bn, &mp);
  1517. if (IS_ERR(p))
  1518. return PTR_ERR(p);
  1519. BT_MARK_DIRTY(mp, ip);
  1520. if (!test_cflag(COMMIT_Nolink, ip)) {
  1521. tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
  1522. xtlck = (struct xtlock *) & tlck->lock;
  1523. }
  1524. } else {
  1525. /* is nXAD on new page ? */
  1526. if (newindex >
  1527. (le16_to_cpu(p->header.maxentry) >> 1)) {
  1528. newindex =
  1529. newindex -
  1530. le16_to_cpu(p->header.nextindex) +
  1531. XTENTRYSTART;
  1532. newpage = 1;
  1533. }
  1534. }
  1535. } else {
  1536. /* if insert into middle, shift right remaining entries */
  1537. if (newindex < nextindex)
  1538. memmove(&p->xad[newindex + 1], &p->xad[newindex],
  1539. (nextindex - newindex) << L2XTSLOTSIZE);
  1540. /* insert the entry */
  1541. xad = &p->xad[newindex];
  1542. *xad = *nxad;
  1543. xad->flag = xflag & ~XAD_NOTRECORDED;
  1544. /* advance next available entry index. */
  1545. p->header.nextindex =
  1546. cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
  1547. }
  1548. /*
  1549. * does nXAD force 3-way split ?
  1550. *
  1551. * |---nXAD--->|
  1552. * --|----------XAD-------------|--
  1553. * |-lXAD-| |-rXAD -|
  1554. */
  1555. if (nxoff + nxlen == xoff + xlen)
  1556. goto out;
  1557. /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
  1558. if (newpage) {
  1559. /* close out old page */
  1560. if (!test_cflag(COMMIT_Nolink, ip)) {
  1561. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  1562. min(index0, (int)xtlck->lwm.offset) : index0;
  1563. xtlck->lwm.length =
  1564. le16_to_cpu(p->header.nextindex) -
  1565. xtlck->lwm.offset;
  1566. }
  1567. bn = le64_to_cpu(p->header.next);
  1568. XT_PUTPAGE(mp);
  1569. /* get new right page */
  1570. p = xt_getpage(ip, bn, &mp);
  1571. if (IS_ERR(p))
  1572. return PTR_ERR(p);
  1573. BT_MARK_DIRTY(mp, ip);
  1574. if (!test_cflag(COMMIT_Nolink, ip)) {
  1575. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  1576. xtlck = (struct xtlock *) & tlck->lock;
  1577. }
  1578. index0 = index = newindex;
  1579. } else
  1580. index++;
  1581. newindex = index + 1;
  1582. nextindex = le16_to_cpu(p->header.nextindex);
  1583. xlen = xlen - (nxoff - xoff);
  1584. xoff = nxoff;
  1585. xaddr = nxaddr;
  1586. /* recompute split pages */
  1587. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1588. XT_PUTPAGE(mp);
  1589. if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
  1590. return rc;
  1591. /* retrieve search result */
  1592. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
  1593. if (cmp != 0) {
  1594. XT_PUTPAGE(mp);
  1595. jfs_error(ip->i_sb, "xtSearch failed\n");
  1596. return -EIO;
  1597. }
  1598. if (index0 != index) {
  1599. XT_PUTPAGE(mp);
  1600. jfs_error(ip->i_sb, "unexpected value of index\n");
  1601. return -EIO;
  1602. }
  1603. }
  1604. /*
  1605. * split XAD into (nXAD, rXAD)
  1606. *
  1607. * ---nXAD---|
  1608. * --|----------XAD----------|--
  1609. * |-rXAD-|
  1610. */
  1611. updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */
  1612. /* update old XAD with nXAD:recorded */
  1613. xad = &p->xad[index];
  1614. *xad = *nxad;
  1615. xad->flag = xflag & ~XAD_NOTRECORDED;
  1616. /* insert rXAD:not_recorded */
  1617. xoff = xoff + nxlen;
  1618. xlen = xlen - nxlen;
  1619. xaddr = xaddr + nxlen;
  1620. if (nextindex == le16_to_cpu(p->header.maxentry)) {
  1621. /*
  1622. printf("xtUpdate.updateLeft.split p:0x%p\n", p);
  1623. */
  1624. /* xtSpliUp() unpins leaf pages */
  1625. split.mp = mp;
  1626. split.index = newindex;
  1627. split.flag = xflag;
  1628. split.off = xoff;
  1629. split.len = xlen;
  1630. split.addr = xaddr;
  1631. split.pxdlist = NULL;
  1632. if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
  1633. return rc;
  1634. /* get back old page */
  1635. p = xt_getpage(ip, bn, &mp);
  1636. if (IS_ERR(p))
  1637. return PTR_ERR(p);
  1638. /*
  1639. * if leaf root has been split, original root has been
  1640. * copied to new child page, i.e., original entry now
  1641. * resides on the new child page;
  1642. */
  1643. if (p->header.flag & BT_INTERNAL) {
  1644. ASSERT(p->header.nextindex ==
  1645. cpu_to_le16(XTENTRYSTART + 1));
  1646. xad = &p->xad[XTENTRYSTART];
  1647. bn = addressXAD(xad);
  1648. XT_PUTPAGE(mp);
  1649. /* get new child page */
  1650. p = xt_getpage(ip, bn, &mp);
  1651. if (IS_ERR(p))
  1652. return PTR_ERR(p);
  1653. BT_MARK_DIRTY(mp, ip);
  1654. if (!test_cflag(COMMIT_Nolink, ip)) {
  1655. tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
  1656. xtlck = (struct xtlock *) & tlck->lock;
  1657. }
  1658. }
  1659. } else {
  1660. /* if insert into middle, shift right remaining entries */
  1661. if (newindex < nextindex)
  1662. memmove(&p->xad[newindex + 1], &p->xad[newindex],
  1663. (nextindex - newindex) << L2XTSLOTSIZE);
  1664. /* insert the entry */
  1665. xad = &p->xad[newindex];
  1666. XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
  1667. /* advance next available entry index. */
  1668. p->header.nextindex =
  1669. cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
  1670. }
  1671. out:
  1672. if (!test_cflag(COMMIT_Nolink, ip)) {
  1673. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  1674. min(index0, (int)xtlck->lwm.offset) : index0;
  1675. xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
  1676. xtlck->lwm.offset;
  1677. }
  1678. /* unpin the leaf page */
  1679. XT_PUTPAGE(mp);
  1680. return rc;
  1681. }
  1682. /*
  1683. * xtAppend()
  1684. *
  1685. * function: grow in append mode from contiguous region specified ;
  1686. *
  1687. * parameter:
  1688. * tid - transaction id;
  1689. * ip - file object;
  1690. * xflag - extent flag:
  1691. * xoff - extent offset;
  1692. * maxblocks - max extent length;
  1693. * xlen - extent length (in/out);
  1694. * xaddrp - extent address pointer (in/out):
  1695. * flag -
  1696. *
  1697. * return:
  1698. */
  1699. int xtAppend(tid_t tid, /* transaction id */
  1700. struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
  1701. s32 * xlenp, /* (in/out) */
  1702. s64 * xaddrp, /* (in/out) */
  1703. int flag)
  1704. {
  1705. int rc = 0;
  1706. struct metapage *mp; /* meta-page buffer */
  1707. xtpage_t *p; /* base B+-tree index page */
  1708. s64 bn, xaddr;
  1709. int index, nextindex;
  1710. struct btstack btstack; /* traverse stack */
  1711. struct xtsplit split; /* split information */
  1712. xad_t *xad;
  1713. int cmp;
  1714. struct tlock *tlck;
  1715. struct xtlock *xtlck;
  1716. int nsplit, nblocks, xlen;
  1717. struct pxdlist pxdlist;
  1718. pxd_t *pxd;
  1719. s64 next;
  1720. xaddr = *xaddrp;
  1721. xlen = *xlenp;
  1722. jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
  1723. (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
  1724. /*
  1725. * search for the entry location at which to insert:
  1726. *
  1727. * xtFastSearch() and xtSearch() both returns (leaf page
  1728. * pinned, index at which to insert).
  1729. * n.b. xtSearch() may return index of maxentry of
  1730. * the full page.
  1731. */
  1732. if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
  1733. return rc;
  1734. /* retrieve search result */
  1735. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  1736. if (cmp == 0) {
  1737. rc = -EEXIST;
  1738. goto out;
  1739. }
  1740. if (next)
  1741. xlen = min(xlen, (int)(next - xoff));
  1742. //insert:
  1743. /*
  1744. * insert entry for new extent
  1745. */
  1746. xflag |= XAD_NEW;
  1747. /*
  1748. * if the leaf page is full, split the page and
  1749. * propagate up the router entry for the new page from split
  1750. *
  1751. * The xtSplitUp() will insert the entry and unpin the leaf page.
  1752. */
  1753. nextindex = le16_to_cpu(p->header.nextindex);
  1754. if (nextindex < le16_to_cpu(p->header.maxentry))
  1755. goto insertLeaf;
  1756. /*
  1757. * allocate new index blocks to cover index page split(s)
  1758. */
  1759. nsplit = btstack.nsplit;
  1760. split.pxdlist = &pxdlist;
  1761. pxdlist.maxnpxd = pxdlist.npxd = 0;
  1762. pxd = &pxdlist.pxd[0];
  1763. nblocks = JFS_SBI(ip->i_sb)->nbperpage;
  1764. for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
  1765. if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
  1766. PXDaddress(pxd, xaddr);
  1767. PXDlength(pxd, nblocks);
  1768. pxdlist.maxnpxd++;
  1769. continue;
  1770. }
  1771. /* undo allocation */
  1772. goto out;
  1773. }
  1774. xlen = min(xlen, maxblocks);
  1775. /*
  1776. * allocate data extent requested
  1777. */
  1778. if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
  1779. goto out;
  1780. split.mp = mp;
  1781. split.index = index;
  1782. split.flag = xflag;
  1783. split.off = xoff;
  1784. split.len = xlen;
  1785. split.addr = xaddr;
  1786. if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
  1787. /* undo data extent allocation */
  1788. dbFree(ip, *xaddrp, (s64) * xlenp);
  1789. return rc;
  1790. }
  1791. *xaddrp = xaddr;
  1792. *xlenp = xlen;
  1793. return 0;
  1794. /*
  1795. * insert the new entry into the leaf page
  1796. */
  1797. insertLeaf:
  1798. /*
  1799. * allocate data extent requested
  1800. */
  1801. if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
  1802. goto out;
  1803. BT_MARK_DIRTY(mp, ip);
  1804. /*
  1805. * acquire a transaction lock on the leaf page;
  1806. *
  1807. * action: xad insertion/extension;
  1808. */
  1809. tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  1810. xtlck = (struct xtlock *) & tlck->lock;
  1811. /* insert the new entry: mark the entry NEW */
  1812. xad = &p->xad[index];
  1813. XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
  1814. /* advance next available entry index */
  1815. le16_add_cpu(&p->header.nextindex, 1);
  1816. xtlck->lwm.offset =
  1817. (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
  1818. xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
  1819. xtlck->lwm.offset;
  1820. *xaddrp = xaddr;
  1821. *xlenp = xlen;
  1822. out:
  1823. /* unpin the leaf page */
  1824. XT_PUTPAGE(mp);
  1825. return rc;
  1826. }
  1827. /*
  1828. * xtInitRoot()
  1829. *
  1830. * initialize file root (inline in inode)
  1831. */
  1832. void xtInitRoot(tid_t tid, struct inode *ip)
  1833. {
  1834. xtroot_t *p;
  1835. /*
  1836. * acquire a transaction lock on the root
  1837. *
  1838. * action:
  1839. */
  1840. txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
  1841. tlckXTREE | tlckNEW);
  1842. p = &JFS_IP(ip)->i_xtroot;
  1843. p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
  1844. p->header.nextindex = cpu_to_le16(XTENTRYSTART);
  1845. if (S_ISDIR(ip->i_mode))
  1846. p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
  1847. else {
  1848. p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
  1849. ip->i_size = 0;
  1850. }
  1851. return;
  1852. }
  1853. /*
  1854. * We can run into a deadlock truncating a file with a large number of
  1855. * xtree pages (large fragmented file). A robust fix would entail a
  1856. * reservation system where we would reserve a number of metadata pages
  1857. * and tlocks which we would be guaranteed without a deadlock. Without
  1858. * this, a partial fix is to limit number of metadata pages we will lock
  1859. * in a single transaction. Currently we will truncate the file so that
  1860. * no more than 50 leaf pages will be locked. The caller of xtTruncate
  1861. * will be responsible for ensuring that the current transaction gets
  1862. * committed, and that subsequent transactions are created to truncate
  1863. * the file further if needed.
  1864. */
  1865. #define MAX_TRUNCATE_LEAVES 50
  1866. /*
  1867. * xtTruncate()
  1868. *
  1869. * function:
  1870. * traverse for truncation logging backward bottom up;
  1871. * terminate at the last extent entry at the current subtree
  1872. * root page covering new down size.
  1873. * truncation may occur within the last extent entry.
  1874. *
  1875. * parameter:
  1876. * int tid,
  1877. * struct inode *ip,
  1878. * s64 newsize,
  1879. * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
  1880. *
  1881. * return:
  1882. *
  1883. * note:
  1884. * PWMAP:
  1885. * 1. truncate (non-COMMIT_NOLINK file)
  1886. * by jfs_truncate() or jfs_open(O_TRUNC):
  1887. * xtree is updated;
  1888. * 2. truncate index table of directory when last entry removed
  1889. * map update via tlock at commit time;
  1890. * PMAP:
  1891. * Call xtTruncate_pmap instead
  1892. * WMAP:
  1893. * 1. remove (free zero link count) on last reference release
  1894. * (pmap has been freed at commit zero link count);
  1895. * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
  1896. * xtree is updated;
  1897. * map update directly at truncation time;
  1898. *
  1899. * if (DELETE)
  1900. * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
  1901. * else if (TRUNCATE)
  1902. * must write LOG_NOREDOPAGE for deleted index page;
  1903. *
  1904. * pages may already have been tlocked by anonymous transactions
  1905. * during file growth (i.e., write) before truncation;
  1906. *
  1907. * except last truncated entry, deleted entries remains as is
  1908. * in the page (nextindex is updated) for other use
  1909. * (e.g., log/update allocation map): this avoid copying the page
  1910. * info but delay free of pages;
  1911. *
  1912. */
  1913. s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
  1914. {
  1915. s64 teof;
  1916. struct metapage *mp;
  1917. xtpage_t *p;
  1918. s64 bn;
  1919. int index, nextindex;
  1920. xad_t *xad;
  1921. s64 xoff, xaddr;
  1922. int xlen, len, freexlen;
  1923. struct btstack btstack;
  1924. struct btframe *parent;
  1925. struct tblock *tblk = NULL;
  1926. struct tlock *tlck = NULL;
  1927. struct xtlock *xtlck = NULL;
  1928. struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */
  1929. struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */
  1930. s64 nfreed;
  1931. int freed, log;
  1932. int locked_leaves = 0;
  1933. /* save object truncation type */
  1934. if (tid) {
  1935. tblk = tid_to_tblock(tid);
  1936. tblk->xflag |= flag;
  1937. }
  1938. nfreed = 0;
  1939. flag &= COMMIT_MAP;
  1940. assert(flag != COMMIT_PMAP);
  1941. if (flag == COMMIT_PWMAP)
  1942. log = 1;
  1943. else {
  1944. log = 0;
  1945. xadlock.flag = mlckFREEXADLIST;
  1946. xadlock.index = 1;
  1947. }
  1948. /*
  1949. * if the newsize is not an integral number of pages,
  1950. * the file between newsize and next page boundary will
  1951. * be cleared.
  1952. * if truncating into a file hole, it will cause
  1953. * a full block to be allocated for the logical block.
  1954. */
  1955. /*
  1956. * release page blocks of truncated region <teof, eof>
  1957. *
  1958. * free the data blocks from the leaf index blocks.
  1959. * delete the parent index entries corresponding to
  1960. * the freed child data/index blocks.
  1961. * free the index blocks themselves which aren't needed
  1962. * in new sized file.
  1963. *
  1964. * index blocks are updated only if the blocks are to be
  1965. * retained in the new sized file.
  1966. * if type is PMAP, the data and index pages are NOT
  1967. * freed, and the data and index blocks are NOT freed
  1968. * from working map.
  1969. * (this will allow continued access of data/index of
  1970. * temporary file (zerolink count file truncated to zero-length)).
  1971. */
  1972. teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
  1973. JFS_SBI(ip->i_sb)->l2bsize;
  1974. /* clear stack */
  1975. BT_CLR(&btstack);
  1976. /*
  1977. * start with root
  1978. *
  1979. * root resides in the inode
  1980. */
  1981. bn = 0;
  1982. /*
  1983. * first access of each page:
  1984. */
  1985. getPage:
  1986. p = xt_getpage(ip, bn, &mp);
  1987. if (IS_ERR(p))
  1988. return PTR_ERR(p);
  1989. /* process entries backward from last index */
  1990. index = le16_to_cpu(p->header.nextindex) - 1;
  1991. /* Since this is the rightmost page at this level, and we may have
  1992. * already freed a page that was formerly to the right, let's make
  1993. * sure that the next pointer is zero.
  1994. */
  1995. if (p->header.next) {
  1996. if (log)
  1997. /*
  1998. * Make sure this change to the header is logged.
  1999. * If we really truncate this leaf, the flag
  2000. * will be changed to tlckTRUNCATE
  2001. */
  2002. tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
  2003. BT_MARK_DIRTY(mp, ip);
  2004. p->header.next = 0;
  2005. }
  2006. if (p->header.flag & BT_INTERNAL)
  2007. goto getChild;
  2008. /*
  2009. * leaf page
  2010. */
  2011. freed = 0;
  2012. /* does region covered by leaf page precede Teof ? */
  2013. xad = &p->xad[index];
  2014. xoff = offsetXAD(xad);
  2015. xlen = lengthXAD(xad);
  2016. if (teof >= xoff + xlen) {
  2017. XT_PUTPAGE(mp);
  2018. goto getParent;
  2019. }
  2020. /* (re)acquire tlock of the leaf page */
  2021. if (log) {
  2022. if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
  2023. /*
  2024. * We need to limit the size of the transaction
  2025. * to avoid exhausting pagecache & tlocks
  2026. */
  2027. XT_PUTPAGE(mp);
  2028. newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
  2029. goto getParent;
  2030. }
  2031. tlck = txLock(tid, ip, mp, tlckXTREE);
  2032. tlck->type = tlckXTREE | tlckTRUNCATE;
  2033. xtlck = (struct xtlock *) & tlck->lock;
  2034. xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
  2035. }
  2036. BT_MARK_DIRTY(mp, ip);
  2037. /*
  2038. * scan backward leaf page entries
  2039. */
  2040. for (; index >= XTENTRYSTART; index--) {
  2041. xad = &p->xad[index];
  2042. xoff = offsetXAD(xad);
  2043. xlen = lengthXAD(xad);
  2044. xaddr = addressXAD(xad);
  2045. /*
  2046. * The "data" for a directory is indexed by the block
  2047. * device's address space. This metadata must be invalidated
  2048. * here
  2049. */
  2050. if (S_ISDIR(ip->i_mode) && (teof == 0))
  2051. invalidate_xad_metapages(ip, *xad);
  2052. /*
  2053. * entry beyond eof: continue scan of current page
  2054. * xad
  2055. * ---|---=======------->
  2056. * eof
  2057. */
  2058. if (teof < xoff) {
  2059. nfreed += xlen;
  2060. continue;
  2061. }
  2062. /*
  2063. * (xoff <= teof): last entry to be deleted from page;
  2064. * If other entries remain in page: keep and update the page.
  2065. */
  2066. /*
  2067. * eof == entry_start: delete the entry
  2068. * xad
  2069. * -------|=======------->
  2070. * eof
  2071. *
  2072. */
  2073. if (teof == xoff) {
  2074. nfreed += xlen;
  2075. if (index == XTENTRYSTART)
  2076. break;
  2077. nextindex = index;
  2078. }
  2079. /*
  2080. * eof within the entry: truncate the entry.
  2081. * xad
  2082. * -------===|===------->
  2083. * eof
  2084. */
  2085. else if (teof < xoff + xlen) {
  2086. /* update truncated entry */
  2087. len = teof - xoff;
  2088. freexlen = xlen - len;
  2089. XADlength(xad, len);
  2090. /* save pxd of truncated extent in tlck */
  2091. xaddr += len;
  2092. if (log) { /* COMMIT_PWMAP */
  2093. xtlck->lwm.offset = (xtlck->lwm.offset) ?
  2094. min(index, (int)xtlck->lwm.offset) : index;
  2095. xtlck->lwm.length = index + 1 -
  2096. xtlck->lwm.offset;
  2097. xtlck->twm.offset = index;
  2098. pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
  2099. pxdlock->flag = mlckFREEPXD;
  2100. PXDaddress(&pxdlock->pxd, xaddr);
  2101. PXDlength(&pxdlock->pxd, freexlen);
  2102. }
  2103. /* free truncated extent */
  2104. else { /* COMMIT_WMAP */
  2105. pxdlock = (struct pxd_lock *) & xadlock;
  2106. pxdlock->flag = mlckFREEPXD;
  2107. PXDaddress(&pxdlock->pxd, xaddr);
  2108. PXDlength(&pxdlock->pxd, freexlen);
  2109. txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
  2110. /* reset map lock */
  2111. xadlock.flag = mlckFREEXADLIST;
  2112. }
  2113. /* current entry is new last entry; */
  2114. nextindex = index + 1;
  2115. nfreed += freexlen;
  2116. }
  2117. /*
  2118. * eof beyond the entry:
  2119. * xad
  2120. * -------=======---|--->
  2121. * eof
  2122. */
  2123. else { /* (xoff + xlen < teof) */
  2124. nextindex = index + 1;
  2125. }
  2126. if (nextindex < le16_to_cpu(p->header.nextindex)) {
  2127. if (!log) { /* COMMIT_WAMP */
  2128. xadlock.xdlist = &p->xad[nextindex];
  2129. xadlock.count =
  2130. le16_to_cpu(p->header.nextindex) -
  2131. nextindex;
  2132. txFreeMap(ip, (struct maplock *) & xadlock,
  2133. NULL, COMMIT_WMAP);
  2134. }
  2135. p->header.nextindex = cpu_to_le16(nextindex);
  2136. }
  2137. XT_PUTPAGE(mp);
  2138. /* assert(freed == 0); */
  2139. goto getParent;
  2140. } /* end scan of leaf page entries */
  2141. freed = 1;
  2142. /*
  2143. * leaf page become empty: free the page if type != PMAP
  2144. */
  2145. if (log) { /* COMMIT_PWMAP */
  2146. /* txCommit() with tlckFREE:
  2147. * free data extents covered by leaf [XTENTRYSTART:hwm);
  2148. * invalidate leaf if COMMIT_PWMAP;
  2149. * if (TRUNCATE), will write LOG_NOREDOPAGE;
  2150. */
  2151. tlck->type = tlckXTREE | tlckFREE;
  2152. } else { /* COMMIT_WAMP */
  2153. /* free data extents covered by leaf */
  2154. xadlock.xdlist = &p->xad[XTENTRYSTART];
  2155. xadlock.count =
  2156. le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
  2157. txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
  2158. }
  2159. if (p->header.flag & BT_ROOT) {
  2160. p->header.flag &= ~BT_INTERNAL;
  2161. p->header.flag |= BT_LEAF;
  2162. p->header.nextindex = cpu_to_le16(XTENTRYSTART);
  2163. XT_PUTPAGE(mp); /* debug */
  2164. goto out;
  2165. } else {
  2166. if (log) { /* COMMIT_PWMAP */
  2167. /* page will be invalidated at tx completion
  2168. */
  2169. XT_PUTPAGE(mp);
  2170. } else { /* COMMIT_WMAP */
  2171. if (mp->lid)
  2172. lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
  2173. /* invalidate empty leaf page */
  2174. discard_metapage(mp);
  2175. }
  2176. }
  2177. /*
  2178. * the leaf page become empty: delete the parent entry
  2179. * for the leaf page if the parent page is to be kept
  2180. * in the new sized file.
  2181. */
  2182. /*
  2183. * go back up to the parent page
  2184. */
  2185. getParent:
  2186. /* pop/restore parent entry for the current child page */
  2187. if ((parent = BT_POP(&btstack)) == NULL)
  2188. /* current page must have been root */
  2189. goto out;
  2190. /* get back the parent page */
  2191. bn = parent->bn;
  2192. p = xt_getpage(ip, bn, &mp);
  2193. if (IS_ERR(p))
  2194. return PTR_ERR(p);
  2195. index = parent->index;
  2196. /*
  2197. * child page was not empty:
  2198. */
  2199. if (freed == 0) {
  2200. /* has any entry deleted from parent ? */
  2201. if (index < le16_to_cpu(p->header.nextindex) - 1) {
  2202. /* (re)acquire tlock on the parent page */
  2203. if (log) { /* COMMIT_PWMAP */
  2204. /* txCommit() with tlckTRUNCATE:
  2205. * free child extents covered by parent [);
  2206. */
  2207. tlck = txLock(tid, ip, mp, tlckXTREE);
  2208. xtlck = (struct xtlock *) & tlck->lock;
  2209. if (!(tlck->type & tlckTRUNCATE)) {
  2210. xtlck->hwm.offset =
  2211. le16_to_cpu(p->header.
  2212. nextindex) - 1;
  2213. tlck->type =
  2214. tlckXTREE | tlckTRUNCATE;
  2215. }
  2216. } else { /* COMMIT_WMAP */
  2217. /* free child extents covered by parent */
  2218. xadlock.xdlist = &p->xad[index + 1];
  2219. xadlock.count =
  2220. le16_to_cpu(p->header.nextindex) -
  2221. index - 1;
  2222. txFreeMap(ip, (struct maplock *) & xadlock,
  2223. NULL, COMMIT_WMAP);
  2224. }
  2225. BT_MARK_DIRTY(mp, ip);
  2226. p->header.nextindex = cpu_to_le16(index + 1);
  2227. }
  2228. XT_PUTPAGE(mp);
  2229. goto getParent;
  2230. }
  2231. /*
  2232. * child page was empty:
  2233. */
  2234. nfreed += lengthXAD(&p->xad[index]);
  2235. /*
  2236. * During working map update, child page's tlock must be handled
  2237. * before parent's. This is because the parent's tlock will cause
  2238. * the child's disk space to be marked available in the wmap, so
  2239. * it's important that the child page be released by that time.
  2240. *
  2241. * ToDo: tlocks should be on doubly-linked list, so we can
  2242. * quickly remove it and add it to the end.
  2243. */
  2244. /*
  2245. * Move parent page's tlock to the end of the tid's tlock list
  2246. */
  2247. if (log && mp->lid && (tblk->last != mp->lid) &&
  2248. lid_to_tlock(mp->lid)->tid) {
  2249. lid_t lid = mp->lid;
  2250. struct tlock *prev;
  2251. tlck = lid_to_tlock(lid);
  2252. if (tblk->next == lid)
  2253. tblk->next = tlck->next;
  2254. else {
  2255. for (prev = lid_to_tlock(tblk->next);
  2256. prev->next != lid;
  2257. prev = lid_to_tlock(prev->next)) {
  2258. assert(prev->next);
  2259. }
  2260. prev->next = tlck->next;
  2261. }
  2262. lid_to_tlock(tblk->last)->next = lid;
  2263. tlck->next = 0;
  2264. tblk->last = lid;
  2265. }
  2266. /*
  2267. * parent page become empty: free the page
  2268. */
  2269. if (index == XTENTRYSTART) {
  2270. if (log) { /* COMMIT_PWMAP */
  2271. /* txCommit() with tlckFREE:
  2272. * free child extents covered by parent;
  2273. * invalidate parent if COMMIT_PWMAP;
  2274. */
  2275. tlck = txLock(tid, ip, mp, tlckXTREE);
  2276. xtlck = (struct xtlock *) & tlck->lock;
  2277. xtlck->hwm.offset =
  2278. le16_to_cpu(p->header.nextindex) - 1;
  2279. tlck->type = tlckXTREE | tlckFREE;
  2280. } else { /* COMMIT_WMAP */
  2281. /* free child extents covered by parent */
  2282. xadlock.xdlist = &p->xad[XTENTRYSTART];
  2283. xadlock.count =
  2284. le16_to_cpu(p->header.nextindex) -
  2285. XTENTRYSTART;
  2286. txFreeMap(ip, (struct maplock *) & xadlock, NULL,
  2287. COMMIT_WMAP);
  2288. }
  2289. BT_MARK_DIRTY(mp, ip);
  2290. if (p->header.flag & BT_ROOT) {
  2291. p->header.flag &= ~BT_INTERNAL;
  2292. p->header.flag |= BT_LEAF;
  2293. p->header.nextindex = cpu_to_le16(XTENTRYSTART);
  2294. if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
  2295. /*
  2296. * Shrink root down to allow inline
  2297. * EA (otherwise fsck complains)
  2298. */
  2299. p->header.maxentry =
  2300. cpu_to_le16(XTROOTINITSLOT);
  2301. JFS_IP(ip)->mode2 |= INLINEEA;
  2302. }
  2303. XT_PUTPAGE(mp); /* debug */
  2304. goto out;
  2305. } else {
  2306. if (log) { /* COMMIT_PWMAP */
  2307. /* page will be invalidated at tx completion
  2308. */
  2309. XT_PUTPAGE(mp);
  2310. } else { /* COMMIT_WMAP */
  2311. if (mp->lid)
  2312. lid_to_tlock(mp->lid)->flag |=
  2313. tlckFREELOCK;
  2314. /* invalidate parent page */
  2315. discard_metapage(mp);
  2316. }
  2317. /* parent has become empty and freed:
  2318. * go back up to its parent page
  2319. */
  2320. /* freed = 1; */
  2321. goto getParent;
  2322. }
  2323. }
  2324. /*
  2325. * parent page still has entries for front region;
  2326. */
  2327. else {
  2328. /* try truncate region covered by preceding entry
  2329. * (process backward)
  2330. */
  2331. index--;
  2332. /* go back down to the child page corresponding
  2333. * to the entry
  2334. */
  2335. goto getChild;
  2336. }
  2337. /*
  2338. * internal page: go down to child page of current entry
  2339. */
  2340. getChild:
  2341. /* save current parent entry for the child page */
  2342. if (BT_STACK_FULL(&btstack)) {
  2343. jfs_error(ip->i_sb, "stack overrun!\n");
  2344. XT_PUTPAGE(mp);
  2345. return -EIO;
  2346. }
  2347. BT_PUSH(&btstack, bn, index);
  2348. /* get child page */
  2349. xad = &p->xad[index];
  2350. bn = addressXAD(xad);
  2351. /*
  2352. * first access of each internal entry:
  2353. */
  2354. /* release parent page */
  2355. XT_PUTPAGE(mp);
  2356. /* process the child page */
  2357. goto getPage;
  2358. out:
  2359. /*
  2360. * update file resource stat
  2361. */
  2362. /* set size
  2363. */
  2364. if (S_ISDIR(ip->i_mode) && !newsize)
  2365. ip->i_size = 1; /* fsck hates zero-length directories */
  2366. else
  2367. ip->i_size = newsize;
  2368. /* update quota allocation to reflect freed blocks */
  2369. dquot_free_block(ip, nfreed);
  2370. /*
  2371. * free tlock of invalidated pages
  2372. */
  2373. if (flag == COMMIT_WMAP)
  2374. txFreelock(ip);
  2375. return newsize;
  2376. }
  2377. /*
  2378. * xtTruncate_pmap()
  2379. *
  2380. * function:
  2381. * Perform truncate to zero length for deleted file, leaving the
  2382. * xtree and working map untouched. This allows the file to
  2383. * be accessed via open file handles, while the delete of the file
  2384. * is committed to disk.
  2385. *
  2386. * parameter:
  2387. * tid_t tid,
  2388. * struct inode *ip,
  2389. * s64 committed_size)
  2390. *
  2391. * return: new committed size
  2392. *
  2393. * note:
  2394. *
  2395. * To avoid deadlock by holding too many transaction locks, the
  2396. * truncation may be broken up into multiple transactions.
  2397. * The committed_size keeps track of part of the file has been
  2398. * freed from the pmaps.
  2399. */
  2400. s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
  2401. {
  2402. s64 bn;
  2403. struct btstack btstack;
  2404. int cmp;
  2405. int index;
  2406. int locked_leaves = 0;
  2407. struct metapage *mp;
  2408. xtpage_t *p;
  2409. struct btframe *parent;
  2410. int rc;
  2411. struct tblock *tblk;
  2412. struct tlock *tlck = NULL;
  2413. xad_t *xad;
  2414. int xlen;
  2415. s64 xoff;
  2416. struct xtlock *xtlck = NULL;
  2417. /* save object truncation type */
  2418. tblk = tid_to_tblock(tid);
  2419. tblk->xflag |= COMMIT_PMAP;
  2420. /* clear stack */
  2421. BT_CLR(&btstack);
  2422. if (committed_size) {
  2423. xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
  2424. rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
  2425. if (rc)
  2426. return rc;
  2427. XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  2428. if (cmp != 0) {
  2429. XT_PUTPAGE(mp);
  2430. jfs_error(ip->i_sb, "did not find extent\n");
  2431. return -EIO;
  2432. }
  2433. } else {
  2434. /*
  2435. * start with root
  2436. *
  2437. * root resides in the inode
  2438. */
  2439. bn = 0;
  2440. /*
  2441. * first access of each page:
  2442. */
  2443. getPage:
  2444. p = xt_getpage(ip, bn, &mp);
  2445. if (IS_ERR(p))
  2446. return PTR_ERR(p);
  2447. /* process entries backward from last index */
  2448. index = le16_to_cpu(p->header.nextindex) - 1;
  2449. if (p->header.flag & BT_INTERNAL)
  2450. goto getChild;
  2451. }
  2452. /*
  2453. * leaf page
  2454. */
  2455. if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
  2456. /*
  2457. * We need to limit the size of the transaction
  2458. * to avoid exhausting pagecache & tlocks
  2459. */
  2460. xad = &p->xad[index];
  2461. xoff = offsetXAD(xad);
  2462. xlen = lengthXAD(xad);
  2463. XT_PUTPAGE(mp);
  2464. return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
  2465. }
  2466. tlck = txLock(tid, ip, mp, tlckXTREE);
  2467. tlck->type = tlckXTREE | tlckFREE;
  2468. xtlck = (struct xtlock *) & tlck->lock;
  2469. xtlck->hwm.offset = index;
  2470. XT_PUTPAGE(mp);
  2471. /*
  2472. * go back up to the parent page
  2473. */
  2474. getParent:
  2475. /* pop/restore parent entry for the current child page */
  2476. if ((parent = BT_POP(&btstack)) == NULL)
  2477. /* current page must have been root */
  2478. goto out;
  2479. /* get back the parent page */
  2480. bn = parent->bn;
  2481. p = xt_getpage(ip, bn, &mp);
  2482. if (IS_ERR(p))
  2483. return PTR_ERR(p);
  2484. index = parent->index;
  2485. /*
  2486. * parent page become empty: free the page
  2487. */
  2488. if (index == XTENTRYSTART) {
  2489. /* txCommit() with tlckFREE:
  2490. * free child extents covered by parent;
  2491. * invalidate parent if COMMIT_PWMAP;
  2492. */
  2493. tlck = txLock(tid, ip, mp, tlckXTREE);
  2494. xtlck = (struct xtlock *) & tlck->lock;
  2495. xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
  2496. tlck->type = tlckXTREE | tlckFREE;
  2497. XT_PUTPAGE(mp);
  2498. if (p->header.flag & BT_ROOT) {
  2499. goto out;
  2500. } else {
  2501. goto getParent;
  2502. }
  2503. }
  2504. /*
  2505. * parent page still has entries for front region;
  2506. */
  2507. else
  2508. index--;
  2509. /*
  2510. * internal page: go down to child page of current entry
  2511. */
  2512. getChild:
  2513. /* save current parent entry for the child page */
  2514. if (BT_STACK_FULL(&btstack)) {
  2515. jfs_error(ip->i_sb, "stack overrun!\n");
  2516. XT_PUTPAGE(mp);
  2517. return -EIO;
  2518. }
  2519. BT_PUSH(&btstack, bn, index);
  2520. /* get child page */
  2521. xad = &p->xad[index];
  2522. bn = addressXAD(xad);
  2523. /*
  2524. * first access of each internal entry:
  2525. */
  2526. /* release parent page */
  2527. XT_PUTPAGE(mp);
  2528. /* process the child page */
  2529. goto getPage;
  2530. out:
  2531. return 0;
  2532. }
  2533. #ifdef CONFIG_JFS_STATISTICS
  2534. int jfs_xtstat_proc_show(struct seq_file *m, void *v)
  2535. {
  2536. seq_printf(m,
  2537. "JFS Xtree statistics\n"
  2538. "====================\n"
  2539. "searches = %d\n"
  2540. "fast searches = %d\n"
  2541. "splits = %d\n",
  2542. xtStat.search,
  2543. xtStat.fastSearch,
  2544. xtStat.split);
  2545. return 0;
  2546. }
  2547. #endif