free-space-cache.c 116 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
7427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2008 Red Hat. All rights reserved.
  4. */
  5. #include <linux/pagemap.h>
  6. #include <linux/sched.h>
  7. #include <linux/sched/signal.h>
  8. #include <linux/slab.h>
  9. #include <linux/math64.h>
  10. #include <linux/ratelimit.h>
  11. #include <linux/error-injection.h>
  12. #include <linux/sched/mm.h>
  13. #include <linux/string_choices.h>
  14. #include "extent-tree.h"
  15. #include "fs.h"
  16. #include "messages.h"
  17. #include "misc.h"
  18. #include "free-space-cache.h"
  19. #include "transaction.h"
  20. #include "disk-io.h"
  21. #include "extent_io.h"
  22. #include "space-info.h"
  23. #include "block-group.h"
  24. #include "discard.h"
  25. #include "subpage.h"
  26. #include "inode-item.h"
  27. #include "accessors.h"
  28. #include "file-item.h"
  29. #include "file.h"
  30. #include "super.h"
  31. #include "relocation.h"
  32. #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
  33. #define MAX_CACHE_BYTES_PER_GIG SZ_64K
  34. #define FORCE_EXTENT_THRESHOLD SZ_1M
  35. static struct kmem_cache *btrfs_free_space_cachep;
  36. static struct kmem_cache *btrfs_free_space_bitmap_cachep;
  37. struct btrfs_trim_range {
  38. u64 start;
  39. u64 bytes;
  40. struct list_head list;
  41. };
  42. static int link_free_space(struct btrfs_free_space_ctl *ctl,
  43. struct btrfs_free_space *info);
  44. static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
  45. struct btrfs_free_space *info, bool update_stat);
  46. static int search_bitmap(struct btrfs_free_space_ctl *ctl,
  47. struct btrfs_free_space *bitmap_info, u64 *offset,
  48. u64 *bytes, bool for_alloc);
  49. static void free_bitmap(struct btrfs_free_space_ctl *ctl,
  50. struct btrfs_free_space *bitmap_info);
  51. static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
  52. struct btrfs_free_space *info, u64 offset,
  53. u64 bytes, bool update_stats);
  54. static void btrfs_crc32c_final(u32 crc, u8 *result)
  55. {
  56. put_unaligned_le32(~crc, result);
  57. }
  58. static void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
  59. {
  60. struct btrfs_free_space *info;
  61. struct rb_node *node;
  62. while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
  63. info = rb_entry(node, struct btrfs_free_space, offset_index);
  64. if (!info->bitmap) {
  65. unlink_free_space(ctl, info, true);
  66. kmem_cache_free(btrfs_free_space_cachep, info);
  67. } else {
  68. free_bitmap(ctl, info);
  69. }
  70. cond_resched_lock(&ctl->tree_lock);
  71. }
  72. }
  73. static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
  74. struct btrfs_path *path,
  75. u64 offset)
  76. {
  77. struct btrfs_key key;
  78. struct btrfs_key location;
  79. struct btrfs_disk_key disk_key;
  80. struct btrfs_free_space_header *header;
  81. struct extent_buffer *leaf;
  82. struct btrfs_inode *inode;
  83. unsigned nofs_flag;
  84. int ret;
  85. key.objectid = BTRFS_FREE_SPACE_OBJECTID;
  86. key.type = 0;
  87. key.offset = offset;
  88. ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  89. if (ret < 0)
  90. return ERR_PTR(ret);
  91. if (ret > 0) {
  92. btrfs_release_path(path);
  93. return ERR_PTR(-ENOENT);
  94. }
  95. leaf = path->nodes[0];
  96. header = btrfs_item_ptr(leaf, path->slots[0],
  97. struct btrfs_free_space_header);
  98. btrfs_free_space_key(leaf, header, &disk_key);
  99. btrfs_disk_key_to_cpu(&location, &disk_key);
  100. btrfs_release_path(path);
  101. /*
  102. * We are often under a trans handle at this point, so we need to make
  103. * sure NOFS is set to keep us from deadlocking.
  104. */
  105. nofs_flag = memalloc_nofs_save();
  106. inode = btrfs_iget_path(location.objectid, root, path);
  107. btrfs_release_path(path);
  108. memalloc_nofs_restore(nofs_flag);
  109. if (IS_ERR(inode))
  110. return ERR_CAST(inode);
  111. mapping_set_gfp_mask(inode->vfs_inode.i_mapping,
  112. mapping_gfp_constraint(inode->vfs_inode.i_mapping,
  113. ~(__GFP_FS | __GFP_HIGHMEM)));
  114. return &inode->vfs_inode;
  115. }
  116. struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
  117. struct btrfs_path *path)
  118. {
  119. struct btrfs_fs_info *fs_info = block_group->fs_info;
  120. struct inode *inode = NULL;
  121. u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
  122. spin_lock(&block_group->lock);
  123. if (block_group->inode)
  124. inode = igrab(&block_group->inode->vfs_inode);
  125. spin_unlock(&block_group->lock);
  126. if (inode)
  127. return inode;
  128. inode = __lookup_free_space_inode(fs_info->tree_root, path,
  129. block_group->start);
  130. if (IS_ERR(inode))
  131. return inode;
  132. spin_lock(&block_group->lock);
  133. if (!((BTRFS_I(inode)->flags & flags) == flags)) {
  134. btrfs_info(fs_info, "Old style space inode found, converting.");
  135. BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
  136. BTRFS_INODE_NODATACOW;
  137. block_group->disk_cache_state = BTRFS_DC_CLEAR;
  138. }
  139. if (!test_and_set_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags))
  140. block_group->inode = BTRFS_I(igrab(inode));
  141. spin_unlock(&block_group->lock);
  142. return inode;
  143. }
  144. static int __create_free_space_inode(struct btrfs_root *root,
  145. struct btrfs_trans_handle *trans,
  146. struct btrfs_path *path,
  147. u64 ino, u64 offset)
  148. {
  149. struct btrfs_key key;
  150. struct btrfs_disk_key disk_key;
  151. struct btrfs_free_space_header *header;
  152. struct btrfs_inode_item *inode_item;
  153. struct extent_buffer *leaf;
  154. /* We inline CRCs for the free disk space cache */
  155. const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
  156. BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
  157. int ret;
  158. ret = btrfs_insert_empty_inode(trans, root, path, ino);
  159. if (ret)
  160. return ret;
  161. leaf = path->nodes[0];
  162. inode_item = btrfs_item_ptr(leaf, path->slots[0],
  163. struct btrfs_inode_item);
  164. btrfs_item_key(leaf, &disk_key, path->slots[0]);
  165. memzero_extent_buffer(leaf, (unsigned long)inode_item,
  166. sizeof(*inode_item));
  167. btrfs_set_inode_generation(leaf, inode_item, trans->transid);
  168. btrfs_set_inode_size(leaf, inode_item, 0);
  169. btrfs_set_inode_nbytes(leaf, inode_item, 0);
  170. btrfs_set_inode_uid(leaf, inode_item, 0);
  171. btrfs_set_inode_gid(leaf, inode_item, 0);
  172. btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
  173. btrfs_set_inode_flags(leaf, inode_item, flags);
  174. btrfs_set_inode_nlink(leaf, inode_item, 1);
  175. btrfs_set_inode_transid(leaf, inode_item, trans->transid);
  176. btrfs_set_inode_block_group(leaf, inode_item, offset);
  177. btrfs_release_path(path);
  178. key.objectid = BTRFS_FREE_SPACE_OBJECTID;
  179. key.type = 0;
  180. key.offset = offset;
  181. ret = btrfs_insert_empty_item(trans, root, path, &key,
  182. sizeof(struct btrfs_free_space_header));
  183. if (ret < 0) {
  184. btrfs_release_path(path);
  185. return ret;
  186. }
  187. leaf = path->nodes[0];
  188. header = btrfs_item_ptr(leaf, path->slots[0],
  189. struct btrfs_free_space_header);
  190. memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
  191. btrfs_set_free_space_key(leaf, header, &disk_key);
  192. btrfs_release_path(path);
  193. return 0;
  194. }
  195. int create_free_space_inode(struct btrfs_trans_handle *trans,
  196. struct btrfs_block_group *block_group,
  197. struct btrfs_path *path)
  198. {
  199. int ret;
  200. u64 ino;
  201. ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
  202. if (ret < 0)
  203. return ret;
  204. return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
  205. ino, block_group->start);
  206. }
  207. /*
  208. * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
  209. * handles lookup, otherwise it takes ownership and iputs the inode.
  210. * Don't reuse an inode pointer after passing it into this function.
  211. */
  212. int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
  213. struct inode *inode,
  214. struct btrfs_block_group *block_group)
  215. {
  216. BTRFS_PATH_AUTO_FREE(path);
  217. struct btrfs_key key;
  218. int ret = 0;
  219. path = btrfs_alloc_path();
  220. if (!path)
  221. return -ENOMEM;
  222. if (!inode)
  223. inode = lookup_free_space_inode(block_group, path);
  224. if (IS_ERR(inode)) {
  225. if (PTR_ERR(inode) != -ENOENT)
  226. ret = PTR_ERR(inode);
  227. return ret;
  228. }
  229. ret = btrfs_orphan_add(trans, BTRFS_I(inode));
  230. if (ret) {
  231. btrfs_add_delayed_iput(BTRFS_I(inode));
  232. return ret;
  233. }
  234. clear_nlink(inode);
  235. /* One for the block groups ref */
  236. spin_lock(&block_group->lock);
  237. if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags)) {
  238. block_group->inode = NULL;
  239. spin_unlock(&block_group->lock);
  240. iput(inode);
  241. } else {
  242. spin_unlock(&block_group->lock);
  243. }
  244. /* One for the lookup ref */
  245. btrfs_add_delayed_iput(BTRFS_I(inode));
  246. key.objectid = BTRFS_FREE_SPACE_OBJECTID;
  247. key.type = 0;
  248. key.offset = block_group->start;
  249. ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
  250. -1, 1);
  251. if (ret) {
  252. if (ret > 0)
  253. ret = 0;
  254. return ret;
  255. }
  256. return btrfs_del_item(trans, trans->fs_info->tree_root, path);
  257. }
  258. int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
  259. struct btrfs_block_group *block_group,
  260. struct inode *vfs_inode)
  261. {
  262. struct btrfs_truncate_control control = {
  263. .inode = BTRFS_I(vfs_inode),
  264. .new_size = 0,
  265. .ino = btrfs_ino(BTRFS_I(vfs_inode)),
  266. .min_type = BTRFS_EXTENT_DATA_KEY,
  267. .clear_extent_range = true,
  268. };
  269. struct btrfs_inode *inode = BTRFS_I(vfs_inode);
  270. struct btrfs_root *root = inode->root;
  271. struct extent_state *cached_state = NULL;
  272. int ret = 0;
  273. bool locked = false;
  274. if (block_group) {
  275. BTRFS_PATH_AUTO_FREE(path);
  276. path = btrfs_alloc_path();
  277. if (!path) {
  278. ret = -ENOMEM;
  279. goto fail;
  280. }
  281. locked = true;
  282. mutex_lock(&trans->transaction->cache_write_mutex);
  283. if (!list_empty(&block_group->io_list)) {
  284. list_del_init(&block_group->io_list);
  285. btrfs_wait_cache_io(trans, block_group, path);
  286. btrfs_put_block_group(block_group);
  287. }
  288. /*
  289. * now that we've truncated the cache away, its no longer
  290. * setup or written
  291. */
  292. spin_lock(&block_group->lock);
  293. block_group->disk_cache_state = BTRFS_DC_CLEAR;
  294. spin_unlock(&block_group->lock);
  295. }
  296. btrfs_i_size_write(inode, 0);
  297. truncate_pagecache(vfs_inode, 0);
  298. btrfs_lock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
  299. btrfs_drop_extent_map_range(inode, 0, (u64)-1, false);
  300. /*
  301. * We skip the throttling logic for free space cache inodes, so we don't
  302. * need to check for -EAGAIN.
  303. */
  304. ret = btrfs_truncate_inode_items(trans, root, &control);
  305. inode_sub_bytes(&inode->vfs_inode, control.sub_bytes);
  306. btrfs_inode_safe_disk_i_size_write(inode, control.last_size);
  307. btrfs_unlock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
  308. if (ret)
  309. goto fail;
  310. ret = btrfs_update_inode(trans, inode);
  311. fail:
  312. if (locked)
  313. mutex_unlock(&trans->transaction->cache_write_mutex);
  314. if (ret)
  315. btrfs_abort_transaction(trans, ret);
  316. return ret;
  317. }
  318. static void readahead_cache(struct inode *inode)
  319. {
  320. struct file_ra_state ra;
  321. pgoff_t last_index;
  322. file_ra_state_init(&ra, inode->i_mapping);
  323. last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
  324. page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index);
  325. }
  326. static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
  327. int write)
  328. {
  329. int num_pages;
  330. num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
  331. /* Make sure we can fit our crcs and generation into the first page */
  332. if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
  333. return -ENOSPC;
  334. memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
  335. io_ctl->pages = kzalloc_objs(struct page *, num_pages, GFP_NOFS);
  336. if (!io_ctl->pages)
  337. return -ENOMEM;
  338. io_ctl->num_pages = num_pages;
  339. io_ctl->fs_info = inode_to_fs_info(inode);
  340. io_ctl->inode = inode;
  341. return 0;
  342. }
  343. ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
  344. static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
  345. {
  346. kfree(io_ctl->pages);
  347. io_ctl->pages = NULL;
  348. }
  349. static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
  350. {
  351. if (io_ctl->cur) {
  352. io_ctl->cur = NULL;
  353. io_ctl->orig = NULL;
  354. }
  355. }
  356. static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
  357. {
  358. ASSERT(io_ctl->index < io_ctl->num_pages);
  359. io_ctl->page = io_ctl->pages[io_ctl->index++];
  360. io_ctl->cur = page_address(io_ctl->page);
  361. io_ctl->orig = io_ctl->cur;
  362. io_ctl->size = PAGE_SIZE;
  363. if (clear)
  364. clear_page(io_ctl->cur);
  365. }
  366. static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
  367. {
  368. int i;
  369. io_ctl_unmap_page(io_ctl);
  370. for (i = 0; i < io_ctl->num_pages; i++) {
  371. if (io_ctl->pages[i]) {
  372. btrfs_folio_clear_checked(io_ctl->fs_info,
  373. page_folio(io_ctl->pages[i]),
  374. page_offset(io_ctl->pages[i]),
  375. PAGE_SIZE);
  376. unlock_page(io_ctl->pages[i]);
  377. put_page(io_ctl->pages[i]);
  378. }
  379. }
  380. }
  381. static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
  382. {
  383. struct folio *folio;
  384. struct inode *inode = io_ctl->inode;
  385. gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
  386. int i;
  387. for (i = 0; i < io_ctl->num_pages; i++) {
  388. int ret;
  389. folio = __filemap_get_folio(inode->i_mapping, i,
  390. FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
  391. mask);
  392. if (IS_ERR(folio)) {
  393. io_ctl_drop_pages(io_ctl);
  394. return PTR_ERR(folio);
  395. }
  396. ret = set_folio_extent_mapped(folio);
  397. if (ret < 0) {
  398. folio_unlock(folio);
  399. folio_put(folio);
  400. io_ctl_drop_pages(io_ctl);
  401. return ret;
  402. }
  403. io_ctl->pages[i] = &folio->page;
  404. if (uptodate && !folio_test_uptodate(folio)) {
  405. btrfs_read_folio(NULL, folio);
  406. folio_lock(folio);
  407. if (folio->mapping != inode->i_mapping) {
  408. btrfs_err(BTRFS_I(inode)->root->fs_info,
  409. "free space cache page truncated");
  410. io_ctl_drop_pages(io_ctl);
  411. return -EIO;
  412. }
  413. if (!folio_test_uptodate(folio)) {
  414. btrfs_err(BTRFS_I(inode)->root->fs_info,
  415. "error reading free space cache");
  416. io_ctl_drop_pages(io_ctl);
  417. return -EIO;
  418. }
  419. }
  420. }
  421. for (i = 0; i < io_ctl->num_pages; i++)
  422. clear_page_dirty_for_io(io_ctl->pages[i]);
  423. return 0;
  424. }
  425. static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
  426. {
  427. io_ctl_map_page(io_ctl, 1);
  428. /*
  429. * Skip the csum areas. If we don't check crcs then we just have a
  430. * 64bit chunk at the front of the first page.
  431. */
  432. io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
  433. io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
  434. put_unaligned_le64(generation, io_ctl->cur);
  435. io_ctl->cur += sizeof(u64);
  436. }
  437. static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
  438. {
  439. u64 cache_gen;
  440. /*
  441. * Skip the crc area. If we don't check crcs then we just have a 64bit
  442. * chunk at the front of the first page.
  443. */
  444. io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
  445. io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
  446. cache_gen = get_unaligned_le64(io_ctl->cur);
  447. if (cache_gen != generation) {
  448. btrfs_err_rl(io_ctl->fs_info,
  449. "space cache generation (%llu) does not match inode (%llu)",
  450. cache_gen, generation);
  451. io_ctl_unmap_page(io_ctl);
  452. return -EIO;
  453. }
  454. io_ctl->cur += sizeof(u64);
  455. return 0;
  456. }
  457. static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
  458. {
  459. u32 *tmp;
  460. u32 crc = ~(u32)0;
  461. unsigned offset = 0;
  462. if (index == 0)
  463. offset = sizeof(u32) * io_ctl->num_pages;
  464. crc = crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
  465. btrfs_crc32c_final(crc, (u8 *)&crc);
  466. io_ctl_unmap_page(io_ctl);
  467. tmp = page_address(io_ctl->pages[0]);
  468. tmp += index;
  469. *tmp = crc;
  470. }
  471. static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
  472. {
  473. u32 *tmp, val;
  474. u32 crc = ~(u32)0;
  475. unsigned offset = 0;
  476. if (index == 0)
  477. offset = sizeof(u32) * io_ctl->num_pages;
  478. tmp = page_address(io_ctl->pages[0]);
  479. tmp += index;
  480. val = *tmp;
  481. io_ctl_map_page(io_ctl, 0);
  482. crc = crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
  483. btrfs_crc32c_final(crc, (u8 *)&crc);
  484. if (val != crc) {
  485. btrfs_err_rl(io_ctl->fs_info,
  486. "csum mismatch on free space cache");
  487. io_ctl_unmap_page(io_ctl);
  488. return -EIO;
  489. }
  490. return 0;
  491. }
  492. static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
  493. void *bitmap)
  494. {
  495. struct btrfs_free_space_entry *entry;
  496. if (!io_ctl->cur)
  497. return -ENOSPC;
  498. entry = io_ctl->cur;
  499. put_unaligned_le64(offset, &entry->offset);
  500. put_unaligned_le64(bytes, &entry->bytes);
  501. entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
  502. BTRFS_FREE_SPACE_EXTENT;
  503. io_ctl->cur += sizeof(struct btrfs_free_space_entry);
  504. io_ctl->size -= sizeof(struct btrfs_free_space_entry);
  505. if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
  506. return 0;
  507. io_ctl_set_crc(io_ctl, io_ctl->index - 1);
  508. /* No more pages to map */
  509. if (io_ctl->index >= io_ctl->num_pages)
  510. return 0;
  511. /* map the next page */
  512. io_ctl_map_page(io_ctl, 1);
  513. return 0;
  514. }
  515. static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
  516. {
  517. if (!io_ctl->cur)
  518. return -ENOSPC;
  519. /*
  520. * If we aren't at the start of the current page, unmap this one and
  521. * map the next one if there is any left.
  522. */
  523. if (io_ctl->cur != io_ctl->orig) {
  524. io_ctl_set_crc(io_ctl, io_ctl->index - 1);
  525. if (io_ctl->index >= io_ctl->num_pages)
  526. return -ENOSPC;
  527. io_ctl_map_page(io_ctl, 0);
  528. }
  529. copy_page(io_ctl->cur, bitmap);
  530. io_ctl_set_crc(io_ctl, io_ctl->index - 1);
  531. if (io_ctl->index < io_ctl->num_pages)
  532. io_ctl_map_page(io_ctl, 0);
  533. return 0;
  534. }
  535. static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
  536. {
  537. /*
  538. * If we're not on the boundary we know we've modified the page and we
  539. * need to crc the page.
  540. */
  541. if (io_ctl->cur != io_ctl->orig)
  542. io_ctl_set_crc(io_ctl, io_ctl->index - 1);
  543. else
  544. io_ctl_unmap_page(io_ctl);
  545. while (io_ctl->index < io_ctl->num_pages) {
  546. io_ctl_map_page(io_ctl, 1);
  547. io_ctl_set_crc(io_ctl, io_ctl->index - 1);
  548. }
  549. }
  550. static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
  551. struct btrfs_free_space *entry, u8 *type)
  552. {
  553. struct btrfs_free_space_entry *e;
  554. int ret;
  555. if (!io_ctl->cur) {
  556. ret = io_ctl_check_crc(io_ctl, io_ctl->index);
  557. if (ret)
  558. return ret;
  559. }
  560. e = io_ctl->cur;
  561. entry->offset = get_unaligned_le64(&e->offset);
  562. entry->bytes = get_unaligned_le64(&e->bytes);
  563. *type = e->type;
  564. io_ctl->cur += sizeof(struct btrfs_free_space_entry);
  565. io_ctl->size -= sizeof(struct btrfs_free_space_entry);
  566. if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
  567. return 0;
  568. io_ctl_unmap_page(io_ctl);
  569. return 0;
  570. }
  571. static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
  572. struct btrfs_free_space *entry)
  573. {
  574. int ret;
  575. ret = io_ctl_check_crc(io_ctl, io_ctl->index);
  576. if (ret)
  577. return ret;
  578. copy_page(entry->bitmap, io_ctl->cur);
  579. io_ctl_unmap_page(io_ctl);
  580. return 0;
  581. }
  582. static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
  583. {
  584. struct btrfs_block_group *block_group = ctl->block_group;
  585. u64 max_bytes;
  586. u64 bitmap_bytes;
  587. u64 extent_bytes;
  588. u64 size = block_group->length;
  589. u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
  590. u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
  591. max_bitmaps = max_t(u64, max_bitmaps, 1);
  592. if (ctl->total_bitmaps > max_bitmaps)
  593. btrfs_err(block_group->fs_info,
  594. "invalid free space control: bg start=%llu len=%llu total_bitmaps=%u unit=%u max_bitmaps=%llu bytes_per_bg=%llu",
  595. block_group->start, block_group->length,
  596. ctl->total_bitmaps, ctl->unit, max_bitmaps,
  597. bytes_per_bg);
  598. ASSERT(ctl->total_bitmaps <= max_bitmaps);
  599. /*
  600. * We are trying to keep the total amount of memory used per 1GiB of
  601. * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
  602. * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
  603. * bitmaps, we may end up using more memory than this.
  604. */
  605. if (size < SZ_1G)
  606. max_bytes = MAX_CACHE_BYTES_PER_GIG;
  607. else
  608. max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
  609. bitmap_bytes = ctl->total_bitmaps * ctl->unit;
  610. /*
  611. * we want the extent entry threshold to always be at most 1/2 the max
  612. * bytes we can have, or whatever is less than that.
  613. */
  614. extent_bytes = max_bytes - bitmap_bytes;
  615. extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
  616. ctl->extents_thresh =
  617. div_u64(extent_bytes, sizeof(struct btrfs_free_space));
  618. }
  619. static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
  620. struct btrfs_free_space_ctl *ctl,
  621. struct btrfs_path *path, u64 offset)
  622. {
  623. struct btrfs_fs_info *fs_info = root->fs_info;
  624. struct btrfs_free_space_header *header;
  625. struct extent_buffer *leaf;
  626. struct btrfs_io_ctl io_ctl;
  627. struct btrfs_key key;
  628. struct btrfs_free_space *e, *n;
  629. LIST_HEAD(bitmaps);
  630. u64 num_entries;
  631. u64 num_bitmaps;
  632. u64 generation;
  633. u8 type;
  634. int ret = 0;
  635. /* Nothing in the space cache, goodbye */
  636. if (!i_size_read(inode))
  637. return 0;
  638. key.objectid = BTRFS_FREE_SPACE_OBJECTID;
  639. key.type = 0;
  640. key.offset = offset;
  641. ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  642. if (ret < 0)
  643. return 0;
  644. else if (ret > 0) {
  645. btrfs_release_path(path);
  646. return 0;
  647. }
  648. ret = -1;
  649. leaf = path->nodes[0];
  650. header = btrfs_item_ptr(leaf, path->slots[0],
  651. struct btrfs_free_space_header);
  652. num_entries = btrfs_free_space_entries(leaf, header);
  653. num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
  654. generation = btrfs_free_space_generation(leaf, header);
  655. btrfs_release_path(path);
  656. if (!BTRFS_I(inode)->generation) {
  657. btrfs_info(fs_info,
  658. "the free space cache file (%llu) is invalid, skip it",
  659. offset);
  660. return 0;
  661. }
  662. if (BTRFS_I(inode)->generation != generation) {
  663. btrfs_err(fs_info,
  664. "free space inode generation (%llu) did not match free space cache generation (%llu)",
  665. BTRFS_I(inode)->generation, generation);
  666. return 0;
  667. }
  668. if (!num_entries)
  669. return 0;
  670. ret = io_ctl_init(&io_ctl, inode, 0);
  671. if (ret)
  672. return ret;
  673. readahead_cache(inode);
  674. ret = io_ctl_prepare_pages(&io_ctl, true);
  675. if (ret)
  676. goto out;
  677. ret = io_ctl_check_crc(&io_ctl, 0);
  678. if (ret)
  679. goto free_cache;
  680. ret = io_ctl_check_generation(&io_ctl, generation);
  681. if (ret)
  682. goto free_cache;
  683. while (num_entries) {
  684. e = kmem_cache_zalloc(btrfs_free_space_cachep,
  685. GFP_NOFS);
  686. if (!e) {
  687. ret = -ENOMEM;
  688. goto free_cache;
  689. }
  690. ret = io_ctl_read_entry(&io_ctl, e, &type);
  691. if (ret) {
  692. kmem_cache_free(btrfs_free_space_cachep, e);
  693. goto free_cache;
  694. }
  695. if (!e->bytes) {
  696. ret = -1;
  697. kmem_cache_free(btrfs_free_space_cachep, e);
  698. goto free_cache;
  699. }
  700. if (type == BTRFS_FREE_SPACE_EXTENT) {
  701. spin_lock(&ctl->tree_lock);
  702. ret = link_free_space(ctl, e);
  703. spin_unlock(&ctl->tree_lock);
  704. if (ret) {
  705. btrfs_err(fs_info,
  706. "Duplicate entries in free space cache, dumping");
  707. kmem_cache_free(btrfs_free_space_cachep, e);
  708. goto free_cache;
  709. }
  710. } else {
  711. ASSERT(num_bitmaps);
  712. num_bitmaps--;
  713. e->bitmap = kmem_cache_zalloc(
  714. btrfs_free_space_bitmap_cachep, GFP_NOFS);
  715. if (!e->bitmap) {
  716. ret = -ENOMEM;
  717. kmem_cache_free(
  718. btrfs_free_space_cachep, e);
  719. goto free_cache;
  720. }
  721. spin_lock(&ctl->tree_lock);
  722. ret = link_free_space(ctl, e);
  723. if (ret) {
  724. spin_unlock(&ctl->tree_lock);
  725. btrfs_err(fs_info,
  726. "Duplicate entries in free space cache, dumping");
  727. kmem_cache_free(btrfs_free_space_bitmap_cachep, e->bitmap);
  728. kmem_cache_free(btrfs_free_space_cachep, e);
  729. goto free_cache;
  730. }
  731. ctl->total_bitmaps++;
  732. recalculate_thresholds(ctl);
  733. spin_unlock(&ctl->tree_lock);
  734. list_add_tail(&e->list, &bitmaps);
  735. }
  736. num_entries--;
  737. }
  738. io_ctl_unmap_page(&io_ctl);
  739. /*
  740. * We add the bitmaps at the end of the entries in order that
  741. * the bitmap entries are added to the cache.
  742. */
  743. list_for_each_entry_safe(e, n, &bitmaps, list) {
  744. list_del_init(&e->list);
  745. ret = io_ctl_read_bitmap(&io_ctl, e);
  746. if (ret)
  747. goto free_cache;
  748. }
  749. io_ctl_drop_pages(&io_ctl);
  750. ret = 1;
  751. out:
  752. io_ctl_free(&io_ctl);
  753. return ret;
  754. free_cache:
  755. io_ctl_drop_pages(&io_ctl);
  756. spin_lock(&ctl->tree_lock);
  757. __btrfs_remove_free_space_cache(ctl);
  758. spin_unlock(&ctl->tree_lock);
  759. goto out;
  760. }
  761. static int copy_free_space_cache(struct btrfs_block_group *block_group,
  762. struct btrfs_free_space_ctl *ctl)
  763. {
  764. struct btrfs_free_space *info;
  765. struct rb_node *n;
  766. int ret = 0;
  767. while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
  768. info = rb_entry(n, struct btrfs_free_space, offset_index);
  769. if (!info->bitmap) {
  770. const u64 offset = info->offset;
  771. const u64 bytes = info->bytes;
  772. unlink_free_space(ctl, info, true);
  773. spin_unlock(&ctl->tree_lock);
  774. kmem_cache_free(btrfs_free_space_cachep, info);
  775. ret = btrfs_add_free_space(block_group, offset, bytes);
  776. spin_lock(&ctl->tree_lock);
  777. } else {
  778. u64 offset = info->offset;
  779. u64 bytes = ctl->unit;
  780. ret = search_bitmap(ctl, info, &offset, &bytes, false);
  781. if (ret == 0) {
  782. bitmap_clear_bits(ctl, info, offset, bytes, true);
  783. spin_unlock(&ctl->tree_lock);
  784. ret = btrfs_add_free_space(block_group, offset,
  785. bytes);
  786. spin_lock(&ctl->tree_lock);
  787. } else {
  788. free_bitmap(ctl, info);
  789. ret = 0;
  790. }
  791. }
  792. cond_resched_lock(&ctl->tree_lock);
  793. }
  794. return ret;
  795. }
  796. static struct lock_class_key btrfs_free_space_inode_key;
  797. int load_free_space_cache(struct btrfs_block_group *block_group)
  798. {
  799. struct btrfs_fs_info *fs_info = block_group->fs_info;
  800. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  801. struct btrfs_free_space_ctl tmp_ctl = {};
  802. struct inode *inode;
  803. struct btrfs_path *path;
  804. int ret = 0;
  805. bool matched;
  806. u64 used = block_group->used;
  807. /*
  808. * Because we could potentially discard our loaded free space, we want
  809. * to load everything into a temporary structure first, and then if it's
  810. * valid copy it all into the actual free space ctl.
  811. */
  812. btrfs_init_free_space_ctl(block_group, &tmp_ctl);
  813. /*
  814. * If this block group has been marked to be cleared for one reason or
  815. * another then we can't trust the on disk cache, so just return.
  816. */
  817. spin_lock(&block_group->lock);
  818. if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
  819. spin_unlock(&block_group->lock);
  820. return 0;
  821. }
  822. spin_unlock(&block_group->lock);
  823. path = btrfs_alloc_path();
  824. if (!path)
  825. return 0;
  826. path->search_commit_root = true;
  827. path->skip_locking = true;
  828. /*
  829. * We must pass a path with search_commit_root set to btrfs_iget in
  830. * order to avoid a deadlock when allocating extents for the tree root.
  831. *
  832. * When we are COWing an extent buffer from the tree root, when looking
  833. * for a free extent, at extent-tree.c:find_free_extent(), we can find
  834. * block group without its free space cache loaded. When we find one
  835. * we must load its space cache which requires reading its free space
  836. * cache's inode item from the root tree. If this inode item is located
  837. * in the same leaf that we started COWing before, then we end up in
  838. * deadlock on the extent buffer (trying to read lock it when we
  839. * previously write locked it).
  840. *
  841. * It's safe to read the inode item using the commit root because
  842. * block groups, once loaded, stay in memory forever (until they are
  843. * removed) as well as their space caches once loaded. New block groups
  844. * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
  845. * we will never try to read their inode item while the fs is mounted.
  846. */
  847. inode = lookup_free_space_inode(block_group, path);
  848. if (IS_ERR(inode)) {
  849. btrfs_free_path(path);
  850. return 0;
  851. }
  852. /* We may have converted the inode and made the cache invalid. */
  853. spin_lock(&block_group->lock);
  854. if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
  855. spin_unlock(&block_group->lock);
  856. btrfs_free_path(path);
  857. goto out;
  858. }
  859. spin_unlock(&block_group->lock);
  860. /*
  861. * Reinitialize the class of struct inode's mapping->invalidate_lock for
  862. * free space inodes to prevent false positives related to locks for normal
  863. * inodes.
  864. */
  865. lockdep_set_class(&(&inode->i_data)->invalidate_lock,
  866. &btrfs_free_space_inode_key);
  867. ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
  868. path, block_group->start);
  869. btrfs_free_path(path);
  870. if (ret <= 0)
  871. goto out;
  872. matched = (tmp_ctl.free_space == (block_group->length - used -
  873. block_group->bytes_super));
  874. if (matched) {
  875. spin_lock(&tmp_ctl.tree_lock);
  876. ret = copy_free_space_cache(block_group, &tmp_ctl);
  877. spin_unlock(&tmp_ctl.tree_lock);
  878. /*
  879. * ret == 1 means we successfully loaded the free space cache,
  880. * so we need to re-set it here.
  881. */
  882. if (ret == 0)
  883. ret = 1;
  884. } else {
  885. /*
  886. * We need to call the _locked variant so we don't try to update
  887. * the discard counters.
  888. */
  889. spin_lock(&tmp_ctl.tree_lock);
  890. __btrfs_remove_free_space_cache(&tmp_ctl);
  891. spin_unlock(&tmp_ctl.tree_lock);
  892. btrfs_warn(fs_info,
  893. "block group %llu has wrong amount of free space",
  894. block_group->start);
  895. ret = -1;
  896. }
  897. out:
  898. if (ret < 0) {
  899. /* This cache is bogus, make sure it gets cleared */
  900. spin_lock(&block_group->lock);
  901. block_group->disk_cache_state = BTRFS_DC_CLEAR;
  902. spin_unlock(&block_group->lock);
  903. ret = 0;
  904. btrfs_warn(fs_info,
  905. "failed to load free space cache for block group %llu, rebuilding it now",
  906. block_group->start);
  907. }
  908. spin_lock(&ctl->tree_lock);
  909. btrfs_discard_update_discardable(block_group);
  910. spin_unlock(&ctl->tree_lock);
  911. iput(inode);
  912. return ret;
  913. }
  914. static noinline_for_stack
  915. int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
  916. struct btrfs_free_space_ctl *ctl,
  917. struct btrfs_block_group *block_group,
  918. int *entries, int *bitmaps,
  919. struct list_head *bitmap_list)
  920. {
  921. int ret;
  922. struct btrfs_free_cluster *cluster = NULL;
  923. struct btrfs_free_cluster *cluster_locked = NULL;
  924. struct rb_node *node = rb_first(&ctl->free_space_offset);
  925. struct btrfs_trim_range *trim_entry;
  926. /* Get the cluster for this block_group if it exists */
  927. if (!list_empty(&block_group->cluster_list)) {
  928. cluster = list_first_entry(&block_group->cluster_list,
  929. struct btrfs_free_cluster, block_group_list);
  930. }
  931. if (!node && cluster) {
  932. cluster_locked = cluster;
  933. spin_lock(&cluster_locked->lock);
  934. node = rb_first(&cluster->root);
  935. cluster = NULL;
  936. }
  937. /* Write out the extent entries */
  938. while (node) {
  939. struct btrfs_free_space *e;
  940. e = rb_entry(node, struct btrfs_free_space, offset_index);
  941. *entries += 1;
  942. ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
  943. e->bitmap);
  944. if (ret)
  945. goto fail;
  946. if (e->bitmap) {
  947. list_add_tail(&e->list, bitmap_list);
  948. *bitmaps += 1;
  949. }
  950. node = rb_next(node);
  951. if (!node && cluster) {
  952. node = rb_first(&cluster->root);
  953. cluster_locked = cluster;
  954. spin_lock(&cluster_locked->lock);
  955. cluster = NULL;
  956. }
  957. }
  958. if (cluster_locked) {
  959. spin_unlock(&cluster_locked->lock);
  960. cluster_locked = NULL;
  961. }
  962. /*
  963. * Make sure we don't miss any range that was removed from our rbtree
  964. * because trimming is running. Otherwise after a umount+mount (or crash
  965. * after committing the transaction) we would leak free space and get
  966. * an inconsistent free space cache report from fsck.
  967. */
  968. list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
  969. ret = io_ctl_add_entry(io_ctl, trim_entry->start,
  970. trim_entry->bytes, NULL);
  971. if (ret)
  972. goto fail;
  973. *entries += 1;
  974. }
  975. return 0;
  976. fail:
  977. if (cluster_locked)
  978. spin_unlock(&cluster_locked->lock);
  979. return -ENOSPC;
  980. }
  981. static noinline_for_stack int
  982. update_cache_item(struct btrfs_trans_handle *trans,
  983. struct btrfs_root *root,
  984. struct inode *inode,
  985. struct btrfs_path *path, u64 offset,
  986. int entries, int bitmaps)
  987. {
  988. struct btrfs_key key;
  989. struct btrfs_free_space_header *header;
  990. struct extent_buffer *leaf;
  991. int ret;
  992. key.objectid = BTRFS_FREE_SPACE_OBJECTID;
  993. key.type = 0;
  994. key.offset = offset;
  995. ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
  996. if (ret < 0) {
  997. btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
  998. EXTENT_DELALLOC, NULL);
  999. return ret;
  1000. }
  1001. leaf = path->nodes[0];
  1002. if (ret > 0) {
  1003. struct btrfs_key found_key;
  1004. ASSERT(path->slots[0]);
  1005. path->slots[0]--;
  1006. btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
  1007. if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
  1008. found_key.offset != offset) {
  1009. btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
  1010. inode->i_size - 1, EXTENT_DELALLOC,
  1011. NULL);
  1012. btrfs_release_path(path);
  1013. return -ENOENT;
  1014. }
  1015. }
  1016. BTRFS_I(inode)->generation = trans->transid;
  1017. header = btrfs_item_ptr(leaf, path->slots[0],
  1018. struct btrfs_free_space_header);
  1019. btrfs_set_free_space_entries(leaf, header, entries);
  1020. btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
  1021. btrfs_set_free_space_generation(leaf, header, trans->transid);
  1022. btrfs_release_path(path);
  1023. return 0;
  1024. }
  1025. static noinline_for_stack int write_pinned_extent_entries(
  1026. struct btrfs_trans_handle *trans,
  1027. struct btrfs_block_group *block_group,
  1028. struct btrfs_io_ctl *io_ctl,
  1029. int *entries)
  1030. {
  1031. u64 start, extent_start, extent_end, len;
  1032. const u64 block_group_end = btrfs_block_group_end(block_group);
  1033. struct extent_io_tree *unpin = NULL;
  1034. int ret;
  1035. /*
  1036. * We want to add any pinned extents to our free space cache
  1037. * so we don't leak the space
  1038. *
  1039. * We shouldn't have switched the pinned extents yet so this is the
  1040. * right one
  1041. */
  1042. unpin = &trans->transaction->pinned_extents;
  1043. start = block_group->start;
  1044. while (start < block_group_end) {
  1045. if (!btrfs_find_first_extent_bit(unpin, start,
  1046. &extent_start, &extent_end,
  1047. EXTENT_DIRTY, NULL))
  1048. return 0;
  1049. /* This pinned extent is out of our range */
  1050. if (extent_start >= block_group_end)
  1051. return 0;
  1052. extent_start = max(extent_start, start);
  1053. extent_end = min(block_group_end, extent_end + 1);
  1054. len = extent_end - extent_start;
  1055. *entries += 1;
  1056. ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
  1057. if (ret)
  1058. return -ENOSPC;
  1059. start = extent_end;
  1060. }
  1061. return 0;
  1062. }
  1063. static noinline_for_stack int
  1064. write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
  1065. {
  1066. struct btrfs_free_space *entry, *next;
  1067. int ret;
  1068. /* Write out the bitmaps */
  1069. list_for_each_entry_safe(entry, next, bitmap_list, list) {
  1070. ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
  1071. if (ret)
  1072. return -ENOSPC;
  1073. list_del_init(&entry->list);
  1074. }
  1075. return 0;
  1076. }
  1077. static int flush_dirty_cache(struct inode *inode)
  1078. {
  1079. int ret;
  1080. ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
  1081. if (ret)
  1082. btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
  1083. EXTENT_DELALLOC, NULL);
  1084. return ret;
  1085. }
  1086. static void noinline_for_stack
  1087. cleanup_bitmap_list(struct list_head *bitmap_list)
  1088. {
  1089. struct btrfs_free_space *entry, *next;
  1090. list_for_each_entry_safe(entry, next, bitmap_list, list)
  1091. list_del_init(&entry->list);
  1092. }
  1093. static void noinline_for_stack
  1094. cleanup_write_cache_enospc(struct inode *inode,
  1095. struct btrfs_io_ctl *io_ctl,
  1096. struct extent_state **cached_state)
  1097. {
  1098. io_ctl_drop_pages(io_ctl);
  1099. btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
  1100. cached_state);
  1101. }
  1102. static int __btrfs_wait_cache_io(struct btrfs_root *root,
  1103. struct btrfs_trans_handle *trans,
  1104. struct btrfs_block_group *block_group,
  1105. struct btrfs_io_ctl *io_ctl,
  1106. struct btrfs_path *path, u64 offset)
  1107. {
  1108. int ret;
  1109. struct inode *inode = io_ctl->inode;
  1110. if (!inode)
  1111. return 0;
  1112. /* Flush the dirty pages in the cache file. */
  1113. ret = flush_dirty_cache(inode);
  1114. if (ret)
  1115. goto out;
  1116. /* Update the cache item to tell everyone this cache file is valid. */
  1117. ret = update_cache_item(trans, root, inode, path, offset,
  1118. io_ctl->entries, io_ctl->bitmaps);
  1119. out:
  1120. if (ret) {
  1121. invalidate_inode_pages2(inode->i_mapping);
  1122. BTRFS_I(inode)->generation = 0;
  1123. if (block_group)
  1124. btrfs_debug(root->fs_info,
  1125. "failed to write free space cache for block group %llu error %d",
  1126. block_group->start, ret);
  1127. }
  1128. btrfs_update_inode(trans, BTRFS_I(inode));
  1129. if (block_group) {
  1130. /* the dirty list is protected by the dirty_bgs_lock */
  1131. spin_lock(&trans->transaction->dirty_bgs_lock);
  1132. /* the disk_cache_state is protected by the block group lock */
  1133. spin_lock(&block_group->lock);
  1134. /*
  1135. * only mark this as written if we didn't get put back on
  1136. * the dirty list while waiting for IO. Otherwise our
  1137. * cache state won't be right, and we won't get written again
  1138. */
  1139. if (!ret && list_empty(&block_group->dirty_list))
  1140. block_group->disk_cache_state = BTRFS_DC_WRITTEN;
  1141. else if (ret)
  1142. block_group->disk_cache_state = BTRFS_DC_ERROR;
  1143. spin_unlock(&block_group->lock);
  1144. spin_unlock(&trans->transaction->dirty_bgs_lock);
  1145. io_ctl->inode = NULL;
  1146. iput(inode);
  1147. }
  1148. return ret;
  1149. }
  1150. int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
  1151. struct btrfs_block_group *block_group,
  1152. struct btrfs_path *path)
  1153. {
  1154. return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
  1155. block_group, &block_group->io_ctl,
  1156. path, block_group->start);
  1157. }
  1158. /*
  1159. * Write out cached info to an inode.
  1160. *
  1161. * @inode: freespace inode we are writing out
  1162. * @ctl: free space cache we are going to write out
  1163. * @block_group: block_group for this cache if it belongs to a block_group
  1164. * @io_ctl: holds context for the io
  1165. * @trans: the trans handle
  1166. *
  1167. * This function writes out a free space cache struct to disk for quick recovery
  1168. * on mount. This will return 0 if it was successful in writing the cache out,
  1169. * or an errno if it was not.
  1170. */
  1171. static int __btrfs_write_out_cache(struct inode *inode,
  1172. struct btrfs_free_space_ctl *ctl,
  1173. struct btrfs_block_group *block_group,
  1174. struct btrfs_trans_handle *trans)
  1175. {
  1176. struct btrfs_io_ctl *io_ctl = &block_group->io_ctl;
  1177. struct extent_state *cached_state = NULL;
  1178. LIST_HEAD(bitmap_list);
  1179. int entries = 0;
  1180. int bitmaps = 0;
  1181. int ret;
  1182. int must_iput = 0;
  1183. int i_size;
  1184. if (!i_size_read(inode))
  1185. return -EIO;
  1186. WARN_ON(io_ctl->pages);
  1187. ret = io_ctl_init(io_ctl, inode, 1);
  1188. if (ret)
  1189. return ret;
  1190. if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) {
  1191. down_write(&block_group->data_rwsem);
  1192. spin_lock(&block_group->lock);
  1193. if (block_group->delalloc_bytes) {
  1194. block_group->disk_cache_state = BTRFS_DC_WRITTEN;
  1195. spin_unlock(&block_group->lock);
  1196. up_write(&block_group->data_rwsem);
  1197. BTRFS_I(inode)->generation = 0;
  1198. ret = 0;
  1199. must_iput = 1;
  1200. goto out;
  1201. }
  1202. spin_unlock(&block_group->lock);
  1203. }
  1204. /* Lock all pages first so we can lock the extent safely. */
  1205. ret = io_ctl_prepare_pages(io_ctl, false);
  1206. if (ret)
  1207. goto out_unlock;
  1208. btrfs_lock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
  1209. &cached_state);
  1210. io_ctl_set_generation(io_ctl, trans->transid);
  1211. mutex_lock(&ctl->cache_writeout_mutex);
  1212. /* Write out the extent entries in the free space cache */
  1213. spin_lock(&ctl->tree_lock);
  1214. ret = write_cache_extent_entries(io_ctl, ctl,
  1215. block_group, &entries, &bitmaps,
  1216. &bitmap_list);
  1217. if (ret)
  1218. goto out_nospc_locked;
  1219. /*
  1220. * Some spaces that are freed in the current transaction are pinned,
  1221. * they will be added into free space cache after the transaction is
  1222. * committed, we shouldn't lose them.
  1223. *
  1224. * If this changes while we are working we'll get added back to
  1225. * the dirty list and redo it. No locking needed
  1226. */
  1227. ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
  1228. if (ret)
  1229. goto out_nospc_locked;
  1230. /*
  1231. * At last, we write out all the bitmaps and keep cache_writeout_mutex
  1232. * locked while doing it because a concurrent trim can be manipulating
  1233. * or freeing the bitmap.
  1234. */
  1235. ret = write_bitmap_entries(io_ctl, &bitmap_list);
  1236. spin_unlock(&ctl->tree_lock);
  1237. mutex_unlock(&ctl->cache_writeout_mutex);
  1238. if (ret)
  1239. goto out_nospc;
  1240. /* Zero out the rest of the pages just to make sure */
  1241. io_ctl_zero_remaining_pages(io_ctl);
  1242. /* Everything is written out, now we dirty the pages in the file. */
  1243. i_size = i_size_read(inode);
  1244. for (int i = 0; i < round_up(i_size, PAGE_SIZE) / PAGE_SIZE; i++) {
  1245. u64 dirty_start = i * PAGE_SIZE;
  1246. u64 dirty_len = min_t(u64, dirty_start + PAGE_SIZE, i_size) - dirty_start;
  1247. ret = btrfs_dirty_folio(BTRFS_I(inode), page_folio(io_ctl->pages[i]),
  1248. dirty_start, dirty_len, &cached_state, false);
  1249. if (ret < 0)
  1250. goto out_nospc;
  1251. }
  1252. if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
  1253. up_write(&block_group->data_rwsem);
  1254. /*
  1255. * Release the pages and unlock the extent, we will flush
  1256. * them out later
  1257. */
  1258. io_ctl_drop_pages(io_ctl);
  1259. io_ctl_free(io_ctl);
  1260. btrfs_unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
  1261. &cached_state);
  1262. /*
  1263. * at this point the pages are under IO and we're happy,
  1264. * The caller is responsible for waiting on them and updating
  1265. * the cache and the inode
  1266. */
  1267. io_ctl->entries = entries;
  1268. io_ctl->bitmaps = bitmaps;
  1269. ret = btrfs_fdatawrite_range(BTRFS_I(inode), 0, (u64)-1);
  1270. if (ret)
  1271. goto out;
  1272. return 0;
  1273. out_nospc_locked:
  1274. cleanup_bitmap_list(&bitmap_list);
  1275. spin_unlock(&ctl->tree_lock);
  1276. mutex_unlock(&ctl->cache_writeout_mutex);
  1277. out_nospc:
  1278. cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
  1279. out_unlock:
  1280. if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
  1281. up_write(&block_group->data_rwsem);
  1282. out:
  1283. io_ctl->inode = NULL;
  1284. io_ctl_free(io_ctl);
  1285. if (ret) {
  1286. invalidate_inode_pages2(inode->i_mapping);
  1287. BTRFS_I(inode)->generation = 0;
  1288. }
  1289. btrfs_update_inode(trans, BTRFS_I(inode));
  1290. if (must_iput)
  1291. iput(inode);
  1292. return ret;
  1293. }
  1294. int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
  1295. struct btrfs_block_group *block_group,
  1296. struct btrfs_path *path)
  1297. {
  1298. struct btrfs_fs_info *fs_info = trans->fs_info;
  1299. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  1300. struct inode *inode;
  1301. int ret = 0;
  1302. spin_lock(&block_group->lock);
  1303. if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
  1304. spin_unlock(&block_group->lock);
  1305. return 0;
  1306. }
  1307. spin_unlock(&block_group->lock);
  1308. inode = lookup_free_space_inode(block_group, path);
  1309. if (IS_ERR(inode))
  1310. return 0;
  1311. ret = __btrfs_write_out_cache(inode, ctl, block_group, trans);
  1312. if (ret) {
  1313. btrfs_debug(fs_info,
  1314. "failed to write free space cache for block group %llu error %d",
  1315. block_group->start, ret);
  1316. spin_lock(&block_group->lock);
  1317. block_group->disk_cache_state = BTRFS_DC_ERROR;
  1318. spin_unlock(&block_group->lock);
  1319. block_group->io_ctl.inode = NULL;
  1320. iput(inode);
  1321. }
  1322. /*
  1323. * if ret == 0 the caller is expected to call btrfs_wait_cache_io
  1324. * to wait for IO and put the inode
  1325. */
  1326. return ret;
  1327. }
  1328. static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
  1329. u64 offset)
  1330. {
  1331. ASSERT(offset >= bitmap_start);
  1332. offset -= bitmap_start;
  1333. return (unsigned long)(div_u64(offset, unit));
  1334. }
  1335. static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
  1336. {
  1337. return (unsigned long)(div_u64(bytes, unit));
  1338. }
  1339. static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
  1340. u64 offset)
  1341. {
  1342. u64 bitmap_start;
  1343. u64 bytes_per_bitmap;
  1344. bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
  1345. bitmap_start = offset - ctl->start;
  1346. bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
  1347. bitmap_start *= bytes_per_bitmap;
  1348. bitmap_start += ctl->start;
  1349. return bitmap_start;
  1350. }
  1351. static int tree_insert_offset(struct btrfs_free_space_ctl *ctl,
  1352. struct btrfs_free_cluster *cluster,
  1353. struct btrfs_free_space *new_entry)
  1354. {
  1355. struct rb_root *root;
  1356. struct rb_node **p;
  1357. struct rb_node *parent = NULL;
  1358. lockdep_assert_held(&ctl->tree_lock);
  1359. if (cluster) {
  1360. lockdep_assert_held(&cluster->lock);
  1361. root = &cluster->root;
  1362. } else {
  1363. root = &ctl->free_space_offset;
  1364. }
  1365. p = &root->rb_node;
  1366. while (*p) {
  1367. struct btrfs_free_space *info;
  1368. parent = *p;
  1369. info = rb_entry(parent, struct btrfs_free_space, offset_index);
  1370. if (new_entry->offset < info->offset) {
  1371. p = &(*p)->rb_left;
  1372. } else if (new_entry->offset > info->offset) {
  1373. p = &(*p)->rb_right;
  1374. } else {
  1375. /*
  1376. * we could have a bitmap entry and an extent entry
  1377. * share the same offset. If this is the case, we want
  1378. * the extent entry to always be found first if we do a
  1379. * linear search through the tree, since we want to have
  1380. * the quickest allocation time, and allocating from an
  1381. * extent is faster than allocating from a bitmap. So
  1382. * if we're inserting a bitmap and we find an entry at
  1383. * this offset, we want to go right, or after this entry
  1384. * logically. If we are inserting an extent and we've
  1385. * found a bitmap, we want to go left, or before
  1386. * logically.
  1387. */
  1388. if (new_entry->bitmap) {
  1389. if (info->bitmap) {
  1390. WARN_ON_ONCE(1);
  1391. return -EEXIST;
  1392. }
  1393. p = &(*p)->rb_right;
  1394. } else {
  1395. if (!info->bitmap) {
  1396. WARN_ON_ONCE(1);
  1397. return -EEXIST;
  1398. }
  1399. p = &(*p)->rb_left;
  1400. }
  1401. }
  1402. }
  1403. rb_link_node(&new_entry->offset_index, parent, p);
  1404. rb_insert_color(&new_entry->offset_index, root);
  1405. return 0;
  1406. }
  1407. /*
  1408. * This is a little subtle. We *only* have ->max_extent_size set if we actually
  1409. * searched through the bitmap and figured out the largest ->max_extent_size,
  1410. * otherwise it's 0. In the case that it's 0 we don't want to tell the
  1411. * allocator the wrong thing, we want to use the actual real max_extent_size
  1412. * we've found already if it's larger, or we want to use ->bytes.
  1413. *
  1414. * This matters because find_free_space() will skip entries who's ->bytes is
  1415. * less than the required bytes. So if we didn't search down this bitmap, we
  1416. * may pick some previous entry that has a smaller ->max_extent_size than we
  1417. * have. For example, assume we have two entries, one that has
  1418. * ->max_extent_size set to 4K and ->bytes set to 1M. A second entry hasn't set
  1419. * ->max_extent_size yet, has ->bytes set to 8K and it's contiguous. We will
  1420. * call into find_free_space(), and return with max_extent_size == 4K, because
  1421. * that first bitmap entry had ->max_extent_size set, but the second one did
  1422. * not. If instead we returned 8K we'd come in searching for 8K, and find the
  1423. * 8K contiguous range.
  1424. *
  1425. * Consider the other case, we have 2 8K chunks in that second entry and still
  1426. * don't have ->max_extent_size set. We'll return 16K, and the next time the
  1427. * allocator comes in it'll fully search our second bitmap, and this time it'll
  1428. * get an uptodate value of 8K as the maximum chunk size. Then we'll get the
  1429. * right allocation the next loop through.
  1430. */
  1431. static inline u64 get_max_extent_size(const struct btrfs_free_space *entry)
  1432. {
  1433. if (entry->bitmap && entry->max_extent_size)
  1434. return entry->max_extent_size;
  1435. return entry->bytes;
  1436. }
  1437. /*
  1438. * We want the largest entry to be leftmost, so this is inverted from what you'd
  1439. * normally expect.
  1440. */
  1441. static bool entry_less(struct rb_node *node, const struct rb_node *parent)
  1442. {
  1443. const struct btrfs_free_space *entry, *exist;
  1444. entry = rb_entry(node, struct btrfs_free_space, bytes_index);
  1445. exist = rb_entry(parent, struct btrfs_free_space, bytes_index);
  1446. return get_max_extent_size(exist) < get_max_extent_size(entry);
  1447. }
  1448. /*
  1449. * searches the tree for the given offset.
  1450. *
  1451. * fuzzy - If this is set, then we are trying to make an allocation, and we just
  1452. * want a section that has at least bytes size and comes at or after the given
  1453. * offset.
  1454. */
  1455. static struct btrfs_free_space *
  1456. tree_search_offset(struct btrfs_free_space_ctl *ctl,
  1457. u64 offset, int bitmap_only, int fuzzy)
  1458. {
  1459. struct rb_node *n = ctl->free_space_offset.rb_node;
  1460. struct btrfs_free_space *entry = NULL, *prev = NULL;
  1461. lockdep_assert_held(&ctl->tree_lock);
  1462. /* find entry that is closest to the 'offset' */
  1463. while (n) {
  1464. entry = rb_entry(n, struct btrfs_free_space, offset_index);
  1465. prev = entry;
  1466. if (offset < entry->offset)
  1467. n = n->rb_left;
  1468. else if (offset > entry->offset)
  1469. n = n->rb_right;
  1470. else
  1471. break;
  1472. entry = NULL;
  1473. }
  1474. if (bitmap_only) {
  1475. if (!entry)
  1476. return NULL;
  1477. if (entry->bitmap)
  1478. return entry;
  1479. /*
  1480. * bitmap entry and extent entry may share same offset,
  1481. * in that case, bitmap entry comes after extent entry.
  1482. */
  1483. n = rb_next(n);
  1484. if (!n)
  1485. return NULL;
  1486. entry = rb_entry(n, struct btrfs_free_space, offset_index);
  1487. if (entry->offset != offset)
  1488. return NULL;
  1489. WARN_ON(!entry->bitmap);
  1490. return entry;
  1491. } else if (entry) {
  1492. if (entry->bitmap) {
  1493. /*
  1494. * if previous extent entry covers the offset,
  1495. * we should return it instead of the bitmap entry
  1496. */
  1497. n = rb_prev(&entry->offset_index);
  1498. if (n) {
  1499. prev = rb_entry(n, struct btrfs_free_space,
  1500. offset_index);
  1501. if (!prev->bitmap &&
  1502. prev->offset + prev->bytes > offset)
  1503. entry = prev;
  1504. }
  1505. }
  1506. return entry;
  1507. }
  1508. if (!prev)
  1509. return NULL;
  1510. /* find last entry before the 'offset' */
  1511. entry = prev;
  1512. if (entry->offset > offset) {
  1513. n = rb_prev(&entry->offset_index);
  1514. if (n) {
  1515. entry = rb_entry(n, struct btrfs_free_space,
  1516. offset_index);
  1517. ASSERT(entry->offset <= offset);
  1518. } else {
  1519. if (fuzzy)
  1520. return entry;
  1521. else
  1522. return NULL;
  1523. }
  1524. }
  1525. if (entry->bitmap) {
  1526. n = rb_prev(&entry->offset_index);
  1527. if (n) {
  1528. prev = rb_entry(n, struct btrfs_free_space,
  1529. offset_index);
  1530. if (!prev->bitmap &&
  1531. prev->offset + prev->bytes > offset)
  1532. return prev;
  1533. }
  1534. if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
  1535. return entry;
  1536. } else if (entry->offset + entry->bytes > offset)
  1537. return entry;
  1538. if (!fuzzy)
  1539. return NULL;
  1540. while (1) {
  1541. n = rb_next(&entry->offset_index);
  1542. if (!n)
  1543. return NULL;
  1544. entry = rb_entry(n, struct btrfs_free_space, offset_index);
  1545. if (entry->bitmap) {
  1546. if (entry->offset + BITS_PER_BITMAP *
  1547. ctl->unit > offset)
  1548. break;
  1549. } else {
  1550. if (entry->offset + entry->bytes > offset)
  1551. break;
  1552. }
  1553. }
  1554. return entry;
  1555. }
  1556. static inline void unlink_free_space(struct btrfs_free_space_ctl *ctl,
  1557. struct btrfs_free_space *info,
  1558. bool update_stat)
  1559. {
  1560. lockdep_assert_held(&ctl->tree_lock);
  1561. rb_erase(&info->offset_index, &ctl->free_space_offset);
  1562. rb_erase_cached(&info->bytes_index, &ctl->free_space_bytes);
  1563. ctl->free_extents--;
  1564. if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
  1565. ctl->discardable_extents[BTRFS_STAT_CURR]--;
  1566. ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
  1567. }
  1568. if (update_stat)
  1569. ctl->free_space -= info->bytes;
  1570. }
  1571. static int link_free_space(struct btrfs_free_space_ctl *ctl,
  1572. struct btrfs_free_space *info)
  1573. {
  1574. int ret = 0;
  1575. lockdep_assert_held(&ctl->tree_lock);
  1576. ASSERT(info->bytes || info->bitmap);
  1577. ret = tree_insert_offset(ctl, NULL, info);
  1578. if (ret)
  1579. return ret;
  1580. rb_add_cached(&info->bytes_index, &ctl->free_space_bytes, entry_less);
  1581. if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
  1582. ctl->discardable_extents[BTRFS_STAT_CURR]++;
  1583. ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
  1584. }
  1585. ctl->free_space += info->bytes;
  1586. ctl->free_extents++;
  1587. return ret;
  1588. }
  1589. static void relink_bitmap_entry(struct btrfs_free_space_ctl *ctl,
  1590. struct btrfs_free_space *info)
  1591. {
  1592. ASSERT(info->bitmap);
  1593. /*
  1594. * If our entry is empty it's because we're on a cluster and we don't
  1595. * want to re-link it into our ctl bytes index.
  1596. */
  1597. if (RB_EMPTY_NODE(&info->bytes_index))
  1598. return;
  1599. lockdep_assert_held(&ctl->tree_lock);
  1600. rb_erase_cached(&info->bytes_index, &ctl->free_space_bytes);
  1601. rb_add_cached(&info->bytes_index, &ctl->free_space_bytes, entry_less);
  1602. }
  1603. static inline void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
  1604. struct btrfs_free_space *info,
  1605. u64 offset, u64 bytes, bool update_stat)
  1606. {
  1607. unsigned long start, count, end;
  1608. int extent_delta = -1;
  1609. start = offset_to_bit(info->offset, ctl->unit, offset);
  1610. count = bytes_to_bits(bytes, ctl->unit);
  1611. end = start + count;
  1612. ASSERT(end <= BITS_PER_BITMAP);
  1613. bitmap_clear(info->bitmap, start, count);
  1614. info->bytes -= bytes;
  1615. if (info->max_extent_size > ctl->unit)
  1616. info->max_extent_size = 0;
  1617. relink_bitmap_entry(ctl, info);
  1618. if (start && test_bit(start - 1, info->bitmap))
  1619. extent_delta++;
  1620. if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
  1621. extent_delta++;
  1622. info->bitmap_extents += extent_delta;
  1623. if (!btrfs_free_space_trimmed(info)) {
  1624. ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
  1625. ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
  1626. }
  1627. if (update_stat)
  1628. ctl->free_space -= bytes;
  1629. }
  1630. static void btrfs_bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
  1631. struct btrfs_free_space *info, u64 offset,
  1632. u64 bytes)
  1633. {
  1634. unsigned long start, count, end;
  1635. int extent_delta = 1;
  1636. start = offset_to_bit(info->offset, ctl->unit, offset);
  1637. count = bytes_to_bits(bytes, ctl->unit);
  1638. end = start + count;
  1639. ASSERT(end <= BITS_PER_BITMAP);
  1640. bitmap_set(info->bitmap, start, count);
  1641. /*
  1642. * We set some bytes, we have no idea what the max extent size is
  1643. * anymore.
  1644. */
  1645. info->max_extent_size = 0;
  1646. info->bytes += bytes;
  1647. ctl->free_space += bytes;
  1648. relink_bitmap_entry(ctl, info);
  1649. if (start && test_bit(start - 1, info->bitmap))
  1650. extent_delta--;
  1651. if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
  1652. extent_delta--;
  1653. info->bitmap_extents += extent_delta;
  1654. if (!btrfs_free_space_trimmed(info)) {
  1655. ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
  1656. ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
  1657. }
  1658. }
  1659. /*
  1660. * If we can not find suitable extent, we will use bytes to record
  1661. * the size of the max extent.
  1662. */
  1663. static int search_bitmap(struct btrfs_free_space_ctl *ctl,
  1664. struct btrfs_free_space *bitmap_info, u64 *offset,
  1665. u64 *bytes, bool for_alloc)
  1666. {
  1667. unsigned long found_bits = 0;
  1668. unsigned long max_bits = 0;
  1669. unsigned long bits, i;
  1670. unsigned long next_zero;
  1671. unsigned long extent_bits;
  1672. /*
  1673. * Skip searching the bitmap if we don't have a contiguous section that
  1674. * is large enough for this allocation.
  1675. */
  1676. if (for_alloc &&
  1677. bitmap_info->max_extent_size &&
  1678. bitmap_info->max_extent_size < *bytes) {
  1679. *bytes = bitmap_info->max_extent_size;
  1680. return -1;
  1681. }
  1682. i = offset_to_bit(bitmap_info->offset, ctl->unit,
  1683. max_t(u64, *offset, bitmap_info->offset));
  1684. bits = bytes_to_bits(*bytes, ctl->unit);
  1685. for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
  1686. if (for_alloc && bits == 1) {
  1687. found_bits = 1;
  1688. break;
  1689. }
  1690. next_zero = find_next_zero_bit(bitmap_info->bitmap,
  1691. BITS_PER_BITMAP, i);
  1692. extent_bits = next_zero - i;
  1693. if (extent_bits >= bits) {
  1694. found_bits = extent_bits;
  1695. break;
  1696. } else if (extent_bits > max_bits) {
  1697. max_bits = extent_bits;
  1698. }
  1699. i = next_zero;
  1700. }
  1701. if (found_bits) {
  1702. *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
  1703. *bytes = (u64)(found_bits) * ctl->unit;
  1704. return 0;
  1705. }
  1706. *bytes = (u64)(max_bits) * ctl->unit;
  1707. bitmap_info->max_extent_size = *bytes;
  1708. relink_bitmap_entry(ctl, bitmap_info);
  1709. return -1;
  1710. }
  1711. /* Cache the size of the max extent in bytes */
  1712. static struct btrfs_free_space *
  1713. find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
  1714. unsigned long align, u64 *max_extent_size, bool use_bytes_index)
  1715. {
  1716. struct btrfs_free_space *entry;
  1717. struct rb_node *node;
  1718. u64 tmp;
  1719. u64 align_off;
  1720. int ret;
  1721. if (!ctl->free_space_offset.rb_node)
  1722. return NULL;
  1723. again:
  1724. if (use_bytes_index) {
  1725. node = rb_first_cached(&ctl->free_space_bytes);
  1726. } else {
  1727. entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset),
  1728. 0, 1);
  1729. if (!entry)
  1730. return NULL;
  1731. node = &entry->offset_index;
  1732. }
  1733. for (; node; node = rb_next(node)) {
  1734. if (use_bytes_index)
  1735. entry = rb_entry(node, struct btrfs_free_space,
  1736. bytes_index);
  1737. else
  1738. entry = rb_entry(node, struct btrfs_free_space,
  1739. offset_index);
  1740. /*
  1741. * If we are using the bytes index then all subsequent entries
  1742. * in this tree are going to be < bytes, so simply set the max
  1743. * extent size and exit the loop.
  1744. *
  1745. * If we're using the offset index then we need to keep going
  1746. * through the rest of the tree.
  1747. */
  1748. if (entry->bytes < *bytes) {
  1749. *max_extent_size = max(get_max_extent_size(entry),
  1750. *max_extent_size);
  1751. if (use_bytes_index)
  1752. break;
  1753. continue;
  1754. }
  1755. /* make sure the space returned is big enough
  1756. * to match our requested alignment
  1757. */
  1758. if (*bytes >= align) {
  1759. tmp = entry->offset - ctl->start + align - 1;
  1760. tmp = div64_u64(tmp, align);
  1761. tmp = tmp * align + ctl->start;
  1762. align_off = tmp - entry->offset;
  1763. } else {
  1764. align_off = 0;
  1765. tmp = entry->offset;
  1766. }
  1767. /*
  1768. * We don't break here if we're using the bytes index because we
  1769. * may have another entry that has the correct alignment that is
  1770. * the right size, so we don't want to miss that possibility.
  1771. * At worst this adds another loop through the logic, but if we
  1772. * broke here we could prematurely ENOSPC.
  1773. */
  1774. if (entry->bytes < *bytes + align_off) {
  1775. *max_extent_size = max(get_max_extent_size(entry),
  1776. *max_extent_size);
  1777. continue;
  1778. }
  1779. if (entry->bitmap) {
  1780. struct rb_node *old_next = rb_next(node);
  1781. u64 size = *bytes;
  1782. ret = search_bitmap(ctl, entry, &tmp, &size, true);
  1783. if (!ret) {
  1784. *offset = tmp;
  1785. *bytes = size;
  1786. return entry;
  1787. } else {
  1788. *max_extent_size =
  1789. max(get_max_extent_size(entry),
  1790. *max_extent_size);
  1791. }
  1792. /*
  1793. * The bitmap may have gotten re-arranged in the space
  1794. * index here because the max_extent_size may have been
  1795. * updated. Start from the beginning again if this
  1796. * happened.
  1797. */
  1798. if (use_bytes_index && old_next != rb_next(node))
  1799. goto again;
  1800. continue;
  1801. }
  1802. *offset = tmp;
  1803. *bytes = entry->bytes - align_off;
  1804. return entry;
  1805. }
  1806. return NULL;
  1807. }
  1808. static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
  1809. struct btrfs_free_space *info, u64 offset)
  1810. {
  1811. info->offset = offset_to_bitmap(ctl, offset);
  1812. info->bytes = 0;
  1813. info->bitmap_extents = 0;
  1814. INIT_LIST_HEAD(&info->list);
  1815. link_free_space(ctl, info);
  1816. ctl->total_bitmaps++;
  1817. recalculate_thresholds(ctl);
  1818. }
  1819. static void free_bitmap(struct btrfs_free_space_ctl *ctl,
  1820. struct btrfs_free_space *bitmap_info)
  1821. {
  1822. /*
  1823. * Normally when this is called, the bitmap is completely empty. However,
  1824. * if we are blowing up the free space cache for one reason or another
  1825. * via __btrfs_remove_free_space_cache(), then it may not be freed and
  1826. * we may leave stats on the table.
  1827. */
  1828. if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
  1829. ctl->discardable_extents[BTRFS_STAT_CURR] -=
  1830. bitmap_info->bitmap_extents;
  1831. ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
  1832. }
  1833. unlink_free_space(ctl, bitmap_info, true);
  1834. kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
  1835. kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
  1836. ctl->total_bitmaps--;
  1837. recalculate_thresholds(ctl);
  1838. }
  1839. static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
  1840. struct btrfs_free_space *bitmap_info,
  1841. u64 *offset, u64 *bytes)
  1842. {
  1843. u64 end;
  1844. u64 search_start, search_bytes;
  1845. int ret;
  1846. again:
  1847. end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
  1848. /*
  1849. * We need to search for bits in this bitmap. We could only cover some
  1850. * of the extent in this bitmap thanks to how we add space, so we need
  1851. * to search for as much as it as we can and clear that amount, and then
  1852. * go searching for the next bit.
  1853. */
  1854. search_start = *offset;
  1855. search_bytes = ctl->unit;
  1856. search_bytes = min(search_bytes, end - search_start + 1);
  1857. ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
  1858. false);
  1859. if (ret < 0 || search_start != *offset)
  1860. return -EINVAL;
  1861. /* We may have found more bits than what we need */
  1862. search_bytes = min(search_bytes, *bytes);
  1863. /* Cannot clear past the end of the bitmap */
  1864. search_bytes = min(search_bytes, end - search_start + 1);
  1865. bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes, true);
  1866. *offset += search_bytes;
  1867. *bytes -= search_bytes;
  1868. if (*bytes) {
  1869. struct rb_node *next = rb_next(&bitmap_info->offset_index);
  1870. if (!bitmap_info->bytes)
  1871. free_bitmap(ctl, bitmap_info);
  1872. /*
  1873. * no entry after this bitmap, but we still have bytes to
  1874. * remove, so something has gone wrong.
  1875. */
  1876. if (!next)
  1877. return -EINVAL;
  1878. bitmap_info = rb_entry(next, struct btrfs_free_space,
  1879. offset_index);
  1880. /*
  1881. * if the next entry isn't a bitmap we need to return to let the
  1882. * extent stuff do its work.
  1883. */
  1884. if (!bitmap_info->bitmap)
  1885. return -EAGAIN;
  1886. /*
  1887. * Ok the next item is a bitmap, but it may not actually hold
  1888. * the information for the rest of this free space stuff, so
  1889. * look for it, and if we don't find it return so we can try
  1890. * everything over again.
  1891. */
  1892. search_start = *offset;
  1893. search_bytes = ctl->unit;
  1894. ret = search_bitmap(ctl, bitmap_info, &search_start,
  1895. &search_bytes, false);
  1896. if (ret < 0 || search_start != *offset)
  1897. return -EAGAIN;
  1898. goto again;
  1899. } else if (!bitmap_info->bytes)
  1900. free_bitmap(ctl, bitmap_info);
  1901. return 0;
  1902. }
  1903. static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
  1904. struct btrfs_free_space *info, u64 offset,
  1905. u64 bytes, enum btrfs_trim_state trim_state)
  1906. {
  1907. u64 bytes_to_set = 0;
  1908. u64 end;
  1909. /*
  1910. * This is a tradeoff to make bitmap trim state minimal. We mark the
  1911. * whole bitmap untrimmed if at any point we add untrimmed regions.
  1912. */
  1913. if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
  1914. if (btrfs_free_space_trimmed(info)) {
  1915. ctl->discardable_extents[BTRFS_STAT_CURR] +=
  1916. info->bitmap_extents;
  1917. ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
  1918. }
  1919. info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  1920. }
  1921. end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
  1922. bytes_to_set = min(end - offset, bytes);
  1923. btrfs_bitmap_set_bits(ctl, info, offset, bytes_to_set);
  1924. return bytes_to_set;
  1925. }
  1926. static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
  1927. struct btrfs_free_space *info)
  1928. {
  1929. struct btrfs_block_group *block_group = ctl->block_group;
  1930. struct btrfs_fs_info *fs_info = block_group->fs_info;
  1931. bool forced = false;
  1932. #ifdef CONFIG_BTRFS_DEBUG
  1933. if (btrfs_should_fragment_free_space(block_group))
  1934. forced = true;
  1935. #endif
  1936. /* This is a way to reclaim large regions from the bitmaps. */
  1937. if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
  1938. return false;
  1939. /*
  1940. * If we are below the extents threshold then we can add this as an
  1941. * extent, and don't have to deal with the bitmap
  1942. */
  1943. if (!forced && ctl->free_extents < ctl->extents_thresh) {
  1944. /*
  1945. * If this block group has some small extents we don't want to
  1946. * use up all of our free slots in the cache with them, we want
  1947. * to reserve them to larger extents, however if we have plenty
  1948. * of cache left then go ahead and add them, no sense in adding
  1949. * the overhead of a bitmap if we don't have to.
  1950. */
  1951. if (info->bytes <= fs_info->sectorsize * 8) {
  1952. if (ctl->free_extents * 3 <= ctl->extents_thresh)
  1953. return false;
  1954. } else {
  1955. return false;
  1956. }
  1957. }
  1958. /*
  1959. * The original block groups from mkfs can be really small, like 8
  1960. * megabytes, so don't bother with a bitmap for those entries. However
  1961. * some block groups can be smaller than what a bitmap would cover but
  1962. * are still large enough that they could overflow the 32k memory limit,
  1963. * so allow those block groups to still be allowed to have a bitmap
  1964. * entry.
  1965. */
  1966. if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
  1967. return false;
  1968. return true;
  1969. }
  1970. static const struct btrfs_free_space_op free_space_op = {
  1971. .use_bitmap = use_bitmap,
  1972. };
  1973. static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
  1974. struct btrfs_free_space *info)
  1975. {
  1976. struct btrfs_free_space *bitmap_info;
  1977. struct btrfs_block_group *block_group = NULL;
  1978. int added = 0;
  1979. u64 bytes, offset, bytes_added;
  1980. enum btrfs_trim_state trim_state;
  1981. int ret;
  1982. bytes = info->bytes;
  1983. offset = info->offset;
  1984. trim_state = info->trim_state;
  1985. if (!ctl->op->use_bitmap(ctl, info))
  1986. return 0;
  1987. if (ctl->op == &free_space_op)
  1988. block_group = ctl->block_group;
  1989. again:
  1990. /*
  1991. * Since we link bitmaps right into the cluster we need to see if we
  1992. * have a cluster here, and if so and it has our bitmap we need to add
  1993. * the free space to that bitmap.
  1994. */
  1995. if (block_group && !list_empty(&block_group->cluster_list)) {
  1996. struct btrfs_free_cluster *cluster;
  1997. struct rb_node *node;
  1998. struct btrfs_free_space *entry;
  1999. cluster = list_first_entry(&block_group->cluster_list,
  2000. struct btrfs_free_cluster, block_group_list);
  2001. spin_lock(&cluster->lock);
  2002. node = rb_first(&cluster->root);
  2003. if (!node) {
  2004. spin_unlock(&cluster->lock);
  2005. goto no_cluster_bitmap;
  2006. }
  2007. entry = rb_entry(node, struct btrfs_free_space, offset_index);
  2008. if (!entry->bitmap) {
  2009. spin_unlock(&cluster->lock);
  2010. goto no_cluster_bitmap;
  2011. }
  2012. if (entry->offset == offset_to_bitmap(ctl, offset)) {
  2013. bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
  2014. bytes, trim_state);
  2015. bytes -= bytes_added;
  2016. offset += bytes_added;
  2017. }
  2018. spin_unlock(&cluster->lock);
  2019. if (!bytes) {
  2020. ret = 1;
  2021. goto out;
  2022. }
  2023. }
  2024. no_cluster_bitmap:
  2025. bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
  2026. 1, 0);
  2027. if (!bitmap_info) {
  2028. ASSERT(added == 0);
  2029. goto new_bitmap;
  2030. }
  2031. bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
  2032. trim_state);
  2033. bytes -= bytes_added;
  2034. offset += bytes_added;
  2035. added = 0;
  2036. if (!bytes) {
  2037. ret = 1;
  2038. goto out;
  2039. } else
  2040. goto again;
  2041. new_bitmap:
  2042. if (info && info->bitmap) {
  2043. add_new_bitmap(ctl, info, offset);
  2044. added = 1;
  2045. info = NULL;
  2046. goto again;
  2047. } else {
  2048. spin_unlock(&ctl->tree_lock);
  2049. /* no pre-allocated info, allocate a new one */
  2050. if (!info) {
  2051. info = kmem_cache_zalloc(btrfs_free_space_cachep,
  2052. GFP_NOFS);
  2053. if (!info) {
  2054. spin_lock(&ctl->tree_lock);
  2055. ret = -ENOMEM;
  2056. goto out;
  2057. }
  2058. }
  2059. /* allocate the bitmap */
  2060. info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
  2061. GFP_NOFS);
  2062. info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
  2063. spin_lock(&ctl->tree_lock);
  2064. if (!info->bitmap) {
  2065. ret = -ENOMEM;
  2066. goto out;
  2067. }
  2068. goto again;
  2069. }
  2070. out:
  2071. if (info) {
  2072. if (info->bitmap)
  2073. kmem_cache_free(btrfs_free_space_bitmap_cachep,
  2074. info->bitmap);
  2075. kmem_cache_free(btrfs_free_space_cachep, info);
  2076. }
  2077. return ret;
  2078. }
  2079. /*
  2080. * Free space merging rules:
  2081. * 1) Merge trimmed areas together
  2082. * 2) Let untrimmed areas coalesce with trimmed areas
  2083. * 3) Always pull neighboring regions from bitmaps
  2084. *
  2085. * The above rules are for when we merge free space based on btrfs_trim_state.
  2086. * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
  2087. * same reason: to promote larger extent regions which makes life easier for
  2088. * find_free_extent(). Rule 2 enables coalescing based on the common path
  2089. * being returning free space from btrfs_finish_extent_commit(). So when free
  2090. * space is trimmed, it will prevent aggregating trimmed new region and
  2091. * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents
  2092. * and provide find_free_extent() with the largest extents possible hoping for
  2093. * the reuse path.
  2094. */
  2095. static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
  2096. struct btrfs_free_space *info, bool update_stat)
  2097. {
  2098. struct btrfs_free_space *left_info = NULL;
  2099. struct btrfs_free_space *right_info;
  2100. bool merged = false;
  2101. u64 offset = info->offset;
  2102. u64 bytes = info->bytes;
  2103. const bool is_trimmed = btrfs_free_space_trimmed(info);
  2104. struct rb_node *right_prev = NULL;
  2105. /*
  2106. * first we want to see if there is free space adjacent to the range we
  2107. * are adding, if there is remove that struct and add a new one to
  2108. * cover the entire range
  2109. */
  2110. right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
  2111. if (right_info)
  2112. right_prev = rb_prev(&right_info->offset_index);
  2113. if (right_prev)
  2114. left_info = rb_entry(right_prev, struct btrfs_free_space, offset_index);
  2115. else if (!right_info)
  2116. left_info = tree_search_offset(ctl, offset - 1, 0, 0);
  2117. /* See try_merge_free_space() comment. */
  2118. if (right_info && !right_info->bitmap &&
  2119. (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
  2120. unlink_free_space(ctl, right_info, update_stat);
  2121. info->bytes += right_info->bytes;
  2122. kmem_cache_free(btrfs_free_space_cachep, right_info);
  2123. merged = true;
  2124. }
  2125. /* See try_merge_free_space() comment. */
  2126. if (left_info && !left_info->bitmap &&
  2127. left_info->offset + left_info->bytes == offset &&
  2128. (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
  2129. unlink_free_space(ctl, left_info, update_stat);
  2130. info->offset = left_info->offset;
  2131. info->bytes += left_info->bytes;
  2132. kmem_cache_free(btrfs_free_space_cachep, left_info);
  2133. merged = true;
  2134. }
  2135. return merged;
  2136. }
  2137. static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
  2138. struct btrfs_free_space *info,
  2139. bool update_stat)
  2140. {
  2141. struct btrfs_free_space *bitmap;
  2142. unsigned long i;
  2143. unsigned long j;
  2144. const u64 end = info->offset + info->bytes;
  2145. const u64 bitmap_offset = offset_to_bitmap(ctl, end);
  2146. u64 bytes;
  2147. bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
  2148. if (!bitmap)
  2149. return false;
  2150. i = offset_to_bit(bitmap->offset, ctl->unit, end);
  2151. j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
  2152. if (j == i)
  2153. return false;
  2154. bytes = (j - i) * ctl->unit;
  2155. info->bytes += bytes;
  2156. /* See try_merge_free_space() comment. */
  2157. if (!btrfs_free_space_trimmed(bitmap))
  2158. info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  2159. bitmap_clear_bits(ctl, bitmap, end, bytes, update_stat);
  2160. if (!bitmap->bytes)
  2161. free_bitmap(ctl, bitmap);
  2162. return true;
  2163. }
  2164. static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
  2165. struct btrfs_free_space *info,
  2166. bool update_stat)
  2167. {
  2168. struct btrfs_free_space *bitmap;
  2169. u64 bitmap_offset;
  2170. unsigned long i;
  2171. unsigned long j;
  2172. unsigned long prev_j;
  2173. u64 bytes;
  2174. bitmap_offset = offset_to_bitmap(ctl, info->offset);
  2175. /* If we're on a boundary, try the previous logical bitmap. */
  2176. if (bitmap_offset == info->offset) {
  2177. if (info->offset == 0)
  2178. return false;
  2179. bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
  2180. }
  2181. bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
  2182. if (!bitmap)
  2183. return false;
  2184. i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
  2185. j = 0;
  2186. prev_j = (unsigned long)-1;
  2187. for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
  2188. if (j > i)
  2189. break;
  2190. prev_j = j;
  2191. }
  2192. if (prev_j == i)
  2193. return false;
  2194. if (prev_j == (unsigned long)-1)
  2195. bytes = (i + 1) * ctl->unit;
  2196. else
  2197. bytes = (i - prev_j) * ctl->unit;
  2198. info->offset -= bytes;
  2199. info->bytes += bytes;
  2200. /* See try_merge_free_space() comment. */
  2201. if (!btrfs_free_space_trimmed(bitmap))
  2202. info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  2203. bitmap_clear_bits(ctl, bitmap, info->offset, bytes, update_stat);
  2204. if (!bitmap->bytes)
  2205. free_bitmap(ctl, bitmap);
  2206. return true;
  2207. }
  2208. /*
  2209. * We prefer always to allocate from extent entries, both for clustered and
  2210. * non-clustered allocation requests. So when attempting to add a new extent
  2211. * entry, try to see if there's adjacent free space in bitmap entries, and if
  2212. * there is, migrate that space from the bitmaps to the extent.
  2213. * Like this we get better chances of satisfying space allocation requests
  2214. * because we attempt to satisfy them based on a single cache entry, and never
  2215. * on 2 or more entries - even if the entries represent a contiguous free space
  2216. * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
  2217. * ends).
  2218. */
  2219. static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
  2220. struct btrfs_free_space *info,
  2221. bool update_stat)
  2222. {
  2223. /*
  2224. * Only work with disconnected entries, as we can change their offset,
  2225. * and must be extent entries.
  2226. */
  2227. ASSERT(!info->bitmap);
  2228. ASSERT(RB_EMPTY_NODE(&info->offset_index));
  2229. if (ctl->total_bitmaps > 0) {
  2230. bool stole_end;
  2231. bool stole_front = false;
  2232. stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
  2233. if (ctl->total_bitmaps > 0)
  2234. stole_front = steal_from_bitmap_to_front(ctl, info,
  2235. update_stat);
  2236. if (stole_end || stole_front)
  2237. try_merge_free_space(ctl, info, update_stat);
  2238. }
  2239. }
  2240. static int __btrfs_add_free_space(struct btrfs_block_group *block_group,
  2241. u64 offset, u64 bytes,
  2242. enum btrfs_trim_state trim_state)
  2243. {
  2244. struct btrfs_fs_info *fs_info = block_group->fs_info;
  2245. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2246. struct btrfs_free_space *info;
  2247. int ret = 0;
  2248. u64 filter_bytes = bytes;
  2249. ASSERT(!btrfs_is_zoned(fs_info));
  2250. info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
  2251. if (!info)
  2252. return -ENOMEM;
  2253. info->offset = offset;
  2254. info->bytes = bytes;
  2255. info->trim_state = trim_state;
  2256. RB_CLEAR_NODE(&info->offset_index);
  2257. RB_CLEAR_NODE(&info->bytes_index);
  2258. spin_lock(&ctl->tree_lock);
  2259. if (try_merge_free_space(ctl, info, true))
  2260. goto link;
  2261. /*
  2262. * There was no extent directly to the left or right of this new
  2263. * extent then we know we're going to have to allocate a new extent, so
  2264. * before we do that see if we need to drop this into a bitmap
  2265. */
  2266. ret = insert_into_bitmap(ctl, info);
  2267. if (ret < 0) {
  2268. goto out;
  2269. } else if (ret) {
  2270. ret = 0;
  2271. goto out;
  2272. }
  2273. link:
  2274. /*
  2275. * Only steal free space from adjacent bitmaps if we're sure we're not
  2276. * going to add the new free space to existing bitmap entries - because
  2277. * that would mean unnecessary work that would be reverted. Therefore
  2278. * attempt to steal space from bitmaps if we're adding an extent entry.
  2279. */
  2280. steal_from_bitmap(ctl, info, true);
  2281. filter_bytes = max(filter_bytes, info->bytes);
  2282. ret = link_free_space(ctl, info);
  2283. if (ret)
  2284. kmem_cache_free(btrfs_free_space_cachep, info);
  2285. out:
  2286. btrfs_discard_update_discardable(block_group);
  2287. spin_unlock(&ctl->tree_lock);
  2288. if (ret) {
  2289. btrfs_crit(fs_info, "unable to add free space :%d", ret);
  2290. ASSERT(ret != -EEXIST);
  2291. }
  2292. if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
  2293. btrfs_discard_check_filter(block_group, filter_bytes);
  2294. btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
  2295. }
  2296. return ret;
  2297. }
  2298. static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
  2299. u64 bytenr, u64 size, bool used)
  2300. {
  2301. struct btrfs_space_info *sinfo = block_group->space_info;
  2302. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2303. u64 offset = bytenr - block_group->start;
  2304. u64 to_free, to_unusable;
  2305. int bg_reclaim_threshold = 0;
  2306. bool initial;
  2307. u64 reclaimable_unusable;
  2308. spin_lock(&block_group->lock);
  2309. initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
  2310. WARN_ON(!initial && offset + size > block_group->zone_capacity);
  2311. if (!initial)
  2312. bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
  2313. if (!used)
  2314. to_free = size;
  2315. else if (initial)
  2316. to_free = block_group->zone_capacity;
  2317. else if (offset >= block_group->alloc_offset)
  2318. to_free = size;
  2319. else if (offset + size <= block_group->alloc_offset)
  2320. to_free = 0;
  2321. else
  2322. to_free = offset + size - block_group->alloc_offset;
  2323. to_unusable = size - to_free;
  2324. spin_lock(&ctl->tree_lock);
  2325. ctl->free_space += to_free;
  2326. spin_unlock(&ctl->tree_lock);
  2327. /*
  2328. * If the block group is read-only, we should account freed space into
  2329. * bytes_readonly.
  2330. */
  2331. if (!block_group->ro) {
  2332. block_group->zone_unusable += to_unusable;
  2333. WARN_ON(block_group->zone_unusable > block_group->length);
  2334. }
  2335. if (!used) {
  2336. block_group->alloc_offset -= size;
  2337. }
  2338. reclaimable_unusable = block_group->zone_unusable -
  2339. (block_group->length - block_group->zone_capacity);
  2340. /* All the region is now unusable. Mark it as unused and reclaim */
  2341. if (block_group->zone_unusable == block_group->length) {
  2342. btrfs_mark_bg_unused(block_group);
  2343. } else if (bg_reclaim_threshold &&
  2344. reclaimable_unusable >=
  2345. mult_perc(block_group->zone_capacity, bg_reclaim_threshold)) {
  2346. btrfs_mark_bg_to_reclaim(block_group);
  2347. }
  2348. spin_unlock(&block_group->lock);
  2349. return 0;
  2350. }
  2351. int btrfs_add_free_space(struct btrfs_block_group *block_group,
  2352. u64 bytenr, u64 size)
  2353. {
  2354. enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  2355. if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)
  2356. return 0;
  2357. if (btrfs_is_zoned(block_group->fs_info))
  2358. return __btrfs_add_free_space_zoned(block_group, bytenr, size,
  2359. true);
  2360. if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
  2361. trim_state = BTRFS_TRIM_STATE_TRIMMED;
  2362. return __btrfs_add_free_space(block_group, bytenr, size, trim_state);
  2363. }
  2364. int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
  2365. u64 bytenr, u64 size)
  2366. {
  2367. if (btrfs_is_zoned(block_group->fs_info))
  2368. return __btrfs_add_free_space_zoned(block_group, bytenr, size,
  2369. false);
  2370. return btrfs_add_free_space(block_group, bytenr, size);
  2371. }
  2372. /*
  2373. * This is a subtle distinction because when adding free space back in general,
  2374. * we want it to be added as untrimmed for async. But in the case where we add
  2375. * it on loading of a block group, we want to consider it trimmed.
  2376. */
  2377. int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
  2378. u64 bytenr, u64 size)
  2379. {
  2380. enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  2381. if (btrfs_is_zoned(block_group->fs_info))
  2382. return __btrfs_add_free_space_zoned(block_group, bytenr, size,
  2383. true);
  2384. if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
  2385. btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
  2386. trim_state = BTRFS_TRIM_STATE_TRIMMED;
  2387. return __btrfs_add_free_space(block_group, bytenr, size, trim_state);
  2388. }
  2389. int btrfs_remove_free_space(struct btrfs_block_group *block_group,
  2390. u64 offset, u64 bytes)
  2391. {
  2392. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2393. struct btrfs_free_space *info;
  2394. int ret;
  2395. bool re_search = false;
  2396. if (btrfs_is_zoned(block_group->fs_info)) {
  2397. /*
  2398. * This can happen with conventional zones when replaying log.
  2399. * Since the allocation info of tree-log nodes are not recorded
  2400. * to the extent-tree, calculate_alloc_pointer() failed to
  2401. * advance the allocation pointer after last allocated tree log
  2402. * node blocks.
  2403. *
  2404. * This function is called from
  2405. * btrfs_pin_extent_for_log_replay() when replaying the log.
  2406. * Advance the pointer not to overwrite the tree-log nodes.
  2407. */
  2408. if (block_group->start + block_group->alloc_offset <
  2409. offset + bytes) {
  2410. block_group->alloc_offset =
  2411. offset + bytes - block_group->start;
  2412. }
  2413. return 0;
  2414. }
  2415. spin_lock(&ctl->tree_lock);
  2416. again:
  2417. ret = 0;
  2418. if (!bytes)
  2419. goto out_lock;
  2420. info = tree_search_offset(ctl, offset, 0, 0);
  2421. if (!info) {
  2422. /*
  2423. * oops didn't find an extent that matched the space we wanted
  2424. * to remove, look for a bitmap instead
  2425. */
  2426. info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
  2427. 1, 0);
  2428. if (!info) {
  2429. /*
  2430. * If we found a partial bit of our free space in a
  2431. * bitmap but then couldn't find the other part this may
  2432. * be a problem, so WARN about it.
  2433. */
  2434. WARN_ON(re_search);
  2435. goto out_lock;
  2436. }
  2437. }
  2438. re_search = false;
  2439. if (!info->bitmap) {
  2440. unlink_free_space(ctl, info, true);
  2441. if (offset == info->offset) {
  2442. u64 to_free = min(bytes, info->bytes);
  2443. info->bytes -= to_free;
  2444. info->offset += to_free;
  2445. if (info->bytes) {
  2446. ret = link_free_space(ctl, info);
  2447. WARN_ON(ret);
  2448. } else {
  2449. kmem_cache_free(btrfs_free_space_cachep, info);
  2450. }
  2451. offset += to_free;
  2452. bytes -= to_free;
  2453. goto again;
  2454. } else {
  2455. u64 old_end = info->bytes + info->offset;
  2456. info->bytes = offset - info->offset;
  2457. ret = link_free_space(ctl, info);
  2458. WARN_ON(ret);
  2459. if (ret)
  2460. goto out_lock;
  2461. /* Not enough bytes in this entry to satisfy us */
  2462. if (old_end < offset + bytes) {
  2463. bytes -= old_end - offset;
  2464. offset = old_end;
  2465. goto again;
  2466. } else if (old_end == offset + bytes) {
  2467. /* all done */
  2468. goto out_lock;
  2469. }
  2470. spin_unlock(&ctl->tree_lock);
  2471. ret = __btrfs_add_free_space(block_group,
  2472. offset + bytes,
  2473. old_end - (offset + bytes),
  2474. info->trim_state);
  2475. WARN_ON(ret);
  2476. return ret;
  2477. }
  2478. }
  2479. ret = remove_from_bitmap(ctl, info, &offset, &bytes);
  2480. if (ret == -EAGAIN) {
  2481. re_search = true;
  2482. goto again;
  2483. }
  2484. out_lock:
  2485. btrfs_discard_update_discardable(block_group);
  2486. spin_unlock(&ctl->tree_lock);
  2487. return ret;
  2488. }
  2489. void btrfs_dump_free_space(struct btrfs_block_group *block_group,
  2490. u64 bytes)
  2491. {
  2492. struct btrfs_fs_info *fs_info = block_group->fs_info;
  2493. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2494. struct btrfs_free_space *info;
  2495. struct rb_node *n;
  2496. int count = 0;
  2497. /*
  2498. * Zoned btrfs does not use free space tree and cluster. Just print
  2499. * out the free space after the allocation offset.
  2500. */
  2501. if (btrfs_is_zoned(fs_info)) {
  2502. btrfs_info(fs_info, "free space %llu active %d",
  2503. block_group->zone_capacity - block_group->alloc_offset,
  2504. test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
  2505. &block_group->runtime_flags));
  2506. return;
  2507. }
  2508. spin_lock(&ctl->tree_lock);
  2509. for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
  2510. info = rb_entry(n, struct btrfs_free_space, offset_index);
  2511. if (info->bytes >= bytes && !block_group->ro)
  2512. count++;
  2513. btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
  2514. info->offset, info->bytes, str_yes_no(info->bitmap));
  2515. }
  2516. spin_unlock(&ctl->tree_lock);
  2517. btrfs_info(fs_info, "block group has cluster?: %s",
  2518. str_no_yes(list_empty(&block_group->cluster_list)));
  2519. btrfs_info(fs_info,
  2520. "%d free space entries at or bigger than %llu bytes",
  2521. count, bytes);
  2522. }
  2523. void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
  2524. struct btrfs_free_space_ctl *ctl)
  2525. {
  2526. struct btrfs_fs_info *fs_info = block_group->fs_info;
  2527. spin_lock_init(&ctl->tree_lock);
  2528. ctl->unit = fs_info->sectorsize;
  2529. ctl->start = block_group->start;
  2530. ctl->block_group = block_group;
  2531. ctl->op = &free_space_op;
  2532. ctl->free_space_bytes = RB_ROOT_CACHED;
  2533. INIT_LIST_HEAD(&ctl->trimming_ranges);
  2534. mutex_init(&ctl->cache_writeout_mutex);
  2535. /*
  2536. * we only want to have 32k of ram per block group for keeping
  2537. * track of free space, and if we pass 1/2 of that we want to
  2538. * start converting things over to using bitmaps
  2539. */
  2540. ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
  2541. }
  2542. /*
  2543. * for a given cluster, put all of its extents back into the free
  2544. * space cache. If the block group passed doesn't match the block group
  2545. * pointed to by the cluster, someone else raced in and freed the
  2546. * cluster already. In that case, we just return without changing anything
  2547. */
  2548. static void __btrfs_return_cluster_to_free_space(
  2549. struct btrfs_block_group *block_group,
  2550. struct btrfs_free_cluster *cluster)
  2551. {
  2552. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2553. struct rb_node *node;
  2554. lockdep_assert_held(&ctl->tree_lock);
  2555. spin_lock(&cluster->lock);
  2556. if (cluster->block_group != block_group) {
  2557. spin_unlock(&cluster->lock);
  2558. return;
  2559. }
  2560. cluster->block_group = NULL;
  2561. cluster->window_start = 0;
  2562. list_del_init(&cluster->block_group_list);
  2563. node = rb_first(&cluster->root);
  2564. while (node) {
  2565. struct btrfs_free_space *entry;
  2566. entry = rb_entry(node, struct btrfs_free_space, offset_index);
  2567. node = rb_next(&entry->offset_index);
  2568. rb_erase(&entry->offset_index, &cluster->root);
  2569. RB_CLEAR_NODE(&entry->offset_index);
  2570. if (!entry->bitmap) {
  2571. /* Merging treats extents as if they were new */
  2572. if (!btrfs_free_space_trimmed(entry)) {
  2573. ctl->discardable_extents[BTRFS_STAT_CURR]--;
  2574. ctl->discardable_bytes[BTRFS_STAT_CURR] -=
  2575. entry->bytes;
  2576. }
  2577. try_merge_free_space(ctl, entry, false);
  2578. steal_from_bitmap(ctl, entry, false);
  2579. /* As we insert directly, update these statistics */
  2580. if (!btrfs_free_space_trimmed(entry)) {
  2581. ctl->discardable_extents[BTRFS_STAT_CURR]++;
  2582. ctl->discardable_bytes[BTRFS_STAT_CURR] +=
  2583. entry->bytes;
  2584. }
  2585. }
  2586. tree_insert_offset(ctl, NULL, entry);
  2587. rb_add_cached(&entry->bytes_index, &ctl->free_space_bytes,
  2588. entry_less);
  2589. }
  2590. cluster->root = RB_ROOT;
  2591. spin_unlock(&cluster->lock);
  2592. btrfs_put_block_group(block_group);
  2593. }
  2594. void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
  2595. {
  2596. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2597. struct btrfs_free_cluster *cluster;
  2598. struct list_head *head;
  2599. spin_lock(&ctl->tree_lock);
  2600. while ((head = block_group->cluster_list.next) !=
  2601. &block_group->cluster_list) {
  2602. cluster = list_entry(head, struct btrfs_free_cluster,
  2603. block_group_list);
  2604. WARN_ON(cluster->block_group != block_group);
  2605. __btrfs_return_cluster_to_free_space(block_group, cluster);
  2606. cond_resched_lock(&ctl->tree_lock);
  2607. }
  2608. __btrfs_remove_free_space_cache(ctl);
  2609. btrfs_discard_update_discardable(block_group);
  2610. spin_unlock(&ctl->tree_lock);
  2611. }
  2612. /*
  2613. * Walk @block_group's free space rb_tree to determine if everything is trimmed.
  2614. */
  2615. bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
  2616. {
  2617. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2618. struct btrfs_free_space *info;
  2619. struct rb_node *node;
  2620. bool ret = true;
  2621. if (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED &&
  2622. !test_bit(BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING, &block_group->runtime_flags) &&
  2623. block_group->identity_remap_count == 0) {
  2624. return true;
  2625. }
  2626. spin_lock(&ctl->tree_lock);
  2627. node = rb_first(&ctl->free_space_offset);
  2628. while (node) {
  2629. info = rb_entry(node, struct btrfs_free_space, offset_index);
  2630. if (!btrfs_free_space_trimmed(info)) {
  2631. ret = false;
  2632. break;
  2633. }
  2634. node = rb_next(node);
  2635. }
  2636. spin_unlock(&ctl->tree_lock);
  2637. return ret;
  2638. }
  2639. u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
  2640. u64 offset, u64 bytes, u64 empty_size,
  2641. u64 *max_extent_size)
  2642. {
  2643. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2644. struct btrfs_discard_ctl *discard_ctl =
  2645. &block_group->fs_info->discard_ctl;
  2646. struct btrfs_free_space *entry = NULL;
  2647. u64 bytes_search = bytes + empty_size;
  2648. u64 ret = 0;
  2649. u64 align_gap = 0;
  2650. u64 align_gap_len = 0;
  2651. enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  2652. bool use_bytes_index = (offset == block_group->start);
  2653. ASSERT(!btrfs_is_zoned(block_group->fs_info));
  2654. spin_lock(&ctl->tree_lock);
  2655. entry = find_free_space(ctl, &offset, &bytes_search,
  2656. block_group->full_stripe_len, max_extent_size,
  2657. use_bytes_index);
  2658. if (!entry)
  2659. goto out;
  2660. ret = offset;
  2661. if (entry->bitmap) {
  2662. bitmap_clear_bits(ctl, entry, offset, bytes, true);
  2663. if (!btrfs_free_space_trimmed(entry))
  2664. atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
  2665. if (!entry->bytes)
  2666. free_bitmap(ctl, entry);
  2667. } else {
  2668. unlink_free_space(ctl, entry, true);
  2669. align_gap_len = offset - entry->offset;
  2670. align_gap = entry->offset;
  2671. align_gap_trim_state = entry->trim_state;
  2672. if (!btrfs_free_space_trimmed(entry))
  2673. atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
  2674. entry->offset = offset + bytes;
  2675. WARN_ON(entry->bytes < bytes + align_gap_len);
  2676. entry->bytes -= bytes + align_gap_len;
  2677. if (!entry->bytes)
  2678. kmem_cache_free(btrfs_free_space_cachep, entry);
  2679. else
  2680. link_free_space(ctl, entry);
  2681. }
  2682. out:
  2683. btrfs_discard_update_discardable(block_group);
  2684. spin_unlock(&ctl->tree_lock);
  2685. if (align_gap_len)
  2686. __btrfs_add_free_space(block_group, align_gap, align_gap_len,
  2687. align_gap_trim_state);
  2688. return ret;
  2689. }
  2690. /*
  2691. * given a cluster, put all of its extents back into the free space
  2692. * cache. If a block group is passed, this function will only free
  2693. * a cluster that belongs to the passed block group.
  2694. *
  2695. * Otherwise, it'll get a reference on the block group pointed to by the
  2696. * cluster and remove the cluster from it.
  2697. */
  2698. void btrfs_return_cluster_to_free_space(
  2699. struct btrfs_block_group *block_group,
  2700. struct btrfs_free_cluster *cluster)
  2701. {
  2702. struct btrfs_free_space_ctl *ctl;
  2703. /* first, get a safe pointer to the block group */
  2704. spin_lock(&cluster->lock);
  2705. if (!block_group) {
  2706. block_group = cluster->block_group;
  2707. if (!block_group) {
  2708. spin_unlock(&cluster->lock);
  2709. return;
  2710. }
  2711. } else if (cluster->block_group != block_group) {
  2712. /* someone else has already freed it don't redo their work */
  2713. spin_unlock(&cluster->lock);
  2714. return;
  2715. }
  2716. btrfs_get_block_group(block_group);
  2717. spin_unlock(&cluster->lock);
  2718. ctl = block_group->free_space_ctl;
  2719. /* now return any extents the cluster had on it */
  2720. spin_lock(&ctl->tree_lock);
  2721. __btrfs_return_cluster_to_free_space(block_group, cluster);
  2722. spin_unlock(&ctl->tree_lock);
  2723. btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
  2724. /* finally drop our ref */
  2725. btrfs_put_block_group(block_group);
  2726. }
  2727. static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
  2728. struct btrfs_free_cluster *cluster,
  2729. struct btrfs_free_space *entry,
  2730. u64 bytes, u64 min_start,
  2731. u64 *max_extent_size)
  2732. {
  2733. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2734. int ret2;
  2735. u64 search_start = cluster->window_start;
  2736. u64 search_bytes = bytes;
  2737. u64 ret = 0;
  2738. search_start = min_start;
  2739. search_bytes = bytes;
  2740. ret2 = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
  2741. if (ret2) {
  2742. *max_extent_size = max(get_max_extent_size(entry),
  2743. *max_extent_size);
  2744. return 0;
  2745. }
  2746. ret = search_start;
  2747. bitmap_clear_bits(ctl, entry, ret, bytes, false);
  2748. return ret;
  2749. }
  2750. /*
  2751. * given a cluster, try to allocate 'bytes' from it, returns 0
  2752. * if it couldn't find anything suitably large, or a logical disk offset
  2753. * if things worked out
  2754. */
  2755. u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
  2756. struct btrfs_free_cluster *cluster, u64 bytes,
  2757. u64 min_start, u64 *max_extent_size)
  2758. {
  2759. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  2760. struct btrfs_discard_ctl *discard_ctl =
  2761. &block_group->fs_info->discard_ctl;
  2762. struct btrfs_free_space *entry = NULL;
  2763. struct rb_node *node;
  2764. u64 ret = 0;
  2765. ASSERT(!btrfs_is_zoned(block_group->fs_info));
  2766. spin_lock(&cluster->lock);
  2767. if (bytes > cluster->max_size)
  2768. goto out;
  2769. if (cluster->block_group != block_group)
  2770. goto out;
  2771. node = rb_first(&cluster->root);
  2772. if (!node)
  2773. goto out;
  2774. entry = rb_entry(node, struct btrfs_free_space, offset_index);
  2775. while (1) {
  2776. if (entry->bytes < bytes)
  2777. *max_extent_size = max(get_max_extent_size(entry),
  2778. *max_extent_size);
  2779. if (entry->bytes < bytes ||
  2780. (!entry->bitmap && entry->offset < min_start)) {
  2781. node = rb_next(&entry->offset_index);
  2782. if (!node)
  2783. break;
  2784. entry = rb_entry(node, struct btrfs_free_space,
  2785. offset_index);
  2786. continue;
  2787. }
  2788. if (entry->bitmap) {
  2789. ret = btrfs_alloc_from_bitmap(block_group,
  2790. cluster, entry, bytes,
  2791. cluster->window_start,
  2792. max_extent_size);
  2793. if (ret == 0) {
  2794. node = rb_next(&entry->offset_index);
  2795. if (!node)
  2796. break;
  2797. entry = rb_entry(node, struct btrfs_free_space,
  2798. offset_index);
  2799. continue;
  2800. }
  2801. cluster->window_start += bytes;
  2802. } else {
  2803. ret = entry->offset;
  2804. entry->offset += bytes;
  2805. entry->bytes -= bytes;
  2806. }
  2807. break;
  2808. }
  2809. out:
  2810. spin_unlock(&cluster->lock);
  2811. if (!ret)
  2812. return 0;
  2813. spin_lock(&ctl->tree_lock);
  2814. if (!btrfs_free_space_trimmed(entry))
  2815. atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
  2816. ctl->free_space -= bytes;
  2817. if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
  2818. ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
  2819. spin_lock(&cluster->lock);
  2820. if (entry->bytes == 0) {
  2821. rb_erase(&entry->offset_index, &cluster->root);
  2822. ctl->free_extents--;
  2823. if (entry->bitmap) {
  2824. kmem_cache_free(btrfs_free_space_bitmap_cachep,
  2825. entry->bitmap);
  2826. ctl->total_bitmaps--;
  2827. recalculate_thresholds(ctl);
  2828. } else if (!btrfs_free_space_trimmed(entry)) {
  2829. ctl->discardable_extents[BTRFS_STAT_CURR]--;
  2830. }
  2831. kmem_cache_free(btrfs_free_space_cachep, entry);
  2832. }
  2833. spin_unlock(&cluster->lock);
  2834. spin_unlock(&ctl->tree_lock);
  2835. return ret;
  2836. }
/*
 * Try to set up @cluster from the single bitmap entry @entry.
 *
 * Starting at the bit for max(@offset, @entry->offset), runs of set bits that
 * are at least @min_bytes long are accumulated until they add up to at least
 * @bytes and the largest run seen is at least @cont1_bytes. On success the
 * entry is removed from the block group's offset and bytes trees and inserted
 * via tree_insert_offset() with @cluster as target.
 *
 * The caller must hold ctl->tree_lock.
 *
 * Returns 0 on success, -ENOSPC if the bitmap cannot satisfy the request.
 */
static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes,
				u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	unsigned long next_zero;
	unsigned long i;
	unsigned long want_bits;
	unsigned long min_bits;
	unsigned long found_bits;
	unsigned long max_bits = 0;
	unsigned long start = 0;
	unsigned long total_found = 0;
	int ret;

	lockdep_assert_held(&ctl->tree_lock);

	/* Never start the scan before the first bit of this bitmap. */
	i = offset_to_bit(entry->offset, ctl->unit,
			  max_t(u64, offset, entry->offset));
	want_bits = bytes_to_bits(bytes, ctl->unit);
	min_bits = bytes_to_bits(min_bytes, ctl->unit);

	/*
	 * Don't bother looking for a cluster in this bitmap if it's heavily
	 * fragmented.
	 */
	if (entry->max_extent_size &&
	    entry->max_extent_size < cont1_bytes)
		return -ENOSPC;
again:
	/* Look for the next run of set bits that is at least min_bits long. */
	found_bits = 0;
	for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= min_bits) {
			found_bits = next_zero - i;
			if (found_bits > max_bits)
				max_bits = found_bits;
			break;
		}

		/* Run too short: remember its size and skip past it. */
		if (next_zero - i > max_bits)
			max_bits = next_zero - i;
		i = next_zero;
	}

	if (!found_bits) {
		/* Record the largest run seen during this scan in the entry. */
		entry->max_extent_size = (u64)max_bits * ctl->unit;
		return -ENOSPC;
	}

	/* The first qualifying run marks the start of the cluster window. */
	if (!total_found) {
		start = i;
		cluster->max_size = 0;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * ctl->unit)
		cluster->max_size = found_bits * ctl->unit;

	/* Keep scanning after this run until both size targets are met. */
	if (total_found < want_bits || cluster->max_size < cont1_bytes) {
		i = next_zero + 1;
		goto again;
	}

	cluster->window_start = start * ctl->unit + entry->offset;
	rb_erase(&entry->offset_index, &ctl->free_space_offset);
	rb_erase_cached(&entry->bytes_index, &ctl->free_space_bytes);

	/*
	 * We need to know if we're currently on the normal space index when we
	 * manipulate the bitmap so that we know we need to remove and re-insert
	 * it into the space_index tree. Clear the bytes_index node here so the
	 * bitmap manipulation helpers know not to mess with the space_index
	 * until this bitmap entry is added back into the normal cache.
	 */
	RB_CLEAR_NODE(&entry->bytes_index);

	ret = tree_insert_offset(ctl, cluster, entry);
	ASSERT(!ret); /* -EEXIST; Logic error */

	trace_btrfs_setup_cluster(block_group, cluster,
				  total_found * ctl->unit, 1);
	return 0;
}
/*
 * This searches the block group for just extents to fill the cluster with.
 * Try to find a cluster with at least @bytes total bytes, at least one
 * extent of @cont1_bytes, and all other extents of at least @min_bytes.
 *
 * Bitmap entries met while walking the tree are not used here; they are
 * appended to @bitmaps (unless already on a list).
 *
 * The caller must hold ctl->tree_lock.
 *
 * Returns 0 on success, -ENOSPC if no suitable set of extents exists.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_free;
	u64 max_extent;
	u64 total_size = 0;

	lockdep_assert_held(&ctl->tree_lock);

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap || entry->bytes < min_bytes) {
		if (entry->bitmap && list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/* The window starts (and, so far, ends) at this first usable extent. */
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;

	/*
	 * Walk the rest of the tree, summing up every extent that is large
	 * enough and tracking the last one and the biggest one.
	 */
	for (node = rb_next(&entry->offset_index); node;
	     node = rb_next(&entry->offset_index)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		if (entry->bytes < min_bytes)
			continue;

		last = entry;
		window_free += entry->bytes;
		if (entry->bytes > max_extent)
			max_extent = entry->bytes;
	}

	if (window_free < bytes || max_extent < cont1_bytes)
		return -ENOSPC;

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		/* Grab the successor before the entry is erased from the tree. */
		node = rb_next(&entry->offset_index);
		/* Note: 'continue' in a do/while jumps to the loop condition. */
		if (entry->bitmap || entry->bytes < min_bytes)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		rb_erase_cached(&entry->bytes_index, &ctl->free_space_bytes);
		ret = tree_insert_offset(ctl, cluster, entry);
		total_size += entry->bytes;
		ASSERT(!ret); /* -EEXIST; Logic error */
	} while (node && entry != last);

	cluster->max_size = max_extent;
	trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
	return 0;
}
  2990. /*
  2991. * This specifically looks for bitmaps that may work in the cluster, we assume
  2992. * that we have already failed to find extents that will work.
  2993. */
  2994. static noinline int
  2995. setup_cluster_bitmap(struct btrfs_block_group *block_group,
  2996. struct btrfs_free_cluster *cluster,
  2997. struct list_head *bitmaps, u64 offset, u64 bytes,
  2998. u64 cont1_bytes, u64 min_bytes)
  2999. {
  3000. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  3001. struct btrfs_free_space *entry = NULL;
  3002. int ret = -ENOSPC;
  3003. u64 bitmap_offset = offset_to_bitmap(ctl, offset);
  3004. if (ctl->total_bitmaps == 0)
  3005. return -ENOSPC;
  3006. /*
  3007. * The bitmap that covers offset won't be in the list unless offset
  3008. * is just its start offset.
  3009. */
  3010. if (!list_empty(bitmaps))
  3011. entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
  3012. if (!entry || entry->offset != bitmap_offset) {
  3013. entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
  3014. if (entry && list_empty(&entry->list))
  3015. list_add(&entry->list, bitmaps);
  3016. }
  3017. list_for_each_entry(entry, bitmaps, list) {
  3018. if (entry->bytes < bytes)
  3019. continue;
  3020. ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
  3021. bytes, cont1_bytes, min_bytes);
  3022. if (!ret)
  3023. return 0;
  3024. }
  3025. /*
  3026. * The bitmaps list has all the bitmaps that record free space
  3027. * starting after offset, so no more search is required.
  3028. */
  3029. return -ENOSPC;
  3030. }
  3031. /*
  3032. * here we try to find a cluster of blocks in a block group. The goal
  3033. * is to find at least bytes+empty_size.
  3034. * We might not find them all in one contiguous area.
  3035. *
  3036. * returns zero and sets up cluster if things worked out, otherwise
  3037. * it returns -enospc
  3038. */
  3039. int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
  3040. struct btrfs_free_cluster *cluster,
  3041. u64 offset, u64 bytes, u64 empty_size)
  3042. {
  3043. struct btrfs_fs_info *fs_info = block_group->fs_info;
  3044. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  3045. struct btrfs_free_space *entry, *tmp;
  3046. LIST_HEAD(bitmaps);
  3047. u64 min_bytes;
  3048. u64 cont1_bytes;
  3049. int ret;
  3050. /*
  3051. * Choose the minimum extent size we'll require for this
  3052. * cluster. For SSD_SPREAD, don't allow any fragmentation.
  3053. * For metadata, allow allocates with smaller extents. For
  3054. * data, keep it dense.
  3055. */
  3056. if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
  3057. cont1_bytes = bytes + empty_size;
  3058. min_bytes = cont1_bytes;
  3059. } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
  3060. cont1_bytes = bytes;
  3061. min_bytes = fs_info->sectorsize;
  3062. } else {
  3063. cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
  3064. min_bytes = fs_info->sectorsize;
  3065. }
  3066. spin_lock(&ctl->tree_lock);
  3067. /*
  3068. * If we know we don't have enough space to make a cluster don't even
  3069. * bother doing all the work to try and find one.
  3070. */
  3071. if (ctl->free_space < bytes) {
  3072. spin_unlock(&ctl->tree_lock);
  3073. return -ENOSPC;
  3074. }
  3075. spin_lock(&cluster->lock);
  3076. /* someone already found a cluster, hooray */
  3077. if (cluster->block_group) {
  3078. ret = 0;
  3079. goto out;
  3080. }
  3081. trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
  3082. min_bytes);
  3083. ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
  3084. bytes + empty_size,
  3085. cont1_bytes, min_bytes);
  3086. if (ret)
  3087. ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
  3088. offset, bytes + empty_size,
  3089. cont1_bytes, min_bytes);
  3090. /* Clear our temporary list */
  3091. list_for_each_entry_safe(entry, tmp, &bitmaps, list)
  3092. list_del_init(&entry->list);
  3093. if (!ret) {
  3094. btrfs_get_block_group(block_group);
  3095. list_add_tail(&cluster->block_group_list,
  3096. &block_group->cluster_list);
  3097. cluster->block_group = block_group;
  3098. } else {
  3099. trace_btrfs_failed_cluster_setup(block_group);
  3100. }
  3101. out:
  3102. spin_unlock(&cluster->lock);
  3103. spin_unlock(&ctl->tree_lock);
  3104. return ret;
  3105. }
  3106. /*
  3107. * simple code to zero out a cluster
  3108. */
  3109. void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
  3110. {
  3111. spin_lock_init(&cluster->lock);
  3112. spin_lock_init(&cluster->refill_lock);
  3113. cluster->root = RB_ROOT;
  3114. cluster->max_size = 0;
  3115. cluster->fragmented = false;
  3116. INIT_LIST_HEAD(&cluster->block_group_list);
  3117. cluster->block_group = NULL;
  3118. }
  3119. static int do_trimming(struct btrfs_block_group *block_group,
  3120. u64 *total_trimmed, u64 start, u64 bytes,
  3121. u64 reserved_start, u64 reserved_bytes,
  3122. enum btrfs_trim_state reserved_trim_state,
  3123. struct btrfs_trim_range *trim_entry)
  3124. {
  3125. struct btrfs_space_info *space_info = block_group->space_info;
  3126. struct btrfs_fs_info *fs_info = block_group->fs_info;
  3127. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  3128. int ret;
  3129. bool bg_ro;
  3130. const u64 end = start + bytes;
  3131. const u64 reserved_end = reserved_start + reserved_bytes;
  3132. enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  3133. u64 trimmed = 0;
  3134. spin_lock(&space_info->lock);
  3135. spin_lock(&block_group->lock);
  3136. bg_ro = block_group->ro;
  3137. if (!bg_ro) {
  3138. block_group->reserved += reserved_bytes;
  3139. spin_unlock(&block_group->lock);
  3140. space_info->bytes_reserved += reserved_bytes;
  3141. } else {
  3142. spin_unlock(&block_group->lock);
  3143. }
  3144. spin_unlock(&space_info->lock);
  3145. ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed, false);
  3146. if (!ret) {
  3147. *total_trimmed += trimmed;
  3148. trim_state = BTRFS_TRIM_STATE_TRIMMED;
  3149. }
  3150. mutex_lock(&ctl->cache_writeout_mutex);
  3151. if (reserved_start < start)
  3152. __btrfs_add_free_space(block_group, reserved_start,
  3153. start - reserved_start,
  3154. reserved_trim_state);
  3155. if (end < reserved_end)
  3156. __btrfs_add_free_space(block_group, end, reserved_end - end,
  3157. reserved_trim_state);
  3158. __btrfs_add_free_space(block_group, start, bytes, trim_state);
  3159. list_del(&trim_entry->list);
  3160. mutex_unlock(&ctl->cache_writeout_mutex);
  3161. if (!bg_ro) {
  3162. spin_lock(&space_info->lock);
  3163. spin_lock(&block_group->lock);
  3164. bg_ro = block_group->ro;
  3165. block_group->reserved -= reserved_bytes;
  3166. spin_unlock(&block_group->lock);
  3167. space_info->bytes_reserved -= reserved_bytes;
  3168. if (bg_ro)
  3169. space_info->bytes_readonly += reserved_bytes;
  3170. spin_unlock(&space_info->lock);
  3171. }
  3172. return ret;
  3173. }
/*
 * Discard the free space of @block_group that is recorded as plain extent
 * entries (bitmap entries are skipped) within [@start, @end).
 *
 * Each loop iteration takes ctl->cache_writeout_mutex and ctl->tree_lock,
 * looks up an entry for @start, removes it (or its leading part) from the
 * free space tree and hands the range to do_trimming(), which re-adds the
 * space afterwards.
 *
 * @total_trimmed: accumulates the bytes discarded, updated by do_trimming()
 * @minlen:        ranges shorter than this are skipped
 * @async:         if set, already trimmed entries are skipped, an entry is
 *                 taken from its own start offset, and we will trim 1 region
 *                 and return
 *
 * block_group->discard_cursor is moved forward as ranges are processed and is
 * set to the end of the block group when no further entry can be found.
 *
 * Returns 0, the error returned by do_trimming(), or -ERESTARTSYS when
 * btrfs_trim_interrupted() reports an interruption.
 */
static int trim_no_bitmap(struct btrfs_block_group *block_group,
			  u64 *total_trimmed, u64 start, u64 end, u64 minlen,
			  bool async)
{
	struct btrfs_discard_ctl *discard_ctl =
					&block_group->fs_info->discard_ctl;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = 0;
	u64 extent_start;
	u64 extent_bytes;
	enum btrfs_trim_state extent_trim_state;
	u64 bytes;
	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);

	while (start < end) {
		/* Lives on the stack; do_trimming() unlinks it before returning. */
		struct btrfs_trim_range trim_entry;

		mutex_lock(&ctl->cache_writeout_mutex);
		spin_lock(&ctl->tree_lock);

		/* Less than @minlen free in total, so no range can qualify. */
		if (ctl->free_space < minlen)
			goto out_unlock;

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry)
			goto out_unlock;

		/* Skip bitmaps and if async, already trimmed entries */
		while (entry->bitmap ||
		       (async && btrfs_free_space_trimmed(entry))) {
			node = rb_next(&entry->offset_index);
			if (!node)
				goto out_unlock;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
		}

		/* The candidate starts beyond the range we were asked to trim. */
		if (entry->offset >= end)
			goto out_unlock;

		/*
		 * Remember the whole entry: do_trimming() re-adds the parts of
		 * it that we do not discard, with the entry's trim state.
		 */
		extent_start = entry->offset;
		extent_bytes = entry->bytes;
		extent_trim_state = entry->trim_state;
		if (async) {
			/* Async trimming always works on the entry from its start. */
			start = entry->offset;
			bytes = entry->bytes;
			if (bytes < minlen) {
				spin_unlock(&ctl->tree_lock);
				mutex_unlock(&ctl->cache_writeout_mutex);
				goto next;
			}
			unlink_free_space(ctl, entry, true);
			/*
			 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
			 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
			 * X when we come back around.  So trim it now.
			 *
			 * Otherwise only the first max_discard_size bytes are
			 * taken and the shrunken entry is linked back in.
			 */
			if (max_discard_size &&
			    bytes >= (max_discard_size +
				      BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
				bytes = max_discard_size;
				extent_bytes = max_discard_size;
				entry->offset += max_discard_size;
				entry->bytes -= max_discard_size;
				link_free_space(ctl, entry);
			} else {
				kmem_cache_free(btrfs_free_space_cachep, entry);
			}
		} else {
			/* Clamp the range to trim to [start, end). */
			start = max(start, extent_start);
			bytes = min(extent_start + extent_bytes, end) - start;
			if (bytes < minlen) {
				spin_unlock(&ctl->tree_lock);
				mutex_unlock(&ctl->cache_writeout_mutex);
				goto next;
			}

			unlink_free_space(ctl, entry, true);
			kmem_cache_free(btrfs_free_space_cachep, entry);
		}

		spin_unlock(&ctl->tree_lock);
		/*
		 * Record the range on ctl->trimming_ranges before dropping the
		 * writeout mutex, since it is no longer in the free space tree.
		 */
		trim_entry.start = extent_start;
		trim_entry.bytes = extent_bytes;
		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
		mutex_unlock(&ctl->cache_writeout_mutex);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  extent_start, extent_bytes, extent_trim_state,
				  &trim_entry);
		if (ret) {
			block_group->discard_cursor = start + bytes;
			break;
		}
next:
		/* Also reached for ranges that were skipped as too short. */
		start += bytes;
		block_group->discard_cursor = start;
		/* Async mode stops after the first region that got trimmed. */
		if (async && *total_trimmed)
			break;

		if (btrfs_trim_interrupted()) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;

out_unlock:
	/* Nothing left to trim: park the cursor at the end of the block group. */
	block_group->discard_cursor = btrfs_block_group_end(block_group);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

	return ret;
}
  3281. void btrfs_trim_fully_remapped_block_group(struct btrfs_block_group *bg)
  3282. {
  3283. struct btrfs_fs_info *fs_info = bg->fs_info;
  3284. struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
  3285. int ret = 0;
  3286. u64 bytes, trimmed;
  3287. const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
  3288. u64 end = btrfs_block_group_end(bg);
  3289. if (!test_bit(BLOCK_GROUP_FLAG_STRIPE_REMOVAL_PENDING, &bg->runtime_flags)) {
  3290. bg->discard_cursor = end;
  3291. if (bg->used == 0) {
  3292. spin_lock(&fs_info->unused_bgs_lock);
  3293. if (!list_empty(&bg->bg_list)) {
  3294. list_del_init(&bg->bg_list);
  3295. btrfs_put_block_group(bg);
  3296. }
  3297. spin_unlock(&fs_info->unused_bgs_lock);
  3298. btrfs_mark_bg_unused(bg);
  3299. }
  3300. return;
  3301. }
  3302. bytes = end - bg->discard_cursor;
  3303. if (max_discard_size &&
  3304. bytes >= (max_discard_size + BTRFS_ASYNC_DISCARD_MIN_FILTER))
  3305. bytes = max_discard_size;
  3306. ret = btrfs_discard_extent(fs_info, bg->discard_cursor, bytes, &trimmed, false);
  3307. if (ret)
  3308. return;
  3309. bg->discard_cursor += trimmed;
  3310. if (bg->discard_cursor < end)
  3311. return;
  3312. btrfs_complete_bg_remapping(bg);
  3313. }
  3314. /*
  3315. * If we break out of trimming a bitmap prematurely, we should reset the
  3316. * trimming bit. In a rather contrived case, it's possible to race here so
  3317. * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
  3318. *
  3319. * start = start of bitmap
  3320. * end = near end of bitmap
  3321. *
  3322. * Thread 1: Thread 2:
  3323. * trim_bitmaps(start)
  3324. * trim_bitmaps(end)
  3325. * end_trimming_bitmap()
  3326. * reset_trimming_bitmap()
  3327. */
  3328. static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
  3329. {
  3330. struct btrfs_free_space *entry;
  3331. spin_lock(&ctl->tree_lock);
  3332. entry = tree_search_offset(ctl, offset, 1, 0);
  3333. if (entry) {
  3334. if (btrfs_free_space_trimmed(entry)) {
  3335. ctl->discardable_extents[BTRFS_STAT_CURR] +=
  3336. entry->bitmap_extents;
  3337. ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
  3338. }
  3339. entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
  3340. }
  3341. spin_unlock(&ctl->tree_lock);
  3342. }
  3343. static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
  3344. struct btrfs_free_space *entry)
  3345. {
  3346. if (btrfs_free_space_trimming_bitmap(entry)) {
  3347. entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
  3348. ctl->discardable_extents[BTRFS_STAT_CURR] -=
  3349. entry->bitmap_extents;
  3350. ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
  3351. }
  3352. }
/*
 * Discard the free space of @block_group that is recorded in bitmap entries
 * within [@start, @end).
 *
 * The bitmaps are visited one at a time, @offset being the start of the
 * bitmap currently worked on.  Inside a bitmap, search_bitmap() finds the
 * next free range of at least @minlen bytes, the range is cleared from the
 * bitmap and handed to do_trimming(), which re-adds the space afterwards.
 *
 * @total_trimmed: accumulates the bytes discarded, updated by do_trimming()
 * @minlen:        free ranges shorter than this are skipped
 * @maxlen:        if non-zero and @async, free ranges longer than this are
 *                 skipped
 * @async:         if set, then we will trim 1 region and return
 *
 * block_group->discard_cursor is moved forward as the scan progresses.
 *
 * Returns 0, the error returned by do_trimming(), or -ERESTARTSYS when
 * btrfs_trim_interrupted() reports an interruption.
 */
static int trim_bitmaps(struct btrfs_block_group *block_group,
			u64 *total_trimmed, u64 start, u64 end, u64 minlen,
			u64 maxlen, bool async)
{
	struct btrfs_discard_ctl *discard_ctl =
					&block_group->fs_info->discard_ctl;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = 0;
	int ret2;
	u64 bytes;
	u64 offset = offset_to_bitmap(ctl, start);
	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);

	while (offset < end) {
		/* Set when we are done with the bitmap at @offset. */
		bool next_bitmap = false;
		/* Lives on the stack; do_trimming() unlinks it before returning. */
		struct btrfs_trim_range trim_entry;

		mutex_lock(&ctl->cache_writeout_mutex);
		spin_lock(&ctl->tree_lock);

		/* Less than @minlen free in total, so no range can qualify. */
		if (ctl->free_space < minlen) {
			block_group->discard_cursor =
				btrfs_block_group_end(block_group);
			spin_unlock(&ctl->tree_lock);
			mutex_unlock(&ctl->cache_writeout_mutex);
			break;
		}

		entry = tree_search_offset(ctl, offset, 1, 0);
		/*
		 * Bitmaps are marked trimmed lossily now to prevent constant
		 * discarding of the same bitmap (the reason why we are bound
		 * by the filters).  So, retrim the block group bitmaps when we
		 * are preparing to punt to the unused_bgs list.  This uses
		 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
		 * which is the only discard index which sets minlen to 0.
		 */
		if (!entry || (async && minlen && start == offset &&
			       btrfs_free_space_trimmed(entry))) {
			spin_unlock(&ctl->tree_lock);
			mutex_unlock(&ctl->cache_writeout_mutex);
			next_bitmap = true;
			goto next;
		}

		/*
		 * Async discard bitmap trimming begins by setting the start
		 * to be key.objectid and the offset_to_bitmap() aligns to the
		 * start of the bitmap.  This lets us know we are fully
		 * scanning the bitmap rather than only some portion of it.
		 */
		if (start == offset)
			entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;

		/* Ask for the next free range of at least @minlen bytes. */
		bytes = minlen;
		ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
		if (ret2 || start >= end) {
			/*
			 * We lossily consider a bitmap trimmed if we only skip
			 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
			 */
			if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
				end_trimming_bitmap(ctl, entry);
			else
				entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
			spin_unlock(&ctl->tree_lock);
			mutex_unlock(&ctl->cache_writeout_mutex);
			next_bitmap = true;
			goto next;
		}

		/*
		 * We already trimmed a region, but are using the locking above
		 * to reset the trim_state.
		 */
		if (async && *total_trimmed) {
			spin_unlock(&ctl->tree_lock);
			mutex_unlock(&ctl->cache_writeout_mutex);
			return ret;
		}

		/* Do not trim past @end; skip ranges outside the length filters. */
		bytes = min(bytes, end - start);
		if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
			spin_unlock(&ctl->tree_lock);
			mutex_unlock(&ctl->cache_writeout_mutex);
			goto next;
		}

		/*
		 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
		 * If X < @minlen, we won't trim X when we come back around.
		 * So trim it now.  We differ here from trimming extents as we
		 * don't keep individual state per bit.
		 */
		if (async &&
		    max_discard_size &&
		    bytes > (max_discard_size + minlen))
			bytes = max_discard_size;

		/* Take the range out of the bitmap; drop the bitmap if now empty. */
		bitmap_clear_bits(ctl, entry, start, bytes, true);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);

		spin_unlock(&ctl->tree_lock);
		/*
		 * Record the range on ctl->trimming_ranges before dropping the
		 * writeout mutex, since it is no longer in the free space tree.
		 */
		trim_entry.start = start;
		trim_entry.bytes = bytes;
		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
		mutex_unlock(&ctl->cache_writeout_mutex);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  start, bytes, 0, &trim_entry);
		if (ret) {
			reset_trimming_bitmap(ctl, offset);
			block_group->discard_cursor =
				btrfs_block_group_end(block_group);
			break;
		}
next:
		if (next_bitmap) {
			/* Move on to the start of the following bitmap. */
			offset += BITS_PER_BITMAP * ctl->unit;
			start = offset;
		} else {
			/* Continue after the range just trimmed or skipped. */
			start += bytes;
		}
		block_group->discard_cursor = start;

		if (btrfs_trim_interrupted()) {
			/* Stopped part-way through a bitmap: do not leave it trimming. */
			if (start != offset)
				reset_trimming_bitmap(ctl, offset);
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	/* Every bitmap up to @end has been visited. */
	if (offset >= end)
		block_group->discard_cursor = end;

	return ret;
}
  3482. int btrfs_trim_block_group(struct btrfs_block_group *block_group,
  3483. u64 *trimmed, u64 start, u64 end, u64 minlen)
  3484. {
  3485. struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
  3486. int ret;
  3487. u64 rem = 0;
  3488. ASSERT(!btrfs_is_zoned(block_group->fs_info));
  3489. *trimmed = 0;
  3490. spin_lock(&block_group->lock);
  3491. if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
  3492. spin_unlock(&block_group->lock);
  3493. return 0;
  3494. }
  3495. btrfs_freeze_block_group(block_group);
  3496. spin_unlock(&block_group->lock);
  3497. ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
  3498. if (ret)
  3499. goto out;
  3500. ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
  3501. div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
  3502. /* If we ended in the middle of a bitmap, reset the trimming flag */
  3503. if (rem)
  3504. reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
  3505. out:
  3506. btrfs_unfreeze_block_group(block_group);
  3507. return ret;
  3508. }
  3509. int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
  3510. u64 *trimmed, u64 start, u64 end, u64 minlen,
  3511. bool async)
  3512. {
  3513. int ret;
  3514. *trimmed = 0;
  3515. spin_lock(&block_group->lock);
  3516. if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
  3517. spin_unlock(&block_group->lock);
  3518. return 0;
  3519. }
  3520. btrfs_freeze_block_group(block_group);
  3521. spin_unlock(&block_group->lock);
  3522. ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
  3523. btrfs_unfreeze_block_group(block_group);
  3524. return ret;
  3525. }
  3526. int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
  3527. u64 *trimmed, u64 start, u64 end, u64 minlen,
  3528. u64 maxlen, bool async)
  3529. {
  3530. int ret;
  3531. *trimmed = 0;
  3532. spin_lock(&block_group->lock);
  3533. if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
  3534. spin_unlock(&block_group->lock);
  3535. return 0;
  3536. }
  3537. btrfs_freeze_block_group(block_group);
  3538. spin_unlock(&block_group->lock);
  3539. ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
  3540. async);
  3541. btrfs_unfreeze_block_group(block_group);
  3542. return ret;
  3543. }
  3544. bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info)
  3545. {
  3546. return btrfs_super_cache_generation(fs_info->super_copy);
  3547. }
  3548. static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
  3549. struct btrfs_trans_handle *trans)
  3550. {
  3551. struct btrfs_block_group *block_group;
  3552. struct rb_node *node;
  3553. btrfs_info(fs_info, "cleaning free space cache v1");
  3554. node = rb_first_cached(&fs_info->block_group_cache_tree);
  3555. while (node) {
  3556. int ret;
  3557. block_group = rb_entry(node, struct btrfs_block_group, cache_node);
  3558. ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
  3559. if (ret)
  3560. return ret;
  3561. node = rb_next(node);
  3562. }
  3563. return 0;
  3564. }
  3565. int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
  3566. {
  3567. struct btrfs_trans_handle *trans;
  3568. int ret;
  3569. /*
  3570. * update_super_roots will appropriately set or unset
  3571. * super_copy->cache_generation based on SPACE_CACHE and
  3572. * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
  3573. * transaction commit whether we are enabling space cache v1 and don't
  3574. * have any other work to do, or are disabling it and removing free
  3575. * space inodes.
  3576. */
  3577. trans = btrfs_start_transaction(fs_info->tree_root, 0);
  3578. if (IS_ERR(trans))
  3579. return PTR_ERR(trans);
  3580. if (!active) {
  3581. set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
  3582. ret = cleanup_free_space_cache_v1(fs_info, trans);
  3583. if (unlikely(ret)) {
  3584. btrfs_abort_transaction(trans, ret);
  3585. btrfs_end_transaction(trans);
  3586. goto out;
  3587. }
  3588. }
  3589. ret = btrfs_commit_transaction(trans);
  3590. out:
  3591. clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
  3592. return ret;
  3593. }
  3594. int __init btrfs_free_space_init(void)
  3595. {
  3596. btrfs_free_space_cachep = KMEM_CACHE(btrfs_free_space, 0);
  3597. if (!btrfs_free_space_cachep)
  3598. return -ENOMEM;
  3599. btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
  3600. PAGE_SIZE, PAGE_SIZE,
  3601. 0, NULL);
  3602. if (!btrfs_free_space_bitmap_cachep) {
  3603. kmem_cache_destroy(btrfs_free_space_cachep);
  3604. return -ENOMEM;
  3605. }
  3606. return 0;
  3607. }
  3608. void __cold btrfs_free_space_exit(void)
  3609. {
  3610. kmem_cache_destroy(btrfs_free_space_cachep);
  3611. kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
  3612. }
  3613. #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
  3614. /*
  3615. * Use this if you need to make a bitmap or extent entry specifically, it
  3616. * doesn't do any of the merging that add_free_space does, this acts a lot like
  3617. * how the free space cache loading stuff works, so you can get really weird
  3618. * configurations.
  3619. */
  3620. int test_add_free_space_entry(struct btrfs_block_group *cache,
  3621. u64 offset, u64 bytes, bool bitmap)
  3622. {
  3623. struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
  3624. struct btrfs_free_space *info = NULL, *bitmap_info;
  3625. void *map = NULL;
  3626. enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
  3627. u64 bytes_added;
  3628. int ret;
  3629. again:
  3630. if (!info) {
  3631. info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
  3632. if (!info)
  3633. return -ENOMEM;
  3634. }
  3635. if (!bitmap) {
  3636. spin_lock(&ctl->tree_lock);
  3637. info->offset = offset;
  3638. info->bytes = bytes;
  3639. info->max_extent_size = 0;
  3640. ret = link_free_space(ctl, info);
  3641. spin_unlock(&ctl->tree_lock);
  3642. if (ret)
  3643. kmem_cache_free(btrfs_free_space_cachep, info);
  3644. return ret;
  3645. }
  3646. if (!map) {
  3647. map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
  3648. if (!map) {
  3649. kmem_cache_free(btrfs_free_space_cachep, info);
  3650. return -ENOMEM;
  3651. }
  3652. }
  3653. spin_lock(&ctl->tree_lock);
  3654. bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
  3655. 1, 0);
  3656. if (!bitmap_info) {
  3657. info->bitmap = map;
  3658. map = NULL;
  3659. add_new_bitmap(ctl, info, offset);
  3660. bitmap_info = info;
  3661. info = NULL;
  3662. }
  3663. bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
  3664. trim_state);
  3665. bytes -= bytes_added;
  3666. offset += bytes_added;
  3667. spin_unlock(&ctl->tree_lock);
  3668. if (bytes)
  3669. goto again;
  3670. if (info)
  3671. kmem_cache_free(btrfs_free_space_cachep, info);
  3672. if (map)
  3673. kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
  3674. return 0;
  3675. }
/*
 * Sanity test helper: check whether the range [@offset, @offset + @bytes) is
 * in the free space cache.
 *
 * This is really just used to check the absence of space, so if there is
 * free space in the range at all we will return 1.
 *
 * Returns 1 if free space was found for the range, 0 otherwise.  The lookup
 * is done under ctl->tree_lock.
 */
int test_check_exists(struct btrfs_block_group *cache,
		      u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info;
	int ret = 0;

	spin_lock(&ctl->tree_lock);
	/*
	 * Look for an entry at @offset first and fall back to the bitmap that
	 * would cover @offset.
	 */
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info)
			goto out;
	}

have_info:
	if (info->bitmap) {
		u64 bit_off, bit_bytes;
		struct rb_node *n;
		struct btrfs_free_space *tmp;

		/* Search the bitmap for a free run of ctl->unit bytes from @offset. */
		bit_off = offset;
		bit_bytes = ctl->unit;
		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
		if (!ret) {
			if (bit_off == offset) {
				/* Free space starts right at @offset. */
				ret = 1;
				goto out;
			} else if (bit_off > offset &&
				   offset + bytes > bit_off) {
				/* Free space starts later but still inside the range. */
				ret = 1;
				goto out;
			}
		}

		/*
		 * Nothing usable in this bitmap.  Walk the preceding entries:
		 * stop at the first one ending before @offset, step over those
		 * starting after the range, and re-run the checks on any other.
		 */
		n = rb_prev(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (tmp->offset + tmp->bytes < offset)
				break;
			if (offset + bytes < tmp->offset) {
				n = rb_prev(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		/*
		 * Same walk over the following entries: stop at the first one
		 * starting after the range, step over those ending before
		 * @offset, and re-run the checks on any other.
		 */
		n = rb_next(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (offset + bytes < tmp->offset)
				break;
			if (tmp->offset + tmp->bytes < offset) {
				n = rb_next(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		/* search_bitmap() may have left an error code in ret. */
		ret = 0;
		goto out;
	}

	/* Extent entry starting exactly at @offset. */
	if (info->offset == offset) {
		ret = 1;
		goto out;
	}

	/* Extent entry with @offset strictly inside it. */
	if (offset > info->offset && offset < info->offset + info->bytes)
		ret = 1;
out:
	spin_unlock(&ctl->tree_lock);
	return ret;
}
  3752. #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */