extents.c 172 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
  4. * Written by Alex Tomas <alex@clusterfs.com>
  5. *
  6. * Architecture independence:
  7. * Copyright (c) 2005, Bull S.A.
  8. * Written by Pierre Peiffer <pierre.peiffer@bull.net>
  9. */
  10. /*
  11. * Extents support for EXT4
  12. *
  13. * TODO:
  14. * - ext4*_error() should be used in some situations
  15. * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
  16. * - smart tree reduction
  17. */
  18. #include <linux/fs.h>
  19. #include <linux/time.h>
  20. #include <linux/jbd2.h>
  21. #include <linux/highuid.h>
  22. #include <linux/pagemap.h>
  23. #include <linux/quotaops.h>
  24. #include <linux/string.h>
  25. #include <linux/slab.h>
  26. #include <linux/uaccess.h>
  27. #include <linux/fiemap.h>
  28. #include <linux/iomap.h>
  29. #include <linux/sched/mm.h>
  30. #include "ext4_jbd2.h"
  31. #include "ext4_extents.h"
  32. #include "xattr.h"
  33. #include <kunit/static_stub.h>
  34. #include <trace/events/ext4.h>
/*
 * Flag used by the extent-splitting code.
 */
#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
				     due to ENOSPC */

/*
 * Forward declaration of a file-local (static) helper so that it can be
 * referenced before the point where it is defined.
 */
static struct ext4_ext_path *ext4_split_convert_extents(
	handle_t *handle, struct inode *inode, struct ext4_map_blocks *map,
	struct ext4_ext_path *path, int flags, unsigned int *allocated);
  43. static __le32 ext4_extent_block_csum(struct inode *inode,
  44. struct ext4_extent_header *eh)
  45. {
  46. struct ext4_inode_info *ei = EXT4_I(inode);
  47. __u32 csum;
  48. csum = ext4_chksum(ei->i_csum_seed, (__u8 *)eh,
  49. EXT4_EXTENT_TAIL_OFFSET(eh));
  50. return cpu_to_le32(csum);
  51. }
  52. static int ext4_extent_block_csum_verify(struct inode *inode,
  53. struct ext4_extent_header *eh)
  54. {
  55. struct ext4_extent_tail *et;
  56. if (!ext4_has_feature_metadata_csum(inode->i_sb))
  57. return 1;
  58. et = find_ext4_extent_tail(eh);
  59. if (et->et_checksum != ext4_extent_block_csum(inode, eh))
  60. return 0;
  61. return 1;
  62. }
  63. static void ext4_extent_block_csum_set(struct inode *inode,
  64. struct ext4_extent_header *eh)
  65. {
  66. struct ext4_extent_tail *et;
  67. if (!ext4_has_feature_metadata_csum(inode->i_sb))
  68. return;
  69. et = find_ext4_extent_tail(eh);
  70. et->et_checksum = ext4_extent_block_csum(inode, eh);
  71. }
/*
 * Forward declaration of a file-local (static) helper so that it can be
 * referenced before the point where it is defined.
 */
static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
						   struct inode *inode,
						   struct ext4_ext_path *path,
						   ext4_lblk_t split, int flags);
  76. static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
  77. {
  78. /*
  79. * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
  80. * moment, get_block can be called only for blocks inside i_size since
  81. * page cache has been already dropped and writes are blocked by
  82. * i_rwsem. So we can safely drop the i_data_sem here.
  83. */
  84. BUG_ON(EXT4_JOURNAL(inode) == NULL);
  85. ext4_discard_preallocations(inode);
  86. up_write(&EXT4_I(inode)->i_data_sem);
  87. *dropped = 1;
  88. return 0;
  89. }
  90. static inline void ext4_ext_path_brelse(struct ext4_ext_path *path)
  91. {
  92. brelse(path->p_bh);
  93. path->p_bh = NULL;
  94. }
  95. static void ext4_ext_drop_refs(struct ext4_ext_path *path)
  96. {
  97. int depth, i;
  98. if (IS_ERR_OR_NULL(path))
  99. return;
  100. depth = path->p_depth;
  101. for (i = 0; i <= depth; i++, path++)
  102. ext4_ext_path_brelse(path);
  103. }
  104. void ext4_free_ext_path(struct ext4_ext_path *path)
  105. {
  106. if (IS_ERR_OR_NULL(path))
  107. return;
  108. ext4_ext_drop_refs(path);
  109. kfree(path);
  110. }
  111. /*
  112. * Make sure 'handle' has at least 'check_cred' credits. If not, restart
  113. * transaction with 'restart_cred' credits. The function drops i_data_sem
  114. * when restarting transaction and gets it after transaction is restarted.
  115. *
  116. * The function returns 0 on success, 1 if transaction had to be restarted,
  117. * and < 0 in case of fatal error.
  118. */
  119. int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
  120. int check_cred, int restart_cred,
  121. int revoke_cred)
  122. {
  123. int ret;
  124. int dropped = 0;
  125. ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
  126. revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
  127. if (dropped)
  128. down_write(&EXT4_I(inode)->i_data_sem);
  129. return ret;
  130. }
  131. /*
  132. * could return:
  133. * - EROFS
  134. * - ENOMEM
  135. */
  136. static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
  137. struct ext4_ext_path *path)
  138. {
  139. int err = 0;
  140. if (path->p_bh) {
  141. /* path points to block */
  142. BUFFER_TRACE(path->p_bh, "get_write_access");
  143. err = ext4_journal_get_write_access(handle, inode->i_sb,
  144. path->p_bh, EXT4_JTR_NONE);
  145. /*
  146. * The extent buffer's verified bit will be set again in
  147. * __ext4_ext_dirty(). We could leave an inconsistent
  148. * buffer if the extents updating procudure break off du
  149. * to some error happens, force to check it again.
  150. */
  151. if (!err)
  152. clear_buffer_verified(path->p_bh);
  153. }
  154. /* path points to leaf/index in inode body */
  155. /* we use in-core data, no need to protect them */
  156. return err;
  157. }
  158. /*
  159. * could return:
  160. * - EROFS
  161. * - ENOMEM
  162. * - EIO
  163. */
  164. int __ext4_ext_dirty(const char *where, unsigned int line,
  165. handle_t *handle, struct inode *inode,
  166. struct ext4_ext_path *path)
  167. {
  168. int err;
  169. KUNIT_STATIC_STUB_REDIRECT(__ext4_ext_dirty, where, line, handle, inode,
  170. path);
  171. WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
  172. if (path->p_bh) {
  173. ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
  174. /* path points to block */
  175. err = __ext4_handle_dirty_metadata(where, line, handle,
  176. inode, path->p_bh);
  177. /* Extents updating done, re-set verified flag */
  178. if (!err)
  179. set_buffer_verified(path->p_bh);
  180. } else {
  181. /* path points to leaf/index in inode body */
  182. err = ext4_mark_inode_dirty(handle, inode);
  183. }
  184. return err;
  185. }
  186. #define ext4_ext_dirty(handle, inode, path) \
  187. __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
  188. static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
  189. struct ext4_ext_path *path,
  190. ext4_lblk_t block)
  191. {
  192. if (path) {
  193. int depth = path->p_depth;
  194. struct ext4_extent *ex;
  195. /*
  196. * Try to predict block placement assuming that we are
  197. * filling in a file which will eventually be
  198. * non-sparse --- i.e., in the case of libbfd writing
  199. * an ELF object sections out-of-order but in a way
  200. * the eventually results in a contiguous object or
  201. * executable file, or some database extending a table
  202. * space file. However, this is actually somewhat
  203. * non-ideal if we are writing a sparse file such as
  204. * qemu or KVM writing a raw image file that is going
  205. * to stay fairly sparse, since it will end up
  206. * fragmenting the file system's free space. Maybe we
  207. * should have some hueristics or some way to allow
  208. * userspace to pass a hint to file system,
  209. * especially if the latter case turns out to be
  210. * common.
  211. */
  212. ex = path[depth].p_ext;
  213. if (ex) {
  214. ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
  215. ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
  216. if (block > ext_block)
  217. return ext_pblk + (block - ext_block);
  218. else
  219. return ext_pblk - (ext_block - block);
  220. }
  221. /* it looks like index is empty;
  222. * try to find starting block from index itself */
  223. if (path[depth].p_bh)
  224. return path[depth].p_bh->b_blocknr;
  225. }
  226. /* OK. use inode's group */
  227. return ext4_inode_to_goal_block(inode);
  228. }
  229. /*
  230. * Allocation for a meta data block
  231. */
  232. static ext4_fsblk_t
  233. ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
  234. struct ext4_ext_path *path,
  235. struct ext4_extent *ex, int *err, unsigned int flags)
  236. {
  237. ext4_fsblk_t goal, newblock;
  238. goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
  239. newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
  240. NULL, err);
  241. return newblock;
  242. }
  243. static inline int ext4_ext_space_block(struct inode *inode, int check)
  244. {
  245. int size;
  246. size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
  247. / sizeof(struct ext4_extent);
  248. #ifdef AGGRESSIVE_TEST
  249. if (!check && size > 6)
  250. size = 6;
  251. #endif
  252. return size;
  253. }
  254. static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
  255. {
  256. int size;
  257. size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
  258. / sizeof(struct ext4_extent_idx);
  259. #ifdef AGGRESSIVE_TEST
  260. if (!check && size > 5)
  261. size = 5;
  262. #endif
  263. return size;
  264. }
  265. static inline int ext4_ext_space_root(struct inode *inode, int check)
  266. {
  267. int size;
  268. size = sizeof(EXT4_I(inode)->i_data);
  269. size -= sizeof(struct ext4_extent_header);
  270. size /= sizeof(struct ext4_extent);
  271. #ifdef AGGRESSIVE_TEST
  272. if (!check && size > 3)
  273. size = 3;
  274. #endif
  275. return size;
  276. }
  277. static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  278. {
  279. int size;
  280. size = sizeof(EXT4_I(inode)->i_data);
  281. size -= sizeof(struct ext4_extent_header);
  282. size /= sizeof(struct ext4_extent_idx);
  283. #ifdef AGGRESSIVE_TEST
  284. if (!check && size > 4)
  285. size = 4;
  286. #endif
  287. return size;
  288. }
  289. static inline struct ext4_ext_path *
  290. ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
  291. struct ext4_ext_path *path, ext4_lblk_t lblk,
  292. int nofail)
  293. {
  294. int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_SPLIT_NOMERGE;
  295. if (nofail)
  296. flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
  297. return ext4_split_extent_at(handle, inode, path, lblk, flags);
  298. }
  /*
   * Upper bound on the entry count of an extent-tree node whose header
   * depth is @depth.
   *
   * A node whose depth equals ext_depth(inode) is the root stored in the
   * inode, otherwise it lives in a separate block; depth 0 means the node
   * holds extents, anything else means it holds index entries.  The
   * matching ext4_ext_space_*() helper is queried with check == 1.
   */
  static int
  ext4_ext_max_entries(struct inode *inode, int depth)
  {
  	const int is_leaf = (depth == 0);

  	if (depth == ext_depth(inode))
  		return is_leaf ? ext4_ext_space_root(inode, 1)
  			       : ext4_ext_space_root_idx(inode, 1);

  	return is_leaf ? ext4_ext_space_block(inode, 1)
  		       : ext4_ext_space_block_idx(inode, 1);
  }
  316. static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
  317. {
  318. ext4_fsblk_t block = ext4_ext_pblock(ext);
  319. int len = ext4_ext_get_actual_len(ext);
  320. ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
  321. /*
  322. * We allow neither:
  323. * - zero length
  324. * - overflow/wrap-around
  325. */
  326. if (lblock + len <= lblock)
  327. return 0;
  328. return ext4_inode_block_valid(inode, block, len);
  329. }
  /*
   * Validate a single index record: the one block it points to must pass
   * ext4_inode_block_valid().
   */
  static int ext4_valid_extent_idx(struct inode *inode,
  				 struct ext4_extent_idx *ext_idx)
  {
  	return ext4_inode_block_valid(inode, ext4_idx_pblock(ext_idx), 1);
  }
  336. static int ext4_valid_extent_entries(struct inode *inode,
  337. struct ext4_extent_header *eh,
  338. ext4_lblk_t lblk, ext4_fsblk_t *pblk,
  339. int depth)
  340. {
  341. unsigned short entries;
  342. ext4_lblk_t lblock = 0;
  343. ext4_lblk_t cur = 0;
  344. if (eh->eh_entries == 0)
  345. return 1;
  346. entries = le16_to_cpu(eh->eh_entries);
  347. if (depth == 0) {
  348. /* leaf entries */
  349. struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
  350. /*
  351. * The logical block in the first entry should equal to
  352. * the number in the index block.
  353. */
  354. if (depth != ext_depth(inode) &&
  355. lblk != le32_to_cpu(ext->ee_block))
  356. return 0;
  357. while (entries) {
  358. if (!ext4_valid_extent(inode, ext))
  359. return 0;
  360. /* Check for overlapping extents */
  361. lblock = le32_to_cpu(ext->ee_block);
  362. if (lblock < cur) {
  363. *pblk = ext4_ext_pblock(ext);
  364. return 0;
  365. }
  366. cur = lblock + ext4_ext_get_actual_len(ext);
  367. ext++;
  368. entries--;
  369. }
  370. } else {
  371. struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
  372. /*
  373. * The logical block in the first entry should equal to
  374. * the number in the parent index block.
  375. */
  376. if (depth != ext_depth(inode) &&
  377. lblk != le32_to_cpu(ext_idx->ei_block))
  378. return 0;
  379. while (entries) {
  380. if (!ext4_valid_extent_idx(inode, ext_idx))
  381. return 0;
  382. /* Check for overlapping index extents */
  383. lblock = le32_to_cpu(ext_idx->ei_block);
  384. if (lblock < cur) {
  385. *pblk = ext4_idx_pblock(ext_idx);
  386. return 0;
  387. }
  388. ext_idx++;
  389. entries--;
  390. cur = lblock + 1;
  391. }
  392. }
  393. return 1;
  394. }
  395. static int __ext4_ext_check(const char *function, unsigned int line,
  396. struct inode *inode, struct ext4_extent_header *eh,
  397. int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
  398. {
  399. const char *error_msg;
  400. int max = 0, err = -EFSCORRUPTED;
  401. if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
  402. error_msg = "invalid magic";
  403. goto corrupted;
  404. }
  405. if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
  406. error_msg = "unexpected eh_depth";
  407. goto corrupted;
  408. }
  409. if (unlikely(eh->eh_max == 0)) {
  410. error_msg = "invalid eh_max";
  411. goto corrupted;
  412. }
  413. max = ext4_ext_max_entries(inode, depth);
  414. if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
  415. error_msg = "too large eh_max";
  416. goto corrupted;
  417. }
  418. if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
  419. error_msg = "invalid eh_entries";
  420. goto corrupted;
  421. }
  422. if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
  423. error_msg = "eh_entries is 0 but eh_depth is > 0";
  424. goto corrupted;
  425. }
  426. if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
  427. error_msg = "invalid extent entries";
  428. goto corrupted;
  429. }
  430. if (unlikely(depth > 32)) {
  431. error_msg = "too large eh_depth";
  432. goto corrupted;
  433. }
  434. /* Verify checksum on non-root extent tree nodes */
  435. if (ext_depth(inode) != depth &&
  436. !ext4_extent_block_csum_verify(inode, eh)) {
  437. error_msg = "extent tree corrupted";
  438. err = -EFSBADCRC;
  439. goto corrupted;
  440. }
  441. return 0;
  442. corrupted:
  443. ext4_error_inode_err(inode, function, line, 0, -err,
  444. "pblk %llu bad header/extent: %s - magic %x, "
  445. "entries %u, max %u(%u), depth %u(%u)",
  446. (unsigned long long) pblk, error_msg,
  447. le16_to_cpu(eh->eh_magic),
  448. le16_to_cpu(eh->eh_entries),
  449. le16_to_cpu(eh->eh_max),
  450. max, le16_to_cpu(eh->eh_depth), depth);
  451. return err;
  452. }
  453. #define ext4_ext_check(inode, eh, depth, pblk) \
  454. __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)
  /*
   * Run ext4_ext_check() on the extent-tree root stored in @inode, using
   * the inode's own depth and a physical block of 0.
   *
   * Returns the result of ext4_ext_check().
   */
  int ext4_ext_check_inode(struct inode *inode)
  {
  	struct ext4_extent_header *root = ext_inode_hdr(inode);

  	return ext4_ext_check(inode, root, ext_depth(inode), 0);
  }
  459. static void ext4_cache_extents(struct inode *inode,
  460. struct ext4_extent_header *eh)
  461. {
  462. struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
  463. ext4_lblk_t prev = 0;
  464. int i;
  465. KUNIT_STATIC_STUB_REDIRECT(ext4_cache_extents, inode, eh);
  466. for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
  467. unsigned int status = EXTENT_STATUS_WRITTEN;
  468. ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
  469. int len = ext4_ext_get_actual_len(ex);
  470. if (prev && (prev != lblk))
  471. ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
  472. EXTENT_STATUS_HOLE);
  473. if (ext4_ext_is_unwritten(ex))
  474. status = EXTENT_STATUS_UNWRITTEN;
  475. ext4_es_cache_extent(inode, lblk, len,
  476. ext4_ext_pblock(ex), status);
  477. prev = lblk + len;
  478. }
  479. }
  480. static struct buffer_head *
  481. __read_extent_tree_block(const char *function, unsigned int line,
  482. struct inode *inode, struct ext4_extent_idx *idx,
  483. int depth, int flags)
  484. {
  485. struct buffer_head *bh;
  486. int err;
  487. gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
  488. ext4_fsblk_t pblk;
  489. if (flags & EXT4_EX_NOFAIL)
  490. gfp_flags |= __GFP_NOFAIL;
  491. pblk = ext4_idx_pblock(idx);
  492. bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
  493. if (unlikely(!bh))
  494. return ERR_PTR(-ENOMEM);
  495. if (!bh_uptodate_or_lock(bh)) {
  496. trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
  497. err = ext4_read_bh(bh, 0, NULL, false);
  498. if (err < 0)
  499. goto errout;
  500. }
  501. if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
  502. return bh;
  503. err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
  504. depth, pblk, le32_to_cpu(idx->ei_block));
  505. if (err)
  506. goto errout;
  507. set_buffer_verified(bh);
  508. /*
  509. * If this is a leaf block, cache all of its entries
  510. */
  511. if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
  512. struct ext4_extent_header *eh = ext_block_hdr(bh);
  513. ext4_cache_extents(inode, eh);
  514. }
  515. return bh;
  516. errout:
  517. put_bh(bh);
  518. return ERR_PTR(err);
  519. }
  520. #define read_extent_tree_block(inode, idx, depth, flags) \
  521. __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \
  522. (depth), (flags))
  523. /*
  524. * This function is called to cache a file's extent information in the
  525. * extent status tree
  526. */
  527. int ext4_ext_precache(struct inode *inode)
  528. {
  529. struct ext4_inode_info *ei = EXT4_I(inode);
  530. struct ext4_ext_path *path = NULL;
  531. struct buffer_head *bh;
  532. int i = 0, depth, ret = 0;
  533. if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
  534. return 0; /* not an extent-mapped inode */
  535. ext4_check_map_extents_env(inode);
  536. down_read(&ei->i_data_sem);
  537. depth = ext_depth(inode);
  538. /* Don't cache anything if there are no external extent blocks */
  539. if (!depth) {
  540. up_read(&ei->i_data_sem);
  541. return ret;
  542. }
  543. path = kzalloc_objs(struct ext4_ext_path, depth + 1, GFP_NOFS);
  544. if (path == NULL) {
  545. up_read(&ei->i_data_sem);
  546. return -ENOMEM;
  547. }
  548. path[0].p_hdr = ext_inode_hdr(inode);
  549. ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
  550. if (ret)
  551. goto out;
  552. path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
  553. while (i >= 0) {
  554. /*
  555. * If this is a leaf block or we've reached the end of
  556. * the index block, go up
  557. */
  558. if ((i == depth) ||
  559. path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
  560. ext4_ext_path_brelse(path + i);
  561. i--;
  562. continue;
  563. }
  564. bh = read_extent_tree_block(inode, path[i].p_idx++,
  565. depth - i - 1,
  566. EXT4_EX_FORCE_CACHE);
  567. if (IS_ERR(bh)) {
  568. ret = PTR_ERR(bh);
  569. break;
  570. }
  571. i++;
  572. path[i].p_bh = bh;
  573. path[i].p_hdr = ext_block_hdr(bh);
  574. path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
  575. }
  576. ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
  577. out:
  578. up_read(&ei->i_data_sem);
  579. ext4_free_ext_path(path);
  580. return ret;
  581. }
  #ifdef EXT_DEBUG
  /*
   * Debug helper: print one item per level of @path (levels 0..p_depth).
   * A level with an index pointer prints "lblk->pblk", a level with an
   * extent pointer prints "lblk:[unwritten]len:pblk", anything else "[]".
   */
  static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
  {
  	int last = path->p_depth;
  	int lvl;

  	ext_debug(inode, "path:");
  	for (lvl = 0; lvl <= last; lvl++) {
  		struct ext4_ext_path *p = &path[lvl];

  		if (p->p_idx) {
  			ext_debug(inode, "  %d->%llu",
  				  le32_to_cpu(p->p_idx->ei_block),
  				  ext4_idx_pblock(p->p_idx));
  		} else if (p->p_ext) {
  			ext_debug(inode, "  %d:[%d]%d:%llu ",
  				  le32_to_cpu(p->p_ext->ee_block),
  				  ext4_ext_is_unwritten(p->p_ext),
  				  ext4_ext_get_actual_len(p->p_ext),
  				  ext4_ext_pblock(p->p_ext));
  		} else
  			ext_debug(inode, "  []");
  	}
  	ext_debug(inode, "\n");
  }

  /*
   * Debug helper: print every extent of the node at level ext_depth(inode)
   * of @path.  Silently returns for a NULL or error-pointer @path.
   */
  static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
  {
  	int depth = ext_depth(inode);
  	struct ext4_extent_header *hdr;
  	struct ext4_extent *cur;
  	int n;

  	if (IS_ERR_OR_NULL(path))
  		return;

  	hdr = path[depth].p_hdr;
  	cur = EXT_FIRST_EXTENT(hdr);

  	ext_debug(inode, "Displaying leaf extents\n");

  	for (n = 0; n < le16_to_cpu(hdr->eh_entries); n++, cur++) {
  		ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(cur->ee_block),
  			  ext4_ext_is_unwritten(cur),
  			  ext4_ext_get_actual_len(cur), ext4_ext_pblock(cur));
  	}
  	ext_debug(inode, "\n");
  }

  /*
   * Debug helper: list the entries, from the current position of
   * @path[@level] up to the node's maximum slot, that are about to be
   * moved into @newblock.  Index entries are listed when @level is not the
   * leaf level, extents otherwise.
   */
  static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
  			ext4_fsblk_t newblock, int level)
  {
  	int depth = ext_depth(inode);
  	struct ext4_extent *ex;

  	if (depth != level) {
  		struct ext4_extent_idx *idx;

  		for (idx = path[level].p_idx;
  		     idx <= EXT_MAX_INDEX(path[level].p_hdr); idx++) {
  			ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
  				  level, le32_to_cpu(idx->ei_block),
  				  ext4_idx_pblock(idx), newblock);
  		}
  		return;
  	}

  	for (ex = path[depth].p_ext;
  	     ex <= EXT_MAX_EXTENT(path[depth].p_hdr); ex++) {
  		ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
  			  le32_to_cpu(ex->ee_block),
  			  ext4_ext_pblock(ex),
  			  ext4_ext_is_unwritten(ex),
  			  ext4_ext_get_actual_len(ex),
  			  newblock);
  	}
  }

  #else
  /* Without EXT_DEBUG the helpers expand to nothing. */
  #define ext4_ext_show_path(inode, path)
  #define ext4_ext_show_leaf(inode, path)
  #define ext4_ext_show_move(inode, path, newblock, level)
  #endif
  653. /*
  654. * ext4_ext_binsearch_idx:
  655. * binary search for the closest index of the given block
  656. * the header must be checked before calling this
  657. */
  658. static void
  659. ext4_ext_binsearch_idx(struct inode *inode,
  660. struct ext4_ext_path *path, ext4_lblk_t block)
  661. {
  662. struct ext4_extent_header *eh = path->p_hdr;
  663. struct ext4_extent_idx *r, *l, *m;
  664. ext_debug(inode, "binsearch for %u(idx): ", block);
  665. l = EXT_FIRST_INDEX(eh) + 1;
  666. r = EXT_LAST_INDEX(eh);
  667. while (l <= r) {
  668. m = l + (r - l) / 2;
  669. ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
  670. le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
  671. r, le32_to_cpu(r->ei_block));
  672. if (block < le32_to_cpu(m->ei_block))
  673. r = m - 1;
  674. else
  675. l = m + 1;
  676. }
  677. path->p_idx = l - 1;
  678. ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
  679. ext4_idx_pblock(path->p_idx));
  680. #ifdef CHECK_BINSEARCH
  681. {
  682. struct ext4_extent_idx *chix, *ix;
  683. int k;
  684. chix = ix = EXT_FIRST_INDEX(eh);
  685. for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
  686. if (k != 0 && le32_to_cpu(ix->ei_block) <=
  687. le32_to_cpu(ix[-1].ei_block)) {
  688. printk(KERN_DEBUG "k=%d, ix=0x%p, "
  689. "first=0x%p\n", k,
  690. ix, EXT_FIRST_INDEX(eh));
  691. printk(KERN_DEBUG "%u <= %u\n",
  692. le32_to_cpu(ix->ei_block),
  693. le32_to_cpu(ix[-1].ei_block));
  694. }
  695. BUG_ON(k && le32_to_cpu(ix->ei_block)
  696. <= le32_to_cpu(ix[-1].ei_block));
  697. if (block < le32_to_cpu(ix->ei_block))
  698. break;
  699. chix = ix;
  700. }
  701. BUG_ON(chix != path->p_idx);
  702. }
  703. #endif
  704. }
  705. /*
  706. * ext4_ext_binsearch:
  707. * binary search for closest extent of the given block
  708. * the header must be checked before calling this
  709. */
  710. static void
  711. ext4_ext_binsearch(struct inode *inode,
  712. struct ext4_ext_path *path, ext4_lblk_t block)
  713. {
  714. struct ext4_extent_header *eh = path->p_hdr;
  715. struct ext4_extent *r, *l, *m;
  716. if (eh->eh_entries == 0) {
  717. /*
  718. * this leaf is empty:
  719. * we get such a leaf in split/add case
  720. */
  721. return;
  722. }
  723. ext_debug(inode, "binsearch for %u: ", block);
  724. l = EXT_FIRST_EXTENT(eh) + 1;
  725. r = EXT_LAST_EXTENT(eh);
  726. while (l <= r) {
  727. m = l + (r - l) / 2;
  728. ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
  729. le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
  730. r, le32_to_cpu(r->ee_block));
  731. if (block < le32_to_cpu(m->ee_block))
  732. r = m - 1;
  733. else
  734. l = m + 1;
  735. }
  736. path->p_ext = l - 1;
  737. ext_debug(inode, " -> %d:%llu:[%d]%d ",
  738. le32_to_cpu(path->p_ext->ee_block),
  739. ext4_ext_pblock(path->p_ext),
  740. ext4_ext_is_unwritten(path->p_ext),
  741. ext4_ext_get_actual_len(path->p_ext));
  742. #ifdef CHECK_BINSEARCH
  743. {
  744. struct ext4_extent *chex, *ex;
  745. int k;
  746. chex = ex = EXT_FIRST_EXTENT(eh);
  747. for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
  748. BUG_ON(k && le32_to_cpu(ex->ee_block)
  749. <= le32_to_cpu(ex[-1].ee_block));
  750. if (block < le32_to_cpu(ex->ee_block))
  751. break;
  752. chex = ex;
  753. }
  754. BUG_ON(chex != path->p_ext);
  755. }
  756. #endif
  757. }
  758. void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
  759. {
  760. struct ext4_extent_header *eh;
  761. eh = ext_inode_hdr(inode);
  762. eh->eh_depth = 0;
  763. eh->eh_entries = 0;
  764. eh->eh_magic = EXT4_EXT_MAGIC;
  765. eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
  766. eh->eh_generation = 0;
  767. ext4_mark_inode_dirty(handle, inode);
  768. }
  769. struct ext4_ext_path *
  770. ext4_find_extent(struct inode *inode, ext4_lblk_t block,
  771. struct ext4_ext_path *path, int flags)
  772. {
  773. struct ext4_extent_header *eh;
  774. struct buffer_head *bh;
  775. short int depth, i, ppos = 0;
  776. int ret;
  777. gfp_t gfp_flags = GFP_NOFS;
  778. KUNIT_STATIC_STUB_REDIRECT(ext4_find_extent, inode, block, path, flags);
  779. if (flags & EXT4_EX_NOFAIL)
  780. gfp_flags |= __GFP_NOFAIL;
  781. eh = ext_inode_hdr(inode);
  782. depth = ext_depth(inode);
  783. if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
  784. EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
  785. depth);
  786. ret = -EFSCORRUPTED;
  787. goto err;
  788. }
  789. if (path) {
  790. ext4_ext_drop_refs(path);
  791. if (depth > path[0].p_maxdepth) {
  792. kfree(path);
  793. path = NULL;
  794. }
  795. }
  796. if (!path) {
  797. /* account possible depth increase */
  798. path = kzalloc_objs(struct ext4_ext_path, depth + 2, gfp_flags);
  799. if (unlikely(!path))
  800. return ERR_PTR(-ENOMEM);
  801. path[0].p_maxdepth = depth + 1;
  802. }
  803. path[0].p_hdr = eh;
  804. path[0].p_bh = NULL;
  805. i = depth;
  806. if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
  807. ext4_cache_extents(inode, eh);
  808. /* walk through the tree */
  809. while (i) {
  810. ext_debug(inode, "depth %d: num %d, max %d\n",
  811. ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
  812. ext4_ext_binsearch_idx(inode, path + ppos, block);
  813. path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
  814. path[ppos].p_depth = i;
  815. path[ppos].p_ext = NULL;
  816. bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
  817. if (IS_ERR(bh)) {
  818. ret = PTR_ERR(bh);
  819. goto err;
  820. }
  821. eh = ext_block_hdr(bh);
  822. ppos++;
  823. path[ppos].p_bh = bh;
  824. path[ppos].p_hdr = eh;
  825. }
  826. path[ppos].p_depth = i;
  827. path[ppos].p_ext = NULL;
  828. path[ppos].p_idx = NULL;
  829. /* find extent */
  830. ext4_ext_binsearch(inode, path + ppos, block);
  831. /* if not an empty leaf */
  832. if (path[ppos].p_ext)
  833. path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
  834. ext4_ext_show_path(inode, path);
  835. return path;
  836. err:
  837. ext4_free_ext_path(path);
  838. return ERR_PTR(ret);
  839. }
  840. /*
  841. * ext4_ext_insert_index:
  842. * insert new index [@logical;@ptr] into the block at @curp;
  843. * check where to insert: before @curp or after @curp
  844. */
  845. static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
  846. struct ext4_ext_path *curp,
  847. int logical, ext4_fsblk_t ptr)
  848. {
  849. struct ext4_extent_idx *ix;
  850. int len, err;
  851. err = ext4_ext_get_access(handle, inode, curp);
  852. if (err)
  853. return err;
  854. if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
  855. EXT4_ERROR_INODE(inode,
  856. "logical %d == ei_block %d!",
  857. logical, le32_to_cpu(curp->p_idx->ei_block));
  858. return -EFSCORRUPTED;
  859. }
  860. if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
  861. >= le16_to_cpu(curp->p_hdr->eh_max))) {
  862. EXT4_ERROR_INODE(inode,
  863. "eh_entries %d >= eh_max %d!",
  864. le16_to_cpu(curp->p_hdr->eh_entries),
  865. le16_to_cpu(curp->p_hdr->eh_max));
  866. return -EFSCORRUPTED;
  867. }
  868. if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
  869. /* insert after */
  870. ext_debug(inode, "insert new index %d after: %llu\n",
  871. logical, ptr);
  872. ix = curp->p_idx + 1;
  873. } else {
  874. /* insert before */
  875. ext_debug(inode, "insert new index %d before: %llu\n",
  876. logical, ptr);
  877. ix = curp->p_idx;
  878. }
  879. if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
  880. EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
  881. return -EFSCORRUPTED;
  882. }
  883. len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
  884. BUG_ON(len < 0);
  885. if (len > 0) {
  886. ext_debug(inode, "insert new index %d: "
  887. "move %d indices from 0x%p to 0x%p\n",
  888. logical, len, ix, ix + 1);
  889. memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
  890. }
  891. ix->ei_block = cpu_to_le32(logical);
  892. ext4_idx_store_pblock(ix, ptr);
  893. le16_add_cpu(&curp->p_hdr->eh_entries, 1);
  894. if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
  895. EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
  896. return -EFSCORRUPTED;
  897. }
  898. err = ext4_ext_dirty(handle, inode, curp);
  899. ext4_std_error(inode->i_sb, err);
  900. return err;
  901. }
  902. /*
  903. * ext4_ext_split:
  904. * inserts new subtree into the path, using free index entry
  905. * at depth @at:
  906. * - allocates all needed blocks (new leaf and all intermediate index blocks)
  907. * - makes decision where to split
  908. * - moves remaining extents and index entries (right to the split point)
  909. * into the newly allocated blocks
  910. * - initializes subtree
  911. */
  912. static int ext4_ext_split(handle_t *handle, struct inode *inode,
  913. unsigned int flags,
  914. struct ext4_ext_path *path,
  915. struct ext4_extent *newext, int at)
  916. {
  917. struct buffer_head *bh = NULL;
  918. int depth = ext_depth(inode);
  919. struct ext4_extent_header *neh;
  920. struct ext4_extent_idx *fidx;
  921. int i = at, k, m, a;
  922. ext4_fsblk_t newblock, oldblock;
  923. __le32 border;
  924. ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
  925. gfp_t gfp_flags = GFP_NOFS;
  926. int err = 0;
  927. size_t ext_size = 0;
  928. if (flags & EXT4_EX_NOFAIL)
  929. gfp_flags |= __GFP_NOFAIL;
  930. /* make decision: where to split? */
  931. /* FIXME: now decision is simplest: at current extent */
  932. /* if current leaf will be split, then we should use
  933. * border from split point */
  934. if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
  935. EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
  936. return -EFSCORRUPTED;
  937. }
  938. if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
  939. border = path[depth].p_ext[1].ee_block;
  940. ext_debug(inode, "leaf will be split."
  941. " next leaf starts at %d\n",
  942. le32_to_cpu(border));
  943. } else {
  944. border = newext->ee_block;
  945. ext_debug(inode, "leaf will be added."
  946. " next leaf starts at %d\n",
  947. le32_to_cpu(border));
  948. }
  949. /*
  950. * If error occurs, then we break processing
  951. * and mark filesystem read-only. index won't
  952. * be inserted and tree will be in consistent
  953. * state. Next mount will repair buffers too.
  954. */
  955. /*
  956. * Get array to track all allocated blocks.
  957. * We need this to handle errors and free blocks
  958. * upon them.
  959. */
  960. ablocks = kzalloc_objs(ext4_fsblk_t, depth, gfp_flags);
  961. if (!ablocks)
  962. return -ENOMEM;
  963. /* allocate all needed blocks */
  964. ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
  965. for (a = 0; a < depth - at; a++) {
  966. newblock = ext4_ext_new_meta_block(handle, inode, path,
  967. newext, &err, flags);
  968. if (newblock == 0)
  969. goto cleanup;
  970. ablocks[a] = newblock;
  971. }
  972. /* initialize new leaf */
  973. newblock = ablocks[--a];
  974. if (unlikely(newblock == 0)) {
  975. EXT4_ERROR_INODE(inode, "newblock == 0!");
  976. err = -EFSCORRUPTED;
  977. goto cleanup;
  978. }
  979. bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
  980. if (unlikely(!bh)) {
  981. err = -ENOMEM;
  982. goto cleanup;
  983. }
  984. lock_buffer(bh);
  985. err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
  986. EXT4_JTR_NONE);
  987. if (err)
  988. goto cleanup;
  989. neh = ext_block_hdr(bh);
  990. neh->eh_entries = 0;
  991. neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
  992. neh->eh_magic = EXT4_EXT_MAGIC;
  993. neh->eh_depth = 0;
  994. neh->eh_generation = 0;
  995. /* move remainder of path[depth] to the new leaf */
  996. if (unlikely(path[depth].p_hdr->eh_entries !=
  997. path[depth].p_hdr->eh_max)) {
  998. EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
  999. path[depth].p_hdr->eh_entries,
  1000. path[depth].p_hdr->eh_max);
  1001. err = -EFSCORRUPTED;
  1002. goto cleanup;
  1003. }
  1004. /* start copy from next extent */
  1005. m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
  1006. ext4_ext_show_move(inode, path, newblock, depth);
  1007. if (m) {
  1008. struct ext4_extent *ex;
  1009. ex = EXT_FIRST_EXTENT(neh);
  1010. memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
  1011. le16_add_cpu(&neh->eh_entries, m);
  1012. }
  1013. /* zero out unused area in the extent block */
  1014. ext_size = sizeof(struct ext4_extent_header) +
  1015. sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
  1016. memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
  1017. ext4_extent_block_csum_set(inode, neh);
  1018. set_buffer_uptodate(bh);
  1019. unlock_buffer(bh);
  1020. err = ext4_handle_dirty_metadata(handle, inode, bh);
  1021. if (err)
  1022. goto cleanup;
  1023. brelse(bh);
  1024. bh = NULL;
  1025. /* correct old leaf */
  1026. if (m) {
  1027. err = ext4_ext_get_access(handle, inode, path + depth);
  1028. if (err)
  1029. goto cleanup;
  1030. le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
  1031. err = ext4_ext_dirty(handle, inode, path + depth);
  1032. if (err)
  1033. goto cleanup;
  1034. }
  1035. /* create intermediate indexes */
  1036. k = depth - at - 1;
  1037. if (unlikely(k < 0)) {
  1038. EXT4_ERROR_INODE(inode, "k %d < 0!", k);
  1039. err = -EFSCORRUPTED;
  1040. goto cleanup;
  1041. }
  1042. if (k)
  1043. ext_debug(inode, "create %d intermediate indices\n", k);
  1044. /* insert new index into current index block */
  1045. /* current depth stored in i var */
  1046. i = depth - 1;
  1047. while (k--) {
  1048. oldblock = newblock;
  1049. newblock = ablocks[--a];
  1050. bh = sb_getblk(inode->i_sb, newblock);
  1051. if (unlikely(!bh)) {
  1052. err = -ENOMEM;
  1053. goto cleanup;
  1054. }
  1055. lock_buffer(bh);
  1056. err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
  1057. EXT4_JTR_NONE);
  1058. if (err)
  1059. goto cleanup;
  1060. neh = ext_block_hdr(bh);
  1061. neh->eh_entries = cpu_to_le16(1);
  1062. neh->eh_magic = EXT4_EXT_MAGIC;
  1063. neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
  1064. neh->eh_depth = cpu_to_le16(depth - i);
  1065. neh->eh_generation = 0;
  1066. fidx = EXT_FIRST_INDEX(neh);
  1067. fidx->ei_block = border;
  1068. ext4_idx_store_pblock(fidx, oldblock);
  1069. ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
  1070. i, newblock, le32_to_cpu(border), oldblock);
  1071. /* move remainder of path[i] to the new index block */
  1072. if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
  1073. EXT_LAST_INDEX(path[i].p_hdr))) {
  1074. EXT4_ERROR_INODE(inode,
  1075. "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
  1076. le32_to_cpu(path[i].p_ext->ee_block));
  1077. err = -EFSCORRUPTED;
  1078. goto cleanup;
  1079. }
  1080. /* start copy indexes */
  1081. m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
  1082. ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
  1083. EXT_MAX_INDEX(path[i].p_hdr));
  1084. ext4_ext_show_move(inode, path, newblock, i);
  1085. if (m) {
  1086. memmove(++fidx, path[i].p_idx,
  1087. sizeof(struct ext4_extent_idx) * m);
  1088. le16_add_cpu(&neh->eh_entries, m);
  1089. }
  1090. /* zero out unused area in the extent block */
  1091. ext_size = sizeof(struct ext4_extent_header) +
  1092. (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
  1093. memset(bh->b_data + ext_size, 0,
  1094. inode->i_sb->s_blocksize - ext_size);
  1095. ext4_extent_block_csum_set(inode, neh);
  1096. set_buffer_uptodate(bh);
  1097. unlock_buffer(bh);
  1098. err = ext4_handle_dirty_metadata(handle, inode, bh);
  1099. if (err)
  1100. goto cleanup;
  1101. brelse(bh);
  1102. bh = NULL;
  1103. /* correct old index */
  1104. if (m) {
  1105. err = ext4_ext_get_access(handle, inode, path + i);
  1106. if (err)
  1107. goto cleanup;
  1108. le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
  1109. err = ext4_ext_dirty(handle, inode, path + i);
  1110. if (err)
  1111. goto cleanup;
  1112. }
  1113. i--;
  1114. }
  1115. /* insert new index */
  1116. err = ext4_ext_insert_index(handle, inode, path + at,
  1117. le32_to_cpu(border), newblock);
  1118. cleanup:
  1119. if (bh) {
  1120. if (buffer_locked(bh))
  1121. unlock_buffer(bh);
  1122. brelse(bh);
  1123. }
  1124. if (err) {
  1125. /* free all allocated blocks in error case */
  1126. for (i = 0; i < depth; i++) {
  1127. if (!ablocks[i])
  1128. continue;
  1129. ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
  1130. EXT4_FREE_BLOCKS_METADATA);
  1131. }
  1132. }
  1133. kfree(ablocks);
  1134. return err;
  1135. }
  1136. /*
  1137. * ext4_ext_grow_indepth:
  1138. * implements tree growing procedure:
  1139. * - allocates new block
  1140. * - moves top-level data (index block or leaf) into the new block
  1141. * - initializes new top-level, creating index that points to the
  1142. * just created block
  1143. */
  1144. static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
  1145. unsigned int flags)
  1146. {
  1147. struct ext4_extent_header *neh;
  1148. struct buffer_head *bh;
  1149. ext4_fsblk_t newblock, goal = 0;
  1150. struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
  1151. int err = 0;
  1152. size_t ext_size = 0;
  1153. /* Try to prepend new index to old one */
  1154. if (ext_depth(inode))
  1155. goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
  1156. if (goal > le32_to_cpu(es->s_first_data_block)) {
  1157. flags |= EXT4_MB_HINT_TRY_GOAL;
  1158. goal--;
  1159. } else
  1160. goal = ext4_inode_to_goal_block(inode);
  1161. newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
  1162. NULL, &err);
  1163. if (newblock == 0)
  1164. return err;
  1165. bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
  1166. if (unlikely(!bh))
  1167. return -ENOMEM;
  1168. lock_buffer(bh);
  1169. err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
  1170. EXT4_JTR_NONE);
  1171. if (err) {
  1172. unlock_buffer(bh);
  1173. goto out;
  1174. }
  1175. ext_size = sizeof(EXT4_I(inode)->i_data);
  1176. /* move top-level index/leaf into new block */
  1177. memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
  1178. /* zero out unused area in the extent block */
  1179. memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
  1180. /* set size of new block */
  1181. neh = ext_block_hdr(bh);
  1182. /* old root could have indexes or leaves
  1183. * so calculate e_max right way */
  1184. if (ext_depth(inode))
  1185. neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
  1186. else
  1187. neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
  1188. neh->eh_magic = EXT4_EXT_MAGIC;
  1189. ext4_extent_block_csum_set(inode, neh);
  1190. set_buffer_uptodate(bh);
  1191. set_buffer_verified(bh);
  1192. unlock_buffer(bh);
  1193. err = ext4_handle_dirty_metadata(handle, inode, bh);
  1194. if (err)
  1195. goto out;
  1196. /* Update top-level index: num,max,pointer */
  1197. neh = ext_inode_hdr(inode);
  1198. neh->eh_entries = cpu_to_le16(1);
  1199. ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
  1200. if (neh->eh_depth == 0) {
  1201. /* Root extent block becomes index block */
  1202. neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
  1203. EXT_FIRST_INDEX(neh)->ei_block =
  1204. EXT_FIRST_EXTENT(neh)->ee_block;
  1205. }
  1206. ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
  1207. le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
  1208. le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
  1209. ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
  1210. le16_add_cpu(&neh->eh_depth, 1);
  1211. err = ext4_mark_inode_dirty(handle, inode);
  1212. out:
  1213. brelse(bh);
  1214. return err;
  1215. }
  1216. /*
  1217. * ext4_ext_create_new_leaf:
  1218. * finds empty index and adds new leaf.
  1219. * if no free index is found, then it requests in-depth growing.
  1220. */
  1221. static struct ext4_ext_path *
  1222. ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
  1223. unsigned int mb_flags, unsigned int gb_flags,
  1224. struct ext4_ext_path *path,
  1225. struct ext4_extent *newext)
  1226. {
  1227. struct ext4_ext_path *curp;
  1228. int depth, i, err = 0;
  1229. ext4_lblk_t ee_block = le32_to_cpu(newext->ee_block);
  1230. repeat:
  1231. i = depth = ext_depth(inode);
  1232. /* walk up to the tree and look for free index entry */
  1233. curp = path + depth;
  1234. while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
  1235. i--;
  1236. curp--;
  1237. }
  1238. /* we use already allocated block for index block,
  1239. * so subsequent data blocks should be contiguous */
  1240. if (EXT_HAS_FREE_INDEX(curp)) {
  1241. /* if we found index with free entry, then use that
  1242. * entry: create all needed subtree and add new leaf */
  1243. err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
  1244. if (err)
  1245. goto errout;
  1246. /* refill path */
  1247. path = ext4_find_extent(inode, ee_block, path, gb_flags);
  1248. return path;
  1249. }
  1250. /* tree is full, time to grow in depth */
  1251. err = ext4_ext_grow_indepth(handle, inode, mb_flags);
  1252. if (err)
  1253. goto errout;
  1254. /* refill path */
  1255. path = ext4_find_extent(inode, ee_block, path, gb_flags);
  1256. if (IS_ERR(path))
  1257. return path;
  1258. /*
  1259. * only first (depth 0 -> 1) produces free space;
  1260. * in all other cases we have to split the grown tree
  1261. */
  1262. depth = ext_depth(inode);
  1263. if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
  1264. /* now we need to split */
  1265. goto repeat;
  1266. }
  1267. return path;
  1268. errout:
  1269. ext4_free_ext_path(path);
  1270. return ERR_PTR(err);
  1271. }
  1272. /*
  1273. * search the closest allocated block to the left for *logical
  1274. * and returns it at @logical + it's physical address at @phys
  1275. * if *logical is the smallest allocated block, the function
  1276. * returns 0 at @phys
  1277. * return value contains 0 (success) or error code
  1278. */
  1279. static int ext4_ext_search_left(struct inode *inode,
  1280. struct ext4_ext_path *path,
  1281. ext4_lblk_t *logical, ext4_fsblk_t *phys)
  1282. {
  1283. struct ext4_extent_idx *ix;
  1284. struct ext4_extent *ex;
  1285. int depth, ee_len;
  1286. if (unlikely(path == NULL)) {
  1287. EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
  1288. return -EFSCORRUPTED;
  1289. }
  1290. depth = path->p_depth;
  1291. *phys = 0;
  1292. if (depth == 0 && path->p_ext == NULL)
  1293. return 0;
  1294. /* usually extent in the path covers blocks smaller
  1295. * then *logical, but it can be that extent is the
  1296. * first one in the file */
  1297. ex = path[depth].p_ext;
  1298. ee_len = ext4_ext_get_actual_len(ex);
  1299. if (*logical < le32_to_cpu(ex->ee_block)) {
  1300. if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
  1301. EXT4_ERROR_INODE(inode,
  1302. "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
  1303. *logical, le32_to_cpu(ex->ee_block));
  1304. return -EFSCORRUPTED;
  1305. }
  1306. while (--depth >= 0) {
  1307. ix = path[depth].p_idx;
  1308. if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
  1309. EXT4_ERROR_INODE(inode,
  1310. "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
  1311. ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
  1312. le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block),
  1313. depth);
  1314. return -EFSCORRUPTED;
  1315. }
  1316. }
  1317. return 0;
  1318. }
  1319. if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
  1320. EXT4_ERROR_INODE(inode,
  1321. "logical %d < ee_block %d + ee_len %d!",
  1322. *logical, le32_to_cpu(ex->ee_block), ee_len);
  1323. return -EFSCORRUPTED;
  1324. }
  1325. *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
  1326. *phys = ext4_ext_pblock(ex) + ee_len - 1;
  1327. return 0;
  1328. }
  1329. /*
  1330. * Search the closest allocated block to the right for *logical
  1331. * and returns it at @logical + it's physical address at @phys.
  1332. * If not exists, return 0 and @phys is set to 0. We will return
  1333. * 1 which means we found an allocated block and ret_ex is valid.
  1334. * Or return a (< 0) error code.
  1335. */
  1336. static int ext4_ext_search_right(struct inode *inode,
  1337. struct ext4_ext_path *path,
  1338. ext4_lblk_t *logical, ext4_fsblk_t *phys,
  1339. struct ext4_extent *ret_ex, int flags)
  1340. {
  1341. struct buffer_head *bh = NULL;
  1342. struct ext4_extent_header *eh;
  1343. struct ext4_extent_idx *ix;
  1344. struct ext4_extent *ex;
  1345. int depth; /* Note, NOT eh_depth; depth from top of tree */
  1346. int ee_len;
  1347. if (unlikely(path == NULL)) {
  1348. EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
  1349. return -EFSCORRUPTED;
  1350. }
  1351. depth = path->p_depth;
  1352. *phys = 0;
  1353. if (depth == 0 && path->p_ext == NULL)
  1354. return 0;
  1355. /* usually extent in the path covers blocks smaller
  1356. * then *logical, but it can be that extent is the
  1357. * first one in the file */
  1358. ex = path[depth].p_ext;
  1359. ee_len = ext4_ext_get_actual_len(ex);
  1360. if (*logical < le32_to_cpu(ex->ee_block)) {
  1361. if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
  1362. EXT4_ERROR_INODE(inode,
  1363. "first_extent(path[%d].p_hdr) != ex",
  1364. depth);
  1365. return -EFSCORRUPTED;
  1366. }
  1367. while (--depth >= 0) {
  1368. ix = path[depth].p_idx;
  1369. if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
  1370. EXT4_ERROR_INODE(inode,
  1371. "ix != EXT_FIRST_INDEX *logical %d!",
  1372. *logical);
  1373. return -EFSCORRUPTED;
  1374. }
  1375. }
  1376. goto found_extent;
  1377. }
  1378. if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
  1379. EXT4_ERROR_INODE(inode,
  1380. "logical %d < ee_block %d + ee_len %d!",
  1381. *logical, le32_to_cpu(ex->ee_block), ee_len);
  1382. return -EFSCORRUPTED;
  1383. }
  1384. if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
  1385. /* next allocated block in this leaf */
  1386. ex++;
  1387. goto found_extent;
  1388. }
  1389. /* go up and search for index to the right */
  1390. while (--depth >= 0) {
  1391. ix = path[depth].p_idx;
  1392. if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
  1393. goto got_index;
  1394. }
  1395. /* we've gone up to the root and found no index to the right */
  1396. return 0;
  1397. got_index:
  1398. /* we've found index to the right, let's
  1399. * follow it and find the closest allocated
  1400. * block to the right */
  1401. ix++;
  1402. while (++depth < path->p_depth) {
  1403. /* subtract from p_depth to get proper eh_depth */
  1404. bh = read_extent_tree_block(inode, ix, path->p_depth - depth,
  1405. flags);
  1406. if (IS_ERR(bh))
  1407. return PTR_ERR(bh);
  1408. eh = ext_block_hdr(bh);
  1409. ix = EXT_FIRST_INDEX(eh);
  1410. put_bh(bh);
  1411. }
  1412. bh = read_extent_tree_block(inode, ix, path->p_depth - depth, flags);
  1413. if (IS_ERR(bh))
  1414. return PTR_ERR(bh);
  1415. eh = ext_block_hdr(bh);
  1416. ex = EXT_FIRST_EXTENT(eh);
  1417. found_extent:
  1418. *logical = le32_to_cpu(ex->ee_block);
  1419. *phys = ext4_ext_pblock(ex);
  1420. if (ret_ex)
  1421. *ret_ex = *ex;
  1422. if (bh)
  1423. put_bh(bh);
  1424. return 1;
  1425. }
  1426. /*
  1427. * ext4_ext_next_allocated_block:
  1428. * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
  1429. * NOTE: it considers block number from index entry as
  1430. * allocated block. Thus, index entries have to be consistent
  1431. * with leaves.
  1432. */
  1433. ext4_lblk_t
  1434. ext4_ext_next_allocated_block(struct ext4_ext_path *path)
  1435. {
  1436. int depth;
  1437. BUG_ON(path == NULL);
  1438. depth = path->p_depth;
  1439. if (depth == 0 && path->p_ext == NULL)
  1440. return EXT_MAX_BLOCKS;
  1441. while (depth >= 0) {
  1442. struct ext4_ext_path *p = &path[depth];
  1443. if (depth == path->p_depth) {
  1444. /* leaf */
  1445. if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
  1446. return le32_to_cpu(p->p_ext[1].ee_block);
  1447. } else {
  1448. /* index */
  1449. if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
  1450. return le32_to_cpu(p->p_idx[1].ei_block);
  1451. }
  1452. depth--;
  1453. }
  1454. return EXT_MAX_BLOCKS;
  1455. }
  1456. /*
  1457. * ext4_ext_next_leaf_block:
  1458. * returns first allocated block from next leaf or EXT_MAX_BLOCKS
  1459. */
  1460. static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
  1461. {
  1462. int depth;
  1463. BUG_ON(path == NULL);
  1464. depth = path->p_depth;
  1465. /* zero-tree has no leaf blocks at all */
  1466. if (depth == 0)
  1467. return EXT_MAX_BLOCKS;
  1468. /* go to index block */
  1469. depth--;
  1470. while (depth >= 0) {
  1471. if (path[depth].p_idx !=
  1472. EXT_LAST_INDEX(path[depth].p_hdr))
  1473. return (ext4_lblk_t)
  1474. le32_to_cpu(path[depth].p_idx[1].ei_block);
  1475. depth--;
  1476. }
  1477. return EXT_MAX_BLOCKS;
  1478. }
  1479. /*
  1480. * ext4_ext_correct_indexes:
  1481. * if leaf gets modified and modified extent is first in the leaf,
  1482. * then we have to correct all indexes above.
  1483. * TODO: do we need to correct tree in all cases?
  1484. */
  1485. static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
  1486. struct ext4_ext_path *path)
  1487. {
  1488. struct ext4_extent_header *eh;
  1489. int depth = ext_depth(inode);
  1490. struct ext4_extent *ex;
  1491. __le32 border;
  1492. int k, err = 0;
  1493. eh = path[depth].p_hdr;
  1494. ex = path[depth].p_ext;
  1495. if (unlikely(ex == NULL || eh == NULL)) {
  1496. EXT4_ERROR_INODE(inode,
  1497. "ex %p == NULL or eh %p == NULL", ex, eh);
  1498. return -EFSCORRUPTED;
  1499. }
  1500. if (depth == 0) {
  1501. /* there is no tree at all */
  1502. return 0;
  1503. }
  1504. if (ex != EXT_FIRST_EXTENT(eh)) {
  1505. /* we correct tree if first leaf got modified only */
  1506. return 0;
  1507. }
  1508. /*
  1509. * TODO: we need correction if border is smaller than current one
  1510. */
  1511. k = depth - 1;
  1512. border = path[depth].p_ext->ee_block;
  1513. err = ext4_ext_get_access(handle, inode, path + k);
  1514. if (err)
  1515. return err;
  1516. if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
  1517. EXT4_ERROR_INODE(inode,
  1518. "path[%d].p_idx %p > EXT_LAST_INDEX %p",
  1519. k, path[k].p_idx,
  1520. EXT_LAST_INDEX(path[k].p_hdr));
  1521. return -EFSCORRUPTED;
  1522. }
  1523. path[k].p_idx->ei_block = border;
  1524. err = ext4_ext_dirty(handle, inode, path + k);
  1525. if (err)
  1526. return err;
  1527. while (k--) {
  1528. /* change all left-side indexes */
  1529. if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
  1530. break;
  1531. err = ext4_ext_get_access(handle, inode, path + k);
  1532. if (err)
  1533. goto clean;
  1534. if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
  1535. EXT4_ERROR_INODE(inode,
  1536. "path[%d].p_idx %p > EXT_LAST_INDEX %p",
  1537. k, path[k].p_idx,
  1538. EXT_LAST_INDEX(path[k].p_hdr));
  1539. err = -EFSCORRUPTED;
  1540. goto clean;
  1541. }
  1542. path[k].p_idx->ei_block = border;
  1543. err = ext4_ext_dirty(handle, inode, path + k);
  1544. if (err)
  1545. goto clean;
  1546. }
  1547. return 0;
  1548. clean:
  1549. /*
  1550. * The path[k].p_bh is either unmodified or with no verified bit
  1551. * set (see ext4_ext_get_access()). So just clear the verified bit
  1552. * of the successfully modified extents buffers, which will force
  1553. * these extents to be checked to avoid using inconsistent data.
  1554. */
  1555. while (++k < depth)
  1556. clear_buffer_verified(path[k].p_bh);
  1557. return err;
  1558. }
  1559. static int ext4_can_extents_be_merged(struct inode *inode,
  1560. struct ext4_extent *ex1,
  1561. struct ext4_extent *ex2)
  1562. {
  1563. unsigned short ext1_ee_len, ext2_ee_len;
  1564. if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
  1565. return 0;
  1566. ext1_ee_len = ext4_ext_get_actual_len(ex1);
  1567. ext2_ee_len = ext4_ext_get_actual_len(ex2);
  1568. if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
  1569. le32_to_cpu(ex2->ee_block))
  1570. return 0;
  1571. if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
  1572. return 0;
  1573. if (ext4_ext_is_unwritten(ex1) &&
  1574. ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
  1575. return 0;
  1576. #ifdef AGGRESSIVE_TEST
  1577. if (ext1_ee_len >= 4)
  1578. return 0;
  1579. #endif
  1580. if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
  1581. return 1;
  1582. return 0;
  1583. }
  1584. /*
  1585. * This function tries to merge the "ex" extent to the next extent in the tree.
  1586. * It always tries to merge towards right. If you want to merge towards
  1587. * left, pass "ex - 1" as argument instead of "ex".
  1588. * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
  1589. * 1 if they got merged.
  1590. */
  1591. static int ext4_ext_try_to_merge_right(struct inode *inode,
  1592. struct ext4_ext_path *path,
  1593. struct ext4_extent *ex)
  1594. {
  1595. struct ext4_extent_header *eh;
  1596. unsigned int depth, len;
  1597. int merge_done = 0, unwritten;
  1598. depth = ext_depth(inode);
  1599. BUG_ON(path[depth].p_hdr == NULL);
  1600. eh = path[depth].p_hdr;
  1601. while (ex < EXT_LAST_EXTENT(eh)) {
  1602. if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
  1603. break;
  1604. /* merge with next extent! */
  1605. unwritten = ext4_ext_is_unwritten(ex);
  1606. ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
  1607. + ext4_ext_get_actual_len(ex + 1));
  1608. if (unwritten)
  1609. ext4_ext_mark_unwritten(ex);
  1610. if (ex + 1 < EXT_LAST_EXTENT(eh)) {
  1611. len = (EXT_LAST_EXTENT(eh) - ex - 1)
  1612. * sizeof(struct ext4_extent);
  1613. memmove(ex + 1, ex + 2, len);
  1614. }
  1615. le16_add_cpu(&eh->eh_entries, -1);
  1616. merge_done = 1;
  1617. WARN_ON(eh->eh_entries == 0);
  1618. if (!eh->eh_entries)
  1619. EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
  1620. }
  1621. return merge_done;
  1622. }
  1623. /*
  1624. * This function does a very simple check to see if we can collapse
  1625. * an extent tree with a single extent tree leaf block into the inode.
  1626. */
  1627. static void ext4_ext_try_to_merge_up(handle_t *handle,
  1628. struct inode *inode,
  1629. struct ext4_ext_path *path)
  1630. {
  1631. size_t s;
  1632. unsigned max_root = ext4_ext_space_root(inode, 0);
  1633. ext4_fsblk_t blk;
  1634. if ((path[0].p_depth != 1) ||
  1635. (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
  1636. (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
  1637. return;
  1638. /*
  1639. * We need to modify the block allocation bitmap and the block
  1640. * group descriptor to release the extent tree block. If we
  1641. * can't get the journal credits, give up.
  1642. */
  1643. if (ext4_journal_extend(handle, 2,
  1644. ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
  1645. return;
  1646. /*
  1647. * Copy the extent data up to the inode
  1648. */
  1649. blk = ext4_idx_pblock(path[0].p_idx);
  1650. s = le16_to_cpu(path[1].p_hdr->eh_entries) *
  1651. sizeof(struct ext4_extent_idx);
  1652. s += sizeof(struct ext4_extent_header);
  1653. path[1].p_maxdepth = path[0].p_maxdepth;
  1654. memcpy(path[0].p_hdr, path[1].p_hdr, s);
  1655. path[0].p_depth = 0;
  1656. path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
  1657. (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
  1658. path[0].p_hdr->eh_max = cpu_to_le16(max_root);
  1659. ext4_ext_path_brelse(path + 1);
  1660. ext4_free_blocks(handle, inode, NULL, blk, 1,
  1661. EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
  1662. }
  1663. /*
  1664. * This function tries to merge the @ex extent to neighbours in the tree, then
  1665. * tries to collapse the extent tree into the inode.
  1666. */
  1667. static void ext4_ext_try_to_merge(handle_t *handle,
  1668. struct inode *inode,
  1669. struct ext4_ext_path *path,
  1670. struct ext4_extent *ex)
  1671. {
  1672. struct ext4_extent_header *eh;
  1673. unsigned int depth;
  1674. int merge_done = 0;
  1675. depth = ext_depth(inode);
  1676. BUG_ON(path[depth].p_hdr == NULL);
  1677. eh = path[depth].p_hdr;
  1678. if (ex > EXT_FIRST_EXTENT(eh))
  1679. merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
  1680. if (!merge_done)
  1681. (void) ext4_ext_try_to_merge_right(inode, path, ex);
  1682. ext4_ext_try_to_merge_up(handle, inode, path);
  1683. }
  1684. /*
  1685. * check if a portion of the "newext" extent overlaps with an
  1686. * existing extent.
  1687. *
  1688. * If there is an overlap discovered, it updates the length of the newext
  1689. * such that there will be no overlap, and then returns 1.
  1690. * If there is no overlap found, it returns 0.
  1691. */
  1692. static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
  1693. struct inode *inode,
  1694. struct ext4_extent *newext,
  1695. struct ext4_ext_path *path)
  1696. {
  1697. ext4_lblk_t b1, b2;
  1698. unsigned int depth, len1;
  1699. unsigned int ret = 0;
  1700. b1 = le32_to_cpu(newext->ee_block);
  1701. len1 = ext4_ext_get_actual_len(newext);
  1702. depth = ext_depth(inode);
  1703. if (!path[depth].p_ext)
  1704. goto out;
  1705. b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
  1706. /*
  1707. * get the next allocated block if the extent in the path
  1708. * is before the requested block(s)
  1709. */
  1710. if (b2 < b1) {
  1711. b2 = ext4_ext_next_allocated_block(path);
  1712. if (b2 == EXT_MAX_BLOCKS)
  1713. goto out;
  1714. b2 = EXT4_LBLK_CMASK(sbi, b2);
  1715. }
  1716. /* check for wrap through zero on extent logical start block*/
  1717. if (b1 + len1 < b1) {
  1718. len1 = EXT_MAX_BLOCKS - b1;
  1719. newext->ee_len = cpu_to_le16(len1);
  1720. ret = 1;
  1721. }
  1722. /* check for overlap */
  1723. if (b1 + len1 > b2) {
  1724. newext->ee_len = cpu_to_le16(b2 - b1);
  1725. ret = 1;
  1726. }
  1727. out:
  1728. return ret;
  1729. }
  1730. /*
  1731. * ext4_ext_insert_extent:
  1732. * tries to merge requested extent into the existing extent or
  1733. * inserts requested extent as new one into the tree,
  1734. * creating new leaf in the no-space case.
  1735. */
  1736. struct ext4_ext_path *
  1737. ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
  1738. struct ext4_ext_path *path,
  1739. struct ext4_extent *newext, int gb_flags)
  1740. {
  1741. struct ext4_extent_header *eh;
  1742. struct ext4_extent *ex, *fex;
  1743. struct ext4_extent *nearex; /* nearest extent */
  1744. int depth, len, err = 0;
  1745. ext4_lblk_t next;
  1746. int mb_flags = 0, unwritten;
  1747. KUNIT_STATIC_STUB_REDIRECT(ext4_ext_insert_extent, handle, inode, path,
  1748. newext, gb_flags);
  1749. if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
  1750. mb_flags |= EXT4_MB_DELALLOC_RESERVED;
  1751. if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
  1752. EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
  1753. err = -EFSCORRUPTED;
  1754. goto errout;
  1755. }
  1756. depth = ext_depth(inode);
  1757. ex = path[depth].p_ext;
  1758. eh = path[depth].p_hdr;
  1759. if (unlikely(path[depth].p_hdr == NULL)) {
  1760. EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
  1761. err = -EFSCORRUPTED;
  1762. goto errout;
  1763. }
  1764. /* try to insert block into found extent and return */
  1765. if (ex && !(gb_flags & EXT4_GET_BLOCKS_SPLIT_NOMERGE)) {
  1766. /*
  1767. * Try to see whether we should rather test the extent on
  1768. * right from ex, or from the left of ex. This is because
  1769. * ext4_find_extent() can return either extent on the
  1770. * left, or on the right from the searched position. This
  1771. * will make merging more effective.
  1772. */
  1773. if (ex < EXT_LAST_EXTENT(eh) &&
  1774. (le32_to_cpu(ex->ee_block) +
  1775. ext4_ext_get_actual_len(ex) <
  1776. le32_to_cpu(newext->ee_block))) {
  1777. ex += 1;
  1778. goto prepend;
  1779. } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
  1780. (le32_to_cpu(newext->ee_block) +
  1781. ext4_ext_get_actual_len(newext) <
  1782. le32_to_cpu(ex->ee_block)))
  1783. ex -= 1;
  1784. /* Try to append newex to the ex */
  1785. if (ext4_can_extents_be_merged(inode, ex, newext)) {
  1786. ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
  1787. "(from %llu)\n",
  1788. ext4_ext_is_unwritten(newext),
  1789. ext4_ext_get_actual_len(newext),
  1790. le32_to_cpu(ex->ee_block),
  1791. ext4_ext_is_unwritten(ex),
  1792. ext4_ext_get_actual_len(ex),
  1793. ext4_ext_pblock(ex));
  1794. err = ext4_ext_get_access(handle, inode,
  1795. path + depth);
  1796. if (err)
  1797. goto errout;
  1798. unwritten = ext4_ext_is_unwritten(ex);
  1799. ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
  1800. + ext4_ext_get_actual_len(newext));
  1801. if (unwritten)
  1802. ext4_ext_mark_unwritten(ex);
  1803. nearex = ex;
  1804. goto merge;
  1805. }
  1806. prepend:
  1807. /* Try to prepend newex to the ex */
  1808. if (ext4_can_extents_be_merged(inode, newext, ex)) {
  1809. ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
  1810. "(from %llu)\n",
  1811. le32_to_cpu(newext->ee_block),
  1812. ext4_ext_is_unwritten(newext),
  1813. ext4_ext_get_actual_len(newext),
  1814. le32_to_cpu(ex->ee_block),
  1815. ext4_ext_is_unwritten(ex),
  1816. ext4_ext_get_actual_len(ex),
  1817. ext4_ext_pblock(ex));
  1818. err = ext4_ext_get_access(handle, inode,
  1819. path + depth);
  1820. if (err)
  1821. goto errout;
  1822. unwritten = ext4_ext_is_unwritten(ex);
  1823. ex->ee_block = newext->ee_block;
  1824. ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
  1825. ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
  1826. + ext4_ext_get_actual_len(newext));
  1827. if (unwritten)
  1828. ext4_ext_mark_unwritten(ex);
  1829. nearex = ex;
  1830. goto merge;
  1831. }
  1832. }
  1833. depth = ext_depth(inode);
  1834. eh = path[depth].p_hdr;
  1835. if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
  1836. goto has_space;
  1837. /* probably next leaf has space for us? */
  1838. fex = EXT_LAST_EXTENT(eh);
  1839. next = EXT_MAX_BLOCKS;
  1840. if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
  1841. next = ext4_ext_next_leaf_block(path);
  1842. if (next != EXT_MAX_BLOCKS) {
  1843. struct ext4_ext_path *npath;
  1844. ext_debug(inode, "next leaf block - %u\n", next);
  1845. npath = ext4_find_extent(inode, next, NULL, gb_flags);
  1846. if (IS_ERR(npath)) {
  1847. err = PTR_ERR(npath);
  1848. goto errout;
  1849. }
  1850. BUG_ON(npath->p_depth != path->p_depth);
  1851. eh = npath[depth].p_hdr;
  1852. if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
  1853. ext_debug(inode, "next leaf isn't full(%d)\n",
  1854. le16_to_cpu(eh->eh_entries));
  1855. ext4_free_ext_path(path);
  1856. path = npath;
  1857. goto has_space;
  1858. }
  1859. ext_debug(inode, "next leaf has no free space(%d,%d)\n",
  1860. le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
  1861. ext4_free_ext_path(npath);
  1862. }
  1863. /*
  1864. * There is no free space in the found leaf.
  1865. * We're gonna add a new leaf in the tree.
  1866. */
  1867. if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
  1868. mb_flags |= EXT4_MB_USE_RESERVED;
  1869. path = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
  1870. path, newext);
  1871. if (IS_ERR(path))
  1872. return path;
  1873. depth = ext_depth(inode);
  1874. eh = path[depth].p_hdr;
  1875. has_space:
  1876. nearex = path[depth].p_ext;
  1877. err = ext4_ext_get_access(handle, inode, path + depth);
  1878. if (err)
  1879. goto errout;
  1880. if (!nearex) {
  1881. /* there is no extent in this leaf, create first one */
  1882. ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
  1883. le32_to_cpu(newext->ee_block),
  1884. ext4_ext_pblock(newext),
  1885. ext4_ext_is_unwritten(newext),
  1886. ext4_ext_get_actual_len(newext));
  1887. nearex = EXT_FIRST_EXTENT(eh);
  1888. } else {
  1889. if (le32_to_cpu(newext->ee_block)
  1890. > le32_to_cpu(nearex->ee_block)) {
  1891. /* Insert after */
  1892. ext_debug(inode, "insert %u:%llu:[%d]%d before: "
  1893. "nearest %p\n",
  1894. le32_to_cpu(newext->ee_block),
  1895. ext4_ext_pblock(newext),
  1896. ext4_ext_is_unwritten(newext),
  1897. ext4_ext_get_actual_len(newext),
  1898. nearex);
  1899. nearex++;
  1900. } else {
  1901. /* Insert before */
  1902. BUG_ON(newext->ee_block == nearex->ee_block);
  1903. ext_debug(inode, "insert %u:%llu:[%d]%d after: "
  1904. "nearest %p\n",
  1905. le32_to_cpu(newext->ee_block),
  1906. ext4_ext_pblock(newext),
  1907. ext4_ext_is_unwritten(newext),
  1908. ext4_ext_get_actual_len(newext),
  1909. nearex);
  1910. }
  1911. len = EXT_LAST_EXTENT(eh) - nearex + 1;
  1912. if (len > 0) {
  1913. ext_debug(inode, "insert %u:%llu:[%d]%d: "
  1914. "move %d extents from 0x%p to 0x%p\n",
  1915. le32_to_cpu(newext->ee_block),
  1916. ext4_ext_pblock(newext),
  1917. ext4_ext_is_unwritten(newext),
  1918. ext4_ext_get_actual_len(newext),
  1919. len, nearex, nearex + 1);
  1920. memmove(nearex + 1, nearex,
  1921. len * sizeof(struct ext4_extent));
  1922. }
  1923. }
  1924. le16_add_cpu(&eh->eh_entries, 1);
  1925. path[depth].p_ext = nearex;
  1926. nearex->ee_block = newext->ee_block;
  1927. ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
  1928. nearex->ee_len = newext->ee_len;
  1929. merge:
  1930. /* try to merge extents */
  1931. if (!(gb_flags & EXT4_GET_BLOCKS_SPLIT_NOMERGE))
  1932. ext4_ext_try_to_merge(handle, inode, path, nearex);
  1933. /* time to correct all indexes above */
  1934. err = ext4_ext_correct_indexes(handle, inode, path);
  1935. if (err)
  1936. goto errout;
  1937. err = ext4_ext_dirty(handle, inode, path + path->p_depth);
  1938. if (err)
  1939. goto errout;
  1940. return path;
  1941. errout:
  1942. ext4_free_ext_path(path);
  1943. return ERR_PTR(err);
  1944. }
  1945. static int ext4_fill_es_cache_info(struct inode *inode,
  1946. ext4_lblk_t block, ext4_lblk_t num,
  1947. struct fiemap_extent_info *fieinfo)
  1948. {
  1949. ext4_lblk_t next, end = block + num - 1;
  1950. struct extent_status es;
  1951. unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
  1952. unsigned int flags;
  1953. int err;
  1954. while (block <= end) {
  1955. next = 0;
  1956. flags = 0;
  1957. if (!ext4_es_lookup_extent(inode, block, &next, &es, NULL))
  1958. break;
  1959. if (ext4_es_is_unwritten(&es))
  1960. flags |= FIEMAP_EXTENT_UNWRITTEN;
  1961. if (ext4_es_is_delayed(&es))
  1962. flags |= (FIEMAP_EXTENT_DELALLOC |
  1963. FIEMAP_EXTENT_UNKNOWN);
  1964. if (ext4_es_is_hole(&es))
  1965. flags |= EXT4_FIEMAP_EXTENT_HOLE;
  1966. if (next == 0)
  1967. flags |= FIEMAP_EXTENT_LAST;
  1968. if (flags & (FIEMAP_EXTENT_DELALLOC|
  1969. EXT4_FIEMAP_EXTENT_HOLE))
  1970. es.es_pblk = 0;
  1971. else
  1972. es.es_pblk = ext4_es_pblock(&es);
  1973. err = fiemap_fill_next_extent(fieinfo,
  1974. (__u64)es.es_lblk << blksize_bits,
  1975. (__u64)es.es_pblk << blksize_bits,
  1976. (__u64)es.es_len << blksize_bits,
  1977. flags);
  1978. if (next == 0)
  1979. break;
  1980. block = next;
  1981. if (err < 0)
  1982. return err;
  1983. if (err == 1)
  1984. return 0;
  1985. }
  1986. return 0;
  1987. }
  1988. /*
  1989. * ext4_ext_find_hole - find hole around given block according to the given path
  1990. * @inode: inode we lookup in
  1991. * @path: path in extent tree to @lblk
  1992. * @lblk: pointer to logical block around which we want to determine hole
  1993. *
  1994. * Determine hole length (and start if easily possible) around given logical
  1995. * block. We don't try too hard to find the beginning of the hole but @path
  1996. * actually points to extent before @lblk, we provide it.
  1997. *
  1998. * The function returns the length of a hole starting at @lblk. We update @lblk
  1999. * to the beginning of the hole if we managed to find it.
  2000. */
  2001. static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
  2002. struct ext4_ext_path *path,
  2003. ext4_lblk_t *lblk)
  2004. {
  2005. int depth = ext_depth(inode);
  2006. struct ext4_extent *ex;
  2007. ext4_lblk_t len;
  2008. ex = path[depth].p_ext;
  2009. if (ex == NULL) {
  2010. /* there is no extent yet, so gap is [0;-] */
  2011. *lblk = 0;
  2012. len = EXT_MAX_BLOCKS;
  2013. } else if (*lblk < le32_to_cpu(ex->ee_block)) {
  2014. len = le32_to_cpu(ex->ee_block) - *lblk;
  2015. } else if (*lblk >= le32_to_cpu(ex->ee_block)
  2016. + ext4_ext_get_actual_len(ex)) {
  2017. ext4_lblk_t next;
  2018. *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
  2019. next = ext4_ext_next_allocated_block(path);
  2020. BUG_ON(next == *lblk);
  2021. len = next - *lblk;
  2022. } else {
  2023. BUG();
  2024. }
  2025. return len;
  2026. }
  2027. /*
  2028. * ext4_ext_rm_idx:
  2029. * removes index from the index block.
  2030. */
  2031. static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  2032. struct ext4_ext_path *path, int depth)
  2033. {
  2034. int err;
  2035. ext4_fsblk_t leaf;
  2036. int k = depth - 1;
  2037. /* free index block */
  2038. leaf = ext4_idx_pblock(path[k].p_idx);
  2039. if (unlikely(path[k].p_hdr->eh_entries == 0)) {
  2040. EXT4_ERROR_INODE(inode, "path[%d].p_hdr->eh_entries == 0", k);
  2041. return -EFSCORRUPTED;
  2042. }
  2043. err = ext4_ext_get_access(handle, inode, path + k);
  2044. if (err)
  2045. return err;
  2046. if (path[k].p_idx != EXT_LAST_INDEX(path[k].p_hdr)) {
  2047. int len = EXT_LAST_INDEX(path[k].p_hdr) - path[k].p_idx;
  2048. len *= sizeof(struct ext4_extent_idx);
  2049. memmove(path[k].p_idx, path[k].p_idx + 1, len);
  2050. }
  2051. le16_add_cpu(&path[k].p_hdr->eh_entries, -1);
  2052. err = ext4_ext_dirty(handle, inode, path + k);
  2053. if (err)
  2054. return err;
  2055. ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
  2056. trace_ext4_ext_rm_idx(inode, leaf);
  2057. ext4_free_blocks(handle, inode, NULL, leaf, 1,
  2058. EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
  2059. while (--k >= 0) {
  2060. if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr))
  2061. break;
  2062. err = ext4_ext_get_access(handle, inode, path + k);
  2063. if (err)
  2064. goto clean;
  2065. path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block;
  2066. err = ext4_ext_dirty(handle, inode, path + k);
  2067. if (err)
  2068. goto clean;
  2069. }
  2070. return 0;
  2071. clean:
  2072. /*
  2073. * The path[k].p_bh is either unmodified or with no verified bit
  2074. * set (see ext4_ext_get_access()). So just clear the verified bit
  2075. * of the successfully modified extents buffers, which will force
  2076. * these extents to be checked to avoid using inconsistent data.
  2077. */
  2078. while (++k < depth)
  2079. clear_buffer_verified(path[k].p_bh);
  2080. return err;
  2081. }
  2082. /*
  2083. * ext4_ext_calc_credits_for_single_extent:
  2084. * This routine returns max. credits that needed to insert an extent
  2085. * to the extent tree.
  2086. * When pass the actual path, the caller should calculate credits
  2087. * under i_data_sem.
  2088. */
  2089. int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
  2090. struct ext4_ext_path *path)
  2091. {
  2092. if (path) {
  2093. int depth = ext_depth(inode);
  2094. int ret = 0;
  2095. /* probably there is space in leaf? */
  2096. if (le16_to_cpu(path[depth].p_hdr->eh_entries)
  2097. < le16_to_cpu(path[depth].p_hdr->eh_max)) {
  2098. /*
  2099. * There are some space in the leaf tree, no
  2100. * need to account for leaf block credit
  2101. *
  2102. * bitmaps and block group descriptor blocks
  2103. * and other metadata blocks still need to be
  2104. * accounted.
  2105. */
  2106. /* 1 bitmap, 1 block group descriptor */
  2107. ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
  2108. return ret;
  2109. }
  2110. }
  2111. return ext4_chunk_trans_blocks(inode, nrblocks);
  2112. }
  2113. /*
  2114. * How many index/leaf blocks need to change/allocate to add @extents extents?
  2115. *
  2116. * If we add a single extent, then in the worse case, each tree level
  2117. * index/leaf need to be changed in case of the tree split.
  2118. *
  2119. * If more extents are inserted, they could cause the whole tree split more
  2120. * than once, but this is really rare.
  2121. */
  2122. int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
  2123. {
  2124. int index;
  2125. /* If we are converting the inline data, only one is needed here. */
  2126. if (ext4_has_inline_data(inode))
  2127. return 1;
  2128. /*
  2129. * Extent tree can change between the time we estimate credits and
  2130. * the time we actually modify the tree. Assume the worst case.
  2131. */
  2132. if (extents <= 1)
  2133. index = (EXT4_MAX_EXTENT_DEPTH * 2) + extents;
  2134. else
  2135. index = (EXT4_MAX_EXTENT_DEPTH * 3) +
  2136. DIV_ROUND_UP(extents, ext4_ext_space_block(inode, 0));
  2137. return index;
  2138. }
  2139. static inline int get_default_free_blocks_flags(struct inode *inode)
  2140. {
  2141. if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
  2142. ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
  2143. return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
  2144. else if (ext4_should_journal_data(inode))
  2145. return EXT4_FREE_BLOCKS_FORGET;
  2146. return 0;
  2147. }
  2148. /*
  2149. * ext4_rereserve_cluster - increment the reserved cluster count when
  2150. * freeing a cluster with a pending reservation
  2151. *
  2152. * @inode - file containing the cluster
  2153. * @lblk - logical block in cluster to be reserved
  2154. *
  2155. * Increments the reserved cluster count and adjusts quota in a bigalloc
  2156. * file system when freeing a partial cluster containing at least one
  2157. * delayed and unwritten block. A partial cluster meeting that
  2158. * requirement will have a pending reservation. If so, the
  2159. * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
  2160. * defer reserved and allocated space accounting to a subsequent call
  2161. * to this function.
  2162. */
  2163. static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
  2164. {
  2165. struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  2166. struct ext4_inode_info *ei = EXT4_I(inode);
  2167. dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
  2168. spin_lock(&ei->i_block_reservation_lock);
  2169. ei->i_reserved_data_blocks++;
  2170. percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
  2171. spin_unlock(&ei->i_block_reservation_lock);
  2172. percpu_counter_add(&sbi->s_freeclusters_counter, 1);
  2173. ext4_remove_pending(inode, lblk);
  2174. }
  /*
   * ext4_remove_blocks - free the physical blocks behind the tail of an extent
   *
   * @handle:  journal handle, passed through to ext4_free_blocks()
   * @inode:   inode owning the extent
   * @ex:      extent whose tail is removed
   * @partial: partial cluster state carried between calls; read and updated
   * @from:    first logical block to remove
   * @to:      last logical block to remove; must be the last block of @ex
   *
   * Only tail removal is supported: when @from lies before the start of @ex
   * or @to is not the last block of @ex, the request is reported through
   * ext4_error() and nothing is freed.
   *
   * The on-disk extent itself is not modified here; only blocks are freed
   * and @partial is updated.
   *
   * Always returns 0, including for a rejected request.
   */
  2175. static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
  2176. struct ext4_extent *ex,
  2177. struct partial_cluster *partial,
  2178. ext4_lblk_t from, ext4_lblk_t to)
  2179. {
  2180. struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  2181. unsigned short ee_len = ext4_ext_get_actual_len(ex);
  2182. ext4_fsblk_t last_pblk, pblk;
  2183. ext4_lblk_t num;
  2184. int flags;
  2185. /* only extent tail removal is allowed */
  2186. if (from < le32_to_cpu(ex->ee_block) ||
  2187. to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
  2188. ext4_error(sbi->s_sb,
  2189. "strange request: removal(2) %u-%u from %u:%u",
  2190. from, to, le32_to_cpu(ex->ee_block), ee_len);
  /* NOTE(review): the rejected request is still reported as success */
  2191. return 0;
  2192. }
  /* optional statistics, updated under s_ext_stats_lock */
  2193. #ifdef EXTENTS_STATS
  2194. spin_lock(&sbi->s_ext_stats_lock);
  2195. sbi->s_ext_blocks += ee_len;
  2196. sbi->s_ext_extents++;
  2197. if (ee_len < sbi->s_ext_min)
  2198. sbi->s_ext_min = ee_len;
  2199. if (ee_len > sbi->s_ext_max)
  2200. sbi->s_ext_max = ee_len;
  2201. if (ext_depth(inode) > sbi->s_depth_max)
  2202. sbi->s_depth_max = ext_depth(inode);
  2203. spin_unlock(&sbi->s_ext_stats_lock);
  2204. #endif
  2205. trace_ext4_remove_blocks(inode, ex, from, to, partial);
  2206. /*
  2207. * if we have a partial cluster, and it's different from the
  2208. * cluster of the last block in the extent, we free it
  2209. */
  2210. last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
  2211. if (partial->state != initial &&
  2212. partial->pclu != EXT4_B2C(sbi, last_pblk)) {
  /* only a tofree cluster is released; a nofree one is merely forgotten */
  2213. if (partial->state == tofree) {
  2214. flags = get_default_free_blocks_flags(inode);
  2215. if (ext4_is_pending(inode, partial->lblk))
  2216. flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
  2217. ext4_free_blocks(handle, inode, NULL,
  2218. EXT4_C2B(sbi, partial->pclu),
  2219. sbi->s_cluster_ratio, flags);
  2220. if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
  2221. ext4_rereserve_cluster(inode, partial->lblk);
  2222. }
  2223. partial->state = initial;
  2224. }
  /*
   * num:  number of blocks removed, counted from @from to the extent's end
   * pblk: physical block that corresponds to @from
   */
  2225. num = le32_to_cpu(ex->ee_block) + ee_len - from;
  2226. pblk = ext4_ext_pblock(ex) + ee_len - num;
  2227. /*
  2228. * We free the partial cluster at the end of the extent (if any),
  2229. * unless the cluster is used by another extent (partial_cluster
  2230. * state is nofree). If a partial cluster exists here, it must be
  2231. * shared with the last block in the extent.
  2232. */
  2233. flags = get_default_free_blocks_flags(inode);
  2234. /* partial, left end cluster aligned, right end unaligned */
  2235. if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
  2236. (EXT4_LBLK_CMASK(sbi, to) >= from) &&
  2237. (partial->state != nofree)) {
  2238. if (ext4_is_pending(inode, to))
  2239. flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
  2240. ext4_free_blocks(handle, inode, NULL,
  2241. EXT4_PBLK_CMASK(sbi, last_pblk),
  2242. sbi->s_cluster_ratio, flags);
  2243. if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
  2244. ext4_rereserve_cluster(inode, to);
  2245. partial->state = initial;
  /* start again from the default flags for the main free below */
  2246. flags = get_default_free_blocks_flags(inode);
  2247. }
  2248. flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
  2249. /*
  2250. * For bigalloc file systems, we never free a partial cluster
  2251. * at the beginning of the extent. Instead, we check to see if we
  2252. * need to free it on a subsequent call to ext4_remove_blocks,
  2253. * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
  2254. */
  2255. flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
  2256. ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
  2257. /* reset the partial cluster if we've freed past it */
  2258. if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
  2259. partial->state = initial;
  2260. /*
  2261. * If we've freed the entire extent but the beginning is not left
  2262. * cluster aligned and is not marked as ineligible for freeing we
  2263. * record the partial cluster at the beginning of the extent. It
  2264. * wasn't freed by the preceding ext4_free_blocks() call, and we
  2265. * need to look farther to the left to determine if it's to be freed
  2266. * (not shared with another extent). Else, reset the partial
  2267. * cluster - we're either done freeing or the beginning of the
  2268. * extent is left cluster aligned.
  2269. */
  2270. if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
  2271. if (partial->state == initial) {
  2272. partial->pclu = EXT4_B2C(sbi, pblk);
  2273. partial->lblk = from;
  2274. partial->state = tofree;
  2275. }
  2276. } else {
  2277. partial->state = initial;
  2278. }
  2279. return 0;
  2280. }
  2281. /*
   * ext4_ext_rm_leaf() - remove the blocks in [@start, @end] from the extents
   * of a single leaf, walking the leaf from its last extent towards its
   * first.
   *
   * Extents lying entirely beyond @end are skipped. For every other extent
   * visited, the range to remove must reach the extent's last block (only
   * whole-extent or tail removal is handled); otherwise the inode is flagged
   * through EXT4_ERROR_INODE() and -EFSCORRUPTED is returned.
   *
   * @handle:  The journal handle
   * @inode:   The file's inode
   * @path:    The path to the leaf
   * @partial: Partial cluster state shared with ext4_remove_blocks() and
   *           ext4_ext_remove_space(). State "tofree" names a cluster that
   *           is freed once it turns out not to be shared with a remaining
   *           extent; state "nofree" names a cluster in use by an extent
   *           that is kept, which therefore must not be freed.
   * @start:   The first block to remove
   * @end:     The last block to remove
   *
   * Returns 0 on success, -EAGAIN when ext4_datasem_ensure_credits() returns
   * a positive value, -EFSCORRUPTED on an inconsistent leaf or request, or
   * the error of a failing helper.
   */
  2296. static int
  2297. ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
  2298. struct ext4_ext_path *path,
  2299. struct partial_cluster *partial,
  2300. ext4_lblk_t start, ext4_lblk_t end)
  2301. {
  2302. struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  2303. int err = 0, correct_index = 0;
  2304. int depth = ext_depth(inode), credits, revoke_credits;
  2305. struct ext4_extent_header *eh;
  2306. ext4_lblk_t a, b;
  2307. unsigned num;
  2308. ext4_lblk_t ex_ee_block;
  2309. unsigned short ex_ee_len;
  2310. unsigned unwritten = 0;
  2311. struct ext4_extent *ex;
  2312. ext4_fsblk_t pblk;
  2313. /* the header must be checked already in ext4_ext_remove_space() */
  2314. ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
  2315. if (!path[depth].p_hdr)
  2316. path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
  2317. eh = path[depth].p_hdr;
  2318. if (unlikely(path[depth].p_hdr == NULL)) {
  2319. EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
  2320. return -EFSCORRUPTED;
  2321. }
  2322. /* find where to start removing */
  2323. ex = path[depth].p_ext;
  2324. if (!ex)
  2325. ex = EXT_LAST_EXTENT(eh);
  2326. ex_ee_block = le32_to_cpu(ex->ee_block);
  2327. ex_ee_len = ext4_ext_get_actual_len(ex);
  2328. trace_ext4_ext_rm_leaf(inode, start, ex, partial);
  /* walk leftwards while the current extent still reaches past @start */
  2329. while (ex >= EXT_FIRST_EXTENT(eh) &&
  2330. ex_ee_block + ex_ee_len > start) {
  2331. if (ext4_ext_is_unwritten(ex))
  2332. unwritten = 1;
  2333. else
  2334. unwritten = 0;
  2335. ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
  2336. unwritten, ex_ee_len);
  2337. path[depth].p_ext = ex;
  /* [a, b] is the part of this extent that overlaps [start, end] */
  2338. a = max(ex_ee_block, start);
  2339. b = min(ex_ee_block + ex_ee_len - 1, end);
  2340. ext_debug(inode, " border %u:%u\n", a, b);
  2341. /* If this extent is beyond the end of the hole, skip it */
  2342. if (end < ex_ee_block) {
  2343. /*
  2344. * We're going to skip this extent and move to another,
  2345. * so note that its first cluster is in use to avoid
  2346. * freeing it when removing blocks. Eventually, the
  2347. * right edge of the truncated/punched region will
  2348. * be just to the left.
  2349. */
  2350. if (sbi->s_cluster_ratio > 1) {
  2351. pblk = ext4_ext_pblock(ex);
  2352. partial->pclu = EXT4_B2C(sbi, pblk);
  2353. partial->state = nofree;
  2354. }
  2355. ex--;
  2356. ex_ee_block = le32_to_cpu(ex->ee_block);
  2357. ex_ee_len = ext4_ext_get_actual_len(ex);
  2358. continue;
  2359. } else if (b != ex_ee_block + ex_ee_len - 1) {
  2360. EXT4_ERROR_INODE(inode,
  2361. "can not handle truncate %u:%u "
  2362. "on extent %u:%u",
  2363. start, end, ex_ee_block,
  2364. ex_ee_block + ex_ee_len - 1);
  2365. err = -EFSCORRUPTED;
  2366. goto out;
  2367. } else if (a != ex_ee_block) {
  2368. /* remove tail of the extent */
  /* num becomes the length the extent keeps */
  2369. num = a - ex_ee_block;
  2370. } else {
  2371. /* remove whole extent: excellent! */
  2372. num = 0;
  2373. }
  2374. /*
  2375. * 3 for leaf, sb, and inode plus 2 (bmap and group
  2376. * descriptor) for each block group; assume two block
  2377. * groups plus ex_ee_len/blocks_per_block_group for
  2378. * the worst case
  2379. */
  2380. credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
  2381. if (ex == EXT_FIRST_EXTENT(eh)) {
  /* touching the first extent: index correction may be needed below */
  2382. correct_index = 1;
  2383. credits += (ext_depth(inode)) + 1;
  2384. }
  2385. credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
  2386. /*
  2387. * We may end up freeing some index blocks and data from the
  2388. * punched range. Note that partial clusters are accounted for
  2389. * by ext4_free_data_revoke_credits().
  2390. */
  2391. revoke_credits =
  2392. ext4_free_metadata_revoke_credits(inode->i_sb,
  2393. ext_depth(inode)) +
  2394. ext4_free_data_revoke_credits(inode, b - a + 1);
  2395. err = ext4_datasem_ensure_credits(handle, inode, credits,
  2396. credits, revoke_credits);
  2397. if (err) {
  /* a positive result is reported to the caller as -EAGAIN */
  2398. if (err > 0)
  2399. err = -EAGAIN;
  2400. goto out;
  2401. }
  2402. err = ext4_ext_get_access(handle, inode, path + depth);
  2403. if (err)
  2404. goto out;
  2405. err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
  2406. if (err)
  2407. goto out;
  2408. if (num == 0)
  2409. /* this extent is removed; mark slot entirely unused */
  2410. ext4_ext_store_pblock(ex, 0);
  2411. ex->ee_len = cpu_to_le16(num);
  2412. /*
  2413. * Do not mark unwritten if all the blocks in the
  2414. * extent have been removed.
  2415. */
  2416. if (unwritten && num)
  2417. ext4_ext_mark_unwritten(ex);
  2418. /*
  2419. * If the extent was completely released,
  2420. * we need to remove it from the leaf
  2421. */
  2422. if (num == 0) {
  2423. if (end != EXT_MAX_BLOCKS - 1) {
  2424. /*
  2425. * For hole punching, we need to scoot all the
  2426. * extents up when an extent is removed so that
  2427. * we dont have blank extents in the middle
  2428. */
  2429. memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
  2430. sizeof(struct ext4_extent));
  2431. /* Now get rid of the one at the end */
  2432. memset(EXT_LAST_EXTENT(eh), 0,
  2433. sizeof(struct ext4_extent));
  2434. }
  2435. le16_add_cpu(&eh->eh_entries, -1);
  2436. }
  2437. err = ext4_ext_dirty(handle, inode, path + depth);
  2438. if (err)
  2439. goto out;
  2440. ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
  2441. ext4_ext_pblock(ex));
  /*
   * NOTE(review): after this decrement ex may point before
   * EXT_FIRST_EXTENT(eh); the two values read below are only acted upon
   * once the loop condition has re-checked the bound.
   */
  2442. ex--;
  2443. ex_ee_block = le32_to_cpu(ex->ee_block);
  2444. ex_ee_len = ext4_ext_get_actual_len(ex);
  2445. }
  /* index keys are only corrected when the leaf still holds extents */
  2446. if (correct_index && eh->eh_entries)
  2447. err = ext4_ext_correct_indexes(handle, inode, path);
  2448. /*
  2449. * If there's a partial cluster and at least one extent remains in
  2450. * the leaf, free the partial cluster if it isn't shared with the
  2451. * current extent. If it is shared with the current extent
  2452. * we reset the partial cluster because we've reached the start of the
  2453. * truncated/punched region and we're done removing blocks.
  2454. */
  2455. if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
  2456. pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
  2457. if (partial->pclu != EXT4_B2C(sbi, pblk)) {
  2458. int flags = get_default_free_blocks_flags(inode);
  2459. if (ext4_is_pending(inode, partial->lblk))
  2460. flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
  2461. ext4_free_blocks(handle, inode, NULL,
  2462. EXT4_C2B(sbi, partial->pclu),
  2463. sbi->s_cluster_ratio, flags);
  2464. if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
  2465. ext4_rereserve_cluster(inode, partial->lblk);
  2466. }
  2467. partial->state = initial;
  2468. }
  2469. /* if this leaf is free, then we should
  2470. * remove it from index block above */
  2471. if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
  2472. err = ext4_ext_rm_idx(handle, inode, path, depth);
  2473. out:
  2474. return err;
  2475. }
  2476. /*
  2477. * ext4_ext_more_to_rm:
  2478. * returns 1 if current index has to be freed (even partial)
  2479. */
  2480. static int
  2481. ext4_ext_more_to_rm(struct ext4_ext_path *path)
  2482. {
  2483. BUG_ON(path->p_idx == NULL);
  2484. if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
  2485. return 0;
  2486. /*
  2487. * if truncate on deeper level happened, it wasn't partial,
  2488. * so we have to consider current index for truncation
  2489. */
  2490. if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
  2491. return 0;
  2492. return 1;
  2493. }
  /*
   * ext4_ext_remove_space - remove logical blocks [@start, @end] from the
   * extent tree of @inode
   *
   * @inode: inode whose extent tree is modified
   * @start: first logical block to remove
   * @end:   last logical block to remove
   *
   * The function starts its own journal handle (EXT4_HT_TRUNCATE) and stops
   * it before returning.
   *
   * When @end is below EXT_MAX_BLOCKS - 1 the range ends inside the tree, so
   * an extent that contains @end is first split at @end + 1; that way
   * ext4_ext_rm_leaf() only ever has to remove extent tails. The tree is then
   * walked from its right edge, removing extents with ext4_ext_rm_leaf() and
   * emptied index blocks with ext4_ext_rm_idx(). If the root ends up with no
   * entries, the root header is reset to depth 0.
   *
   * An -EAGAIN result restarts the whole procedure on the same handle.
   *
   * Returns 0 on success or a negative error code.
   */
  2494. int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
  2495. ext4_lblk_t end)
  2496. {
  2497. struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  2498. int depth = ext_depth(inode);
  2499. struct ext4_ext_path *path = NULL;
  2500. struct partial_cluster partial;
  2501. handle_t *handle;
  2502. int i = 0, err = 0;
  2503. int flags = EXT4_EX_NOCACHE | EXT4_EX_NOFAIL;
  2504. partial.pclu = 0;
  2505. partial.lblk = 0;
  2506. partial.state = initial;
  2507. ext_debug(inode, "truncate since %u to %u\n", start, end);
  2508. /* probably first extent we're gonna free will be last in block */
  2509. handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
  2510. depth + 1,
  2511. ext4_free_metadata_revoke_credits(inode->i_sb, depth));
  2512. if (IS_ERR(handle))
  2513. return PTR_ERR(handle);
  /* restart point for -EAGAIN; path is NULL again whenever we get here */
  2514. again:
  2515. trace_ext4_ext_remove_space(inode, start, end, depth);
  2516. /*
  2517. * Check if we are removing extents inside the extent tree. If that
  2518. * is the case, we are going to punch a hole inside the extent tree
  2519. * so we have to check whether we need to split the extent covering
  2520. * the last block to remove so we can easily remove the part of it
  2521. * in ext4_ext_rm_leaf().
  2522. */
  2523. if (end < EXT_MAX_BLOCKS - 1) {
  2524. struct ext4_extent *ex;
  2525. ext4_lblk_t ee_block, ex_end, lblk;
  2526. ext4_fsblk_t pblk;
  2527. /* find extent for or closest extent to this block */
  2528. path = ext4_find_extent(inode, end, NULL, flags);
  2529. if (IS_ERR(path)) {
  2530. ext4_journal_stop(handle);
  2531. return PTR_ERR(path);
  2532. }
  2533. depth = ext_depth(inode);
  2534. /* Leaf may not exist only if inode has no blocks at all */
  2535. ex = path[depth].p_ext;
  2536. if (!ex) {
  2537. if (depth) {
  /*
   * NOTE(review): the message names p_hdr although the
   * condition tested here is p_ext == NULL.
   */
  2538. EXT4_ERROR_INODE(inode,
  2539. "path[%d].p_hdr == NULL",
  2540. depth);
  2541. err = -EFSCORRUPTED;
  2542. }
  2543. goto out;
  2544. }
  2545. ee_block = le32_to_cpu(ex->ee_block);
  2546. ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
  2547. /*
  2548. * See if the last block is inside the extent, if so split
  2549. * the extent at 'end' block so we can easily remove the
  2550. * tail of the first part of the split extent in
  2551. * ext4_ext_rm_leaf().
  2552. */
  2553. if (end >= ee_block && end < ex_end) {
  2554. /*
  2555. * If we're going to split the extent, note that
  2556. * the cluster containing the block after 'end' is
  2557. * in use to avoid freeing it when removing blocks.
  2558. */
  2559. if (sbi->s_cluster_ratio > 1) {
  2560. pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
  2561. partial.pclu = EXT4_B2C(sbi, pblk);
  2562. partial.state = nofree;
  2563. }
  2564. /*
  2565. * Split the extent in two so that 'end' is the last
  2566. * block in the first new extent. Also we should not
  2567. * fail removing space due to ENOSPC so try to use
  2568. * reserved block if that happens.
  2569. */
  2570. path = ext4_force_split_extent_at(handle, inode, path,
  2571. end + 1, 1);
  2572. if (IS_ERR(path)) {
  2573. err = PTR_ERR(path);
  2574. goto out;
  2575. }
  2576. } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
  2577. partial.state == initial) {
  2578. /*
  2579. * If we're punching, there's an extent to the right.
  2580. * If the partial cluster hasn't been set, set it to
  2581. * that extent's first cluster and its state to nofree
  2582. * so it won't be freed should it contain blocks to be
  2583. * removed. If it's already set (tofree/nofree), we're
  2584. * retrying and keep the original partial cluster info
  2585. * so a cluster marked tofree as a result of earlier
  2586. * extent removal is not lost.
  2587. */
  2588. lblk = ex_end + 1;
  2589. err = ext4_ext_search_right(inode, path, &lblk, &pblk,
  2590. NULL, flags);
  2591. if (err < 0)
  2592. goto out;
  2593. if (pblk) {
  2594. partial.pclu = EXT4_B2C(sbi, pblk);
  2595. partial.state = nofree;
  2596. }
  2597. }
  2598. }
  2599. /*
  2600. * We start scanning from right side, freeing all the blocks
  2601. * after i_size and walking into the tree depth-wise.
  2602. */
  2603. depth = ext_depth(inode);
  2604. if (path) {
  /*
   * A path from the lookup above exists: start at the leaf (i = depth)
   * and preset p_block of the index levels 1..depth-1 to eh_entries + 1.
   */
  2605. int k = i = depth;
  2606. while (--k > 0)
  2607. path[k].p_block =
  2608. le16_to_cpu(path[k].p_hdr->eh_entries)+1;
  2609. } else {
  /* no lookup was done: build a fresh path and start from the root */
  2610. path = kzalloc_objs(struct ext4_ext_path, depth + 1,
  2611. GFP_NOFS | __GFP_NOFAIL);
  2612. path[0].p_maxdepth = path[0].p_depth = depth;
  2613. path[0].p_hdr = ext_inode_hdr(inode);
  2614. i = 0;
  2615. if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
  2616. err = -EFSCORRUPTED;
  2617. goto out;
  2618. }
  2619. }
  2620. err = 0;
  /* i is the current tree level: 0 is the root, depth is the leaf level */
  2621. while (i >= 0 && err == 0) {
  2622. if (i == depth) {
  2623. /* this is leaf block */
  2624. err = ext4_ext_rm_leaf(handle, inode, path,
  2625. &partial, start, end);
  2626. /* root level has p_bh == NULL, brelse() eats this */
  2627. ext4_ext_path_brelse(path + i);
  2628. i--;
  2629. continue;
  2630. }
  2631. /* this is index block */
  2632. if (!path[i].p_hdr) {
  2633. ext_debug(inode, "initialize header\n");
  2634. path[i].p_hdr = ext_block_hdr(path[i].p_bh);
  2635. }
  2636. if (!path[i].p_idx) {
  2637. /* this level hasn't been touched yet */
  2638. path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
  2639. path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
  2640. ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
  2641. path[i].p_hdr,
  2642. le16_to_cpu(path[i].p_hdr->eh_entries));
  2643. } else {
  2644. /* we were already here, see at next index */
  2645. path[i].p_idx--;
  2646. }
  2647. ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
  2648. i, EXT_FIRST_INDEX(path[i].p_hdr),
  2649. path[i].p_idx);
  2650. if (ext4_ext_more_to_rm(path + i)) {
  2651. struct buffer_head *bh;
  2652. /* go to the next level */
  2653. ext_debug(inode, "move to level %d (block %llu)\n",
  2654. i + 1, ext4_idx_pblock(path[i].p_idx));
  2655. memset(path + i + 1, 0, sizeof(*path));
  2656. bh = read_extent_tree_block(inode, path[i].p_idx,
  2657. depth - i - 1, flags);
  2658. if (IS_ERR(bh)) {
  2659. /* should we reset i_size? */
  2660. err = PTR_ERR(bh);
  2661. break;
  2662. }
  2663. /* Yield here to deal with large extent trees.
  2664. * Should be a no-op if we did IO above. */
  2665. cond_resched();
  2666. if (WARN_ON(i + 1 > depth)) {
  2667. err = -EFSCORRUPTED;
  2668. break;
  2669. }
  2670. path[i + 1].p_bh = bh;
  2671. /* save actual number of indexes since this
  2672. * number is changed at the next iteration */
  2673. path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
  2674. i++;
  2675. } else {
  2676. /* we finished processing this index, go up */
  2677. if (path[i].p_hdr->eh_entries == 0 && i > 0) {
  2678. /* index is empty, remove it;
  2679. * handle must be already prepared by the
  2680. * truncatei_leaf() */
  2681. err = ext4_ext_rm_idx(handle, inode, path, i);
  2682. }
  2683. /* root level has p_bh == NULL, brelse() eats this */
  2684. ext4_ext_path_brelse(path + i);
  2685. i--;
  2686. ext_debug(inode, "return to level %d\n", i);
  2687. }
  2688. }
  2689. trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
  2690. path->p_hdr->eh_entries);
  2691. /*
  2692. * if there's a partial cluster and we have removed the first extent
  2693. * in the file, then we also free the partial cluster, if any
  2694. */
  2695. if (partial.state == tofree && err == 0) {
  /* NOTE(review): this local shadows the function-scope 'flags' */
  2696. int flags = get_default_free_blocks_flags(inode);
  2697. if (ext4_is_pending(inode, partial.lblk))
  2698. flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
  2699. ext4_free_blocks(handle, inode, NULL,
  2700. EXT4_C2B(sbi, partial.pclu),
  2701. sbi->s_cluster_ratio, flags);
  2702. if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
  2703. ext4_rereserve_cluster(inode, partial.lblk);
  2704. partial.state = initial;
  2705. }
  2706. /* TODO: flexible tree reduction should be here */
  2707. if (path->p_hdr->eh_entries == 0) {
  2708. /*
  2709. * truncate to zero freed all the tree,
  2710. * so we need to correct eh_depth
  2711. */
  /*
   * NOTE(review): err is assigned unconditionally here, so an error
   * left by the loop above is replaced by this call's result.
   */
  2712. err = ext4_ext_get_access(handle, inode, path);
  2713. if (err == 0) {
  2714. ext_inode_hdr(inode)->eh_depth = 0;
  2715. ext_inode_hdr(inode)->eh_max =
  2716. cpu_to_le16(ext4_ext_space_root(inode, 0));
  2717. err = ext4_ext_dirty(handle, inode, path);
  2718. }
  2719. }
  2720. out:
  2721. ext4_free_ext_path(path);
  2722. path = NULL;
  /* retry from scratch; partial cluster state is kept across the retry */
  2723. if (err == -EAGAIN)
  2724. goto again;
  2725. ext4_journal_stop(handle);
  2726. return err;
  2727. }
  /*
   * ext4_ext_init - called at mount time
   *
   * Does nothing unless the extents feature is enabled on @sb. With any of
   * the AGGRESSIVE_TEST / CHECK_BINSEARCH / EXTENTS_STATS build options set,
   * a banner naming the active options is printed; with EXTENTS_STATS the
   * statistics lock and the min/max trackers are initialised as well.
   */
  void ext4_ext_init(struct super_block *sb)
  {
          /*
           * possible initialization would be here
           */
          if (!ext4_has_feature_extents(sb))
                  return;

  #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
          printk(KERN_INFO "EXT4-fs: file extents enabled"
  #ifdef AGGRESSIVE_TEST
                 ", aggressive tests"
  #endif
  #ifdef CHECK_BINSEARCH
                 ", check binsearch"
  #endif
  #ifdef EXTENTS_STATS
                 ", stats"
  #endif
                 "\n");
  #endif

  #ifdef EXTENTS_STATS
          spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
          EXT4_SB(sb)->s_ext_min = 1 << 30;
          EXT4_SB(sb)->s_ext_max = 0;
  #endif
  }
  /*
   * ext4_ext_release - called at umount time
   *
   * Does nothing unless the extents feature is enabled on @sb. When built
   * with EXTENTS_STATS and both the block and extent counters are non-zero,
   * the collected extent statistics are printed.
   */
  void ext4_ext_release(struct super_block *sb)
  {
          if (!ext4_has_feature_extents(sb))
                  return;

  #ifdef EXTENTS_STATS
          {
                  struct ext4_sb_info *sbi = EXT4_SB(sb);

                  /* nothing was recorded, so there is nothing to report */
                  if (!sbi->s_ext_blocks || !sbi->s_ext_extents)
                          return;

                  printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
                         sbi->s_ext_blocks, sbi->s_ext_extents,
                         sbi->s_ext_blocks / sbi->s_ext_extents);
                  printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
                         sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
          }
  #endif
  }
  2775. static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
  2776. {
  2777. ext4_lblk_t ee_block;
  2778. ext4_fsblk_t ee_pblock;
  2779. unsigned int ee_len;
  2780. ee_block = le32_to_cpu(ex->ee_block);
  2781. ee_len = ext4_ext_get_actual_len(ex);
  2782. ee_pblock = ext4_ext_pblock(ex);
  2783. if (ee_len == 0)
  2784. return;
  2785. ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
  2786. EXTENT_STATUS_WRITTEN, false);
  2787. }
  2788. /* FIXME!! we need to try to merge to left or right after zero-out */
  2789. int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
  2790. {
  2791. ext4_fsblk_t ee_pblock;
  2792. unsigned int ee_len;
  2793. KUNIT_STATIC_STUB_REDIRECT(ext4_ext_zeroout, inode, ex);
  2794. ee_len = ext4_ext_get_actual_len(ex);
  2795. ee_pblock = ext4_ext_pblock(ex);
  2796. return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
  2797. ee_len);
  2798. }
  2799. /*
   * ext4_split_extent_at() splits an extent at a given block.
   *
   * @handle: the journal handle
   * @inode: the file inode
   * @path: the path to the extent
   * @split: the logical block where the extent is split
   * @flags: flags used to insert the new extent into the extent tree;
   *         EXT4_EX_NOCACHE is always added
   *
   * Splits extent [a, b] into two extents [a, @split) and [@split, b], whose
   * states are the same as that of the original extent. No conversion is
   * performed. If @split is the first block of the extent, nothing is
   * changed and @path is returned as is.
   *
   * Return an extent path pointer on success, or an error pointer on failure.
   * When inserting the second half fails with -ENOSPC, -EDQUOT or -ENOMEM,
   * the extent is looked up again and its original length is restored before
   * the error is returned. On every failure the extent status entries that
   * cover the original extent are removed.
   */
  2815. static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
  2816. struct inode *inode,
  2817. struct ext4_ext_path *path,
  2818. ext4_lblk_t split,
  2819. int flags)
  2820. {
  2821. ext4_fsblk_t newblock;
  2822. ext4_lblk_t ee_block;
  2823. struct ext4_extent *ex, newex, orig_ex;
  2824. struct ext4_extent *ex2 = NULL;
  2825. unsigned int ee_len, depth;
  2826. int err = 0, insert_err = 0, is_unwrit = 0;
  2827. /* Do not cache extents that are in the process of being modified. */
  2828. flags |= EXT4_EX_NOCACHE;
  2829. ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
  2830. ext4_ext_show_leaf(inode, path);
  2831. depth = ext_depth(inode);
  2832. ex = path[depth].p_ext;
  2833. ee_block = le32_to_cpu(ex->ee_block);
  2834. ee_len = ext4_ext_get_actual_len(ex);
  /* physical block that corresponds to logical block @split */
  2835. newblock = split - ee_block + ext4_ext_pblock(ex);
  2836. is_unwrit = ext4_ext_is_unwritten(ex);
  /* @split has to lie inside the extent */
  2837. BUG_ON(split < ee_block || split >= (ee_block + ee_len));
  2838. /*
  2839. * No split needed
  2840. */
  2841. if (split == ee_block)
  2842. goto out;
  2843. err = ext4_ext_get_access(handle, inode, path + depth);
  2844. if (err)
  2845. goto out;
  2846. /* case a */
  /* keep a copy so the original length can be restored on failure */
  2847. memcpy(&orig_ex, ex, sizeof(orig_ex));
  /* shrink the existing extent to the first half, [ee_block, split) */
  2848. ex->ee_len = cpu_to_le16(split - ee_block);
  2849. if (is_unwrit)
  2850. ext4_ext_mark_unwritten(ex);
  2851. /*
  2852. * path may lead to new leaf, not to original leaf any more
  2853. * after ext4_ext_insert_extent() returns,
  2854. */
  2855. err = ext4_ext_dirty(handle, inode, path + depth);
  2856. if (err)
  2857. goto fix_extent_len;
  /* build the second half, starting at @split, and insert it */
  2858. ex2 = &newex;
  2859. ex2->ee_block = cpu_to_le32(split);
  2860. ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
  2861. ext4_ext_store_pblock(ex2, newblock);
  2862. if (is_unwrit)
  2863. ext4_ext_mark_unwritten(ex2);
  2864. path = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
  2865. if (!IS_ERR(path))
  2866. return path;
  /* insertion failed: path now holds the error pointer */
  2867. insert_err = PTR_ERR(path);
  2868. err = 0;
  /* any other insert error is returned without restoring the length */
  2869. if (insert_err != -ENOSPC && insert_err != -EDQUOT &&
  2870. insert_err != -ENOMEM)
  2871. goto out_path;
  2872. /*
  2873. * Get a new path to try to zeroout or fix the extent length.
  2874. * Using EXT4_EX_NOFAIL guarantees that ext4_find_extent()
  2875. * will not return -ENOMEM, otherwise -ENOMEM will cause a
  2876. * retry in do_writepages(), and a WARN_ON may be triggered
  2877. * in ext4_da_update_reserve_space() due to an incorrect
  2878. * ee_len causing the i_reserved_data_blocks exception.
  2879. */
  2880. path = ext4_find_extent(inode, ee_block, NULL, flags | EXT4_EX_NOFAIL);
  2881. if (IS_ERR(path)) {
  2882. EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld",
  2883. split, PTR_ERR(path));
  2884. goto out_path;
  2885. }
  2886. depth = ext_depth(inode);
  2887. ex = path[depth].p_ext;
  2888. if (!ex) {
  2889. EXT4_ERROR_INODE(inode,
  2890. "bad extent address lblock: %lu, depth: %d pblock %llu",
  2891. (unsigned long)ee_block, depth, path[depth].p_block);
  2892. err = -EFSCORRUPTED;
  2893. goto out;
  2894. }
  2895. err = ext4_ext_get_access(handle, inode, path + depth);
  2896. if (err)
  2897. goto out;
  /* undo the shrink: put the original length back and dirty the leaf */
  2898. fix_extent_len:
  2899. ex->ee_len = orig_ex.ee_len;
  2900. err = ext4_ext_dirty(handle, inode, path + path->p_depth);
  2901. out:
  /* an error from the recovery steps takes precedence over insert_err */
  2902. if (err || insert_err) {
  2903. ext4_free_ext_path(path);
  2904. path = err ? ERR_PTR(err) : ERR_PTR(insert_err);
  2905. }
  2906. out_path:
  2907. if (IS_ERR(path))
  2908. /* Remove all remaining potentially stale extents. */
  2909. ext4_es_remove_extent(inode, ee_block, ee_len);
  2910. ext4_ext_show_leaf(inode, path);
  2911. return path;
  2912. }
  2913. static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode,
  2914. struct ext4_ext_path *path,
  2915. struct ext4_map_blocks *map, int flags)
  2916. {
  2917. struct ext4_extent *ex;
  2918. unsigned int ee_len, depth;
  2919. ext4_lblk_t ee_block;
  2920. uint64_t lblk, pblk, len;
  2921. int is_unwrit;
  2922. int err = 0;
  2923. depth = ext_depth(inode);
  2924. ex = path[depth].p_ext;
  2925. ee_block = le32_to_cpu(ex->ee_block);
  2926. ee_len = ext4_ext_get_actual_len(ex);
  2927. is_unwrit = ext4_ext_is_unwritten(ex);
  2928. if (flags & EXT4_GET_BLOCKS_CONVERT) {
  2929. /*
  2930. * EXT4_GET_BLOCKS_CONVERT: Caller wants the range specified by
  2931. * map to be initialized. Zeroout everything except the map
  2932. * range.
  2933. */
  2934. loff_t map_end = (loff_t) map->m_lblk + map->m_len;
  2935. loff_t ex_end = (loff_t) ee_block + ee_len;
  2936. if (!is_unwrit)
  2937. /* Shouldn't happen. Just exit */
  2938. return -EINVAL;
  2939. /* zeroout left */
  2940. if (map->m_lblk > ee_block) {
  2941. lblk = ee_block;
  2942. len = map->m_lblk - ee_block;
  2943. pblk = ext4_ext_pblock(ex);
  2944. err = ext4_issue_zeroout(inode, lblk, pblk, len);
  2945. if (err)
  2946. /* ZEROOUT failed, just return original error */
  2947. return err;
  2948. }
  2949. /* zeroout right */
  2950. if (map_end < ex_end) {
  2951. lblk = map_end;
  2952. len = ex_end - map_end;
  2953. pblk = ext4_ext_pblock(ex) + (map_end - ee_block);
  2954. err = ext4_issue_zeroout(inode, lblk, pblk, len);
  2955. if (err)
  2956. /* ZEROOUT failed, just return original error */
  2957. return err;
  2958. }
  2959. } else if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
  2960. /*
  2961. * EXT4_GET_BLOCKS_CONVERT_UNWRITTEN: Caller wants the
  2962. * range specified by map to be marked unwritten.
  2963. * Zeroout the map range leaving rest as it is.
  2964. */
  2965. if (is_unwrit)
  2966. /* Shouldn't happen. Just exit */
  2967. return -EINVAL;
  2968. lblk = map->m_lblk;
  2969. len = map->m_len;
  2970. pblk = ext4_ext_pblock(ex) + (map->m_lblk - ee_block);
  2971. err = ext4_issue_zeroout(inode, lblk, pblk, len);
  2972. if (err)
  2973. /* ZEROOUT failed, just return original error */
  2974. return err;
  2975. } else {
  2976. /*
  2977. * We no longer perform unwritten to unwritten splits in IO paths.
  2978. * Hence this should not happen.
  2979. */
  2980. WARN_ON_ONCE(true);
  2981. return -EINVAL;
  2982. }
  2983. err = ext4_ext_get_access(handle, inode, path + depth);
  2984. if (err)
  2985. return err;
  2986. ext4_ext_mark_initialized(ex);
  2987. err = ext4_ext_dirty(handle, inode, path + depth);
  2988. if (err)
  2989. return err;
  2990. return 0;
  2991. }
/*
 * ext4_split_extent() splits an extent so that the range covered by @map
 * ends up on extent boundaries.
 *
 * It may result in splitting the extent into multiple extents (up to three)
 * There are three possibilities:
 *   a> There is no split required
 *   b> Splits in two extents: Split is happening at either end of the extent
 *   c> Splits in three extents: Someone is splitting in middle of the extent
 *
 * If a split fails with -ENOSPC, -EDQUOT or -ENOMEM and @split_flag contains
 * EXT4_EXT_MAY_ZEROOUT, the function falls back to
 * ext4_split_extent_zeroout() on the (expected to be unchanged) original
 * extent.
 *
 * @allocated, when not NULL, receives the number of blocks starting at
 * map->m_lblk that are covered, capped at map->m_len.
 * @did_zeroout, when not NULL, is set to true if the zeroout fallback was
 * used successfully; it is not written otherwise.
 *
 * Return an extent path pointer on success, or an error pointer on failure.
 */
static struct ext4_ext_path *ext4_split_extent(handle_t *handle,
					       struct inode *inode,
					       struct ext4_ext_path *path,
					       struct ext4_map_blocks *map,
					       int split_flag, int flags,
					       unsigned int *allocated, bool *did_zeroout)
{
	ext4_lblk_t ee_block, orig_ee_block;
	struct ext4_extent *ex;
	unsigned int ee_len, orig_ee_len, depth;
	int unwritten, orig_unwritten;
	int orig_err = 0;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	unwritten = ext4_ext_is_unwritten(ex);

	/*
	 * Remember what the extent looked like; the zeroout fallback below
	 * only proceeds if the extent it finds still matches these values.
	 */
	orig_ee_block = ee_block;
	orig_ee_len = ee_len;
	orig_unwritten = unwritten;

	/* Do not cache extents that are in the process of being modified. */
	flags |= EXT4_EX_NOCACHE;

	/* The mapped range ends before the extent does: split off the tail. */
	if (map->m_lblk + map->m_len < ee_block + ee_len) {
		path = ext4_split_extent_at(handle, inode, path,
					    map->m_lblk + map->m_len, flags);
		if (IS_ERR(path))
			goto try_zeroout;

		/*
		 * Update path is required because previous ext4_split_extent_at
		 * may result in split of original leaf or extent zeroout.
		 */
		path = ext4_find_extent(inode, map->m_lblk, path, flags);
		if (IS_ERR(path))
			goto try_zeroout;

		depth = ext_depth(inode);
		ex = path[depth].p_ext;
		if (!ex) {
			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
					(unsigned long) map->m_lblk);
			ext4_free_ext_path(path);
			return ERR_PTR(-EFSCORRUPTED);
		}

		/* extent would have changed so update original values */
		orig_ee_block = le32_to_cpu(ex->ee_block);
		orig_ee_len = ext4_ext_get_actual_len(ex);
		orig_unwritten = ext4_ext_is_unwritten(ex);
	}

	/*
	 * Split at the start of the mapped range. Note that ee_block here is
	 * still the start of the extent as read on entry.
	 */
	if (map->m_lblk >= ee_block) {
		path = ext4_split_extent_at(handle, inode, path, map->m_lblk,
					    flags);
		if (IS_ERR(path))
			goto try_zeroout;
	}

	goto success;

try_zeroout:
	/*
	 * There was an error in splitting the extent. So instead, just zeroout
	 * unwritten portions and convert it to initialized as a last resort. If
	 * there is any failure here we just return the original error
	 */

	/* path is an error pointer on every jump to this label. */
	orig_err = PTR_ERR(path);
	if (orig_err != -ENOSPC && orig_err != -EDQUOT && orig_err != -ENOMEM)
		goto out_orig_err;

	/* we can't zeroout? just return the original err */
	if (!(split_flag & EXT4_EXT_MAY_ZEROOUT))
		goto out_orig_err;

	/*
	 * For initialized -> unwritten conversion the zeroed range is the
	 * mapped range itself, so refuse if it exceeds the configured
	 * s_extent_max_zeroout_kb limit (converted from KiB to blocks).
	 */
	if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
		int max_zeroout_blks =
			EXT4_SB(inode->i_sb)->s_extent_max_zeroout_kb >>
			(inode->i_sb->s_blocksize_bits - 10);

		if (map->m_len > max_zeroout_blks)
			goto out_orig_err;
	}

	/* Look the extent up from scratch; the old path is gone. */
	path = ext4_find_extent(inode, map->m_lblk, NULL, flags);
	if (IS_ERR(path))
		goto out_orig_err;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	unwritten = ext4_ext_is_unwritten(ex);

	/* extent to zeroout should have been unchanged but its not */
	if (WARN_ON(ee_block != orig_ee_block || ee_len != orig_ee_len ||
		    unwritten != orig_unwritten))
		goto out_free_path;

	/* The zeroout error itself is dropped; orig_err is what gets returned. */
	if (ext4_split_extent_zeroout(handle, inode, path, map, flags))
		goto out_free_path;

	/* zeroout succeeded */
	if (did_zeroout)
		*did_zeroout = true;

success:
	/*
	 * Report how many blocks from map->m_lblk on are covered, based on
	 * ee_block/ee_len as last read in this function.
	 */
	if (allocated) {
		if (map->m_lblk + map->m_len > ee_block + ee_len)
			*allocated = ee_len - (map->m_lblk - ee_block);
		else
			*allocated = map->m_len;
	}
	ext4_ext_show_leaf(inode, path);
	return path;

out_free_path:
	ext4_free_ext_path(path);
out_orig_err:
	return ERR_PTR(orig_err);

}
/*
 * This function is called by ext4_ext_map_blocks() if someone tries to write
 * to an unwritten extent. It may result in splitting the unwritten
 * extent into multiple extents (up to three - one initialized and two
 * unwritten).
 * There are three possibilities:
 *   a> There is no split required: Entire extent should be initialized
 *   b> Splits in two extents: Write is happening at either end of the extent
 *   c> Splits in three extents: Someone is writing in middle of the extent
 *
 * Pre-conditions:
 *  - The extent pointed to by 'path' is unwritten.
 *  - The extent pointed to by 'path' contains a superset
 *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).
 *
 * Post-conditions on success:
 *  - *allocated is the number of blocks beyond map->m_lblk
 *    that are allocated and initialized.
 *    It is guaranteed to be >= map->m_len.
 *
 * Return an extent path pointer on success, or an error pointer on failure.
 */
static struct ext4_ext_path *
ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
			struct ext4_map_blocks *map, struct ext4_ext_path *path,
			int flags, unsigned int *allocated)
{
	struct ext4_sb_info *sbi;
	struct ext4_extent_header *eh;
	struct ext4_map_blocks split_map;
	struct ext4_extent zero_ex1, zero_ex2;
	struct ext4_extent *ex, *abut_ex;
	ext4_lblk_t ee_block, eof_block;
	unsigned int ee_len, depth, map_len = map->m_len;
	int err = 0;
	unsigned int max_zeroout = 0;

	ext_debug(inode, "logical block %llu, max_blocks %u\n",
		  (unsigned long long)map->m_lblk, map_len);

	sbi = EXT4_SB(inode->i_sb);
	/*
	 * First block past i_disksize (rounded up to a block), extended to
	 * the end of the mapped range if that lies further out.
	 */
	eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
			>> inode->i_sb->s_blocksize_bits;
	if (eof_block < map->m_lblk + map_len)
		eof_block = map->m_lblk + map_len;

	depth = ext_depth(inode);
	eh = path[depth].p_hdr;
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	/* A zero ee_len marks "nothing was zeroed" for the calls at out:. */
	zero_ex1.ee_len = 0;
	zero_ex2.ee_len = 0;

	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);

	/* Pre-conditions */
	BUG_ON(!ext4_ext_is_unwritten(ex));
	BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));

	/*
	 * Attempt to transfer newly initialized blocks from the currently
	 * unwritten extent to its neighbor. This is much cheaper
	 * than an insertion followed by a merge as those involve costly
	 * memmove() calls. Transferring to the left is the common case in
	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
	 * followed by append writes.
	 *
	 * Limitations of the current logic:
	 *  - L1: we do not deal with writes covering the whole extent.
	 *    This would require removing the extent if the transfer
	 *    is possible.
	 *  - L2: we only attempt to merge with an extent stored in the
	 *    same extent tree node.
	 */
	*allocated = 0;
	if ((map->m_lblk == ee_block) &&
		/* See if we can merge left */
		(map_len < ee_len) &&		/*L1*/
		(ex > EXT_FIRST_EXTENT(eh))) {	/*L2*/
		ext4_lblk_t prev_lblk;
		ext4_fsblk_t prev_pblk, ee_pblk;
		unsigned int prev_len;

		abut_ex = ex - 1;
		prev_lblk = le32_to_cpu(abut_ex->ee_block);
		prev_len = ext4_ext_get_actual_len(abut_ex);
		prev_pblk = ext4_ext_pblock(abut_ex);
		ee_pblk = ext4_ext_pblock(ex);

		/*
		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
		 * upon those conditions:
		 * - C1: abut_ex is initialized,
		 * - C2: abut_ex is logically abutting ex,
		 * - C3: abut_ex is physically abutting ex,
		 * - C4: abut_ex can receive the additional blocks without
		 *   overflowing the (initialized) length limit.
		 */
		if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/
			((prev_lblk + prev_len) == ee_block) &&		/*C2*/
			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
			(prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
			err = ext4_ext_get_access(handle, inode, path + depth);
			if (err)
				goto errout;

			trace_ext4_ext_convert_to_initialized_fastpath(inode,
				map, ex, abut_ex);

			/* Shift the start of ex by 'map_len' blocks */
			ex->ee_block = cpu_to_le32(ee_block + map_len);
			ext4_ext_store_pblock(ex, ee_pblk + map_len);
			ex->ee_len = cpu_to_le16(ee_len - map_len);
			ext4_ext_mark_unwritten(ex); /* Restore the flag */

			/* Extend abut_ex by 'map_len' blocks */
			abut_ex->ee_len = cpu_to_le16(prev_len + map_len);

			/* Result: number of initialized blocks past m_lblk */
			*allocated = map_len;
		}
	} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
		   (map_len < ee_len) &&	/*L1*/
		   ex < EXT_LAST_EXTENT(eh)) {	/*L2*/
		/* See if we can merge right */
		ext4_lblk_t next_lblk;
		ext4_fsblk_t next_pblk, ee_pblk;
		unsigned int next_len;

		abut_ex = ex + 1;
		next_lblk = le32_to_cpu(abut_ex->ee_block);
		next_len = ext4_ext_get_actual_len(abut_ex);
		next_pblk = ext4_ext_pblock(abut_ex);
		ee_pblk = ext4_ext_pblock(ex);

		/*
		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
		 * upon those conditions:
		 * - C1: abut_ex is initialized,
		 * - C2: abut_ex is logically abutting ex,
		 * - C3: abut_ex is physically abutting ex,
		 * - C4: abut_ex can receive the additional blocks without
		 *   overflowing the (initialized) length limit.
		 */
		if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/
		    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/
		    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/
		    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
			err = ext4_ext_get_access(handle, inode, path + depth);
			if (err)
				goto errout;

			trace_ext4_ext_convert_to_initialized_fastpath(inode,
				map, ex, abut_ex);

			/* Shift the start of abut_ex by 'map_len' blocks */
			abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
			ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
			ex->ee_len = cpu_to_le16(ee_len - map_len);
			ext4_ext_mark_unwritten(ex); /* Restore the flag */

			/* Extend abut_ex by 'map_len' blocks */
			abut_ex->ee_len = cpu_to_le16(next_len + map_len);

			/* Result: number of initialized blocks past m_lblk */
			*allocated = map_len;
		}
	}
	/*
	 * A non-zero *allocated means one of the transfers above happened and
	 * abut_ex is set; otherwise fall through to the split/zeroout logic
	 * with *allocated covering m_lblk up to the end of the extent.
	 */
	if (*allocated) {
		/* Mark the block containing both extents as dirty */
		err = ext4_ext_dirty(handle, inode, path + depth);

		/* Update path to point to the right extent */
		path[depth].p_ext = abut_ex;
		if (err)
			goto errout;
		goto out;
	} else
		*allocated = ee_len - (map->m_lblk - ee_block);

	WARN_ON(map->m_lblk < ee_block);
	/*
	 * It is safe to convert extent to initialized via explicit
	 * zeroout only if extent is fully inside i_size or new_size.
	 */
	/* s_extent_max_zeroout_kb is in KiB; convert it to a block count. */
	if (ee_block + ee_len <= eof_block)
		max_zeroout = sbi->s_extent_max_zeroout_kb >>
			(inode->i_sb->s_blocksize_bits - 10);

	/*
	 * five cases:
	 * 1. split the extent into three extents.
	 * 2. split the extent into two extents, zeroout the head of the first
	 *    extent.
	 * 3. split the extent into two extents, zeroout the tail of the second
	 *    extent.
	 * 4. split the extent into two extents with out zeroout.
	 * 5. no splitting needed, just possibly zeroout the head and / or the
	 *    tail of the extent.
	 */
	/*
	 * split_map starts out as a copy of the request and is widened below
	 * whenever a neighbouring part of the extent has been zeroed out.
	 */
	split_map.m_lblk = map->m_lblk;
	split_map.m_len = map->m_len;

	if (max_zeroout && (*allocated > split_map.m_len)) {
		if (*allocated <= max_zeroout) {
			/* case 3 or 5 */
			zero_ex1.ee_block =
				 cpu_to_le32(split_map.m_lblk +
					     split_map.m_len);
			zero_ex1.ee_len =
				cpu_to_le16(*allocated - split_map.m_len);
			ext4_ext_store_pblock(&zero_ex1,
				ext4_ext_pblock(ex) + split_map.m_lblk +
				split_map.m_len - ee_block);
			err = ext4_ext_zeroout(inode, &zero_ex1);
			if (err)
				goto fallback;
			split_map.m_len = *allocated;
		}
		if (split_map.m_lblk - ee_block + split_map.m_len <
								max_zeroout) {
			/* case 2 or 5 */
			if (split_map.m_lblk != ee_block) {
				zero_ex2.ee_block = ex->ee_block;
				zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
							ee_block);
				ext4_ext_store_pblock(&zero_ex2,
						      ext4_ext_pblock(ex));
				err = ext4_ext_zeroout(inode, &zero_ex2);
				if (err)
					goto fallback;
			}

			split_map.m_len += split_map.m_lblk - ee_block;
			split_map.m_lblk = ee_block;
			*allocated = map->m_len;
		}
	}

fallback:
	/*
	 * Reached both normally and after a failed zeroout; in either case
	 * split and convert whatever range split_map describes at this point.
	 */
	path = ext4_split_convert_extents(handle, inode, &split_map, path,
					  flags | EXT4_GET_BLOCKS_CONVERT, NULL);
	if (IS_ERR(path))
		return path;
out:
	/* If we have gotten a failure, don't zero out status tree */
	ext4_zeroout_es(inode, &zero_ex1);
	ext4_zeroout_es(inode, &zero_ex2);
	return path;

errout:
	ext4_free_ext_path(path);
	return ERR_PTR(err);
}
  3335. /*
  3336. * This function is called by ext4_ext_map_blocks() from
  3337. * ext4_get_blocks_dio_write() when DIO to write
  3338. * to an unwritten extent.
  3339. *
  3340. * Writing to an unwritten extent may result in splitting the unwritten
  3341. * extent into multiple initialized/unwritten extents (up to three)
  3342. * There are three possibilities:
  3343. * a> There is no split required: Entire extent should be unwritten
  3344. * b> Splits in two extents: Write is happening at either end of the extent
  3345. * c> Splits in three extents: Somone is writing in middle of the extent
  3346. *
  3347. * This works the same way in the case of initialized -> unwritten conversion.
  3348. *
  3349. * One of more index blocks maybe needed if the extent tree grow after
  3350. * the unwritten extent split. To prevent ENOSPC occur at the IO
  3351. * complete, we need to split the unwritten extent before DIO submit
  3352. * the IO. The unwritten extent called at this time will be split
  3353. * into three unwritten extent(at most). After IO complete, the part
  3354. * being filled will be convert to initialized by the end_io callback function
  3355. * via ext4_convert_unwritten_extents().
  3356. *
  3357. * The size of unwritten extent to be written is passed to the caller via the
  3358. * allocated pointer. Return an extent path pointer on success, or an error
  3359. * pointer on failure.
  3360. */
  3361. static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle,
  3362. struct inode *inode,
  3363. struct ext4_map_blocks *map,
  3364. struct ext4_ext_path *path,
  3365. int flags, unsigned int *allocated)
  3366. {
  3367. ext4_lblk_t eof_block;
  3368. ext4_lblk_t ee_block;
  3369. struct ext4_extent *ex;
  3370. unsigned int ee_len;
  3371. int split_flag = 0, depth, err = 0;
  3372. bool did_zeroout = false;
  3373. ext_debug(inode, "logical block %llu, max_blocks %u\n",
  3374. (unsigned long long)map->m_lblk, map->m_len);
  3375. eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
  3376. >> inode->i_sb->s_blocksize_bits;
  3377. if (eof_block < map->m_lblk + map->m_len)
  3378. eof_block = map->m_lblk + map->m_len;
  3379. depth = ext_depth(inode);
  3380. ex = path[depth].p_ext;
  3381. ee_block = le32_to_cpu(ex->ee_block);
  3382. ee_len = ext4_ext_get_actual_len(ex);
  3383. /* No split needed */
  3384. if (ee_block == map->m_lblk && ee_len == map->m_len)
  3385. goto convert;
  3386. /*
  3387. * It is only safe to convert extent to initialized via explicit
  3388. * zeroout only if extent is fully inside i_size or new_size.
  3389. */
  3390. split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
  3391. /*
  3392. * pass SPLIT_NOMERGE explicitly so we don't end up merging extents we
  3393. * just split.
  3394. */
  3395. path = ext4_split_extent(handle, inode, path, map, split_flag,
  3396. flags | EXT4_GET_BLOCKS_SPLIT_NOMERGE,
  3397. allocated, &did_zeroout);
  3398. if (IS_ERR(path))
  3399. return path;
  3400. convert:
  3401. path = ext4_find_extent(inode, map->m_lblk, path, flags);
  3402. if (IS_ERR(path))
  3403. return path;
  3404. depth = ext_depth(inode);
  3405. ex = path[depth].p_ext;
  3406. /*
  3407. * Conversion is already handled in case of zeroout
  3408. */
  3409. if (!did_zeroout) {
  3410. err = ext4_ext_get_access(handle, inode, path + depth);
  3411. if (err)
  3412. goto err;
  3413. if (flags & EXT4_GET_BLOCKS_CONVERT)
  3414. ext4_ext_mark_initialized(ex);
  3415. else if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)
  3416. ext4_ext_mark_unwritten(ex);
  3417. if (!(flags & EXT4_GET_BLOCKS_SPLIT_NOMERGE))
  3418. /*
  3419. * note: ext4_ext_correct_indexes() isn't needed here because
  3420. * borders are not changed
  3421. */
  3422. ext4_ext_try_to_merge(handle, inode, path, ex);
  3423. err = ext4_ext_dirty(handle, inode, path + depth);
  3424. if (err)
  3425. goto err;
  3426. }
  3427. /* Lets update the extent status tree after conversion */
  3428. if (!(flags & EXT4_EX_NOCACHE))
  3429. ext4_es_insert_extent(inode, le32_to_cpu(ex->ee_block),
  3430. ext4_ext_get_actual_len(ex),
  3431. ext4_ext_pblock(ex),
  3432. ext4_ext_is_unwritten(ex) ?
  3433. EXTENT_STATUS_UNWRITTEN :
  3434. EXTENT_STATUS_WRITTEN,
  3435. false);
  3436. err:
  3437. if (err) {
  3438. ext4_free_ext_path(path);
  3439. return ERR_PTR(err);
  3440. }
  3441. return path;
  3442. }
  3443. static struct ext4_ext_path *
  3444. ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode,
  3445. struct ext4_map_blocks *map,
  3446. struct ext4_ext_path *path, int flags)
  3447. {
  3448. struct ext4_extent *ex;
  3449. ext4_lblk_t ee_block;
  3450. unsigned int ee_len;
  3451. int depth;
  3452. depth = ext_depth(inode);
  3453. ex = path[depth].p_ext;
  3454. ee_block = le32_to_cpu(ex->ee_block);
  3455. ee_len = ext4_ext_get_actual_len(ex);
  3456. ext_debug(inode, "logical block %llu, max_blocks %u\n",
  3457. (unsigned long long)ee_block, ee_len);
  3458. return ext4_split_convert_extents(handle, inode, map, path, flags,
  3459. NULL);
  3460. }
  3461. static struct ext4_ext_path *
  3462. convert_initialized_extent(handle_t *handle, struct inode *inode,
  3463. struct ext4_map_blocks *map,
  3464. struct ext4_ext_path *path,
  3465. int flags,
  3466. unsigned int *allocated)
  3467. {
  3468. struct ext4_extent *ex;
  3469. ext4_lblk_t ee_block;
  3470. unsigned int ee_len;
  3471. int depth;
  3472. /*
  3473. * Make sure that the extent is no bigger than we support with
  3474. * unwritten extent
  3475. */
  3476. if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
  3477. map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
  3478. depth = ext_depth(inode);
  3479. ex = path[depth].p_ext;
  3480. ee_block = le32_to_cpu(ex->ee_block);
  3481. ee_len = ext4_ext_get_actual_len(ex);
  3482. ext_debug(inode, "logical block %llu, max_blocks %u\n",
  3483. (unsigned long long)ee_block, ee_len);
  3484. path = ext4_split_convert_extents(handle, inode, map, path, flags,
  3485. NULL);
  3486. if (IS_ERR(path))
  3487. return path;
  3488. ext4_ext_show_leaf(inode, path);
  3489. ext4_update_inode_fsync_trans(handle, inode, 1);
  3490. /*
  3491. * The extent might be initialized in case of zeroout.
  3492. */
  3493. path = ext4_find_extent(inode, map->m_lblk, path, flags);
  3494. if (IS_ERR(path))
  3495. return path;
  3496. depth = ext_depth(inode);
  3497. ex = path[depth].p_ext;
  3498. if (ext4_ext_is_unwritten(ex))
  3499. map->m_flags |= EXT4_MAP_UNWRITTEN;
  3500. else
  3501. map->m_flags |= EXT4_MAP_MAPPED;
  3502. if (*allocated > map->m_len)
  3503. *allocated = map->m_len;
  3504. map->m_len = *allocated;
  3505. return path;
  3506. }
  3507. static struct ext4_ext_path *
  3508. ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
  3509. struct ext4_map_blocks *map,
  3510. struct ext4_ext_path *path, int flags,
  3511. unsigned int *allocated, ext4_fsblk_t newblock)
  3512. {
  3513. int err = 0;
  3514. ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
  3515. (unsigned long long)map->m_lblk, map->m_len, flags,
  3516. *allocated);
  3517. ext4_ext_show_leaf(inode, path);
  3518. /*
  3519. * When writing into unwritten space, we should not fail to
  3520. * allocate metadata blocks for the new extent block if needed.
  3521. */
  3522. flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
  3523. trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
  3524. *allocated, newblock);
  3525. /* IO end_io complete, convert the filled extent to written */
  3526. if (flags & EXT4_GET_BLOCKS_CONVERT) {
  3527. path = ext4_convert_unwritten_extents_endio(handle, inode,
  3528. map, path, flags);
  3529. if (IS_ERR(path))
  3530. return path;
  3531. ext4_update_inode_fsync_trans(handle, inode, 1);
  3532. goto map_out;
  3533. }
  3534. /* buffered IO cases */
  3535. /*
  3536. * repeat fallocate creation request
  3537. * we already have an unwritten extent
  3538. */
  3539. if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
  3540. map->m_flags |= EXT4_MAP_UNWRITTEN;
  3541. goto map_out;
  3542. }
  3543. /* buffered READ or buffered write_begin() lookup */
  3544. if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
  3545. /*
  3546. * We have blocks reserved already. We
  3547. * return allocated blocks so that delalloc
  3548. * won't do block reservation for us. But
  3549. * the buffer head will be unmapped so that
  3550. * a read from the block returns 0s.
  3551. */
  3552. map->m_flags |= EXT4_MAP_UNWRITTEN;
  3553. goto out1;
  3554. }
  3555. /*
  3556. * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
  3557. * For buffered writes, at writepage time, etc. Convert a
  3558. * discovered unwritten extent to written.
  3559. */
  3560. path = ext4_ext_convert_to_initialized(handle, inode, map, path,
  3561. flags, allocated);
  3562. if (IS_ERR(path))
  3563. return path;
  3564. ext4_update_inode_fsync_trans(handle, inode, 1);
  3565. /*
  3566. * shouldn't get a 0 allocated when converting an unwritten extent
  3567. * unless m_len is 0 (bug) or extent has been corrupted
  3568. */
  3569. if (unlikely(*allocated == 0)) {
  3570. EXT4_ERROR_INODE(inode, "unexpected allocated == 0, m_len = %u",
  3571. map->m_len);
  3572. err = -EFSCORRUPTED;
  3573. goto errout;
  3574. }
  3575. map->m_flags |= EXT4_MAP_NEW;
  3576. map_out:
  3577. map->m_flags |= EXT4_MAP_MAPPED;
  3578. out1:
  3579. map->m_pblk = newblock;
  3580. if (*allocated > map->m_len)
  3581. *allocated = map->m_len;
  3582. map->m_len = *allocated;
  3583. ext4_ext_show_leaf(inode, path);
  3584. return path;
  3585. errout:
  3586. ext4_free_ext_path(path);
  3587. return ERR_PTR(err);
  3588. }
  3589. /*
  3590. * get_implied_cluster_alloc - check to see if the requested
  3591. * allocation (in the map structure) overlaps with a cluster already
  3592. * allocated in an extent.
  3593. * @sb The filesystem superblock structure
  3594. * @map The requested lblk->pblk mapping
  3595. * @ex The extent structure which might contain an implied
  3596. * cluster allocation
  3597. *
  3598. * This function is called by ext4_ext_map_blocks() after we failed to
  3599. * find blocks that were already in the inode's extent tree. Hence,
  3600. * we know that the beginning of the requested region cannot overlap
  3601. * the extent from the inode's extent tree. There are three cases we
  3602. * want to catch. The first is this case:
  3603. *
  3604. * |--- cluster # N--|
  3605. * |--- extent ---| |---- requested region ---|
  3606. * |==========|
  3607. *
  3608. * The second case that we need to test for is this one:
  3609. *
  3610. * |--------- cluster # N ----------------|
  3611. * |--- requested region --| |------- extent ----|
  3612. * |=======================|
  3613. *
  3614. * The third case is when the requested region lies between two extents
  3615. * within the same cluster:
  3616. * |------------- cluster # N-------------|
  3617. * |----- ex -----| |---- ex_right ----|
  3618. * |------ requested region ------|
  3619. * |================|
  3620. *
  3621. * In each of the above cases, we need to set the map->m_pblk and
  3622. * map->m_len so it corresponds to the return the extent labelled as
  3623. * "|====|" from cluster #N, since it is already in use for data in
  3624. * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
  3625. * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
  3626. * as a new "allocated" block region. Otherwise, we will return 0 and
  3627. * ext4_ext_map_blocks() will then allocate one or more new clusters
  3628. * by calling ext4_mb_new_blocks().
  3629. */
  3630. static int get_implied_cluster_alloc(struct super_block *sb,
  3631. struct ext4_map_blocks *map,
  3632. struct ext4_extent *ex,
  3633. struct ext4_ext_path *path)
  3634. {
  3635. struct ext4_sb_info *sbi = EXT4_SB(sb);
  3636. ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
  3637. ext4_lblk_t ex_cluster_start, ex_cluster_end;
  3638. ext4_lblk_t rr_cluster_start;
  3639. ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
  3640. ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
  3641. unsigned short ee_len = ext4_ext_get_actual_len(ex);
  3642. /* The extent passed in that we are trying to match */
  3643. ex_cluster_start = EXT4_B2C(sbi, ee_block);
  3644. ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
  3645. /* The requested region passed into ext4_map_blocks() */
  3646. rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
  3647. if ((rr_cluster_start == ex_cluster_end) ||
  3648. (rr_cluster_start == ex_cluster_start)) {
  3649. if (rr_cluster_start == ex_cluster_end)
  3650. ee_start += ee_len - 1;
  3651. map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
  3652. map->m_len = min(map->m_len,
  3653. (unsigned) sbi->s_cluster_ratio - c_offset);
  3654. /*
  3655. * Check for and handle this case:
  3656. *
  3657. * |--------- cluster # N-------------|
  3658. * |------- extent ----|
  3659. * |--- requested region ---|
  3660. * |===========|
  3661. */
  3662. if (map->m_lblk < ee_block)
  3663. map->m_len = min(map->m_len, ee_block - map->m_lblk);
  3664. /*
  3665. * Check for the case where there is already another allocated
  3666. * block to the right of 'ex' but before the end of the cluster.
  3667. *
  3668. * |------------- cluster # N-------------|
  3669. * |----- ex -----| |---- ex_right ----|
  3670. * |------ requested region ------|
  3671. * |================|
  3672. */
  3673. if (map->m_lblk > ee_block) {
  3674. ext4_lblk_t next = ext4_ext_next_allocated_block(path);
  3675. map->m_len = min(map->m_len, next - map->m_lblk);
  3676. }
  3677. trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
  3678. return 1;
  3679. }
  3680. trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
  3681. return 0;
  3682. }
  3683. /*
  3684. * Determine hole length around the given logical block, first try to
  3685. * locate and expand the hole from the given @path, and then adjust it
  3686. * if it's partially or completely converted to delayed extents, insert
  3687. * it into the extent cache tree if it's indeed a hole, finally return
  3688. * the length of the determined extent.
  3689. */
  3690. static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode,
  3691. struct ext4_ext_path *path,
  3692. ext4_lblk_t lblk)
  3693. {
  3694. ext4_lblk_t hole_start, len;
  3695. struct extent_status es;
  3696. hole_start = lblk;
  3697. len = ext4_ext_find_hole(inode, path, &hole_start);
  3698. again:
  3699. ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
  3700. hole_start + len - 1, &es);
  3701. if (!es.es_len)
  3702. goto insert_hole;
  3703. /*
  3704. * There's a delalloc extent in the hole, handle it if the delalloc
  3705. * extent is in front of, behind and straddle the queried range.
  3706. */
  3707. if (lblk >= es.es_lblk + es.es_len) {
  3708. /*
  3709. * The delalloc extent is in front of the queried range,
  3710. * find again from the queried start block.
  3711. */
  3712. len -= lblk - hole_start;
  3713. hole_start = lblk;
  3714. goto again;
  3715. } else if (in_range(lblk, es.es_lblk, es.es_len)) {
  3716. /*
  3717. * The delalloc extent containing lblk, it must have been
  3718. * added after ext4_map_blocks() checked the extent status
  3719. * tree so we are not holding i_rwsem and delalloc info is
  3720. * only stabilized by i_data_sem we are going to release
  3721. * soon. Don't modify the extent status tree and report
  3722. * extent as a hole, just adjust the length to the delalloc
  3723. * extent's after lblk.
  3724. */
  3725. len = es.es_lblk + es.es_len - lblk;
  3726. return len;
  3727. } else {
  3728. /*
  3729. * The delalloc extent is partially or completely behind
  3730. * the queried range, update hole length until the
  3731. * beginning of the delalloc extent.
  3732. */
  3733. len = min(es.es_lblk - hole_start, len);
  3734. }
  3735. insert_hole:
  3736. /* Put just found gap into cache to speed up subsequent requests */
  3737. ext_debug(inode, " -> %u:%u\n", hole_start, len);
  3738. ext4_es_cache_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
  3739. /* Update hole_len to reflect hole size after lblk */
  3740. if (hole_start != lblk)
  3741. len -= lblk - hole_start;
  3742. return len;
  3743. }
  3744. /*
  3745. * Block allocation/map/preallocation routine for extents based files
  3746. *
  3747. *
  3748. * Need to be called with
  3749. * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
  3750. * (ie, flags is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
  3751. *
  3752. * return > 0, number of blocks already mapped/allocated
  3753. * if flags doesn't contain EXT4_GET_BLOCKS_CREATE and these are pre-allocated blocks
  3754. * buffer head is unmapped
  3755. * otherwise blocks are mapped
  3756. *
  3757. * return = 0, if plain look up failed (blocks have not been allocated)
  3758. * buffer head is unmapped
  3759. *
  3760. * return < 0, error case.
  3761. */
  3762. int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
  3763. struct ext4_map_blocks *map, int flags)
  3764. {
  3765. struct ext4_ext_path *path = NULL;
  3766. struct ext4_extent newex, *ex, ex2;
  3767. struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  3768. ext4_fsblk_t newblock = 0, pblk;
  3769. int err = 0, depth;
  3770. unsigned int allocated = 0, offset = 0;
  3771. unsigned int allocated_clusters = 0;
  3772. struct ext4_allocation_request ar;
  3773. ext4_lblk_t cluster_offset;
  3774. ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
  3775. trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
  3776. /* find extent for this block */
  3777. path = ext4_find_extent(inode, map->m_lblk, NULL, flags);
  3778. if (IS_ERR(path)) {
  3779. err = PTR_ERR(path);
  3780. goto out;
  3781. }
  3782. depth = ext_depth(inode);
  3783. /*
  3784. * consistent leaf must not be empty;
  3785. * this situation is possible, though, _during_ tree modification;
  3786. * this is why assert can't be put in ext4_find_extent()
  3787. */
  3788. if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
  3789. EXT4_ERROR_INODE(inode, "bad extent address "
  3790. "lblock: %lu, depth: %d pblock %lld",
  3791. (unsigned long) map->m_lblk, depth,
  3792. path[depth].p_block);
  3793. err = -EFSCORRUPTED;
  3794. goto out;
  3795. }
  3796. ex = path[depth].p_ext;
  3797. if (ex) {
  3798. ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
  3799. ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
  3800. unsigned short ee_len;
  3801. /*
  3802. * unwritten extents are treated as holes, except that
  3803. * we split out initialized portions during a write.
  3804. */
  3805. ee_len = ext4_ext_get_actual_len(ex);
  3806. trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
  3807. /* if found extent covers block, simply return it */
  3808. if (in_range(map->m_lblk, ee_block, ee_len)) {
  3809. newblock = map->m_lblk - ee_block + ee_start;
  3810. /* number of remaining blocks in the extent */
  3811. allocated = ee_len - (map->m_lblk - ee_block);
  3812. ext_debug(inode, "%u fit into %u:%d -> %llu\n",
  3813. map->m_lblk, ee_block, ee_len, newblock);
  3814. /*
  3815. * If the extent is initialized check whether the
  3816. * caller wants to convert it to unwritten.
  3817. */
  3818. if ((!ext4_ext_is_unwritten(ex)) &&
  3819. (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
  3820. path = convert_initialized_extent(handle,
  3821. inode, map, path, flags, &allocated);
  3822. if (IS_ERR(path))
  3823. err = PTR_ERR(path);
  3824. goto out;
  3825. } else if (!ext4_ext_is_unwritten(ex)) {
  3826. map->m_flags |= EXT4_MAP_MAPPED;
  3827. map->m_pblk = newblock;
  3828. if (allocated > map->m_len)
  3829. allocated = map->m_len;
  3830. map->m_len = allocated;
  3831. ext4_ext_show_leaf(inode, path);
  3832. goto out;
  3833. }
  3834. path = ext4_ext_handle_unwritten_extents(
  3835. handle, inode, map, path, flags,
  3836. &allocated, newblock);
  3837. if (IS_ERR(path))
  3838. err = PTR_ERR(path);
  3839. goto out;
  3840. }
  3841. }
  3842. /*
  3843. * requested block isn't allocated yet;
  3844. * we couldn't try to create block if flags doesn't contain EXT4_GET_BLOCKS_CREATE
  3845. */
  3846. if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
  3847. ext4_lblk_t len;
  3848. len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk);
  3849. map->m_pblk = 0;
  3850. map->m_len = min_t(unsigned int, map->m_len, len);
  3851. goto out;
  3852. }
  3853. /*
  3854. * Okay, we need to do block allocation.
  3855. */
  3856. newex.ee_block = cpu_to_le32(map->m_lblk);
  3857. cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
  3858. /*
  3859. * If we are doing bigalloc, check to see if the extent returned
  3860. * by ext4_find_extent() implies a cluster we can use.
  3861. */
  3862. if (cluster_offset && ex &&
  3863. get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
  3864. ar.len = allocated = map->m_len;
  3865. newblock = map->m_pblk;
  3866. goto got_allocated_blocks;
  3867. }
  3868. /* find neighbour allocated blocks */
  3869. ar.lleft = map->m_lblk;
  3870. err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
  3871. if (err)
  3872. goto out;
  3873. ar.lright = map->m_lblk;
  3874. err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright,
  3875. &ex2, flags);
  3876. if (err < 0)
  3877. goto out;
  3878. /* Check if the extent after searching to the right implies a
  3879. * cluster we can use. */
  3880. if ((sbi->s_cluster_ratio > 1) && err &&
  3881. get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
  3882. ar.len = allocated = map->m_len;
  3883. newblock = map->m_pblk;
  3884. err = 0;
  3885. goto got_allocated_blocks;
  3886. }
  3887. /*
  3888. * See if request is beyond maximum number of blocks we can have in
  3889. * a single extent. For an initialized extent this limit is
  3890. * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
  3891. * EXT_UNWRITTEN_MAX_LEN.
  3892. */
  3893. if (map->m_len > EXT_INIT_MAX_LEN &&
  3894. !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
  3895. map->m_len = EXT_INIT_MAX_LEN;
  3896. else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
  3897. (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
  3898. map->m_len = EXT_UNWRITTEN_MAX_LEN;
  3899. /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
  3900. newex.ee_len = cpu_to_le16(map->m_len);
  3901. err = ext4_ext_check_overlap(sbi, inode, &newex, path);
  3902. if (err)
  3903. allocated = ext4_ext_get_actual_len(&newex);
  3904. else
  3905. allocated = map->m_len;
  3906. /* allocate new block */
  3907. ar.inode = inode;
  3908. ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
  3909. ar.logical = map->m_lblk;
  3910. /*
  3911. * We calculate the offset from the beginning of the cluster
  3912. * for the logical block number, since when we allocate a
  3913. * physical cluster, the physical block should start at the
  3914. * same offset from the beginning of the cluster. This is
  3915. * needed so that future calls to get_implied_cluster_alloc()
  3916. * work correctly.
  3917. */
  3918. offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
  3919. ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
  3920. ar.goal -= offset;
  3921. ar.logical -= offset;
  3922. if (S_ISREG(inode->i_mode))
  3923. ar.flags = EXT4_MB_HINT_DATA;
  3924. else
  3925. /* disable in-core preallocation for non-regular files */
  3926. ar.flags = 0;
  3927. if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
  3928. ar.flags |= EXT4_MB_HINT_NOPREALLOC;
  3929. if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
  3930. ar.flags |= EXT4_MB_DELALLOC_RESERVED;
  3931. if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
  3932. ar.flags |= EXT4_MB_USE_RESERVED;
  3933. newblock = ext4_mb_new_blocks(handle, &ar, &err);
  3934. if (!newblock)
  3935. goto out;
  3936. allocated_clusters = ar.len;
  3937. ar.len = EXT4_C2B(sbi, ar.len) - offset;
  3938. ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
  3939. ar.goal, newblock, ar.len, allocated);
  3940. if (ar.len > allocated)
  3941. ar.len = allocated;
  3942. got_allocated_blocks:
  3943. /* try to insert new extent into found leaf and return */
  3944. pblk = newblock + offset;
  3945. ext4_ext_store_pblock(&newex, pblk);
  3946. newex.ee_len = cpu_to_le16(ar.len);
  3947. /* Mark unwritten */
  3948. if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
  3949. ext4_ext_mark_unwritten(&newex);
  3950. map->m_flags |= EXT4_MAP_UNWRITTEN;
  3951. }
  3952. path = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
  3953. if (IS_ERR(path)) {
  3954. err = PTR_ERR(path);
  3955. /*
  3956. * Gracefully handle out of space conditions. If the filesystem
  3957. * is inconsistent, we'll just leak allocated blocks to avoid
  3958. * causing even more damage.
  3959. */
  3960. if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) {
  3961. int fb_flags = 0;
  3962. /*
  3963. * free data blocks we just allocated.
  3964. * not a good idea to call discard here directly,
  3965. * but otherwise we'd need to call it every free().
  3966. */
  3967. ext4_discard_preallocations(inode);
  3968. if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
  3969. fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
  3970. ext4_free_blocks(handle, inode, NULL, newblock,
  3971. EXT4_C2B(sbi, allocated_clusters),
  3972. fb_flags);
  3973. }
  3974. goto out;
  3975. }
  3976. /*
  3977. * Cache the extent and update transaction to commit on fdatasync only
  3978. * when it is _not_ an unwritten extent.
  3979. */
  3980. if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
  3981. ext4_update_inode_fsync_trans(handle, inode, 1);
  3982. else
  3983. ext4_update_inode_fsync_trans(handle, inode, 0);
  3984. map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
  3985. map->m_pblk = pblk;
  3986. map->m_len = ar.len;
  3987. allocated = map->m_len;
  3988. ext4_ext_show_leaf(inode, path);
  3989. out:
  3990. /*
  3991. * We never use EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF with CREATE flag.
  3992. * So we know that the depth used here is correct, since there was no
  3993. * block allocation done if EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF is set.
  3994. * If tomorrow we start using this QUERY flag with CREATE, then we will
  3995. * need to re-calculate the depth as it might have changed due to block
  3996. * allocation.
  3997. */
  3998. if (flags & EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF) {
  3999. WARN_ON_ONCE(flags & EXT4_GET_BLOCKS_CREATE);
  4000. if (!err && ex && (ex == EXT_LAST_EXTENT(path[depth].p_hdr)))
  4001. map->m_flags |= EXT4_MAP_QUERY_LAST_IN_LEAF;
  4002. }
  4003. ext4_free_ext_path(path);
  4004. trace_ext4_ext_map_blocks_exit(inode, flags, map,
  4005. err ? err : allocated);
  4006. return err ? err : allocated;
  4007. }
  4008. int ext4_ext_truncate(handle_t *handle, struct inode *inode)
  4009. {
  4010. struct super_block *sb = inode->i_sb;
  4011. ext4_lblk_t last_block;
  4012. int err = 0;
  4013. /*
  4014. * TODO: optimization is possible here.
  4015. * Probably we need not scan at all,
  4016. * because page truncation is enough.
  4017. */
  4018. /* we have to know where to truncate from in crash case */
  4019. EXT4_I(inode)->i_disksize = inode->i_size;
  4020. err = ext4_mark_inode_dirty(handle, inode);
  4021. if (err)
  4022. return err;
  4023. last_block = (inode->i_size + sb->s_blocksize - 1)
  4024. >> EXT4_BLOCK_SIZE_BITS(sb);
  4025. ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
  4026. retry_remove_space:
  4027. err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
  4028. if (err == -ENOMEM) {
  4029. memalloc_retry_wait(GFP_ATOMIC);
  4030. goto retry_remove_space;
  4031. }
  4032. return err;
  4033. }
  4034. static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
  4035. ext4_lblk_t len, loff_t new_size,
  4036. int flags)
  4037. {
  4038. struct inode *inode = file_inode(file);
  4039. handle_t *handle;
  4040. int ret = 0, ret2 = 0, ret3 = 0;
  4041. int retries = 0;
  4042. int depth = 0;
  4043. struct ext4_map_blocks map;
  4044. unsigned int credits;
  4045. loff_t epos, old_size = i_size_read(inode);
  4046. unsigned int blkbits = inode->i_blkbits;
  4047. bool alloc_zero = false;
  4048. BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
  4049. map.m_lblk = offset;
  4050. map.m_len = len;
  4051. /*
  4052. * Don't normalize the request if it can fit in one extent so
  4053. * that it doesn't get unnecessarily split into multiple
  4054. * extents.
  4055. */
  4056. if (len <= EXT_UNWRITTEN_MAX_LEN)
  4057. flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
  4058. /*
  4059. * Do the actual write zero during a running journal transaction
  4060. * costs a lot. First allocate an unwritten extent and then
  4061. * convert it to written after zeroing it out.
  4062. */
  4063. if (flags & EXT4_GET_BLOCKS_ZERO) {
  4064. flags &= ~EXT4_GET_BLOCKS_ZERO;
  4065. flags |= EXT4_GET_BLOCKS_UNWRIT_EXT;
  4066. alloc_zero = true;
  4067. }
  4068. /*
  4069. * credits to insert 1 extent into extent tree
  4070. */
  4071. credits = ext4_chunk_trans_blocks(inode, len);
  4072. depth = ext_depth(inode);
  4073. retry:
  4074. while (len) {
  4075. /*
  4076. * Recalculate credits when extent tree depth changes.
  4077. */
  4078. if (depth != ext_depth(inode)) {
  4079. credits = ext4_chunk_trans_blocks(inode, len);
  4080. depth = ext_depth(inode);
  4081. }
  4082. handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
  4083. credits);
  4084. if (IS_ERR(handle)) {
  4085. ret = PTR_ERR(handle);
  4086. break;
  4087. }
  4088. ret = ext4_map_blocks(handle, inode, &map, flags);
  4089. if (ret <= 0) {
  4090. ext4_debug("inode #%lu: block %u: len %u: "
  4091. "ext4_ext_map_blocks returned %d",
  4092. inode->i_ino, map.m_lblk,
  4093. map.m_len, ret);
  4094. ext4_mark_inode_dirty(handle, inode);
  4095. ext4_journal_stop(handle);
  4096. break;
  4097. }
  4098. /*
  4099. * allow a full retry cycle for any remaining allocations
  4100. */
  4101. retries = 0;
  4102. epos = EXT4_LBLK_TO_B(inode, map.m_lblk + ret);
  4103. inode_set_ctime_current(inode);
  4104. if (new_size) {
  4105. if (epos > new_size)
  4106. epos = new_size;
  4107. if (ext4_update_inode_size(inode, epos) & 0x1)
  4108. inode_set_mtime_to_ts(inode,
  4109. inode_get_ctime(inode));
  4110. if (epos > old_size) {
  4111. pagecache_isize_extended(inode, old_size, epos);
  4112. ext4_zero_partial_blocks(handle, inode,
  4113. old_size, epos - old_size);
  4114. }
  4115. }
  4116. ret2 = ext4_mark_inode_dirty(handle, inode);
  4117. ext4_update_inode_fsync_trans(handle, inode, 1);
  4118. ret3 = ext4_journal_stop(handle);
  4119. ret2 = ret3 ? ret3 : ret2;
  4120. if (unlikely(ret2))
  4121. break;
  4122. if (alloc_zero &&
  4123. (map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
  4124. ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
  4125. map.m_len);
  4126. if (likely(!ret2))
  4127. ret2 = ext4_convert_unwritten_extents(NULL,
  4128. inode, (loff_t)map.m_lblk << blkbits,
  4129. (loff_t)map.m_len << blkbits);
  4130. if (ret2)
  4131. break;
  4132. }
  4133. map.m_lblk += ret;
  4134. map.m_len = len = len - ret;
  4135. }
  4136. if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
  4137. goto retry;
  4138. return ret > 0 ? ret2 : ret;
  4139. }
  4140. static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
  4141. static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
  4142. static long ext4_zero_range(struct file *file, loff_t offset,
  4143. loff_t len, int mode)
  4144. {
  4145. struct inode *inode = file_inode(file);
  4146. handle_t *handle = NULL;
  4147. loff_t new_size = 0;
  4148. loff_t end = offset + len;
  4149. ext4_lblk_t start_lblk, end_lblk;
  4150. unsigned int blocksize = i_blocksize(inode);
  4151. unsigned int blkbits = inode->i_blkbits;
  4152. int ret, flags, credits;
  4153. trace_ext4_zero_range(inode, offset, len, mode);
  4154. WARN_ON_ONCE(!inode_is_locked(inode));
  4155. /* Indirect files do not support unwritten extents */
  4156. if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
  4157. return -EOPNOTSUPP;
  4158. if (!(mode & FALLOC_FL_KEEP_SIZE) &&
  4159. (end > inode->i_size || end > EXT4_I(inode)->i_disksize)) {
  4160. new_size = end;
  4161. ret = inode_newsize_ok(inode, new_size);
  4162. if (ret)
  4163. return ret;
  4164. }
  4165. flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
  4166. /* Preallocate the range including the unaligned edges */
  4167. if (!IS_ALIGNED(offset | end, blocksize)) {
  4168. ext4_lblk_t alloc_lblk = offset >> blkbits;
  4169. ext4_lblk_t len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits);
  4170. ret = ext4_alloc_file_blocks(file, alloc_lblk, len_lblk,
  4171. new_size, flags);
  4172. if (ret)
  4173. return ret;
  4174. }
  4175. ret = ext4_update_disksize_before_punch(inode, offset, len);
  4176. if (ret)
  4177. return ret;
  4178. /* Now release the pages and zero block aligned part of pages */
  4179. ret = ext4_truncate_page_cache_block_range(inode, offset, end);
  4180. if (ret)
  4181. return ret;
  4182. /* Zero range excluding the unaligned edges */
  4183. start_lblk = EXT4_B_TO_LBLK(inode, offset);
  4184. end_lblk = end >> blkbits;
  4185. if (end_lblk > start_lblk) {
  4186. ext4_lblk_t zero_blks = end_lblk - start_lblk;
  4187. if (mode & FALLOC_FL_WRITE_ZEROES)
  4188. flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
  4189. else
  4190. flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
  4191. EXT4_EX_NOCACHE);
  4192. ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
  4193. new_size, flags);
  4194. if (ret)
  4195. return ret;
  4196. }
  4197. /* Finish zeroing out if it doesn't contain partial block */
  4198. if (IS_ALIGNED(offset | end, blocksize))
  4199. return ret;
  4200. /*
  4201. * In worst case we have to writeout two nonadjacent unwritten
  4202. * blocks and update the inode
  4203. */
  4204. credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
  4205. if (ext4_should_journal_data(inode))
  4206. credits += 2;
  4207. handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
  4208. if (IS_ERR(handle)) {
  4209. ret = PTR_ERR(handle);
  4210. ext4_std_error(inode->i_sb, ret);
  4211. return ret;
  4212. }
  4213. /* Zero out partial block at the edges of the range */
  4214. ret = ext4_zero_partial_blocks(handle, inode, offset, len);
  4215. if (ret)
  4216. goto out_handle;
  4217. if (new_size)
  4218. ext4_update_inode_size(inode, new_size);
  4219. ret = ext4_mark_inode_dirty(handle, inode);
  4220. if (unlikely(ret))
  4221. goto out_handle;
  4222. ext4_update_inode_fsync_trans(handle, inode, 1);
  4223. if (file->f_flags & O_SYNC)
  4224. ext4_handle_sync(handle);
  4225. out_handle:
  4226. ext4_journal_stop(handle);
  4227. return ret;
  4228. }
  4229. static long ext4_do_fallocate(struct file *file, loff_t offset,
  4230. loff_t len, int mode)
  4231. {
  4232. struct inode *inode = file_inode(file);
  4233. loff_t end = offset + len;
  4234. loff_t new_size = 0;
  4235. ext4_lblk_t start_lblk, len_lblk;
  4236. int ret;
  4237. trace_ext4_fallocate_enter(inode, offset, len, mode);
  4238. WARN_ON_ONCE(!inode_is_locked(inode));
  4239. start_lblk = offset >> inode->i_blkbits;
  4240. len_lblk = EXT4_MAX_BLOCKS(len, offset, inode->i_blkbits);
  4241. /* We only support preallocation for extent-based files only. */
  4242. if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
  4243. ret = -EOPNOTSUPP;
  4244. goto out;
  4245. }
  4246. if (!(mode & FALLOC_FL_KEEP_SIZE) &&
  4247. (end > inode->i_size || end > EXT4_I(inode)->i_disksize)) {
  4248. new_size = end;
  4249. ret = inode_newsize_ok(inode, new_size);
  4250. if (ret)
  4251. goto out;
  4252. }
  4253. ret = ext4_alloc_file_blocks(file, start_lblk, len_lblk, new_size,
  4254. EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
  4255. if (ret)
  4256. goto out;
  4257. if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
  4258. ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
  4259. EXT4_I(inode)->i_sync_tid);
  4260. }
  4261. out:
  4262. trace_ext4_fallocate_exit(inode, offset, len_lblk, ret);
  4263. return ret;
  4264. }
  4265. /*
  4266. * preallocate space for a file. This implements ext4's fallocate file
  4267. * operation, which gets called from sys_fallocate system call.
  4268. * For block-mapped files, posix_fallocate should fall back to the method
  4269. * of writing zeroes to the required new blocks (the same behavior which is
  4270. * expected for file systems which do not support fallocate() system call).
  4271. */
  4272. long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
  4273. {
  4274. struct inode *inode = file_inode(file);
  4275. struct address_space *mapping = file->f_mapping;
  4276. int ret;
  4277. /*
  4278. * Encrypted inodes can't handle collapse range or insert
  4279. * range since we would need to re-encrypt blocks with a
  4280. * different IV or XTS tweak (which are based on the logical
  4281. * block number).
  4282. */
  4283. if (IS_ENCRYPTED(inode) &&
  4284. (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
  4285. return -EOPNOTSUPP;
  4286. /*
  4287. * Don't allow writing zeroes if the underlying device does not
  4288. * enable the unmap write zeroes operation.
  4289. */
  4290. if ((mode & FALLOC_FL_WRITE_ZEROES) &&
  4291. !bdev_write_zeroes_unmap_sectors(inode->i_sb->s_bdev))
  4292. return -EOPNOTSUPP;
  4293. /* Return error if mode is not supported */
  4294. if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
  4295. FALLOC_FL_ZERO_RANGE | FALLOC_FL_COLLAPSE_RANGE |
  4296. FALLOC_FL_INSERT_RANGE | FALLOC_FL_WRITE_ZEROES))
  4297. return -EOPNOTSUPP;
  4298. inode_lock(inode);
  4299. ret = ext4_convert_inline_data(inode);
  4300. if (ret)
  4301. goto out_inode_lock;
  4302. /* Wait all existing dio workers, newcomers will block on i_rwsem */
  4303. inode_dio_wait(inode);
  4304. ret = file_modified(file);
  4305. if (ret)
  4306. goto out_inode_lock;
  4307. if ((mode & FALLOC_FL_MODE_MASK) == FALLOC_FL_ALLOCATE_RANGE) {
  4308. ret = ext4_do_fallocate(file, offset, len, mode);
  4309. goto out_inode_lock;
  4310. }
  4311. /*
  4312. * Follow-up operations will drop page cache, hold invalidate lock
  4313. * to prevent page faults from reinstantiating pages we have
  4314. * released from page cache.
  4315. */
  4316. filemap_invalidate_lock(mapping);
  4317. ret = ext4_break_layouts(inode);
  4318. if (ret)
  4319. goto out_invalidate_lock;
  4320. switch (mode & FALLOC_FL_MODE_MASK) {
  4321. case FALLOC_FL_PUNCH_HOLE:
  4322. ret = ext4_punch_hole(file, offset, len);
  4323. break;
  4324. case FALLOC_FL_COLLAPSE_RANGE:
  4325. ret = ext4_collapse_range(file, offset, len);
  4326. break;
  4327. case FALLOC_FL_INSERT_RANGE:
  4328. ret = ext4_insert_range(file, offset, len);
  4329. break;
  4330. case FALLOC_FL_ZERO_RANGE:
  4331. case FALLOC_FL_WRITE_ZEROES:
  4332. ret = ext4_zero_range(file, offset, len, mode);
  4333. break;
  4334. default:
  4335. ret = -EOPNOTSUPP;
  4336. }
  4337. out_invalidate_lock:
  4338. filemap_invalidate_unlock(mapping);
  4339. out_inode_lock:
  4340. inode_unlock(inode);
  4341. return ret;
  4342. }
  4343. /*
  4344. * This function converts a range of blocks to written extents. The caller of
  4345. * this function will pass the start offset and the size. all unwritten extents
  4346. * within this range will be converted to written extents.
  4347. *
  4348. * This function is called from the direct IO end io call back function for
  4349. * atomic writes, to convert the unwritten extents after IO is completed.
  4350. *
  4351. * Note that the requirement for atomic writes is that all conversion should
  4352. * happen atomically in a single fs journal transaction. We mainly only allocate
  4353. * unwritten extents either on a hole on a pre-exiting unwritten extent range in
  4354. * ext4_map_blocks_atomic_write(). The only case where we can have multiple
  4355. * unwritten extents in a range [offset, offset+len) is when there is a split
  4356. * unwritten extent between two leaf nodes which was cached in extent status
  4357. * cache during ext4_iomap_alloc() time. That will allow
  4358. * ext4_map_blocks_atomic_write() to return the unwritten extent range w/o going
  4359. * into the slow path. That means we might need a loop for conversion of this
  4360. * unwritten extent split across leaf block within a single journal transaction.
  4361. * Split extents across leaf nodes is a rare case, but let's still handle that
  4362. * to meet the requirements of multi-fsblock atomic writes.
  4363. *
  4364. * Returns 0 on success.
  4365. */
  4366. int ext4_convert_unwritten_extents_atomic(handle_t *handle, struct inode *inode,
  4367. loff_t offset, ssize_t len)
  4368. {
  4369. unsigned int max_blocks;
  4370. int ret = 0, ret2 = 0, ret3 = 0;
  4371. struct ext4_map_blocks map;
  4372. unsigned int blkbits = inode->i_blkbits;
  4373. unsigned int credits = 0;
  4374. int flags = EXT4_GET_BLOCKS_IO_CONVERT_EXT | EXT4_EX_NOCACHE;
  4375. map.m_lblk = offset >> blkbits;
  4376. max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
  4377. if (!handle) {
  4378. /*
  4379. * TODO: An optimization can be added later by having an extent
  4380. * status flag e.g. EXTENT_STATUS_SPLIT_LEAF. If we query that
  4381. * it can tell if the extent in the cache is a split extent.
  4382. * But for now let's assume pextents as 2 always.
  4383. */
  4384. credits = ext4_meta_trans_blocks(inode, max_blocks, 2);
  4385. }
  4386. if (credits) {
  4387. handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
  4388. if (IS_ERR(handle)) {
  4389. ret = PTR_ERR(handle);
  4390. return ret;
  4391. }
  4392. }
  4393. while (ret >= 0 && ret < max_blocks) {
  4394. map.m_lblk += ret;
  4395. map.m_len = (max_blocks -= ret);
  4396. ret = ext4_map_blocks(handle, inode, &map, flags);
  4397. if (ret != max_blocks)
  4398. ext4_msg(inode->i_sb, KERN_INFO,
  4399. "inode #%lu: block %u: len %u: "
  4400. "split block mapping found for atomic write, "
  4401. "ret = %d",
  4402. inode->i_ino, map.m_lblk,
  4403. map.m_len, ret);
  4404. if (ret <= 0)
  4405. break;
  4406. }
  4407. ret2 = ext4_mark_inode_dirty(handle, inode);
  4408. if (credits) {
  4409. ret3 = ext4_journal_stop(handle);
  4410. if (unlikely(ret3))
  4411. ret2 = ret3;
  4412. }
  4413. if (ret <= 0 || ret2)
  4414. ext4_warning(inode->i_sb,
  4415. "inode #%lu: block %u: len %u: "
  4416. "returned %d or %d",
  4417. inode->i_ino, map.m_lblk,
  4418. map.m_len, ret, ret2);
  4419. return ret > 0 ? ret2 : ret;
  4420. }
  4421. /*
  4422. * This function convert a range of blocks to written extents
  4423. * The caller of this function will pass the start offset and the size.
  4424. * all unwritten extents within this range will be converted to
  4425. * written extents.
  4426. *
  4427. * This function is called from the direct IO end io call back
  4428. * function, to convert the fallocated extents after IO is completed.
  4429. * Returns 0 on success.
  4430. */
  4431. int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
  4432. loff_t offset, ssize_t len)
  4433. {
  4434. unsigned int max_blocks;
  4435. int ret = 0, ret2 = 0, ret3 = 0;
  4436. struct ext4_map_blocks map;
  4437. unsigned int blkbits = inode->i_blkbits;
  4438. unsigned int credits = 0;
  4439. map.m_lblk = offset >> blkbits;
  4440. max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
  4441. if (!handle) {
  4442. /*
  4443. * credits to insert 1 extent into extent tree
  4444. */
  4445. credits = ext4_chunk_trans_blocks(inode, max_blocks);
  4446. }
  4447. while (ret >= 0 && ret < max_blocks) {
  4448. map.m_lblk += ret;
  4449. map.m_len = (max_blocks -= ret);
  4450. if (credits) {
  4451. handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
  4452. credits);
  4453. if (IS_ERR(handle)) {
  4454. ret = PTR_ERR(handle);
  4455. break;
  4456. }
  4457. }
  4458. /*
  4459. * Do not cache any unrelated extents, as it does not hold the
  4460. * i_rwsem or invalidate_lock, which could corrupt the extent
  4461. * status tree.
  4462. */
  4463. ret = ext4_map_blocks(handle, inode, &map,
  4464. EXT4_GET_BLOCKS_IO_CONVERT_EXT |
  4465. EXT4_EX_NOCACHE);
  4466. if (ret <= 0)
  4467. ext4_warning(inode->i_sb,
  4468. "inode #%lu: block %u: len %u: "
  4469. "ext4_ext_map_blocks returned %d",
  4470. inode->i_ino, map.m_lblk,
  4471. map.m_len, ret);
  4472. ret2 = ext4_mark_inode_dirty(handle, inode);
  4473. if (credits) {
  4474. ret3 = ext4_journal_stop(handle);
  4475. if (unlikely(ret3))
  4476. ret2 = ret3;
  4477. }
  4478. if (ret <= 0 || ret2)
  4479. break;
  4480. }
  4481. return ret > 0 ? ret2 : ret;
  4482. }
  4483. int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
  4484. {
  4485. int ret = 0, err = 0;
  4486. struct ext4_io_end_vec *io_end_vec;
  4487. /*
  4488. * This is somewhat ugly but the idea is clear: When transaction is
  4489. * reserved, everything goes into it. Otherwise we rather start several
  4490. * smaller transactions for conversion of each extent separately.
  4491. */
  4492. if (handle) {
  4493. handle = ext4_journal_start_reserved(handle,
  4494. EXT4_HT_EXT_CONVERT);
  4495. if (IS_ERR(handle))
  4496. return PTR_ERR(handle);
  4497. }
  4498. list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
  4499. ret = ext4_convert_unwritten_extents(handle, io_end->inode,
  4500. io_end_vec->offset,
  4501. io_end_vec->size);
  4502. if (ret)
  4503. break;
  4504. }
  4505. if (handle)
  4506. err = ext4_journal_stop(handle);
  4507. return ret < 0 ? ret : err;
  4508. }
  4509. static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
  4510. {
  4511. __u64 physical = 0;
  4512. __u64 length = 0;
  4513. int blockbits = inode->i_sb->s_blocksize_bits;
  4514. int error = 0;
  4515. u16 iomap_type;
  4516. /* in-inode? */
  4517. if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
  4518. struct ext4_iloc iloc;
  4519. int offset; /* offset of xattr in inode */
  4520. error = ext4_get_inode_loc(inode, &iloc);
  4521. if (error)
  4522. return error;
  4523. physical = (__u64)iloc.bh->b_blocknr << blockbits;
  4524. offset = EXT4_GOOD_OLD_INODE_SIZE +
  4525. EXT4_I(inode)->i_extra_isize;
  4526. physical += offset;
  4527. length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
  4528. brelse(iloc.bh);
  4529. iomap_type = IOMAP_INLINE;
  4530. } else if (EXT4_I(inode)->i_file_acl) { /* external block */
  4531. physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
  4532. length = inode->i_sb->s_blocksize;
  4533. iomap_type = IOMAP_MAPPED;
  4534. } else {
  4535. /* no in-inode or external block for xattr, so return -ENOENT */
  4536. error = -ENOENT;
  4537. goto out;
  4538. }
  4539. iomap->addr = physical;
  4540. iomap->offset = 0;
  4541. iomap->length = length;
  4542. iomap->type = iomap_type;
  4543. iomap->flags = 0;
  4544. out:
  4545. return error;
  4546. }
  4547. static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
  4548. loff_t length, unsigned flags,
  4549. struct iomap *iomap, struct iomap *srcmap)
  4550. {
  4551. int error;
  4552. error = ext4_iomap_xattr_fiemap(inode, iomap);
  4553. if (error == 0 && (offset >= iomap->length))
  4554. error = -ENOENT;
  4555. return error;
  4556. }
  /*
   * iomap operations handed to iomap_fiemap() by ext4_fiemap() for
   * FIEMAP_FLAG_XATTR requests. Only ->iomap_begin is provided.
   */
  static const struct iomap_ops ext4_iomap_xattr_ops = {
  	.iomap_begin		= ext4_iomap_xattr_begin,
  };
  4560. static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
  4561. {
  4562. u64 maxbytes = ext4_get_maxbytes(inode);
  4563. if (*len == 0)
  4564. return -EINVAL;
  4565. if (start > maxbytes)
  4566. return -EFBIG;
  4567. /*
  4568. * Shrink request scope to what the fs can actually handle.
  4569. */
  4570. if (*len > maxbytes || (maxbytes - *len) < start)
  4571. *len = maxbytes - start;
  4572. return 0;
  4573. }
  4574. int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  4575. u64 start, u64 len)
  4576. {
  4577. int error = 0;
  4578. inode_lock_shared(inode);
  4579. if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
  4580. error = ext4_ext_precache(inode);
  4581. if (error)
  4582. goto unlock;
  4583. fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
  4584. }
  4585. /*
  4586. * For bitmap files the maximum size limit could be smaller than
  4587. * s_maxbytes, so check len here manually instead of just relying on the
  4588. * generic check.
  4589. */
  4590. error = ext4_fiemap_check_ranges(inode, start, &len);
  4591. if (error)
  4592. goto unlock;
  4593. if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
  4594. fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
  4595. error = iomap_fiemap(inode, fieinfo, start, len,
  4596. &ext4_iomap_xattr_ops);
  4597. } else {
  4598. error = iomap_fiemap(inode, fieinfo, start, len,
  4599. &ext4_iomap_report_ops);
  4600. }
  4601. unlock:
  4602. inode_unlock_shared(inode);
  4603. return error;
  4604. }
  4605. int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
  4606. __u64 start, __u64 len)
  4607. {
  4608. ext4_lblk_t start_blk, len_blks;
  4609. __u64 last_blk;
  4610. int error = 0;
  4611. if (ext4_has_inline_data(inode)) {
  4612. int has_inline;
  4613. down_read(&EXT4_I(inode)->xattr_sem);
  4614. has_inline = ext4_has_inline_data(inode);
  4615. up_read(&EXT4_I(inode)->xattr_sem);
  4616. if (has_inline)
  4617. return 0;
  4618. }
  4619. if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
  4620. inode_lock_shared(inode);
  4621. error = ext4_ext_precache(inode);
  4622. inode_unlock_shared(inode);
  4623. if (error)
  4624. return error;
  4625. fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
  4626. }
  4627. error = fiemap_prep(inode, fieinfo, start, &len, 0);
  4628. if (error)
  4629. return error;
  4630. error = ext4_fiemap_check_ranges(inode, start, &len);
  4631. if (error)
  4632. return error;
  4633. start_blk = start >> inode->i_sb->s_blocksize_bits;
  4634. last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
  4635. if (last_blk >= EXT_MAX_BLOCKS)
  4636. last_blk = EXT_MAX_BLOCKS-1;
  4637. len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
  4638. /*
  4639. * Walk the extent tree gathering extent information
  4640. * and pushing extents back to the user.
  4641. */
  4642. return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
  4643. }
  /*
   * ext4_ext_shift_path_extents:
   * Shift the extents of a path structure lying between path[depth].p_ext
   * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
   * whether it is a right shift or a left shift operation.
   *
   * If the first extent of the leaf is among those shifted, the index
   * entries above the leaf are adjusted by the same amount, walking up the
   * path for as long as the entry just updated is the first index of its
   * node.
   *
   * Returns 0 on success or a negative error code. A positive return from
   * ext4_datasem_ensure_credits() is converted to -EAGAIN.
   */
  static int
  ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
  			    struct inode *inode, handle_t *handle,
  			    enum SHIFT_DIRECTION SHIFT)
  {
  	int depth, err = 0;
  	struct ext4_extent *ex_start, *ex_last;
  	/* set when the leaf's first extent moves, so index keys need fixing */
  	bool update = false;
  	int credits, restart_credits;

  	depth = path->p_depth;

  	while (depth >= 0) {
  		/* First pass only: shift the extents in the leaf itself. */
  		if (depth == path->p_depth) {
  			ex_start = path[depth].p_ext;
  			if (!ex_start)
  				return -EFSCORRUPTED;

  			ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
  			/* leaf + sb + inode */
  			credits = 3;
  			if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
  				update = true;
  				/* extent tree + sb + inode */
  				credits = depth + 2;
  			}

  			restart_credits = ext4_chunk_trans_extent(inode, 0);
  			err = ext4_datasem_ensure_credits(handle, inode, credits,
  					restart_credits, 0);
  			if (err) {
  				/* positive result is reported as -EAGAIN */
  				if (err > 0)
  					err = -EAGAIN;
  				goto out;
  			}

  			err = ext4_ext_get_access(handle, inode, path + depth);
  			if (err)
  				goto out;

  			/*
  			 * Left shifts walk forward from ex_start; right
  			 * shifts walk backward from ex_last.
  			 */
  			while (ex_start <= ex_last) {
  				if (SHIFT == SHIFT_LEFT) {
  					le32_add_cpu(&ex_start->ee_block,
  						-shift);
  					/*
  					 * Try to merge to the left. On a
  					 * successful merge the last-extent
  					 * bound is pulled in by one and
  					 * ex_start is not advanced.
  					 */
  					if ((ex_start >
  					    EXT_FIRST_EXTENT(path[depth].p_hdr))
  						&&
  						ext4_ext_try_to_merge_right(inode,
  						path, ex_start - 1))
  						ex_last--;
  					else
  						ex_start++;
  				} else {
  					le32_add_cpu(&ex_last->ee_block, shift);
  					ext4_ext_try_to_merge_right(inode, path,
  						ex_last);
  					ex_last--;
  				}
  			}
  			err = ext4_ext_dirty(handle, inode, path + depth);
  			if (err)
  				goto out;

  			/* Done unless the leaf's first extent was shifted. */
  			if (--depth < 0 || !update)
  				break;
  		}

  		/* Update index too */
  		err = ext4_ext_get_access(handle, inode, path + depth);
  		if (err)
  			goto out;

  		if (SHIFT == SHIFT_LEFT)
  			le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
  		else
  			le32_add_cpu(&path[depth].p_idx->ei_block, shift);
  		err = ext4_ext_dirty(handle, inode, path + depth);
  		if (err)
  			goto out;

  		/* we are done if current index is not a starting index */
  		if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
  			break;

  		depth--;
  	}

  out:
  	return err;
  }
  /*
   * ext4_ext_shift_extents:
   * All the extents which lie in the range from @start to the last allocated
   * block for the @inode are shifted either towards left or right (depending
   * upon @SHIFT) by @shift blocks.
   *
   * Returns 0 on success (including when no extent is found at the end of
   * the tree), -EINVAL when the shift does not fit, or another negative
   * error code.
   */
  static int
  ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
  		       ext4_lblk_t start, ext4_lblk_t shift,
  		       enum SHIFT_DIRECTION SHIFT)
  {
  	struct ext4_ext_path *path;
  	int ret = 0, depth;
  	struct ext4_extent *extent;
  	ext4_lblk_t stop, *iterator, ex_start, ex_end;
  	/*
  	 * Iterator position saved before each leaf is shifted; restored
  	 * after a restart at "again". EXT_MAX_BLOCKS means nothing saved yet.
  	 */
  	ext4_lblk_t tmp = EXT_MAX_BLOCKS;

  	/* Let path point to the last extent */
  	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
  				EXT4_EX_NOCACHE);
  	if (IS_ERR(path))
  		return PTR_ERR(path);

  	depth = path->p_depth;
  	extent = path[depth].p_ext;
  	/* No extent found: nothing to shift, ret is still 0. */
  	if (!extent)
  		goto out;

  	stop = le32_to_cpu(extent->ee_block);

  	/*
  	 * For left shifts, make sure the hole on the left is big enough to
  	 * accommodate the shift. For right shifts, make sure the last extent
  	 * won't be shifted beyond EXT_MAX_BLOCKS.
  	 */
  	if (SHIFT == SHIFT_LEFT) {
  		path = ext4_find_extent(inode, start - 1, path,
  					EXT4_EX_NOCACHE);
  		if (IS_ERR(path))
  			return PTR_ERR(path);
  		depth = path->p_depth;
  		extent = path[depth].p_ext;
  		if (extent) {
  			ex_start = le32_to_cpu(extent->ee_block);
  			ex_end = le32_to_cpu(extent->ee_block) +
  				ext4_ext_get_actual_len(extent);
  		} else {
  			ex_start = 0;
  			ex_end = 0;
  		}

  		if ((start == ex_start && shift > ex_start) ||
  		    (shift > start - ex_end)) {
  			ret = -EINVAL;
  			goto out;
  		}
  	} else {
  		if (shift > EXT_MAX_BLOCKS -
  		    (stop + ext4_ext_get_actual_len(extent))) {
  			ret = -EINVAL;
  			goto out;
  		}
  	}

  	/*
  	 * In case of left shift, iterator points to start and it is increased
  	 * till we reach stop. In case of right shift, iterator points to stop
  	 * and it is decreased till we reach start.
  	 */
  again:
  	ret = 0;
  	if (SHIFT == SHIFT_LEFT)
  		iterator = &start;
  	else
  		iterator = &stop;

  	/* After a restart, resume from the position saved in tmp. */
  	if (tmp != EXT_MAX_BLOCKS)
  		*iterator = tmp;

  	/*
  	 * It's safe to start updating extents. Start and stop are unsigned,
  	 * so in case of right shift if extent with 0 block is reached,
  	 * iterator becomes NULL to indicate the end of the loop.
  	 */
  	while (iterator && start <= stop) {
  		path = ext4_find_extent(inode, *iterator, path,
  					EXT4_EX_NOCACHE);
  		if (IS_ERR(path))
  			return PTR_ERR(path);
  		depth = path->p_depth;
  		extent = path[depth].p_ext;
  		if (!extent) {
  			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
  					 (unsigned long) *iterator);
  			ret = -EFSCORRUPTED;
  			goto out;
  		}
  		if (SHIFT == SHIFT_LEFT && *iterator >
  		    le32_to_cpu(extent->ee_block)) {
  			/* Hole, move to the next extent */
  			if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
  				path[depth].p_ext++;
  			} else {
  				*iterator = ext4_ext_next_allocated_block(path);
  				continue;
  			}
  		}

  		/* Remember where this leaf started, for a possible restart. */
  		tmp = *iterator;
  		if (SHIFT == SHIFT_LEFT) {
  			/* Next pass starts just past this leaf's last extent. */
  			extent = EXT_LAST_EXTENT(path[depth].p_hdr);
  			*iterator = le32_to_cpu(extent->ee_block) +
  					ext4_ext_get_actual_len(extent);
  		} else {
  			extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
  			if (le32_to_cpu(extent->ee_block) > start)
  				/* whole leaf is shifted; continue to its left */
  				*iterator = le32_to_cpu(extent->ee_block) - 1;
  			else if (le32_to_cpu(extent->ee_block) == start)
  				/* leaf begins exactly at start: last pass */
  				iterator = NULL;
  			else {
  				/*
  				 * start falls inside this leaf: find the
  				 * first extent at or after start.
  				 */
  				extent = EXT_LAST_EXTENT(path[depth].p_hdr);
  				while (le32_to_cpu(extent->ee_block) >= start)
  					extent--;

  				/* no extent in this leaf is at or after start */
  				if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
  					break;

  				extent++;
  				iterator = NULL;
  			}
  			path[depth].p_ext = extent;
  		}
  		ret = ext4_ext_shift_path_extents(path, shift, inode,
  				handle, SHIFT);
  		/* iterator can be NULL which means we should break */
  		if (ret == -EAGAIN)
  			goto again;
  		if (ret)
  			break;
  	}
  out:
  	ext4_free_ext_path(path);
  	return ret;
  }
  /*
   * ext4_collapse_range:
   * This implements the fallocate's collapse range functionality for ext4.
   * The blocks in [@offset, @offset + @len) are removed, the extents behind
   * them are shifted left by the same amount and the inode size is reduced
   * by @len bytes.
   *
   * Warns (once) if the inode lock is not held on entry.
   *
   * Returns 0 on success and a non-zero error code on failure.
   */
  static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
  {
  	struct inode *inode = file_inode(file);
  	struct super_block *sb = inode->i_sb;
  	struct address_space *mapping = inode->i_mapping;
  	loff_t end = offset + len;
  	ext4_lblk_t start_lblk, end_lblk;
  	handle_t *handle;
  	unsigned int credits;
  	loff_t start, new_size;
  	int ret;

  	trace_ext4_collapse_range(inode, offset, len);
  	WARN_ON_ONCE(!inode_is_locked(inode));

  	/* Currently just for extent based files */
  	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
  		return -EOPNOTSUPP;
  	/* Collapse range works only on fs cluster size aligned regions. */
  	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
  		return -EINVAL;
  	/*
  	 * There is no need to overlap collapse range with EOF, in which case
  	 * it is effectively a truncate operation
  	 */
  	if (end >= inode->i_size)
  		return -EINVAL;

  	/*
  	 * Write tail of the last page before removed range and data that
  	 * will be shifted since they will get removed from the page cache
  	 * below. We are also protected from pages becoming dirty by
  	 * i_rwsem and invalidate_lock.
  	 * Need to round down offset to be aligned with page size boundary
  	 * for page size > block size.
  	 */
  	start = round_down(offset, PAGE_SIZE);
  	ret = filemap_write_and_wait_range(mapping, start, offset);
  	if (!ret)
  		ret = filemap_write_and_wait_range(mapping, end, LLONG_MAX);
  	if (ret)
  		return ret;

  	/* Drop the page cache from the page-aligned start onwards. */
  	truncate_pagecache(inode, start);

  	credits = ext4_chunk_trans_extent(inode, 0);
  	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
  	if (IS_ERR(handle))
  		return PTR_ERR(handle);

  	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

  	start_lblk = offset >> inode->i_blkbits;
  	end_lblk = (offset + len) >> inode->i_blkbits;

  	ext4_check_map_extents_env(inode);

  	/* i_data_sem is held for write across removal, shift and resize. */
  	down_write(&EXT4_I(inode)->i_data_sem);
  	ext4_discard_preallocations(inode);
  	ext4_es_remove_extent(inode, start_lblk, EXT_MAX_BLOCKS - start_lblk);

  	/* Remove the blocks of the collapsed range itself. */
  	ret = ext4_ext_remove_space(inode, start_lblk, end_lblk - 1);
  	if (ret) {
  		up_write(&EXT4_I(inode)->i_data_sem);
  		goto out_handle;
  	}
  	ext4_discard_preallocations(inode);

  	/* Move everything from end_lblk onwards left over the removed range. */
  	ret = ext4_ext_shift_extents(inode, handle, end_lblk,
  				     end_lblk - start_lblk, SHIFT_LEFT);
  	if (ret) {
  		up_write(&EXT4_I(inode)->i_data_sem);
  		goto out_handle;
  	}

  	new_size = inode->i_size - len;
  	i_size_write(inode, new_size);
  	EXT4_I(inode)->i_disksize = new_size;

  	up_write(&EXT4_I(inode)->i_data_sem);
  	ret = ext4_mark_inode_dirty(handle, inode);
  	if (ret)
  		goto out_handle;

  	ext4_update_inode_fsync_trans(handle, inode, 1);
  	if (IS_SYNC(inode))
  		ext4_handle_sync(handle);

  out_handle:
  	/* The result of stopping the handle is not propagated. */
  	ext4_journal_stop(handle);
  	return ret;
  }
  /*
   * ext4_insert_range:
   * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
   * The data blocks starting from @offset to the EOF are shifted by @len
   * towards right to create a hole in the @inode. Inode size is increased
   * by len bytes.
   *
   * Warns (once) if the inode lock is not held on entry. The size is grown
   * before the extents are shifted and is not rolled back here if a later
   * step fails.
   *
   * Returns 0 on success, error otherwise.
   */
  static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
  {
  	struct inode *inode = file_inode(file);
  	struct super_block *sb = inode->i_sb;
  	struct address_space *mapping = inode->i_mapping;
  	handle_t *handle;
  	struct ext4_ext_path *path;
  	struct ext4_extent *extent;
  	ext4_lblk_t start_lblk, len_lblk, ee_start_lblk = 0;
  	unsigned int credits, ee_len;
  	int ret, depth;
  	loff_t start;

  	trace_ext4_insert_range(inode, offset, len);
  	WARN_ON_ONCE(!inode_is_locked(inode));

  	/* Currently just for extent based files */
  	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
  		return -EOPNOTSUPP;
  	/* Insert range works only on fs cluster size aligned regions. */
  	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
  		return -EINVAL;
  	/* Offset must be less than i_size */
  	if (offset >= inode->i_size)
  		return -EINVAL;
  	/* Check whether the maximum file size would be exceeded */
  	if (len > inode->i_sb->s_maxbytes - inode->i_size)
  		return -EFBIG;

  	/*
  	 * Write out all dirty pages. Need to round down to align start offset
  	 * to page size boundary for page size > block size.
  	 */
  	start = round_down(offset, PAGE_SIZE);
  	ret = filemap_write_and_wait_range(mapping, start, LLONG_MAX);
  	if (ret)
  		return ret;

  	/* Drop the page cache from the page-aligned start onwards. */
  	truncate_pagecache(inode, start);

  	credits = ext4_chunk_trans_extent(inode, 0);
  	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
  	if (IS_ERR(handle))
  		return PTR_ERR(handle);

  	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

  	/* Expand file to avoid data loss if there is error while shifting */
  	inode->i_size += len;
  	EXT4_I(inode)->i_disksize += len;
  	ret = ext4_mark_inode_dirty(handle, inode);
  	if (ret)
  		goto out_handle;

  	start_lblk = offset >> inode->i_blkbits;
  	len_lblk = len >> inode->i_blkbits;

  	ext4_check_map_extents_env(inode);

  	/* i_data_sem is held for write across the split and the shift. */
  	down_write(&EXT4_I(inode)->i_data_sem);
  	ext4_discard_preallocations(inode);

  	path = ext4_find_extent(inode, start_lblk, NULL, 0);
  	if (IS_ERR(path)) {
  		up_write(&EXT4_I(inode)->i_data_sem);
  		ret = PTR_ERR(path);
  		goto out_handle;
  	}

  	depth = ext_depth(inode);
  	extent = path[depth].p_ext;
  	if (extent) {
  		ee_start_lblk = le32_to_cpu(extent->ee_block);
  		ee_len = ext4_ext_get_actual_len(extent);

  		/*
  		 * If start_lblk is not the starting block of extent, split
  		 * the extent @start_lblk
  		 */
  		if ((start_lblk > ee_start_lblk) &&
  				(start_lblk < (ee_start_lblk + ee_len))) {
  			path = ext4_split_extent_at(handle, inode, path,
  					start_lblk, EXT4_EX_NOCACHE |
  					EXT4_GET_BLOCKS_SPLIT_NOMERGE |
  					EXT4_GET_BLOCKS_METADATA_NOFAIL);
  		}

  		/* Only the split above can have turned path into an error. */
  		if (IS_ERR(path)) {
  			up_write(&EXT4_I(inode)->i_data_sem);
  			ret = PTR_ERR(path);
  			goto out_handle;
  		}
  	}

  	ext4_free_ext_path(path);
  	ext4_es_remove_extent(inode, start_lblk, EXT_MAX_BLOCKS - start_lblk);

  	/*
  	 * if start_lblk lies in a hole which is at start of file, use
  	 * ee_start_lblk to shift extents
  	 */
  	ret = ext4_ext_shift_extents(inode, handle,
  		max(ee_start_lblk, start_lblk), len_lblk, SHIFT_RIGHT);

  	up_write(&EXT4_I(inode)->i_data_sem);
  	if (ret)
  		goto out_handle;

  	ext4_update_inode_fsync_trans(handle, inode, 1);
  	if (IS_SYNC(inode))
  		ext4_handle_sync(handle);

  out_handle:
  	/* The result of stopping the handle is not propagated. */
  	ext4_journal_stop(handle);
  	return ret;
  }
  /**
   * ext4_swap_extents() - Swap extents between two inodes
   * @handle: handle for this transaction
   * @inode1:     First inode
   * @inode2:     Second inode
   * @lblk1:      Start block for first inode
   * @lblk2:      Start block for second inode
   * @count:      Number of blocks to swap
   * @unwritten: Mark second inode's extents as unwritten after swap
   * @erp:        Pointer to save error value
   *
   * This helper routine does exactly what it promises: "swap extents". All
   * other stuff such as page-cache locking consistency, bh mapping
   * consistency or extent's data copying must be performed by caller.
   * Locking:
   *		i_rwsem is held for both inodes
   *		i_data_sem is locked for write for both inodes
   * Assumptions:
   *		All pages from requested range are locked for both inodes
   *
   * Return: the number of blocks actually swapped. *@erp is written only on
   * the error paths; it is left untouched when the loop stops because there
   * is nothing (more) to swap, so the caller presumably initializes it.
   */
  int
  ext4_swap_extents(handle_t *handle, struct inode *inode1,
  		  struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
  		  ext4_lblk_t count, int unwritten, int *erp)
  {
  	struct ext4_ext_path *path1 = NULL;
  	struct ext4_ext_path *path2 = NULL;
  	int replaced_count = 0;

  	BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
  	BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
  	BUG_ON(!inode_is_locked(inode1));
  	BUG_ON(!inode_is_locked(inode2));

  	ext4_es_remove_extent(inode1, lblk1, count);
  	ext4_es_remove_extent(inode2, lblk2, count);

  	while (count) {
  		struct ext4_extent *ex1, *ex2, tmp_ex;
  		ext4_lblk_t e1_blk, e2_blk;
  		int e1_len, e2_len, len;
  		/* set whenever an extent was split in this pass */
  		int split = 0;

  		path1 = ext4_find_extent(inode1, lblk1, path1, EXT4_EX_NOCACHE);
  		if (IS_ERR(path1)) {
  			*erp = PTR_ERR(path1);
  			goto errout;
  		}
  		path2 = ext4_find_extent(inode2, lblk2, path2, EXT4_EX_NOCACHE);
  		if (IS_ERR(path2)) {
  			*erp = PTR_ERR(path2);
  			goto errout;
  		}
  		ex1 = path1[path1->p_depth].p_ext;
  		ex2 = path2[path2->p_depth].p_ext;
  		/* Do we have something to swap? */
  		if (unlikely(!ex2 || !ex1))
  			goto errout;

  		e1_blk = le32_to_cpu(ex1->ee_block);
  		e2_blk = le32_to_cpu(ex2->ee_block);
  		e1_len = ext4_ext_get_actual_len(ex1);
  		e2_len = ext4_ext_get_actual_len(ex2);

  		/* Hole handling */
  		if (!in_range(lblk1, e1_blk, e1_len) ||
  		    !in_range(lblk2, e2_blk, e2_len)) {
  			ext4_lblk_t next1, next2;

  			/* if hole after extent, then go to next extent */
  			next1 = ext4_ext_next_allocated_block(path1);
  			next2 = ext4_ext_next_allocated_block(path2);
  			/* If hole before extent, then shift to that extent */
  			if (e1_blk > lblk1)
  				next1 = e1_blk;
  			if (e2_blk > lblk2)
  				next2 = e2_blk;
  			/* Do we have something to swap */
  			if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
  				goto errout;
  			/* Move to the rightmost boundary */
  			len = next1 - lblk1;
  			if (len < next2 - lblk2)
  				len = next2 - lblk2;
  			if (len > count)
  				len = count;
  			/* Skipped blocks are not counted as replaced. */
  			lblk1 += len;
  			lblk2 += len;
  			count -= len;
  			continue;
  		}

  		/* Prepare left boundary */
  		if (e1_blk < lblk1) {
  			split = 1;
  			path1 = ext4_force_split_extent_at(handle, inode1,
  							path1, lblk1, 0);
  			if (IS_ERR(path1)) {
  				*erp = PTR_ERR(path1);
  				goto errout;
  			}
  		}
  		if (e2_blk < lblk2) {
  			split = 1;
  			path2 = ext4_force_split_extent_at(handle, inode2,
  							path2, lblk2, 0);
  			if (IS_ERR(path2)) {
  				*erp = PTR_ERR(path2);
  				goto errout;
  			}
  		}
  		/* ext4_split_extent_at() may result in leaf extent split,
  		 * path must be revalidated. */
  		if (split)
  			continue;

  		/* Prepare right boundary */
  		len = count;
  		if (len > e1_blk + e1_len - lblk1)
  			len = e1_blk + e1_len - lblk1;
  		if (len > e2_blk + e2_len - lblk2)
  			len = e2_blk + e2_len - lblk2;

  		if (len != e1_len) {
  			split = 1;
  			path1 = ext4_force_split_extent_at(handle, inode1,
  							path1, lblk1 + len, 0);
  			if (IS_ERR(path1)) {
  				*erp = PTR_ERR(path1);
  				goto errout;
  			}
  		}
  		if (len != e2_len) {
  			split = 1;
  			path2 = ext4_force_split_extent_at(handle, inode2,
  							path2, lblk2 + len, 0);
  			if (IS_ERR(path2)) {
  				*erp = PTR_ERR(path2);
  				goto errout;
  			}
  		}
  		/* ext4_split_extent_at() may result in leaf extent split,
  		 * path must be revalidated. */
  		if (split)
  			continue;

  		BUG_ON(e2_len != e1_len);
  		*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
  		if (unlikely(*erp))
  			goto errout;
  		*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
  		if (unlikely(*erp))
  			goto errout;

  		/* Both extents are fully inside boundaries. Swap it now */
  		tmp_ex = *ex1;
  		ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
  		ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
  		ex1->ee_len = cpu_to_le16(e2_len);
  		ex2->ee_len = cpu_to_le16(e1_len);
  		if (unwritten)
  			ext4_ext_mark_unwritten(ex2);
  		/* ex1 keeps the unwritten state it had before the swap. */
  		if (ext4_ext_is_unwritten(&tmp_ex))
  			ext4_ext_mark_unwritten(ex1);

  		ext4_ext_try_to_merge(handle, inode2, path2, ex2);
  		ext4_ext_try_to_merge(handle, inode1, path1, ex1);
  		*erp = ext4_ext_dirty(handle, inode2, path2 +
  				      path2->p_depth);
  		if (unlikely(*erp))
  			goto errout;
  		*erp = ext4_ext_dirty(handle, inode1, path1 +
  				      path1->p_depth);
  		/*
  		 * This looks scary: the second inode already points to the
  		 * new blocks and was successfully dirtied. Luckily an error
  		 * may happen only due to a journal error, so the full
  		 * transaction will be aborted anyway.
  		 */
  		if (unlikely(*erp))
  			goto errout;

  		lblk1 += len;
  		lblk2 += len;
  		replaced_count += len;
  		count -= len;
  	}

  errout:
  	ext4_free_ext_path(path1);
  	ext4_free_ext_path(path2);
  	return replaced_count;
  }
  5228. /*
  5229. * ext4_clu_mapped - determine whether any block in a logical cluster has
  5230. * been mapped to a physical cluster
  5231. *
  5232. * @inode - file containing the logical cluster
  5233. * @lclu - logical cluster of interest
  5234. *
  5235. * Returns 1 if any block in the logical cluster is mapped, signifying
  5236. * that a physical cluster has been allocated for it. Otherwise,
  5237. * returns 0. Can also return negative error codes. Derived from
  5238. * ext4_ext_map_blocks().
  5239. */
  5240. int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
  5241. {
  5242. struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  5243. struct ext4_ext_path *path;
  5244. int depth, mapped = 0, err = 0;
  5245. struct ext4_extent *extent;
  5246. ext4_lblk_t first_lblk, first_lclu, last_lclu;
  5247. /*
  5248. * if data can be stored inline, the logical cluster isn't
  5249. * mapped - no physical clusters have been allocated, and the
  5250. * file has no extents
  5251. */
  5252. if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) ||
  5253. ext4_has_inline_data(inode))
  5254. return 0;
  5255. /* search for the extent closest to the first block in the cluster */
  5256. path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
  5257. if (IS_ERR(path))
  5258. return PTR_ERR(path);
  5259. depth = ext_depth(inode);
  5260. /*
  5261. * A consistent leaf must not be empty. This situation is possible,
  5262. * though, _during_ tree modification, and it's why an assert can't
  5263. * be put in ext4_find_extent().
  5264. */
  5265. if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
  5266. EXT4_ERROR_INODE(inode,
  5267. "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
  5268. (unsigned long) EXT4_C2B(sbi, lclu),
  5269. depth, path[depth].p_block);
  5270. err = -EFSCORRUPTED;
  5271. goto out;
  5272. }
  5273. extent = path[depth].p_ext;
  5274. /* can't be mapped if the extent tree is empty */
  5275. if (extent == NULL)
  5276. goto out;
  5277. first_lblk = le32_to_cpu(extent->ee_block);
  5278. first_lclu = EXT4_B2C(sbi, first_lblk);
  5279. /*
  5280. * Three possible outcomes at this point - found extent spanning
  5281. * the target cluster, to the left of the target cluster, or to the
  5282. * right of the target cluster. The first two cases are handled here.
  5283. * The last case indicates the target cluster is not mapped.
  5284. */
  5285. if (lclu >= first_lclu) {
  5286. last_lclu = EXT4_B2C(sbi, first_lblk +
  5287. ext4_ext_get_actual_len(extent) - 1);
  5288. if (lclu <= last_lclu) {
  5289. mapped = 1;
  5290. } else {
  5291. first_lblk = ext4_ext_next_allocated_block(path);
  5292. first_lclu = EXT4_B2C(sbi, first_lblk);
  5293. if (lclu == first_lclu)
  5294. mapped = 1;
  5295. }
  5296. }
  5297. out:
  5298. ext4_free_ext_path(path);
  5299. return err ? err : mapped;
  5300. }
  /*
   * Updates physical block address and unwritten status of extent
   * starting at lblk start and of len. If such an extent doesn't exist,
   * this function splits the extent tree appropriately to create an
   * extent like this. This function is called in the fast commit
   * replay path. Returns 0 on success and error on failure.
   *
   * No journal handle is used (NULL is passed to the helpers), and
   * i_data_sem is taken for write only around each tree modification.
   */
  int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
  			      int len, int unwritten, ext4_fsblk_t pblk)
  {
  	struct ext4_ext_path *path;
  	struct ext4_extent *ex;
  	int ret;

  	path = ext4_find_extent(inode, start, NULL, 0);
  	if (IS_ERR(path))
  		return PTR_ERR(path);
  	ex = path[path->p_depth].p_ext;
  	if (!ex) {
  		ret = -EFSCORRUPTED;
  		goto out;
  	}

  	if (le32_to_cpu(ex->ee_block) != start ||
  		ext4_ext_get_actual_len(ex) != len) {
  		/* We need to split this extent to match our extent first */
  		down_write(&EXT4_I(inode)->i_data_sem);
  		path = ext4_force_split_extent_at(NULL, inode, path, start, 1);
  		up_write(&EXT4_I(inode)->i_data_sem);
  		if (IS_ERR(path)) {
  			ret = PTR_ERR(path);
  			goto out;
  		}

  		/* Look the extent up again after the split. */
  		path = ext4_find_extent(inode, start, path, 0);
  		if (IS_ERR(path))
  			return PTR_ERR(path);

  		ex = path[path->p_depth].p_ext;
  		WARN_ON(le32_to_cpu(ex->ee_block) != start);

  		/* Still too long: split again at the end of the range. */
  		if (ext4_ext_get_actual_len(ex) != len) {
  			down_write(&EXT4_I(inode)->i_data_sem);
  			path = ext4_force_split_extent_at(NULL, inode, path,
  							  start + len, 1);
  			up_write(&EXT4_I(inode)->i_data_sem);
  			if (IS_ERR(path)) {
  				ret = PTR_ERR(path);
  				goto out;
  			}

  			path = ext4_find_extent(inode, start, path, 0);
  			if (IS_ERR(path))
  				return PTR_ERR(path);
  			ex = path[path->p_depth].p_ext;
  		}
  	}
  	/* Apply the requested state and physical block to the extent. */
  	if (unwritten)
  		ext4_ext_mark_unwritten(ex);
  	else
  		ext4_ext_mark_initialized(ex);
  	ext4_ext_store_pblock(ex, pblk);
  	down_write(&EXT4_I(inode)->i_data_sem);
  	ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
  	up_write(&EXT4_I(inode)->i_data_sem);
  out:
  	/*
  	 * Also reached with an ERR_PTR path after a failed split; this
  	 * relies on ext4_free_ext_path() accepting that (defined elsewhere,
  	 * TODO confirm). The inode is marked dirty on every path through
  	 * here and that result is ignored.
  	 */
  	ext4_free_ext_path(path);
  	ext4_mark_inode_dirty(NULL, inode);
  	return ret;
  }
  5365. /* Try to shrink the extent tree */
  5366. void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
  5367. {
  5368. struct ext4_ext_path *path = NULL;
  5369. struct ext4_extent *ex;
  5370. ext4_lblk_t old_cur, cur = 0;
  5371. while (cur < end) {
  5372. path = ext4_find_extent(inode, cur, NULL, 0);
  5373. if (IS_ERR(path))
  5374. return;
  5375. ex = path[path->p_depth].p_ext;
  5376. if (!ex) {
  5377. ext4_free_ext_path(path);
  5378. ext4_mark_inode_dirty(NULL, inode);
  5379. return;
  5380. }
  5381. old_cur = cur;
  5382. cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
  5383. if (cur <= old_cur)
  5384. cur = old_cur + 1;
  5385. ext4_ext_try_to_merge(NULL, inode, path, ex);
  5386. down_write(&EXT4_I(inode)->i_data_sem);
  5387. ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
  5388. up_write(&EXT4_I(inode)->i_data_sem);
  5389. ext4_mark_inode_dirty(NULL, inode);
  5390. ext4_free_ext_path(path);
  5391. }
  5392. }
  5393. /* Check if *cur is a hole and if it is, skip it */
  5394. static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
  5395. {
  5396. int ret;
  5397. struct ext4_map_blocks map;
  5398. map.m_lblk = *cur;
  5399. map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
  5400. ret = ext4_map_blocks(NULL, inode, &map, 0);
  5401. if (ret < 0)
  5402. return ret;
  5403. if (ret != 0)
  5404. return 0;
  5405. *cur = *cur + map.m_len;
  5406. return 0;
  5407. }
  5408. /* Count number of blocks used by this inode and update i_blocks */
  5409. int ext4_ext_replay_set_iblocks(struct inode *inode)
  5410. {
  5411. struct ext4_ext_path *path = NULL, *path2 = NULL;
  5412. struct ext4_extent *ex;
  5413. ext4_lblk_t cur = 0, end;
  5414. int numblks = 0, i, ret = 0;
  5415. ext4_fsblk_t cmp1, cmp2;
  5416. struct ext4_map_blocks map;
  5417. /* Determin the size of the file first */
  5418. path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
  5419. EXT4_EX_NOCACHE);
  5420. if (IS_ERR(path))
  5421. return PTR_ERR(path);
  5422. ex = path[path->p_depth].p_ext;
  5423. if (!ex)
  5424. goto out;
  5425. end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
  5426. /* Count the number of data blocks */
  5427. cur = 0;
  5428. while (cur < end) {
  5429. map.m_lblk = cur;
  5430. map.m_len = end - cur;
  5431. ret = ext4_map_blocks(NULL, inode, &map, 0);
  5432. if (ret < 0)
  5433. break;
  5434. if (ret > 0)
  5435. numblks += ret;
  5436. cur = cur + map.m_len;
  5437. }
  5438. /*
  5439. * Count the number of extent tree blocks. We do it by looking up
  5440. * two successive extents and determining the difference between
  5441. * their paths. When path is different for 2 successive extents
  5442. * we compare the blocks in the path at each level and increment
  5443. * iblocks by total number of differences found.
  5444. */
  5445. cur = 0;
  5446. ret = skip_hole(inode, &cur);
  5447. if (ret < 0)
  5448. goto out;
  5449. path = ext4_find_extent(inode, cur, path, 0);
  5450. if (IS_ERR(path))
  5451. goto out;
  5452. numblks += path->p_depth;
  5453. while (cur < end) {
  5454. path = ext4_find_extent(inode, cur, path, 0);
  5455. if (IS_ERR(path))
  5456. break;
  5457. ex = path[path->p_depth].p_ext;
  5458. if (!ex)
  5459. goto cleanup;
  5460. cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
  5461. ext4_ext_get_actual_len(ex));
  5462. ret = skip_hole(inode, &cur);
  5463. if (ret < 0)
  5464. break;
  5465. path2 = ext4_find_extent(inode, cur, path2, 0);
  5466. if (IS_ERR(path2))
  5467. break;
  5468. for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
  5469. cmp1 = cmp2 = 0;
  5470. if (i <= path->p_depth)
  5471. cmp1 = path[i].p_bh ?
  5472. path[i].p_bh->b_blocknr : 0;
  5473. if (i <= path2->p_depth)
  5474. cmp2 = path2[i].p_bh ?
  5475. path2[i].p_bh->b_blocknr : 0;
  5476. if (cmp1 != cmp2 && cmp2 != 0)
  5477. numblks++;
  5478. }
  5479. }
  5480. out:
  5481. inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
  5482. ext4_mark_inode_dirty(NULL, inode);
  5483. cleanup:
  5484. ext4_free_ext_path(path);
  5485. ext4_free_ext_path(path2);
  5486. return 0;
  5487. }
  5488. int ext4_ext_clear_bb(struct inode *inode)
  5489. {
  5490. struct ext4_ext_path *path = NULL;
  5491. struct ext4_extent *ex;
  5492. ext4_lblk_t cur = 0, end;
  5493. int j, ret = 0;
  5494. struct ext4_map_blocks map;
  5495. if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
  5496. return 0;
  5497. /* Determin the size of the file first */
  5498. path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
  5499. EXT4_EX_NOCACHE);
  5500. if (IS_ERR(path))
  5501. return PTR_ERR(path);
  5502. ex = path[path->p_depth].p_ext;
  5503. if (!ex)
  5504. goto out;
  5505. end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
  5506. cur = 0;
  5507. while (cur < end) {
  5508. map.m_lblk = cur;
  5509. map.m_len = end - cur;
  5510. ret = ext4_map_blocks(NULL, inode, &map, 0);
  5511. if (ret < 0)
  5512. break;
  5513. if (ret > 0) {
  5514. path = ext4_find_extent(inode, map.m_lblk, path, 0);
  5515. if (!IS_ERR(path)) {
  5516. for (j = 0; j < path->p_depth; j++) {
  5517. ext4_mb_mark_bb(inode->i_sb,
  5518. path[j].p_block, 1, false);
  5519. ext4_fc_record_regions(inode->i_sb, inode->i_ino,
  5520. 0, path[j].p_block, 1, 1);
  5521. }
  5522. } else {
  5523. path = NULL;
  5524. }
  5525. ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false);
  5526. ext4_fc_record_regions(inode->i_sb, inode->i_ino,
  5527. map.m_lblk, map.m_pblk, map.m_len, 1);
  5528. }
  5529. cur = cur + map.m_len;
  5530. }
  5531. out:
  5532. ext4_free_ext_path(path);
  5533. return 0;
  5534. }
  5535. #if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
  5536. int ext4_ext_space_root_idx_test(struct inode *inode, int check)
  5537. {
  5538. return ext4_ext_space_root_idx(inode, check);
  5539. }
  5540. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test);
  5541. struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle,
  5542. struct inode *inode, struct ext4_map_blocks *map,
  5543. struct ext4_ext_path *path, int flags,
  5544. unsigned int *allocated)
  5545. {
  5546. return ext4_split_convert_extents(handle, inode, map, path,
  5547. flags, allocated);
  5548. }
  5549. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test);
  5550. EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty);
  5551. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout);
  5552. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker);
  5553. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker);
  5554. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks);
  5555. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree);
  5556. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent);
  5557. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent);
  5558. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent);
  5559. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent);
  5560. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout);
  5561. EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks);
  5562. #endif