send.c 217 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2012 Alexander Block. All rights reserved.
  4. */
  5. #include <linux/bsearch.h>
  6. #include <linux/falloc.h>
  7. #include <linux/fs.h>
  8. #include <linux/file.h>
  9. #include <linux/sort.h>
  10. #include <linux/mount.h>
  11. #include <linux/xattr.h>
  12. #include <linux/posix_acl_xattr.h>
  13. #include <linux/radix-tree.h>
  14. #include <linux/vmalloc.h>
  15. #include <linux/string.h>
  16. #include <linux/compat.h>
  17. #include <linux/crc32c.h>
  18. #include <linux/fsverity.h>
  19. #include "send.h"
  20. #include "ctree.h"
  21. #include "backref.h"
  22. #include "locking.h"
  23. #include "disk-io.h"
  24. #include "btrfs_inode.h"
  25. #include "transaction.h"
  26. #include "compression.h"
  27. #include "print-tree.h"
  28. #include "accessors.h"
  29. #include "dir-item.h"
  30. #include "file-item.h"
  31. #include "ioctl.h"
  32. #include "verity.h"
  33. #include "lru_cache.h"
  34. /*
  35. * Maximum number of references an extent can have in order for us to attempt to
  36. * issue clone operations instead of write operations. This currently exists to
  37. * avoid hitting limitations of the backreference walking code (taking a lot of
  38. * time and using too much memory for extents with a large number of references).
  39. */
  40. #define SEND_MAX_EXTENT_REFS 1024
  41. /*
  42. * A fs_path is a helper to dynamically build path names with unknown size.
  43. * It reallocates the internal buffer on demand.
  44. * It allows fast adding of path elements on the right side (normal path) and
  45. * fast adding to the left side (reversed path). A reversed path can also be
  46. * unreversed if needed.
  47. *
  48. * The definition of struct fs_path relies on -fms-extensions to allow
  49. * including a tagged struct as an anonymous member.
  50. */
  51. struct __fs_path {
  52. char *start;
  53. char *end;
  54. char *buf;
  55. unsigned short buf_len:15;
  56. unsigned short reversed:1;
  57. };
  58. static_assert(sizeof(struct __fs_path) < 256);
  59. struct fs_path {
  60. struct __fs_path;
  61. /*
  62. * Average path length does not exceed 200 bytes, we'll have
  63. * better packing in the slab and higher chance to satisfy
  64. * an allocation later during send.
  65. */
  66. char inline_buf[256 - sizeof(struct __fs_path)];
  67. };
  68. #define FS_PATH_INLINE_SIZE \
  69. sizeof_field(struct fs_path, inline_buf)
  70. /* reused for each extent */
  71. struct clone_root {
  72. struct btrfs_root *root;
  73. u64 ino;
  74. u64 offset;
  75. u64 num_bytes;
  76. bool found_ref;
  77. };
  /*
   * Size limit for the name cache.
   * NOTE(review): presumably the maximum number of entries, like the other
   * *_CACHE_SIZE limits in this file - confirm at the place where it is used.
   */
  78. #define SEND_MAX_NAME_CACHE_SIZE 256
  79. /*
  80. * Limit the root_ids array of struct backref_cache_entry to 17 elements.
  81. * This makes the size of a cache entry to be exactly 192 bytes on x86_64, which
  82. * can be satisfied from the kmalloc-192 slab, without wasting any space.
  83. * The most common case is to have a single root for cloning, which corresponds
  84. * to the send root. Having the user specify more than 16 clone roots is not
  85. * common, and in such rare cases we simply don't use caching if the number of
  86. * cloning roots that lead down to a leaf is more than 17.
  87. */
  88. #define SEND_MAX_BACKREF_CACHE_ROOTS 17
  89. /*
  90. * Max number of entries in the backref cache.
  91. * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, the size in bytes, excluding
  92. * maple tree's internal nodes, is 24K (128 entries * 192 bytes).
  93. */
  94. #define SEND_MAX_BACKREF_CACHE_SIZE 128
  95. /*
  96. * A backref cache entry maps a leaf to a list of IDs of roots from which the
  97. * leaf is accessible and we can use for clone operations.
  98. * With SEND_MAX_BACKREF_CACHE_ROOTS as 12, each cache entry is 128 bytes (on
  99. * x86_64).
  100. */
  101. struct backref_cache_entry {
  102. struct btrfs_lru_cache_entry entry;
  103. u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS];
  104. /* Number of valid elements in the root_ids array. */
  105. int num_roots;
  106. };
  107. /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
  108. static_assert(offsetof(struct backref_cache_entry, entry) == 0);
  109. /*
  110. * Max number of entries in the cache that stores directories that were already
  111. * created. The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
  112. * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
  113. * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
  114. */
  115. #define SEND_MAX_DIR_CREATED_CACHE_SIZE 64
  116. /*
   * Max number of entries in the directory utimes cache (see the
   * dir_utimes_cache member of struct send_ctx). If that cache also uses raw
   * struct btrfs_lru_cache_entry entries (48 bytes each, served from the
   * kmalloc-64 slab), it uses at most 4096 bytes (64 bytes * 64).
   * NOTE(review): the previous text here was a verbatim copy of the comment
   * for the cache of created directories; presumably this cache tracks
   * directories whose utimes still have to be sent - confirm against the
   * users of dir_utimes_cache.
   */
  122. #define SEND_MAX_DIR_UTIMES_CACHE_SIZE 64
  123. struct send_ctx {
  124. struct file *send_filp;
  125. loff_t send_off;
  126. char *send_buf;
  127. u32 send_size;
  128. u32 send_max_size;
  129. /*
  130. * Whether BTRFS_SEND_A_DATA attribute was already added to current
  131. * command (since protocol v2, data must be the last attribute).
  132. */
  133. bool put_data;
  134. struct page **send_buf_pages;
  135. u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
  136. /* Protocol version compatibility requested */
  137. u32 proto;
  138. struct btrfs_root *send_root;
  139. struct btrfs_root *parent_root;
  140. struct clone_root *clone_roots;
  141. int clone_roots_cnt;
  142. /* current state of the compare_tree call */
  143. struct btrfs_path *left_path;
  144. struct btrfs_path *right_path;
  145. struct btrfs_key *cmp_key;
  146. /*
  147. * Keep track of the generation of the last transaction that was used
  148. * for relocating a block group. This is periodically checked in order
  149. * to detect if a relocation happened since the last check, so that we
  150. * don't operate on stale extent buffers for nodes (level >= 1) or on
  151. * stale disk_bytenr values of file extent items.
  152. */
  153. u64 last_reloc_trans;
  154. /*
  155. * infos of the currently processed inode. In case of deleted inodes,
  156. * these are the values from the deleted inode.
  157. */
  158. u64 cur_ino;
  159. u64 cur_inode_gen;
  160. u64 cur_inode_size;
  161. u64 cur_inode_mode;
  162. u64 cur_inode_rdev;
  163. u64 cur_inode_last_extent;
  164. u64 cur_inode_next_write_offset;
  165. bool cur_inode_new;
  166. bool cur_inode_new_gen;
  167. bool cur_inode_deleted;
  168. bool ignore_cur_inode;
  169. bool cur_inode_needs_verity;
  170. void *verity_descriptor;
  171. u64 send_progress;
  172. struct list_head new_refs;
  173. struct list_head deleted_refs;
  174. struct btrfs_lru_cache name_cache;
  175. /*
  176. * The inode we are currently processing. It's not NULL only when we
  177. * need to issue write commands for data extents from this inode.
  178. */
  179. struct inode *cur_inode;
  180. struct file_ra_state ra;
  181. u64 page_cache_clear_start;
  182. bool clean_page_cache;
  183. /*
  184. * We process inodes by their increasing order, so if before an
  185. * incremental send we reverse the parent/child relationship of
  186. * directories such that a directory with a lower inode number was
  187. * the parent of a directory with a higher inode number, and the one
  188. * becoming the new parent got renamed too, we can't rename/move the
  189. * directory with lower inode number when we finish processing it - we
  190. * must process the directory with higher inode number first, then
  191. * rename/move it and then rename/move the directory with lower inode
  192. * number. Example follows.
  193. *
  194. * Tree state when the first send was performed:
  195. *
  196. * .
  197. * |-- a (ino 257)
  198. * |-- b (ino 258)
  199. * |
  200. * |
  201. * |-- c (ino 259)
  202. * | |-- d (ino 260)
  203. * |
  204. * |-- c2 (ino 261)
  205. *
  206. * Tree state when the second (incremental) send is performed:
  207. *
  208. * .
  209. * |-- a (ino 257)
  210. * |-- b (ino 258)
  211. * |-- c2 (ino 261)
  212. * |-- d2 (ino 260)
  213. * |-- cc (ino 259)
  214. *
  215. * The sequence of steps that lead to the second state was:
  216. *
  217. * mv /a/b/c/d /a/b/c2/d2
  218. * mv /a/b/c /a/b/c2/d2/cc
  219. *
  220. * "c" has lower inode number, but we can't move it (2nd mv operation)
  221. * before we move "d", which has higher inode number.
  222. *
  223. * So we just memorize which move/rename operations must be performed
  224. * later when their respective parent is processed and moved/renamed.
  225. */
  226. /* Indexed by parent directory inode number. */
  227. struct rb_root pending_dir_moves;
  228. /*
  229. * Reverse index, indexed by the inode number of a directory that
  230. * is waiting for the move/rename of its immediate parent before its
  231. * own move/rename can be performed.
  232. */
  233. struct rb_root waiting_dir_moves;
  234. /*
  235. * A directory that is going to be rm'ed might have a child directory
  236. * which is in the pending directory moves index above. In this case,
  237. * the directory can only be removed after the move/rename of its child
  238. * is performed. Example:
  239. *
  240. * Parent snapshot:
  241. *
  242. * . (ino 256)
  243. * |-- a/ (ino 257)
  244. * |-- b/ (ino 258)
  245. * |-- c/ (ino 259)
  246. * | |-- x/ (ino 260)
  247. * |
  248. * |-- y/ (ino 261)
  249. *
  250. * Send snapshot:
  251. *
  252. * . (ino 256)
  253. * |-- a/ (ino 257)
  254. * |-- b/ (ino 258)
  255. * |-- YY/ (ino 261)
  256. * |-- x/ (ino 260)
  257. *
  258. * Sequence of steps that lead to the send snapshot:
  259. * rm -f /a/b/c/foo.txt
  260. * mv /a/b/y /a/b/YY
  261. * mv /a/b/c/x /a/b/YY
  262. * rmdir /a/b/c
  263. *
  264. * When the child is processed, its move/rename is delayed until its
  265. * parent is processed (as explained above), but all other operations
  266. * like update utimes, chown, chgrp, etc, are performed and the paths
  267. * that it uses for those operations must use the orphanized name of
  268. * its parent (the directory we're going to rm later), so we need to
  269. * memorize that name.
  270. *
  271. * Indexed by the inode number of the directory to be deleted.
  272. */
  273. struct rb_root orphan_dirs;
  274. struct rb_root rbtree_new_refs;
  275. struct rb_root rbtree_deleted_refs;
  276. struct btrfs_lru_cache backref_cache;
  277. u64 backref_cache_last_reloc_trans;
  278. struct btrfs_lru_cache dir_created_cache;
  279. struct btrfs_lru_cache dir_utimes_cache;
  280. struct fs_path cur_inode_path;
  281. };
  282. struct pending_dir_move {
  283. struct rb_node node;
  284. struct list_head list;
  285. u64 parent_ino;
  286. u64 ino;
  287. u64 gen;
  288. struct list_head update_refs;
  289. };
  290. struct waiting_dir_move {
  291. struct rb_node node;
  292. u64 ino;
  293. /*
  294. * There might be some directory that could not be removed because it
  295. * was waiting for this directory inode to be moved first. Therefore
  296. * after this directory is moved, we can try to rmdir the ino rmdir_ino.
  297. */
  298. u64 rmdir_ino;
  299. u64 rmdir_gen;
  300. bool orphanized;
  301. };
  302. struct orphan_dir_info {
  303. struct rb_node node;
  304. u64 ino;
  305. u64 gen;
  306. u64 last_dir_index_offset;
  307. u64 dir_high_seq_ino;
  308. };
  309. struct name_cache_entry {
  310. /*
  311. * The key in the entry is an inode number, and the generation matches
  312. * the inode's generation.
  313. */
  314. struct btrfs_lru_cache_entry entry;
  315. u64 parent_ino;
  316. u64 parent_gen;
  317. int ret;
  318. int need_later_update;
  319. /* Name length without NUL terminator. */
  320. int name_len;
  321. /* Not NUL terminated. */
  322. char name[] __counted_by(name_len) __nonstring;
  323. };
  324. /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
  325. static_assert(offsetof(struct name_cache_entry, entry) == 0);
  /*
   * NOTE(review): the users of these two values are outside the visible part of
   * the file; presumably they tell the tree comparison code how to advance
   * through a tree - confirm before relying on this.
   */
  326. #define ADVANCE 1
  327. #define ADVANCE_ONLY_NEXT -1
  /*
   * Outcome of comparing an item between the two trees, as reported by
   * inconsistent_snapshot_error() with the words given below.
   */
  enum btrfs_compare_tree_result {
  	BTRFS_COMPARE_TREE_NEW,		/* "new" */
  	BTRFS_COMPARE_TREE_DELETED,	/* "deleted" */
  	BTRFS_COMPARE_TREE_CHANGED,	/* "updated" */
  	BTRFS_COMPARE_TREE_SAME,	/* "unchanged" */
  };
  334. __cold
  335. static void inconsistent_snapshot_error(struct send_ctx *sctx,
  336. enum btrfs_compare_tree_result result,
  337. const char *what)
  338. {
  339. const char *result_string;
  340. switch (result) {
  341. case BTRFS_COMPARE_TREE_NEW:
  342. result_string = "new";
  343. break;
  344. case BTRFS_COMPARE_TREE_DELETED:
  345. result_string = "deleted";
  346. break;
  347. case BTRFS_COMPARE_TREE_CHANGED:
  348. result_string = "updated";
  349. break;
  350. case BTRFS_COMPARE_TREE_SAME:
  351. DEBUG_WARN("no change between trees");
  352. result_string = "unchanged";
  353. break;
  354. default:
  355. DEBUG_WARN("unexpected comparison result %d", result);
  356. result_string = "unexpected";
  357. }
  358. btrfs_err(sctx->send_root->fs_info,
  359. "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
  360. result_string, what, sctx->cmp_key->objectid,
  361. btrfs_root_id(sctx->send_root),
  362. (sctx->parent_root ? btrfs_root_id(sctx->parent_root) : 0));
  363. }
  364. __maybe_unused
  365. static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
  366. {
  367. switch (sctx->proto) {
  368. case 1: return cmd <= BTRFS_SEND_C_MAX_V1;
  369. case 2: return cmd <= BTRFS_SEND_C_MAX_V2;
  370. case 3: return cmd <= BTRFS_SEND_C_MAX_V3;
  371. default: return false;
  372. }
  373. }
  /*
   * Forward declarations of static helpers whose definitions are not in this
   * part of the file.
   */
  374. static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
  375. static struct waiting_dir_move *
  376. get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
  377. static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);
  378. static int need_send_hole(struct send_ctx *sctx)
  379. {
  380. return (sctx->parent_root && !sctx->cur_inode_new &&
  381. !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
  382. S_ISREG(sctx->cur_inode_mode));
  383. }
  384. static void fs_path_reset(struct fs_path *p)
  385. {
  386. if (p->reversed)
  387. p->start = p->buf + p->buf_len - 1;
  388. else
  389. p->start = p->buf;
  390. p->end = p->start;
  391. *p->start = 0;
  392. }
  393. static void init_path(struct fs_path *p)
  394. {
  395. p->reversed = 0;
  396. p->buf = p->inline_buf;
  397. p->buf_len = FS_PATH_INLINE_SIZE;
  398. fs_path_reset(p);
  399. }
  400. static struct fs_path *fs_path_alloc(void)
  401. {
  402. struct fs_path *p;
  403. p = kmalloc_obj(*p);
  404. if (!p)
  405. return NULL;
  406. init_path(p);
  407. return p;
  408. }
  409. static struct fs_path *fs_path_alloc_reversed(void)
  410. {
  411. struct fs_path *p;
  412. p = fs_path_alloc();
  413. if (!p)
  414. return NULL;
  415. p->reversed = 1;
  416. fs_path_reset(p);
  417. return p;
  418. }
  419. static void fs_path_free(struct fs_path *p)
  420. {
  421. if (!p)
  422. return;
  423. if (p->buf != p->inline_buf)
  424. kfree(p->buf);
  425. kfree(p);
  426. }
  427. static inline int fs_path_len(const struct fs_path *p)
  428. {
  429. return p->end - p->start;
  430. }
  431. static int fs_path_ensure_buf(struct fs_path *p, int len)
  432. {
  433. char *tmp_buf;
  434. int path_len;
  435. int old_buf_len;
  436. len++;
  437. if (p->buf_len >= len)
  438. return 0;
  439. if (WARN_ON(len > PATH_MAX))
  440. return -ENAMETOOLONG;
  441. path_len = fs_path_len(p);
  442. old_buf_len = p->buf_len;
  443. /*
  444. * Allocate to the next largest kmalloc bucket size, to let
  445. * the fast path happen most of the time.
  446. */
  447. len = kmalloc_size_roundup(len);
  448. /*
  449. * First time the inline_buf does not suffice
  450. */
  451. if (p->buf == p->inline_buf) {
  452. tmp_buf = kmalloc(len, GFP_KERNEL);
  453. if (tmp_buf)
  454. memcpy(tmp_buf, p->buf, old_buf_len);
  455. } else {
  456. tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
  457. }
  458. if (!tmp_buf)
  459. return -ENOMEM;
  460. p->buf = tmp_buf;
  461. p->buf_len = len;
  462. if (p->reversed) {
  463. tmp_buf = p->buf + old_buf_len - path_len - 1;
  464. p->end = p->buf + p->buf_len - 1;
  465. p->start = p->end - path_len;
  466. memmove(p->start, tmp_buf, path_len + 1);
  467. } else {
  468. p->start = p->buf;
  469. p->end = p->start + path_len;
  470. }
  471. return 0;
  472. }
  473. static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
  474. char **prepared)
  475. {
  476. int ret;
  477. int new_len;
  478. new_len = fs_path_len(p) + name_len;
  479. if (p->start != p->end)
  480. new_len++;
  481. ret = fs_path_ensure_buf(p, new_len);
  482. if (ret < 0)
  483. return ret;
  484. if (p->reversed) {
  485. if (p->start != p->end)
  486. *--p->start = '/';
  487. p->start -= name_len;
  488. *prepared = p->start;
  489. } else {
  490. if (p->start != p->end)
  491. *p->end++ = '/';
  492. *prepared = p->end;
  493. p->end += name_len;
  494. *p->end = 0;
  495. }
  496. return 0;
  497. }
  /*
   * Add the @name_len bytes at @name to @p as a new path element.
   * Returns 0 on success or a negative errno if no room could be reserved.
   */
  static int fs_path_add(struct fs_path *p, const char *name, int name_len)
  {
  	char *dst;
  	int ret = fs_path_prepare_for_add(p, name_len, &dst);

  	if (ret >= 0) {
  		memcpy(dst, name, name_len);
  		ret = 0;
  	}

  	return ret;
  }
  508. static inline int fs_path_add_path(struct fs_path *p, const struct fs_path *p2)
  509. {
  510. return fs_path_add(p, p2->start, fs_path_len(p2));
  511. }
  /*
   * Add a path element to @p whose @len name bytes are read from extent buffer
   * @eb at offset @off.
   * Returns 0 on success or a negative errno if no room could be reserved.
   */
  static int fs_path_add_from_extent_buffer(struct fs_path *p,
  					  struct extent_buffer *eb,
  					  unsigned long off, int len)
  {
  	char *dst;
  	int ret = fs_path_prepare_for_add(p, len, &dst);

  	if (ret < 0)
  		return ret;

  	read_extent_buffer(eb, dst, off, len);

  	return 0;
  }
  524. static int fs_path_copy(struct fs_path *p, struct fs_path *from)
  525. {
  526. p->reversed = from->reversed;
  527. fs_path_reset(p);
  528. return fs_path_add_path(p, from);
  529. }
  530. static void fs_path_unreverse(struct fs_path *p)
  531. {
  532. char *tmp;
  533. int len;
  534. if (!p->reversed)
  535. return;
  536. tmp = p->start;
  537. len = fs_path_len(p);
  538. p->start = p->buf;
  539. p->end = p->start + len;
  540. memmove(p->start, tmp, len + 1);
  541. p->reversed = 0;
  542. }
  543. static inline bool is_current_inode_path(const struct send_ctx *sctx,
  544. const struct fs_path *path)
  545. {
  546. const struct fs_path *cur = &sctx->cur_inode_path;
  547. return (strncmp(path->start, cur->start, fs_path_len(cur)) == 0);
  548. }
  549. static struct btrfs_path *alloc_path_for_send(void)
  550. {
  551. struct btrfs_path *path;
  552. path = btrfs_alloc_path();
  553. if (!path)
  554. return NULL;
  555. path->search_commit_root = true;
  556. path->skip_locking = true;
  557. path->need_commit_sem = true;
  558. return path;
  559. }
  560. static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
  561. {
  562. int ret;
  563. u32 pos = 0;
  564. while (pos < len) {
  565. ret = kernel_write(filp, buf + pos, len - pos, off);
  566. if (ret < 0)
  567. return ret;
  568. if (unlikely(ret == 0))
  569. return -EIO;
  570. pos += ret;
  571. }
  572. return 0;
  573. }
  574. static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
  575. {
  576. struct btrfs_tlv_header *hdr;
  577. int total_len = sizeof(*hdr) + len;
  578. int left = sctx->send_max_size - sctx->send_size;
  579. if (WARN_ON_ONCE(sctx->put_data))
  580. return -EINVAL;
  581. if (unlikely(left < total_len))
  582. return -EOVERFLOW;
  583. hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
  584. put_unaligned_le16(attr, &hdr->tlv_type);
  585. put_unaligned_le16(len, &hdr->tlv_len);
  586. memcpy(hdr + 1, data, len);
  587. sctx->send_size += total_len;
  588. return 0;
  589. }
  590. #define TLV_PUT_DEFINE_INT(bits) \
  591. static int tlv_put_u##bits(struct send_ctx *sctx, \
  592. u##bits attr, u##bits value) \
  593. { \
  594. __le##bits __tmp = cpu_to_le##bits(value); \
  595. return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
  596. }
  597. TLV_PUT_DEFINE_INT(8)
  598. TLV_PUT_DEFINE_INT(32)
  599. TLV_PUT_DEFINE_INT(64)
  600. static int tlv_put_string(struct send_ctx *sctx, u16 attr,
  601. const char *str, int len)
  602. {
  603. if (len == -1)
  604. len = strlen(str);
  605. return tlv_put(sctx, attr, str, len);
  606. }
  607. static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
  608. const u8 *uuid)
  609. {
  610. return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
  611. }
  612. static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
  613. struct extent_buffer *eb,
  614. struct btrfs_timespec *ts)
  615. {
  616. struct btrfs_timespec bts;
  617. read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
  618. return tlv_put(sctx, attr, &bts, sizeof(bts));
  619. }
  /*
   * Helpers that add one attribute to the current command and bail out on
   * failure. Every one of them stores the result in a local variable named
   * 'ret' and jumps to a label named 'tlv_put_failure' when it is negative, so
   * both must exist in the calling function.
   *
   * All arguments are parenthesized in the expansions, so that expressions
   * such as '&sctx->cur_inode_path' can be passed safely.
   */
  #define TLV_PUT(sctx, attrtype, data, attrlen) \
  	do { \
  		ret = tlv_put((sctx), (attrtype), (data), (attrlen)); \
  		if (ret < 0) \
  			goto tlv_put_failure; \
  	} while (0)

  #define TLV_PUT_INT(sctx, attrtype, bits, value) \
  	do { \
  		ret = tlv_put_u##bits((sctx), (attrtype), (value)); \
  		if (ret < 0) \
  			goto tlv_put_failure; \
  	} while (0)

  /*
   * NOTE(review): TLV_PUT_U16 expands to a call of tlv_put_u16(); make sure a
   * matching TLV_PUT_DEFINE_INT(16) exists before using it.
   */
  #define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
  #define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
  #define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
  #define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)

  #define TLV_PUT_STRING(sctx, attrtype, str, len) \
  	do { \
  		ret = tlv_put_string((sctx), (attrtype), (str), (len)); \
  		if (ret < 0) \
  			goto tlv_put_failure; \
  	} while (0)

  /* @p is a pointer to a struct fs_path; it is evaluated twice. */
  #define TLV_PUT_PATH(sctx, attrtype, p) \
  	do { \
  		ret = tlv_put_string((sctx), (attrtype), (p)->start, \
  				     fs_path_len((p))); \
  		if (ret < 0) \
  			goto tlv_put_failure; \
  	} while (0)

  #define TLV_PUT_UUID(sctx, attrtype, uuid) \
  	do { \
  		ret = tlv_put_uuid((sctx), (attrtype), (uuid)); \
  		if (ret < 0) \
  			goto tlv_put_failure; \
  	} while (0)

  #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
  	do { \
  		ret = tlv_put_btrfs_timespec((sctx), (attrtype), (eb), (ts)); \
  		if (ret < 0) \
  			goto tlv_put_failure; \
  	} while (0)
  661. static int send_header(struct send_ctx *sctx)
  662. {
  663. struct btrfs_stream_header hdr;
  664. strscpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
  665. hdr.version = cpu_to_le32(sctx->proto);
  666. return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
  667. &sctx->send_off);
  668. }
  669. /*
  670. * For each command/item we want to send to userspace, we call this function.
  671. */
  672. static int begin_cmd(struct send_ctx *sctx, int cmd)
  673. {
  674. struct btrfs_cmd_header *hdr;
  675. if (WARN_ON(!sctx->send_buf))
  676. return -EINVAL;
  677. if (unlikely(sctx->send_size != 0)) {
  678. btrfs_err(sctx->send_root->fs_info,
  679. "send: command header buffer not empty cmd %d offset %llu",
  680. cmd, sctx->send_off);
  681. return -EINVAL;
  682. }
  683. sctx->send_size += sizeof(*hdr);
  684. hdr = (struct btrfs_cmd_header *)sctx->send_buf;
  685. put_unaligned_le16(cmd, &hdr->cmd);
  686. return 0;
  687. }
  688. static int send_cmd(struct send_ctx *sctx)
  689. {
  690. int ret;
  691. struct btrfs_cmd_header *hdr;
  692. u32 crc;
  693. hdr = (struct btrfs_cmd_header *)sctx->send_buf;
  694. put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len);
  695. put_unaligned_le32(0, &hdr->crc);
  696. crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
  697. put_unaligned_le32(crc, &hdr->crc);
  698. ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
  699. &sctx->send_off);
  700. sctx->send_size = 0;
  701. sctx->put_data = false;
  702. return ret;
  703. }
  704. /*
  705. * Sends a move instruction to user space
  706. */
  707. static int send_rename(struct send_ctx *sctx,
  708. struct fs_path *from, struct fs_path *to)
  709. {
  710. int ret;
  711. ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
  712. if (ret < 0)
  713. return ret;
  714. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
  715. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
  716. ret = send_cmd(sctx);
  717. tlv_put_failure:
  718. return ret;
  719. }
  720. /*
  721. * Sends a link instruction to user space
  722. */
  723. static int send_link(struct send_ctx *sctx,
  724. struct fs_path *path, struct fs_path *lnk)
  725. {
  726. int ret;
  727. ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
  728. if (ret < 0)
  729. return ret;
  730. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
  731. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
  732. ret = send_cmd(sctx);
  733. tlv_put_failure:
  734. return ret;
  735. }
  736. /*
  737. * Sends an unlink instruction to user space
  738. */
  739. static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
  740. {
  741. int ret;
  742. ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
  743. if (ret < 0)
  744. return ret;
  745. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
  746. ret = send_cmd(sctx);
  747. tlv_put_failure:
  748. return ret;
  749. }
  750. /*
  751. * Sends a rmdir instruction to user space
  752. */
  753. static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
  754. {
  755. int ret;
  756. ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
  757. if (ret < 0)
  758. return ret;
  759. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
  760. ret = send_cmd(sctx);
  761. tlv_put_failure:
  762. return ret;
  763. }
  764. struct btrfs_inode_info {
  765. u64 size;
  766. u64 gen;
  767. u64 mode;
  768. u64 uid;
  769. u64 gid;
  770. u64 rdev;
  771. u64 fileattr;
  772. u64 nlink;
  773. };
  774. /*
  775. * Helper function to retrieve some fields from an inode item.
  776. */
  777. static int get_inode_info(struct btrfs_root *root, u64 ino,
  778. struct btrfs_inode_info *info)
  779. {
  780. int ret;
  781. BTRFS_PATH_AUTO_FREE(path);
  782. struct btrfs_inode_item *ii;
  783. struct btrfs_key key;
  784. path = alloc_path_for_send();
  785. if (!path)
  786. return -ENOMEM;
  787. key.objectid = ino;
  788. key.type = BTRFS_INODE_ITEM_KEY;
  789. key.offset = 0;
  790. ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  791. if (ret) {
  792. if (ret > 0)
  793. ret = -ENOENT;
  794. return ret;
  795. }
  796. if (!info)
  797. return 0;
  798. ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
  799. struct btrfs_inode_item);
  800. info->size = btrfs_inode_size(path->nodes[0], ii);
  801. info->gen = btrfs_inode_generation(path->nodes[0], ii);
  802. info->mode = btrfs_inode_mode(path->nodes[0], ii);
  803. info->uid = btrfs_inode_uid(path->nodes[0], ii);
  804. info->gid = btrfs_inode_gid(path->nodes[0], ii);
  805. info->rdev = btrfs_inode_rdev(path->nodes[0], ii);
  806. info->nlink = btrfs_inode_nlink(path->nodes[0], ii);
  807. /*
  808. * Transfer the unchanged u64 value of btrfs_inode_item::flags, that's
  809. * otherwise logically split to 32/32 parts.
  810. */
  811. info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
  812. return 0;
  813. }
  814. static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
  815. {
  816. int ret;
  817. struct btrfs_inode_info info = { 0 };
  818. ASSERT(gen);
  819. ret = get_inode_info(root, ino, &info);
  820. *gen = info.gen;
  821. return ret;
  822. }
  /*
   * Callback type of iterate_inode_ref(), called once per name found in the
   * item: @dir is the inode number of the parent directory, @p holds the name
   * (or the resolved path) and @ctx is the opaque pointer given to
   * iterate_inode_ref(). A non zero return value stops the iteration and is
   * returned to the caller of iterate_inode_ref().
   */
  823. typedef int (*iterate_inode_ref_t)(u64 dir, struct fs_path *p, void *ctx);
  824. /*
  825. * Helper function to iterate the entries in ONE btrfs_inode_ref or
  826. * btrfs_inode_extref.
  827. * The iterate callback may return a non zero value to stop iteration. This can
  828. * be a negative value for error codes or 1 to simply stop it.
  829. *
  830. * path must point to the INODE_REF or INODE_EXTREF when called.
  831. */
  832. static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
  833. struct btrfs_key *found_key, bool resolve,
  834. iterate_inode_ref_t iterate, void *ctx)
  835. {
  836. struct extent_buffer *eb = path->nodes[0];
  837. struct btrfs_inode_ref *iref;
  838. struct btrfs_inode_extref *extref;
  839. BTRFS_PATH_AUTO_FREE(tmp_path);
  840. struct fs_path *p;
  841. u32 cur = 0;
  842. u32 total;
  843. int slot = path->slots[0];
  844. u32 name_len;
  845. char *start;
  846. int ret = 0;
  847. u64 dir;
  848. unsigned long name_off;
  849. unsigned long elem_size;
  850. unsigned long ptr;
  851. p = fs_path_alloc_reversed();
  852. if (!p)
  853. return -ENOMEM;
  854. tmp_path = alloc_path_for_send();
  855. if (!tmp_path) {
  856. fs_path_free(p);
  857. return -ENOMEM;
  858. }
  859. if (found_key->type == BTRFS_INODE_REF_KEY) {
  860. ptr = (unsigned long)btrfs_item_ptr(eb, slot,
  861. struct btrfs_inode_ref);
  862. total = btrfs_item_size(eb, slot);
  863. elem_size = sizeof(*iref);
  864. } else {
  865. ptr = btrfs_item_ptr_offset(eb, slot);
  866. total = btrfs_item_size(eb, slot);
  867. elem_size = sizeof(*extref);
  868. }
  869. while (cur < total) {
  870. fs_path_reset(p);
  871. if (found_key->type == BTRFS_INODE_REF_KEY) {
  872. iref = (struct btrfs_inode_ref *)(ptr + cur);
  873. name_len = btrfs_inode_ref_name_len(eb, iref);
  874. name_off = (unsigned long)(iref + 1);
  875. dir = found_key->offset;
  876. } else {
  877. extref = (struct btrfs_inode_extref *)(ptr + cur);
  878. name_len = btrfs_inode_extref_name_len(eb, extref);
  879. name_off = (unsigned long)&extref->name;
  880. dir = btrfs_inode_extref_parent(eb, extref);
  881. }
  882. if (resolve) {
  883. start = btrfs_ref_to_path(root, tmp_path, name_len,
  884. name_off, eb, dir,
  885. p->buf, p->buf_len);
  886. if (IS_ERR(start)) {
  887. ret = PTR_ERR(start);
  888. goto out;
  889. }
  890. if (start < p->buf) {
  891. /* overflow , try again with larger buffer */
  892. ret = fs_path_ensure_buf(p,
  893. p->buf_len + p->buf - start);
  894. if (ret < 0)
  895. goto out;
  896. start = btrfs_ref_to_path(root, tmp_path,
  897. name_len, name_off,
  898. eb, dir,
  899. p->buf, p->buf_len);
  900. if (IS_ERR(start)) {
  901. ret = PTR_ERR(start);
  902. goto out;
  903. }
  904. if (unlikely(start < p->buf)) {
  905. btrfs_err(root->fs_info,
  906. "send: path ref buffer underflow for key " BTRFS_KEY_FMT,
  907. BTRFS_KEY_FMT_VALUE(found_key));
  908. ret = -EINVAL;
  909. goto out;
  910. }
  911. }
  912. p->start = start;
  913. } else {
  914. ret = fs_path_add_from_extent_buffer(p, eb, name_off,
  915. name_len);
  916. if (ret < 0)
  917. goto out;
  918. }
  919. cur += elem_size + name_len;
  920. ret = iterate(dir, p, ctx);
  921. if (ret)
  922. goto out;
  923. }
  924. out:
  925. fs_path_free(p);
  926. return ret;
  927. }
  /*
   * Callback type used by iterate_dir_item(). It receives the key of the
   * directory entry (@di_key), its name and data with their lengths, and the
   * opaque @ctx pointer of the caller.
   * NOTE(review): @num is presumably the index of the entry inside the item;
   * the code passing it is outside the visible part of the file - confirm.
   */
  928. typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
  929. const char *name, int name_len,
  930. const char *data, int data_len,
  931. void *ctx);
  932. /*
  933. * Helper function to iterate the entries in ONE btrfs_dir_item.
  934. * The iterate callback may return a non zero value to stop iteration. This can
  935. * be a negative value for error codes or 1 to simply stop it.
  936. *
  937. * path must point to the dir item when called.
  938. */
  939. static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
  940. iterate_dir_item_t iterate, void *ctx)
  941. {
  942. int ret = 0;
  943. struct extent_buffer *eb;
  944. struct btrfs_dir_item *di;
  945. struct btrfs_key di_key;
  946. char *buf = NULL;
  947. int buf_len;
  948. u32 name_len;
  949. u32 data_len;
  950. u32 cur;
  951. u32 len;
  952. u32 total;
  953. int slot;
  954. int num;
  955. /*
  956. * Start with a small buffer (1 page). If later we end up needing more
  957. * space, which can happen for xattrs on a fs with a leaf size greater
  958. * than the page size, attempt to increase the buffer. Typically xattr
  959. * values are small.
  960. */
  961. buf_len = PATH_MAX;
  962. buf = kmalloc(buf_len, GFP_KERNEL);
  963. if (!buf) {
  964. ret = -ENOMEM;
  965. goto out;
  966. }
  967. eb = path->nodes[0];
  968. slot = path->slots[0];
  969. di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
  970. cur = 0;
  971. len = 0;
  972. total = btrfs_item_size(eb, slot);
  973. num = 0;
  974. while (cur < total) {
  975. name_len = btrfs_dir_name_len(eb, di);
  976. data_len = btrfs_dir_data_len(eb, di);
  977. btrfs_dir_item_key_to_cpu(eb, di, &di_key);
  978. if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) {
  979. if (unlikely(name_len > XATTR_NAME_MAX)) {
  980. ret = -ENAMETOOLONG;
  981. goto out;
  982. }
  983. if (unlikely(name_len + data_len >
  984. BTRFS_MAX_XATTR_SIZE(root->fs_info))) {
  985. ret = -E2BIG;
  986. goto out;
  987. }
  988. } else {
  989. /*
  990. * Path too long
  991. */
  992. if (unlikely(name_len + data_len > PATH_MAX)) {
  993. ret = -ENAMETOOLONG;
  994. goto out;
  995. }
  996. }
  997. if (name_len + data_len > buf_len) {
  998. buf_len = name_len + data_len;
  999. if (is_vmalloc_addr(buf)) {
  1000. vfree(buf);
  1001. buf = NULL;
  1002. } else {
  1003. char *tmp = krealloc(buf, buf_len,
  1004. GFP_KERNEL | __GFP_NOWARN);
  1005. if (!tmp)
  1006. kfree(buf);
  1007. buf = tmp;
  1008. }
  1009. if (!buf) {
  1010. buf = kvmalloc(buf_len, GFP_KERNEL);
  1011. if (!buf) {
  1012. ret = -ENOMEM;
  1013. goto out;
  1014. }
  1015. }
  1016. }
  1017. read_extent_buffer(eb, buf, (unsigned long)(di + 1),
  1018. name_len + data_len);
  1019. len = sizeof(*di) + name_len + data_len;
  1020. di = (struct btrfs_dir_item *)((char *)di + len);
  1021. cur += len;
  1022. ret = iterate(num, &di_key, buf, name_len, buf + name_len,
  1023. data_len, ctx);
  1024. if (ret < 0)
  1025. goto out;
  1026. if (ret) {
  1027. ret = 0;
  1028. goto out;
  1029. }
  1030. num++;
  1031. }
  1032. out:
  1033. kvfree(buf);
  1034. return ret;
  1035. }
  1036. static int __copy_first_ref(u64 dir, struct fs_path *p, void *ctx)
  1037. {
  1038. int ret;
  1039. struct fs_path *pt = ctx;
  1040. ret = fs_path_copy(pt, p);
  1041. if (ret < 0)
  1042. return ret;
  1043. /* we want the first only */
  1044. return 1;
  1045. }
  1046. /*
  1047. * Retrieve the first path of an inode. If an inode has more then one
  1048. * ref/hardlink, this is ignored.
  1049. */
  1050. static int get_inode_path(struct btrfs_root *root,
  1051. u64 ino, struct fs_path *path)
  1052. {
  1053. int ret;
  1054. struct btrfs_key key, found_key;
  1055. BTRFS_PATH_AUTO_FREE(p);
  1056. p = alloc_path_for_send();
  1057. if (!p)
  1058. return -ENOMEM;
  1059. fs_path_reset(path);
  1060. key.objectid = ino;
  1061. key.type = BTRFS_INODE_REF_KEY;
  1062. key.offset = 0;
  1063. ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
  1064. if (ret < 0)
  1065. return ret;
  1066. if (ret)
  1067. return 1;
  1068. btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
  1069. if (found_key.objectid != ino ||
  1070. (found_key.type != BTRFS_INODE_REF_KEY &&
  1071. found_key.type != BTRFS_INODE_EXTREF_KEY))
  1072. return -ENOENT;
  1073. ret = iterate_inode_ref(root, p, &found_key, true, __copy_first_ref, path);
  1074. if (ret < 0)
  1075. return ret;
  1076. return 0;
  1077. }
  1078. struct backref_ctx {
  1079. struct send_ctx *sctx;
  1080. /* number of total found references */
  1081. u64 found;
  1082. /*
  1083. * used for clones found in send_root. clones found behind cur_objectid
  1084. * and cur_offset are not considered as allowed clones.
  1085. */
  1086. u64 cur_objectid;
  1087. u64 cur_offset;
  1088. /* may be truncated in case it's the last extent in a file */
  1089. u64 extent_len;
  1090. /* The bytenr the file extent item we are processing refers to. */
  1091. u64 bytenr;
  1092. /* The owner (root id) of the data backref for the current extent. */
  1093. u64 backref_owner;
  1094. /* The offset of the data backref for the current extent. */
  1095. u64 backref_offset;
  1096. };
  1097. static int __clone_root_cmp_bsearch(const void *key, const void *elt)
  1098. {
  1099. u64 root = (u64)(uintptr_t)key;
  1100. const struct clone_root *cr = elt;
  1101. if (root < btrfs_root_id(cr->root))
  1102. return -1;
  1103. if (root > btrfs_root_id(cr->root))
  1104. return 1;
  1105. return 0;
  1106. }
  1107. static int __clone_root_cmp_sort(const void *e1, const void *e2)
  1108. {
  1109. const struct clone_root *cr1 = e1;
  1110. const struct clone_root *cr2 = e2;
  1111. if (btrfs_root_id(cr1->root) < btrfs_root_id(cr2->root))
  1112. return -1;
  1113. if (btrfs_root_id(cr1->root) > btrfs_root_id(cr2->root))
  1114. return 1;
  1115. return 0;
  1116. }
  1117. /*
  1118. * Called for every backref that is found for the current extent.
  1119. * Results are collected in sctx->clone_roots->ino/offset.
  1120. */
  1121. static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id,
  1122. void *ctx_)
  1123. {
  1124. struct backref_ctx *bctx = ctx_;
  1125. struct clone_root *clone_root;
  1126. /* First check if the root is in the list of accepted clone sources */
  1127. clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots,
  1128. bctx->sctx->clone_roots_cnt,
  1129. sizeof(struct clone_root),
  1130. __clone_root_cmp_bsearch);
  1131. if (!clone_root)
  1132. return 0;
  1133. /* This is our own reference, bail out as we can't clone from it. */
  1134. if (clone_root->root == bctx->sctx->send_root &&
  1135. ino == bctx->cur_objectid &&
  1136. offset == bctx->cur_offset)
  1137. return 0;
  1138. /*
  1139. * Make sure we don't consider clones from send_root that are
  1140. * behind the current inode/offset.
  1141. */
  1142. if (clone_root->root == bctx->sctx->send_root) {
  1143. /*
  1144. * If the source inode was not yet processed we can't issue a
  1145. * clone operation, as the source extent does not exist yet at
  1146. * the destination of the stream.
  1147. */
  1148. if (ino > bctx->cur_objectid)
  1149. return 0;
  1150. /*
  1151. * We clone from the inode currently being sent as long as the
  1152. * source extent is already processed, otherwise we could try
  1153. * to clone from an extent that does not exist yet at the
  1154. * destination of the stream.
  1155. */
  1156. if (ino == bctx->cur_objectid &&
  1157. offset + bctx->extent_len >
  1158. bctx->sctx->cur_inode_next_write_offset)
  1159. return 0;
  1160. }
  1161. bctx->found++;
  1162. clone_root->found_ref = true;
  1163. /*
  1164. * If the given backref refers to a file extent item with a larger
  1165. * number of bytes than what we found before, use the new one so that
  1166. * we clone more optimally and end up doing less writes and getting
  1167. * less exclusive, non-shared extents at the destination.
  1168. */
  1169. if (num_bytes > clone_root->num_bytes) {
  1170. clone_root->ino = ino;
  1171. clone_root->offset = offset;
  1172. clone_root->num_bytes = num_bytes;
  1173. /*
  1174. * Found a perfect candidate, so there's no need to continue
  1175. * backref walking.
  1176. */
  1177. if (num_bytes >= bctx->extent_len)
  1178. return BTRFS_ITERATE_EXTENT_INODES_STOP;
  1179. }
  1180. return 0;
  1181. }
  1182. static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
  1183. const u64 **root_ids_ret, int *root_count_ret)
  1184. {
  1185. struct backref_ctx *bctx = ctx;
  1186. struct send_ctx *sctx = bctx->sctx;
  1187. struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
  1188. const u64 key = leaf_bytenr >> fs_info->nodesize_bits;
  1189. struct btrfs_lru_cache_entry *raw_entry;
  1190. struct backref_cache_entry *entry;
  1191. if (sctx->backref_cache.size == 0)
  1192. return false;
  1193. /*
  1194. * If relocation happened since we first filled the cache, then we must
  1195. * empty the cache and can not use it, because even though we operate on
  1196. * read-only roots, their leaves and nodes may have been reallocated and
  1197. * now be used for different nodes/leaves of the same tree or some other
  1198. * tree.
  1199. *
  1200. * We are called from iterate_extent_inodes() while either holding a
  1201. * transaction handle or holding fs_info->commit_root_sem, so no need
  1202. * to take any lock here.
  1203. */
  1204. if (fs_info->last_reloc_trans > sctx->backref_cache_last_reloc_trans) {
  1205. btrfs_lru_cache_clear(&sctx->backref_cache);
  1206. return false;
  1207. }
  1208. raw_entry = btrfs_lru_cache_lookup(&sctx->backref_cache, key, 0);
  1209. if (!raw_entry)
  1210. return false;
  1211. entry = container_of(raw_entry, struct backref_cache_entry, entry);
  1212. *root_ids_ret = entry->root_ids;
  1213. *root_count_ret = entry->num_roots;
  1214. return true;
  1215. }
  1216. static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
  1217. void *ctx)
  1218. {
  1219. struct backref_ctx *bctx = ctx;
  1220. struct send_ctx *sctx = bctx->sctx;
  1221. struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
  1222. struct backref_cache_entry *new_entry;
  1223. struct ulist_iterator uiter;
  1224. struct ulist_node *node;
  1225. int ret;
  1226. /*
  1227. * We're called while holding a transaction handle or while holding
  1228. * fs_info->commit_root_sem (at iterate_extent_inodes()), so must do a
  1229. * NOFS allocation.
  1230. */
  1231. new_entry = kmalloc_obj(struct backref_cache_entry, GFP_NOFS);
  1232. /* No worries, cache is optional. */
  1233. if (!new_entry)
  1234. return;
  1235. new_entry->entry.key = leaf_bytenr >> fs_info->nodesize_bits;
  1236. new_entry->entry.gen = 0;
  1237. new_entry->num_roots = 0;
  1238. ULIST_ITER_INIT(&uiter);
  1239. while ((node = ulist_next(root_ids, &uiter)) != NULL) {
  1240. const u64 root_id = node->val;
  1241. struct clone_root *root;
  1242. root = bsearch((void *)(uintptr_t)root_id, sctx->clone_roots,
  1243. sctx->clone_roots_cnt, sizeof(struct clone_root),
  1244. __clone_root_cmp_bsearch);
  1245. if (!root)
  1246. continue;
  1247. /* Too many roots, just exit, no worries as caching is optional. */
  1248. if (new_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) {
  1249. kfree(new_entry);
  1250. return;
  1251. }
  1252. new_entry->root_ids[new_entry->num_roots] = root_id;
  1253. new_entry->num_roots++;
  1254. }
  1255. /*
  1256. * We may have not added any roots to the new cache entry, which means
  1257. * none of the roots is part of the list of roots from which we are
  1258. * allowed to clone. Cache the new entry as it's still useful to avoid
  1259. * backref walking to determine which roots have a path to the leaf.
  1260. *
  1261. * Also use GFP_NOFS because we're called while holding a transaction
  1262. * handle or while holding fs_info->commit_root_sem.
  1263. */
  1264. ret = btrfs_lru_cache_store(&sctx->backref_cache, &new_entry->entry,
  1265. GFP_NOFS);
  1266. ASSERT(ret == 0 || ret == -ENOMEM);
  1267. if (ret) {
  1268. /* Caching is optional, no worries. */
  1269. kfree(new_entry);
  1270. return;
  1271. }
  1272. /*
  1273. * We are called from iterate_extent_inodes() while either holding a
  1274. * transaction handle or holding fs_info->commit_root_sem, so no need
  1275. * to take any lock here.
  1276. */
  1277. if (sctx->backref_cache.size == 1)
  1278. sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
  1279. }
  1280. static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
  1281. const struct extent_buffer *leaf, void *ctx)
  1282. {
  1283. const u64 refs = btrfs_extent_refs(leaf, ei);
  1284. const struct backref_ctx *bctx = ctx;
  1285. const struct send_ctx *sctx = bctx->sctx;
  1286. if (bytenr == bctx->bytenr) {
  1287. const u64 flags = btrfs_extent_flags(leaf, ei);
  1288. if (WARN_ON(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
  1289. return -EUCLEAN;
  1290. /*
  1291. * If we have only one reference and only the send root as a
  1292. * clone source - meaning no clone roots were given in the
  1293. * struct btrfs_ioctl_send_args passed to the send ioctl - then
  1294. * it's our reference and there's no point in doing backref
  1295. * walking which is expensive, so exit early.
  1296. */
  1297. if (refs == 1 && sctx->clone_roots_cnt == 1)
  1298. return -ENOENT;
  1299. }
  1300. /*
  1301. * Backreference walking (iterate_extent_inodes() below) is currently
  1302. * too expensive when an extent has a large number of references, both
  1303. * in time spent and used memory. So for now just fallback to write
  1304. * operations instead of clone operations when an extent has more than
  1305. * a certain amount of references.
  1306. */
  1307. if (refs > SEND_MAX_EXTENT_REFS)
  1308. return -ENOENT;
  1309. return 0;
  1310. }
  1311. static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx)
  1312. {
  1313. const struct backref_ctx *bctx = ctx;
  1314. if (ino == bctx->cur_objectid &&
  1315. root == bctx->backref_owner &&
  1316. offset == bctx->backref_offset)
  1317. return true;
  1318. return false;
  1319. }
  1320. /*
  1321. * Given an inode, offset and extent item, it finds a good clone for a clone
  1322. * instruction. Returns -ENOENT when none could be found. The function makes
  1323. * sure that the returned clone is usable at the point where sending is at the
  1324. * moment. This means, that no clones are accepted which lie behind the current
  1325. * inode+offset.
  1326. *
  1327. * path must point to the extent item when called.
  1328. */
  1329. static int find_extent_clone(struct send_ctx *sctx,
  1330. struct btrfs_path *path,
  1331. u64 ino, u64 data_offset,
  1332. u64 ino_size,
  1333. struct clone_root **found)
  1334. {
  1335. struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
  1336. int ret;
  1337. int extent_type;
  1338. u64 disk_byte;
  1339. u64 num_bytes;
  1340. struct btrfs_file_extent_item *fi;
  1341. struct extent_buffer *eb = path->nodes[0];
  1342. struct backref_ctx backref_ctx = { 0 };
  1343. struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 };
  1344. struct clone_root *cur_clone_root;
  1345. int compressed;
  1346. u32 i;
  1347. /*
  1348. * With fallocate we can get prealloc extents beyond the inode's i_size,
  1349. * so we don't do anything here because clone operations can not clone
  1350. * to a range beyond i_size without increasing the i_size of the
  1351. * destination inode.
  1352. */
  1353. if (data_offset >= ino_size)
  1354. return 0;
  1355. fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
  1356. extent_type = btrfs_file_extent_type(eb, fi);
  1357. if (extent_type == BTRFS_FILE_EXTENT_INLINE)
  1358. return -ENOENT;
  1359. disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
  1360. if (disk_byte == 0)
  1361. return -ENOENT;
  1362. compressed = btrfs_file_extent_compression(eb, fi);
  1363. num_bytes = btrfs_file_extent_num_bytes(eb, fi);
  1364. /*
  1365. * Setup the clone roots.
  1366. */
  1367. for (i = 0; i < sctx->clone_roots_cnt; i++) {
  1368. cur_clone_root = sctx->clone_roots + i;
  1369. cur_clone_root->ino = (u64)-1;
  1370. cur_clone_root->offset = 0;
  1371. cur_clone_root->num_bytes = 0;
  1372. cur_clone_root->found_ref = false;
  1373. }
  1374. backref_ctx.sctx = sctx;
  1375. backref_ctx.cur_objectid = ino;
  1376. backref_ctx.cur_offset = data_offset;
  1377. backref_ctx.bytenr = disk_byte;
  1378. /*
  1379. * Use the header owner and not the send root's id, because in case of a
  1380. * snapshot we can have shared subtrees.
  1381. */
  1382. backref_ctx.backref_owner = btrfs_header_owner(eb);
  1383. backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);
  1384. /*
  1385. * The last extent of a file may be too large due to page alignment.
  1386. * We need to adjust extent_len in this case so that the checks in
  1387. * iterate_backrefs() work.
  1388. */
  1389. if (data_offset + num_bytes >= ino_size)
  1390. backref_ctx.extent_len = ino_size - data_offset;
  1391. else
  1392. backref_ctx.extent_len = num_bytes;
  1393. /*
  1394. * Now collect all backrefs.
  1395. */
  1396. backref_walk_ctx.bytenr = disk_byte;
  1397. if (compressed == BTRFS_COMPRESS_NONE)
  1398. backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi);
  1399. backref_walk_ctx.fs_info = fs_info;
  1400. backref_walk_ctx.cache_lookup = lookup_backref_cache;
  1401. backref_walk_ctx.cache_store = store_backref_cache;
  1402. backref_walk_ctx.indirect_ref_iterator = iterate_backrefs;
  1403. backref_walk_ctx.check_extent_item = check_extent_item;
  1404. backref_walk_ctx.user_ctx = &backref_ctx;
  1405. /*
  1406. * If have a single clone root, then it's the send root and we can tell
  1407. * the backref walking code to skip our own backref and not resolve it,
  1408. * since we can not use it for cloning - the source and destination
  1409. * ranges can't overlap and in case the leaf is shared through a subtree
  1410. * due to snapshots, we can't use those other roots since they are not
  1411. * in the list of clone roots.
  1412. */
  1413. if (sctx->clone_roots_cnt == 1)
  1414. backref_walk_ctx.skip_data_ref = skip_self_data_ref;
  1415. ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
  1416. &backref_ctx);
  1417. if (ret < 0)
  1418. return ret;
  1419. down_read(&fs_info->commit_root_sem);
  1420. if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
  1421. /*
  1422. * A transaction commit for a transaction in which block group
  1423. * relocation was done just happened.
  1424. * The disk_bytenr of the file extent item we processed is
  1425. * possibly stale, referring to the extent's location before
  1426. * relocation. So act as if we haven't found any clone sources
  1427. * and fallback to write commands, which will read the correct
  1428. * data from the new extent location. Otherwise we will fail
  1429. * below because we haven't found our own back reference or we
  1430. * could be getting incorrect sources in case the old extent
  1431. * was already reallocated after the relocation.
  1432. */
  1433. up_read(&fs_info->commit_root_sem);
  1434. return -ENOENT;
  1435. }
  1436. up_read(&fs_info->commit_root_sem);
  1437. if (!backref_ctx.found)
  1438. return -ENOENT;
  1439. cur_clone_root = NULL;
  1440. for (i = 0; i < sctx->clone_roots_cnt; i++) {
  1441. struct clone_root *clone_root = &sctx->clone_roots[i];
  1442. if (!clone_root->found_ref)
  1443. continue;
  1444. /*
  1445. * Choose the root from which we can clone more bytes, to
  1446. * minimize write operations and therefore have more extent
  1447. * sharing at the destination (the same as in the source).
  1448. */
  1449. if (!cur_clone_root ||
  1450. clone_root->num_bytes > cur_clone_root->num_bytes) {
  1451. cur_clone_root = clone_root;
  1452. /*
  1453. * We found an optimal clone candidate (any inode from
  1454. * any root is fine), so we're done.
  1455. */
  1456. if (clone_root->num_bytes >= backref_ctx.extent_len)
  1457. break;
  1458. }
  1459. }
  1460. if (cur_clone_root) {
  1461. *found = cur_clone_root;
  1462. ret = 0;
  1463. } else {
  1464. ret = -ENOENT;
  1465. }
  1466. return ret;
  1467. }
  1468. static int read_symlink(struct btrfs_root *root,
  1469. u64 ino,
  1470. struct fs_path *dest)
  1471. {
  1472. int ret;
  1473. BTRFS_PATH_AUTO_FREE(path);
  1474. struct btrfs_key key;
  1475. struct btrfs_file_extent_item *ei;
  1476. u8 type;
  1477. u8 compression;
  1478. unsigned long off;
  1479. int len;
  1480. path = alloc_path_for_send();
  1481. if (!path)
  1482. return -ENOMEM;
  1483. key.objectid = ino;
  1484. key.type = BTRFS_EXTENT_DATA_KEY;
  1485. key.offset = 0;
  1486. ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  1487. if (ret < 0)
  1488. return ret;
  1489. if (unlikely(ret)) {
  1490. /*
  1491. * An empty symlink inode. Can happen in rare error paths when
  1492. * creating a symlink (transaction committed before the inode
  1493. * eviction handler removed the symlink inode items and a crash
  1494. * happened in between or the subvol was snapshotted in between).
  1495. * Print an informative message to dmesg/syslog so that the user
  1496. * can delete the symlink.
  1497. */
  1498. btrfs_err(root->fs_info,
  1499. "Found empty symlink inode %llu at root %llu",
  1500. ino, btrfs_root_id(root));
  1501. return -EIO;
  1502. }
  1503. ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
  1504. struct btrfs_file_extent_item);
  1505. type = btrfs_file_extent_type(path->nodes[0], ei);
  1506. if (unlikely(type != BTRFS_FILE_EXTENT_INLINE)) {
  1507. ret = -EUCLEAN;
  1508. btrfs_crit(root->fs_info,
  1509. "send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
  1510. ino, btrfs_root_id(root), type);
  1511. return ret;
  1512. }
  1513. compression = btrfs_file_extent_compression(path->nodes[0], ei);
  1514. if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
  1515. ret = -EUCLEAN;
  1516. btrfs_crit(root->fs_info,
  1517. "send: found symlink extent with compression, ino %llu root %llu compression type %d",
  1518. ino, btrfs_root_id(root), compression);
  1519. return ret;
  1520. }
  1521. off = btrfs_file_extent_inline_start(ei);
  1522. len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
  1523. return fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
  1524. }
  1525. /*
  1526. * Helper function to generate a file name that is unique in the root of
  1527. * send_root and parent_root. This is used to generate names for orphan inodes.
  1528. */
  1529. static int gen_unique_name(struct send_ctx *sctx,
  1530. u64 ino, u64 gen,
  1531. struct fs_path *dest)
  1532. {
  1533. BTRFS_PATH_AUTO_FREE(path);
  1534. struct btrfs_dir_item *di;
  1535. char tmp[64];
  1536. int len;
  1537. u64 idx = 0;
  1538. path = alloc_path_for_send();
  1539. if (!path)
  1540. return -ENOMEM;
  1541. while (1) {
  1542. struct fscrypt_str tmp_name;
  1543. len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
  1544. ino, gen, idx);
  1545. ASSERT(len < sizeof(tmp));
  1546. tmp_name.name = tmp;
  1547. tmp_name.len = len;
  1548. di = btrfs_lookup_dir_item(NULL, sctx->send_root,
  1549. path, BTRFS_FIRST_FREE_OBJECTID,
  1550. &tmp_name, 0);
  1551. btrfs_release_path(path);
  1552. if (IS_ERR(di))
  1553. return PTR_ERR(di);
  1554. if (di) {
  1555. /* not unique, try again */
  1556. idx++;
  1557. continue;
  1558. }
  1559. if (!sctx->parent_root) {
  1560. /* unique */
  1561. break;
  1562. }
  1563. di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
  1564. path, BTRFS_FIRST_FREE_OBJECTID,
  1565. &tmp_name, 0);
  1566. btrfs_release_path(path);
  1567. if (IS_ERR(di))
  1568. return PTR_ERR(di);
  1569. if (di) {
  1570. /* not unique, try again */
  1571. idx++;
  1572. continue;
  1573. }
  1574. /* unique */
  1575. break;
  1576. }
  1577. return fs_path_add(dest, tmp, len);
  1578. }
  /*
   * State of an inode (number + generation) as computed by
   * get_cur_inode_state(). The "did" variants are used when the inode number
   * is below sctx->send_progress, the "will" variants otherwise.
   */
  enum inode_state {
      /* The inode exists with the given generation in both roots. */
      inode_state_no_change,
      /* The inode exists with the given generation only in the send root. */
      inode_state_will_create,
      inode_state_did_create,
      /* The inode exists with the given generation only in the parent root. */
      inode_state_will_delete,
      inode_state_did_delete,
  };
  1586. static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen,
  1587. u64 *send_gen, u64 *parent_gen)
  1588. {
  1589. int ret;
  1590. int left_ret;
  1591. int right_ret;
  1592. u64 left_gen;
  1593. u64 right_gen = 0;
  1594. struct btrfs_inode_info info;
  1595. ret = get_inode_info(sctx->send_root, ino, &info);
  1596. if (ret < 0 && ret != -ENOENT)
  1597. return ret;
  1598. left_ret = (info.nlink == 0) ? -ENOENT : ret;
  1599. left_gen = info.gen;
  1600. if (send_gen)
  1601. *send_gen = ((left_ret == -ENOENT) ? 0 : info.gen);
  1602. if (!sctx->parent_root) {
  1603. right_ret = -ENOENT;
  1604. } else {
  1605. ret = get_inode_info(sctx->parent_root, ino, &info);
  1606. if (ret < 0 && ret != -ENOENT)
  1607. return ret;
  1608. right_ret = (info.nlink == 0) ? -ENOENT : ret;
  1609. right_gen = info.gen;
  1610. if (parent_gen)
  1611. *parent_gen = ((right_ret == -ENOENT) ? 0 : info.gen);
  1612. }
  1613. if (!left_ret && !right_ret) {
  1614. if (left_gen == gen && right_gen == gen) {
  1615. ret = inode_state_no_change;
  1616. } else if (left_gen == gen) {
  1617. if (ino < sctx->send_progress)
  1618. ret = inode_state_did_create;
  1619. else
  1620. ret = inode_state_will_create;
  1621. } else if (right_gen == gen) {
  1622. if (ino < sctx->send_progress)
  1623. ret = inode_state_did_delete;
  1624. else
  1625. ret = inode_state_will_delete;
  1626. } else {
  1627. ret = -ENOENT;
  1628. }
  1629. } else if (!left_ret) {
  1630. if (left_gen == gen) {
  1631. if (ino < sctx->send_progress)
  1632. ret = inode_state_did_create;
  1633. else
  1634. ret = inode_state_will_create;
  1635. } else {
  1636. ret = -ENOENT;
  1637. }
  1638. } else if (!right_ret) {
  1639. if (right_gen == gen) {
  1640. if (ino < sctx->send_progress)
  1641. ret = inode_state_did_delete;
  1642. else
  1643. ret = inode_state_will_delete;
  1644. } else {
  1645. ret = -ENOENT;
  1646. }
  1647. } else {
  1648. ret = -ENOENT;
  1649. }
  1650. return ret;
  1651. }
  1652. static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen,
  1653. u64 *send_gen, u64 *parent_gen)
  1654. {
  1655. int ret;
  1656. if (ino == BTRFS_FIRST_FREE_OBJECTID)
  1657. return 1;
  1658. ret = get_cur_inode_state(sctx, ino, gen, send_gen, parent_gen);
  1659. if (ret < 0)
  1660. return ret;
  1661. if (ret == inode_state_no_change ||
  1662. ret == inode_state_did_create ||
  1663. ret == inode_state_will_delete)
  1664. return 1;
  1665. return 0;
  1666. }
  1667. /*
  1668. * Helper function to lookup a dir item in a dir.
  1669. */
  1670. static int lookup_dir_item_inode(struct btrfs_root *root,
  1671. u64 dir, const char *name, int name_len,
  1672. u64 *found_inode)
  1673. {
  1674. int ret = 0;
  1675. struct btrfs_dir_item *di;
  1676. struct btrfs_key key;
  1677. BTRFS_PATH_AUTO_FREE(path);
  1678. struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
  1679. path = alloc_path_for_send();
  1680. if (!path)
  1681. return -ENOMEM;
  1682. di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
  1683. if (IS_ERR_OR_NULL(di))
  1684. return di ? PTR_ERR(di) : -ENOENT;
  1685. btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
  1686. if (key.type == BTRFS_ROOT_ITEM_KEY)
  1687. return -ENOENT;
  1688. *found_inode = key.objectid;
  1689. return ret;
  1690. }
  1691. /*
  1692. * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
  1693. * generation of the parent dir and the name of the dir entry.
  1694. */
  1695. static int get_first_ref(struct btrfs_root *root, u64 ino,
  1696. u64 *dir, u64 *dir_gen, struct fs_path *name)
  1697. {
  1698. int ret;
  1699. struct btrfs_key key;
  1700. struct btrfs_key found_key;
  1701. BTRFS_PATH_AUTO_FREE(path);
  1702. int len;
  1703. u64 parent_dir;
  1704. path = alloc_path_for_send();
  1705. if (!path)
  1706. return -ENOMEM;
  1707. key.objectid = ino;
  1708. key.type = BTRFS_INODE_REF_KEY;
  1709. key.offset = 0;
  1710. ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
  1711. if (ret < 0)
  1712. return ret;
  1713. if (!ret)
  1714. btrfs_item_key_to_cpu(path->nodes[0], &found_key,
  1715. path->slots[0]);
  1716. if (ret || found_key.objectid != ino ||
  1717. (found_key.type != BTRFS_INODE_REF_KEY &&
  1718. found_key.type != BTRFS_INODE_EXTREF_KEY))
  1719. return -ENOENT;
  1720. if (found_key.type == BTRFS_INODE_REF_KEY) {
  1721. struct btrfs_inode_ref *iref;
  1722. iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
  1723. struct btrfs_inode_ref);
  1724. len = btrfs_inode_ref_name_len(path->nodes[0], iref);
  1725. ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
  1726. (unsigned long)(iref + 1),
  1727. len);
  1728. parent_dir = found_key.offset;
  1729. } else {
  1730. struct btrfs_inode_extref *extref;
  1731. extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
  1732. struct btrfs_inode_extref);
  1733. len = btrfs_inode_extref_name_len(path->nodes[0], extref);
  1734. ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
  1735. (unsigned long)&extref->name, len);
  1736. parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
  1737. }
  1738. if (ret < 0)
  1739. return ret;
  1740. btrfs_release_path(path);
  1741. if (dir_gen) {
  1742. ret = get_inode_gen(root, parent_dir, dir_gen);
  1743. if (ret < 0)
  1744. return ret;
  1745. }
  1746. *dir = parent_dir;
  1747. return ret;
  1748. }
  1749. static int is_first_ref(struct btrfs_root *root,
  1750. u64 ino, u64 dir,
  1751. const char *name, int name_len)
  1752. {
  1753. int ret;
  1754. struct fs_path *tmp_name;
  1755. u64 tmp_dir;
  1756. tmp_name = fs_path_alloc();
  1757. if (!tmp_name)
  1758. return -ENOMEM;
  1759. ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
  1760. if (ret < 0)
  1761. goto out;
  1762. if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
  1763. ret = 0;
  1764. goto out;
  1765. }
  1766. ret = !memcmp(tmp_name->start, name, name_len);
  1767. out:
  1768. fs_path_free(tmp_name);
  1769. return ret;
  1770. }
  1771. /*
  1772. * Used by process_recorded_refs to determine if a new ref would overwrite an
  1773. * already existing ref. In case it detects an overwrite, it returns the
  1774. * inode/gen in who_ino/who_gen.
  1775. * When an overwrite is detected, process_recorded_refs does proper orphanizing
  1776. * to make sure later references to the overwritten inode are possible.
  1777. * Orphanizing is however only required for the first ref of an inode.
  1778. * process_recorded_refs does an additional is_first_ref check to see if
  1779. * orphanizing is really required.
  1780. */
  1781. static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
  1782. const char *name, int name_len,
  1783. u64 *who_ino, u64 *who_gen, u64 *who_mode)
  1784. {
  1785. int ret;
  1786. u64 parent_root_dir_gen;
  1787. u64 other_inode = 0;
  1788. struct btrfs_inode_info info;
  1789. if (!sctx->parent_root)
  1790. return 0;
  1791. ret = is_inode_existent(sctx, dir, dir_gen, NULL, &parent_root_dir_gen);
  1792. if (ret <= 0)
  1793. return 0;
  1794. /*
  1795. * If we have a parent root we need to verify that the parent dir was
  1796. * not deleted and then re-created, if it was then we have no overwrite
  1797. * and we can just unlink this entry.
  1798. *
  1799. * @parent_root_dir_gen was set to 0 if the inode does not exist in the
  1800. * parent root.
  1801. */
  1802. if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID &&
  1803. parent_root_dir_gen != dir_gen)
  1804. return 0;
  1805. ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
  1806. &other_inode);
  1807. if (ret == -ENOENT)
  1808. return 0;
  1809. else if (ret < 0)
  1810. return ret;
  1811. /*
  1812. * Check if the overwritten ref was already processed. If yes, the ref
  1813. * was already unlinked/moved, so we can safely assume that we will not
  1814. * overwrite anything at this point in time.
  1815. */
  1816. if (other_inode > sctx->send_progress ||
  1817. is_waiting_for_move(sctx, other_inode)) {
  1818. ret = get_inode_info(sctx->parent_root, other_inode, &info);
  1819. if (ret < 0)
  1820. return ret;
  1821. *who_ino = other_inode;
  1822. *who_gen = info.gen;
  1823. *who_mode = info.mode;
  1824. return 1;
  1825. }
  1826. return 0;
  1827. }
  1828. /*
  1829. * Checks if the ref was overwritten by an already processed inode. This is
  1830. * used by __get_cur_name_and_parent to find out if the ref was orphanized and
  1831. * thus the orphan name needs be used.
  1832. * process_recorded_refs also uses it to avoid unlinking of refs that were
  1833. * overwritten.
  1834. */
  1835. static int did_overwrite_ref(struct send_ctx *sctx,
  1836. u64 dir, u64 dir_gen,
  1837. u64 ino, u64 ino_gen,
  1838. const char *name, int name_len)
  1839. {
  1840. int ret;
  1841. u64 ow_inode;
  1842. u64 ow_gen = 0;
  1843. u64 send_root_dir_gen;
  1844. if (!sctx->parent_root)
  1845. return 0;
  1846. ret = is_inode_existent(sctx, dir, dir_gen, &send_root_dir_gen, NULL);
  1847. if (ret <= 0)
  1848. return ret;
  1849. /*
  1850. * @send_root_dir_gen was set to 0 if the inode does not exist in the
  1851. * send root.
  1852. */
  1853. if (dir != BTRFS_FIRST_FREE_OBJECTID && send_root_dir_gen != dir_gen)
  1854. return 0;
  1855. /* check if the ref was overwritten by another ref */
  1856. ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
  1857. &ow_inode);
  1858. if (ret == -ENOENT) {
  1859. /* was never and will never be overwritten */
  1860. return 0;
  1861. } else if (ret < 0) {
  1862. return ret;
  1863. }
  1864. if (ow_inode == ino) {
  1865. ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
  1866. if (ret < 0)
  1867. return ret;
  1868. /* It's the same inode, so no overwrite happened. */
  1869. if (ow_gen == ino_gen)
  1870. return 0;
  1871. }
  1872. /*
  1873. * We know that it is or will be overwritten. Check this now.
  1874. * The current inode being processed might have been the one that caused
  1875. * inode 'ino' to be orphanized, therefore check if ow_inode matches
  1876. * the current inode being processed.
  1877. */
  1878. if (ow_inode < sctx->send_progress)
  1879. return 1;
  1880. if (ino != sctx->cur_ino && ow_inode == sctx->cur_ino) {
  1881. if (ow_gen == 0) {
  1882. ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
  1883. if (ret < 0)
  1884. return ret;
  1885. }
  1886. if (ow_gen == sctx->cur_inode_gen)
  1887. return 1;
  1888. }
  1889. return 0;
  1890. }
  1891. /*
  1892. * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
  1893. * that got overwritten. This is used by process_recorded_refs to determine
  1894. * if it has to use the path as returned by get_cur_path or the orphan name.
  1895. */
  1896. static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
  1897. {
  1898. int ret = 0;
  1899. struct fs_path *name = NULL;
  1900. u64 dir;
  1901. u64 dir_gen;
  1902. if (!sctx->parent_root)
  1903. goto out;
  1904. name = fs_path_alloc();
  1905. if (!name)
  1906. return -ENOMEM;
  1907. ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
  1908. if (ret < 0)
  1909. goto out;
  1910. ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
  1911. name->start, fs_path_len(name));
  1912. out:
  1913. fs_path_free(name);
  1914. return ret;
  1915. }
  1916. static inline struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
  1917. u64 ino, u64 gen)
  1918. {
  1919. struct btrfs_lru_cache_entry *entry;
  1920. entry = btrfs_lru_cache_lookup(&sctx->name_cache, ino, gen);
  1921. if (!entry)
  1922. return NULL;
  1923. return container_of(entry, struct name_cache_entry, entry);
  1924. }
  1925. /*
  1926. * Used by get_cur_path for each ref up to the root.
  1927. * Returns 0 if it succeeded.
  1928. * Returns 1 if the inode is not existent or got overwritten. In that case, the
  1929. * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
  1930. * is returned, parent_ino/parent_gen are not guaranteed to be valid.
  1931. * Returns <0 in case of error.
  1932. */
  1933. static int __get_cur_name_and_parent(struct send_ctx *sctx,
  1934. u64 ino, u64 gen,
  1935. u64 *parent_ino,
  1936. u64 *parent_gen,
  1937. struct fs_path *dest)
  1938. {
  1939. int ret;
  1940. int nce_ret;
  1941. struct name_cache_entry *nce;
  1942. /*
  1943. * First check if we already did a call to this function with the same
  1944. * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
  1945. * return the cached result.
  1946. */
  1947. nce = name_cache_search(sctx, ino, gen);
  1948. if (nce) {
  1949. if (ino < sctx->send_progress && nce->need_later_update) {
  1950. btrfs_lru_cache_remove(&sctx->name_cache, &nce->entry);
  1951. nce = NULL;
  1952. } else {
  1953. *parent_ino = nce->parent_ino;
  1954. *parent_gen = nce->parent_gen;
  1955. ret = fs_path_add(dest, nce->name, nce->name_len);
  1956. if (ret < 0)
  1957. return ret;
  1958. return nce->ret;
  1959. }
  1960. }
  1961. /*
  1962. * If the inode is not existent yet, add the orphan name and return 1.
  1963. * This should only happen for the parent dir that we determine in
  1964. * record_new_ref_if_needed().
  1965. */
  1966. ret = is_inode_existent(sctx, ino, gen, NULL, NULL);
  1967. if (ret < 0)
  1968. return ret;
  1969. if (!ret) {
  1970. ret = gen_unique_name(sctx, ino, gen, dest);
  1971. if (ret < 0)
  1972. return ret;
  1973. ret = 1;
  1974. goto out_cache;
  1975. }
  1976. /*
  1977. * Depending on whether the inode was already processed or not, use
  1978. * send_root or parent_root for ref lookup.
  1979. */
  1980. if (ino < sctx->send_progress)
  1981. ret = get_first_ref(sctx->send_root, ino,
  1982. parent_ino, parent_gen, dest);
  1983. else
  1984. ret = get_first_ref(sctx->parent_root, ino,
  1985. parent_ino, parent_gen, dest);
  1986. if (ret < 0)
  1987. return ret;
  1988. /*
  1989. * Check if the ref was overwritten by an inode's ref that was processed
  1990. * earlier. If yes, treat as orphan and return 1.
  1991. */
  1992. ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
  1993. dest->start, fs_path_len(dest));
  1994. if (ret < 0)
  1995. return ret;
  1996. if (ret) {
  1997. fs_path_reset(dest);
  1998. ret = gen_unique_name(sctx, ino, gen, dest);
  1999. if (ret < 0)
  2000. return ret;
  2001. ret = 1;
  2002. }
  2003. out_cache:
  2004. /*
  2005. * Store the result of the lookup in the name cache.
  2006. */
  2007. nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL);
  2008. if (!nce)
  2009. return -ENOMEM;
  2010. nce->entry.key = ino;
  2011. nce->entry.gen = gen;
  2012. nce->parent_ino = *parent_ino;
  2013. nce->parent_gen = *parent_gen;
  2014. nce->name_len = fs_path_len(dest);
  2015. nce->ret = ret;
  2016. memcpy(nce->name, dest->start, nce->name_len);
  2017. if (ino < sctx->send_progress)
  2018. nce->need_later_update = 0;
  2019. else
  2020. nce->need_later_update = 1;
  2021. nce_ret = btrfs_lru_cache_store(&sctx->name_cache, &nce->entry, GFP_KERNEL);
  2022. if (nce_ret < 0) {
  2023. kfree(nce);
  2024. return nce_ret;
  2025. }
  2026. return ret;
  2027. }
  2028. /*
  2029. * Magic happens here. This function returns the first ref to an inode as it
  2030. * would look like while receiving the stream at this point in time.
  2031. * We walk the path up to the root. For every inode in between, we check if it
  2032. * was already processed/sent. If yes, we continue with the parent as found
  2033. * in send_root. If not, we continue with the parent as found in parent_root.
  2034. * If we encounter an inode that was deleted at this point in time, we use the
  2035. * inodes "orphan" name instead of the real name and stop. Same with new inodes
  2036. * that were not created yet and overwritten inodes/refs.
  2037. *
  2038. * When do we have orphan inodes:
  2039. * 1. When an inode is freshly created and thus no valid refs are available yet
  2040. * 2. When a directory lost all it's refs (deleted) but still has dir items
  2041. * inside which were not processed yet (pending for move/delete). If anyone
  2042. * tried to get the path to the dir items, it would get a path inside that
  2043. * orphan directory.
  2044. * 3. When an inode is moved around or gets new links, it may overwrite the ref
  2045. * of an unprocessed inode. If in that case the first ref would be
  2046. * overwritten, the overwritten inode gets "orphanized". Later when we
  2047. * process this overwritten inode, it is restored at a new place by moving
  2048. * the orphan inode.
  2049. *
  2050. * sctx->send_progress tells this function at which point in time receiving
  2051. * would be.
  2052. */
  2053. static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
  2054. struct fs_path *dest)
  2055. {
  2056. int ret = 0;
  2057. struct fs_path *name = NULL;
  2058. u64 parent_inode = 0;
  2059. u64 parent_gen = 0;
  2060. int stop = 0;
  2061. const bool is_cur_inode = (ino == sctx->cur_ino && gen == sctx->cur_inode_gen);
  2062. if (is_cur_inode && fs_path_len(&sctx->cur_inode_path) > 0) {
  2063. if (dest != &sctx->cur_inode_path)
  2064. return fs_path_copy(dest, &sctx->cur_inode_path);
  2065. return 0;
  2066. }
  2067. name = fs_path_alloc();
  2068. if (!name) {
  2069. ret = -ENOMEM;
  2070. goto out;
  2071. }
  2072. dest->reversed = 1;
  2073. fs_path_reset(dest);
  2074. while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
  2075. struct waiting_dir_move *wdm;
  2076. fs_path_reset(name);
  2077. if (is_waiting_for_rm(sctx, ino, gen)) {
  2078. ret = gen_unique_name(sctx, ino, gen, name);
  2079. if (ret < 0)
  2080. goto out;
  2081. ret = fs_path_add_path(dest, name);
  2082. break;
  2083. }
  2084. wdm = get_waiting_dir_move(sctx, ino);
  2085. if (wdm && wdm->orphanized) {
  2086. ret = gen_unique_name(sctx, ino, gen, name);
  2087. stop = 1;
  2088. } else if (wdm) {
  2089. ret = get_first_ref(sctx->parent_root, ino,
  2090. &parent_inode, &parent_gen, name);
  2091. } else {
  2092. ret = __get_cur_name_and_parent(sctx, ino, gen,
  2093. &parent_inode,
  2094. &parent_gen, name);
  2095. if (ret)
  2096. stop = 1;
  2097. }
  2098. if (ret < 0)
  2099. goto out;
  2100. ret = fs_path_add_path(dest, name);
  2101. if (ret < 0)
  2102. goto out;
  2103. ino = parent_inode;
  2104. gen = parent_gen;
  2105. }
  2106. out:
  2107. fs_path_free(name);
  2108. if (!ret) {
  2109. fs_path_unreverse(dest);
  2110. if (is_cur_inode && dest != &sctx->cur_inode_path)
  2111. ret = fs_path_copy(&sctx->cur_inode_path, dest);
  2112. }
  2113. return ret;
  2114. }
  2115. /*
  2116. * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
  2117. */
  2118. static int send_subvol_begin(struct send_ctx *sctx)
  2119. {
  2120. int ret;
  2121. struct btrfs_root *send_root = sctx->send_root;
  2122. struct btrfs_root *parent_root = sctx->parent_root;
  2123. BTRFS_PATH_AUTO_FREE(path);
  2124. struct btrfs_key key;
  2125. struct btrfs_root_ref *ref;
  2126. struct extent_buffer *leaf;
  2127. char AUTO_KFREE(name);
  2128. int namelen;
  2129. path = btrfs_alloc_path();
  2130. if (!path)
  2131. return -ENOMEM;
  2132. name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
  2133. if (!name)
  2134. return -ENOMEM;
  2135. key.objectid = btrfs_root_id(send_root);
  2136. key.type = BTRFS_ROOT_BACKREF_KEY;
  2137. key.offset = 0;
  2138. ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
  2139. &key, path, 1, 0);
  2140. if (ret < 0)
  2141. return ret;
  2142. if (ret)
  2143. return -ENOENT;
  2144. leaf = path->nodes[0];
  2145. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  2146. if (key.type != BTRFS_ROOT_BACKREF_KEY ||
  2147. key.objectid != btrfs_root_id(send_root)) {
  2148. return -ENOENT;
  2149. }
  2150. ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
  2151. namelen = btrfs_root_ref_name_len(leaf, ref);
  2152. read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
  2153. btrfs_release_path(path);
  2154. if (parent_root) {
  2155. ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
  2156. if (ret < 0)
  2157. return ret;
  2158. } else {
  2159. ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
  2160. if (ret < 0)
  2161. return ret;
  2162. }
  2163. TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
  2164. if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
  2165. TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
  2166. sctx->send_root->root_item.received_uuid);
  2167. else
  2168. TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
  2169. sctx->send_root->root_item.uuid);
  2170. TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
  2171. btrfs_root_ctransid(&sctx->send_root->root_item));
  2172. if (parent_root) {
  2173. if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
  2174. TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
  2175. parent_root->root_item.received_uuid);
  2176. else
  2177. TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
  2178. parent_root->root_item.uuid);
  2179. TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
  2180. btrfs_root_ctransid(&sctx->parent_root->root_item));
  2181. }
  2182. ret = send_cmd(sctx);
  2183. tlv_put_failure:
  2184. return ret;
  2185. }
  2186. static struct fs_path *get_cur_inode_path(struct send_ctx *sctx)
  2187. {
  2188. if (fs_path_len(&sctx->cur_inode_path) == 0) {
  2189. int ret;
  2190. ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
  2191. &sctx->cur_inode_path);
  2192. if (ret < 0)
  2193. return ERR_PTR(ret);
  2194. }
  2195. return &sctx->cur_inode_path;
  2196. }
  2197. static struct fs_path *get_path_for_command(struct send_ctx *sctx, u64 ino, u64 gen)
  2198. {
  2199. struct fs_path *path;
  2200. int ret;
  2201. if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen)
  2202. return get_cur_inode_path(sctx);
  2203. path = fs_path_alloc();
  2204. if (!path)
  2205. return ERR_PTR(-ENOMEM);
  2206. ret = get_cur_path(sctx, ino, gen, path);
  2207. if (ret < 0) {
  2208. fs_path_free(path);
  2209. return ERR_PTR(ret);
  2210. }
  2211. return path;
  2212. }
  2213. static void free_path_for_command(const struct send_ctx *sctx, struct fs_path *path)
  2214. {
  2215. if (path != &sctx->cur_inode_path)
  2216. fs_path_free(path);
  2217. }
  2218. static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
  2219. {
  2220. int ret = 0;
  2221. struct fs_path *p;
  2222. p = get_path_for_command(sctx, ino, gen);
  2223. if (IS_ERR(p))
  2224. return PTR_ERR(p);
  2225. ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
  2226. if (ret < 0)
  2227. goto out;
  2228. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  2229. TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
  2230. ret = send_cmd(sctx);
  2231. tlv_put_failure:
  2232. out:
  2233. free_path_for_command(sctx, p);
  2234. return ret;
  2235. }
  2236. static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
  2237. {
  2238. int ret = 0;
  2239. struct fs_path *p;
  2240. p = get_path_for_command(sctx, ino, gen);
  2241. if (IS_ERR(p))
  2242. return PTR_ERR(p);
  2243. ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
  2244. if (ret < 0)
  2245. goto out;
  2246. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  2247. TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
  2248. ret = send_cmd(sctx);
  2249. tlv_put_failure:
  2250. out:
  2251. free_path_for_command(sctx, p);
  2252. return ret;
  2253. }
  2254. static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr)
  2255. {
  2256. int ret = 0;
  2257. struct fs_path *p;
  2258. if (sctx->proto < 2)
  2259. return 0;
  2260. p = get_path_for_command(sctx, ino, gen);
  2261. if (IS_ERR(p))
  2262. return PTR_ERR(p);
  2263. ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR);
  2264. if (ret < 0)
  2265. goto out;
  2266. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  2267. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr);
  2268. ret = send_cmd(sctx);
  2269. tlv_put_failure:
  2270. out:
  2271. free_path_for_command(sctx, p);
  2272. return ret;
  2273. }
  2274. static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
  2275. {
  2276. int ret = 0;
  2277. struct fs_path *p;
  2278. p = get_path_for_command(sctx, ino, gen);
  2279. if (IS_ERR(p))
  2280. return PTR_ERR(p);
  2281. ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
  2282. if (ret < 0)
  2283. goto out;
  2284. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  2285. TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
  2286. TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
  2287. ret = send_cmd(sctx);
  2288. tlv_put_failure:
  2289. out:
  2290. free_path_for_command(sctx, p);
  2291. return ret;
  2292. }
  2293. static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
  2294. {
  2295. int ret = 0;
  2296. struct fs_path *p = NULL;
  2297. struct btrfs_inode_item *ii;
  2298. BTRFS_PATH_AUTO_FREE(path);
  2299. struct extent_buffer *eb;
  2300. struct btrfs_key key;
  2301. int slot;
  2302. p = get_path_for_command(sctx, ino, gen);
  2303. if (IS_ERR(p))
  2304. return PTR_ERR(p);
  2305. path = alloc_path_for_send();
  2306. if (!path) {
  2307. ret = -ENOMEM;
  2308. goto out;
  2309. }
  2310. key.objectid = ino;
  2311. key.type = BTRFS_INODE_ITEM_KEY;
  2312. key.offset = 0;
  2313. ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
  2314. if (ret > 0)
  2315. ret = -ENOENT;
  2316. if (ret < 0)
  2317. goto out;
  2318. eb = path->nodes[0];
  2319. slot = path->slots[0];
  2320. ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
  2321. ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
  2322. if (ret < 0)
  2323. goto out;
  2324. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  2325. TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
  2326. TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
  2327. TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
  2328. if (sctx->proto >= 2)
  2329. TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_OTIME, eb, &ii->otime);
  2330. ret = send_cmd(sctx);
  2331. tlv_put_failure:
  2332. out:
  2333. free_path_for_command(sctx, p);
  2334. return ret;
  2335. }
  2336. /*
  2337. * If the cache is full, we can't remove entries from it and do a call to
  2338. * send_utimes() for each respective inode, because we might be finishing
  2339. * processing an inode that is a directory and it just got renamed, and existing
  2340. * entries in the cache may refer to inodes that have the directory in their
  2341. * full path - in which case we would generate outdated paths (pre-rename)
  2342. * for the inodes that the cache entries point to. Instead of pruning the
  2343. * cache when inserting, do it after we finish processing each inode at
  2344. * finish_inode_if_needed().
  2345. */
  2346. static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
  2347. {
  2348. struct btrfs_lru_cache_entry *entry;
  2349. int ret;
  2350. entry = btrfs_lru_cache_lookup(&sctx->dir_utimes_cache, dir, gen);
  2351. if (entry != NULL)
  2352. return 0;
  2353. /* Caching is optional, don't fail if we can't allocate memory. */
  2354. entry = kmalloc_obj(*entry);
  2355. if (!entry)
  2356. return send_utimes(sctx, dir, gen);
  2357. entry->key = dir;
  2358. entry->gen = gen;
  2359. ret = btrfs_lru_cache_store(&sctx->dir_utimes_cache, entry, GFP_KERNEL);
  2360. ASSERT(ret != -EEXIST);
  2361. if (ret) {
  2362. kfree(entry);
  2363. return send_utimes(sctx, dir, gen);
  2364. }
  2365. return 0;
  2366. }
  2367. static int trim_dir_utimes_cache(struct send_ctx *sctx)
  2368. {
  2369. while (sctx->dir_utimes_cache.size > SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
  2370. struct btrfs_lru_cache_entry *lru;
  2371. int ret;
  2372. lru = btrfs_lru_cache_lru_entry(&sctx->dir_utimes_cache);
  2373. ASSERT(lru != NULL);
  2374. ret = send_utimes(sctx, lru->key, lru->gen);
  2375. if (ret)
  2376. return ret;
  2377. btrfs_lru_cache_remove(&sctx->dir_utimes_cache, lru);
  2378. }
  2379. return 0;
  2380. }
  2381. /*
  2382. * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
  2383. * a valid path yet because we did not process the refs yet. So, the inode
  2384. * is created as orphan.
  2385. */
  2386. static int send_create_inode(struct send_ctx *sctx, u64 ino)
  2387. {
  2388. int ret = 0;
  2389. struct fs_path *p;
  2390. int cmd;
  2391. struct btrfs_inode_info info;
  2392. u64 gen;
  2393. u64 mode;
  2394. u64 rdev;
  2395. p = fs_path_alloc();
  2396. if (!p)
  2397. return -ENOMEM;
  2398. if (ino != sctx->cur_ino) {
  2399. ret = get_inode_info(sctx->send_root, ino, &info);
  2400. if (ret < 0)
  2401. goto out;
  2402. gen = info.gen;
  2403. mode = info.mode;
  2404. rdev = info.rdev;
  2405. } else {
  2406. gen = sctx->cur_inode_gen;
  2407. mode = sctx->cur_inode_mode;
  2408. rdev = sctx->cur_inode_rdev;
  2409. }
  2410. if (S_ISREG(mode)) {
  2411. cmd = BTRFS_SEND_C_MKFILE;
  2412. } else if (S_ISDIR(mode)) {
  2413. cmd = BTRFS_SEND_C_MKDIR;
  2414. } else if (S_ISLNK(mode)) {
  2415. cmd = BTRFS_SEND_C_SYMLINK;
  2416. } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
  2417. cmd = BTRFS_SEND_C_MKNOD;
  2418. } else if (S_ISFIFO(mode)) {
  2419. cmd = BTRFS_SEND_C_MKFIFO;
  2420. } else if (S_ISSOCK(mode)) {
  2421. cmd = BTRFS_SEND_C_MKSOCK;
  2422. } else {
  2423. btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
  2424. (int)(mode & S_IFMT));
  2425. ret = -EOPNOTSUPP;
  2426. goto out;
  2427. }
  2428. ret = begin_cmd(sctx, cmd);
  2429. if (ret < 0)
  2430. goto out;
  2431. ret = gen_unique_name(sctx, ino, gen, p);
  2432. if (ret < 0)
  2433. goto out;
  2434. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  2435. TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
  2436. if (S_ISLNK(mode)) {
  2437. fs_path_reset(p);
  2438. ret = read_symlink(sctx->send_root, ino, p);
  2439. if (ret < 0)
  2440. goto out;
  2441. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
  2442. } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
  2443. S_ISFIFO(mode) || S_ISSOCK(mode)) {
  2444. TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
  2445. TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
  2446. }
  2447. ret = send_cmd(sctx);
  2448. if (ret < 0)
  2449. goto out;
  2450. tlv_put_failure:
  2451. out:
  2452. fs_path_free(p);
  2453. return ret;
  2454. }
  2455. static void cache_dir_created(struct send_ctx *sctx, u64 dir)
  2456. {
  2457. struct btrfs_lru_cache_entry *entry;
  2458. int ret;
  2459. /* Caching is optional, ignore any failures. */
  2460. entry = kmalloc_obj(*entry);
  2461. if (!entry)
  2462. return;
  2463. entry->key = dir;
  2464. entry->gen = 0;
  2465. ret = btrfs_lru_cache_store(&sctx->dir_created_cache, entry, GFP_KERNEL);
  2466. if (ret < 0)
  2467. kfree(entry);
  2468. }
  2469. /*
  2470. * We need some special handling for inodes that get processed before the parent
  2471. * directory got created. See process_recorded_refs for details.
  2472. * This function does the check if we already created the dir out of order.
  2473. */
  2474. static int did_create_dir(struct send_ctx *sctx, u64 dir)
  2475. {
  2476. int ret = 0;
  2477. int iter_ret = 0;
  2478. BTRFS_PATH_AUTO_FREE(path);
  2479. struct btrfs_key key;
  2480. struct btrfs_key found_key;
  2481. struct btrfs_key di_key;
  2482. struct btrfs_dir_item *di;
  2483. if (btrfs_lru_cache_lookup(&sctx->dir_created_cache, dir, 0))
  2484. return 1;
  2485. path = alloc_path_for_send();
  2486. if (!path)
  2487. return -ENOMEM;
  2488. key.objectid = dir;
  2489. key.type = BTRFS_DIR_INDEX_KEY;
  2490. key.offset = 0;
  2491. btrfs_for_each_slot(sctx->send_root, &key, &found_key, path, iter_ret) {
  2492. struct extent_buffer *eb = path->nodes[0];
  2493. if (found_key.objectid != key.objectid ||
  2494. found_key.type != key.type) {
  2495. ret = 0;
  2496. break;
  2497. }
  2498. di = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dir_item);
  2499. btrfs_dir_item_key_to_cpu(eb, di, &di_key);
  2500. if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
  2501. di_key.objectid < sctx->send_progress) {
  2502. ret = 1;
  2503. cache_dir_created(sctx, dir);
  2504. break;
  2505. }
  2506. }
  2507. /* Catch error found during iteration */
  2508. if (iter_ret < 0)
  2509. ret = iter_ret;
  2510. return ret;
  2511. }
  2512. /*
  2513. * Only creates the inode if it is:
  2514. * 1. Not a directory
  2515. * 2. Or a directory which was not created already due to out of order
  2516. * directories. See did_create_dir and process_recorded_refs for details.
  2517. */
  2518. static int send_create_inode_if_needed(struct send_ctx *sctx)
  2519. {
  2520. int ret;
  2521. if (S_ISDIR(sctx->cur_inode_mode)) {
  2522. ret = did_create_dir(sctx, sctx->cur_ino);
  2523. if (ret < 0)
  2524. return ret;
  2525. else if (ret > 0)
  2526. return 0;
  2527. }
  2528. ret = send_create_inode(sctx, sctx->cur_ino);
  2529. if (ret == 0 && S_ISDIR(sctx->cur_inode_mode))
  2530. cache_dir_created(sctx, sctx->cur_ino);
  2531. return ret;
  2532. }
  2533. struct recorded_ref {
  2534. struct list_head list;
  2535. char *name;
  2536. struct fs_path *full_path;
  2537. u64 dir;
  2538. u64 dir_gen;
  2539. int name_len;
  2540. struct rb_node node;
  2541. struct rb_root *root;
  2542. };
  2543. static struct recorded_ref *recorded_ref_alloc(void)
  2544. {
  2545. struct recorded_ref *ref;
  2546. ref = kzalloc_obj(*ref);
  2547. if (!ref)
  2548. return NULL;
  2549. RB_CLEAR_NODE(&ref->node);
  2550. INIT_LIST_HEAD(&ref->list);
  2551. return ref;
  2552. }
  2553. static void recorded_ref_free(struct recorded_ref *ref)
  2554. {
  2555. if (!ref)
  2556. return;
  2557. if (!RB_EMPTY_NODE(&ref->node))
  2558. rb_erase(&ref->node, ref->root);
  2559. list_del(&ref->list);
  2560. fs_path_free(ref->full_path);
  2561. kfree(ref);
  2562. }
  2563. static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
  2564. {
  2565. ref->full_path = path;
  2566. ref->name = (char *)kbasename(ref->full_path->start);
  2567. ref->name_len = ref->full_path->end - ref->name;
  2568. }
  2569. static int dup_ref(struct recorded_ref *ref, struct list_head *list)
  2570. {
  2571. struct recorded_ref *new;
  2572. new = recorded_ref_alloc();
  2573. if (!new)
  2574. return -ENOMEM;
  2575. new->dir = ref->dir;
  2576. new->dir_gen = ref->dir_gen;
  2577. list_add_tail(&new->list, list);
  2578. return 0;
  2579. }
  2580. static void __free_recorded_refs(struct list_head *head)
  2581. {
  2582. struct recorded_ref *cur;
  2583. while (!list_empty(head)) {
  2584. cur = list_first_entry(head, struct recorded_ref, list);
  2585. recorded_ref_free(cur);
  2586. }
  2587. }
  2588. static void free_recorded_refs(struct send_ctx *sctx)
  2589. {
  2590. __free_recorded_refs(&sctx->new_refs);
  2591. __free_recorded_refs(&sctx->deleted_refs);
  2592. }
  2593. /*
  2594. * Renames/moves a file/dir to its orphan name. Used when the first
  2595. * ref of an unprocessed inode gets overwritten and for all non empty
  2596. * directories.
  2597. */
  2598. static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
  2599. struct fs_path *path)
  2600. {
  2601. int ret;
  2602. struct fs_path *orphan;
  2603. orphan = fs_path_alloc();
  2604. if (!orphan)
  2605. return -ENOMEM;
  2606. ret = gen_unique_name(sctx, ino, gen, orphan);
  2607. if (ret < 0)
  2608. goto out;
  2609. ret = send_rename(sctx, path, orphan);
  2610. if (ret < 0)
  2611. goto out;
  2612. if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen)
  2613. ret = fs_path_copy(&sctx->cur_inode_path, orphan);
  2614. out:
  2615. fs_path_free(orphan);
  2616. return ret;
  2617. }
  2618. static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
  2619. u64 dir_ino, u64 dir_gen)
  2620. {
  2621. struct rb_node **p = &sctx->orphan_dirs.rb_node;
  2622. struct rb_node *parent = NULL;
  2623. struct orphan_dir_info *entry, *odi;
  2624. while (*p) {
  2625. parent = *p;
  2626. entry = rb_entry(parent, struct orphan_dir_info, node);
  2627. if (dir_ino < entry->ino)
  2628. p = &(*p)->rb_left;
  2629. else if (dir_ino > entry->ino)
  2630. p = &(*p)->rb_right;
  2631. else if (dir_gen < entry->gen)
  2632. p = &(*p)->rb_left;
  2633. else if (dir_gen > entry->gen)
  2634. p = &(*p)->rb_right;
  2635. else
  2636. return entry;
  2637. }
  2638. odi = kmalloc_obj(*odi);
  2639. if (!odi)
  2640. return ERR_PTR(-ENOMEM);
  2641. odi->ino = dir_ino;
  2642. odi->gen = dir_gen;
  2643. odi->last_dir_index_offset = 0;
  2644. odi->dir_high_seq_ino = 0;
  2645. rb_link_node(&odi->node, parent, p);
  2646. rb_insert_color(&odi->node, &sctx->orphan_dirs);
  2647. return odi;
  2648. }
  2649. static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
  2650. u64 dir_ino, u64 gen)
  2651. {
  2652. struct rb_node *n = sctx->orphan_dirs.rb_node;
  2653. struct orphan_dir_info *entry;
  2654. while (n) {
  2655. entry = rb_entry(n, struct orphan_dir_info, node);
  2656. if (dir_ino < entry->ino)
  2657. n = n->rb_left;
  2658. else if (dir_ino > entry->ino)
  2659. n = n->rb_right;
  2660. else if (gen < entry->gen)
  2661. n = n->rb_left;
  2662. else if (gen > entry->gen)
  2663. n = n->rb_right;
  2664. else
  2665. return entry;
  2666. }
  2667. return NULL;
  2668. }
  2669. static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
  2670. {
  2671. struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen);
  2672. return odi != NULL;
  2673. }
  2674. static void free_orphan_dir_info(struct send_ctx *sctx,
  2675. struct orphan_dir_info *odi)
  2676. {
  2677. if (!odi)
  2678. return;
  2679. rb_erase(&odi->node, &sctx->orphan_dirs);
  2680. kfree(odi);
  2681. }
  2682. /*
  2683. * Returns 1 if a directory can be removed at this point in time.
  2684. * We check this by iterating all dir items and checking if the inode behind
  2685. * the dir item was already processed.
  2686. */
  2687. static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
  2688. {
  2689. int ret = 0;
  2690. int iter_ret = 0;
  2691. struct btrfs_root *root = sctx->parent_root;
  2692. struct btrfs_path *path;
  2693. struct btrfs_key key;
  2694. struct btrfs_key found_key;
  2695. struct btrfs_key loc;
  2696. struct btrfs_dir_item *di;
  2697. struct orphan_dir_info *odi = NULL;
  2698. u64 dir_high_seq_ino = 0;
  2699. u64 last_dir_index_offset = 0;
  2700. /*
  2701. * Don't try to rmdir the top/root subvolume dir.
  2702. */
  2703. if (dir == BTRFS_FIRST_FREE_OBJECTID)
  2704. return 0;
  2705. odi = get_orphan_dir_info(sctx, dir, dir_gen);
  2706. if (odi && sctx->cur_ino < odi->dir_high_seq_ino)
  2707. return 0;
  2708. path = alloc_path_for_send();
  2709. if (!path)
  2710. return -ENOMEM;
  2711. if (!odi) {
  2712. /*
  2713. * Find the inode number associated with the last dir index
  2714. * entry. This is very likely the inode with the highest number
  2715. * of all inodes that have an entry in the directory. We can
  2716. * then use it to avoid future calls to can_rmdir(), when
  2717. * processing inodes with a lower number, from having to search
  2718. * the parent root b+tree for dir index keys.
  2719. */
  2720. key.objectid = dir;
  2721. key.type = BTRFS_DIR_INDEX_KEY;
  2722. key.offset = (u64)-1;
  2723. ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  2724. if (ret < 0) {
  2725. goto out;
  2726. } else if (ret > 0) {
  2727. /* Can't happen, the root is never empty. */
  2728. ASSERT(path->slots[0] > 0);
  2729. if (WARN_ON(path->slots[0] == 0)) {
  2730. ret = -EUCLEAN;
  2731. goto out;
  2732. }
  2733. path->slots[0]--;
  2734. }
  2735. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  2736. if (key.objectid != dir || key.type != BTRFS_DIR_INDEX_KEY) {
  2737. /* No index keys, dir can be removed. */
  2738. ret = 1;
  2739. goto out;
  2740. }
  2741. di = btrfs_item_ptr(path->nodes[0], path->slots[0],
  2742. struct btrfs_dir_item);
  2743. btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
  2744. dir_high_seq_ino = loc.objectid;
  2745. if (sctx->cur_ino < dir_high_seq_ino) {
  2746. ret = 0;
  2747. goto out;
  2748. }
  2749. btrfs_release_path(path);
  2750. }
  2751. key.objectid = dir;
  2752. key.type = BTRFS_DIR_INDEX_KEY;
  2753. key.offset = (odi ? odi->last_dir_index_offset : 0);
  2754. btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
  2755. struct waiting_dir_move *dm;
  2756. if (found_key.objectid != key.objectid ||
  2757. found_key.type != key.type)
  2758. break;
  2759. di = btrfs_item_ptr(path->nodes[0], path->slots[0],
  2760. struct btrfs_dir_item);
  2761. btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
  2762. dir_high_seq_ino = max(dir_high_seq_ino, loc.objectid);
  2763. last_dir_index_offset = found_key.offset;
  2764. dm = get_waiting_dir_move(sctx, loc.objectid);
  2765. if (dm) {
  2766. dm->rmdir_ino = dir;
  2767. dm->rmdir_gen = dir_gen;
  2768. ret = 0;
  2769. goto out;
  2770. }
  2771. if (loc.objectid > sctx->cur_ino) {
  2772. ret = 0;
  2773. goto out;
  2774. }
  2775. }
  2776. if (iter_ret < 0) {
  2777. ret = iter_ret;
  2778. goto out;
  2779. }
  2780. free_orphan_dir_info(sctx, odi);
  2781. ret = 1;
  2782. out:
  2783. btrfs_free_path(path);
  2784. if (ret)
  2785. return ret;
  2786. if (!odi) {
  2787. odi = add_orphan_dir_info(sctx, dir, dir_gen);
  2788. if (IS_ERR(odi))
  2789. return PTR_ERR(odi);
  2790. odi->gen = dir_gen;
  2791. }
  2792. odi->last_dir_index_offset = last_dir_index_offset;
  2793. odi->dir_high_seq_ino = max(odi->dir_high_seq_ino, dir_high_seq_ino);
  2794. return 0;
  2795. }
  2796. static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
  2797. {
  2798. struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
  2799. return entry != NULL;
  2800. }
  2801. static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
  2802. {
  2803. struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
  2804. struct rb_node *parent = NULL;
  2805. struct waiting_dir_move *entry, *dm;
  2806. dm = kmalloc_obj(*dm);
  2807. if (!dm)
  2808. return -ENOMEM;
  2809. dm->ino = ino;
  2810. dm->rmdir_ino = 0;
  2811. dm->rmdir_gen = 0;
  2812. dm->orphanized = orphanized;
  2813. while (*p) {
  2814. parent = *p;
  2815. entry = rb_entry(parent, struct waiting_dir_move, node);
  2816. if (ino < entry->ino) {
  2817. p = &(*p)->rb_left;
  2818. } else if (ino > entry->ino) {
  2819. p = &(*p)->rb_right;
  2820. } else {
  2821. kfree(dm);
  2822. return -EEXIST;
  2823. }
  2824. }
  2825. rb_link_node(&dm->node, parent, p);
  2826. rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
  2827. return 0;
  2828. }
  2829. static struct waiting_dir_move *
  2830. get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
  2831. {
  2832. struct rb_node *n = sctx->waiting_dir_moves.rb_node;
  2833. struct waiting_dir_move *entry;
  2834. while (n) {
  2835. entry = rb_entry(n, struct waiting_dir_move, node);
  2836. if (ino < entry->ino)
  2837. n = n->rb_left;
  2838. else if (ino > entry->ino)
  2839. n = n->rb_right;
  2840. else
  2841. return entry;
  2842. }
  2843. return NULL;
  2844. }
  2845. static void free_waiting_dir_move(struct send_ctx *sctx,
  2846. struct waiting_dir_move *dm)
  2847. {
  2848. if (!dm)
  2849. return;
  2850. rb_erase(&dm->node, &sctx->waiting_dir_moves);
  2851. kfree(dm);
  2852. }
  2853. static int add_pending_dir_move(struct send_ctx *sctx,
  2854. u64 ino,
  2855. u64 ino_gen,
  2856. u64 parent_ino,
  2857. struct list_head *new_refs,
  2858. struct list_head *deleted_refs,
  2859. const bool is_orphan)
  2860. {
  2861. struct rb_node **p = &sctx->pending_dir_moves.rb_node;
  2862. struct rb_node *parent = NULL;
  2863. struct pending_dir_move *entry = NULL, *pm;
  2864. struct recorded_ref *cur;
  2865. int exists = 0;
  2866. int ret;
  2867. pm = kmalloc_obj(*pm);
  2868. if (!pm)
  2869. return -ENOMEM;
  2870. pm->parent_ino = parent_ino;
  2871. pm->ino = ino;
  2872. pm->gen = ino_gen;
  2873. INIT_LIST_HEAD(&pm->list);
  2874. INIT_LIST_HEAD(&pm->update_refs);
  2875. RB_CLEAR_NODE(&pm->node);
  2876. while (*p) {
  2877. parent = *p;
  2878. entry = rb_entry(parent, struct pending_dir_move, node);
  2879. if (parent_ino < entry->parent_ino) {
  2880. p = &(*p)->rb_left;
  2881. } else if (parent_ino > entry->parent_ino) {
  2882. p = &(*p)->rb_right;
  2883. } else {
  2884. exists = 1;
  2885. break;
  2886. }
  2887. }
  2888. list_for_each_entry(cur, deleted_refs, list) {
  2889. ret = dup_ref(cur, &pm->update_refs);
  2890. if (ret < 0)
  2891. goto out;
  2892. }
  2893. list_for_each_entry(cur, new_refs, list) {
  2894. ret = dup_ref(cur, &pm->update_refs);
  2895. if (ret < 0)
  2896. goto out;
  2897. }
  2898. ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
  2899. if (ret)
  2900. goto out;
  2901. if (exists) {
  2902. list_add_tail(&pm->list, &entry->list);
  2903. } else {
  2904. rb_link_node(&pm->node, parent, p);
  2905. rb_insert_color(&pm->node, &sctx->pending_dir_moves);
  2906. }
  2907. ret = 0;
  2908. out:
  2909. if (ret) {
  2910. __free_recorded_refs(&pm->update_refs);
  2911. kfree(pm);
  2912. }
  2913. return ret;
  2914. }
  2915. static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
  2916. u64 parent_ino)
  2917. {
  2918. struct rb_node *n = sctx->pending_dir_moves.rb_node;
  2919. struct pending_dir_move *entry;
  2920. while (n) {
  2921. entry = rb_entry(n, struct pending_dir_move, node);
  2922. if (parent_ino < entry->parent_ino)
  2923. n = n->rb_left;
  2924. else if (parent_ino > entry->parent_ino)
  2925. n = n->rb_right;
  2926. else
  2927. return entry;
  2928. }
  2929. return NULL;
  2930. }
  2931. static int path_loop(struct send_ctx *sctx, struct fs_path *name,
  2932. u64 ino, u64 gen, u64 *ancestor_ino)
  2933. {
  2934. int ret = 0;
  2935. u64 parent_inode = 0;
  2936. u64 parent_gen = 0;
  2937. u64 start_ino = ino;
  2938. *ancestor_ino = 0;
  2939. while (ino != BTRFS_FIRST_FREE_OBJECTID) {
  2940. fs_path_reset(name);
  2941. if (is_waiting_for_rm(sctx, ino, gen))
  2942. break;
  2943. if (is_waiting_for_move(sctx, ino)) {
  2944. if (*ancestor_ino == 0)
  2945. *ancestor_ino = ino;
  2946. ret = get_first_ref(sctx->parent_root, ino,
  2947. &parent_inode, &parent_gen, name);
  2948. } else {
  2949. ret = __get_cur_name_and_parent(sctx, ino, gen,
  2950. &parent_inode,
  2951. &parent_gen, name);
  2952. if (ret > 0) {
  2953. ret = 0;
  2954. break;
  2955. }
  2956. }
  2957. if (ret < 0)
  2958. break;
  2959. if (parent_inode == start_ino) {
  2960. ret = 1;
  2961. if (*ancestor_ino == 0)
  2962. *ancestor_ino = ino;
  2963. break;
  2964. }
  2965. ino = parent_inode;
  2966. gen = parent_gen;
  2967. }
  2968. return ret;
  2969. }
  2970. static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
  2971. {
  2972. struct fs_path *from_path = NULL;
  2973. struct fs_path *to_path = NULL;
  2974. struct fs_path *name = NULL;
  2975. u64 orig_progress = sctx->send_progress;
  2976. struct recorded_ref *cur;
  2977. u64 parent_ino, parent_gen;
  2978. struct waiting_dir_move *dm = NULL;
  2979. u64 rmdir_ino = 0;
  2980. u64 rmdir_gen;
  2981. u64 ancestor;
  2982. bool is_orphan;
  2983. int ret;
  2984. name = fs_path_alloc();
  2985. from_path = fs_path_alloc();
  2986. if (!name || !from_path) {
  2987. ret = -ENOMEM;
  2988. goto out;
  2989. }
  2990. dm = get_waiting_dir_move(sctx, pm->ino);
  2991. ASSERT(dm);
  2992. rmdir_ino = dm->rmdir_ino;
  2993. rmdir_gen = dm->rmdir_gen;
  2994. is_orphan = dm->orphanized;
  2995. free_waiting_dir_move(sctx, dm);
  2996. if (is_orphan) {
  2997. ret = gen_unique_name(sctx, pm->ino,
  2998. pm->gen, from_path);
  2999. } else {
  3000. ret = get_first_ref(sctx->parent_root, pm->ino,
  3001. &parent_ino, &parent_gen, name);
  3002. if (ret < 0)
  3003. goto out;
  3004. ret = get_cur_path(sctx, parent_ino, parent_gen,
  3005. from_path);
  3006. if (ret < 0)
  3007. goto out;
  3008. ret = fs_path_add_path(from_path, name);
  3009. }
  3010. if (ret < 0)
  3011. goto out;
  3012. sctx->send_progress = sctx->cur_ino + 1;
  3013. ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
  3014. if (ret < 0)
  3015. goto out;
  3016. if (ret) {
  3017. LIST_HEAD(deleted_refs);
  3018. ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
  3019. ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
  3020. &pm->update_refs, &deleted_refs,
  3021. is_orphan);
  3022. if (ret < 0)
  3023. goto out;
  3024. if (rmdir_ino) {
  3025. dm = get_waiting_dir_move(sctx, pm->ino);
  3026. ASSERT(dm);
  3027. dm->rmdir_ino = rmdir_ino;
  3028. dm->rmdir_gen = rmdir_gen;
  3029. }
  3030. goto out;
  3031. }
  3032. fs_path_reset(name);
  3033. to_path = name;
  3034. name = NULL;
  3035. ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
  3036. if (ret < 0)
  3037. goto out;
  3038. ret = send_rename(sctx, from_path, to_path);
  3039. if (ret < 0)
  3040. goto out;
  3041. if (rmdir_ino) {
  3042. struct orphan_dir_info *odi;
  3043. u64 gen;
  3044. odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
  3045. if (!odi) {
  3046. /* already deleted */
  3047. goto finish;
  3048. }
  3049. gen = odi->gen;
  3050. ret = can_rmdir(sctx, rmdir_ino, gen);
  3051. if (ret < 0)
  3052. goto out;
  3053. if (!ret)
  3054. goto finish;
  3055. name = fs_path_alloc();
  3056. if (!name) {
  3057. ret = -ENOMEM;
  3058. goto out;
  3059. }
  3060. ret = get_cur_path(sctx, rmdir_ino, gen, name);
  3061. if (ret < 0)
  3062. goto out;
  3063. ret = send_rmdir(sctx, name);
  3064. if (ret < 0)
  3065. goto out;
  3066. }
  3067. finish:
  3068. ret = cache_dir_utimes(sctx, pm->ino, pm->gen);
  3069. if (ret < 0)
  3070. goto out;
  3071. /*
  3072. * After rename/move, need to update the utimes of both new parent(s)
  3073. * and old parent(s).
  3074. */
  3075. list_for_each_entry(cur, &pm->update_refs, list) {
  3076. /*
  3077. * The parent inode might have been deleted in the send snapshot
  3078. */
  3079. ret = get_inode_info(sctx->send_root, cur->dir, NULL);
  3080. if (ret == -ENOENT) {
  3081. ret = 0;
  3082. continue;
  3083. }
  3084. if (ret < 0)
  3085. goto out;
  3086. ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
  3087. if (ret < 0)
  3088. goto out;
  3089. }
  3090. out:
  3091. fs_path_free(name);
  3092. fs_path_free(from_path);
  3093. fs_path_free(to_path);
  3094. sctx->send_progress = orig_progress;
  3095. return ret;
  3096. }
  3097. static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
  3098. {
  3099. if (!list_empty(&m->list))
  3100. list_del(&m->list);
  3101. if (!RB_EMPTY_NODE(&m->node))
  3102. rb_erase(&m->node, &sctx->pending_dir_moves);
  3103. __free_recorded_refs(&m->update_refs);
  3104. kfree(m);
  3105. }
  3106. static void tail_append_pending_moves(struct send_ctx *sctx,
  3107. struct pending_dir_move *moves,
  3108. struct list_head *stack)
  3109. {
  3110. if (list_empty(&moves->list)) {
  3111. list_add_tail(&moves->list, stack);
  3112. } else {
  3113. LIST_HEAD(list);
  3114. list_splice_init(&moves->list, &list);
  3115. list_add_tail(&moves->list, stack);
  3116. list_splice_tail(&list, stack);
  3117. }
  3118. if (!RB_EMPTY_NODE(&moves->node)) {
  3119. rb_erase(&moves->node, &sctx->pending_dir_moves);
  3120. RB_CLEAR_NODE(&moves->node);
  3121. }
  3122. }
  3123. static int apply_children_dir_moves(struct send_ctx *sctx)
  3124. {
  3125. struct pending_dir_move *pm;
  3126. LIST_HEAD(stack);
  3127. u64 parent_ino = sctx->cur_ino;
  3128. int ret = 0;
  3129. pm = get_pending_dir_moves(sctx, parent_ino);
  3130. if (!pm)
  3131. return 0;
  3132. tail_append_pending_moves(sctx, pm, &stack);
  3133. while (!list_empty(&stack)) {
  3134. pm = list_first_entry(&stack, struct pending_dir_move, list);
  3135. parent_ino = pm->ino;
  3136. ret = apply_dir_move(sctx, pm);
  3137. free_pending_move(sctx, pm);
  3138. if (ret)
  3139. goto out;
  3140. pm = get_pending_dir_moves(sctx, parent_ino);
  3141. if (pm)
  3142. tail_append_pending_moves(sctx, pm, &stack);
  3143. }
  3144. return 0;
  3145. out:
  3146. while (!list_empty(&stack)) {
  3147. pm = list_first_entry(&stack, struct pending_dir_move, list);
  3148. free_pending_move(sctx, pm);
  3149. }
  3150. return ret;
  3151. }
  3152. /*
  3153. * We might need to delay a directory rename even when no ancestor directory
  3154. * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
  3155. * renamed. This happens when we rename a directory to the old name (the name
  3156. * in the parent root) of some other unrelated directory that got its rename
  3157. * delayed due to some ancestor with higher number that got renamed.
  3158. *
  3159. * Example:
  3160. *
  3161. * Parent snapshot:
  3162. * . (ino 256)
  3163. * |---- a/ (ino 257)
  3164. * | |---- file (ino 260)
  3165. * |
  3166. * |---- b/ (ino 258)
  3167. * |---- c/ (ino 259)
  3168. *
  3169. * Send snapshot:
  3170. * . (ino 256)
  3171. * |---- a/ (ino 258)
  3172. * |---- x/ (ino 259)
  3173. * |---- y/ (ino 257)
  3174. * |----- file (ino 260)
  3175. *
  3176. * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
  3177. * from 'a' to 'x/y' happening first, which in turn depends on the rename of
  3178. * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
  3179. * must issue is:
  3180. *
  3181. * 1 - rename 259 from 'c' to 'x'
  3182. * 2 - rename 257 from 'a' to 'x/y'
  3183. * 3 - rename 258 from 'b' to 'a'
  3184. *
  3185. * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
  3186. * be done right away and < 0 on error.
  3187. */
  3188. static int wait_for_dest_dir_move(struct send_ctx *sctx,
  3189. struct recorded_ref *parent_ref,
  3190. const bool is_orphan)
  3191. {
  3192. BTRFS_PATH_AUTO_FREE(path);
  3193. struct btrfs_key key;
  3194. struct btrfs_key di_key;
  3195. struct btrfs_dir_item *di;
  3196. u64 left_gen;
  3197. u64 right_gen;
  3198. int ret = 0;
  3199. struct waiting_dir_move *wdm;
  3200. if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
  3201. return 0;
  3202. path = alloc_path_for_send();
  3203. if (!path)
  3204. return -ENOMEM;
  3205. key.objectid = parent_ref->dir;
  3206. key.type = BTRFS_DIR_ITEM_KEY;
  3207. key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
  3208. ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
  3209. if (ret < 0)
  3210. return ret;
  3211. if (ret > 0)
  3212. return 0;
  3213. di = btrfs_match_dir_item_name(path, parent_ref->name,
  3214. parent_ref->name_len);
  3215. if (!di)
  3216. return 0;
  3217. /*
  3218. * di_key.objectid has the number of the inode that has a dentry in the
  3219. * parent directory with the same name that sctx->cur_ino is being
  3220. * renamed to. We need to check if that inode is in the send root as
  3221. * well and if it is currently marked as an inode with a pending rename,
  3222. * if it is, we need to delay the rename of sctx->cur_ino as well, so
  3223. * that it happens after that other inode is renamed.
  3224. */
  3225. btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
  3226. if (di_key.type != BTRFS_INODE_ITEM_KEY)
  3227. return 0;
  3228. ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
  3229. if (ret < 0)
  3230. return ret;
  3231. ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
  3232. if (ret < 0) {
  3233. if (ret == -ENOENT)
  3234. ret = 0;
  3235. return ret;
  3236. }
  3237. /* Different inode, no need to delay the rename of sctx->cur_ino */
  3238. if (right_gen != left_gen)
  3239. return 0;
  3240. wdm = get_waiting_dir_move(sctx, di_key.objectid);
  3241. if (wdm && !wdm->orphanized) {
  3242. ret = add_pending_dir_move(sctx,
  3243. sctx->cur_ino,
  3244. sctx->cur_inode_gen,
  3245. di_key.objectid,
  3246. &sctx->new_refs,
  3247. &sctx->deleted_refs,
  3248. is_orphan);
  3249. if (!ret)
  3250. ret = 1;
  3251. }
  3252. return ret;
  3253. }
  3254. /*
  3255. * Check if inode ino2, or any of its ancestors, is inode ino1.
  3256. * Return 1 if true, 0 if false and < 0 on error.
  3257. */
  3258. static int check_ino_in_path(struct btrfs_root *root,
  3259. const u64 ino1,
  3260. const u64 ino1_gen,
  3261. const u64 ino2,
  3262. const u64 ino2_gen,
  3263. struct fs_path *fs_path)
  3264. {
  3265. u64 ino = ino2;
  3266. if (ino1 == ino2)
  3267. return ino1_gen == ino2_gen;
  3268. while (ino > BTRFS_FIRST_FREE_OBJECTID) {
  3269. u64 parent;
  3270. u64 parent_gen;
  3271. int ret;
  3272. fs_path_reset(fs_path);
  3273. ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
  3274. if (ret < 0)
  3275. return ret;
  3276. if (parent == ino1)
  3277. return parent_gen == ino1_gen;
  3278. ino = parent;
  3279. }
  3280. return 0;
  3281. }
  3282. /*
  3283. * Check if inode ino1 is an ancestor of inode ino2 in the given root for any
  3284. * possible path (in case ino2 is not a directory and has multiple hard links).
  3285. * Return 1 if true, 0 if false and < 0 on error.
  3286. */
  3287. static int is_ancestor(struct btrfs_root *root,
  3288. const u64 ino1,
  3289. const u64 ino1_gen,
  3290. const u64 ino2,
  3291. struct fs_path *fs_path)
  3292. {
  3293. bool free_fs_path = false;
  3294. int ret = 0;
  3295. int iter_ret = 0;
  3296. BTRFS_PATH_AUTO_FREE(path);
  3297. struct btrfs_key key;
  3298. if (!fs_path) {
  3299. fs_path = fs_path_alloc();
  3300. if (!fs_path)
  3301. return -ENOMEM;
  3302. free_fs_path = true;
  3303. }
  3304. path = alloc_path_for_send();
  3305. if (!path) {
  3306. ret = -ENOMEM;
  3307. goto out;
  3308. }
  3309. key.objectid = ino2;
  3310. key.type = BTRFS_INODE_REF_KEY;
  3311. key.offset = 0;
  3312. btrfs_for_each_slot(root, &key, &key, path, iter_ret) {
  3313. struct extent_buffer *leaf = path->nodes[0];
  3314. int slot = path->slots[0];
  3315. u32 cur_offset = 0;
  3316. u32 item_size;
  3317. if (key.objectid != ino2)
  3318. break;
  3319. if (key.type != BTRFS_INODE_REF_KEY &&
  3320. key.type != BTRFS_INODE_EXTREF_KEY)
  3321. break;
  3322. item_size = btrfs_item_size(leaf, slot);
  3323. while (cur_offset < item_size) {
  3324. u64 parent;
  3325. u64 parent_gen;
  3326. if (key.type == BTRFS_INODE_EXTREF_KEY) {
  3327. unsigned long ptr;
  3328. struct btrfs_inode_extref *extref;
  3329. ptr = btrfs_item_ptr_offset(leaf, slot);
  3330. extref = (struct btrfs_inode_extref *)
  3331. (ptr + cur_offset);
  3332. parent = btrfs_inode_extref_parent(leaf,
  3333. extref);
  3334. cur_offset += sizeof(*extref);
  3335. cur_offset += btrfs_inode_extref_name_len(leaf,
  3336. extref);
  3337. } else {
  3338. parent = key.offset;
  3339. cur_offset = item_size;
  3340. }
  3341. ret = get_inode_gen(root, parent, &parent_gen);
  3342. if (ret < 0)
  3343. goto out;
  3344. ret = check_ino_in_path(root, ino1, ino1_gen,
  3345. parent, parent_gen, fs_path);
  3346. if (ret)
  3347. goto out;
  3348. }
  3349. }
  3350. ret = 0;
  3351. if (iter_ret < 0)
  3352. ret = iter_ret;
  3353. out:
  3354. if (free_fs_path)
  3355. fs_path_free(fs_path);
  3356. return ret;
  3357. }
  3358. static int wait_for_parent_move(struct send_ctx *sctx,
  3359. struct recorded_ref *parent_ref,
  3360. const bool is_orphan)
  3361. {
  3362. int ret = 0;
  3363. u64 ino = parent_ref->dir;
  3364. u64 ino_gen = parent_ref->dir_gen;
  3365. u64 parent_ino_before, parent_ino_after;
  3366. struct fs_path *path_before = NULL;
  3367. struct fs_path *path_after = NULL;
  3368. int len1, len2;
  3369. path_after = fs_path_alloc();
  3370. path_before = fs_path_alloc();
  3371. if (!path_after || !path_before) {
  3372. ret = -ENOMEM;
  3373. goto out;
  3374. }
  3375. /*
  3376. * Our current directory inode may not yet be renamed/moved because some
  3377. * ancestor (immediate or not) has to be renamed/moved first. So find if
  3378. * such ancestor exists and make sure our own rename/move happens after
  3379. * that ancestor is processed to avoid path build infinite loops (done
  3380. * at get_cur_path()).
  3381. */
  3382. while (ino > BTRFS_FIRST_FREE_OBJECTID) {
  3383. u64 parent_ino_after_gen;
  3384. if (is_waiting_for_move(sctx, ino)) {
  3385. /*
  3386. * If the current inode is an ancestor of ino in the
  3387. * parent root, we need to delay the rename of the
  3388. * current inode, otherwise don't delayed the rename
  3389. * because we can end up with a circular dependency
  3390. * of renames, resulting in some directories never
  3391. * getting the respective rename operations issued in
  3392. * the send stream or getting into infinite path build
  3393. * loops.
  3394. */
  3395. ret = is_ancestor(sctx->parent_root,
  3396. sctx->cur_ino, sctx->cur_inode_gen,
  3397. ino, path_before);
  3398. if (ret)
  3399. break;
  3400. }
  3401. fs_path_reset(path_before);
  3402. fs_path_reset(path_after);
  3403. ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
  3404. &parent_ino_after_gen, path_after);
  3405. if (ret < 0)
  3406. goto out;
  3407. ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
  3408. NULL, path_before);
  3409. if (ret < 0 && ret != -ENOENT) {
  3410. goto out;
  3411. } else if (ret == -ENOENT) {
  3412. ret = 0;
  3413. break;
  3414. }
  3415. len1 = fs_path_len(path_before);
  3416. len2 = fs_path_len(path_after);
  3417. if (ino > sctx->cur_ino &&
  3418. (parent_ino_before != parent_ino_after || len1 != len2 ||
  3419. memcmp(path_before->start, path_after->start, len1))) {
  3420. u64 parent_ino_gen;
  3421. ret = get_inode_gen(sctx->parent_root, ino, &parent_ino_gen);
  3422. if (ret < 0)
  3423. goto out;
  3424. if (ino_gen == parent_ino_gen) {
  3425. ret = 1;
  3426. break;
  3427. }
  3428. }
  3429. ino = parent_ino_after;
  3430. ino_gen = parent_ino_after_gen;
  3431. }
  3432. out:
  3433. fs_path_free(path_before);
  3434. fs_path_free(path_after);
  3435. if (ret == 1) {
  3436. ret = add_pending_dir_move(sctx,
  3437. sctx->cur_ino,
  3438. sctx->cur_inode_gen,
  3439. ino,
  3440. &sctx->new_refs,
  3441. &sctx->deleted_refs,
  3442. is_orphan);
  3443. if (!ret)
  3444. ret = 1;
  3445. }
  3446. return ret;
  3447. }
  3448. static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
  3449. {
  3450. int ret;
  3451. struct fs_path *new_path;
  3452. /*
  3453. * Our reference's name member points to its full_path member string, so
  3454. * we use here a new path.
  3455. */
  3456. new_path = fs_path_alloc();
  3457. if (!new_path)
  3458. return -ENOMEM;
  3459. ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
  3460. if (ret < 0) {
  3461. fs_path_free(new_path);
  3462. return ret;
  3463. }
  3464. ret = fs_path_add(new_path, ref->name, ref->name_len);
  3465. if (ret < 0) {
  3466. fs_path_free(new_path);
  3467. return ret;
  3468. }
  3469. fs_path_free(ref->full_path);
  3470. set_ref_path(ref, new_path);
  3471. return 0;
  3472. }
  3473. /*
  3474. * When processing the new references for an inode we may orphanize an existing
  3475. * directory inode because its old name conflicts with one of the new references
  3476. * of the current inode. Later, when processing another new reference of our
  3477. * inode, we might need to orphanize another inode, but the path we have in the
  3478. * reference reflects the pre-orphanization name of the directory we previously
  3479. * orphanized. For example:
  3480. *
  3481. * parent snapshot looks like:
  3482. *
  3483. * . (ino 256)
  3484. * |----- f1 (ino 257)
  3485. * |----- f2 (ino 258)
  3486. * |----- d1/ (ino 259)
  3487. * |----- d2/ (ino 260)
  3488. *
  3489. * send snapshot looks like:
  3490. *
  3491. * . (ino 256)
  3492. * |----- d1 (ino 258)
  3493. * |----- f2/ (ino 259)
  3494. * |----- f2_link/ (ino 260)
  3495. * | |----- f1 (ino 257)
  3496. * |
  3497. * |----- d2 (ino 258)
  3498. *
  3499. * When processing inode 257 we compute the name for inode 259 as "d1", and we
  3500. * cache it in the name cache. Later when we start processing inode 258, when
  3501. * collecting all its new references we set a full path of "d1/d2" for its new
  3502. * reference with name "d2". When we start processing the new references we
  3503. * start by processing the new reference with name "d1", and this results in
  3504. * orphanizing inode 259, since its old reference causes a conflict. Then we
  3505. * move on the next new reference, with name "d2", and we find out we must
  3506. * orphanize inode 260, as its old reference conflicts with ours - but for the
  3507. * orphanization we use a source path corresponding to the path we stored in the
  3508. * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
  3509. * receiver fail since the path component "d1/" no longer exists, it was renamed
  3510. * to "o259-6-0/" when processing the previous new reference. So in this case we
  3511. * must recompute the path in the new reference and use it for the new
  3512. * orphanization operation.
  3513. */
  3514. static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
  3515. {
  3516. char AUTO_KFREE(name);
  3517. int ret;
  3518. name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
  3519. if (!name)
  3520. return -ENOMEM;
  3521. fs_path_reset(ref->full_path);
  3522. ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
  3523. if (ret < 0)
  3524. return ret;
  3525. ret = fs_path_add(ref->full_path, name, ref->name_len);
  3526. if (ret < 0)
  3527. return ret;
  3528. /* Update the reference's base name pointer. */
  3529. set_ref_path(ref, ref->full_path);
  3530. return 0;
  3531. }
  3532. static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node)
  3533. {
  3534. const struct recorded_ref *data = k;
  3535. const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
  3536. if (data->dir > ref->dir)
  3537. return 1;
  3538. if (data->dir < ref->dir)
  3539. return -1;
  3540. if (data->dir_gen > ref->dir_gen)
  3541. return 1;
  3542. if (data->dir_gen < ref->dir_gen)
  3543. return -1;
  3544. return 0;
  3545. }
  3546. static bool rbtree_check_dir_ref_less(struct rb_node *node, const struct rb_node *parent)
  3547. {
  3548. const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node);
  3549. return rbtree_check_dir_ref_comp(entry, parent) < 0;
  3550. }
  3551. static int record_check_dir_ref_in_tree(struct rb_root *root,
  3552. struct recorded_ref *ref, struct list_head *list)
  3553. {
  3554. struct recorded_ref *tmp_ref;
  3555. int ret;
  3556. if (rb_find(ref, root, rbtree_check_dir_ref_comp))
  3557. return 0;
  3558. ret = dup_ref(ref, list);
  3559. if (ret < 0)
  3560. return ret;
  3561. tmp_ref = list_last_entry(list, struct recorded_ref, list);
  3562. rb_add(&tmp_ref->node, root, rbtree_check_dir_ref_less);
  3563. tmp_ref->root = root;
  3564. return 0;
  3565. }
  3566. static int rename_current_inode(struct send_ctx *sctx,
  3567. struct fs_path *current_path,
  3568. struct fs_path *new_path)
  3569. {
  3570. int ret;
  3571. ret = send_rename(sctx, current_path, new_path);
  3572. if (ret < 0)
  3573. return ret;
  3574. ret = fs_path_copy(&sctx->cur_inode_path, new_path);
  3575. if (ret < 0)
  3576. return ret;
  3577. return fs_path_copy(current_path, new_path);
  3578. }
  3579. /*
  3580. * This does all the move/link/unlink/rmdir magic.
  3581. */
  3582. static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
  3583. {
  3584. struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
  3585. int ret = 0;
  3586. struct recorded_ref *cur;
  3587. struct recorded_ref *cur2;
  3588. LIST_HEAD(check_dirs);
  3589. struct rb_root rbtree_check_dirs = RB_ROOT;
  3590. struct fs_path *valid_path = NULL;
  3591. u64 ow_inode = 0;
  3592. u64 ow_gen;
  3593. u64 ow_mode;
  3594. bool did_overwrite = false;
  3595. bool is_orphan = false;
  3596. bool can_rename = true;
  3597. bool orphanized_dir = false;
  3598. bool orphanized_ancestor = false;
  3599. /*
  3600. * This should never happen as the root dir always has the same ref
  3601. * which is always '..'
  3602. */
  3603. if (unlikely(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID)) {
  3604. btrfs_err(fs_info,
  3605. "send: unexpected inode %llu in process_recorded_refs()",
  3606. sctx->cur_ino);
  3607. ret = -EINVAL;
  3608. goto out;
  3609. }
  3610. valid_path = fs_path_alloc();
  3611. if (!valid_path) {
  3612. ret = -ENOMEM;
  3613. goto out;
  3614. }
  3615. /*
  3616. * First, check if the first ref of the current inode was overwritten
  3617. * before. If yes, we know that the current inode was already orphanized
  3618. * and thus use the orphan name. If not, we can use get_cur_path to
  3619. * get the path of the first ref as it would like while receiving at
  3620. * this point in time.
  3621. * New inodes are always orphan at the beginning, so force to use the
  3622. * orphan name in this case.
  3623. * The first ref is stored in valid_path and will be updated if it
  3624. * gets moved around.
  3625. */
  3626. if (!sctx->cur_inode_new) {
  3627. ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
  3628. sctx->cur_inode_gen);
  3629. if (ret < 0)
  3630. goto out;
  3631. if (ret)
  3632. did_overwrite = true;
  3633. }
  3634. if (sctx->cur_inode_new || did_overwrite) {
  3635. ret = gen_unique_name(sctx, sctx->cur_ino,
  3636. sctx->cur_inode_gen, valid_path);
  3637. if (ret < 0)
  3638. goto out;
  3639. is_orphan = true;
  3640. } else {
  3641. ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
  3642. valid_path);
  3643. if (ret < 0)
  3644. goto out;
  3645. }
  3646. /*
  3647. * Before doing any rename and link operations, do a first pass on the
  3648. * new references to orphanize any unprocessed inodes that may have a
  3649. * reference that conflicts with one of the new references of the current
  3650. * inode. This needs to happen first because a new reference may conflict
  3651. * with the old reference of a parent directory, so we must make sure
  3652. * that the path used for link and rename commands don't use an
  3653. * orphanized name when an ancestor was not yet orphanized.
  3654. *
  3655. * Example:
  3656. *
  3657. * Parent snapshot:
  3658. *
  3659. * . (ino 256)
  3660. * |----- testdir/ (ino 259)
  3661. * | |----- a (ino 257)
  3662. * |
  3663. * |----- b (ino 258)
  3664. *
  3665. * Send snapshot:
  3666. *
  3667. * . (ino 256)
  3668. * |----- testdir_2/ (ino 259)
  3669. * | |----- a (ino 260)
  3670. * |
  3671. * |----- testdir (ino 257)
  3672. * |----- b (ino 257)
  3673. * |----- b2 (ino 258)
  3674. *
  3675. * Processing the new reference for inode 257 with name "b" may happen
  3676. * before processing the new reference with name "testdir". If so, we
  3677. * must make sure that by the time we send a link command to create the
  3678. * hard link "b", inode 259 was already orphanized, since the generated
  3679. * path in "valid_path" already contains the orphanized name for 259.
  3680. * We are processing inode 257, so only later when processing 259 we do
  3681. * the rename operation to change its temporary (orphanized) name to
  3682. * "testdir_2".
  3683. */
  3684. list_for_each_entry(cur, &sctx->new_refs, list) {
  3685. ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
  3686. if (ret < 0)
  3687. goto out;
  3688. if (ret == inode_state_will_create)
  3689. continue;
  3690. /*
  3691. * Check if this new ref would overwrite the first ref of another
  3692. * unprocessed inode. If yes, orphanize the overwritten inode.
  3693. * If we find an overwritten ref that is not the first ref,
  3694. * simply unlink it.
  3695. */
  3696. ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
  3697. cur->name, cur->name_len,
  3698. &ow_inode, &ow_gen, &ow_mode);
  3699. if (ret < 0)
  3700. goto out;
  3701. if (ret) {
  3702. ret = is_first_ref(sctx->parent_root,
  3703. ow_inode, cur->dir, cur->name,
  3704. cur->name_len);
  3705. if (ret < 0)
  3706. goto out;
  3707. if (ret) {
  3708. struct name_cache_entry *nce;
  3709. struct waiting_dir_move *wdm;
  3710. if (orphanized_dir) {
  3711. ret = refresh_ref_path(sctx, cur);
  3712. if (ret < 0)
  3713. goto out;
  3714. }
  3715. ret = orphanize_inode(sctx, ow_inode, ow_gen,
  3716. cur->full_path);
  3717. if (ret < 0)
  3718. goto out;
  3719. if (S_ISDIR(ow_mode))
  3720. orphanized_dir = true;
  3721. /*
  3722. * If ow_inode has its rename operation delayed
  3723. * make sure that its orphanized name is used in
  3724. * the source path when performing its rename
  3725. * operation.
  3726. */
  3727. wdm = get_waiting_dir_move(sctx, ow_inode);
  3728. if (wdm)
  3729. wdm->orphanized = true;
  3730. /*
  3731. * Make sure we clear our orphanized inode's
  3732. * name from the name cache. This is because the
  3733. * inode ow_inode might be an ancestor of some
  3734. * other inode that will be orphanized as well
  3735. * later and has an inode number greater than
  3736. * sctx->send_progress. We need to prevent
  3737. * future name lookups from using the old name
  3738. * and get instead the orphan name.
  3739. */
  3740. nce = name_cache_search(sctx, ow_inode, ow_gen);
  3741. if (nce)
  3742. btrfs_lru_cache_remove(&sctx->name_cache,
  3743. &nce->entry);
  3744. /*
  3745. * ow_inode might currently be an ancestor of
  3746. * cur_ino, therefore compute valid_path (the
  3747. * current path of cur_ino) again because it
  3748. * might contain the pre-orphanization name of
  3749. * ow_inode, which is no longer valid.
  3750. */
  3751. ret = is_ancestor(sctx->parent_root,
  3752. ow_inode, ow_gen,
  3753. sctx->cur_ino, NULL);
  3754. if (ret > 0) {
  3755. orphanized_ancestor = true;
  3756. fs_path_reset(valid_path);
  3757. fs_path_reset(&sctx->cur_inode_path);
  3758. ret = get_cur_path(sctx, sctx->cur_ino,
  3759. sctx->cur_inode_gen,
  3760. valid_path);
  3761. }
  3762. if (ret < 0)
  3763. goto out;
  3764. } else {
  3765. /*
  3766. * If we previously orphanized a directory that
  3767. * collided with a new reference that we already
  3768. * processed, recompute the current path because
  3769. * that directory may be part of the path.
  3770. */
  3771. if (orphanized_dir) {
  3772. ret = refresh_ref_path(sctx, cur);
  3773. if (ret < 0)
  3774. goto out;
  3775. }
  3776. ret = send_unlink(sctx, cur->full_path);
  3777. if (ret < 0)
  3778. goto out;
  3779. }
  3780. }
  3781. }
  3782. list_for_each_entry(cur, &sctx->new_refs, list) {
  3783. /*
  3784. * We may have refs where the parent directory does not exist
  3785. * yet. This happens if the parent directories inum is higher
  3786. * than the current inum. To handle this case, we create the
  3787. * parent directory out of order. But we need to check if this
  3788. * did already happen before due to other refs in the same dir.
  3789. */
  3790. ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
  3791. if (ret < 0)
  3792. goto out;
  3793. if (ret == inode_state_will_create) {
  3794. ret = 0;
  3795. /*
  3796. * First check if any of the current inodes refs did
  3797. * already create the dir.
  3798. */
  3799. list_for_each_entry(cur2, &sctx->new_refs, list) {
  3800. if (cur == cur2)
  3801. break;
  3802. if (cur2->dir == cur->dir) {
  3803. ret = 1;
  3804. break;
  3805. }
  3806. }
  3807. /*
  3808. * If that did not happen, check if a previous inode
  3809. * did already create the dir.
  3810. */
  3811. if (!ret)
  3812. ret = did_create_dir(sctx, cur->dir);
  3813. if (ret < 0)
  3814. goto out;
  3815. if (!ret) {
  3816. ret = send_create_inode(sctx, cur->dir);
  3817. if (ret < 0)
  3818. goto out;
  3819. cache_dir_created(sctx, cur->dir);
  3820. }
  3821. }
  3822. if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
  3823. ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
  3824. if (ret < 0)
  3825. goto out;
  3826. if (ret == 1) {
  3827. can_rename = false;
  3828. *pending_move = 1;
  3829. }
  3830. }
  3831. if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
  3832. can_rename) {
  3833. ret = wait_for_parent_move(sctx, cur, is_orphan);
  3834. if (ret < 0)
  3835. goto out;
  3836. if (ret == 1) {
  3837. can_rename = false;
  3838. *pending_move = 1;
  3839. }
  3840. }
  3841. /*
  3842. * link/move the ref to the new place. If we have an orphan
  3843. * inode, move it and update valid_path. If not, link or move
  3844. * it depending on the inode mode.
  3845. */
  3846. if (is_orphan && can_rename) {
  3847. ret = rename_current_inode(sctx, valid_path, cur->full_path);
  3848. if (ret < 0)
  3849. goto out;
  3850. is_orphan = false;
  3851. } else if (can_rename) {
  3852. if (S_ISDIR(sctx->cur_inode_mode)) {
  3853. /*
  3854. * Dirs can't be linked, so move it. For moved
  3855. * dirs, we always have one new and one deleted
  3856. * ref. The deleted ref is ignored later.
  3857. */
  3858. ret = rename_current_inode(sctx, valid_path,
  3859. cur->full_path);
  3860. if (ret < 0)
  3861. goto out;
  3862. } else {
  3863. /*
  3864. * We might have previously orphanized an inode
  3865. * which is an ancestor of our current inode,
  3866. * so our reference's full path, which was
  3867. * computed before any such orphanizations, must
  3868. * be updated.
  3869. */
  3870. if (orphanized_dir) {
  3871. ret = update_ref_path(sctx, cur);
  3872. if (ret < 0)
  3873. goto out;
  3874. }
  3875. ret = send_link(sctx, cur->full_path,
  3876. valid_path);
  3877. if (ret < 0)
  3878. goto out;
  3879. }
  3880. }
  3881. ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
  3882. if (ret < 0)
  3883. goto out;
  3884. }
  3885. if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
  3886. /*
  3887. * Check if we can already rmdir the directory. If not,
  3888. * orphanize it. For every dir item inside that gets deleted
  3889. * later, we do this check again and rmdir it then if possible.
  3890. * See the use of check_dirs for more details.
  3891. */
  3892. ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen);
  3893. if (ret < 0)
  3894. goto out;
  3895. if (ret) {
  3896. ret = send_rmdir(sctx, valid_path);
  3897. if (ret < 0)
  3898. goto out;
  3899. } else if (!is_orphan) {
  3900. ret = orphanize_inode(sctx, sctx->cur_ino,
  3901. sctx->cur_inode_gen, valid_path);
  3902. if (ret < 0)
  3903. goto out;
  3904. is_orphan = true;
  3905. }
  3906. list_for_each_entry(cur, &sctx->deleted_refs, list) {
  3907. ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
  3908. if (ret < 0)
  3909. goto out;
  3910. }
  3911. } else if (S_ISDIR(sctx->cur_inode_mode) &&
  3912. !list_empty(&sctx->deleted_refs)) {
  3913. /*
  3914. * We have a moved dir. Add the old parent to check_dirs
  3915. */
  3916. cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list);
  3917. ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
  3918. if (ret < 0)
  3919. goto out;
  3920. } else if (!S_ISDIR(sctx->cur_inode_mode)) {
  3921. /*
  3922. * We have a non dir inode. Go through all deleted refs and
  3923. * unlink them if they were not already overwritten by other
  3924. * inodes.
  3925. */
  3926. list_for_each_entry(cur, &sctx->deleted_refs, list) {
  3927. ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
  3928. sctx->cur_ino, sctx->cur_inode_gen,
  3929. cur->name, cur->name_len);
  3930. if (ret < 0)
  3931. goto out;
  3932. if (!ret) {
  3933. /*
  3934. * If we orphanized any ancestor before, we need
  3935. * to recompute the full path for deleted names,
  3936. * since any such path was computed before we
  3937. * processed any references and orphanized any
  3938. * ancestor inode.
  3939. */
  3940. if (orphanized_ancestor) {
  3941. ret = update_ref_path(sctx, cur);
  3942. if (ret < 0)
  3943. goto out;
  3944. }
  3945. ret = send_unlink(sctx, cur->full_path);
  3946. if (ret < 0)
  3947. goto out;
  3948. if (is_current_inode_path(sctx, cur->full_path))
  3949. fs_path_reset(&sctx->cur_inode_path);
  3950. }
  3951. ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
  3952. if (ret < 0)
  3953. goto out;
  3954. }
  3955. /*
  3956. * If the inode is still orphan, unlink the orphan. This may
  3957. * happen when a previous inode did overwrite the first ref
  3958. * of this inode and no new refs were added for the current
  3959. * inode. Unlinking does not mean that the inode is deleted in
  3960. * all cases. There may still be links to this inode in other
  3961. * places.
  3962. */
  3963. if (is_orphan) {
  3964. ret = send_unlink(sctx, valid_path);
  3965. if (ret < 0)
  3966. goto out;
  3967. }
  3968. }
  3969. /*
  3970. * We did collect all parent dirs where cur_inode was once located. We
  3971. * now go through all these dirs and check if they are pending for
  3972. * deletion and if it's finally possible to perform the rmdir now.
  3973. * We also update the inode stats of the parent dirs here.
  3974. */
  3975. list_for_each_entry(cur, &check_dirs, list) {
  3976. /*
  3977. * In case we had refs into dirs that were not processed yet,
  3978. * we don't need to do the utime and rmdir logic for these dirs.
  3979. * The dir will be processed later.
  3980. */
  3981. if (cur->dir > sctx->cur_ino)
  3982. continue;
  3983. ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
  3984. if (ret < 0)
  3985. goto out;
  3986. if (ret == inode_state_did_create ||
  3987. ret == inode_state_no_change) {
  3988. ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
  3989. if (ret < 0)
  3990. goto out;
  3991. } else if (ret == inode_state_did_delete) {
  3992. ret = can_rmdir(sctx, cur->dir, cur->dir_gen);
  3993. if (ret < 0)
  3994. goto out;
  3995. if (ret) {
  3996. ret = get_cur_path(sctx, cur->dir,
  3997. cur->dir_gen, valid_path);
  3998. if (ret < 0)
  3999. goto out;
  4000. ret = send_rmdir(sctx, valid_path);
  4001. if (ret < 0)
  4002. goto out;
  4003. }
  4004. }
  4005. }
  4006. ret = 0;
  4007. out:
  4008. __free_recorded_refs(&check_dirs);
  4009. free_recorded_refs(sctx);
  4010. fs_path_free(valid_path);
  4011. return ret;
  4012. }
  4013. static int rbtree_ref_comp(const void *k, const struct rb_node *node)
  4014. {
  4015. const struct recorded_ref *data = k;
  4016. const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
  4017. if (data->dir > ref->dir)
  4018. return 1;
  4019. if (data->dir < ref->dir)
  4020. return -1;
  4021. if (data->dir_gen > ref->dir_gen)
  4022. return 1;
  4023. if (data->dir_gen < ref->dir_gen)
  4024. return -1;
  4025. if (data->name_len > ref->name_len)
  4026. return 1;
  4027. if (data->name_len < ref->name_len)
  4028. return -1;
  4029. return strcmp(data->name, ref->name);
  4030. }
  4031. static bool rbtree_ref_less(struct rb_node *node, const struct rb_node *parent)
  4032. {
  4033. const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node);
  4034. return rbtree_ref_comp(entry, parent) < 0;
  4035. }
  4036. static int record_ref_in_tree(struct rb_root *root, struct list_head *refs,
  4037. struct fs_path *name, u64 dir, u64 dir_gen,
  4038. struct send_ctx *sctx)
  4039. {
  4040. int ret = 0;
  4041. struct fs_path *path = NULL;
  4042. struct recorded_ref *ref = NULL;
  4043. path = fs_path_alloc();
  4044. if (!path) {
  4045. ret = -ENOMEM;
  4046. goto out;
  4047. }
  4048. ref = recorded_ref_alloc();
  4049. if (!ref) {
  4050. ret = -ENOMEM;
  4051. goto out;
  4052. }
  4053. ret = get_cur_path(sctx, dir, dir_gen, path);
  4054. if (ret < 0)
  4055. goto out;
  4056. ret = fs_path_add_path(path, name);
  4057. if (ret < 0)
  4058. goto out;
  4059. ref->dir = dir;
  4060. ref->dir_gen = dir_gen;
  4061. set_ref_path(ref, path);
  4062. list_add_tail(&ref->list, refs);
  4063. rb_add(&ref->node, root, rbtree_ref_less);
  4064. ref->root = root;
  4065. out:
  4066. if (ret) {
  4067. if (path && (!ref || !ref->full_path))
  4068. fs_path_free(path);
  4069. recorded_ref_free(ref);
  4070. }
  4071. return ret;
  4072. }
  4073. static int record_new_ref_if_needed(u64 dir, struct fs_path *name, void *ctx)
  4074. {
  4075. int ret;
  4076. struct send_ctx *sctx = ctx;
  4077. struct rb_node *node = NULL;
  4078. struct recorded_ref data;
  4079. struct recorded_ref *ref;
  4080. u64 dir_gen;
  4081. ret = get_inode_gen(sctx->send_root, dir, &dir_gen);
  4082. if (ret < 0)
  4083. return ret;
  4084. data.dir = dir;
  4085. data.dir_gen = dir_gen;
  4086. set_ref_path(&data, name);
  4087. node = rb_find(&data, &sctx->rbtree_deleted_refs, rbtree_ref_comp);
  4088. if (node) {
  4089. ref = rb_entry(node, struct recorded_ref, node);
  4090. recorded_ref_free(ref);
  4091. } else {
  4092. ret = record_ref_in_tree(&sctx->rbtree_new_refs,
  4093. &sctx->new_refs, name, dir, dir_gen,
  4094. sctx);
  4095. }
  4096. return ret;
  4097. }
  4098. static int record_deleted_ref_if_needed(u64 dir, struct fs_path *name, void *ctx)
  4099. {
  4100. int ret;
  4101. struct send_ctx *sctx = ctx;
  4102. struct rb_node *node = NULL;
  4103. struct recorded_ref data;
  4104. struct recorded_ref *ref;
  4105. u64 dir_gen;
  4106. ret = get_inode_gen(sctx->parent_root, dir, &dir_gen);
  4107. if (ret < 0)
  4108. return ret;
  4109. data.dir = dir;
  4110. data.dir_gen = dir_gen;
  4111. set_ref_path(&data, name);
  4112. node = rb_find(&data, &sctx->rbtree_new_refs, rbtree_ref_comp);
  4113. if (node) {
  4114. ref = rb_entry(node, struct recorded_ref, node);
  4115. recorded_ref_free(ref);
  4116. } else {
  4117. ret = record_ref_in_tree(&sctx->rbtree_deleted_refs,
  4118. &sctx->deleted_refs, name, dir,
  4119. dir_gen, sctx);
  4120. }
  4121. return ret;
  4122. }
  4123. static int record_new_ref(struct send_ctx *sctx)
  4124. {
  4125. int ret;
  4126. ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key,
  4127. false, record_new_ref_if_needed, sctx);
  4128. if (ret < 0)
  4129. return ret;
  4130. return 0;
  4131. }
  4132. static int record_deleted_ref(struct send_ctx *sctx)
  4133. {
  4134. int ret;
  4135. ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key,
  4136. false, record_deleted_ref_if_needed, sctx);
  4137. if (ret < 0)
  4138. return ret;
  4139. return 0;
  4140. }
  4141. static int record_changed_ref(struct send_ctx *sctx)
  4142. {
  4143. int ret;
  4144. ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key,
  4145. false, record_new_ref_if_needed, sctx);
  4146. if (ret < 0)
  4147. return ret;
  4148. ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key,
  4149. false, record_deleted_ref_if_needed, sctx);
  4150. if (ret < 0)
  4151. return ret;
  4152. return 0;
  4153. }
  4154. /*
  4155. * Record and process all refs at once. Needed when an inode changes the
  4156. * generation number, which means that it was deleted and recreated.
  4157. */
  4158. static int process_all_refs(struct send_ctx *sctx,
  4159. enum btrfs_compare_tree_result cmd)
  4160. {
  4161. int ret = 0;
  4162. int iter_ret = 0;
  4163. struct btrfs_root *root;
  4164. BTRFS_PATH_AUTO_FREE(path);
  4165. struct btrfs_key key;
  4166. struct btrfs_key found_key;
  4167. iterate_inode_ref_t cb;
  4168. int pending_move = 0;
  4169. path = alloc_path_for_send();
  4170. if (!path)
  4171. return -ENOMEM;
  4172. if (cmd == BTRFS_COMPARE_TREE_NEW) {
  4173. root = sctx->send_root;
  4174. cb = record_new_ref_if_needed;
  4175. } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
  4176. root = sctx->parent_root;
  4177. cb = record_deleted_ref_if_needed;
  4178. } else {
  4179. btrfs_err(sctx->send_root->fs_info,
  4180. "Wrong command %d in process_all_refs", cmd);
  4181. return -EINVAL;
  4182. }
  4183. key.objectid = sctx->cmp_key->objectid;
  4184. key.type = BTRFS_INODE_REF_KEY;
  4185. key.offset = 0;
  4186. btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
  4187. if (found_key.objectid != key.objectid ||
  4188. (found_key.type != BTRFS_INODE_REF_KEY &&
  4189. found_key.type != BTRFS_INODE_EXTREF_KEY))
  4190. break;
  4191. ret = iterate_inode_ref(root, path, &found_key, false, cb, sctx);
  4192. if (ret < 0)
  4193. return ret;
  4194. }
  4195. /* Catch error found during iteration */
  4196. if (iter_ret < 0)
  4197. return iter_ret;
  4198. btrfs_release_path(path);
  4199. /*
  4200. * We don't actually care about pending_move as we are simply
  4201. * re-creating this inode and will be rename'ing it into place once we
  4202. * rename the parent directory.
  4203. */
  4204. return process_recorded_refs(sctx, &pending_move);
  4205. }
  4206. static int send_set_xattr(struct send_ctx *sctx,
  4207. const char *name, int name_len,
  4208. const char *data, int data_len)
  4209. {
  4210. struct fs_path *path;
  4211. int ret;
  4212. path = get_cur_inode_path(sctx);
  4213. if (IS_ERR(path))
  4214. return PTR_ERR(path);
  4215. ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
  4216. if (ret < 0)
  4217. return ret;
  4218. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
  4219. TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
  4220. TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
  4221. ret = send_cmd(sctx);
  4222. tlv_put_failure:
  4223. return ret;
  4224. }
  4225. static int send_remove_xattr(struct send_ctx *sctx,
  4226. struct fs_path *path,
  4227. const char *name, int name_len)
  4228. {
  4229. int ret;
  4230. ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
  4231. if (ret < 0)
  4232. return ret;
  4233. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
  4234. TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
  4235. ret = send_cmd(sctx);
  4236. tlv_put_failure:
  4237. return ret;
  4238. }
  4239. static int __process_new_xattr(int num, struct btrfs_key *di_key,
  4240. const char *name, int name_len, const char *data,
  4241. int data_len, void *ctx)
  4242. {
  4243. struct send_ctx *sctx = ctx;
  4244. struct posix_acl_xattr_header dummy_acl;
  4245. /* Capabilities are emitted by finish_inode_if_needed */
  4246. if (!strncmp(name, XATTR_NAME_CAPS, name_len))
  4247. return 0;
  4248. /*
  4249. * This hack is needed because empty acls are stored as zero byte
  4250. * data in xattrs. Problem with that is, that receiving these zero byte
  4251. * acls will fail later. To fix this, we send a dummy acl list that
  4252. * only contains the version number and no entries.
  4253. */
  4254. if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
  4255. !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
  4256. if (data_len == 0) {
  4257. dummy_acl.a_version =
  4258. cpu_to_le32(POSIX_ACL_XATTR_VERSION);
  4259. data = (char *)&dummy_acl;
  4260. data_len = sizeof(dummy_acl);
  4261. }
  4262. }
  4263. return send_set_xattr(sctx, name, name_len, data, data_len);
  4264. }
  4265. static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
  4266. const char *name, int name_len,
  4267. const char *data, int data_len, void *ctx)
  4268. {
  4269. struct send_ctx *sctx = ctx;
  4270. struct fs_path *p;
  4271. p = get_cur_inode_path(sctx);
  4272. if (IS_ERR(p))
  4273. return PTR_ERR(p);
  4274. return send_remove_xattr(sctx, p, name, name_len);
  4275. }
  4276. static int process_new_xattr(struct send_ctx *sctx)
  4277. {
  4278. return iterate_dir_item(sctx->send_root, sctx->left_path,
  4279. __process_new_xattr, sctx);
  4280. }
  4281. static int process_deleted_xattr(struct send_ctx *sctx)
  4282. {
  4283. return iterate_dir_item(sctx->parent_root, sctx->right_path,
  4284. __process_deleted_xattr, sctx);
  4285. }
  4286. struct find_xattr_ctx {
  4287. const char *name;
  4288. int name_len;
  4289. int found_idx;
  4290. char *found_data;
  4291. int found_data_len;
  4292. bool copy_data;
  4293. };
  4294. static int __find_xattr(int num, struct btrfs_key *di_key, const char *name,
  4295. int name_len, const char *data, int data_len, void *vctx)
  4296. {
  4297. struct find_xattr_ctx *ctx = vctx;
  4298. if (name_len == ctx->name_len &&
  4299. strncmp(name, ctx->name, name_len) == 0) {
  4300. ctx->found_idx = num;
  4301. ctx->found_data_len = data_len;
  4302. if (ctx->copy_data) {
  4303. ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
  4304. if (!ctx->found_data)
  4305. return -ENOMEM;
  4306. }
  4307. return 1;
  4308. }
  4309. return 0;
  4310. }
  4311. static int find_xattr(struct btrfs_root *root,
  4312. struct btrfs_path *path,
  4313. struct btrfs_key *key,
  4314. const char *name, int name_len,
  4315. char **data, int *data_len)
  4316. {
  4317. int ret;
  4318. struct find_xattr_ctx ctx;
  4319. ctx.name = name;
  4320. ctx.name_len = name_len;
  4321. ctx.found_idx = -1;
  4322. ctx.found_data = NULL;
  4323. ctx.found_data_len = 0;
  4324. ctx.copy_data = (data != NULL);
  4325. ret = iterate_dir_item(root, path, __find_xattr, &ctx);
  4326. if (ret < 0)
  4327. return ret;
  4328. if (ctx.found_idx == -1)
  4329. return -ENOENT;
  4330. if (data) {
  4331. *data = ctx.found_data;
  4332. *data_len = ctx.found_data_len;
  4333. } else {
  4334. ASSERT(ctx.found_data == NULL);
  4335. }
  4336. return ctx.found_idx;
  4337. }
  4338. static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
  4339. const char *name, int name_len,
  4340. const char *data, int data_len,
  4341. void *ctx)
  4342. {
  4343. int ret;
  4344. struct send_ctx *sctx = ctx;
  4345. char AUTO_KFREE(found_data);
  4346. int found_data_len = 0;
  4347. ret = find_xattr(sctx->parent_root, sctx->right_path,
  4348. sctx->cmp_key, name, name_len, &found_data,
  4349. &found_data_len);
  4350. if (ret == -ENOENT) {
  4351. ret = __process_new_xattr(num, di_key, name, name_len, data,
  4352. data_len, ctx);
  4353. } else if (ret >= 0) {
  4354. if (data_len != found_data_len ||
  4355. memcmp(data, found_data, data_len)) {
  4356. ret = __process_new_xattr(num, di_key, name, name_len,
  4357. data, data_len, ctx);
  4358. } else {
  4359. ret = 0;
  4360. }
  4361. }
  4362. return ret;
  4363. }
  4364. static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
  4365. const char *name, int name_len,
  4366. const char *data, int data_len,
  4367. void *ctx)
  4368. {
  4369. int ret;
  4370. struct send_ctx *sctx = ctx;
  4371. ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
  4372. name, name_len, NULL, NULL);
  4373. if (ret == -ENOENT)
  4374. ret = __process_deleted_xattr(num, di_key, name, name_len, data,
  4375. data_len, ctx);
  4376. else if (ret >= 0)
  4377. ret = 0;
  4378. return ret;
  4379. }
  4380. static int process_changed_xattr(struct send_ctx *sctx)
  4381. {
  4382. int ret;
  4383. ret = iterate_dir_item(sctx->send_root, sctx->left_path,
  4384. __process_changed_new_xattr, sctx);
  4385. if (ret < 0)
  4386. return ret;
  4387. return iterate_dir_item(sctx->parent_root, sctx->right_path,
  4388. __process_changed_deleted_xattr, sctx);
  4389. }
  4390. static int process_all_new_xattrs(struct send_ctx *sctx)
  4391. {
  4392. int ret = 0;
  4393. int iter_ret = 0;
  4394. struct btrfs_root *root;
  4395. BTRFS_PATH_AUTO_FREE(path);
  4396. struct btrfs_key key;
  4397. struct btrfs_key found_key;
  4398. path = alloc_path_for_send();
  4399. if (!path)
  4400. return -ENOMEM;
  4401. root = sctx->send_root;
  4402. key.objectid = sctx->cmp_key->objectid;
  4403. key.type = BTRFS_XATTR_ITEM_KEY;
  4404. key.offset = 0;
  4405. btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
  4406. if (found_key.objectid != key.objectid ||
  4407. found_key.type != key.type) {
  4408. ret = 0;
  4409. break;
  4410. }
  4411. ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
  4412. if (ret < 0)
  4413. break;
  4414. }
  4415. /* Catch error found during iteration */
  4416. if (iter_ret < 0)
  4417. ret = iter_ret;
  4418. return ret;
  4419. }
  4420. static int send_verity(struct send_ctx *sctx, struct fs_path *path,
  4421. struct fsverity_descriptor *desc)
  4422. {
  4423. int ret;
  4424. ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY);
  4425. if (ret < 0)
  4426. return ret;
  4427. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
  4428. TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM,
  4429. le8_to_cpu(desc->hash_algorithm));
  4430. TLV_PUT_U32(sctx, BTRFS_SEND_A_VERITY_BLOCK_SIZE,
  4431. 1U << le8_to_cpu(desc->log_blocksize));
  4432. TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SALT_DATA, desc->salt,
  4433. le8_to_cpu(desc->salt_size));
  4434. TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SIG_DATA, desc->signature,
  4435. le32_to_cpu(desc->sig_size));
  4436. ret = send_cmd(sctx);
  4437. tlv_put_failure:
  4438. return ret;
  4439. }
  4440. static int process_verity(struct send_ctx *sctx)
  4441. {
  4442. int ret = 0;
  4443. struct btrfs_inode *inode;
  4444. struct fs_path *p;
  4445. inode = btrfs_iget(sctx->cur_ino, sctx->send_root);
  4446. if (IS_ERR(inode))
  4447. return PTR_ERR(inode);
  4448. ret = btrfs_get_verity_descriptor(&inode->vfs_inode, NULL, 0);
  4449. if (ret < 0)
  4450. goto iput;
  4451. if (unlikely(ret > FS_VERITY_MAX_DESCRIPTOR_SIZE)) {
  4452. ret = -EMSGSIZE;
  4453. goto iput;
  4454. }
  4455. if (!sctx->verity_descriptor) {
  4456. sctx->verity_descriptor = kvmalloc(FS_VERITY_MAX_DESCRIPTOR_SIZE,
  4457. GFP_KERNEL);
  4458. if (!sctx->verity_descriptor) {
  4459. ret = -ENOMEM;
  4460. goto iput;
  4461. }
  4462. }
  4463. ret = btrfs_get_verity_descriptor(&inode->vfs_inode, sctx->verity_descriptor, ret);
  4464. if (ret < 0)
  4465. goto iput;
  4466. p = get_cur_inode_path(sctx);
  4467. if (IS_ERR(p)) {
  4468. ret = PTR_ERR(p);
  4469. goto iput;
  4470. }
  4471. ret = send_verity(sctx, p, sctx->verity_descriptor);
  4472. iput:
  4473. iput(&inode->vfs_inode);
  4474. return ret;
  4475. }
  4476. static inline u64 max_send_read_size(const struct send_ctx *sctx)
  4477. {
  4478. return sctx->send_max_size - SZ_16K;
  4479. }
  4480. static int put_data_header(struct send_ctx *sctx, u32 len)
  4481. {
  4482. if (WARN_ON_ONCE(sctx->put_data))
  4483. return -EINVAL;
  4484. sctx->put_data = true;
  4485. if (sctx->proto >= 2) {
  4486. /*
  4487. * Since v2, the data attribute header doesn't include a length,
  4488. * it is implicitly to the end of the command.
  4489. */
  4490. if (unlikely(sctx->send_max_size - sctx->send_size < sizeof(__le16) + len))
  4491. return -EOVERFLOW;
  4492. put_unaligned_le16(BTRFS_SEND_A_DATA, sctx->send_buf + sctx->send_size);
  4493. sctx->send_size += sizeof(__le16);
  4494. } else {
  4495. struct btrfs_tlv_header *hdr;
  4496. if (unlikely(sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len))
  4497. return -EOVERFLOW;
  4498. hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
  4499. put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
  4500. put_unaligned_le16(len, &hdr->tlv_len);
  4501. sctx->send_size += sizeof(*hdr);
  4502. }
  4503. return 0;
  4504. }
  4505. static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
  4506. {
  4507. struct btrfs_root *root = sctx->send_root;
  4508. struct btrfs_fs_info *fs_info = root->fs_info;
  4509. u64 cur = offset;
  4510. const u64 end = offset + len;
  4511. const pgoff_t last_index = ((end - 1) >> PAGE_SHIFT);
  4512. struct address_space *mapping = sctx->cur_inode->i_mapping;
  4513. int ret;
  4514. ret = put_data_header(sctx, len);
  4515. if (ret)
  4516. return ret;
  4517. while (cur < end) {
  4518. pgoff_t index = (cur >> PAGE_SHIFT);
  4519. unsigned int cur_len;
  4520. unsigned int pg_offset;
  4521. struct folio *folio;
  4522. folio = filemap_lock_folio(mapping, index);
  4523. if (IS_ERR(folio)) {
  4524. page_cache_sync_readahead(mapping,
  4525. &sctx->ra, NULL, index,
  4526. last_index + 1 - index);
  4527. folio = filemap_grab_folio(mapping, index);
  4528. if (IS_ERR(folio)) {
  4529. ret = PTR_ERR(folio);
  4530. break;
  4531. }
  4532. }
  4533. pg_offset = offset_in_folio(folio, cur);
  4534. cur_len = min_t(unsigned int, end - cur, folio_size(folio) - pg_offset);
  4535. if (folio_test_readahead(folio))
  4536. page_cache_async_readahead(mapping, &sctx->ra, NULL, folio,
  4537. last_index + 1 - index);
  4538. if (!folio_test_uptodate(folio)) {
  4539. btrfs_read_folio(NULL, folio);
  4540. folio_lock(folio);
  4541. if (unlikely(!folio_test_uptodate(folio))) {
  4542. folio_unlock(folio);
  4543. btrfs_err(fs_info,
  4544. "send: IO error at offset %llu for inode %llu root %llu",
  4545. folio_pos(folio), sctx->cur_ino,
  4546. btrfs_root_id(sctx->send_root));
  4547. folio_put(folio);
  4548. ret = -EIO;
  4549. break;
  4550. }
  4551. if (folio->mapping != mapping) {
  4552. folio_unlock(folio);
  4553. folio_put(folio);
  4554. continue;
  4555. }
  4556. }
  4557. memcpy_from_folio(sctx->send_buf + sctx->send_size, folio,
  4558. pg_offset, cur_len);
  4559. folio_unlock(folio);
  4560. folio_put(folio);
  4561. cur += cur_len;
  4562. sctx->send_size += cur_len;
  4563. }
  4564. return ret;
  4565. }
  4566. /*
  4567. * Read some bytes from the current inode/file and send a write command to
  4568. * user space.
  4569. */
  4570. static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
  4571. {
  4572. int ret = 0;
  4573. struct fs_path *p;
  4574. p = get_cur_inode_path(sctx);
  4575. if (IS_ERR(p))
  4576. return PTR_ERR(p);
  4577. ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
  4578. if (ret < 0)
  4579. return ret;
  4580. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  4581. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
  4582. ret = put_file_data(sctx, offset, len);
  4583. if (ret < 0)
  4584. return ret;
  4585. ret = send_cmd(sctx);
  4586. tlv_put_failure:
  4587. return ret;
  4588. }
  4589. /*
  4590. * Send a clone command to user space.
  4591. */
  4592. static int send_clone(struct send_ctx *sctx,
  4593. u64 offset, u32 len,
  4594. struct clone_root *clone_root)
  4595. {
  4596. int ret = 0;
  4597. struct fs_path *p;
  4598. struct fs_path *cur_inode_path;
  4599. u64 gen;
  4600. cur_inode_path = get_cur_inode_path(sctx);
  4601. if (IS_ERR(cur_inode_path))
  4602. return PTR_ERR(cur_inode_path);
  4603. p = fs_path_alloc();
  4604. if (!p)
  4605. return -ENOMEM;
  4606. ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
  4607. if (ret < 0)
  4608. goto out;
  4609. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
  4610. TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
  4611. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_inode_path);
  4612. if (clone_root->root == sctx->send_root) {
  4613. ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
  4614. if (ret < 0)
  4615. goto out;
  4616. ret = get_cur_path(sctx, clone_root->ino, gen, p);
  4617. } else {
  4618. ret = get_inode_path(clone_root->root, clone_root->ino, p);
  4619. }
  4620. if (ret < 0)
  4621. goto out;
  4622. /*
  4623. * If the parent we're using has a received_uuid set then use that as
  4624. * our clone source as that is what we will look for when doing a
  4625. * receive.
  4626. *
  4627. * This covers the case that we create a snapshot off of a received
  4628. * subvolume and then use that as the parent and try to receive on a
  4629. * different host.
  4630. */
  4631. if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
  4632. TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
  4633. clone_root->root->root_item.received_uuid);
  4634. else
  4635. TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
  4636. clone_root->root->root_item.uuid);
  4637. TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
  4638. btrfs_root_ctransid(&clone_root->root->root_item));
  4639. TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
  4640. TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
  4641. clone_root->offset);
  4642. ret = send_cmd(sctx);
  4643. tlv_put_failure:
  4644. out:
  4645. fs_path_free(p);
  4646. return ret;
  4647. }
  4648. /*
  4649. * Send an update extent command to user space.
  4650. */
  4651. static int send_update_extent(struct send_ctx *sctx,
  4652. u64 offset, u32 len)
  4653. {
  4654. int ret = 0;
  4655. struct fs_path *p;
  4656. p = get_cur_inode_path(sctx);
  4657. if (IS_ERR(p))
  4658. return PTR_ERR(p);
  4659. ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
  4660. if (ret < 0)
  4661. return ret;
  4662. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  4663. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
  4664. TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
  4665. ret = send_cmd(sctx);
  4666. tlv_put_failure:
  4667. return ret;
  4668. }
  4669. static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len)
  4670. {
  4671. struct fs_path *path;
  4672. int ret;
  4673. path = get_cur_inode_path(sctx);
  4674. if (IS_ERR(path))
  4675. return PTR_ERR(path);
  4676. ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
  4677. if (ret < 0)
  4678. return ret;
  4679. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
  4680. TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode);
  4681. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
  4682. TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
  4683. ret = send_cmd(sctx);
  4684. tlv_put_failure:
  4685. return ret;
  4686. }
  4687. static int send_hole(struct send_ctx *sctx, u64 end)
  4688. {
  4689. struct fs_path *p = NULL;
  4690. u64 read_size = max_send_read_size(sctx);
  4691. u64 offset = sctx->cur_inode_last_extent;
  4692. int ret = 0;
  4693. /*
  4694. * Starting with send stream v2 we have fallocate and can use it to
  4695. * punch holes instead of sending writes full of zeroes.
  4696. */
  4697. if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE))
  4698. return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
  4699. offset, end - offset);
  4700. /*
  4701. * A hole that starts at EOF or beyond it. Since we do not yet support
  4702. * fallocate (for extent preallocation and hole punching), sending a
  4703. * write of zeroes starting at EOF or beyond would later require issuing
  4704. * a truncate operation which would undo the write and achieve nothing.
  4705. */
  4706. if (offset >= sctx->cur_inode_size)
  4707. return 0;
  4708. /*
  4709. * Don't go beyond the inode's i_size due to prealloc extents that start
  4710. * after the i_size.
  4711. */
  4712. end = min_t(u64, end, sctx->cur_inode_size);
  4713. if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
  4714. return send_update_extent(sctx, offset, end - offset);
  4715. p = get_cur_inode_path(sctx);
  4716. if (IS_ERR(p))
  4717. return PTR_ERR(p);
  4718. while (offset < end) {
  4719. u64 len = min(end - offset, read_size);
  4720. ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
  4721. if (ret < 0)
  4722. break;
  4723. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
  4724. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
  4725. ret = put_data_header(sctx, len);
  4726. if (ret < 0)
  4727. break;
  4728. memset(sctx->send_buf + sctx->send_size, 0, len);
  4729. sctx->send_size += len;
  4730. ret = send_cmd(sctx);
  4731. if (ret < 0)
  4732. break;
  4733. offset += len;
  4734. }
  4735. sctx->cur_inode_next_write_offset = offset;
  4736. tlv_put_failure:
  4737. return ret;
  4738. }
  4739. static int send_encoded_inline_extent(struct send_ctx *sctx,
  4740. struct btrfs_path *path, u64 offset,
  4741. u64 len)
  4742. {
  4743. struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
  4744. struct fs_path *fspath;
  4745. struct extent_buffer *leaf = path->nodes[0];
  4746. struct btrfs_key key;
  4747. struct btrfs_file_extent_item *ei;
  4748. u64 ram_bytes;
  4749. size_t inline_size;
  4750. int ret;
  4751. fspath = get_cur_inode_path(sctx);
  4752. if (IS_ERR(fspath))
  4753. return PTR_ERR(fspath);
  4754. ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
  4755. if (ret < 0)
  4756. return ret;
  4757. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  4758. ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
  4759. ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei);
  4760. inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]);
  4761. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
  4762. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
  4763. TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
  4764. min(key.offset + ram_bytes - offset, len));
  4765. TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN, ram_bytes);
  4766. TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET, offset - key.offset);
  4767. ret = btrfs_encoded_io_compression_from_extent(fs_info,
  4768. btrfs_file_extent_compression(leaf, ei));
  4769. if (ret < 0)
  4770. return ret;
  4771. TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
  4772. ret = put_data_header(sctx, inline_size);
  4773. if (ret < 0)
  4774. return ret;
  4775. read_extent_buffer(leaf, sctx->send_buf + sctx->send_size,
  4776. btrfs_file_extent_inline_start(ei), inline_size);
  4777. sctx->send_size += inline_size;
  4778. ret = send_cmd(sctx);
  4779. tlv_put_failure:
  4780. return ret;
  4781. }
  4782. static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
  4783. u64 offset, u64 len)
  4784. {
  4785. struct btrfs_root *root = sctx->send_root;
  4786. struct btrfs_fs_info *fs_info = root->fs_info;
  4787. struct btrfs_inode *inode;
  4788. struct fs_path *fspath;
  4789. struct extent_buffer *leaf = path->nodes[0];
  4790. struct btrfs_key key;
  4791. struct btrfs_file_extent_item *ei;
  4792. u64 disk_bytenr, disk_num_bytes;
  4793. u32 data_offset;
  4794. struct btrfs_cmd_header *hdr;
  4795. u32 crc;
  4796. int ret;
  4797. inode = btrfs_iget(sctx->cur_ino, root);
  4798. if (IS_ERR(inode))
  4799. return PTR_ERR(inode);
  4800. fspath = get_cur_inode_path(sctx);
  4801. if (IS_ERR(fspath)) {
  4802. ret = PTR_ERR(fspath);
  4803. goto out;
  4804. }
  4805. ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
  4806. if (ret < 0)
  4807. goto out;
  4808. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  4809. ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
  4810. disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
  4811. disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, ei);
  4812. TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
  4813. TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
  4814. TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
  4815. min(key.offset + btrfs_file_extent_num_bytes(leaf, ei) - offset,
  4816. len));
  4817. TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN,
  4818. btrfs_file_extent_ram_bytes(leaf, ei));
  4819. TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET,
  4820. offset - key.offset + btrfs_file_extent_offset(leaf, ei));
  4821. ret = btrfs_encoded_io_compression_from_extent(fs_info,
  4822. btrfs_file_extent_compression(leaf, ei));
  4823. if (ret < 0)
  4824. goto out;
  4825. TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
  4826. TLV_PUT_U32(sctx, BTRFS_SEND_A_ENCRYPTION, 0);
  4827. ret = put_data_header(sctx, disk_num_bytes);
  4828. if (ret < 0)
  4829. goto out;
  4830. /*
  4831. * We want to do I/O directly into the send buffer, so get the next page
  4832. * boundary in the send buffer. This means that there may be a gap
  4833. * between the beginning of the command and the file data.
  4834. */
  4835. data_offset = PAGE_ALIGN(sctx->send_size);
  4836. if (unlikely(data_offset > sctx->send_max_size ||
  4837. sctx->send_max_size - data_offset < disk_num_bytes)) {
  4838. ret = -EOVERFLOW;
  4839. goto out;
  4840. }
  4841. /*
  4842. * Note that send_buf is a mapping of send_buf_pages, so this is really
  4843. * reading into send_buf.
  4844. */
  4845. ret = btrfs_encoded_read_regular_fill_pages(inode,
  4846. disk_bytenr, disk_num_bytes,
  4847. sctx->send_buf_pages +
  4848. (data_offset >> PAGE_SHIFT),
  4849. NULL);
  4850. if (ret)
  4851. goto out;
  4852. hdr = (struct btrfs_cmd_header *)sctx->send_buf;
  4853. hdr->len = cpu_to_le32(sctx->send_size + disk_num_bytes - sizeof(*hdr));
  4854. hdr->crc = 0;
  4855. crc = crc32c(0, sctx->send_buf, sctx->send_size);
  4856. crc = crc32c(crc, sctx->send_buf + data_offset, disk_num_bytes);
  4857. hdr->crc = cpu_to_le32(crc);
  4858. ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
  4859. &sctx->send_off);
  4860. if (!ret) {
  4861. ret = write_buf(sctx->send_filp, sctx->send_buf + data_offset,
  4862. disk_num_bytes, &sctx->send_off);
  4863. }
  4864. sctx->send_size = 0;
  4865. sctx->put_data = false;
  4866. tlv_put_failure:
  4867. out:
  4868. iput(&inode->vfs_inode);
  4869. return ret;
  4870. }
  4871. static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
  4872. const u64 offset, const u64 len)
  4873. {
  4874. const u64 end = offset + len;
  4875. struct extent_buffer *leaf = path->nodes[0];
  4876. struct btrfs_file_extent_item *ei;
  4877. u64 read_size = max_send_read_size(sctx);
  4878. u64 sent = 0;
  4879. if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
  4880. return send_update_extent(sctx, offset, len);
  4881. ei = btrfs_item_ptr(leaf, path->slots[0],
  4882. struct btrfs_file_extent_item);
  4883. if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
  4884. btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
  4885. bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
  4886. BTRFS_FILE_EXTENT_INLINE);
  4887. /*
  4888. * Send the compressed extent unless the compressed data is
  4889. * larger than the decompressed data. This can happen if we're
  4890. * not sending the entire extent, either because it has been
  4891. * partially overwritten/truncated or because this is a part of
  4892. * the extent that we couldn't clone in clone_range().
  4893. */
  4894. if (is_inline &&
  4895. btrfs_file_extent_inline_item_len(leaf,
  4896. path->slots[0]) <= len) {
  4897. return send_encoded_inline_extent(sctx, path, offset,
  4898. len);
  4899. } else if (!is_inline &&
  4900. btrfs_file_extent_disk_num_bytes(leaf, ei) <= len) {
  4901. return send_encoded_extent(sctx, path, offset, len);
  4902. }
  4903. }
  4904. if (sctx->cur_inode == NULL) {
  4905. struct btrfs_inode *btrfs_inode;
  4906. struct btrfs_root *root = sctx->send_root;
  4907. btrfs_inode = btrfs_iget(sctx->cur_ino, root);
  4908. if (IS_ERR(btrfs_inode))
  4909. return PTR_ERR(btrfs_inode);
  4910. sctx->cur_inode = &btrfs_inode->vfs_inode;
  4911. memset(&sctx->ra, 0, sizeof(struct file_ra_state));
  4912. file_ra_state_init(&sctx->ra, sctx->cur_inode->i_mapping);
  4913. /*
  4914. * It's very likely there are no pages from this inode in the page
  4915. * cache, so after reading extents and sending their data, we clean
  4916. * the page cache to avoid trashing the page cache (adding pressure
  4917. * to the page cache and forcing eviction of other data more useful
  4918. * for applications).
  4919. *
  4920. * We decide if we should clean the page cache simply by checking
  4921. * if the inode's mapping nrpages is 0 when we first open it, and
  4922. * not by using something like filemap_range_has_page() before
  4923. * reading an extent because when we ask the readahead code to
  4924. * read a given file range, it may (and almost always does) read
  4925. * pages from beyond that range (see the documentation for
  4926. * page_cache_sync_readahead()), so it would not be reliable,
  4927. * because after reading the first extent future calls to
  4928. * filemap_range_has_page() would return true because the readahead
  4929. * on the previous extent resulted in reading pages of the current
  4930. * extent as well.
  4931. */
  4932. sctx->clean_page_cache = (sctx->cur_inode->i_mapping->nrpages == 0);
  4933. sctx->page_cache_clear_start = round_down(offset, PAGE_SIZE);
  4934. }
  4935. while (sent < len) {
  4936. u64 size = min(len - sent, read_size);
  4937. int ret;
  4938. ret = send_write(sctx, offset + sent, size);
  4939. if (ret < 0)
  4940. return ret;
  4941. sent += size;
  4942. }
  4943. if (sctx->clean_page_cache && PAGE_ALIGNED(end)) {
  4944. /*
  4945. * Always operate only on ranges that are a multiple of the page
  4946. * size. This is not only to prevent zeroing parts of a page in
  4947. * the case of subpage sector size, but also to guarantee we evict
  4948. * pages, as passing a range that is smaller than page size does
  4949. * not evict the respective page (only zeroes part of its content).
  4950. *
  4951. * Always start from the end offset of the last range cleared.
  4952. * This is because the readahead code may (and very often does)
  4953. * reads pages beyond the range we request for readahead. So if
  4954. * we have an extent layout like this:
  4955. *
  4956. * [ extent A ] [ extent B ] [ extent C ]
  4957. *
  4958. * When we ask page_cache_sync_readahead() to read extent A, it
  4959. * may also trigger reads for pages of extent B. If we are doing
  4960. * an incremental send and extent B has not changed between the
  4961. * parent and send snapshots, some or all of its pages may end
  4962. * up being read and placed in the page cache. So when truncating
  4963. * the page cache we always start from the end offset of the
  4964. * previously processed extent up to the end of the current
  4965. * extent.
  4966. */
  4967. truncate_inode_pages_range(&sctx->cur_inode->i_data,
  4968. sctx->page_cache_clear_start,
  4969. end - 1);
  4970. sctx->page_cache_clear_start = end;
  4971. }
  4972. return 0;
  4973. }
  4974. /*
  4975. * Search for a capability xattr related to sctx->cur_ino. If the capability is
  4976. * found, call send_set_xattr function to emit it.
  4977. *
  4978. * Return 0 if there isn't a capability, or when the capability was emitted
  4979. * successfully, or < 0 if an error occurred.
  4980. */
  4981. static int send_capabilities(struct send_ctx *sctx)
  4982. {
  4983. BTRFS_PATH_AUTO_FREE(path);
  4984. struct btrfs_dir_item *di;
  4985. struct extent_buffer *leaf;
  4986. unsigned long data_ptr;
  4987. char AUTO_KFREE(buf);
  4988. int buf_len;
  4989. int ret = 0;
  4990. path = alloc_path_for_send();
  4991. if (!path)
  4992. return -ENOMEM;
  4993. di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
  4994. XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
  4995. if (!di) {
  4996. /* There is no xattr for this inode */
  4997. return 0;
  4998. } else if (IS_ERR(di)) {
  4999. return PTR_ERR(di);
  5000. }
  5001. leaf = path->nodes[0];
  5002. buf_len = btrfs_dir_data_len(leaf, di);
  5003. buf = kmalloc(buf_len, GFP_KERNEL);
  5004. if (!buf)
  5005. return -ENOMEM;
  5006. data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
  5007. read_extent_buffer(leaf, buf, data_ptr, buf_len);
  5008. ret = send_set_xattr(sctx, XATTR_NAME_CAPS,
  5009. strlen(XATTR_NAME_CAPS), buf, buf_len);
  5010. return ret;
  5011. }
  5012. static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
  5013. struct clone_root *clone_root, const u64 disk_byte,
  5014. u64 data_offset, u64 offset, u64 len)
  5015. {
  5016. BTRFS_PATH_AUTO_FREE(path);
  5017. struct btrfs_key key;
  5018. int ret;
  5019. struct btrfs_inode_info info;
  5020. u64 clone_src_i_size = 0;
  5021. /*
  5022. * Prevent cloning from a zero offset with a length matching the sector
  5023. * size because in some scenarios this will make the receiver fail.
  5024. *
  5025. * For example, if in the source filesystem the extent at offset 0
  5026. * has a length of sectorsize and it was written using direct IO, then
  5027. * it can never be an inline extent (even if compression is enabled).
  5028. * Then this extent can be cloned in the original filesystem to a non
  5029. * zero file offset, but it may not be possible to clone in the
  5030. * destination filesystem because it can be inlined due to compression
  5031. * on the destination filesystem (as the receiver's write operations are
  5032. * always done using buffered IO). The same happens when the original
  5033. * filesystem does not have compression enabled but the destination
  5034. * filesystem has.
  5035. */
  5036. if (clone_root->offset == 0 &&
  5037. len == sctx->send_root->fs_info->sectorsize)
  5038. return send_extent_data(sctx, dst_path, offset, len);
  5039. path = alloc_path_for_send();
  5040. if (!path)
  5041. return -ENOMEM;
  5042. /*
  5043. * There are inodes that have extents that lie behind its i_size. Don't
  5044. * accept clones from these extents.
  5045. */
  5046. ret = get_inode_info(clone_root->root, clone_root->ino, &info);
  5047. btrfs_release_path(path);
  5048. if (ret < 0)
  5049. return ret;
  5050. clone_src_i_size = info.size;
  5051. /*
  5052. * We can't send a clone operation for the entire range if we find
  5053. * extent items in the respective range in the source file that
  5054. * refer to different extents or if we find holes.
  5055. * So check for that and do a mix of clone and regular write/copy
  5056. * operations if needed.
  5057. *
  5058. * Example:
  5059. *
  5060. * mkfs.btrfs -f /dev/sda
  5061. * mount /dev/sda /mnt
  5062. * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
  5063. * cp --reflink=always /mnt/foo /mnt/bar
  5064. * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
  5065. * btrfs subvolume snapshot -r /mnt /mnt/snap
  5066. *
  5067. * If when we send the snapshot and we are processing file bar (which
  5068. * has a higher inode number than foo) we blindly send a clone operation
  5069. * for the [0, 100K[ range from foo to bar, the receiver ends up getting
  5070. * a file bar that matches the content of file foo - iow, doesn't match
  5071. * the content from bar in the original filesystem.
  5072. */
  5073. key.objectid = clone_root->ino;
  5074. key.type = BTRFS_EXTENT_DATA_KEY;
  5075. key.offset = clone_root->offset;
  5076. ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
  5077. if (ret < 0)
  5078. return ret;
  5079. if (ret > 0 && path->slots[0] > 0) {
  5080. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
  5081. if (key.objectid == clone_root->ino &&
  5082. key.type == BTRFS_EXTENT_DATA_KEY)
  5083. path->slots[0]--;
  5084. }
  5085. while (true) {
  5086. struct extent_buffer *leaf = path->nodes[0];
  5087. int slot = path->slots[0];
  5088. struct btrfs_file_extent_item *ei;
  5089. u8 type;
  5090. u64 ext_len;
  5091. u64 clone_len;
  5092. u64 clone_data_offset;
  5093. bool crossed_src_i_size = false;
  5094. if (slot >= btrfs_header_nritems(leaf)) {
  5095. ret = btrfs_next_leaf(clone_root->root, path);
  5096. if (ret < 0)
  5097. return ret;
  5098. else if (ret > 0)
  5099. break;
  5100. continue;
  5101. }
  5102. btrfs_item_key_to_cpu(leaf, &key, slot);
  5103. /*
  5104. * We might have an implicit trailing hole (NO_HOLES feature
  5105. * enabled). We deal with it after leaving this loop.
  5106. */
  5107. if (key.objectid != clone_root->ino ||
  5108. key.type != BTRFS_EXTENT_DATA_KEY)
  5109. break;
  5110. ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
  5111. type = btrfs_file_extent_type(leaf, ei);
  5112. if (type == BTRFS_FILE_EXTENT_INLINE) {
  5113. ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
  5114. ext_len = PAGE_ALIGN(ext_len);
  5115. } else {
  5116. ext_len = btrfs_file_extent_num_bytes(leaf, ei);
  5117. }
  5118. if (key.offset + ext_len <= clone_root->offset)
  5119. goto next;
  5120. if (key.offset > clone_root->offset) {
  5121. /* Implicit hole, NO_HOLES feature enabled. */
  5122. u64 hole_len = key.offset - clone_root->offset;
  5123. if (hole_len > len)
  5124. hole_len = len;
  5125. ret = send_extent_data(sctx, dst_path, offset,
  5126. hole_len);
  5127. if (ret < 0)
  5128. return ret;
  5129. len -= hole_len;
  5130. if (len == 0)
  5131. break;
  5132. offset += hole_len;
  5133. clone_root->offset += hole_len;
  5134. data_offset += hole_len;
  5135. }
  5136. if (key.offset >= clone_root->offset + len)
  5137. break;
  5138. if (key.offset >= clone_src_i_size)
  5139. break;
  5140. if (key.offset + ext_len > clone_src_i_size) {
  5141. ext_len = clone_src_i_size - key.offset;
  5142. crossed_src_i_size = true;
  5143. }
  5144. clone_data_offset = btrfs_file_extent_offset(leaf, ei);
  5145. if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
  5146. clone_root->offset = key.offset;
  5147. if (clone_data_offset < data_offset &&
  5148. clone_data_offset + ext_len > data_offset) {
  5149. u64 extent_offset;
  5150. extent_offset = data_offset - clone_data_offset;
  5151. ext_len -= extent_offset;
  5152. clone_data_offset += extent_offset;
  5153. clone_root->offset += extent_offset;
  5154. }
  5155. }
  5156. clone_len = min_t(u64, ext_len, len);
  5157. if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
  5158. clone_data_offset == data_offset) {
  5159. const u64 src_end = clone_root->offset + clone_len;
  5160. const u64 sectorsize = SZ_64K;
  5161. /*
  5162. * We can't clone the last block, when its size is not
  5163. * sector size aligned, into the middle of a file. If we
  5164. * do so, the receiver will get a failure (-EINVAL) when
  5165. * trying to clone or will silently corrupt the data in
  5166. * the destination file if it's on a kernel without the
  5167. * fix introduced by commit ac765f83f1397646
  5168. * ("Btrfs: fix data corruption due to cloning of eof
  5169. * block).
  5170. *
  5171. * So issue a clone of the aligned down range plus a
  5172. * regular write for the eof block, if we hit that case.
  5173. *
  5174. * Also, we use the maximum possible sector size, 64K,
  5175. * because we don't know what's the sector size of the
  5176. * filesystem that receives the stream, so we have to
  5177. * assume the largest possible sector size.
  5178. */
  5179. if (src_end == clone_src_i_size &&
  5180. !IS_ALIGNED(src_end, sectorsize) &&
  5181. offset + clone_len < sctx->cur_inode_size) {
  5182. u64 slen;
  5183. slen = ALIGN_DOWN(src_end - clone_root->offset,
  5184. sectorsize);
  5185. if (slen > 0) {
  5186. ret = send_clone(sctx, offset, slen,
  5187. clone_root);
  5188. if (ret < 0)
  5189. return ret;
  5190. }
  5191. ret = send_extent_data(sctx, dst_path,
  5192. offset + slen,
  5193. clone_len - slen);
  5194. } else {
  5195. ret = send_clone(sctx, offset, clone_len,
  5196. clone_root);
  5197. }
  5198. } else if (crossed_src_i_size && clone_len < len) {
  5199. /*
  5200. * If we are at i_size of the clone source inode and we
  5201. * can not clone from it, terminate the loop. This is
  5202. * to avoid sending two write operations, one with a
  5203. * length matching clone_len and the final one after
  5204. * this loop with a length of len - clone_len.
  5205. *
  5206. * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
  5207. * was passed to the send ioctl), this helps avoid
  5208. * sending an encoded write for an offset that is not
  5209. * sector size aligned, in case the i_size of the source
  5210. * inode is not sector size aligned. That will make the
  5211. * receiver fallback to decompression of the data and
  5212. * writing it using regular buffered IO, therefore while
  5213. * not incorrect, it's not optimal due decompression and
  5214. * possible re-compression at the receiver.
  5215. */
  5216. break;
  5217. } else {
  5218. ret = send_extent_data(sctx, dst_path, offset,
  5219. clone_len);
  5220. }
  5221. if (ret < 0)
  5222. return ret;
  5223. len -= clone_len;
  5224. if (len == 0)
  5225. break;
  5226. offset += clone_len;
  5227. clone_root->offset += clone_len;
  5228. /*
  5229. * If we are cloning from the file we are currently processing,
  5230. * and using the send root as the clone root, we must stop once
  5231. * the current clone offset reaches the current eof of the file
  5232. * at the receiver, otherwise we would issue an invalid clone
  5233. * operation (source range going beyond eof) and cause the
  5234. * receiver to fail. So if we reach the current eof, bail out
  5235. * and fallback to a regular write.
  5236. */
  5237. if (clone_root->root == sctx->send_root &&
  5238. clone_root->ino == sctx->cur_ino &&
  5239. clone_root->offset >= sctx->cur_inode_next_write_offset)
  5240. break;
  5241. data_offset += clone_len;
  5242. next:
  5243. path->slots[0]++;
  5244. }
  5245. if (len > 0)
  5246. ret = send_extent_data(sctx, dst_path, offset, len);
  5247. else
  5248. ret = 0;
  5249. return ret;
  5250. }
  5251. static int send_write_or_clone(struct send_ctx *sctx,
  5252. struct btrfs_path *path,
  5253. struct btrfs_key *key,
  5254. struct clone_root *clone_root)
  5255. {
  5256. int ret = 0;
  5257. u64 offset = key->offset;
  5258. u64 end;
  5259. u64 bs = sctx->send_root->fs_info->sectorsize;
  5260. struct btrfs_file_extent_item *ei;
  5261. u64 disk_byte;
  5262. u64 data_offset;
  5263. u64 num_bytes;
  5264. struct btrfs_inode_info info = { 0 };
  5265. end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
  5266. if (offset >= end)
  5267. return 0;
  5268. num_bytes = end - offset;
  5269. if (!clone_root)
  5270. goto write_data;
  5271. if (IS_ALIGNED(end, bs))
  5272. goto clone_data;
  5273. /*
  5274. * If the extent end is not aligned, we can clone if the extent ends at
  5275. * the i_size of the inode and the clone range ends at the i_size of the
  5276. * source inode, otherwise the clone operation fails with -EINVAL.
  5277. */
  5278. if (end != sctx->cur_inode_size)
  5279. goto write_data;
  5280. ret = get_inode_info(clone_root->root, clone_root->ino, &info);
  5281. if (ret < 0)
  5282. return ret;
  5283. if (clone_root->offset + num_bytes == info.size) {
  5284. /*
  5285. * The final size of our file matches the end offset, but it may
  5286. * be that its current size is larger, so we have to truncate it
  5287. * to any value between the start offset of the range and the
  5288. * final i_size, otherwise the clone operation is invalid
  5289. * because it's unaligned and it ends before the current EOF.
  5290. * We do this truncate to the final i_size when we finish
  5291. * processing the inode, but it's too late by then. And here we
  5292. * truncate to the start offset of the range because it's always
  5293. * sector size aligned while if it were the final i_size it
  5294. * would result in dirtying part of a page, filling part of a
  5295. * page with zeroes and then having the clone operation at the
  5296. * receiver trigger IO and wait for it due to the dirty page.
  5297. */
  5298. if (sctx->parent_root != NULL) {
  5299. ret = send_truncate(sctx, sctx->cur_ino,
  5300. sctx->cur_inode_gen, offset);
  5301. if (ret < 0)
  5302. return ret;
  5303. }
  5304. goto clone_data;
  5305. }
  5306. write_data:
  5307. ret = send_extent_data(sctx, path, offset, num_bytes);
  5308. sctx->cur_inode_next_write_offset = end;
  5309. return ret;
  5310. clone_data:
  5311. ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
  5312. struct btrfs_file_extent_item);
  5313. disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
  5314. data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
  5315. ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset,
  5316. num_bytes);
  5317. sctx->cur_inode_next_write_offset = end;
  5318. return ret;
  5319. }
  5320. static int is_extent_unchanged(struct send_ctx *sctx,
  5321. struct btrfs_path *left_path,
  5322. struct btrfs_key *ekey)
  5323. {
  5324. int ret = 0;
  5325. struct btrfs_key key;
  5326. BTRFS_PATH_AUTO_FREE(path);
  5327. struct extent_buffer *eb;
  5328. int slot;
  5329. struct btrfs_key found_key;
  5330. struct btrfs_file_extent_item *ei;
  5331. u64 left_disknr;
  5332. u64 right_disknr;
  5333. u64 left_offset;
  5334. u64 right_offset;
  5335. u64 left_offset_fixed;
  5336. u64 left_len;
  5337. u64 right_len;
  5338. u64 left_gen;
  5339. u64 right_gen;
  5340. u8 left_type;
  5341. u8 right_type;
  5342. path = alloc_path_for_send();
  5343. if (!path)
  5344. return -ENOMEM;
  5345. eb = left_path->nodes[0];
  5346. slot = left_path->slots[0];
  5347. ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
  5348. left_type = btrfs_file_extent_type(eb, ei);
  5349. if (left_type != BTRFS_FILE_EXTENT_REG)
  5350. return 0;
  5351. left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
  5352. left_len = btrfs_file_extent_num_bytes(eb, ei);
  5353. left_offset = btrfs_file_extent_offset(eb, ei);
  5354. left_gen = btrfs_file_extent_generation(eb, ei);
  5355. /*
  5356. * Following comments will refer to these graphics. L is the left
  5357. * extents which we are checking at the moment. 1-8 are the right
  5358. * extents that we iterate.
  5359. *
  5360. * |-----L-----|
  5361. * |-1-|-2a-|-3-|-4-|-5-|-6-|
  5362. *
  5363. * |-----L-----|
  5364. * |--1--|-2b-|...(same as above)
  5365. *
  5366. * Alternative situation. Happens on files where extents got split.
  5367. * |-----L-----|
  5368. * |-----------7-----------|-6-|
  5369. *
  5370. * Alternative situation. Happens on files which got larger.
  5371. * |-----L-----|
  5372. * |-8-|
  5373. * Nothing follows after 8.
  5374. */
  5375. key.objectid = ekey->objectid;
  5376. key.type = BTRFS_EXTENT_DATA_KEY;
  5377. key.offset = ekey->offset;
  5378. ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
  5379. if (ret < 0)
  5380. return ret;
  5381. if (ret)
  5382. return 0;
  5383. /*
  5384. * Handle special case where the right side has no extents at all.
  5385. */
  5386. eb = path->nodes[0];
  5387. slot = path->slots[0];
  5388. btrfs_item_key_to_cpu(eb, &found_key, slot);
  5389. if (found_key.objectid != key.objectid ||
  5390. found_key.type != key.type)
  5391. /* If we're a hole then just pretend nothing changed */
  5392. return (left_disknr ? 0 : 1);
  5393. /*
  5394. * We're now on 2a, 2b or 7.
  5395. */
  5396. key = found_key;
  5397. while (key.offset < ekey->offset + left_len) {
  5398. ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
  5399. right_type = btrfs_file_extent_type(eb, ei);
  5400. if (right_type != BTRFS_FILE_EXTENT_REG &&
  5401. right_type != BTRFS_FILE_EXTENT_INLINE)
  5402. return 0;
  5403. if (right_type == BTRFS_FILE_EXTENT_INLINE) {
  5404. right_len = btrfs_file_extent_ram_bytes(eb, ei);
  5405. right_len = PAGE_ALIGN(right_len);
  5406. } else {
  5407. right_len = btrfs_file_extent_num_bytes(eb, ei);
  5408. }
  5409. /*
  5410. * Are we at extent 8? If yes, we know the extent is changed.
  5411. * This may only happen on the first iteration.
  5412. */
  5413. if (found_key.offset + right_len <= ekey->offset)
  5414. /* If we're a hole just pretend nothing changed */
  5415. return (left_disknr ? 0 : 1);
  5416. /*
  5417. * We just wanted to see if when we have an inline extent, what
  5418. * follows it is a regular extent (wanted to check the above
  5419. * condition for inline extents too). This should normally not
  5420. * happen but it's possible for example when we have an inline
  5421. * compressed extent representing data with a size matching
  5422. * the page size (currently the same as sector size).
  5423. */
  5424. if (right_type == BTRFS_FILE_EXTENT_INLINE)
  5425. return 0;
  5426. right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
  5427. right_offset = btrfs_file_extent_offset(eb, ei);
  5428. right_gen = btrfs_file_extent_generation(eb, ei);
  5429. left_offset_fixed = left_offset;
  5430. if (key.offset < ekey->offset) {
  5431. /* Fix the right offset for 2a and 7. */
  5432. right_offset += ekey->offset - key.offset;
  5433. } else {
  5434. /* Fix the left offset for all behind 2a and 2b */
  5435. left_offset_fixed += key.offset - ekey->offset;
  5436. }
  5437. /*
  5438. * Check if we have the same extent.
  5439. */
  5440. if (left_disknr != right_disknr ||
  5441. left_offset_fixed != right_offset ||
  5442. left_gen != right_gen)
  5443. return 0;
  5444. /*
  5445. * Go to the next extent.
  5446. */
  5447. ret = btrfs_next_item(sctx->parent_root, path);
  5448. if (ret < 0)
  5449. return ret;
  5450. if (!ret) {
  5451. eb = path->nodes[0];
  5452. slot = path->slots[0];
  5453. btrfs_item_key_to_cpu(eb, &found_key, slot);
  5454. }
  5455. if (ret || found_key.objectid != key.objectid ||
  5456. found_key.type != key.type) {
  5457. key.offset += right_len;
  5458. break;
  5459. }
  5460. if (found_key.offset != key.offset + right_len)
  5461. return 0;
  5462. key = found_key;
  5463. }
  5464. /*
  5465. * We're now behind the left extent (treat as unchanged) or at the end
  5466. * of the right side (treat as changed).
  5467. */
  5468. if (key.offset >= ekey->offset + left_len)
  5469. ret = 1;
  5470. else
  5471. ret = 0;
  5472. return ret;
  5473. }
  5474. static int get_last_extent(struct send_ctx *sctx, u64 offset)
  5475. {
  5476. BTRFS_PATH_AUTO_FREE(path);
  5477. struct btrfs_root *root = sctx->send_root;
  5478. struct btrfs_key key;
  5479. int ret;
  5480. path = alloc_path_for_send();
  5481. if (!path)
  5482. return -ENOMEM;
  5483. sctx->cur_inode_last_extent = 0;
  5484. key.objectid = sctx->cur_ino;
  5485. key.type = BTRFS_EXTENT_DATA_KEY;
  5486. key.offset = offset;
  5487. ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
  5488. if (ret < 0)
  5489. return ret;
  5490. ret = 0;
  5491. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  5492. if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
  5493. return ret;
  5494. sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
  5495. return ret;
  5496. }
  5497. static int range_is_hole_in_parent(struct send_ctx *sctx,
  5498. const u64 start,
  5499. const u64 end)
  5500. {
  5501. BTRFS_PATH_AUTO_FREE(path);
  5502. struct btrfs_key key;
  5503. struct btrfs_root *root = sctx->parent_root;
  5504. u64 search_start = start;
  5505. int ret;
  5506. path = alloc_path_for_send();
  5507. if (!path)
  5508. return -ENOMEM;
  5509. key.objectid = sctx->cur_ino;
  5510. key.type = BTRFS_EXTENT_DATA_KEY;
  5511. key.offset = search_start;
  5512. ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  5513. if (ret < 0)
  5514. return ret;
  5515. if (ret > 0 && path->slots[0] > 0)
  5516. path->slots[0]--;
  5517. while (search_start < end) {
  5518. struct extent_buffer *leaf = path->nodes[0];
  5519. int slot = path->slots[0];
  5520. struct btrfs_file_extent_item *fi;
  5521. u64 extent_end;
  5522. if (slot >= btrfs_header_nritems(leaf)) {
  5523. ret = btrfs_next_leaf(root, path);
  5524. if (ret < 0)
  5525. return ret;
  5526. if (ret > 0)
  5527. break;
  5528. continue;
  5529. }
  5530. btrfs_item_key_to_cpu(leaf, &key, slot);
  5531. if (key.objectid < sctx->cur_ino ||
  5532. key.type < BTRFS_EXTENT_DATA_KEY)
  5533. goto next;
  5534. if (key.objectid > sctx->cur_ino ||
  5535. key.type > BTRFS_EXTENT_DATA_KEY ||
  5536. key.offset >= end)
  5537. break;
  5538. fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
  5539. extent_end = btrfs_file_extent_end(path);
  5540. if (extent_end <= start)
  5541. goto next;
  5542. if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE)
  5543. return 0;
  5544. if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
  5545. search_start = extent_end;
  5546. goto next;
  5547. }
  5548. return 0;
  5549. next:
  5550. path->slots[0]++;
  5551. }
  5552. return 1;
  5553. }
  5554. static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
  5555. struct btrfs_key *key)
  5556. {
  5557. int ret = 0;
  5558. if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
  5559. return 0;
  5560. /*
  5561. * Get last extent's end offset (exclusive) if we haven't determined it
  5562. * yet (we're processing the first file extent item that is new), or if
  5563. * we're at the first slot of a leaf and the last extent's end is less
  5564. * than the current extent's offset, because we might have skipped
  5565. * entire leaves that contained only file extent items for our current
  5566. * inode. These leaves have a generation number smaller (older) than the
  5567. * one in the current leaf and the leaf our last extent came from, and
  5568. * are located between these 2 leaves.
  5569. */
  5570. if ((sctx->cur_inode_last_extent == (u64)-1) ||
  5571. (path->slots[0] == 0 && sctx->cur_inode_last_extent < key->offset)) {
  5572. ret = get_last_extent(sctx, key->offset - 1);
  5573. if (ret)
  5574. return ret;
  5575. }
  5576. if (sctx->cur_inode_last_extent < key->offset) {
  5577. ret = range_is_hole_in_parent(sctx,
  5578. sctx->cur_inode_last_extent,
  5579. key->offset);
  5580. if (ret < 0)
  5581. return ret;
  5582. else if (ret == 0)
  5583. ret = send_hole(sctx, key->offset);
  5584. else
  5585. ret = 0;
  5586. }
  5587. sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
  5588. return ret;
  5589. }
  5590. static int process_extent(struct send_ctx *sctx,
  5591. struct btrfs_path *path,
  5592. struct btrfs_key *key)
  5593. {
  5594. struct clone_root *found_clone = NULL;
  5595. int ret = 0;
  5596. if (S_ISLNK(sctx->cur_inode_mode))
  5597. return 0;
  5598. if (sctx->parent_root && !sctx->cur_inode_new) {
  5599. ret = is_extent_unchanged(sctx, path, key);
  5600. if (ret < 0)
  5601. return ret;
  5602. if (ret)
  5603. goto out_hole;
  5604. } else {
  5605. struct btrfs_file_extent_item *ei;
  5606. u8 type;
  5607. ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
  5608. struct btrfs_file_extent_item);
  5609. type = btrfs_file_extent_type(path->nodes[0], ei);
  5610. if (type == BTRFS_FILE_EXTENT_PREALLOC ||
  5611. type == BTRFS_FILE_EXTENT_REG) {
  5612. /*
  5613. * The send spec does not have a prealloc command yet,
  5614. * so just leave a hole for prealloc'ed extents until
  5615. * we have enough commands queued up to justify rev'ing
  5616. * the send spec.
  5617. */
  5618. if (type == BTRFS_FILE_EXTENT_PREALLOC)
  5619. return 0;
  5620. /* Have a hole, just skip it. */
  5621. if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0)
  5622. return 0;
  5623. }
  5624. }
  5625. ret = find_extent_clone(sctx, path, key->objectid, key->offset,
  5626. sctx->cur_inode_size, &found_clone);
  5627. if (ret != -ENOENT && ret < 0)
  5628. return ret;
  5629. ret = send_write_or_clone(sctx, path, key, found_clone);
  5630. if (ret)
  5631. return ret;
  5632. out_hole:
  5633. return maybe_send_hole(sctx, path, key);
  5634. }
  5635. static int process_all_extents(struct send_ctx *sctx)
  5636. {
  5637. int ret = 0;
  5638. int iter_ret = 0;
  5639. struct btrfs_root *root;
  5640. BTRFS_PATH_AUTO_FREE(path);
  5641. struct btrfs_key key;
  5642. struct btrfs_key found_key;
  5643. root = sctx->send_root;
  5644. path = alloc_path_for_send();
  5645. if (!path)
  5646. return -ENOMEM;
  5647. key.objectid = sctx->cmp_key->objectid;
  5648. key.type = BTRFS_EXTENT_DATA_KEY;
  5649. key.offset = 0;
  5650. btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
  5651. if (found_key.objectid != key.objectid ||
  5652. found_key.type != key.type) {
  5653. ret = 0;
  5654. break;
  5655. }
  5656. ret = process_extent(sctx, path, &found_key);
  5657. if (ret < 0)
  5658. break;
  5659. }
  5660. /* Catch error found during iteration */
  5661. if (iter_ret < 0)
  5662. ret = iter_ret;
  5663. return ret;
  5664. }
  5665. static int process_recorded_refs_if_needed(struct send_ctx *sctx, bool at_end,
  5666. int *pending_move,
  5667. int *refs_processed)
  5668. {
  5669. int ret;
  5670. if (sctx->cur_ino == 0)
  5671. return 0;
  5672. if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
  5673. sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
  5674. return 0;
  5675. if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
  5676. return 0;
  5677. ret = process_recorded_refs(sctx, pending_move);
  5678. if (ret < 0)
  5679. return ret;
  5680. *refs_processed = 1;
  5681. return 0;
  5682. }
  5683. static int finish_inode_if_needed(struct send_ctx *sctx, bool at_end)
  5684. {
  5685. int ret = 0;
  5686. struct btrfs_inode_info info;
  5687. u64 left_mode;
  5688. u64 left_uid;
  5689. u64 left_gid;
  5690. u64 left_fileattr;
  5691. u64 right_mode;
  5692. u64 right_uid;
  5693. u64 right_gid;
  5694. u64 right_fileattr;
  5695. int need_chmod = 0;
  5696. int need_chown = 0;
  5697. bool need_fileattr = false;
  5698. int need_truncate = 1;
  5699. int pending_move = 0;
  5700. int refs_processed = 0;
  5701. if (sctx->ignore_cur_inode)
  5702. return 0;
  5703. ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
  5704. &refs_processed);
  5705. if (ret < 0)
  5706. goto out;
  5707. /*
  5708. * We have processed the refs and thus need to advance send_progress.
  5709. * Now, calls to get_cur_xxx will take the updated refs of the current
  5710. * inode into account.
  5711. *
  5712. * On the other hand, if our current inode is a directory and couldn't
  5713. * be moved/renamed because its parent was renamed/moved too and it has
  5714. * a higher inode number, we can only move/rename our current inode
  5715. * after we moved/renamed its parent. Therefore in this case operate on
  5716. * the old path (pre move/rename) of our current inode, and the
  5717. * move/rename will be performed later.
  5718. */
  5719. if (refs_processed && !pending_move)
  5720. sctx->send_progress = sctx->cur_ino + 1;
  5721. if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
  5722. goto out;
  5723. if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
  5724. goto out;
  5725. ret = get_inode_info(sctx->send_root, sctx->cur_ino, &info);
  5726. if (ret < 0)
  5727. goto out;
  5728. left_mode = info.mode;
  5729. left_uid = info.uid;
  5730. left_gid = info.gid;
  5731. left_fileattr = info.fileattr;
  5732. if (!sctx->parent_root || sctx->cur_inode_new) {
  5733. need_chown = 1;
  5734. if (!S_ISLNK(sctx->cur_inode_mode))
  5735. need_chmod = 1;
  5736. if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
  5737. need_truncate = 0;
  5738. } else {
  5739. u64 old_size;
  5740. ret = get_inode_info(sctx->parent_root, sctx->cur_ino, &info);
  5741. if (ret < 0)
  5742. goto out;
  5743. old_size = info.size;
  5744. right_mode = info.mode;
  5745. right_uid = info.uid;
  5746. right_gid = info.gid;
  5747. right_fileattr = info.fileattr;
  5748. if (left_uid != right_uid || left_gid != right_gid)
  5749. need_chown = 1;
  5750. if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
  5751. need_chmod = 1;
  5752. if (!S_ISLNK(sctx->cur_inode_mode) && left_fileattr != right_fileattr)
  5753. need_fileattr = true;
  5754. if ((old_size == sctx->cur_inode_size) ||
  5755. (sctx->cur_inode_size > old_size &&
  5756. sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
  5757. need_truncate = 0;
  5758. }
  5759. if (S_ISREG(sctx->cur_inode_mode)) {
  5760. if (need_send_hole(sctx)) {
  5761. if (sctx->cur_inode_last_extent == (u64)-1 ||
  5762. sctx->cur_inode_last_extent <
  5763. sctx->cur_inode_size) {
  5764. ret = get_last_extent(sctx, (u64)-1);
  5765. if (ret)
  5766. goto out;
  5767. }
  5768. if (sctx->cur_inode_last_extent < sctx->cur_inode_size) {
  5769. ret = range_is_hole_in_parent(sctx,
  5770. sctx->cur_inode_last_extent,
  5771. sctx->cur_inode_size);
  5772. if (ret < 0) {
  5773. goto out;
  5774. } else if (ret == 0) {
  5775. ret = send_hole(sctx, sctx->cur_inode_size);
  5776. if (ret < 0)
  5777. goto out;
  5778. } else {
  5779. /* Range is already a hole, skip. */
  5780. ret = 0;
  5781. }
  5782. }
  5783. }
  5784. if (need_truncate) {
  5785. ret = send_truncate(sctx, sctx->cur_ino,
  5786. sctx->cur_inode_gen,
  5787. sctx->cur_inode_size);
  5788. if (ret < 0)
  5789. goto out;
  5790. }
  5791. }
  5792. if (need_chown) {
  5793. ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
  5794. left_uid, left_gid);
  5795. if (ret < 0)
  5796. goto out;
  5797. }
  5798. if (need_chmod) {
  5799. ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
  5800. left_mode);
  5801. if (ret < 0)
  5802. goto out;
  5803. }
  5804. if (need_fileattr) {
  5805. ret = send_fileattr(sctx, sctx->cur_ino, sctx->cur_inode_gen,
  5806. left_fileattr);
  5807. if (ret < 0)
  5808. goto out;
  5809. }
  5810. if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY)
  5811. && sctx->cur_inode_needs_verity) {
  5812. ret = process_verity(sctx);
  5813. if (ret < 0)
  5814. goto out;
  5815. }
  5816. ret = send_capabilities(sctx);
  5817. if (ret < 0)
  5818. goto out;
  5819. /*
  5820. * If other directory inodes depended on our current directory
  5821. * inode's move/rename, now do their move/rename operations.
  5822. */
  5823. if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
  5824. ret = apply_children_dir_moves(sctx);
  5825. if (ret)
  5826. goto out;
  5827. /*
  5828. * Need to send that every time, no matter if it actually
  5829. * changed between the two trees as we have done changes to
  5830. * the inode before. If our inode is a directory and it's
  5831. * waiting to be moved/renamed, we will send its utimes when
  5832. * it's moved/renamed, therefore we don't need to do it here.
  5833. */
  5834. sctx->send_progress = sctx->cur_ino + 1;
  5835. /*
  5836. * If the current inode is a non-empty directory, delay issuing
  5837. * the utimes command for it, as it's very likely we have inodes
  5838. * with an higher number inside it. We want to issue the utimes
  5839. * command only after adding all dentries to it.
  5840. */
  5841. if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_size > 0)
  5842. ret = cache_dir_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
  5843. else
  5844. ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
  5845. if (ret < 0)
  5846. goto out;
  5847. }
  5848. out:
  5849. if (!ret)
  5850. ret = trim_dir_utimes_cache(sctx);
  5851. return ret;
  5852. }
  5853. static void close_current_inode(struct send_ctx *sctx)
  5854. {
  5855. u64 i_size;
  5856. if (sctx->cur_inode == NULL)
  5857. return;
  5858. i_size = i_size_read(sctx->cur_inode);
  5859. /*
  5860. * If we are doing an incremental send, we may have extents between the
  5861. * last processed extent and the i_size that have not been processed
  5862. * because they haven't changed but we may have read some of their pages
  5863. * through readahead, see the comments at send_extent_data().
  5864. */
  5865. if (sctx->clean_page_cache && sctx->page_cache_clear_start < i_size)
  5866. truncate_inode_pages_range(&sctx->cur_inode->i_data,
  5867. sctx->page_cache_clear_start,
  5868. round_up(i_size, PAGE_SIZE) - 1);
  5869. iput(sctx->cur_inode);
  5870. sctx->cur_inode = NULL;
  5871. }
  5872. static int changed_inode(struct send_ctx *sctx,
  5873. enum btrfs_compare_tree_result result)
  5874. {
  5875. int ret;
  5876. struct btrfs_key *key = sctx->cmp_key;
  5877. struct btrfs_inode_item *left_ii = NULL;
  5878. struct btrfs_inode_item *right_ii = NULL;
  5879. u64 left_gen = 0;
  5880. u64 right_gen = 0;
  5881. close_current_inode(sctx);
  5882. sctx->cur_ino = key->objectid;
  5883. sctx->cur_inode_new_gen = false;
  5884. sctx->cur_inode_last_extent = (u64)-1;
  5885. sctx->cur_inode_next_write_offset = 0;
  5886. sctx->ignore_cur_inode = false;
  5887. fs_path_reset(&sctx->cur_inode_path);
  5888. /*
  5889. * Set send_progress to current inode. This will tell all get_cur_xxx
  5890. * functions that the current inode's refs are not updated yet. Later,
  5891. * when process_recorded_refs is finished, it is set to cur_ino + 1.
  5892. */
  5893. sctx->send_progress = sctx->cur_ino;
  5894. if (result == BTRFS_COMPARE_TREE_NEW ||
  5895. result == BTRFS_COMPARE_TREE_CHANGED) {
  5896. left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
  5897. sctx->left_path->slots[0],
  5898. struct btrfs_inode_item);
  5899. left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
  5900. left_ii);
  5901. } else {
  5902. right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
  5903. sctx->right_path->slots[0],
  5904. struct btrfs_inode_item);
  5905. right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
  5906. right_ii);
  5907. }
  5908. if (result == BTRFS_COMPARE_TREE_CHANGED) {
  5909. right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
  5910. sctx->right_path->slots[0],
  5911. struct btrfs_inode_item);
  5912. right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
  5913. right_ii);
  5914. /*
  5915. * The cur_ino = root dir case is special here. We can't treat
  5916. * the inode as deleted+reused because it would generate a
  5917. * stream that tries to delete/mkdir the root dir.
  5918. */
  5919. if (left_gen != right_gen &&
  5920. sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
  5921. sctx->cur_inode_new_gen = true;
  5922. }
  5923. /*
  5924. * Normally we do not find inodes with a link count of zero (orphans)
  5925. * because the most common case is to create a snapshot and use it
  5926. * for a send operation. However other less common use cases involve
  5927. * using a subvolume and send it after turning it to RO mode just
  5928. * after deleting all hard links of a file while holding an open
  5929. * file descriptor against it or turning a RO snapshot into RW mode,
  5930. * keep an open file descriptor against a file, delete it and then
  5931. * turn the snapshot back to RO mode before using it for a send
  5932. * operation. The former is what the receiver operation does.
  5933. * Therefore, if we want to send these snapshots soon after they're
  5934. * received, we need to handle orphan inodes as well. Moreover, orphans
  5935. * can appear not only in the send snapshot but also in the parent
  5936. * snapshot. Here are several cases:
  5937. *
  5938. * Case 1: BTRFS_COMPARE_TREE_NEW
  5939. * | send snapshot | action
  5940. * --------------------------------
  5941. * nlink | 0 | ignore
  5942. *
  5943. * Case 2: BTRFS_COMPARE_TREE_DELETED
  5944. * | parent snapshot | action
  5945. * ----------------------------------
  5946. * nlink | 0 | as usual
  5947. * Note: No unlinks will be sent because there're no paths for it.
  5948. *
  5949. * Case 3: BTRFS_COMPARE_TREE_CHANGED
  5950. * | | parent snapshot | send snapshot | action
  5951. * -----------------------------------------------------------------------
  5952. * subcase 1 | nlink | 0 | 0 | ignore
  5953. * subcase 2 | nlink | >0 | 0 | new_gen(deletion)
  5954. * subcase 3 | nlink | 0 | >0 | new_gen(creation)
  5955. *
  5956. */
  5957. if (result == BTRFS_COMPARE_TREE_NEW) {
  5958. if (btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii) == 0) {
  5959. sctx->ignore_cur_inode = true;
  5960. return 0;
  5961. }
  5962. sctx->cur_inode_gen = left_gen;
  5963. sctx->cur_inode_new = true;
  5964. sctx->cur_inode_deleted = false;
  5965. sctx->cur_inode_size = btrfs_inode_size(
  5966. sctx->left_path->nodes[0], left_ii);
  5967. sctx->cur_inode_mode = btrfs_inode_mode(
  5968. sctx->left_path->nodes[0], left_ii);
  5969. sctx->cur_inode_rdev = btrfs_inode_rdev(
  5970. sctx->left_path->nodes[0], left_ii);
  5971. if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
  5972. ret = send_create_inode_if_needed(sctx);
  5973. } else if (result == BTRFS_COMPARE_TREE_DELETED) {
  5974. sctx->cur_inode_gen = right_gen;
  5975. sctx->cur_inode_new = false;
  5976. sctx->cur_inode_deleted = true;
  5977. sctx->cur_inode_size = btrfs_inode_size(
  5978. sctx->right_path->nodes[0], right_ii);
  5979. sctx->cur_inode_mode = btrfs_inode_mode(
  5980. sctx->right_path->nodes[0], right_ii);
  5981. } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
  5982. u32 new_nlinks, old_nlinks;
  5983. new_nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
  5984. old_nlinks = btrfs_inode_nlink(sctx->right_path->nodes[0], right_ii);
  5985. if (new_nlinks == 0 && old_nlinks == 0) {
  5986. sctx->ignore_cur_inode = true;
  5987. return 0;
  5988. } else if (new_nlinks == 0 || old_nlinks == 0) {
  5989. sctx->cur_inode_new_gen = 1;
  5990. }
  5991. /*
  5992. * We need to do some special handling in case the inode was
  5993. * reported as changed with a changed generation number. This
  5994. * means that the original inode was deleted and new inode
  5995. * reused the same inum. So we have to treat the old inode as
  5996. * deleted and the new one as new.
  5997. */
  5998. if (sctx->cur_inode_new_gen) {
  5999. /*
  6000. * First, process the inode as if it was deleted.
  6001. */
  6002. if (old_nlinks > 0) {
  6003. sctx->cur_inode_gen = right_gen;
  6004. sctx->cur_inode_new = false;
  6005. sctx->cur_inode_deleted = true;
  6006. sctx->cur_inode_size = btrfs_inode_size(
  6007. sctx->right_path->nodes[0], right_ii);
  6008. sctx->cur_inode_mode = btrfs_inode_mode(
  6009. sctx->right_path->nodes[0], right_ii);
  6010. ret = process_all_refs(sctx,
  6011. BTRFS_COMPARE_TREE_DELETED);
  6012. if (ret < 0)
  6013. return ret;
  6014. }
  6015. /*
  6016. * Now process the inode as if it was new.
  6017. */
  6018. if (new_nlinks > 0) {
  6019. sctx->cur_inode_gen = left_gen;
  6020. sctx->cur_inode_new = true;
  6021. sctx->cur_inode_deleted = false;
  6022. sctx->cur_inode_size = btrfs_inode_size(
  6023. sctx->left_path->nodes[0],
  6024. left_ii);
  6025. sctx->cur_inode_mode = btrfs_inode_mode(
  6026. sctx->left_path->nodes[0],
  6027. left_ii);
  6028. sctx->cur_inode_rdev = btrfs_inode_rdev(
  6029. sctx->left_path->nodes[0],
  6030. left_ii);
  6031. ret = send_create_inode_if_needed(sctx);
  6032. if (ret < 0)
  6033. return ret;
  6034. ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
  6035. if (ret < 0)
  6036. return ret;
  6037. /*
  6038. * Advance send_progress now as we did not get
  6039. * into process_recorded_refs_if_needed in the
  6040. * new_gen case.
  6041. */
  6042. sctx->send_progress = sctx->cur_ino + 1;
  6043. /*
  6044. * Now process all extents and xattrs of the
  6045. * inode as if they were all new.
  6046. */
  6047. ret = process_all_extents(sctx);
  6048. if (ret < 0)
  6049. return ret;
  6050. ret = process_all_new_xattrs(sctx);
  6051. if (ret < 0)
  6052. return ret;
  6053. }
  6054. } else {
  6055. sctx->cur_inode_gen = left_gen;
  6056. sctx->cur_inode_new = false;
  6057. sctx->cur_inode_new_gen = false;
  6058. sctx->cur_inode_deleted = false;
  6059. sctx->cur_inode_size = btrfs_inode_size(
  6060. sctx->left_path->nodes[0], left_ii);
  6061. sctx->cur_inode_mode = btrfs_inode_mode(
  6062. sctx->left_path->nodes[0], left_ii);
  6063. }
  6064. }
  6065. return 0;
  6066. }
  6067. /*
  6068. * We have to process new refs before deleted refs, but compare_trees gives us
  6069. * the new and deleted refs mixed. To fix this, we record the new/deleted refs
  6070. * first and later process them in process_recorded_refs.
  6071. * For the cur_inode_new_gen case, we skip recording completely because
  6072. * changed_inode did already initiate processing of refs. The reason for this is
  6073. * that in this case, compare_tree actually compares the refs of 2 different
  6074. * inodes. To fix this, process_all_refs is used in changed_inode to handle all
  6075. * refs of the right tree as deleted and all refs of the left tree as new.
  6076. */
  6077. static int changed_ref(struct send_ctx *sctx,
  6078. enum btrfs_compare_tree_result result)
  6079. {
  6080. int ret = 0;
  6081. if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) {
  6082. inconsistent_snapshot_error(sctx, result, "reference");
  6083. return -EIO;
  6084. }
  6085. if (!sctx->cur_inode_new_gen &&
  6086. sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
  6087. if (result == BTRFS_COMPARE_TREE_NEW)
  6088. ret = record_new_ref(sctx);
  6089. else if (result == BTRFS_COMPARE_TREE_DELETED)
  6090. ret = record_deleted_ref(sctx);
  6091. else if (result == BTRFS_COMPARE_TREE_CHANGED)
  6092. ret = record_changed_ref(sctx);
  6093. }
  6094. return ret;
  6095. }
  6096. /*
  6097. * Process new/deleted/changed xattrs. We skip processing in the
  6098. * cur_inode_new_gen case because changed_inode did already initiate processing
  6099. * of xattrs. The reason is the same as in changed_ref
  6100. */
  6101. static int changed_xattr(struct send_ctx *sctx,
  6102. enum btrfs_compare_tree_result result)
  6103. {
  6104. int ret = 0;
  6105. if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) {
  6106. inconsistent_snapshot_error(sctx, result, "xattr");
  6107. return -EIO;
  6108. }
  6109. if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
  6110. if (result == BTRFS_COMPARE_TREE_NEW)
  6111. ret = process_new_xattr(sctx);
  6112. else if (result == BTRFS_COMPARE_TREE_DELETED)
  6113. ret = process_deleted_xattr(sctx);
  6114. else if (result == BTRFS_COMPARE_TREE_CHANGED)
  6115. ret = process_changed_xattr(sctx);
  6116. }
  6117. return ret;
  6118. }
  6119. /*
  6120. * Process new/deleted/changed extents. We skip processing in the
  6121. * cur_inode_new_gen case because changed_inode did already initiate processing
  6122. * of extents. The reason is the same as in changed_ref
  6123. */
  6124. static int changed_extent(struct send_ctx *sctx,
  6125. enum btrfs_compare_tree_result result)
  6126. {
  6127. int ret = 0;
  6128. /*
  6129. * We have found an extent item that changed without the inode item
  6130. * having changed. This can happen either after relocation (where the
  6131. * disk_bytenr of an extent item is replaced at
  6132. * relocation.c:replace_file_extents()) or after deduplication into a
  6133. * file in both the parent and send snapshots (where an extent item can
  6134. * get modified or replaced with a new one). Note that deduplication
  6135. * updates the inode item, but it only changes the iversion (sequence
  6136. * field in the inode item) of the inode, so if a file is deduplicated
  6137. * the same amount of times in both the parent and send snapshots, its
  6138. * iversion becomes the same in both snapshots, whence the inode item is
  6139. * the same on both snapshots.
  6140. */
  6141. if (sctx->cur_ino != sctx->cmp_key->objectid)
  6142. return 0;
  6143. if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
  6144. if (result != BTRFS_COMPARE_TREE_DELETED)
  6145. ret = process_extent(sctx, sctx->left_path,
  6146. sctx->cmp_key);
  6147. }
  6148. return ret;
  6149. }
  6150. static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
  6151. {
  6152. if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
  6153. if (result == BTRFS_COMPARE_TREE_NEW)
  6154. sctx->cur_inode_needs_verity = true;
  6155. }
  6156. return 0;
  6157. }
  6158. static int dir_changed(struct send_ctx *sctx, u64 dir)
  6159. {
  6160. u64 orig_gen, new_gen;
  6161. int ret;
  6162. ret = get_inode_gen(sctx->send_root, dir, &new_gen);
  6163. if (ret)
  6164. return ret;
  6165. ret = get_inode_gen(sctx->parent_root, dir, &orig_gen);
  6166. if (ret)
  6167. return ret;
  6168. return (orig_gen != new_gen) ? 1 : 0;
  6169. }
  6170. static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
  6171. struct btrfs_key *key)
  6172. {
  6173. struct btrfs_inode_extref *extref;
  6174. struct extent_buffer *leaf;
  6175. u64 dirid = 0, last_dirid = 0;
  6176. unsigned long ptr;
  6177. u32 item_size;
  6178. u32 cur_offset = 0;
  6179. int ref_name_len;
  6180. /* Easy case, just check this one dirid */
  6181. if (key->type == BTRFS_INODE_REF_KEY) {
  6182. dirid = key->offset;
  6183. return dir_changed(sctx, dirid);
  6184. }
  6185. leaf = path->nodes[0];
  6186. item_size = btrfs_item_size(leaf, path->slots[0]);
  6187. ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
  6188. while (cur_offset < item_size) {
  6189. int ret;
  6190. extref = (struct btrfs_inode_extref *)(ptr +
  6191. cur_offset);
  6192. dirid = btrfs_inode_extref_parent(leaf, extref);
  6193. ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
  6194. cur_offset += ref_name_len + sizeof(*extref);
  6195. if (dirid == last_dirid)
  6196. continue;
  6197. ret = dir_changed(sctx, dirid);
  6198. if (ret)
  6199. return ret;
  6200. last_dirid = dirid;
  6201. }
  6202. return 0;
  6203. }
  6204. /*
  6205. * Updates compare related fields in sctx and simply forwards to the actual
  6206. * changed_xxx functions.
  6207. */
  6208. static int changed_cb(struct btrfs_path *left_path,
  6209. struct btrfs_path *right_path,
  6210. struct btrfs_key *key,
  6211. enum btrfs_compare_tree_result result,
  6212. struct send_ctx *sctx)
  6213. {
  6214. int ret;
  6215. /*
  6216. * We can not hold the commit root semaphore here. This is because in
  6217. * the case of sending and receiving to the same filesystem, using a
  6218. * pipe, could result in a deadlock:
  6219. *
  6220. * 1) The task running send blocks on the pipe because it's full;
  6221. *
  6222. * 2) The task running receive, which is the only consumer of the pipe,
  6223. * is waiting for a transaction commit (for example due to a space
  6224. * reservation when doing a write or triggering a transaction commit
  6225. * when creating a subvolume);
  6226. *
  6227. * 3) The transaction is waiting to write lock the commit root semaphore,
  6228. * but can not acquire it since it's being held at 1).
  6229. *
  6230. * Down this call chain we write to the pipe through kernel_write().
  6231. * The same type of problem can also happen when sending to a file that
  6232. * is stored in the same filesystem - when reserving space for a write
  6233. * into the file, we can trigger a transaction commit.
  6234. *
  6235. * Our caller has supplied us with clones of leaves from the send and
  6236. * parent roots, so we're safe here from a concurrent relocation and
  6237. * further reallocation of metadata extents while we are here. Below we
  6238. * also assert that the leaves are clones.
  6239. */
  6240. lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem);
  6241. /*
  6242. * We always have a send root, so left_path is never NULL. We will not
  6243. * have a leaf when we have reached the end of the send root but have
  6244. * not yet reached the end of the parent root.
  6245. */
  6246. if (left_path->nodes[0])
  6247. ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
  6248. &left_path->nodes[0]->bflags));
  6249. /*
  6250. * When doing a full send we don't have a parent root, so right_path is
  6251. * NULL. When doing an incremental send, we may have reached the end of
  6252. * the parent root already, so we don't have a leaf at right_path.
  6253. */
  6254. if (right_path && right_path->nodes[0])
  6255. ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
  6256. &right_path->nodes[0]->bflags));
  6257. if (result == BTRFS_COMPARE_TREE_SAME) {
  6258. if (key->type == BTRFS_INODE_REF_KEY ||
  6259. key->type == BTRFS_INODE_EXTREF_KEY) {
  6260. ret = compare_refs(sctx, left_path, key);
  6261. if (!ret)
  6262. return 0;
  6263. if (ret < 0)
  6264. return ret;
  6265. } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
  6266. return maybe_send_hole(sctx, left_path, key);
  6267. } else {
  6268. return 0;
  6269. }
  6270. result = BTRFS_COMPARE_TREE_CHANGED;
  6271. }
  6272. sctx->left_path = left_path;
  6273. sctx->right_path = right_path;
  6274. sctx->cmp_key = key;
  6275. ret = finish_inode_if_needed(sctx, 0);
  6276. if (ret < 0)
  6277. return ret;
  6278. /* Ignore non-FS objects */
  6279. if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
  6280. key->objectid == BTRFS_FREE_SPACE_OBJECTID)
  6281. return 0;
  6282. if (key->type == BTRFS_INODE_ITEM_KEY) {
  6283. ret = changed_inode(sctx, result);
  6284. } else if (!sctx->ignore_cur_inode) {
  6285. if (key->type == BTRFS_INODE_REF_KEY ||
  6286. key->type == BTRFS_INODE_EXTREF_KEY)
  6287. ret = changed_ref(sctx, result);
  6288. else if (key->type == BTRFS_XATTR_ITEM_KEY)
  6289. ret = changed_xattr(sctx, result);
  6290. else if (key->type == BTRFS_EXTENT_DATA_KEY)
  6291. ret = changed_extent(sctx, result);
  6292. else if (key->type == BTRFS_VERITY_DESC_ITEM_KEY &&
  6293. key->offset == 0)
  6294. ret = changed_verity(sctx, result);
  6295. }
  6296. return ret;
  6297. }
  6298. static int search_key_again(const struct send_ctx *sctx,
  6299. struct btrfs_root *root,
  6300. struct btrfs_path *path,
  6301. const struct btrfs_key *key)
  6302. {
  6303. int ret;
  6304. if (!path->need_commit_sem)
  6305. lockdep_assert_held_read(&root->fs_info->commit_root_sem);
  6306. /*
  6307. * Roots used for send operations are readonly and no one can add,
  6308. * update or remove keys from them, so we should be able to find our
  6309. * key again. The only exception is deduplication, which can operate on
  6310. * readonly roots and add, update or remove keys to/from them - but at
  6311. * the moment we don't allow it to run in parallel with send.
  6312. */
  6313. ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
  6314. ASSERT(ret <= 0);
  6315. if (unlikely(ret > 0)) {
  6316. btrfs_print_tree(path->nodes[path->lowest_level], false);
  6317. btrfs_err(root->fs_info,
  6318. "send: key " BTRFS_KEY_FMT" not found in %s root %llu, lowest_level %d, slot %d",
  6319. BTRFS_KEY_FMT_VALUE(key),
  6320. (root == sctx->parent_root ? "parent" : "send"),
  6321. btrfs_root_id(root), path->lowest_level,
  6322. path->slots[path->lowest_level]);
  6323. return -EUCLEAN;
  6324. }
  6325. return ret;
  6326. }
  6327. static int full_send_tree(struct send_ctx *sctx)
  6328. {
  6329. int ret;
  6330. struct btrfs_root *send_root = sctx->send_root;
  6331. struct btrfs_key key;
  6332. struct btrfs_fs_info *fs_info = send_root->fs_info;
  6333. BTRFS_PATH_AUTO_FREE(path);
  6334. path = alloc_path_for_send();
  6335. if (!path)
  6336. return -ENOMEM;
  6337. path->reada = READA_FORWARD_ALWAYS;
  6338. key.objectid = BTRFS_FIRST_FREE_OBJECTID;
  6339. key.type = BTRFS_INODE_ITEM_KEY;
  6340. key.offset = 0;
  6341. down_read(&fs_info->commit_root_sem);
  6342. sctx->last_reloc_trans = fs_info->last_reloc_trans;
  6343. up_read(&fs_info->commit_root_sem);
  6344. ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
  6345. if (ret < 0)
  6346. return ret;
  6347. if (ret)
  6348. goto out_finish;
  6349. while (1) {
  6350. btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
  6351. ret = changed_cb(path, NULL, &key,
  6352. BTRFS_COMPARE_TREE_NEW, sctx);
  6353. if (ret < 0)
  6354. return ret;
  6355. down_read(&fs_info->commit_root_sem);
  6356. if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
  6357. sctx->last_reloc_trans = fs_info->last_reloc_trans;
  6358. up_read(&fs_info->commit_root_sem);
  6359. /*
  6360. * A transaction used for relocating a block group was
  6361. * committed or is about to finish its commit. Release
  6362. * our path (leaf) and restart the search, so that we
  6363. * avoid operating on any file extent items that are
  6364. * stale, with a disk_bytenr that reflects a pre
  6365. * relocation value. This way we avoid as much as
  6366. * possible to fallback to regular writes when checking
  6367. * if we can clone file ranges.
  6368. */
  6369. btrfs_release_path(path);
  6370. ret = search_key_again(sctx, send_root, path, &key);
  6371. if (ret < 0)
  6372. return ret;
  6373. } else {
  6374. up_read(&fs_info->commit_root_sem);
  6375. }
  6376. ret = btrfs_next_item(send_root, path);
  6377. if (ret < 0)
  6378. return ret;
  6379. if (ret) {
  6380. ret = 0;
  6381. break;
  6382. }
  6383. }
  6384. out_finish:
  6385. return finish_inode_if_needed(sctx, 1);
  6386. }
  6387. static int replace_node_with_clone(struct btrfs_path *path, int level)
  6388. {
  6389. struct extent_buffer *clone;
  6390. clone = btrfs_clone_extent_buffer(path->nodes[level]);
  6391. if (!clone)
  6392. return -ENOMEM;
  6393. free_extent_buffer(path->nodes[level]);
  6394. path->nodes[level] = clone;
  6395. return 0;
  6396. }
  6397. static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)
  6398. {
  6399. struct extent_buffer *eb;
  6400. struct extent_buffer *parent = path->nodes[*level];
  6401. int slot = path->slots[*level];
  6402. const int nritems = btrfs_header_nritems(parent);
  6403. u64 reada_max;
  6404. u64 reada_done = 0;
  6405. lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
  6406. ASSERT(*level != 0);
  6407. eb = btrfs_read_node_slot(parent, slot);
  6408. if (IS_ERR(eb))
  6409. return PTR_ERR(eb);
  6410. /*
  6411. * Trigger readahead for the next leaves we will process, so that it is
  6412. * very likely that when we need them they are already in memory and we
  6413. * will not block on disk IO. For nodes we only do readahead for one,
  6414. * since the time window between processing nodes is typically larger.
  6415. */
  6416. reada_max = (*level == 1 ? SZ_128K : eb->fs_info->nodesize);
  6417. for (slot++; slot < nritems && reada_done < reada_max; slot++) {
  6418. if (btrfs_node_ptr_generation(parent, slot) > reada_min_gen) {
  6419. btrfs_readahead_node_child(parent, slot);
  6420. reada_done += eb->fs_info->nodesize;
  6421. }
  6422. }
  6423. path->nodes[*level - 1] = eb;
  6424. path->slots[*level - 1] = 0;
  6425. (*level)--;
  6426. if (*level == 0)
  6427. return replace_node_with_clone(path, 0);
  6428. return 0;
  6429. }
  6430. static int tree_move_next_or_upnext(struct btrfs_path *path,
  6431. int *level, int root_level)
  6432. {
  6433. int ret = 0;
  6434. int nritems;
  6435. nritems = btrfs_header_nritems(path->nodes[*level]);
  6436. path->slots[*level]++;
  6437. while (path->slots[*level] >= nritems) {
  6438. if (*level == root_level) {
  6439. path->slots[*level] = nritems - 1;
  6440. return -1;
  6441. }
  6442. /* move upnext */
  6443. path->slots[*level] = 0;
  6444. free_extent_buffer(path->nodes[*level]);
  6445. path->nodes[*level] = NULL;
  6446. (*level)++;
  6447. path->slots[*level]++;
  6448. nritems = btrfs_header_nritems(path->nodes[*level]);
  6449. ret = 1;
  6450. }
  6451. return ret;
  6452. }
  6453. /*
  6454. * Returns 1 if it had to move up and next. 0 is returned if it moved only next
  6455. * or down.
  6456. */
  6457. static int tree_advance(struct btrfs_path *path,
  6458. int *level, int root_level,
  6459. int allow_down,
  6460. struct btrfs_key *key,
  6461. u64 reada_min_gen)
  6462. {
  6463. int ret;
  6464. if (*level == 0 || !allow_down) {
  6465. ret = tree_move_next_or_upnext(path, level, root_level);
  6466. } else {
  6467. ret = tree_move_down(path, level, reada_min_gen);
  6468. }
  6469. /*
  6470. * Even if we have reached the end of a tree, ret is -1, update the key
  6471. * anyway, so that in case we need to restart due to a block group
  6472. * relocation, we can assert that the last key of the root node still
  6473. * exists in the tree.
  6474. */
  6475. if (*level == 0)
  6476. btrfs_item_key_to_cpu(path->nodes[*level], key,
  6477. path->slots[*level]);
  6478. else
  6479. btrfs_node_key_to_cpu(path->nodes[*level], key,
  6480. path->slots[*level]);
  6481. return ret;
  6482. }
  6483. static int tree_compare_item(struct btrfs_path *left_path,
  6484. struct btrfs_path *right_path,
  6485. char *tmp_buf)
  6486. {
  6487. int cmp;
  6488. int len1, len2;
  6489. unsigned long off1, off2;
  6490. len1 = btrfs_item_size(left_path->nodes[0], left_path->slots[0]);
  6491. len2 = btrfs_item_size(right_path->nodes[0], right_path->slots[0]);
  6492. if (len1 != len2)
  6493. return 1;
  6494. off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
  6495. off2 = btrfs_item_ptr_offset(right_path->nodes[0],
  6496. right_path->slots[0]);
  6497. read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
  6498. cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
  6499. if (cmp)
  6500. return 1;
  6501. return 0;
  6502. }
  6503. /*
  6504. * A transaction used for relocating a block group was committed or is about to
  6505. * finish its commit. Release our paths and restart the search, so that we are
  6506. * not using stale extent buffers:
  6507. *
  6508. * 1) For levels > 0, we are only holding references of extent buffers, without
  6509. * any locks on them, which does not prevent them from having been relocated
  6510. * and reallocated after the last time we released the commit root semaphore.
  6511. * The exception are the root nodes, for which we always have a clone, see
  6512. * the comment at btrfs_compare_trees();
  6513. *
  6514. * 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so
  6515. * we are safe from the concurrent relocation and reallocation. However they
  6516. * can have file extent items with a pre relocation disk_bytenr value, so we
  6517. * restart the start from the current commit roots and clone the new leaves so
  6518. * that we get the post relocation disk_bytenr values. Not doing so, could
  6519. * make us clone the wrong data in case there are new extents using the old
  6520. * disk_bytenr that happen to be shared.
  6521. */
  6522. static int restart_after_relocation(struct btrfs_path *left_path,
  6523. struct btrfs_path *right_path,
  6524. const struct btrfs_key *left_key,
  6525. const struct btrfs_key *right_key,
  6526. int left_level,
  6527. int right_level,
  6528. const struct send_ctx *sctx)
  6529. {
  6530. int root_level;
  6531. int ret;
  6532. lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem);
  6533. btrfs_release_path(left_path);
  6534. btrfs_release_path(right_path);
  6535. /*
  6536. * Since keys can not be added or removed to/from our roots because they
  6537. * are readonly and we do not allow deduplication to run in parallel
  6538. * (which can add, remove or change keys), the layout of the trees should
  6539. * not change.
  6540. */
  6541. left_path->lowest_level = left_level;
  6542. ret = search_key_again(sctx, sctx->send_root, left_path, left_key);
  6543. if (ret < 0)
  6544. return ret;
  6545. right_path->lowest_level = right_level;
  6546. ret = search_key_again(sctx, sctx->parent_root, right_path, right_key);
  6547. if (ret < 0)
  6548. return ret;
  6549. /*
  6550. * If the lowest level nodes are leaves, clone them so that they can be
  6551. * safely used by changed_cb() while not under the protection of the
  6552. * commit root semaphore, even if relocation and reallocation happens in
  6553. * parallel.
  6554. */
  6555. if (left_level == 0) {
  6556. ret = replace_node_with_clone(left_path, 0);
  6557. if (ret < 0)
  6558. return ret;
  6559. }
  6560. if (right_level == 0) {
  6561. ret = replace_node_with_clone(right_path, 0);
  6562. if (ret < 0)
  6563. return ret;
  6564. }
  6565. /*
  6566. * Now clone the root nodes (unless they happen to be the leaves we have
  6567. * already cloned). This is to protect against concurrent snapshotting of
  6568. * the send and parent roots (see the comment at btrfs_compare_trees()).
  6569. */
  6570. root_level = btrfs_header_level(sctx->send_root->commit_root);
  6571. if (root_level > 0) {
  6572. ret = replace_node_with_clone(left_path, root_level);
  6573. if (ret < 0)
  6574. return ret;
  6575. }
  6576. root_level = btrfs_header_level(sctx->parent_root->commit_root);
  6577. if (root_level > 0) {
  6578. ret = replace_node_with_clone(right_path, root_level);
  6579. if (ret < 0)
  6580. return ret;
  6581. }
  6582. return 0;
  6583. }
/*
 * This function compares two trees and calls changed_cb() for every
 * changed/new/deleted item it finds.
 * If shared tree blocks are encountered, whole subtrees are skipped, making
 * the compare pretty fast on snapshotted subvolumes.
 *
 * This currently works on commit roots only. The walk is done with the commit
 * root semaphore held for reading; the semaphore is released around every
 * changed_cb() call and, briefly, whenever a reschedule is needed or the
 * semaphore is contended. If a block group relocation transaction is detected
 * (fs_info->last_reloc_trans advanced), both paths are re-searched through
 * restart_after_relocation().
 *
 * @left_root:  the send root.
 * @right_root: the parent root.
 * @sctx:       the send context, passed through to changed_cb().
 *
 * Returns 0 once the end of both trees was reached, or a negative errno
 * (-ENOMEM on allocation failure, or an error from restart_after_relocation(),
 * tree_advance() or changed_cb()).
 */
static int btrfs_compare_trees(struct btrfs_root *left_root,
			struct btrfs_root *right_root, struct send_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = left_root->fs_info;
	int ret;
	int cmp;
	BTRFS_PATH_AUTO_FREE(left_path);
	BTRFS_PATH_AUTO_FREE(right_path);
	struct btrfs_key left_key;
	struct btrfs_key right_key;
	/* Scratch buffer of nodesize bytes used by tree_compare_item(). */
	char *tmp_buf = NULL;
	int left_root_level;
	int right_root_level;
	int left_level;
	int right_level;
	/* Only tested for being non-zero below. */
	int left_end_reached = 0;
	int right_end_reached = 0;
	/* 0 (do not advance), ADVANCE or ADVANCE_ONLY_NEXT. */
	int advance_left = 0;
	int advance_right = 0;
	u64 left_blockptr;
	u64 right_blockptr;
	u64 left_gen;
	u64 right_gen;
	u64 reada_min_gen;

	left_path = btrfs_alloc_path();
	if (!left_path) {
		ret = -ENOMEM;
		goto out;
	}
	right_path = btrfs_alloc_path();
	if (!right_path) {
		ret = -ENOMEM;
		goto out;
	}

	tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
	if (!tmp_buf) {
		ret = -ENOMEM;
		goto out;
	}

	left_path->search_commit_root = true;
	left_path->skip_locking = true;
	right_path->search_commit_root = true;
	right_path->skip_locking = true;

	/*
	 * Strategy: Go to the first items of both trees. Then do
	 *
	 * If both trees are at level 0
	 *   Compare keys of current items
	 *     If left < right treat left item as new, advance left tree
	 *       and repeat
	 *     If left > right treat right item as deleted, advance right tree
	 *       and repeat
	 *     If left == right do deep compare of items, treat as changed if
	 *       needed, advance both trees and repeat
	 * If both trees are at the same level but not at level 0
	 *   Compare keys of current nodes/leafs
	 *     If left < right advance left tree and repeat
	 *     If left > right advance right tree and repeat
	 *     If left == right compare blockptrs of the next nodes/leafs
	 *       If they match advance both trees but stay at the same level
	 *         and repeat
	 *       If they don't match advance both trees while allowing to go
	 *         deeper and repeat
	 * If tree levels are different
	 *   Advance the tree that needs it and repeat
	 *
	 * Advancing a tree means:
	 *   If we are at level 0, try to go to the next slot. If that's not
	 *   possible, go one level up and repeat. Stop when we found a level
	 *   where we could go to the next slot. We may at this point be on a
	 *   node or a leaf.
	 *
	 *   If we are not at level 0 and not on shared tree blocks, go one
	 *   level deeper.
	 *
	 *   If we are not at level 0 and on shared tree blocks, go one slot to
	 *   the right if possible or go up and right.
	 */

	down_read(&fs_info->commit_root_sem);
	left_level = btrfs_header_level(left_root->commit_root);
	left_root_level = left_level;
	/*
	 * We clone the root node of the send and parent roots to prevent races
	 * with snapshot creation of these roots. Snapshot creation COWs the
	 * root node of a tree, so after the transaction is committed the old
	 * extent can be reallocated while this send operation is still ongoing.
	 * So we clone them, under the commit root semaphore, to be race free.
	 */
	left_path->nodes[left_level] =
			btrfs_clone_extent_buffer(left_root->commit_root);
	if (!left_path->nodes[left_level]) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	right_level = btrfs_header_level(right_root->commit_root);
	right_root_level = right_level;
	right_path->nodes[right_level] =
			btrfs_clone_extent_buffer(right_root->commit_root);
	if (!right_path->nodes[right_level]) {
		ret = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Our right root is the parent root, while the left root is the "send"
	 * root. We know that all new nodes/leaves in the left root must have
	 * a generation greater than the right root's generation, so we trigger
	 * readahead for those nodes and leaves of the left root, as we know we
	 * will need to read them at some point.
	 */
	reada_min_gen = btrfs_header_generation(right_root->commit_root);

	/* Load the first key of each tree: an item key for a leaf, a node key otherwise. */
	if (left_level == 0)
		btrfs_item_key_to_cpu(left_path->nodes[left_level],
				&left_key, left_path->slots[left_level]);
	else
		btrfs_node_key_to_cpu(left_path->nodes[left_level],
				&left_key, left_path->slots[left_level]);
	if (right_level == 0)
		btrfs_item_key_to_cpu(right_path->nodes[right_level],
				&right_key, right_path->slots[right_level]);
	else
		btrfs_node_key_to_cpu(right_path->nodes[right_level],
				&right_key, right_path->slots[right_level]);

	/* Baseline used to detect relocation transactions during the walk. */
	sctx->last_reloc_trans = fs_info->last_reloc_trans;

	/* The commit root semaphore is held for reading at the top of every iteration. */
	while (1) {
		/*
		 * Briefly drop the semaphore when we should reschedule or
		 * when someone else is contending for it.
		 */
		if (need_resched() ||
		    rwsem_is_contended(&fs_info->commit_root_sem)) {
			up_read(&fs_info->commit_root_sem);
			cond_resched();
			down_read(&fs_info->commit_root_sem);
		}

		/* A relocation transaction happened: refresh both paths. */
		if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
			ret = restart_after_relocation(left_path, right_path,
						       &left_key, &right_key,
						       left_level, right_level,
						       sctx);
			if (ret < 0)
				goto out_unlock;
			sctx->last_reloc_trans = fs_info->last_reloc_trans;
		}

		/*
		 * Perform the advances requested by the previous iteration.
		 * tree_advance() returns -1 at the end of a tree; any other
		 * negative value is an error.
		 */
		if (advance_left && !left_end_reached) {
			ret = tree_advance(left_path, &left_level,
					left_root_level,
					advance_left != ADVANCE_ONLY_NEXT,
					&left_key, reada_min_gen);
			if (ret == -1)
				left_end_reached = ADVANCE;
			else if (ret < 0)
				goto out_unlock;
			advance_left = 0;
		}
		if (advance_right && !right_end_reached) {
			ret = tree_advance(right_path, &right_level,
					right_root_level,
					advance_right != ADVANCE_ONLY_NEXT,
					&right_key, reada_min_gen);
			if (ret == -1)
				right_end_reached = ADVANCE;
			else if (ret < 0)
				goto out_unlock;
			advance_right = 0;
		}

		if (left_end_reached && right_end_reached) {
			ret = 0;
			goto out_unlock;
		} else if (left_end_reached) {
			/* Only the parent root has items left: they were deleted. */
			if (right_level == 0) {
				up_read(&fs_info->commit_root_sem);
				ret = changed_cb(left_path, right_path,
						&right_key,
						BTRFS_COMPARE_TREE_DELETED,
						sctx);
				/* The semaphore is not held here, so skip out_unlock. */
				if (ret < 0)
					goto out;
				down_read(&fs_info->commit_root_sem);
			}
			advance_right = ADVANCE;
			continue;
		} else if (right_end_reached) {
			/* Only the send root has items left: they are new. */
			if (left_level == 0) {
				up_read(&fs_info->commit_root_sem);
				ret = changed_cb(left_path, right_path,
						&left_key,
						BTRFS_COMPARE_TREE_NEW,
						sctx);
				/* The semaphore is not held here, so skip out_unlock. */
				if (ret < 0)
					goto out;
				down_read(&fs_info->commit_root_sem);
			}
			advance_left = ADVANCE;
			continue;
		}

		if (left_level == 0 && right_level == 0) {
			/* Both sides are on leaves: compare items without the semaphore held. */
			up_read(&fs_info->commit_root_sem);
			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
			if (cmp < 0) {
				ret = changed_cb(left_path, right_path,
						&left_key,
						BTRFS_COMPARE_TREE_NEW,
						sctx);
				advance_left = ADVANCE;
			} else if (cmp > 0) {
				ret = changed_cb(left_path, right_path,
						&right_key,
						BTRFS_COMPARE_TREE_DELETED,
						sctx);
				advance_right = ADVANCE;
			} else {
				enum btrfs_compare_tree_result result;

				WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
				/* Same key on both sides: compare the item contents. */
				ret = tree_compare_item(left_path, right_path,
							tmp_buf);
				if (ret)
					result = BTRFS_COMPARE_TREE_CHANGED;
				else
					result = BTRFS_COMPARE_TREE_SAME;
				ret = changed_cb(left_path, right_path,
						 &left_key, result, sctx);
				advance_left = ADVANCE;
				advance_right = ADVANCE;
			}

			/* The semaphore is not held here, so skip out_unlock. */
			if (ret < 0)
				goto out;
			down_read(&fs_info->commit_root_sem);
		} else if (left_level == right_level) {
			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
			if (cmp < 0) {
				advance_left = ADVANCE;
			} else if (cmp > 0) {
				advance_right = ADVANCE;
			} else {
				left_blockptr = btrfs_node_blockptr(
						left_path->nodes[left_level],
						left_path->slots[left_level]);
				right_blockptr = btrfs_node_blockptr(
						right_path->nodes[right_level],
						right_path->slots[right_level]);
				left_gen = btrfs_node_ptr_generation(
						left_path->nodes[left_level],
						left_path->slots[left_level]);
				right_gen = btrfs_node_ptr_generation(
						right_path->nodes[right_level],
						right_path->slots[right_level]);
				if (left_blockptr == right_blockptr &&
				    left_gen == right_gen) {
					/*
					 * As we're on a shared block, don't
					 * allow to go deeper.
					 */
					advance_left = ADVANCE_ONLY_NEXT;
					advance_right = ADVANCE_ONLY_NEXT;
				} else {
					advance_left = ADVANCE;
					advance_right = ADVANCE;
				}
			}
		} else if (left_level < right_level) {
			/* Levels differ: advance the side that is at the higher level. */
			advance_right = ADVANCE;
		} else {
			advance_left = ADVANCE;
		}
	}

out_unlock:
	up_read(&fs_info->commit_root_sem);
out:
	kvfree(tmp_buf);
	return ret;
}
  6864. static int send_subvol(struct send_ctx *sctx)
  6865. {
  6866. int ret;
  6867. if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
  6868. ret = send_header(sctx);
  6869. if (ret < 0)
  6870. goto out;
  6871. }
  6872. ret = send_subvol_begin(sctx);
  6873. if (ret < 0)
  6874. goto out;
  6875. if (sctx->parent_root) {
  6876. ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx);
  6877. if (ret < 0)
  6878. goto out;
  6879. ret = finish_inode_if_needed(sctx, 1);
  6880. if (ret < 0)
  6881. goto out;
  6882. } else {
  6883. ret = full_send_tree(sctx);
  6884. if (ret < 0)
  6885. goto out;
  6886. }
  6887. out:
  6888. free_recorded_refs(sctx);
  6889. return ret;
  6890. }
  6891. /*
  6892. * If orphan cleanup did remove any orphans from a root, it means the tree
  6893. * was modified and therefore the commit root is not the same as the current
  6894. * root anymore. This is a problem, because send uses the commit root and
  6895. * therefore can see inode items that don't exist in the current root anymore,
  6896. * and for example make calls to btrfs_iget, which will do tree lookups based
  6897. * on the current root and not on the commit root. Those lookups will fail,
  6898. * returning a -ESTALE error, and making send fail with that error. So make
  6899. * sure a send does not see any orphans we have just removed, and that it will
  6900. * see the same inodes regardless of whether a transaction commit happened
  6901. * before it started (meaning that the commit root will be the same as the
  6902. * current root) or not.
  6903. */
  6904. static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
  6905. {
  6906. struct btrfs_root *root = sctx->parent_root;
  6907. if (root && root->node != root->commit_root)
  6908. return btrfs_commit_current_transaction(root);
  6909. for (int i = 0; i < sctx->clone_roots_cnt; i++) {
  6910. root = sctx->clone_roots[i].root;
  6911. if (root->node != root->commit_root)
  6912. return btrfs_commit_current_transaction(root);
  6913. }
  6914. return 0;
  6915. }
  6916. /*
  6917. * Make sure any existing delalloc is flushed for any root used by a send
  6918. * operation so that we do not miss any data and we do not race with writeback
  6919. * finishing and changing a tree while send is using the tree. This could
  6920. * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
  6921. * a send operation then uses the subvolume.
  6922. * After flushing delalloc ensure_commit_roots_uptodate() must be called.
  6923. */
  6924. static int flush_delalloc_roots(struct send_ctx *sctx)
  6925. {
  6926. struct btrfs_root *root = sctx->parent_root;
  6927. int ret;
  6928. int i;
  6929. if (root) {
  6930. ret = btrfs_start_delalloc_snapshot(root, false);
  6931. if (ret)
  6932. return ret;
  6933. btrfs_wait_ordered_extents(root, U64_MAX, NULL);
  6934. }
  6935. for (i = 0; i < sctx->clone_roots_cnt; i++) {
  6936. root = sctx->clone_roots[i].root;
  6937. ret = btrfs_start_delalloc_snapshot(root, false);
  6938. if (ret)
  6939. return ret;
  6940. btrfs_wait_ordered_extents(root, U64_MAX, NULL);
  6941. }
  6942. return 0;
  6943. }
  6944. static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
  6945. {
  6946. spin_lock(&root->root_item_lock);
  6947. root->send_in_progress--;
  6948. /*
  6949. * Not much left to do, we don't know why it's unbalanced and
  6950. * can't blindly reset it to 0.
  6951. */
  6952. if (root->send_in_progress < 0)
  6953. btrfs_err(root->fs_info,
  6954. "send_in_progress unbalanced %d root %llu",
  6955. root->send_in_progress, btrfs_root_id(root));
  6956. spin_unlock(&root->root_item_lock);
  6957. }
  6958. static void dedupe_in_progress_warn(const struct btrfs_root *root)
  6959. {
  6960. btrfs_warn_rl(root->fs_info,
  6961. "cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
  6962. btrfs_root_id(root), root->dedupe_in_progress);
  6963. }
  6964. long btrfs_ioctl_send(struct btrfs_root *send_root, const struct btrfs_ioctl_send_args *arg)
  6965. {
  6966. int ret = 0;
  6967. struct btrfs_fs_info *fs_info = send_root->fs_info;
  6968. struct btrfs_root *clone_root;
  6969. struct send_ctx *sctx = NULL;
  6970. u32 i;
  6971. u64 *clone_sources_tmp = NULL;
  6972. int clone_sources_to_rollback = 0;
  6973. size_t alloc_size;
  6974. int sort_clone_roots = 0;
  6975. struct btrfs_lru_cache_entry *entry;
  6976. struct btrfs_lru_cache_entry *tmp;
  6977. if (!capable(CAP_SYS_ADMIN))
  6978. return -EPERM;
  6979. /*
  6980. * The subvolume must remain read-only during send, protect against
  6981. * making it RW. This also protects against deletion.
  6982. */
  6983. spin_lock(&send_root->root_item_lock);
  6984. /*
  6985. * Unlikely but possible, if the subvolume is marked for deletion but
  6986. * is slow to remove the directory entry, send can still be started.
  6987. */
  6988. if (btrfs_root_dead(send_root)) {
  6989. spin_unlock(&send_root->root_item_lock);
  6990. return -EPERM;
  6991. }
  6992. /* Userspace tools do the checks and warn the user if it's not RO. */
  6993. if (!btrfs_root_readonly(send_root)) {
  6994. spin_unlock(&send_root->root_item_lock);
  6995. return -EPERM;
  6996. }
  6997. if (send_root->dedupe_in_progress) {
  6998. dedupe_in_progress_warn(send_root);
  6999. spin_unlock(&send_root->root_item_lock);
  7000. return -EAGAIN;
  7001. }
  7002. send_root->send_in_progress++;
  7003. spin_unlock(&send_root->root_item_lock);
  7004. /*
  7005. * Check that we don't overflow at later allocations, we request
  7006. * clone_sources_count + 1 items, and compare to unsigned long inside
  7007. * access_ok. Also set an upper limit for allocation size so this can't
  7008. * easily exhaust memory. Max number of clone sources is about 200K.
  7009. */
  7010. if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
  7011. ret = -EINVAL;
  7012. goto out;
  7013. }
  7014. if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
  7015. ret = -EOPNOTSUPP;
  7016. goto out;
  7017. }
  7018. sctx = kzalloc_obj(struct send_ctx);
  7019. if (!sctx) {
  7020. ret = -ENOMEM;
  7021. goto out;
  7022. }
  7023. init_path(&sctx->cur_inode_path);
  7024. INIT_LIST_HEAD(&sctx->new_refs);
  7025. INIT_LIST_HEAD(&sctx->deleted_refs);
  7026. btrfs_lru_cache_init(&sctx->name_cache, SEND_MAX_NAME_CACHE_SIZE);
  7027. btrfs_lru_cache_init(&sctx->backref_cache, SEND_MAX_BACKREF_CACHE_SIZE);
  7028. btrfs_lru_cache_init(&sctx->dir_created_cache,
  7029. SEND_MAX_DIR_CREATED_CACHE_SIZE);
  7030. /*
  7031. * This cache is periodically trimmed to a fixed size elsewhere, see
  7032. * cache_dir_utimes() and trim_dir_utimes_cache().
  7033. */
  7034. btrfs_lru_cache_init(&sctx->dir_utimes_cache, 0);
  7035. sctx->pending_dir_moves = RB_ROOT;
  7036. sctx->waiting_dir_moves = RB_ROOT;
  7037. sctx->orphan_dirs = RB_ROOT;
  7038. sctx->rbtree_new_refs = RB_ROOT;
  7039. sctx->rbtree_deleted_refs = RB_ROOT;
  7040. sctx->flags = arg->flags;
  7041. if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
  7042. if (arg->version > BTRFS_SEND_STREAM_VERSION) {
  7043. ret = -EPROTO;
  7044. goto out;
  7045. }
  7046. /* Zero means "use the highest version" */
  7047. sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
  7048. } else {
  7049. sctx->proto = 1;
  7050. }
  7051. if ((arg->flags & BTRFS_SEND_FLAG_COMPRESSED) && sctx->proto < 2) {
  7052. ret = -EINVAL;
  7053. goto out;
  7054. }
  7055. sctx->send_filp = fget(arg->send_fd);
  7056. if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
  7057. ret = -EBADF;
  7058. goto out;
  7059. }
  7060. sctx->send_root = send_root;
  7061. sctx->clone_roots_cnt = arg->clone_sources_count;
  7062. if (sctx->proto >= 2) {
  7063. u32 send_buf_num_pages;
  7064. sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V2;
  7065. sctx->send_buf = vmalloc(sctx->send_max_size);
  7066. if (!sctx->send_buf) {
  7067. ret = -ENOMEM;
  7068. goto out;
  7069. }
  7070. send_buf_num_pages = sctx->send_max_size >> PAGE_SHIFT;
  7071. sctx->send_buf_pages = kzalloc_objs(*sctx->send_buf_pages,
  7072. send_buf_num_pages);
  7073. if (!sctx->send_buf_pages) {
  7074. ret = -ENOMEM;
  7075. goto out;
  7076. }
  7077. for (i = 0; i < send_buf_num_pages; i++) {
  7078. sctx->send_buf_pages[i] =
  7079. vmalloc_to_page(sctx->send_buf + (i << PAGE_SHIFT));
  7080. }
  7081. } else {
  7082. sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
  7083. sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
  7084. }
  7085. if (!sctx->send_buf) {
  7086. ret = -ENOMEM;
  7087. goto out;
  7088. }
  7089. sctx->clone_roots = kvzalloc_objs(*sctx->clone_roots,
  7090. arg->clone_sources_count + 1);
  7091. if (!sctx->clone_roots) {
  7092. ret = -ENOMEM;
  7093. goto out;
  7094. }
  7095. alloc_size = array_size(sizeof(*arg->clone_sources),
  7096. arg->clone_sources_count);
  7097. if (arg->clone_sources_count) {
  7098. clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
  7099. if (!clone_sources_tmp) {
  7100. ret = -ENOMEM;
  7101. goto out;
  7102. }
  7103. ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
  7104. alloc_size);
  7105. if (ret) {
  7106. ret = -EFAULT;
  7107. goto out;
  7108. }
  7109. for (i = 0; i < arg->clone_sources_count; i++) {
  7110. clone_root = btrfs_get_fs_root(fs_info,
  7111. clone_sources_tmp[i], true);
  7112. if (IS_ERR(clone_root)) {
  7113. ret = PTR_ERR(clone_root);
  7114. goto out;
  7115. }
  7116. spin_lock(&clone_root->root_item_lock);
  7117. if (!btrfs_root_readonly(clone_root) ||
  7118. btrfs_root_dead(clone_root)) {
  7119. spin_unlock(&clone_root->root_item_lock);
  7120. btrfs_put_root(clone_root);
  7121. ret = -EPERM;
  7122. goto out;
  7123. }
  7124. if (clone_root->dedupe_in_progress) {
  7125. dedupe_in_progress_warn(clone_root);
  7126. spin_unlock(&clone_root->root_item_lock);
  7127. btrfs_put_root(clone_root);
  7128. ret = -EAGAIN;
  7129. goto out;
  7130. }
  7131. clone_root->send_in_progress++;
  7132. spin_unlock(&clone_root->root_item_lock);
  7133. sctx->clone_roots[i].root = clone_root;
  7134. clone_sources_to_rollback = i + 1;
  7135. }
  7136. kvfree(clone_sources_tmp);
  7137. clone_sources_tmp = NULL;
  7138. }
  7139. if (arg->parent_root) {
  7140. sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
  7141. true);
  7142. if (IS_ERR(sctx->parent_root)) {
  7143. ret = PTR_ERR(sctx->parent_root);
  7144. goto out;
  7145. }
  7146. spin_lock(&sctx->parent_root->root_item_lock);
  7147. sctx->parent_root->send_in_progress++;
  7148. if (!btrfs_root_readonly(sctx->parent_root) ||
  7149. btrfs_root_dead(sctx->parent_root)) {
  7150. spin_unlock(&sctx->parent_root->root_item_lock);
  7151. ret = -EPERM;
  7152. goto out;
  7153. }
  7154. if (sctx->parent_root->dedupe_in_progress) {
  7155. dedupe_in_progress_warn(sctx->parent_root);
  7156. spin_unlock(&sctx->parent_root->root_item_lock);
  7157. ret = -EAGAIN;
  7158. goto out;
  7159. }
  7160. spin_unlock(&sctx->parent_root->root_item_lock);
  7161. }
  7162. /*
  7163. * Clones from send_root are allowed, but only if the clone source
  7164. * is behind the current send position. This is checked while searching
  7165. * for possible clone sources.
  7166. */
  7167. sctx->clone_roots[sctx->clone_roots_cnt++].root =
  7168. btrfs_grab_root(sctx->send_root);
  7169. /* We do a bsearch later */
  7170. sort(sctx->clone_roots, sctx->clone_roots_cnt,
  7171. sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
  7172. NULL);
  7173. sort_clone_roots = 1;
  7174. ret = flush_delalloc_roots(sctx);
  7175. if (ret)
  7176. goto out;
  7177. ret = ensure_commit_roots_uptodate(sctx);
  7178. if (ret)
  7179. goto out;
  7180. ret = send_subvol(sctx);
  7181. if (ret < 0)
  7182. goto out;
  7183. btrfs_lru_cache_for_each_entry_safe(&sctx->dir_utimes_cache, entry, tmp) {
  7184. ret = send_utimes(sctx, entry->key, entry->gen);
  7185. if (ret < 0)
  7186. goto out;
  7187. btrfs_lru_cache_remove(&sctx->dir_utimes_cache, entry);
  7188. }
  7189. if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
  7190. ret = begin_cmd(sctx, BTRFS_SEND_C_END);
  7191. if (ret < 0)
  7192. goto out;
  7193. ret = send_cmd(sctx);
  7194. if (ret < 0)
  7195. goto out;
  7196. }
  7197. out:
  7198. WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
  7199. while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
  7200. struct rb_node *n;
  7201. struct pending_dir_move *pm;
  7202. n = rb_first(&sctx->pending_dir_moves);
  7203. pm = rb_entry(n, struct pending_dir_move, node);
  7204. while (!list_empty(&pm->list)) {
  7205. struct pending_dir_move *pm2;
  7206. pm2 = list_first_entry(&pm->list,
  7207. struct pending_dir_move, list);
  7208. free_pending_move(sctx, pm2);
  7209. }
  7210. free_pending_move(sctx, pm);
  7211. }
  7212. WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
  7213. while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
  7214. struct rb_node *n;
  7215. struct waiting_dir_move *dm;
  7216. n = rb_first(&sctx->waiting_dir_moves);
  7217. dm = rb_entry(n, struct waiting_dir_move, node);
  7218. rb_erase(&dm->node, &sctx->waiting_dir_moves);
  7219. kfree(dm);
  7220. }
  7221. WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
  7222. while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
  7223. struct rb_node *n;
  7224. struct orphan_dir_info *odi;
  7225. n = rb_first(&sctx->orphan_dirs);
  7226. odi = rb_entry(n, struct orphan_dir_info, node);
  7227. free_orphan_dir_info(sctx, odi);
  7228. }
  7229. if (sort_clone_roots) {
  7230. for (i = 0; i < sctx->clone_roots_cnt; i++) {
  7231. btrfs_root_dec_send_in_progress(
  7232. sctx->clone_roots[i].root);
  7233. btrfs_put_root(sctx->clone_roots[i].root);
  7234. }
  7235. } else {
  7236. for (i = 0; sctx && i < clone_sources_to_rollback; i++) {
  7237. btrfs_root_dec_send_in_progress(
  7238. sctx->clone_roots[i].root);
  7239. btrfs_put_root(sctx->clone_roots[i].root);
  7240. }
  7241. btrfs_root_dec_send_in_progress(send_root);
  7242. }
  7243. if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) {
  7244. btrfs_root_dec_send_in_progress(sctx->parent_root);
  7245. btrfs_put_root(sctx->parent_root);
  7246. }
  7247. kvfree(clone_sources_tmp);
  7248. if (sctx) {
  7249. if (sctx->send_filp)
  7250. fput(sctx->send_filp);
  7251. kvfree(sctx->clone_roots);
  7252. kfree(sctx->send_buf_pages);
  7253. kvfree(sctx->send_buf);
  7254. kvfree(sctx->verity_descriptor);
  7255. close_current_inode(sctx);
  7256. btrfs_lru_cache_clear(&sctx->name_cache);
  7257. btrfs_lru_cache_clear(&sctx->backref_cache);
  7258. btrfs_lru_cache_clear(&sctx->dir_created_cache);
  7259. btrfs_lru_cache_clear(&sctx->dir_utimes_cache);
  7260. if (sctx->cur_inode_path.buf != sctx->cur_inode_path.inline_buf)
  7261. kfree(sctx->cur_inode_path.buf);
  7262. kfree(sctx);
  7263. }
  7264. return ret;
  7265. }