data.c 112 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * fs/f2fs/data.c
  4. *
  5. * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  6. * http://www.samsung.com/
  7. */
  8. #include <linux/fs.h>
  9. #include <linux/f2fs_fs.h>
  10. #include <linux/sched/mm.h>
  11. #include <linux/mpage.h>
  12. #include <linux/writeback.h>
  13. #include <linux/pagevec.h>
  14. #include <linux/blkdev.h>
  15. #include <linux/bio.h>
  16. #include <linux/blk-crypto.h>
  17. #include <linux/swap.h>
  18. #include <linux/prefetch.h>
  19. #include <linux/uio.h>
  20. #include <linux/sched/signal.h>
  21. #include <linux/fiemap.h>
  22. #include <linux/iomap.h>
  23. #include "f2fs.h"
  24. #include "node.h"
  25. #include "segment.h"
  26. #include "iostat.h"
  27. #include <trace/events/f2fs.h>
  28. #define NUM_PREALLOC_POST_READ_CTXS 128
  29. static struct kmem_cache *bio_post_read_ctx_cache;
  30. static struct kmem_cache *bio_entry_slab;
  31. static struct kmem_cache *ffs_entry_slab;
  32. static mempool_t *bio_post_read_ctx_pool;
  33. static struct bio_set f2fs_bioset;
  34. struct f2fs_folio_state {
  35. spinlock_t state_lock;
  36. unsigned int read_pages_pending;
  37. };
  38. #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
  39. int __init f2fs_init_bioset(void)
  40. {
  41. return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
  42. 0, BIOSET_NEED_BVECS);
  43. }
  44. void f2fs_destroy_bioset(void)
  45. {
  46. bioset_exit(&f2fs_bioset);
  47. }
  48. bool f2fs_is_cp_guaranteed(const struct folio *folio)
  49. {
  50. struct address_space *mapping = folio->mapping;
  51. struct inode *inode;
  52. struct f2fs_sb_info *sbi;
  53. if (fscrypt_is_bounce_folio(folio))
  54. return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio));
  55. inode = mapping->host;
  56. sbi = F2FS_I_SB(inode);
  57. if (inode->i_ino == F2FS_META_INO(sbi) ||
  58. inode->i_ino == F2FS_NODE_INO(sbi) ||
  59. S_ISDIR(inode->i_mode))
  60. return true;
  61. if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
  62. folio_test_f2fs_gcing(folio))
  63. return true;
  64. return false;
  65. }
  66. static enum count_type __read_io_type(struct folio *folio)
  67. {
  68. struct address_space *mapping = folio->mapping;
  69. if (mapping) {
  70. struct inode *inode = mapping->host;
  71. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  72. if (inode->i_ino == F2FS_META_INO(sbi))
  73. return F2FS_RD_META;
  74. if (inode->i_ino == F2FS_NODE_INO(sbi))
  75. return F2FS_RD_NODE;
  76. }
  77. return F2FS_RD_DATA;
  78. }
  79. /* postprocessing steps for read bios */
  80. enum bio_post_read_step {
  81. #ifdef CONFIG_FS_ENCRYPTION
  82. STEP_DECRYPT = BIT(0),
  83. #else
  84. STEP_DECRYPT = 0, /* compile out the decryption-related code */
  85. #endif
  86. #ifdef CONFIG_F2FS_FS_COMPRESSION
  87. STEP_DECOMPRESS = BIT(1),
  88. #else
  89. STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
  90. #endif
  91. #ifdef CONFIG_FS_VERITY
  92. STEP_VERITY = BIT(2),
  93. #else
  94. STEP_VERITY = 0, /* compile out the verity-related code */
  95. #endif
  96. };
  97. struct bio_post_read_ctx {
  98. struct bio *bio;
  99. struct f2fs_sb_info *sbi;
  100. struct fsverity_info *vi;
  101. struct work_struct work;
  102. unsigned int enabled_steps;
  103. /*
  104. * decompression_attempted keeps track of whether
  105. * f2fs_end_read_compressed_page() has been called on the pages in the
  106. * bio that belong to a compressed cluster yet.
  107. */
  108. bool decompression_attempted;
  109. block_t fs_blkaddr;
  110. };
  111. /*
  112. * Update and unlock a bio's pages, and free the bio.
  113. *
  114. * This marks pages up-to-date only if there was no error in the bio (I/O error,
  115. * decryption error, or verity error), as indicated by bio->bi_status.
  116. *
  117. * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
  118. * aren't marked up-to-date here, as decompression is done on a per-compression-
  119. * cluster basis rather than a per-bio basis. Instead, we only must do two
  120. * things for each compressed page here: call f2fs_end_read_compressed_page()
  121. * with failed=true if an error occurred before it would have normally gotten
  122. * called (i.e., I/O error or decryption error, but *not* verity error), and
  123. * release the bio's reference to the decompress_io_ctx of the page's cluster.
  124. */
  125. static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
  126. {
  127. struct folio_iter fi;
  128. struct bio_post_read_ctx *ctx = bio->bi_private;
  129. unsigned long flags;
  130. bio_for_each_folio_all(fi, bio) {
  131. struct folio *folio = fi.folio;
  132. unsigned nr_pages = fi.length >> PAGE_SHIFT;
  133. bool finished = true;
  134. if (!folio_test_large(folio) &&
  135. f2fs_is_compressed_page(folio)) {
  136. if (ctx && !ctx->decompression_attempted)
  137. f2fs_end_read_compressed_page(folio, true, 0,
  138. in_task);
  139. f2fs_put_folio_dic(folio, in_task);
  140. continue;
  141. }
  142. if (folio_test_large(folio)) {
  143. struct f2fs_folio_state *ffs = folio->private;
  144. spin_lock_irqsave(&ffs->state_lock, flags);
  145. ffs->read_pages_pending -= nr_pages;
  146. finished = !ffs->read_pages_pending;
  147. spin_unlock_irqrestore(&ffs->state_lock, flags);
  148. }
  149. while (nr_pages--)
  150. dec_page_count(F2FS_F_SB(folio), __read_io_type(folio));
  151. if (F2FS_F_SB(folio)->node_inode && is_node_folio(folio) &&
  152. f2fs_sanity_check_node_footer(F2FS_F_SB(folio),
  153. folio, folio->index, NODE_TYPE_REGULAR, true))
  154. bio->bi_status = BLK_STS_IOERR;
  155. if (finished)
  156. folio_end_read(folio, bio->bi_status == BLK_STS_OK);
  157. }
  158. if (ctx)
  159. mempool_free(ctx, bio_post_read_ctx_pool);
  160. bio_put(bio);
  161. }
  162. static void f2fs_verify_bio(struct work_struct *work)
  163. {
  164. struct bio_post_read_ctx *ctx =
  165. container_of(work, struct bio_post_read_ctx, work);
  166. struct bio *bio = ctx->bio;
  167. bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
  168. struct fsverity_info *vi = ctx->vi;
  169. /*
  170. * fsverity_verify_bio() may call readahead() again, and while verity
  171. * will be disabled for this, decryption and/or decompression may still
  172. * be needed, resulting in another bio_post_read_ctx being allocated.
  173. * So to prevent deadlocks we need to release the current ctx to the
  174. * mempool first. This assumes that verity is the last post-read step.
  175. */
  176. mempool_free(ctx, bio_post_read_ctx_pool);
  177. bio->bi_private = NULL;
  178. /*
  179. * Verify the bio's pages with fs-verity. Exclude compressed pages,
  180. * as those were handled separately by f2fs_end_read_compressed_page().
  181. */
  182. if (may_have_compressed_pages) {
  183. struct folio_iter fi;
  184. bio_for_each_folio_all(fi, bio) {
  185. struct folio *folio = fi.folio;
  186. if (!f2fs_is_compressed_page(folio) &&
  187. !fsverity_verify_folio(vi, folio)) {
  188. bio->bi_status = BLK_STS_IOERR;
  189. break;
  190. }
  191. }
  192. } else {
  193. fsverity_verify_bio(vi, bio);
  194. }
  195. f2fs_finish_read_bio(bio, true);
  196. }
  197. /*
  198. * If the bio's data needs to be verified with fs-verity, then enqueue the
  199. * verity work for the bio. Otherwise finish the bio now.
  200. *
  201. * Note that to avoid deadlocks, the verity work can't be done on the
  202. * decryption/decompression workqueue. This is because verifying the data pages
  203. * can involve reading verity metadata pages from the file, and these verity
  204. * metadata pages may be encrypted and/or compressed.
  205. */
  206. static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
  207. {
  208. struct bio_post_read_ctx *ctx = bio->bi_private;
  209. if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
  210. INIT_WORK(&ctx->work, f2fs_verify_bio);
  211. fsverity_enqueue_verify_work(&ctx->work);
  212. } else {
  213. f2fs_finish_read_bio(bio, in_task);
  214. }
  215. }
  216. /*
  217. * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
  218. * remaining page was read by @ctx->bio.
  219. *
  220. * Note that a bio may span clusters (even a mix of compressed and uncompressed
  221. * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates
  222. * that the bio includes at least one compressed page. The actual decompression
  223. * is done on a per-cluster basis, not a per-bio basis.
  224. */
  225. static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
  226. bool in_task)
  227. {
  228. struct folio_iter fi;
  229. bool all_compressed = true;
  230. block_t blkaddr = ctx->fs_blkaddr;
  231. bio_for_each_folio_all(fi, ctx->bio) {
  232. struct folio *folio = fi.folio;
  233. if (f2fs_is_compressed_page(folio))
  234. f2fs_end_read_compressed_page(folio, false, blkaddr,
  235. in_task);
  236. else
  237. all_compressed = false;
  238. blkaddr++;
  239. }
  240. ctx->decompression_attempted = true;
  241. /*
  242. * Optimization: if all the bio's pages are compressed, then scheduling
  243. * the per-bio verity work is unnecessary, as verity will be fully
  244. * handled at the compression cluster level.
  245. */
  246. if (all_compressed)
  247. ctx->enabled_steps &= ~STEP_VERITY;
  248. }
  249. static void f2fs_post_read_work(struct work_struct *work)
  250. {
  251. struct bio_post_read_ctx *ctx =
  252. container_of(work, struct bio_post_read_ctx, work);
  253. struct bio *bio = ctx->bio;
  254. if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
  255. f2fs_finish_read_bio(bio, true);
  256. return;
  257. }
  258. if (ctx->enabled_steps & STEP_DECOMPRESS)
  259. f2fs_handle_step_decompress(ctx, true);
  260. f2fs_verify_and_finish_bio(bio, true);
  261. }
  262. static void f2fs_read_end_io(struct bio *bio)
  263. {
  264. struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio));
  265. struct bio_post_read_ctx *ctx;
  266. bool intask = in_task() && !irqs_disabled();
  267. iostat_update_and_unbind_ctx(bio);
  268. ctx = bio->bi_private;
  269. if (time_to_inject(sbi, FAULT_READ_IO))
  270. bio->bi_status = BLK_STS_IOERR;
  271. if (bio->bi_status != BLK_STS_OK) {
  272. f2fs_finish_read_bio(bio, intask);
  273. return;
  274. }
  275. if (ctx) {
  276. unsigned int enabled_steps = ctx->enabled_steps &
  277. (STEP_DECRYPT | STEP_DECOMPRESS);
  278. /*
  279. * If we have only decompression step between decompression and
  280. * decrypt, we don't need post processing for this.
  281. */
  282. if (enabled_steps == STEP_DECOMPRESS &&
  283. !f2fs_low_mem_mode(sbi)) {
  284. f2fs_handle_step_decompress(ctx, intask);
  285. } else if (enabled_steps) {
  286. INIT_WORK(&ctx->work, f2fs_post_read_work);
  287. queue_work(ctx->sbi->post_read_wq, &ctx->work);
  288. return;
  289. }
  290. }
  291. f2fs_verify_and_finish_bio(bio, intask);
  292. }
  293. static void f2fs_write_end_io(struct bio *bio)
  294. {
  295. struct f2fs_sb_info *sbi;
  296. struct folio_iter fi;
  297. iostat_update_and_unbind_ctx(bio);
  298. sbi = bio->bi_private;
  299. if (time_to_inject(sbi, FAULT_WRITE_IO))
  300. bio->bi_status = BLK_STS_IOERR;
  301. bio_for_each_folio_all(fi, bio) {
  302. struct folio *folio = fi.folio;
  303. enum count_type type;
  304. if (fscrypt_is_bounce_folio(folio)) {
  305. struct folio *io_folio = folio;
  306. folio = fscrypt_pagecache_folio(io_folio);
  307. fscrypt_free_bounce_page(&io_folio->page);
  308. }
  309. #ifdef CONFIG_F2FS_FS_COMPRESSION
  310. if (f2fs_is_compressed_page(folio)) {
  311. f2fs_compress_write_end_io(bio, folio);
  312. continue;
  313. }
  314. #endif
  315. type = WB_DATA_TYPE(folio, false);
  316. if (unlikely(bio->bi_status != BLK_STS_OK)) {
  317. mapping_set_error(folio->mapping, -EIO);
  318. if (type == F2FS_WB_CP_DATA)
  319. f2fs_stop_checkpoint(sbi, true,
  320. STOP_CP_REASON_WRITE_FAIL);
  321. }
  322. if (is_node_folio(folio)) {
  323. f2fs_sanity_check_node_footer(sbi, folio,
  324. folio->index, NODE_TYPE_REGULAR, true);
  325. f2fs_bug_on(sbi, folio->index != nid_of_node(folio));
  326. }
  327. dec_page_count(sbi, type);
  328. /*
  329. * we should access sbi before folio_end_writeback() to
  330. * avoid racing w/ kill_f2fs_super()
  331. */
  332. if (type == F2FS_WB_CP_DATA && !get_pages(sbi, type) &&
  333. wq_has_sleeper(&sbi->cp_wait))
  334. wake_up(&sbi->cp_wait);
  335. if (f2fs_in_warm_node_list(sbi, folio))
  336. f2fs_del_fsync_node_entry(sbi, folio);
  337. folio_clear_f2fs_gcing(folio);
  338. folio_end_writeback(folio);
  339. }
  340. bio_put(bio);
  341. }
  342. #ifdef CONFIG_BLK_DEV_ZONED
  343. static void f2fs_zone_write_end_io(struct bio *bio)
  344. {
  345. struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
  346. bio->bi_private = io->bi_private;
  347. complete(&io->zone_wait);
  348. f2fs_write_end_io(bio);
  349. }
  350. #endif
  351. struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
  352. block_t blk_addr, sector_t *sector)
  353. {
  354. struct block_device *bdev = sbi->sb->s_bdev;
  355. int i;
  356. if (f2fs_is_multi_device(sbi)) {
  357. for (i = 0; i < sbi->s_ndevs; i++) {
  358. if (FDEV(i).start_blk <= blk_addr &&
  359. FDEV(i).end_blk >= blk_addr) {
  360. blk_addr -= FDEV(i).start_blk;
  361. bdev = FDEV(i).bdev;
  362. break;
  363. }
  364. }
  365. }
  366. if (sector)
  367. *sector = SECTOR_FROM_BLOCK(blk_addr);
  368. return bdev;
  369. }
  370. int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
  371. {
  372. int i;
  373. if (!f2fs_is_multi_device(sbi))
  374. return 0;
  375. for (i = 0; i < sbi->s_ndevs; i++)
  376. if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
  377. return i;
  378. return 0;
  379. }
  380. static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
  381. {
  382. unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
  383. unsigned int fua_flag, meta_flag, io_flag;
  384. blk_opf_t op_flags = 0;
  385. if (fio->op != REQ_OP_WRITE)
  386. return 0;
  387. if (fio->type == DATA)
  388. io_flag = fio->sbi->data_io_flag;
  389. else if (fio->type == NODE)
  390. io_flag = fio->sbi->node_io_flag;
  391. else
  392. return 0;
  393. fua_flag = io_flag & temp_mask;
  394. meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
  395. /*
  396. * data/node io flag bits per temp:
  397. * REQ_META | REQ_FUA |
  398. * 5 | 4 | 3 | 2 | 1 | 0 |
  399. * Cold | Warm | Hot | Cold | Warm | Hot |
  400. */
  401. if (BIT(fio->temp) & meta_flag)
  402. op_flags |= REQ_META;
  403. if (BIT(fio->temp) & fua_flag)
  404. op_flags |= REQ_FUA;
  405. if (fio->type == DATA &&
  406. F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE)
  407. op_flags |= REQ_PRIO;
  408. return op_flags;
  409. }
  410. static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
  411. {
  412. struct f2fs_sb_info *sbi = fio->sbi;
  413. struct block_device *bdev;
  414. sector_t sector;
  415. struct bio *bio;
  416. bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
  417. bio = bio_alloc_bioset(bdev, npages,
  418. fio->op | fio->op_flags | f2fs_io_flags(fio),
  419. GFP_NOIO, &f2fs_bioset);
  420. bio->bi_iter.bi_sector = sector;
  421. if (is_read_io(fio->op)) {
  422. bio->bi_end_io = f2fs_read_end_io;
  423. bio->bi_private = NULL;
  424. } else {
  425. bio->bi_end_io = f2fs_write_end_io;
  426. bio->bi_private = sbi;
  427. bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
  428. fio->type, fio->temp);
  429. }
  430. iostat_alloc_and_bind_ctx(sbi, bio, NULL);
  431. if (fio->io_wbc)
  432. wbc_init_bio(fio->io_wbc, bio);
  433. return bio;
  434. }
  435. static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
  436. pgoff_t first_idx,
  437. const struct f2fs_io_info *fio,
  438. gfp_t gfp_mask)
  439. {
  440. /*
  441. * The f2fs garbage collector sets ->encrypted_page when it wants to
  442. * read/write raw data without encryption.
  443. */
  444. if (!fio || !fio->encrypted_page)
  445. fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
  446. }
  447. static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
  448. pgoff_t next_idx,
  449. const struct f2fs_io_info *fio)
  450. {
  451. /*
  452. * The f2fs garbage collector sets ->encrypted_page when it wants to
  453. * read/write raw data without encryption.
  454. */
  455. if (fio && fio->encrypted_page)
  456. return !bio_has_crypt_ctx(bio);
  457. return fscrypt_mergeable_bio(bio, inode, next_idx);
  458. }
  459. void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
  460. enum page_type type)
  461. {
  462. if (!bio)
  463. return;
  464. WARN_ON_ONCE(!is_read_io(bio_op(bio)));
  465. trace_f2fs_submit_read_bio(sbi->sb, type, bio);
  466. iostat_update_submit_ctx(bio, type);
  467. blk_crypto_submit_bio(bio);
  468. }
  469. static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
  470. enum page_type type)
  471. {
  472. WARN_ON_ONCE(is_read_io(bio_op(bio)));
  473. trace_f2fs_submit_write_bio(sbi->sb, type, bio);
  474. iostat_update_submit_ctx(bio, type);
  475. blk_crypto_submit_bio(bio);
  476. }
  477. static void __submit_merged_bio(struct f2fs_bio_info *io)
  478. {
  479. struct f2fs_io_info *fio = &io->fio;
  480. if (!io->bio)
  481. return;
  482. if (is_read_io(fio->op)) {
  483. trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
  484. f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
  485. } else {
  486. trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
  487. f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
  488. }
  489. io->bio = NULL;
  490. }
  491. static bool __has_merged_page(struct bio *bio, struct inode *inode,
  492. struct folio *folio, nid_t ino)
  493. {
  494. struct folio_iter fi;
  495. if (!bio)
  496. return false;
  497. if (!inode && !folio && !ino)
  498. return true;
  499. bio_for_each_folio_all(fi, bio) {
  500. struct folio *target = fi.folio;
  501. if (fscrypt_is_bounce_folio(target)) {
  502. target = fscrypt_pagecache_folio(target);
  503. if (IS_ERR(target))
  504. continue;
  505. }
  506. if (f2fs_is_compressed_page(target)) {
  507. target = f2fs_compress_control_folio(target);
  508. if (IS_ERR(target))
  509. continue;
  510. }
  511. if (inode && inode == target->mapping->host)
  512. return true;
  513. if (folio && folio == target)
  514. return true;
  515. if (ino && ino == ino_of_node(target))
  516. return true;
  517. }
  518. return false;
  519. }
  520. int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
  521. {
  522. int i;
  523. for (i = 0; i < NR_PAGE_TYPE; i++) {
  524. int n = (i == META) ? 1 : NR_TEMP_TYPE;
  525. int j;
  526. sbi->write_io[i] = f2fs_kmalloc(sbi,
  527. array_size(n, sizeof(struct f2fs_bio_info)),
  528. GFP_KERNEL);
  529. if (!sbi->write_io[i])
  530. return -ENOMEM;
  531. for (j = HOT; j < n; j++) {
  532. struct f2fs_bio_info *io = &sbi->write_io[i][j];
  533. init_f2fs_rwsem_trace(&io->io_rwsem, sbi,
  534. LOCK_NAME_IO_RWSEM);
  535. io->sbi = sbi;
  536. io->bio = NULL;
  537. io->last_block_in_bio = 0;
  538. spin_lock_init(&io->io_lock);
  539. INIT_LIST_HEAD(&io->io_list);
  540. INIT_LIST_HEAD(&io->bio_list);
  541. init_f2fs_rwsem(&io->bio_list_lock);
  542. #ifdef CONFIG_BLK_DEV_ZONED
  543. init_completion(&io->zone_wait);
  544. io->zone_pending_bio = NULL;
  545. io->bi_private = NULL;
  546. #endif
  547. }
  548. }
  549. return 0;
  550. }
  551. static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
  552. enum page_type type, enum temp_type temp)
  553. {
  554. enum page_type btype = PAGE_TYPE_OF_BIO(type);
  555. struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
  556. struct f2fs_lock_context lc;
  557. f2fs_down_write_trace(&io->io_rwsem, &lc);
  558. if (!io->bio)
  559. goto unlock_out;
  560. /* change META to META_FLUSH in the checkpoint procedure */
  561. if (type >= META_FLUSH) {
  562. io->fio.type = META_FLUSH;
  563. io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
  564. if (!test_opt(sbi, NOBARRIER))
  565. io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
  566. }
  567. __submit_merged_bio(io);
  568. unlock_out:
  569. f2fs_up_write_trace(&io->io_rwsem, &lc);
  570. }
  571. static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
  572. struct inode *inode, struct folio *folio,
  573. nid_t ino, enum page_type type, bool writeback)
  574. {
  575. enum temp_type temp;
  576. bool ret = true;
  577. bool force = !inode && !folio && !ino;
  578. for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
  579. if (!force) {
  580. enum page_type btype = PAGE_TYPE_OF_BIO(type);
  581. struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
  582. struct f2fs_lock_context lc;
  583. f2fs_down_read_trace(&io->io_rwsem, &lc);
  584. ret = __has_merged_page(io->bio, inode, folio, ino);
  585. f2fs_up_read_trace(&io->io_rwsem, &lc);
  586. }
  587. if (ret) {
  588. __f2fs_submit_merged_write(sbi, type, temp);
  589. /*
  590. * For waitting writebck case, if the bio owned by the
  591. * folio is already submitted, we do not need to submit
  592. * other types of bios.
  593. */
  594. if (writeback)
  595. break;
  596. }
  597. /* TODO: use HOT temp only for meta pages now. */
  598. if (type >= META)
  599. break;
  600. }
  601. }
  602. void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
  603. {
  604. __submit_merged_write_cond(sbi, NULL, NULL, 0, type, false);
  605. }
  606. void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
  607. struct inode *inode, struct folio *folio,
  608. nid_t ino, enum page_type type)
  609. {
  610. __submit_merged_write_cond(sbi, inode, folio, ino, type, false);
  611. }
  612. void f2fs_submit_merged_write_folio(struct f2fs_sb_info *sbi,
  613. struct folio *folio, enum page_type type)
  614. {
  615. __submit_merged_write_cond(sbi, NULL, folio, 0, type, true);
  616. }
  617. void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
  618. {
  619. f2fs_submit_merged_write(sbi, DATA);
  620. f2fs_submit_merged_write(sbi, NODE);
  621. f2fs_submit_merged_write(sbi, META);
  622. }
  623. /*
  624. * Fill the locked page with data located in the block address.
  625. * A caller needs to unlock the page on failure.
  626. */
  627. int f2fs_submit_page_bio(struct f2fs_io_info *fio)
  628. {
  629. struct bio *bio;
  630. struct folio *fio_folio = fio->folio;
  631. struct folio *data_folio = fio->encrypted_page ?
  632. page_folio(fio->encrypted_page) : fio_folio;
  633. if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
  634. fio->is_por ? META_POR : (__is_meta_io(fio) ?
  635. META_GENERIC : DATA_GENERIC_ENHANCE)))
  636. return -EFSCORRUPTED;
  637. trace_f2fs_submit_folio_bio(data_folio, fio);
  638. /* Allocate a new bio */
  639. bio = __bio_alloc(fio, 1);
  640. f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host,
  641. fio_folio->index, fio, GFP_NOIO);
  642. bio_add_folio_nofail(bio, data_folio, folio_size(data_folio), 0);
  643. if (fio->io_wbc && !is_read_io(fio->op))
  644. wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE);
  645. inc_page_count(fio->sbi, is_read_io(fio->op) ?
  646. __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false));
  647. if (is_read_io(bio_op(bio)))
  648. f2fs_submit_read_bio(fio->sbi, bio, fio->type);
  649. else
  650. f2fs_submit_write_bio(fio->sbi, bio, fio->type);
  651. return 0;
  652. }
  653. static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
  654. block_t last_blkaddr, block_t cur_blkaddr)
  655. {
  656. if (unlikely(sbi->max_io_bytes &&
  657. bio->bi_iter.bi_size >= sbi->max_io_bytes))
  658. return false;
  659. if (last_blkaddr + 1 != cur_blkaddr)
  660. return false;
  661. return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
  662. }
  663. static bool io_type_is_mergeable(struct f2fs_bio_info *io,
  664. struct f2fs_io_info *fio)
  665. {
  666. blk_opf_t mask = ~(REQ_PREFLUSH | REQ_FUA);
  667. if (io->fio.op != fio->op)
  668. return false;
  669. return (io->fio.op_flags & mask) == (fio->op_flags & mask);
  670. }
  671. static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
  672. struct f2fs_bio_info *io,
  673. struct f2fs_io_info *fio,
  674. block_t last_blkaddr,
  675. block_t cur_blkaddr)
  676. {
  677. if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
  678. return false;
  679. return io_type_is_mergeable(io, fio);
  680. }
  681. static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
  682. struct folio *folio, enum temp_type temp)
  683. {
  684. struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
  685. struct bio_entry *be;
  686. be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
  687. be->bio = bio;
  688. bio_get(bio);
  689. bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
  690. f2fs_down_write(&io->bio_list_lock);
  691. list_add_tail(&be->list, &io->bio_list);
  692. f2fs_up_write(&io->bio_list_lock);
  693. }
  694. static void del_bio_entry(struct bio_entry *be)
  695. {
  696. list_del(&be->list);
  697. kmem_cache_free(bio_entry_slab, be);
  698. }
/*
 * Try to append @folio to the tracked in-place-update bio *@bio.
 *
 * The DATA bio lists of all temperatures are searched for an entry that
 * refers to *@bio.  If the entry is found and the folio can be added, 0 is
 * returned and *@bio stays valid.  If the entry is found but the folio
 * cannot be added, the entry is removed and the bio is submitted.
 *
 * In every non-zero case (including "bio not found in any list") the
 * function returns -EAGAIN, drops a reference on *@bio and sets *@bio to
 * NULL so that the caller allocates a new bio.
 */
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
							struct folio *folio)
{
	struct folio *fio_folio = fio->folio;
	struct f2fs_sb_info *sbi = fio->sbi;
	enum temp_type temp;
	bool found = false;
	int ret = -EAGAIN;

	/* stop scanning temperatures as soon as the bio has been located */
	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (be->bio != *bio)
				continue;

			found = true;

			/* the caller is expected to have checked this already */
			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
							    *fio->last_block,
							    fio->new_blkaddr));
			if (f2fs_crypt_mergeable_bio(*bio,
					fio_folio->mapping->host,
					fio_folio->index, fio) &&
			    bio_add_folio(*bio, folio, folio_size(folio), 0)) {
				ret = 0;
				break;
			}

			/* page can't be merged into bio; submit the bio */
			del_bio_entry(be);
			f2fs_submit_write_bio(sbi, *bio, DATA);
			break;
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	/* on failure, release the caller's reference and forget the bio */
	if (ret) {
		bio_put(*bio);
		*bio = NULL;
	}

	return ret;
}
/*
 * Submit a tracked in-place-update write bio.
 *
 * The bio is identified either directly through *@bio or, when no bio is
 * given, as the tracked bio that contains @folio; at least one of the two
 * must be provided.  The DATA bio lists of all temperatures are searched.
 * A matching entry is removed from its list and the bio is submitted.
 *
 * Whether or not a match was found, a non-NULL *@bio has one reference
 * dropped and is reset to NULL before returning.
 */
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct folio *folio)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	f2fs_bug_on(sbi, !target && !folio);

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		/* unlocked peek; an empty list cannot hold the bio */
		if (list_empty(head))
			continue;

		/* first pass: look for a match under the read lock only */
		f2fs_down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								folio, 0);
			if (found)
				break;
		}
		f2fs_up_read(&io->bio_list_lock);

		if (!found)
			continue;

		/*
		 * Second pass: the read lock was dropped, so search again
		 * under the write lock before removing the entry.
		 */
		found = false;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								folio, 0);
			if (found) {
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (found)
		f2fs_submit_write_bio(sbi, target, DATA);
	/* drop the caller's reference and clear its pointer */
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}
/*
 * Add the folio described by @fio to the caller-owned in-place-update bio
 * *fio->bio, allocating and tracking a new bio when there is none or when
 * the current one cannot take the folio.
 *
 * On success *fio->bio points to the bio holding the folio and
 * *fio->last_block is updated to fio->new_blkaddr.  Returns 0 on success
 * or -EFSCORRUPTED if fio->new_blkaddr fails validation.
 */
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	/* the payload is the encrypted copy when one exists */
	struct folio *data_folio = fio->encrypted_page ?
			page_folio(fio->encrypted_page) : fio->folio;
	struct folio *folio = fio->folio;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFSCORRUPTED;

	trace_f2fs_submit_folio_bio(data_folio, fio);

	/* not contiguous with the current bio: flush it; bio becomes NULL */
	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr))
		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(bio, folio->mapping->host,
				folio->index, fio, GFP_NOIO);

		add_bio_entry(fio->sbi, bio, data_folio, fio->temp);
	} else {
		/* on failure add_ipu_page() has reset bio to NULL */
		if (add_ipu_page(fio, &bio, data_folio))
			goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));

	inc_page_count(fio->sbi, WB_DATA_TYPE(folio, false));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}
  818. #ifdef CONFIG_BLK_DEV_ZONED
  819. static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
  820. {
  821. struct block_device *bdev = sbi->sb->s_bdev;
  822. int devi = 0;
  823. if (f2fs_is_multi_device(sbi)) {
  824. devi = f2fs_target_device_index(sbi, blkaddr);
  825. if (blkaddr < FDEV(devi).start_blk ||
  826. blkaddr > FDEV(devi).end_blk) {
  827. f2fs_err(sbi, "Invalid block %x", blkaddr);
  828. return false;
  829. }
  830. blkaddr -= FDEV(devi).start_blk;
  831. bdev = FDEV(devi).bdev;
  832. }
  833. return bdev_is_zoned(bdev) &&
  834. f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
  835. (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
  836. }
  837. #endif
/*
 * Queue the write described by @fio on the merged write bio of its page
 * type and temperature.
 *
 * The whole operation runs with the context's io_rwsem held for writing.
 * When fio->in_list is set, @fio itself is not written; instead entries
 * are taken from io->io_list one by one until the list is empty.  Each
 * folio is appended to io->bio; the pending bio is submitted first if the
 * new block cannot be merged into it or if the bio is full.
 *
 * With CONFIG_BLK_DEV_ZONED, a DATA/NODE write that lands on the last block
 * of a sequential zone causes the bio to be submitted immediately and
 * remembered in io->zone_pending_bio; the next write through this context
 * waits for that bio to complete before proceeding.
 *
 * Before returning, the pending bio is submitted right away if the
 * filesystem is shut down or the checkpoint is not ready.
 */
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct folio *bio_folio;
	struct f2fs_lock_context lc;
	enum count_type type;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	f2fs_down_write_trace(&io->io_rwsem, &lc);
next:
#ifdef CONFIG_BLK_DEV_ZONED
	/* wait for the previous end-of-zone bio, then drop our reference */
	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
		wait_for_completion_io(&io->zone_wait);
		bio_put(io->zone_pending_bio);
		io->zone_pending_bio = NULL;
		io->bi_private = NULL;
	}
#endif

	/* list mode: replace fio with the next queued entry, if any */
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	/* pick the folio that actually goes into the bio */
	if (fio->encrypted_page)
		bio_folio = page_folio(fio->encrypted_page);
	else if (fio->compressed_page)
		bio_folio = page_folio(fio->compressed_page);
	else
		bio_folio = fio->folio;

	/* set submitted = true as a return value */
	fio->submitted = 1;

	type = WB_DATA_TYPE(bio_folio, fio->compressed_page);
	inc_page_count(sbi, type);

	/* flush the pending bio if this block cannot be merged into it */
	if (io->bio &&
	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
			      fio->new_blkaddr) ||
	     !f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio),
				bio_folio->index, fio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio),
				bio_folio->index, fio, GFP_NOIO);
		/* remember the fio this bio was created for */
		io->fio = *fio;
	}

	/* bio is full: submit it and retry with a fresh one */
	if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, fio->folio,
				folio_size(fio->folio));

	io->last_block_in_bio = fio->new_blkaddr;

	trace_f2fs_submit_folio_write(fio->folio, fio);
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
		/*
		 * Keep a reference and redirect completion through
		 * f2fs_zone_write_end_io(), which restores bi_private and
		 * signals zone_wait.
		 */
		bio_get(io->bio);
		reinit_completion(&io->zone_wait);
		io->bi_private = io->bio->bi_private;
		io->bio->bi_private = io;
		io->bio->bi_end_io = f2fs_zone_write_end_io;
		io->zone_pending_bio = io->bio;
		__submit_merged_bio(io);
	}
#endif
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	f2fs_up_write_trace(&io->io_rwsem, &lc);
}
  921. static struct bio *f2fs_grab_read_bio(struct inode *inode,
  922. struct fsverity_info *vi, block_t blkaddr,
  923. unsigned nr_pages, blk_opf_t op_flag,
  924. pgoff_t first_idx, bool for_write)
  925. {
  926. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  927. struct bio *bio;
  928. struct bio_post_read_ctx *ctx = NULL;
  929. unsigned int post_read_steps = 0;
  930. sector_t sector;
  931. struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
  932. bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
  933. REQ_OP_READ | op_flag,
  934. for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
  935. bio->bi_iter.bi_sector = sector;
  936. f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
  937. bio->bi_end_io = f2fs_read_end_io;
  938. if (fscrypt_inode_uses_fs_layer_crypto(inode))
  939. post_read_steps |= STEP_DECRYPT;
  940. if (vi)
  941. post_read_steps |= STEP_VERITY;
  942. /*
  943. * STEP_DECOMPRESS is handled specially, since a compressed file might
  944. * contain both compressed and uncompressed clusters. We'll allocate a
  945. * bio_post_read_ctx if the file is compressed, but the caller is
  946. * responsible for enabling STEP_DECOMPRESS if it's actually needed.
  947. */
  948. if (post_read_steps || f2fs_compressed_file(inode)) {
  949. /* Due to the mempool, this never fails. */
  950. ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
  951. ctx->bio = bio;
  952. ctx->sbi = sbi;
  953. ctx->vi = vi;
  954. ctx->enabled_steps = post_read_steps;
  955. ctx->fs_blkaddr = blkaddr;
  956. ctx->decompression_attempted = false;
  957. bio->bi_private = ctx;
  958. }
  959. iostat_alloc_and_bind_ctx(sbi, bio, ctx);
  960. return bio;
  961. }
  962. /* This can handle encryption stuffs */
  963. static void f2fs_submit_page_read(struct inode *inode, struct fsverity_info *vi,
  964. struct folio *folio, block_t blkaddr,
  965. blk_opf_t op_flags, bool for_write)
  966. {
  967. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  968. struct bio *bio;
  969. bio = f2fs_grab_read_bio(inode, vi, blkaddr, 1, op_flags, folio->index,
  970. for_write);
  971. /* wait for GCed page writeback via META_MAPPING */
  972. f2fs_wait_on_block_writeback(inode, blkaddr);
  973. if (!bio_add_folio(bio, folio, PAGE_SIZE, 0))
  974. f2fs_bug_on(sbi, 1);
  975. inc_page_count(sbi, F2FS_RD_DATA);
  976. f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
  977. f2fs_submit_read_bio(sbi, bio, DATA);
  978. }
  979. static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
  980. {
  981. __le32 *addr = get_dnode_addr(dn->inode, dn->node_folio);
  982. dn->data_blkaddr = blkaddr;
  983. addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
  984. }
  985. /*
  986. * Lock ordering for the change of data block address:
  987. * ->data_page
  988. * ->node_folio
  989. * update block addresses in the node page
  990. */
  991. void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
  992. {
  993. f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true);
  994. __set_data_blkaddr(dn, blkaddr);
  995. if (folio_mark_dirty(dn->node_folio))
  996. dn->node_changed = true;
  997. }
  998. void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
  999. {
  1000. f2fs_set_data_blkaddr(dn, blkaddr);
  1001. f2fs_update_read_extent_cache(dn);
  1002. }
  1003. /* dn->ofs_in_node will be returned with up-to-date last block pointer */
  1004. int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
  1005. {
  1006. struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
  1007. int err;
  1008. if (!count)
  1009. return 0;
  1010. if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
  1011. return -EPERM;
  1012. err = inc_valid_block_count(sbi, dn->inode, &count, true);
  1013. if (unlikely(err))
  1014. return err;
  1015. trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
  1016. dn->ofs_in_node, count);
  1017. f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true);
  1018. for (; count > 0; dn->ofs_in_node++) {
  1019. block_t blkaddr = f2fs_data_blkaddr(dn);
  1020. if (blkaddr == NULL_ADDR) {
  1021. __set_data_blkaddr(dn, NEW_ADDR);
  1022. count--;
  1023. }
  1024. }
  1025. if (folio_mark_dirty(dn->node_folio))
  1026. dn->node_changed = true;
  1027. return 0;
  1028. }
  1029. /* Should keep dn->ofs_in_node unchanged */
  1030. int f2fs_reserve_new_block(struct dnode_of_data *dn)
  1031. {
  1032. unsigned int ofs_in_node = dn->ofs_in_node;
  1033. int ret;
  1034. ret = f2fs_reserve_new_blocks(dn, 1);
  1035. dn->ofs_in_node = ofs_in_node;
  1036. return ret;
  1037. }
  1038. int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
  1039. {
  1040. bool need_put = dn->inode_folio ? false : true;
  1041. int err;
  1042. err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
  1043. if (err)
  1044. return err;
  1045. if (dn->data_blkaddr == NULL_ADDR)
  1046. err = f2fs_reserve_new_block(dn);
  1047. if (err || need_put)
  1048. f2fs_put_dnode(dn);
  1049. return err;
  1050. }
  1051. static inline struct fsverity_info *f2fs_need_verity(const struct inode *inode,
  1052. pgoff_t idx)
  1053. {
  1054. if (idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE))
  1055. return fsverity_get_info(inode);
  1056. return NULL;
  1057. }
  1058. struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index,
  1059. blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs)
  1060. {
  1061. struct address_space *mapping = inode->i_mapping;
  1062. struct dnode_of_data dn;
  1063. struct folio *folio;
  1064. int err;
  1065. retry:
  1066. folio = f2fs_grab_cache_folio(mapping, index, for_write);
  1067. if (IS_ERR(folio))
  1068. return folio;
  1069. if (folio_test_large(folio)) {
  1070. pgoff_t folio_index = mapping_align_index(mapping, index);
  1071. f2fs_folio_put(folio, true);
  1072. invalidate_inode_pages2_range(mapping, folio_index,
  1073. folio_index + folio_nr_pages(folio) - 1);
  1074. f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT);
  1075. goto retry;
  1076. }
  1077. if (f2fs_lookup_read_extent_cache_block(inode, index,
  1078. &dn.data_blkaddr)) {
  1079. if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
  1080. DATA_GENERIC_ENHANCE_READ)) {
  1081. err = -EFSCORRUPTED;
  1082. goto put_err;
  1083. }
  1084. goto got_it;
  1085. }
  1086. set_new_dnode(&dn, inode, NULL, NULL, 0);
  1087. err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
  1088. if (err) {
  1089. if (err == -ENOENT && next_pgofs)
  1090. *next_pgofs = f2fs_get_next_page_offset(&dn, index);
  1091. goto put_err;
  1092. }
  1093. f2fs_put_dnode(&dn);
  1094. if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
  1095. err = -ENOENT;
  1096. if (next_pgofs)
  1097. *next_pgofs = index + 1;
  1098. goto put_err;
  1099. }
  1100. if (dn.data_blkaddr != NEW_ADDR &&
  1101. !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
  1102. dn.data_blkaddr,
  1103. DATA_GENERIC_ENHANCE)) {
  1104. err = -EFSCORRUPTED;
  1105. goto put_err;
  1106. }
  1107. got_it:
  1108. if (folio_test_uptodate(folio)) {
  1109. folio_unlock(folio);
  1110. return folio;
  1111. }
  1112. /*
  1113. * A new dentry page is allocated but not able to be written, since its
  1114. * new inode page couldn't be allocated due to -ENOSPC.
  1115. * In such the case, its blkaddr can be remained as NEW_ADDR.
  1116. * see, f2fs_add_link -> f2fs_get_new_data_folio ->
  1117. * f2fs_init_inode_metadata.
  1118. */
  1119. if (dn.data_blkaddr == NEW_ADDR) {
  1120. folio_zero_segment(folio, 0, folio_size(folio));
  1121. if (!folio_test_uptodate(folio))
  1122. folio_mark_uptodate(folio);
  1123. folio_unlock(folio);
  1124. return folio;
  1125. }
  1126. f2fs_submit_page_read(inode, f2fs_need_verity(inode, folio->index),
  1127. folio, dn.data_blkaddr, op_flags, for_write);
  1128. return folio;
  1129. put_err:
  1130. f2fs_folio_put(folio, true);
  1131. return ERR_PTR(err);
  1132. }
  1133. struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index,
  1134. pgoff_t *next_pgofs)
  1135. {
  1136. struct address_space *mapping = inode->i_mapping;
  1137. struct folio *folio;
  1138. folio = f2fs_filemap_get_folio(mapping, index, FGP_ACCESSED, 0);
  1139. if (IS_ERR(folio))
  1140. goto read;
  1141. if (folio_test_uptodate(folio))
  1142. return folio;
  1143. f2fs_folio_put(folio, false);
  1144. read:
  1145. folio = f2fs_get_read_data_folio(inode, index, 0, false, next_pgofs);
  1146. if (IS_ERR(folio))
  1147. return folio;
  1148. if (folio_test_uptodate(folio))
  1149. return folio;
  1150. folio_wait_locked(folio);
  1151. if (unlikely(!folio_test_uptodate(folio))) {
  1152. f2fs_folio_put(folio, false);
  1153. return ERR_PTR(-EIO);
  1154. }
  1155. return folio;
  1156. }
  1157. /*
  1158. * If it tries to access a hole, return an error.
  1159. * Because, the callers, functions in dir.c and GC, should be able to know
  1160. * whether this page exists or not.
  1161. */
  1162. struct folio *f2fs_get_lock_data_folio(struct inode *inode, pgoff_t index,
  1163. bool for_write)
  1164. {
  1165. struct address_space *mapping = inode->i_mapping;
  1166. struct folio *folio;
  1167. folio = f2fs_get_read_data_folio(inode, index, 0, for_write, NULL);
  1168. if (IS_ERR(folio))
  1169. return folio;
  1170. /* wait for read completion */
  1171. folio_lock(folio);
  1172. if (unlikely(folio->mapping != mapping || !folio_test_uptodate(folio))) {
  1173. f2fs_folio_put(folio, true);
  1174. return ERR_PTR(-EIO);
  1175. }
  1176. return folio;
  1177. }
  1178. /*
  1179. * Caller ensures that this data page is never allocated.
  1180. * A new zero-filled data page is allocated in the page cache.
  1181. *
  1182. * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
  1183. * f2fs_unlock_op().
  1184. * Note that, ifolio is set only by make_empty_dir, and if any error occur,
  1185. * ifolio should be released by this function.
  1186. */
  1187. struct folio *f2fs_get_new_data_folio(struct inode *inode,
  1188. struct folio *ifolio, pgoff_t index, bool new_i_size)
  1189. {
  1190. struct address_space *mapping = inode->i_mapping;
  1191. struct folio *folio;
  1192. struct dnode_of_data dn;
  1193. int err;
  1194. folio = f2fs_grab_cache_folio(mapping, index, true);
  1195. if (IS_ERR(folio)) {
  1196. /*
  1197. * before exiting, we should make sure ifolio will be released
  1198. * if any error occur.
  1199. */
  1200. f2fs_folio_put(ifolio, true);
  1201. return ERR_PTR(-ENOMEM);
  1202. }
  1203. set_new_dnode(&dn, inode, ifolio, NULL, 0);
  1204. err = f2fs_reserve_block(&dn, index);
  1205. if (err) {
  1206. f2fs_folio_put(folio, true);
  1207. return ERR_PTR(err);
  1208. }
  1209. if (!ifolio)
  1210. f2fs_put_dnode(&dn);
  1211. if (folio_test_uptodate(folio))
  1212. goto got_it;
  1213. if (dn.data_blkaddr == NEW_ADDR) {
  1214. folio_zero_segment(folio, 0, folio_size(folio));
  1215. if (!folio_test_uptodate(folio))
  1216. folio_mark_uptodate(folio);
  1217. } else {
  1218. f2fs_folio_put(folio, true);
  1219. /* if ifolio exists, blkaddr should be NEW_ADDR */
  1220. f2fs_bug_on(F2FS_I_SB(inode), ifolio);
  1221. folio = f2fs_get_lock_data_folio(inode, index, true);
  1222. if (IS_ERR(folio))
  1223. return folio;
  1224. }
  1225. got_it:
  1226. if (new_i_size && i_size_read(inode) <
  1227. ((loff_t)(index + 1) << PAGE_SHIFT))
  1228. f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
  1229. return folio;
  1230. }
  1231. static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
  1232. {
  1233. struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
  1234. struct f2fs_summary sum;
  1235. struct node_info ni;
  1236. block_t old_blkaddr;
  1237. blkcnt_t count = 1;
  1238. int err;
  1239. if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
  1240. return -EPERM;
  1241. err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
  1242. if (err)
  1243. return err;
  1244. dn->data_blkaddr = f2fs_data_blkaddr(dn);
  1245. if (dn->data_blkaddr == NULL_ADDR) {
  1246. err = inc_valid_block_count(sbi, dn->inode, &count, true);
  1247. if (unlikely(err))
  1248. return err;
  1249. }
  1250. set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
  1251. old_blkaddr = dn->data_blkaddr;
  1252. err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
  1253. &dn->data_blkaddr, &sum, seg_type, NULL);
  1254. if (err)
  1255. return err;
  1256. if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
  1257. f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1);
  1258. f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
  1259. return 0;
  1260. }
  1261. static void f2fs_map_lock(struct f2fs_sb_info *sbi,
  1262. struct f2fs_lock_context *lc,
  1263. int flag)
  1264. {
  1265. if (flag == F2FS_GET_BLOCK_PRE_AIO)
  1266. f2fs_down_read_trace(&sbi->node_change, lc);
  1267. else
  1268. f2fs_lock_op(sbi, lc);
  1269. }
  1270. static void f2fs_map_unlock(struct f2fs_sb_info *sbi,
  1271. struct f2fs_lock_context *lc,
  1272. int flag)
  1273. {
  1274. if (flag == F2FS_GET_BLOCK_PRE_AIO)
  1275. f2fs_up_read_trace(&sbi->node_change, lc);
  1276. else
  1277. f2fs_unlock_op(sbi, lc);
  1278. }
  1279. int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
  1280. {
  1281. struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
  1282. struct f2fs_lock_context lc;
  1283. int err = 0;
  1284. f2fs_map_lock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
  1285. if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
  1286. &dn->data_blkaddr))
  1287. err = f2fs_reserve_block(dn, index);
  1288. f2fs_map_unlock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
  1289. return err;
  1290. }
  1291. static int f2fs_map_no_dnode(struct inode *inode,
  1292. struct f2fs_map_blocks *map, struct dnode_of_data *dn,
  1293. pgoff_t pgoff)
  1294. {
  1295. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1296. /*
  1297. * There is one exceptional case that read_node_page() may return
  1298. * -ENOENT due to filesystem has been shutdown or cp_error, return
  1299. * -EIO in that case.
  1300. */
  1301. if (map->m_may_create &&
  1302. (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
  1303. return -EIO;
  1304. if (map->m_next_pgofs)
  1305. *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
  1306. if (map->m_next_extent)
  1307. *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
  1308. return 0;
  1309. }
  1310. static bool f2fs_map_blocks_cached(struct inode *inode,
  1311. struct f2fs_map_blocks *map, int flag)
  1312. {
  1313. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1314. unsigned int maxblocks = map->m_len;
  1315. pgoff_t pgoff = (pgoff_t)map->m_lblk;
  1316. struct extent_info ei = {};
  1317. if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
  1318. return false;
  1319. map->m_pblk = ei.blk + pgoff - ei.fofs;
  1320. map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
  1321. map->m_flags = F2FS_MAP_MAPPED;
  1322. if (map->m_next_extent)
  1323. *map->m_next_extent = pgoff + map->m_len;
  1324. /* for hardware encryption, but to avoid potential issue in future */
  1325. if (flag == F2FS_GET_BLOCK_DIO)
  1326. f2fs_wait_on_block_writeback_range(inode,
  1327. map->m_pblk, map->m_len);
  1328. if (f2fs_allow_multi_device_dio(sbi, flag)) {
  1329. int bidx = f2fs_target_device_index(sbi, map->m_pblk);
  1330. struct f2fs_dev_info *dev = &sbi->devs[bidx];
  1331. map->m_bdev = dev->bdev;
  1332. map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
  1333. map->m_pblk -= dev->start_blk;
  1334. } else {
  1335. map->m_bdev = inode->i_sb->s_bdev;
  1336. }
  1337. return true;
  1338. }
  1339. static bool map_is_mergeable(struct f2fs_sb_info *sbi,
  1340. struct f2fs_map_blocks *map,
  1341. block_t blkaddr, int flag, int bidx,
  1342. int ofs)
  1343. {
  1344. if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
  1345. return false;
  1346. if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs))
  1347. return true;
  1348. if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR)
  1349. return true;
  1350. if (flag == F2FS_GET_BLOCK_PRE_DIO)
  1351. return true;
  1352. if (flag == F2FS_GET_BLOCK_DIO &&
  1353. map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR)
  1354. return true;
  1355. return false;
  1356. }
  1357. /*
  1358. * f2fs_map_blocks() tries to find or build mapping relationship which
  1359. * maps continuous logical blocks to physical blocks, and return such
  1360. * info via f2fs_map_blocks structure.
  1361. */
  /*
   * @inode: file whose blocks are being mapped.
   * @map:   in: m_lblk (first logical block), m_len (maximum number of
   *         blocks), m_may_create, m_seg_type and the optional
   *         m_next_pgofs / m_next_extent out-pointers.
   *         out: m_pblk, m_len, m_flags and m_bdev describe the run found;
   *         m_len is reset to 0 and then counted up block by block, so it
   *         may come back shorter than requested or 0.
   * @flag:  one of the F2FS_GET_BLOCK_* modes; it selects the lock taken,
   *         whether/how blocks are allocated and how holes are reported.
   *
   * When map->m_may_create is set, each dnode is processed under
   * f2fs_map_lock()/f2fs_map_unlock() and followed by f2fs_balance_fs().
   *
   * Returns 0 on success or a negative errno (errors seen below include
   * -EFSCORRUPTED, -EIO, -ENOSPC and whatever the dnode lookup or the
   * allocation helpers return).
   */
  1362. int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
  1363. {
  1364. unsigned int maxblocks = map->m_len;
  1365. struct dnode_of_data dn;
  1366. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1367. struct f2fs_lock_context lc;
  1368. int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
  1369. pgoff_t pgofs, end_offset, end;
  1370. int err = 0, ofs = 1;
  1371. unsigned int ofs_in_node, last_ofs_in_node;
  1372. blkcnt_t prealloc;
  1373. block_t blkaddr;
  1374. unsigned int start_pgofs;
  1375. int bidx = 0;
  1376. bool is_hole;
  1377. bool lfs_dio_write;
  /* Empty request: return immediately, without tracing or touching @map. */
  1378. if (!maxblocks)
  1379. return 0;
  1380. lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
  1381. map->m_may_create);
  /* Pure lookups are first tried against the read extent cache. */
  1382. if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
  1383. goto out;
  1384. map->m_bdev = inode->i_sb->s_bdev;
  1385. map->m_multidev_dio =
  1386. f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
  1387. map->m_len = 0;
  1388. map->m_flags = 0;
  1389. /* it only supports block size == page size */
  1390. pgofs = (pgoff_t)map->m_lblk;
  1391. end = pgofs + maxblocks;
  1392. if (flag == F2FS_GET_BLOCK_PRECACHE)
  1393. mode = LOOKUP_NODE_RA;
  /* One pass from next_dnode handles the slots of a single dnode. */
  1394. next_dnode:
  1395. if (map->m_may_create) {
  1396. if (f2fs_lfs_mode(sbi))
  1397. f2fs_balance_fs(sbi, true);
  1398. f2fs_map_lock(sbi, &lc, flag);
  1399. }
  1400. /* When reading holes, we need its node page */
  1401. set_new_dnode(&dn, inode, NULL, NULL, 0);
  1402. err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
  /*
   * Lookup failure: -ENOENT is delegated to f2fs_map_no_dnode(); every
   * error leaves through unlock_out, skipping f2fs_put_dnode().
   */
  1403. if (err) {
  1404. if (flag == F2FS_GET_BLOCK_BMAP)
  1405. map->m_pblk = 0;
  1406. if (err == -ENOENT)
  1407. err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
  1408. goto unlock_out;
  1409. }
  1410. start_pgofs = pgofs;
  1411. prealloc = 0;
  1412. last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
  1413. end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
  /* One pass from next_block examines a single block-address slot. */
  1414. next_block:
  1415. blkaddr = f2fs_data_blkaddr(&dn);
  1416. is_hole = !__is_valid_data_blkaddr(blkaddr);
  1417. if (!is_hole &&
  1418. !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
  1419. err = -EFSCORRUPTED;
  1420. goto sync_out;
  1421. }
  1422. /* use out-place-update for direct IO under LFS mode */
  1423. if (map->m_may_create && (is_hole ||
  1424. (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
  1425. !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) {
  1426. if (unlikely(f2fs_cp_error(sbi))) {
  1427. err = -EIO;
  1428. goto sync_out;
  1429. }
  /*
   * PRE_AIO only counts NULL_ADDR slots here (reserved in one batch after
   * the skip label); PRE_DIO and DIO allocate the block right away.
   */
  1430. switch (flag) {
  1431. case F2FS_GET_BLOCK_PRE_AIO:
  1432. if (blkaddr == NULL_ADDR) {
  1433. prealloc++;
  1434. last_ofs_in_node = dn.ofs_in_node;
  1435. }
  1436. break;
  1437. case F2FS_GET_BLOCK_PRE_DIO:
  1438. case F2FS_GET_BLOCK_DIO:
  1439. err = __allocate_data_block(&dn, map->m_seg_type);
  1440. if (err)
  1441. goto sync_out;
  1442. if (flag == F2FS_GET_BLOCK_PRE_DIO)
  1443. file_need_truncate(inode);
  1444. set_inode_flag(inode, FI_APPEND_WRITE);
  1445. break;
  1446. default:
  1447. WARN_ON_ONCE(1);
  1448. err = -EIO;
  1449. goto sync_out;
  1450. }
  1451. blkaddr = dn.data_blkaddr;
  1452. if (is_hole)
  1453. map->m_flags |= F2FS_MAP_NEW;
  /* Hole without allocation: how it is reported depends on @flag. */
  1454. } else if (is_hole) {
  1455. if (f2fs_compressed_file(inode) &&
  1456. f2fs_sanity_check_cluster(&dn)) {
  1457. err = -EFSCORRUPTED;
  1458. f2fs_handle_error(sbi,
  1459. ERROR_CORRUPTED_CLUSTER);
  1460. goto sync_out;
  1461. }
  1462. switch (flag) {
  1463. case F2FS_GET_BLOCK_PRECACHE:
  1464. goto sync_out;
  1465. case F2FS_GET_BLOCK_BMAP:
  1466. map->m_pblk = 0;
  1467. goto sync_out;
  1468. case F2FS_GET_BLOCK_FIEMAP:
  1469. if (blkaddr == NULL_ADDR) {
  1470. if (map->m_next_pgofs)
  1471. *map->m_next_pgofs = pgofs + 1;
  1472. goto sync_out;
  1473. }
  1474. break;
  1475. case F2FS_GET_BLOCK_DIO:
  1476. if (map->m_next_pgofs)
  1477. *map->m_next_pgofs = pgofs + 1;
  1478. break;
  1479. default:
  1480. /* for defragment case */
  1481. if (map->m_next_pgofs)
  1482. *map->m_next_pgofs = pgofs + 1;
  1483. goto sync_out;
  1484. }
  1485. }
  /* PRE_AIO does not build a run here; m_len is updated after reservation. */
  1486. if (flag == F2FS_GET_BLOCK_PRE_AIO)
  1487. goto skip;
  1488. if (map->m_multidev_dio)
  1489. bidx = f2fs_target_device_index(sbi, blkaddr);
  /*
   * The first block starts the run; later blocks extend it only when
   * map_is_mergeable() agrees, otherwise the run ends here.
   */
  1490. if (map->m_len == 0) {
  1491. /* reserved delalloc block should be mapped for fiemap. */
  1492. if (blkaddr == NEW_ADDR)
  1493. map->m_flags |= F2FS_MAP_DELALLOC;
  1494. /* DIO READ and hole case, should not map the blocks. */
  1495. if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create))
  1496. map->m_flags |= F2FS_MAP_MAPPED;
  1497. map->m_pblk = blkaddr;
  1498. map->m_len = 1;
  1499. if (map->m_multidev_dio)
  1500. map->m_bdev = FDEV(bidx).bdev;
  1501. if (lfs_dio_write)
  1502. map->m_last_pblk = NULL_ADDR;
  1503. } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) {
  1504. ofs++;
  1505. map->m_len++;
  1506. } else {
  1507. if (lfs_dio_write && !f2fs_is_pinned_file(inode))
  1508. map->m_last_pblk = blkaddr;
  1509. goto sync_out;
  1510. }
  1511. skip:
  1512. dn.ofs_in_node++;
  1513. pgofs++;
  1514. /* preallocate blocks in batch for one dnode page */
  1515. if (flag == F2FS_GET_BLOCK_PRE_AIO &&
  1516. (pgofs == end || dn.ofs_in_node == end_offset)) {
  1517. dn.ofs_in_node = ofs_in_node;
  1518. err = f2fs_reserve_new_blocks(&dn, prealloc);
  1519. if (err)
  1520. goto sync_out;
  1521. map->m_len += dn.ofs_in_node - ofs_in_node;
  /* Fewer slots were advanced than were counted: report -ENOSPC. */
  1522. if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
  1523. err = -ENOSPC;
  1524. goto sync_out;
  1525. }
  1526. dn.ofs_in_node = end_offset;
  1527. }
  1528. if (pgofs >= end)
  1529. goto sync_out;
  1530. else if (dn.ofs_in_node < end_offset)
  1531. goto next_block;
  /*
   * This dnode is exhausted but the request is not: publish what was
   * found (PRECACHE only), drop the dnode and lock, and move on.
   */
  1532. if (flag == F2FS_GET_BLOCK_PRECACHE) {
  1533. if (map->m_flags & F2FS_MAP_MAPPED) {
  1534. unsigned int ofs = start_pgofs - map->m_lblk;
  1535. f2fs_update_read_extent_cache_range(&dn,
  1536. start_pgofs, map->m_pblk + ofs,
  1537. map->m_len - ofs);
  1538. }
  1539. }
  1540. f2fs_put_dnode(&dn);
  1541. if (map->m_may_create) {
  1542. f2fs_map_unlock(sbi, &lc, flag);
  1543. f2fs_balance_fs(sbi, dn.node_changed);
  1544. }
  1545. goto next_dnode;
  /* Common exit for paths that still hold a dnode reference. */
  1546. sync_out:
  1547. if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
  1548. /*
  1549. * for hardware encryption, but to avoid potential issue
  1550. * in future
  1551. */
  1552. f2fs_wait_on_block_writeback_range(inode,
  1553. map->m_pblk, map->m_len);
  /* Multi-device DIO: pick the device and make m_pblk relative to it. */
  1554. if (map->m_multidev_dio) {
  1555. block_t blk_addr = map->m_pblk;
  1556. bidx = f2fs_target_device_index(sbi, map->m_pblk);
  1557. map->m_bdev = FDEV(bidx).bdev;
  1558. map->m_pblk -= FDEV(bidx).start_blk;
  1559. if (map->m_may_create)
  1560. f2fs_update_device_state(sbi, inode->i_ino,
  1561. blk_addr, map->m_len);
  1562. f2fs_bug_on(sbi, blk_addr + map->m_len >
  1563. FDEV(bidx).end_blk + 1);
  1564. }
  1565. }
  1566. if (flag == F2FS_GET_BLOCK_PRECACHE) {
  1567. if (map->m_flags & F2FS_MAP_MAPPED) {
  1568. unsigned int ofs = start_pgofs - map->m_lblk;
  1569. if (map->m_len > ofs)
  1570. f2fs_update_read_extent_cache_range(&dn,
  1571. start_pgofs, map->m_pblk + ofs,
  1572. map->m_len - ofs);
  1573. }
  1574. if (map->m_next_extent)
  1575. *map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
  1576. }
  1577. f2fs_put_dnode(&dn);
  /* Exit for paths with no dnode reference to drop (failed lookup). */
  1578. unlock_out:
  1579. if (map->m_may_create) {
  1580. f2fs_map_unlock(sbi, &lc, flag);
  1581. f2fs_balance_fs(sbi, dn.node_changed);
  1582. }
  1583. out:
  1584. trace_f2fs_map_blocks(inode, map, flag, err);
  1585. return err;
  1586. }
  1587. static bool __f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len,
  1588. bool check_first)
  1589. {
  1590. struct f2fs_map_blocks map;
  1591. block_t last_lblk;
  1592. int err;
  1593. if (pos + len > i_size_read(inode))
  1594. return false;
  1595. map.m_lblk = F2FS_BYTES_TO_BLK(pos);
  1596. map.m_next_pgofs = NULL;
  1597. map.m_next_extent = NULL;
  1598. map.m_seg_type = NO_CHECK_TYPE;
  1599. map.m_may_create = false;
  1600. last_lblk = F2FS_BLK_ALIGN(pos + len);
  1601. while (map.m_lblk < last_lblk) {
  1602. map.m_len = last_lblk - map.m_lblk;
  1603. err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
  1604. if (err || map.m_len == 0)
  1605. return false;
  1606. map.m_lblk += map.m_len;
  1607. if (check_first)
  1608. break;
  1609. }
  1610. return true;
  1611. }
  1612. bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
  1613. {
  1614. return __f2fs_overwrite_io(inode, pos, len, false);
  1615. }
  1616. static int f2fs_xattr_fiemap(struct inode *inode,
  1617. struct fiemap_extent_info *fieinfo)
  1618. {
  1619. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1620. struct node_info ni;
  1621. __u64 phys = 0, len;
  1622. __u32 flags;
  1623. nid_t xnid = F2FS_I(inode)->i_xattr_nid;
  1624. int err = 0;
  1625. if (f2fs_has_inline_xattr(inode)) {
  1626. int offset;
  1627. struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi),
  1628. inode->i_ino, false);
  1629. if (IS_ERR(folio))
  1630. return PTR_ERR(folio);
  1631. err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
  1632. if (err) {
  1633. f2fs_folio_put(folio, true);
  1634. return err;
  1635. }
  1636. phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
  1637. offset = offsetof(struct f2fs_inode, i_addr) +
  1638. sizeof(__le32) * (DEF_ADDRS_PER_INODE -
  1639. get_inline_xattr_addrs(inode));
  1640. phys += offset;
  1641. len = inline_xattr_size(inode);
  1642. f2fs_folio_put(folio, true);
  1643. flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
  1644. if (!xnid)
  1645. flags |= FIEMAP_EXTENT_LAST;
  1646. err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
  1647. trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
  1648. if (err)
  1649. return err;
  1650. }
  1651. if (xnid) {
  1652. struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi),
  1653. xnid, false);
  1654. if (IS_ERR(folio))
  1655. return PTR_ERR(folio);
  1656. err = f2fs_get_node_info(sbi, xnid, &ni, false);
  1657. if (err) {
  1658. f2fs_folio_put(folio, true);
  1659. return err;
  1660. }
  1661. phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
  1662. len = inode->i_sb->s_blocksize;
  1663. f2fs_folio_put(folio, true);
  1664. flags = FIEMAP_EXTENT_LAST;
  1665. }
  1666. if (phys) {
  1667. err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
  1668. trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
  1669. }
  1670. return (err < 0 ? err : 0);
  1671. }
  /*
   * Report the extents of @inode that overlap the byte range starting at
   * @start and spanning @len bytes, through @fieinfo.
   *
   * Special requests are dispatched first: FIEMAP_FLAG_CACHE precaches
   * extents, FIEMAP_FLAG_XATTR is answered by f2fs_xattr_fiemap(), and
   * inline data/dentries go to f2fs_inline_data_fiemap() (unless that
   * returns -EAGAIN).  Otherwise the range is walked with repeated
   * f2fs_map_blocks(F2FS_GET_BLOCK_FIEMAP) calls; each extent is remembered
   * in logical/phys/size/flags and emitted on the following iteration.
   *
   * The inode is held with inode_lock_shared() for the duration.
   * Returns 0 on success or a negative errno.
   */
  1672. int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  1673. u64 start, u64 len)
  1674. {
  1675. struct f2fs_map_blocks map;
  1676. sector_t start_blk, last_blk, blk_len, max_len;
  1677. pgoff_t next_pgofs;
  1678. u64 logical = 0, phys = 0, size = 0;
  1679. u32 flags = 0;
  1680. int ret = 0;
  1681. bool compr_cluster = false, compr_appended;
  1682. unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
  1683. unsigned int count_in_cluster = 0;
  1684. loff_t maxbytes;
  1685. if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
  1686. ret = f2fs_precache_extents(inode);
  1687. if (ret)
  1688. return ret;
  1689. }
  1690. ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
  1691. if (ret)
  1692. return ret;
  1693. inode_lock_shared(inode);
  /* Clamp the request to the largest byte offset the file can have. */
  1694. maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
  1695. if (start > maxbytes) {
  1696. ret = -EFBIG;
  1697. goto out;
  1698. }
  1699. if (len > maxbytes || (maxbytes - len) < start)
  1700. len = maxbytes - start;
  1701. if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
  1702. ret = f2fs_xattr_fiemap(inode, fieinfo);
  1703. goto out;
  1704. }
  1705. if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
  1706. ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
  1707. if (ret != -EAGAIN)
  1708. goto out;
  1709. }
  1710. start_blk = F2FS_BYTES_TO_BLK(start);
  1711. last_blk = F2FS_BYTES_TO_BLK(start + len - 1);
  1712. blk_len = last_blk - start_blk + 1;
  1713. max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk;
  /* Each pass from next issues one mapping request starting at start_blk. */
  1714. next:
  1715. memset(&map, 0, sizeof(map));
  1716. map.m_lblk = start_blk;
  1717. map.m_len = blk_len;
  1718. map.m_next_pgofs = &next_pgofs;
  1719. map.m_seg_type = NO_CHECK_TYPE;
  /*
   * Inside a compressed cluster: query from the block after start_blk,
   * for the blocks of the cluster not yet counted.
   */
  1720. if (compr_cluster) {
  1721. map.m_lblk += 1;
  1722. map.m_len = cluster_size - count_in_cluster;
  1723. }
  1724. ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
  1725. if (ret)
  1726. goto out;
  1727. /* HOLE */
  1728. if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
  1729. start_blk = next_pgofs;
  1730. if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes)
  1731. goto prep_next;
  1732. flags |= FIEMAP_EXTENT_LAST;
  1733. }
  1734. /*
  1735. * current extent may cross boundary of inquiry, increase len to
  1736. * requery.
  1737. */
  1738. if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) &&
  1739. map.m_lblk + map.m_len - 1 == last_blk &&
  1740. blk_len != max_len) {
  1741. blk_len = max_len;
  1742. goto next;
  1743. }
  1744. compr_appended = false;
  1745. /* In a case of compressed cluster, append this to the last extent */
  1746. if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
  1747. !(map.m_flags & F2FS_MAP_FLAGS))) {
  1748. compr_appended = true;
  1749. goto skip_fill;
  1750. }
  /* Emit the extent remembered from the previous iteration, if any. */
  1751. if (size) {
  1752. flags |= FIEMAP_EXTENT_MERGED;
  1753. if (IS_ENCRYPTED(inode))
  1754. flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
  1755. ret = fiemap_fill_next_extent(fieinfo, logical,
  1756. phys, size, flags);
  1757. trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
  1758. if (ret)
  1759. goto out;
  1760. size = 0;
  1761. }
  1762. if (start_blk > last_blk)
  1763. goto out;
  /*
   * Record the current mapping as the pending extent.  Three cases:
   * a COMPRESS_ADDR marker opens a cluster, an "appended" result closes
   * one by growing the pending extent, anything else starts a new extent.
   */
  1764. skip_fill:
  1765. if (map.m_pblk == COMPRESS_ADDR) {
  1766. compr_cluster = true;
  1767. count_in_cluster = 1;
  1768. } else if (compr_appended) {
  1769. unsigned int appended_blks = cluster_size -
  1770. count_in_cluster + 1;
  1771. size += F2FS_BLK_TO_BYTES(appended_blks);
  1772. start_blk += appended_blks;
  1773. compr_cluster = false;
  1774. } else {
  1775. logical = F2FS_BLK_TO_BYTES(start_blk);
  1776. phys = __is_valid_data_blkaddr(map.m_pblk) ?
  1777. F2FS_BLK_TO_BYTES(map.m_pblk) : 0;
  1778. size = F2FS_BLK_TO_BYTES(map.m_len);
  1779. flags = 0;
  1780. if (compr_cluster) {
  1781. flags = FIEMAP_EXTENT_ENCODED;
  1782. count_in_cluster += map.m_len;
  1783. if (count_in_cluster == cluster_size) {
  1784. compr_cluster = false;
  1785. size += F2FS_BLKSIZE;
  1786. }
  1787. } else if (map.m_flags & F2FS_MAP_DELALLOC) {
  1788. flags = FIEMAP_EXTENT_UNWRITTEN;
  1789. }
  1790. start_blk += F2FS_BYTES_TO_BLK(size);
  1791. }
  /* Yield between requests; a pending fatal signal ends the walk. */
  1792. prep_next:
  1793. cond_resched();
  1794. if (fatal_signal_pending(current))
  1795. ret = -EINTR;
  1796. else
  1797. goto next;
  /*
   * A result of 1 is treated as success.  NOTE(review): presumably this is
   * the "no more extents wanted" return of fiemap_fill_next_extent() -
   * confirm against its documentation.
   */
  1798. out:
  1799. if (ret == 1)
  1800. ret = 0;
  1801. inode_unlock_shared(inode);
  1802. return ret;
  1803. }
  1804. static inline loff_t f2fs_readpage_limit(struct inode *inode)
  1805. {
  1806. if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
  1807. return F2FS_BLK_TO_BYTES(max_file_blocks(inode));
  1808. return i_size_read(inode);
  1809. }
  1810. static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac)
  1811. {
  1812. return rac ? REQ_RAHEAD : 0;
  1813. }
  /*
   * Read one folio of @inode, either by zero-filling it or by queueing it
   * on a read bio.
   *
   * @vi:        fs-verity info; when non-NULL, a zero-filled folio is
   *             verified before being marked uptodate.
   * @nr_pages:  bounds the mapping request (at most this many blocks past
   *             the folio's index) and is passed to f2fs_grab_read_bio().
   * @map:       mapping state carried across calls; a previous result is
   *             reused when it still covers this folio's index.
   * @bio_ret:   in/out: the bio currently being built; written back on
   *             every exit path.
   * @last_block_in_bio: in/out: last block number added to the bio, used
   *             for the merge check.
   * @rac:       readahead control, only used to derive the op flags.
   *
   * Returns 0 or a negative errno (-EFSCORRUPTED, -EIO, or the error from
   * f2fs_map_blocks()).  On the zero-fill path the folio is unlocked here.
   */
  1814. static int f2fs_read_single_page(struct inode *inode, struct fsverity_info *vi,
  1815. struct folio *folio, unsigned int nr_pages,
  1816. struct f2fs_map_blocks *map,
  1817. struct bio **bio_ret,
  1818. sector_t *last_block_in_bio,
  1819. struct readahead_control *rac)
  1820. {
  1821. struct bio *bio = *bio_ret;
  1822. const unsigned int blocksize = F2FS_BLKSIZE;
  1823. sector_t block_in_file;
  1824. sector_t last_block;
  1825. sector_t last_block_in_file;
  1826. sector_t block_nr;
  1827. pgoff_t index = folio->index;
  1828. int ret = 0;
  1829. block_in_file = (sector_t)index;
  1830. last_block = block_in_file + nr_pages;
  /* Clip the request to the block that holds the read limit (rounded up). */
  1831. last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
  1832. blocksize - 1);
  1833. if (last_block > last_block_in_file)
  1834. last_block = last_block_in_file;
  1835. /* just zeroing out page which is beyond EOF */
  1836. if (block_in_file >= last_block)
  1837. goto zero_out;
  1838. /*
  1839. * Map blocks using the previous result first.
  1840. */
  1841. if (map->m_flags & F2FS_MAP_MAPPED) {
  1842. if (block_in_file > map->m_lblk &&
  1843. block_in_file < (map->m_lblk + map->m_len))
  1844. goto got_it;
  /* Previous result was a hole known to extend up to *m_next_pgofs. */
  1845. } else if (block_in_file < *map->m_next_pgofs) {
  1846. goto got_it;
  1847. }
  1848. /*
  1849. * Then do more f2fs_map_blocks() calls until we are
  1850. * done with this page.
  1851. */
  1852. map->m_lblk = block_in_file;
  1853. map->m_len = last_block - block_in_file;
  1854. ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
  1855. if (ret)
  1856. goto out;
  1857. got_it:
  1858. if ((map->m_flags & F2FS_MAP_MAPPED)) {
  1859. block_nr = map->m_pblk + block_in_file - map->m_lblk;
  1860. folio_set_mappedtodisk(folio);
  1861. if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
  1862. DATA_GENERIC_ENHANCE_READ)) {
  1863. ret = -EFSCORRUPTED;
  1864. goto out;
  1865. }
  1866. } else {
  /* Also entered directly by the beyond-EOF jump above. */
  1867. zero_out:
  1868. folio_zero_segment(folio, 0, folio_size(folio));
  1869. if (vi && !fsverity_verify_folio(vi, folio)) {
  1870. ret = -EIO;
  1871. goto out;
  1872. }
  1873. if (!folio_test_uptodate(folio))
  1874. folio_mark_uptodate(folio);
  1875. folio_unlock(folio);
  1876. goto out;
  1877. }
  1878. /*
  1879. * This page will go to BIO. Do we need to send this
  1880. * BIO off first?
  1881. */
  1882. if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
  1883. *last_block_in_bio, block_nr) ||
  1884. !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
  /* Also the retry target when bio_add_folio() below cannot add the folio. */
  1885. submit_and_realloc:
  1886. f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
  1887. bio = NULL;
  1888. }
  1889. if (bio == NULL)
  1890. bio = f2fs_grab_read_bio(inode, vi, block_nr, nr_pages,
  1891. f2fs_ra_op_flags(rac), index, false);
  1892. /*
  1893. * If the page is under writeback, we need to wait for
  1894. * its completion to see the correct decrypted data.
  1895. */
  1896. f2fs_wait_on_block_writeback(inode, block_nr);
  1897. if (!bio_add_folio(bio, folio, blocksize, 0))
  1898. goto submit_and_realloc;
  1899. inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
  1900. f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
  1901. F2FS_BLKSIZE);
  1902. *last_block_in_bio = block_nr;
  /* Every exit hands the (possibly replaced or NULL) bio back to the caller. */
  1903. out:
  1904. *bio_ret = bio;
  1905. return ret;
  1906. }
  1907. #ifdef CONFIG_F2FS_FS_COMPRESSION
  /*
   * Queue reads for the compressed blocks of the cluster described by @cc.
   *
   * @cc:        compress context; cc->rpages holds the cluster's raw pages.
   * @bio_ret:   in/out: bio currently being built; written back on exit.
   * @nr_pages:  size hint passed to f2fs_grab_read_bio().
   * @last_block_in_bio: in/out: last block added to the bio (merge check).
   * @rac:       readahead control, only used to derive the op flags.
   * @for_write: passed to f2fs_grab_read_bio(); when set, pages dropped
   *             from the cluster here also get folio_put().
   *
   * Returns 0 on success (including "nothing to read") or a negative
   * errno.  On the error exits every page still in cc->rpages has its
   * uptodate flag cleared and is unlocked.
   */
  1908. int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
  1909. unsigned nr_pages, sector_t *last_block_in_bio,
  1910. struct readahead_control *rac, bool for_write)
  1911. {
  1912. struct dnode_of_data dn;
  1913. struct inode *inode = cc->inode;
  1914. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1915. struct bio *bio = *bio_ret;
  1916. unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
  1917. sector_t last_block_in_file;
  1918. const unsigned int blocksize = F2FS_BLKSIZE;
  1919. struct decompress_io_ctx *dic = NULL;
  1920. struct extent_info ei = {};
  1921. bool from_dnode = true;
  1922. int i;
  1923. int ret = 0;
  /* from_dnode is cleared so the error exit does not put an unset dnode. */
  1924. if (unlikely(f2fs_cp_error(sbi))) {
  1925. ret = -EIO;
  1926. from_dnode = false;
  1927. goto out_put_dnode;
  1928. }
  1929. f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
  1930. last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
  1931. blocksize - 1);
  1932. /* get rid of pages beyond EOF */
  1933. for (i = 0; i < cc->cluster_size; i++) {
  1934. struct page *page = cc->rpages[i];
  1935. struct folio *folio;
  1936. if (!page)
  1937. continue;
  1938. folio = page_folio(page);
  1939. if ((sector_t)folio->index >= last_block_in_file) {
  1940. folio_zero_segment(folio, 0, folio_size(folio));
  1941. if (!folio_test_uptodate(folio))
  1942. folio_mark_uptodate(folio);
  /* In-range pages that are not uptodate stay in the cluster to be read. */
  1943. } else if (!folio_test_uptodate(folio)) {
  1944. continue;
  1945. }
  /* Beyond-EOF or already-uptodate page: release it from the cluster. */
  1946. folio_unlock(folio);
  1947. if (for_write)
  1948. folio_put(folio);
  1949. cc->rpages[i] = NULL;
  1950. cc->nr_rpages--;
  1951. }
  1952. /* we are done since all pages are beyond EOF */
  1953. if (f2fs_cluster_is_empty(cc))
  1954. goto out;
  /* Prefer the extent cache; fall back to reading the dnode. */
  1955. if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
  1956. from_dnode = false;
  1957. if (!from_dnode)
  1958. goto skip_reading_dnode;
  1959. set_new_dnode(&dn, inode, NULL, NULL, 0);
  1960. ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
  1961. if (ret)
  1962. goto out;
  1963. f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
  1964. skip_reading_dnode:
  /* Count the cluster's valid compressed blocks, starting after slot 0. */
  1965. for (i = 1; i < cc->cluster_size; i++) {
  1966. block_t blkaddr;
  1967. blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio,
  1968. dn.ofs_in_node + i) :
  1969. ei.blk + i - 1;
  1970. if (!__is_valid_data_blkaddr(blkaddr))
  1971. break;
  1972. if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
  1973. ret = -EFAULT;
  1974. goto out_put_dnode;
  1975. }
  1976. cc->nr_cpages++;
  1977. if (!from_dnode && i >= ei.c_len)
  1978. break;
  1979. }
  1980. /* nothing to decompress */
  1981. if (cc->nr_cpages == 0) {
  1982. ret = 0;
  1983. goto out_put_dnode;
  1984. }
  1985. dic = f2fs_alloc_dic(cc);
  1986. if (IS_ERR(dic)) {
  1987. ret = PTR_ERR(dic);
  1988. goto out_put_dnode;
  1989. }
  /* Queue one read per compressed page, merging into the bio when possible. */
  1990. for (i = 0; i < cc->nr_cpages; i++) {
  1991. struct folio *folio = page_folio(dic->cpages[i]);
  1992. block_t blkaddr;
  1993. struct bio_post_read_ctx *ctx;
  1994. blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio,
  1995. dn.ofs_in_node + i + 1) :
  1996. ei.blk + i;
  1997. f2fs_wait_on_block_writeback(inode, blkaddr);
  /*
   * The folio was filled without a bio; once no pages remain pending the
   * cluster is decompressed directly and the loop ends.
   */
  1998. if (f2fs_load_compressed_folio(sbi, folio, blkaddr)) {
  1999. if (atomic_dec_and_test(&dic->remaining_pages)) {
  2000. f2fs_decompress_cluster(dic, true);
  2001. break;
  2002. }
  2003. continue;
  2004. }
  2005. if (bio && (!page_is_mergeable(sbi, bio,
  2006. *last_block_in_bio, blkaddr) ||
  2007. !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) {
  /* Also the retry target when bio_add_folio() below cannot add the folio. */
  2008. submit_and_realloc:
  2009. f2fs_submit_read_bio(sbi, bio, DATA);
  2010. bio = NULL;
  2011. }
  2012. if (!bio)
  2013. bio = f2fs_grab_read_bio(inode, cc->vi, blkaddr,
  2014. nr_pages - i,
  2015. f2fs_ra_op_flags(rac),
  2016. folio->index, for_write);
  2017. if (!bio_add_folio(bio, folio, blocksize, 0))
  2018. goto submit_and_realloc;
  /* Request the decompress step and take a dic reference for this page. */
  2019. ctx = get_post_read_ctx(bio);
  2020. ctx->enabled_steps |= STEP_DECOMPRESS;
  2021. refcount_inc(&dic->refcnt);
  2022. inc_page_count(sbi, F2FS_RD_DATA);
  2023. f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
  2024. *last_block_in_bio = blkaddr;
  2025. }
  2026. if (from_dnode)
  2027. f2fs_put_dnode(&dn);
  2028. *bio_ret = bio;
  2029. return 0;
  2030. out_put_dnode:
  2031. if (from_dnode)
  2032. f2fs_put_dnode(&dn);
  /* Shared exit: mark remaining raw pages not uptodate and unlock them. */
  2033. out:
  2034. for (i = 0; i < cc->cluster_size; i++) {
  2035. if (cc->rpages[i]) {
  2036. ClearPageUptodate(cc->rpages[i]);
  2037. unlock_page(cc->rpages[i]);
  2038. }
  2039. }
  2040. *bio_ret = bio;
  2041. return ret;
  2042. }
  2043. #endif
  2044. static struct f2fs_folio_state *ffs_find_or_alloc(struct folio *folio)
  2045. {
  2046. struct f2fs_folio_state *ffs = folio->private;
  2047. if (ffs)
  2048. return ffs;
  2049. ffs = f2fs_kmem_cache_alloc(ffs_entry_slab,
  2050. GFP_NOIO | __GFP_ZERO, true, NULL);
  2051. spin_lock_init(&ffs->state_lock);
  2052. folio_attach_private(folio, ffs);
  2053. return ffs;
  2054. }
  2055. static void ffs_detach_free(struct folio *folio)
  2056. {
  2057. struct f2fs_folio_state *ffs;
  2058. if (!folio_test_large(folio)) {
  2059. folio_detach_private(folio);
  2060. return;
  2061. }
  2062. ffs = folio_detach_private(folio);
  2063. if (!ffs)
  2064. return;
  2065. WARN_ON_ONCE(ffs->read_pages_pending != 0);
  2066. kmem_cache_free(ffs_entry_slab, ffs);
  2067. }
  /*
   * Read path used when the mapping supports large folios.
   *
   * @vi:    fs-verity info; when non-NULL, zero-filled (hole) pages are
   *         verified with fsverity_verify_blocks().
   * @rac:   readahead control; when non-NULL the folios are taken from it
   *         with readahead_folio() and @folio is ignored on entry.
   * @folio: the single folio to read when @rac is NULL.
   *
   * Only immutable, non-compressed inodes are handled; anything else gets
   * -EOPNOTSUPP (after unlocking @folio if one was passed).  Each page of
   * a folio is either zero-filled (hole) or added to a read bio; for large
   * folios a per-folio read_pages_pending counter is incremented once per
   * page queued.
   *
   * Returns 0 or a negative errno.
   */
  2068. static int f2fs_read_data_large_folio(struct inode *inode,
  2069. struct fsverity_info *vi,
  2070. struct readahead_control *rac, struct folio *folio)
  2071. {
  2072. struct bio *bio = NULL;
  2073. sector_t last_block_in_bio = 0;
  2074. struct f2fs_map_blocks map = {0, };
  2075. pgoff_t index, offset, next_pgofs = 0;
  2076. unsigned max_nr_pages = rac ? readahead_count(rac) :
  2077. folio_nr_pages(folio);
  2078. unsigned nrpages;
  2079. struct f2fs_folio_state *ffs;
  2080. int ret = 0;
  2081. bool folio_in_bio;
  2082. if (!IS_IMMUTABLE(inode) || f2fs_compressed_file(inode)) {
  2083. if (folio)
  2084. folio_unlock(folio);
  2085. return -EOPNOTSUPP;
  2086. }
  2087. map.m_seg_type = NO_CHECK_TYPE;
  2088. if (rac)
  2089. folio = readahead_folio(rac);
  /* One pass from next_folio processes every page of a single folio. */
  2090. next_folio:
  2091. if (!folio)
  2092. goto out;
  2093. folio_in_bio = false;
  2094. index = folio->index;
  2095. offset = 0;
  2096. ffs = NULL;
  2097. nrpages = folio_nr_pages(folio);
  /* index is the file page index, offset the page's position in the folio. */
  2098. for (; nrpages; nrpages--, max_nr_pages--, index++, offset++) {
  2099. sector_t block_nr;
  2100. /*
  2101. * Map blocks using the previous result first.
  2102. */
  2103. if (map.m_flags & F2FS_MAP_MAPPED) {
  2104. if (index > map.m_lblk &&
  2105. index < (map.m_lblk + map.m_len))
  2106. goto got_it;
  2107. } else if (index < next_pgofs) {
  2108. /* hole case */
  2109. goto got_it;
  2110. }
  2111. /*
  2112. * Then do more f2fs_map_blocks() calls until we are
  2113. * done with this page.
  2114. */
  2115. memset(&map, 0, sizeof(map));
  2116. map.m_next_pgofs = &next_pgofs;
  2117. map.m_seg_type = NO_CHECK_TYPE;
  2118. map.m_lblk = index;
  2119. map.m_len = max_nr_pages;
  2120. ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
  2121. if (ret)
  2122. goto err_out;
  2123. got_it:
  2124. if ((map.m_flags & F2FS_MAP_MAPPED)) {
  2125. block_nr = map.m_pblk + index - map.m_lblk;
  2126. if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
  2127. DATA_GENERIC_ENHANCE_READ)) {
  2128. ret = -EFSCORRUPTED;
  2129. goto err_out;
  2130. }
  /* Hole: zero just this page of the folio; no bio is involved. */
  2131. } else {
  2132. size_t page_offset = offset << PAGE_SHIFT;
  2133. folio_zero_range(folio, page_offset, PAGE_SIZE);
  2134. if (vi && !fsverity_verify_blocks(vi, folio, PAGE_SIZE, page_offset)) {
  2135. ret = -EIO;
  2136. goto err_out;
  2137. }
  2138. continue;
  2139. }
  2140. /* We must increment read_pages_pending before possible BIOs submitting
  2141. * to prevent from premature folio_end_read() call on folio
  2142. */
  2143. if (folio_test_large(folio)) {
  2144. ffs = ffs_find_or_alloc(folio);
  2145. /* set the bitmap to wait */
  2146. spin_lock_irq(&ffs->state_lock);
  2147. ffs->read_pages_pending++;
  2148. spin_unlock_irq(&ffs->state_lock);
  2149. }
  2150. /*
  2151. * This page will go to BIO. Do we need to send this
  2152. * BIO off first?
  2153. */
  2154. if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
  2155. last_block_in_bio, block_nr) ||
  2156. !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
  /* Also the retry target when bio_add_folio() below cannot add the page. */
  2157. submit_and_realloc:
  2158. f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
  2159. bio = NULL;
  2160. }
  2161. if (bio == NULL)
  2162. bio = f2fs_grab_read_bio(inode, vi,
  2163. block_nr, max_nr_pages,
  2164. f2fs_ra_op_flags(rac),
  2165. index, false);
  2166. /*
  2167. * If the page is under writeback, we need to wait for
  2168. * its completion to see the correct decrypted data.
  2169. */
  2170. f2fs_wait_on_block_writeback(inode, block_nr);
  2171. if (!bio_add_folio(bio, folio, F2FS_BLKSIZE,
  2172. offset << PAGE_SHIFT))
  2173. goto submit_and_realloc;
  2174. folio_in_bio = true;
  2175. inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
  2176. f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
  2177. F2FS_BLKSIZE);
  2178. last_block_in_bio = block_nr;
  2179. }
  2180. trace_f2fs_read_folio(folio, DATA);
  /*
   * Reached on both success and error.  A folio with no page in a bio is
   * completed here; an error in that case is returned immediately.
   */
  2181. err_out:
  2182. if (!folio_in_bio) {
  2183. folio_end_read(folio, !ret);
  2184. if (ret)
  2185. return ret;
  2186. }
  2187. if (rac) {
  2188. folio = readahead_folio(rac);
  2189. goto next_folio;
  2190. }
  /*
   * NOTE(review): this label is also reached from next_folio with
   * folio == NULL.  If ret is still non-zero at that point (an error hit
   * after part of a readahead folio was already queued), the folio_lock()
   * below would be called with NULL - confirm whether that can occur.
   */
  2191. out:
  2192. f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
  2193. if (ret) {
  2194. /* Wait bios and clear uptodate. */
  2195. folio_lock(folio);
  2196. folio_clear_uptodate(folio);
  2197. folio_unlock(folio);
  2198. }
  2199. return ret;
  2200. }
  2201. /*
  2202. * This function was originally taken from fs/mpage.c, and customized for f2fs.
  2203. * Major change was from block_size == page_size in f2fs by default.
  2204. */
  /*
   * f2fs_mpage_readpages - read a single folio or a whole readahead batch.
   * @inode: inode the data belongs to
   * @vi: fs-verity info, forwarded to the per-page / per-cluster readers
   * @rac: readahead control, or NULL when only @folio is to be read
   * @folio: folio to read when @rac is NULL; otherwise folios are pulled
   *         from @rac one at a time
   *
   * Mappings with large folio support are delegated to
   * f2fs_read_data_large_folio().  Otherwise every page goes through
   * f2fs_read_single_page(), or, for compressed files, is collected in a
   * compress context that is read with f2fs_read_multi_pages().  The bio
   * that is still pending after the loop is submitted before returning.
   *
   * Return: the value left in ret by the last iteration; ret is reassigned
   * on every iteration, so an error from an earlier page can be overwritten.
   */
  2205. static int f2fs_mpage_readpages(struct inode *inode, struct fsverity_info *vi,
  2206. struct readahead_control *rac, struct folio *folio)
  2207. {
  2208. struct bio *bio = NULL;
  2209. sector_t last_block_in_bio = 0;
  2210. struct f2fs_map_blocks map;
  2211. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2212. struct compress_ctx cc = {
  2213. .inode = inode,
  2214. .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
  2215. .cluster_size = F2FS_I(inode)->i_cluster_size,
  2216. .cluster_idx = NULL_CLUSTER,
  2217. .rpages = NULL,
  2218. .cpages = NULL,
  2219. .nr_rpages = 0,
  2220. .nr_cpages = 0,
  2221. };
  /* index of the most recent cluster found to be NOT compressed */
  2222. pgoff_t nc_cluster_idx = NULL_CLUSTER;
  2223. pgoff_t index;
  2224. #endif
  2225. pgoff_t next_pgofs = 0;
  2226. unsigned nr_pages = rac ? readahead_count(rac) : 1;
  2227. struct address_space *mapping = rac ? rac->mapping : folio->mapping;
  2228. unsigned max_nr_pages = nr_pages;
  2229. int ret = 0;
  /* mappings with large folio support use the dedicated reader */
  2230. if (mapping_large_folio_support(mapping))
  2231. return f2fs_read_data_large_folio(inode, vi, rac, folio);
  2232. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2233. if (f2fs_compressed_file(inode)) {
  2234. index = rac ? readahead_index(rac) : folio->index;
  /* widen the page-count hint outward to cluster boundaries */
  2235. max_nr_pages = round_up(index + nr_pages, cc.cluster_size) -
  2236. round_down(index, cc.cluster_size);
  2237. }
  2238. #endif
  2239. map.m_pblk = 0;
  2240. map.m_lblk = 0;
  2241. map.m_len = 0;
  2242. map.m_flags = 0;
  2243. map.m_next_pgofs = &next_pgofs;
  2244. map.m_next_extent = NULL;
  2245. map.m_seg_type = NO_CHECK_TYPE;
  2246. map.m_may_create = false;
  /* one iteration per page; with @rac the next folio comes from the batch */
  2247. for (; nr_pages; nr_pages--) {
  2248. if (rac) {
  2249. folio = readahead_folio(rac);
  2250. prefetchw(&folio->flags);
  2251. }
  2252. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2253. index = folio->index;
  2254. if (!f2fs_compressed_file(inode))
  2255. goto read_single_page;
  2256. /* there are remained compressed pages, submit them */
  2257. if (!f2fs_cluster_can_merge_page(&cc, index)) {
  2258. cc.vi = vi;
  2259. ret = f2fs_read_multi_pages(&cc, &bio,
  2260. max_nr_pages,
  2261. &last_block_in_bio,
  2262. rac, false);
  2263. f2fs_destroy_compress_ctx(&cc, false);
  2264. if (ret)
  2265. goto set_error_page;
  2266. }
  /*
   * No cluster is being collected: decide whether this page's cluster is
   * compressed.  A negative answer is cached in nc_cluster_idx so the
   * following pages of the same cluster skip the lookup.
   */
  2267. if (cc.cluster_idx == NULL_CLUSTER) {
  2268. if (nc_cluster_idx == index >> cc.log_cluster_size)
  2269. goto read_single_page;
  2270. ret = f2fs_is_compressed_cluster(inode, index);
  2271. if (ret < 0)
  2272. goto set_error_page;
  2273. else if (!ret) {
  2274. nc_cluster_idx =
  2275. index >> cc.log_cluster_size;
  2276. goto read_single_page;
  2277. }
  2278. nc_cluster_idx = NULL_CLUSTER;
  2279. }
  /* compressed cluster: queue the folio in the compress context */
  2280. ret = f2fs_init_compress_ctx(&cc);
  2281. if (ret)
  2282. goto set_error_page;
  2283. f2fs_compress_ctx_add_page(&cc, folio);
  2284. goto next_page;
  2285. read_single_page:
  2286. #endif
  2287. ret = f2fs_read_single_page(inode, vi, folio, max_nr_pages,
  2288. &map, &bio, &last_block_in_bio,
  2289. rac);
  2290. if (ret) {
  2291. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2292. set_error_page:
  2293. #endif
  /* on failure the folio is zero-filled and unlocked */
  2294. folio_zero_segment(folio, 0, folio_size(folio));
  2295. folio_unlock(folio);
  2296. }
  2297. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2298. next_page:
  2299. #endif
  2300. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2301. if (f2fs_compressed_file(inode)) {
  2302. /* last page */
  2303. if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
  2304. cc.vi = vi;
  2305. ret = f2fs_read_multi_pages(&cc, &bio,
  2306. max_nr_pages,
  2307. &last_block_in_bio,
  2308. rac, false);
  2309. f2fs_destroy_compress_ctx(&cc, false);
  2310. }
  2311. }
  2312. #endif
  2313. }
  /* submit whatever is still pending in the bio */
  2314. f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
  2315. return ret;
  2316. }
  2317. static int f2fs_read_data_folio(struct file *file, struct folio *folio)
  2318. {
  2319. struct inode *inode = folio->mapping->host;
  2320. struct fsverity_info *vi = NULL;
  2321. int ret;
  2322. trace_f2fs_readpage(folio, DATA);
  2323. if (!f2fs_is_compress_backend_ready(inode)) {
  2324. folio_unlock(folio);
  2325. return -EOPNOTSUPP;
  2326. }
  2327. /* If the file has inline data, try to read it directly */
  2328. if (f2fs_has_inline_data(inode)) {
  2329. ret = f2fs_read_inline_data(inode, folio);
  2330. if (ret != -EAGAIN)
  2331. return ret;
  2332. }
  2333. vi = f2fs_need_verity(inode, folio->index);
  2334. if (vi)
  2335. fsverity_readahead(vi, folio->index, folio_nr_pages(folio));
  2336. return f2fs_mpage_readpages(inode, vi, NULL, folio);
  2337. }
  2338. static void f2fs_readahead(struct readahead_control *rac)
  2339. {
  2340. struct inode *inode = rac->mapping->host;
  2341. struct fsverity_info *vi = NULL;
  2342. trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
  2343. if (!f2fs_is_compress_backend_ready(inode))
  2344. return;
  2345. /* If the file has inline data, skip readahead */
  2346. if (f2fs_has_inline_data(inode))
  2347. return;
  2348. vi = f2fs_need_verity(inode, readahead_index(rac));
  2349. if (vi)
  2350. fsverity_readahead(vi, readahead_index(rac),
  2351. readahead_count(rac));
  2352. f2fs_mpage_readpages(inode, vi, rac, NULL);
  2353. }
  2354. int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
  2355. {
  2356. struct inode *inode = fio_inode(fio);
  2357. struct folio *mfolio;
  2358. struct page *page;
  2359. gfp_t gfp_flags = GFP_NOFS;
  2360. if (!f2fs_encrypted_file(inode))
  2361. return 0;
  2362. page = fio->compressed_page ? fio->compressed_page : fio->page;
  2363. if (fscrypt_inode_uses_inline_crypto(inode))
  2364. return 0;
  2365. retry_encrypt:
  2366. fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page),
  2367. PAGE_SIZE, 0, gfp_flags);
  2368. if (IS_ERR(fio->encrypted_page)) {
  2369. /* flush pending IOs and wait for a while in the ENOMEM case */
  2370. if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
  2371. f2fs_flush_merged_writes(fio->sbi);
  2372. memalloc_retry_wait(GFP_NOFS);
  2373. gfp_flags |= __GFP_NOFAIL;
  2374. goto retry_encrypt;
  2375. }
  2376. return PTR_ERR(fio->encrypted_page);
  2377. }
  2378. mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr);
  2379. if (!IS_ERR(mfolio)) {
  2380. if (folio_test_uptodate(mfolio))
  2381. memcpy(folio_address(mfolio),
  2382. page_address(fio->encrypted_page), PAGE_SIZE);
  2383. f2fs_folio_put(mfolio, true);
  2384. }
  2385. return 0;
  2386. }
  2387. static inline bool check_inplace_update_policy(struct inode *inode,
  2388. struct f2fs_io_info *fio)
  2389. {
  2390. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  2391. if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
  2392. is_inode_flag_set(inode, FI_OPU_WRITE))
  2393. return false;
  2394. if (IS_F2FS_IPU_FORCE(sbi))
  2395. return true;
  2396. if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
  2397. return true;
  2398. if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
  2399. return true;
  2400. if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
  2401. utilization(sbi) > SM_I(sbi)->min_ipu_util)
  2402. return true;
  2403. /*
  2404. * IPU for rewrite async pages
  2405. */
  2406. if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
  2407. !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
  2408. return true;
  2409. /* this is only set during fdatasync */
  2410. if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
  2411. return true;
  2412. if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
  2413. !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
  2414. return true;
  2415. return false;
  2416. }
  2417. bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
  2418. {
  2419. /* swap file is migrating in aligned write mode */
  2420. if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
  2421. return false;
  2422. if (f2fs_is_pinned_file(inode))
  2423. return true;
  2424. /* if this is cold file, we should overwrite to avoid fragmentation */
  2425. if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
  2426. return true;
  2427. return check_inplace_update_policy(inode, fio);
  2428. }
  2429. bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
  2430. {
  2431. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  2432. /* The below cases were checked when setting it. */
  2433. if (f2fs_is_pinned_file(inode))
  2434. return false;
  2435. if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
  2436. return true;
  2437. if (f2fs_lfs_mode(sbi))
  2438. return true;
  2439. if (S_ISDIR(inode->i_mode))
  2440. return true;
  2441. if (IS_NOQUOTA(inode))
  2442. return true;
  2443. if (f2fs_used_in_atomic_write(inode))
  2444. return true;
  2445. /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */
  2446. if (f2fs_compressed_file(inode) &&
  2447. F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER &&
  2448. is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
  2449. return true;
  2450. /* swap file is migrating in aligned write mode */
  2451. if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
  2452. return true;
  2453. if (is_inode_flag_set(inode, FI_OPU_WRITE))
  2454. return true;
  2455. if (fio) {
  2456. if (page_private_gcing(fio->page))
  2457. return true;
  2458. if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
  2459. f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
  2460. return true;
  2461. }
  2462. return false;
  2463. }
  2464. static inline bool need_inplace_update(struct f2fs_io_info *fio)
  2465. {
  2466. struct inode *inode = fio_inode(fio);
  2467. if (f2fs_should_update_outplace(inode, fio))
  2468. return false;
  2469. return f2fs_should_update_inplace(inode, fio);
  2470. }
  /*
   * f2fs_do_write_data_page - write the data folio described by @fio.
   * @fio: I/O descriptor; fio->folio is the folio to write.  The block
   *       address found for the folio (read extent cache or dnode) is
   *       stored in fio->old_blkaddr.
   *
   * Picks between an in-place update (IPU, f2fs_inplace_write_data()) and
   * an out-of-place update (OPU, f2fs_outplace_write_data()).
   *
   * Return: 0 on success, and also when the folio turns out to be truncated
   * (old block address is NULL_ADDR); -EFSCORRUPTED for an invalid old block
   * address; -EAGAIN when f2fs_trylock_op() fails; -EINVAL when the
   * FAULT_SKIP_WRITE injection fires; otherwise the error of a helper.
   */
  2471. int f2fs_do_write_data_page(struct f2fs_io_info *fio)
  2472. {
  2473. struct folio *folio = fio->folio;
  2474. struct inode *inode = folio->mapping->host;
  2475. struct dnode_of_data dn;
  2476. struct node_info ni;
  2477. struct f2fs_lock_context lc;
  2478. bool ipu_force = false;
  2479. bool atomic_commit;
  2480. int err = 0;
  2481. /* Use COW inode to make dnode_of_data for atomic write */
  2482. atomic_commit = f2fs_is_atomic_file(inode) &&
  2483. folio_test_f2fs_atomic(folio);
  2484. if (atomic_commit)
  2485. set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
  2486. else
  2487. set_new_dnode(&dn, inode, NULL, NULL, 0);
  /*
   * Fast path: IPU is wanted and the read extent cache already knows the
   * block address, so the dnode lookup and the op lock are skipped.
   */
  2488. if (need_inplace_update(fio) &&
  2489. f2fs_lookup_read_extent_cache_block(inode, folio->index,
  2490. &fio->old_blkaddr)) {
  2491. if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
  2492. DATA_GENERIC_ENHANCE))
  2493. return -EFSCORRUPTED;
  2494. ipu_force = true;
  2495. fio->need_lock = LOCK_DONE;
  2496. goto got_it;
  2497. }
  /* fault injection point, only evaluated with SBI_ENABLE_CHECKPOINT set */
  2498. if (is_sbi_flag_set(fio->sbi, SBI_ENABLE_CHECKPOINT) &&
  2499. time_to_inject(fio->sbi, FAULT_SKIP_WRITE))
  2500. return -EINVAL;
  2501. /* Deadlock due to between page->lock and f2fs_lock_op */
  2502. if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi, &lc))
  2503. return -EAGAIN;
  2504. err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
  2505. if (err)
  2506. goto out;
  2507. fio->old_blkaddr = dn.data_blkaddr;
  2508. /* This page is already truncated */
  2509. if (fio->old_blkaddr == NULL_ADDR) {
  2510. folio_clear_uptodate(folio);
  2511. folio_clear_f2fs_gcing(folio);
  2512. goto out_writepage;
  2513. }
  2514. got_it:
  2515. if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
  2516. !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
  2517. DATA_GENERIC_ENHANCE)) {
  2518. err = -EFSCORRUPTED;
  2519. goto out_writepage;
  2520. }
  2521. /* wait for GCed page writeback via META_MAPPING */
  2522. if (fio->meta_gc)
  2523. f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
  2524. /*
  2525. * If current allocation needs SSR,
  2526. * it had better in-place writes for updated data.
  2527. */
  2528. if (ipu_force ||
  2529. (__is_valid_data_blkaddr(fio->old_blkaddr) &&
  2530. need_inplace_update(fio))) {
  2531. err = f2fs_encrypt_one_page(fio);
  2532. if (err)
  2533. goto out_writepage;
  2534. folio_start_writeback(folio);
  2535. f2fs_put_dnode(&dn);
  /* the dnode and the op lock are released before the in-place write */
  2536. if (fio->need_lock == LOCK_REQ)
  2537. f2fs_unlock_op(fio->sbi, &lc);
  2538. err = f2fs_inplace_write_data(fio);
  2539. if (err) {
  /* undo on failure: drop the bounce page and end writeback */
  2540. if (fscrypt_inode_uses_fs_layer_crypto(inode))
  2541. fscrypt_finalize_bounce_page(&fio->encrypted_page);
  2542. folio_end_writeback(folio);
  2543. } else {
  2544. set_inode_flag(inode, FI_UPDATE_WRITE);
  2545. }
  2546. trace_f2fs_do_write_data_page(folio, IPU);
  2547. return err;
  2548. }
  /*
   * LOCK_RETRY: try to take the op lock now; switching to LOCK_REQ makes
   * the exit path below release it.
   */
  2549. if (fio->need_lock == LOCK_RETRY) {
  2550. if (!f2fs_trylock_op(fio->sbi, &lc)) {
  2551. err = -EAGAIN;
  2552. goto out_writepage;
  2553. }
  2554. fio->need_lock = LOCK_REQ;
  2555. }
  2556. err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
  2557. if (err)
  2558. goto out_writepage;
  2559. fio->version = ni.version;
  2560. err = f2fs_encrypt_one_page(fio);
  2561. if (err)
  2562. goto out_writepage;
  2563. folio_start_writeback(folio);
  2564. if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
  2565. f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
  2566. /* LFS mode write path */
  2567. f2fs_outplace_write_data(&dn, fio);
  2568. trace_f2fs_do_write_data_page(folio, OPU);
  2569. set_inode_flag(inode, FI_APPEND_WRITE);
  2570. if (atomic_commit)
  2571. folio_clear_f2fs_atomic(folio);
  2572. out_writepage:
  2573. f2fs_put_dnode(&dn);
  2574. out:
  2575. if (fio->need_lock == LOCK_REQ)
  2576. f2fs_unlock_op(fio->sbi, &lc);
  2577. return err;
  2578. }
  /*
   * f2fs_write_single_data_page - write back one dirty data folio.
   * @folio: folio to write; it is unlocked on every exit path
   * @submitted: if not NULL, receives fio.submitted (left untouched when a
   *             checkpoint error is seen at the end)
   * @bio: forwarded to the write path through fio.bio
   * @last_block: forwarded to the write path through fio.last_block
   * @wbc: writeback control
   * @io_type: iostat type recorded in the I/O descriptor
   * @compr_blocks: forwarded through fio.compr_blocks; a non-zero value
   *                also skips the i_size checks and starts with LOCK_DONE
   * @allow_balance: allow the f2fs_balance_fs() call after the write
   *
   * Return: 0 when the folio was written or dropped; 1 when it was
   * redirtied without an error; a negative errno when it was redirtied
   * because of an error.
   */
  2579. int f2fs_write_single_data_page(struct folio *folio, int *submitted,
  2580. struct bio **bio,
  2581. sector_t *last_block,
  2582. struct writeback_control *wbc,
  2583. enum iostat_type io_type,
  2584. int compr_blocks,
  2585. bool allow_balance)
  2586. {
  2587. struct inode *inode = folio->mapping->host;
  2588. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  2589. loff_t i_size = i_size_read(inode);
  2590. const pgoff_t end_index = ((unsigned long long)i_size)
  2591. >> PAGE_SHIFT;
  2592. loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT;
  2593. unsigned offset = 0;
  2594. bool need_balance_fs = false;
  2595. bool quota_inode = IS_NOQUOTA(inode);
  2596. int err = 0;
  2597. struct f2fs_io_info fio = {
  2598. .sbi = sbi,
  2599. .ino = inode->i_ino,
  2600. .type = DATA,
  2601. .op = REQ_OP_WRITE,
  2602. .op_flags = wbc_to_write_flags(wbc),
  2603. .old_blkaddr = NULL_ADDR,
  2604. .folio = folio,
  2605. .encrypted_page = NULL,
  2606. .submitted = 0,
  2607. .compr_blocks = compr_blocks,
  2608. .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
  2609. .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0,
  2610. .io_type = io_type,
  2611. .io_wbc = wbc,
  2612. .bio = bio,
  2613. .last_block = last_block,
  2614. };
  2615. trace_f2fs_writepage(folio, DATA);
  2616. /* we should bypass data pages to proceed the kworker jobs */
  2617. if (unlikely(f2fs_cp_error(sbi))) {
  2618. mapping_set_error(folio->mapping, -EIO);
  2619. /*
  2620. * don't drop any dirty dentry pages for keeping latest
  2621. * directory structure.
  2622. */
  2623. if (S_ISDIR(inode->i_mode) &&
  2624. !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
  2625. goto redirty_out;
  2626. /* keep data pages in remount-ro mode */
  2627. if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
  2628. goto redirty_out;
  2629. goto out;
  2630. }
  2631. if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
  2632. goto redirty_out;
  /*
   * Folios entirely below i_size, writes while verity is in progress and
   * compressed-cluster writes skip the i_size tail handling below.
   */
  2633. if (folio->index < end_index ||
  2634. f2fs_verity_in_progress(inode) ||
  2635. compr_blocks)
  2636. goto write;
  2637. /*
  2638. * If the offset is out-of-range of file size,
  2639. * this page does not have to be written to disk.
  2640. */
  2641. offset = i_size & (PAGE_SIZE - 1);
  2642. if ((folio->index >= end_index + 1) || !offset)
  2643. goto out;
  /* the folio straddles i_size: zero the part beyond it */
  2644. folio_zero_segment(folio, offset, folio_size(folio));
  2645. write:
  2646. /* Dentry/quota blocks are controlled by checkpoint */
  2647. if (S_ISDIR(inode->i_mode) || quota_inode) {
  2648. struct f2fs_lock_context lc;
  2649. /*
  2650. * We need to wait for node_write to avoid block allocation during
  2651. * checkpoint. This can only happen to quota writes which can cause
  2652. * the below discard race condition.
  2653. */
  2654. if (quota_inode)
  2655. f2fs_down_read_trace(&sbi->node_write, &lc);
  2656. fio.need_lock = LOCK_DONE;
  2657. err = f2fs_do_write_data_page(&fio);
  2658. if (quota_inode)
  2659. f2fs_up_read_trace(&sbi->node_write, &lc);
  2660. goto done;
  2661. }
  2662. need_balance_fs = true;
  /* -EAGAIN selects the regular write path unless inline data handles it */
  2663. err = -EAGAIN;
  2664. if (f2fs_has_inline_data(inode)) {
  2665. err = f2fs_write_inline_data(inode, folio);
  2666. if (!err)
  2667. goto out;
  2668. }
  2669. if (err == -EAGAIN) {
  2670. err = f2fs_do_write_data_page(&fio);
  /* a second -EAGAIN triggers one retry with need_lock = LOCK_REQ */
  2671. if (err == -EAGAIN) {
  2672. f2fs_bug_on(sbi, compr_blocks);
  2673. fio.need_lock = LOCK_REQ;
  2674. err = f2fs_do_write_data_page(&fio);
  2675. }
  2676. }
  2677. if (err) {
  2678. file_set_keep_isize(inode);
  2679. } else {
  /* record how far the file has been written: last_disk_size only grows */
  2680. spin_lock(&F2FS_I(inode)->i_size_lock);
  2681. if (F2FS_I(inode)->last_disk_size < psize)
  2682. F2FS_I(inode)->last_disk_size = psize;
  2683. spin_unlock(&F2FS_I(inode)->i_size_lock);
  2684. }
  2685. done:
  /* every error except -ENOENT redirties the folio */
  2686. if (err && err != -ENOENT)
  2687. goto redirty_out;
  2688. out:
  2689. inode_dec_dirty_pages(inode);
  2690. if (err) {
  2691. folio_clear_uptodate(folio);
  2692. folio_clear_f2fs_gcing(folio);
  2693. }
  2694. folio_unlock(folio);
  2695. if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
  2696. !F2FS_I(inode)->wb_task && allow_balance)
  2697. f2fs_balance_fs(sbi, need_balance_fs);
  /* on checkpoint error flush merged writes and do not report a count */
  2698. if (unlikely(f2fs_cp_error(sbi))) {
  2699. f2fs_submit_merged_write(sbi, DATA);
  2700. if (bio && *bio)
  2701. f2fs_submit_merged_ipu_write(sbi, bio, NULL);
  2702. submitted = NULL;
  2703. }
  2704. if (submitted)
  2705. *submitted = fio.submitted;
  2706. return 0;
  2707. redirty_out:
  2708. folio_redirty_for_writepage(wbc, folio);
  2709. /*
  2710. * pageout() in MM translates EAGAIN, so calls handle_write_error()
  2711. * -> mapping_set_error() -> set_bit(AS_EIO, ...).
  2712. * file_write_and_wait_range() will see EIO error, which is critical
  2713. * to return value of fsync() followed by atomic_write failure to user.
  2714. */
  2715. folio_unlock(folio);
  /* 1 tells the caller the folio was redirtied without an error */
  2716. if (!err)
  2717. return 1;
  2718. return err;
  2719. }
  2720. /*
  2721. * This function was copied from write_cache_pages from mm/page-writeback.c.
  2722. * The major change is making write step of cold data page separately from
  2723. * warm/hot data page.
  2724. */
  /*
   * f2fs_write_cache_pages - write back the tagged folios of @mapping.
   * @mapping: address space to scan
   * @wbc: writeback control (range, sync mode, nr_to_write budget)
   * @io_type: iostat type forwarded to the page writers
   *
   * Pages are first gathered into a local array (a reference is taken for
   * each) and then written one by one; for compressed files they are
   * collected into a compress context and written per cluster with
   * f2fs_write_multi_pages().
   *
   * Return: 0, or the error that stopped the scan.
   */
  2725. static int f2fs_write_cache_pages(struct address_space *mapping,
  2726. struct writeback_control *wbc,
  2727. enum iostat_type io_type)
  2728. {
  2729. int ret = 0;
  2730. int done = 0, retry = 0;
  2731. struct page *pages_local[F2FS_ONSTACK_PAGES];
  2732. struct page **pages = pages_local;
  2733. struct folio_batch fbatch;
  2734. struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
  2735. struct bio *bio = NULL;
  2736. sector_t last_block;
  2737. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2738. struct inode *inode = mapping->host;
  2739. struct compress_ctx cc = {
  2740. .inode = inode,
  2741. .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
  2742. .cluster_size = F2FS_I(inode)->i_cluster_size,
  2743. .cluster_idx = NULL_CLUSTER,
  2744. .rpages = NULL,
  2745. .nr_rpages = 0,
  2746. .cpages = NULL,
  2747. .valid_nr_cpages = 0,
  2748. .rbuf = NULL,
  2749. .cbuf = NULL,
  2750. .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
  2751. .private = NULL,
  2752. };
  2753. #endif
  2754. int nr_folios, p, idx;
  2755. int nr_pages;
  2756. unsigned int max_pages = F2FS_ONSTACK_PAGES;
  2757. pgoff_t index;
  2758. pgoff_t end; /* Inclusive */
  2759. pgoff_t done_index;
  2760. int range_whole = 0;
  2761. xa_mark_t tag;
  2762. int nwritten = 0;
  2763. int submitted = 0;
  2764. int i;
  2765. #ifdef CONFIG_F2FS_FS_COMPRESSION
  /* a cluster larger than the on-stack array needs a heap page array */
  2766. if (f2fs_compressed_file(inode) &&
  2767. 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) {
  2768. pages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
  2769. cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL);
  2770. max_pages = 1 << cc.log_cluster_size;
  2771. }
  2772. #endif
  2773. folio_batch_init(&fbatch);
  /* FI_HOT_DATA is set while the inode has at most min_hot_blocks dirty pages */
  2774. if (get_dirty_pages(mapping->host) <=
  2775. SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
  2776. set_inode_flag(mapping->host, FI_HOT_DATA);
  2777. else
  2778. clear_inode_flag(mapping->host, FI_HOT_DATA);
  2779. if (wbc->range_cyclic) {
  2780. index = mapping->writeback_index; /* prev offset */
  2781. end = -1;
  2782. } else {
  2783. index = wbc->range_start >> PAGE_SHIFT;
  2784. end = wbc->range_end >> PAGE_SHIFT;
  2785. if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
  2786. range_whole = 1;
  2787. }
  2788. tag = wbc_to_tag(wbc);
  2789. retry:
  2790. retry = 0;
  2791. if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
  2792. tag_pages_for_writeback(mapping, index, end);
  2793. done_index = index;
  2794. while (!done && !retry && (index <= end)) {
  2795. nr_pages = 0;
  /* gather phase: collect up to max_pages pages from tagged folios */
  2796. again:
  2797. nr_folios = filemap_get_folios_tag(mapping, &index, end,
  2798. tag, &fbatch);
  2799. if (nr_folios == 0) {
  2800. if (nr_pages)
  2801. goto write;
  2802. break;
  2803. }
  2804. for (i = 0; i < nr_folios; i++) {
  2805. struct folio *folio = fbatch.folios[i];
  2806. idx = 0;
  2807. p = folio_nr_pages(folio);
  2808. add_more:
  /* one array slot and one folio reference per page of the folio */
  2809. pages[nr_pages] = folio_page(folio, idx);
  2810. folio_get(folio);
  2811. if (++nr_pages == max_pages) {
  /* array full: resume the next lookup right after this page */
  2812. index = folio->index + idx + 1;
  2813. folio_batch_release(&fbatch);
  2814. goto write;
  2815. }
  2816. if (++idx < p)
  2817. goto add_more;
  2818. }
  2819. folio_batch_release(&fbatch);
  2820. goto again;
  /* write phase: process the gathered pages in order */
  2821. write:
  2822. for (i = 0; i < nr_pages; i++) {
  2823. struct page *page = pages[i];
  2824. struct folio *folio = page_folio(page);
  2825. bool need_readd;
  2826. readd:
  2827. need_readd = false;
  2828. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2829. if (f2fs_compressed_file(inode)) {
  2830. void *fsdata = NULL;
  2831. struct page *pagep;
  2832. int ret2;
  2833. ret = f2fs_init_compress_ctx(&cc);
  2834. if (ret) {
  2835. done = 1;
  2836. break;
  2837. }
  /*
   * The folio does not fit the cluster being collected: write that
   * cluster first, then come back for this folio via need_readd.
   */
  2838. if (!f2fs_cluster_can_merge_page(&cc,
  2839. folio->index)) {
  2840. ret = f2fs_write_multi_pages(&cc,
  2841. &submitted, wbc, io_type);
  2842. if (!ret)
  2843. need_readd = true;
  2844. goto result;
  2845. }
  2846. if (unlikely(f2fs_cp_error(sbi)))
  2847. goto lock_folio;
  2848. if (!f2fs_cluster_is_empty(&cc))
  2849. goto lock_folio;
  2850. if (f2fs_all_cluster_page_ready(&cc,
  2851. pages, i, nr_pages, true))
  2852. goto lock_folio;
  2853. ret2 = f2fs_prepare_compress_overwrite(
  2854. inode, &pagep,
  2855. folio->index, &fsdata);
  2856. if (ret2 < 0) {
  2857. ret = ret2;
  2858. done = 1;
  2859. break;
  2860. } else if (ret2 &&
  2861. (!f2fs_compress_write_end(inode,
  2862. fsdata, folio->index, 1) ||
  2863. !f2fs_all_cluster_page_ready(&cc,
  2864. pages, i, nr_pages,
  2865. false))) {
  2866. retry = 1;
  2867. break;
  2868. }
  2869. }
  2870. #endif
  2871. /* give a priority to WB_SYNC threads */
  2872. if (atomic_read(&sbi->wb_sync_req[DATA]) &&
  2873. wbc->sync_mode == WB_SYNC_NONE) {
  2874. done = 1;
  2875. break;
  2876. }
  2877. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2878. lock_folio:
  2879. #endif
  2880. done_index = folio->index;
  2881. retry_write:
  2882. folio_lock(folio);
  /* skip folios that no longer belong to this mapping */
  2883. if (unlikely(folio->mapping != mapping)) {
  2884. continue_unlock:
  2885. folio_unlock(folio);
  2886. continue;
  2887. }
  2888. if (!folio_test_dirty(folio)) {
  2889. /* someone wrote it for us */
  2890. goto continue_unlock;
  2891. }
  /* under writeback: skip for WB_SYNC_NONE, otherwise wait for it */
  2892. if (folio_test_writeback(folio)) {
  2893. if (wbc->sync_mode == WB_SYNC_NONE)
  2894. goto continue_unlock;
  2895. f2fs_folio_wait_writeback(folio, DATA, true, true);
  2896. }
  2897. if (!folio_clear_dirty_for_io(folio))
  2898. goto continue_unlock;
  2899. #ifdef CONFIG_F2FS_FS_COMPRESSION
  /* compressed files: queue the folio (with an extra reference) */
  2900. if (f2fs_compressed_file(inode)) {
  2901. folio_get(folio);
  2902. f2fs_compress_ctx_add_page(&cc, folio);
  2903. continue;
  2904. }
  2905. #endif
  2906. submitted = 0;
  2907. ret = f2fs_write_single_data_page(folio,
  2908. &submitted, &bio, &last_block,
  2909. wbc, io_type, 0, true);
  2910. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2911. result:
  2912. #endif
  2913. nwritten += submitted;
  2914. wbc->nr_to_write -= submitted;
  2915. if (unlikely(ret)) {
  2916. /*
  2917. * keep nr_to_write, since vfs uses this to
  2918. * get # of written pages.
  2919. */
  /* 1 means the folio was redirtied without error: just move on */
  2920. if (ret == 1) {
  2921. ret = 0;
  2922. goto next;
  2923. } else if (ret == -EAGAIN) {
  /* WB_SYNC_ALL sleeps and retries the same folio */
  2924. ret = 0;
  2925. if (wbc->sync_mode == WB_SYNC_ALL) {
  2926. f2fs_schedule_timeout(
  2927. DEFAULT_SCHEDULE_TIMEOUT);
  2928. goto retry_write;
  2929. }
  2930. goto next;
  2931. }
  2932. done_index = folio_next_index(folio);
  2933. done = 1;
  2934. break;
  2935. }
  /* WB_SYNC_NONE stops once the nr_to_write budget is used up */
  2936. if (wbc->nr_to_write <= 0 &&
  2937. wbc->sync_mode == WB_SYNC_NONE) {
  2938. done = 1;
  2939. break;
  2940. }
  2941. next:
  2942. if (need_readd)
  2943. goto readd;
  2944. }
  /* drop the references taken while gathering */
  2945. release_pages(pages, nr_pages);
  2946. cond_resched();
  2947. }
  2948. #ifdef CONFIG_F2FS_FS_COMPRESSION
  2949. /* flush remained pages in compress cluster */
  2950. if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
  2951. ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
  2952. nwritten += submitted;
  2953. wbc->nr_to_write -= submitted;
  2954. if (ret) {
  2955. done = 1;
  2956. retry = 0;
  2957. }
  2958. }
  2959. if (f2fs_compressed_file(inode))
  2960. f2fs_destroy_compress_ctx(&cc, false);
  2961. #endif
  /* a requested retry rescans the whole file from index 0 */
  2962. if (retry) {
  2963. index = 0;
  2964. end = -1;
  2965. goto retry;
  2966. }
  2967. if (wbc->range_cyclic && !done)
  2968. done_index = 0;
  2969. if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
  2970. mapping->writeback_index = done_index;
  2971. if (nwritten)
  2972. f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
  2973. NULL, 0, DATA);
  2974. /* submit cached bio of IPU write */
  2975. if (bio)
  2976. f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
  2977. #ifdef CONFIG_F2FS_FS_COMPRESSION
  /* free the page array only if it was heap-allocated above */
  2978. if (pages != pages_local)
  2979. kfree(pages);
  2980. #endif
  2981. return ret;
  2982. }
  2983. static inline bool __should_serialize_io(struct inode *inode,
  2984. struct writeback_control *wbc)
  2985. {
  2986. /* to avoid deadlock in path of data flush */
  2987. if (F2FS_I(inode)->wb_task)
  2988. return false;
  2989. if (!S_ISREG(inode->i_mode))
  2990. return false;
  2991. if (IS_NOQUOTA(inode))
  2992. return false;
  2993. if (f2fs_is_pinned_file(inode))
  2994. return false;
  2995. if (f2fs_need_compress_data(inode))
  2996. return true;
  2997. if (wbc->sync_mode != WB_SYNC_ALL)
  2998. return true;
  2999. if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
  3000. return true;
  3001. return false;
  3002. }
  3003. static inline void account_writeback(struct inode *inode, bool inc)
  3004. {
  3005. if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
  3006. return;
  3007. f2fs_down_read(&F2FS_I(inode)->i_sem);
  3008. if (inc)
  3009. atomic_inc(&F2FS_I(inode)->writeback);
  3010. else
  3011. atomic_dec(&F2FS_I(inode)->writeback);
  3012. f2fs_up_read(&F2FS_I(inode)->i_sem);
  3013. }
  3014. static inline void update_skipped_write(struct f2fs_sb_info *sbi,
  3015. struct writeback_control *wbc)
  3016. {
  3017. long skipped = wbc->pages_skipped;
  3018. if (is_sbi_flag_set(sbi, SBI_ENABLE_CHECKPOINT) && skipped &&
  3019. wbc->sync_mode == WB_SYNC_ALL)
  3020. atomic_add(skipped, &sbi->nr_pages[F2FS_SKIPPED_WRITE]);
  3021. }
  3022. static int __f2fs_write_data_pages(struct address_space *mapping,
  3023. struct writeback_control *wbc,
  3024. enum iostat_type io_type)
  3025. {
  3026. struct inode *inode = mapping->host;
  3027. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3028. struct blk_plug plug;
  3029. int ret;
  3030. bool locked = false;
  3031. /* skip writing if there is no dirty page in this inode */
  3032. if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
  3033. return 0;
  3034. /* during POR, we don't need to trigger writepage at all. */
  3035. if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
  3036. goto skip_write;
  3037. if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
  3038. wbc->sync_mode == WB_SYNC_NONE &&
  3039. get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
  3040. f2fs_available_free_memory(sbi, DIRTY_DENTS))
  3041. goto skip_write;
  3042. /* skip writing in file defragment preparing stage */
  3043. if (is_inode_flag_set(inode, FI_SKIP_WRITES))
  3044. goto skip_write;
  3045. trace_f2fs_writepages(mapping->host, wbc, DATA);
  3046. /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
  3047. if (wbc->sync_mode == WB_SYNC_ALL)
  3048. atomic_inc(&sbi->wb_sync_req[DATA]);
  3049. else if (atomic_read(&sbi->wb_sync_req[DATA])) {
  3050. /* to avoid potential deadlock */
  3051. if (current->plug)
  3052. blk_finish_plug(current->plug);
  3053. goto skip_write;
  3054. }
  3055. if (__should_serialize_io(inode, wbc)) {
  3056. mutex_lock(&sbi->writepages);
  3057. locked = true;
  3058. }
  3059. account_writeback(inode, true);
  3060. blk_start_plug(&plug);
  3061. ret = f2fs_write_cache_pages(mapping, wbc, io_type);
  3062. blk_finish_plug(&plug);
  3063. account_writeback(inode, false);
  3064. if (locked)
  3065. mutex_unlock(&sbi->writepages);
  3066. if (wbc->sync_mode == WB_SYNC_ALL)
  3067. atomic_dec(&sbi->wb_sync_req[DATA]);
  3068. /*
  3069. * if some pages were truncated, we cannot guarantee its mapping->host
  3070. * to detect pending bios.
  3071. */
  3072. f2fs_remove_dirty_inode(inode);
  3073. /*
  3074. * f2fs_write_cache_pages() has retry logic for EAGAIN case which is
  3075. * common when racing w/ checkpoint, so only update skipped write
  3076. * when ret is non-zero.
  3077. */
  3078. if (ret)
  3079. update_skipped_write(sbi, wbc);
  3080. return ret;
  3081. skip_write:
  3082. wbc->pages_skipped += get_dirty_pages(inode);
  3083. update_skipped_write(sbi, wbc);
  3084. trace_f2fs_writepages(mapping->host, wbc, DATA);
  3085. return 0;
  3086. }
  3087. static int f2fs_write_data_pages(struct address_space *mapping,
  3088. struct writeback_control *wbc)
  3089. {
  3090. struct inode *inode = mapping->host;
  3091. return __f2fs_write_data_pages(mapping, wbc,
  3092. F2FS_I(inode)->cp_task == current ?
  3093. FS_CP_DATA_IO : FS_DATA_IO);
  3094. }
  3095. void f2fs_write_failed(struct inode *inode, loff_t to)
  3096. {
  3097. loff_t i_size = i_size_read(inode);
  3098. if (IS_NOQUOTA(inode))
  3099. return;
  3100. /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
  3101. if (to > i_size && !f2fs_verity_in_progress(inode)) {
  3102. f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
  3103. filemap_invalidate_lock(inode->i_mapping);
  3104. truncate_pagecache(inode, i_size);
  3105. f2fs_truncate_blocks(inode, i_size, true);
  3106. filemap_invalidate_unlock(inode->i_mapping);
  3107. f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
  3108. }
  3109. }
  /*
   * prepare_write_begin - find or reserve the block backing @folio before
   * a buffered write.
   * @sbi: superblock info
   * @folio: folio about to be written
   * @pos: file offset of the write
   * @len: length of the write
   * @blk_addr: on success receives dn.data_blkaddr
   * @node_changed: on success receives dn.node_changed
   *
   * Return: 0 on success or a negative errno.  Note that the early return
   * for a full-page write with FI_PREALLOCATED_ALL set returns 0 without
   * touching @blk_addr or @node_changed.  -ENODATA is returned for a
   * device-aliasing inode whose block is not in the read extent cache.
   */
  3110. static int prepare_write_begin(struct f2fs_sb_info *sbi,
  3111. struct folio *folio, loff_t pos, unsigned int len,
  3112. block_t *blk_addr, bool *node_changed)
  3113. {
  3114. struct inode *inode = folio->mapping->host;
  3115. pgoff_t index = folio->index;
  3116. struct dnode_of_data dn;
  3117. struct f2fs_lock_context lc;
  3118. struct folio *ifolio;
  3119. bool locked = false;
  3120. int flag = F2FS_GET_BLOCK_PRE_AIO;
  3121. int err = 0;
  3122. /*
  3123. * If a whole page is being written and we already preallocated all the
  3124. * blocks, then there is no need to get a block address now.
  3125. */
  3126. if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
  3127. return 0;
  3128. /* f2fs_lock_op avoids race between write CP and convert_inline_page */
  3129. if (f2fs_has_inline_data(inode)) {
  /* a write past the inline area switches to F2FS_GET_BLOCK_DEFAULT */
  3130. if (pos + len > MAX_INLINE_DATA(inode))
  3131. flag = F2FS_GET_BLOCK_DEFAULT;
  3132. f2fs_map_lock(sbi, &lc, flag);
  3133. locked = true;
  3134. } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
  /* the written page starts at or beyond i_size: lock up front */
  3135. f2fs_map_lock(sbi, &lc, flag);
  3136. locked = true;
  3137. }
  3138. restart:
  3139. /* check inline_data */
  3140. ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
  3141. if (IS_ERR(ifolio)) {
  3142. err = PTR_ERR(ifolio);
  3143. goto unlock_out;
  3144. }
  3145. set_new_dnode(&dn, inode, ifolio, ifolio, 0);
  3146. if (f2fs_has_inline_data(inode)) {
  /* the write still fits inline: load the inline data into the folio */
  3147. if (pos + len <= MAX_INLINE_DATA(inode)) {
  3148. f2fs_do_read_inline_data(folio, ifolio);
  3149. set_inode_flag(inode, FI_DATA_EXIST);
  3150. if (inode->i_nlink)
  3151. folio_set_f2fs_inline(ifolio);
  3152. goto out;
  3153. }
  /* otherwise convert the inline data to a regular block */
  3154. err = f2fs_convert_inline_folio(&dn, folio);
  3155. if (err || dn.data_blkaddr != NULL_ADDR)
  3156. goto out;
  3157. }
  3158. if (!f2fs_lookup_read_extent_cache_block(inode, index,
  3159. &dn.data_blkaddr)) {
  3160. if (IS_DEVICE_ALIASING(inode)) {
  3161. err = -ENODATA;
  3162. goto out;
  3163. }
  /* the map lock is held: reserve the block right away */
  3164. if (locked) {
  3165. err = f2fs_reserve_block(&dn, index);
  3166. goto out;
  3167. }
  3168. /* hole case */
  3169. err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
  3170. if (!err && dn.data_blkaddr != NULL_ADDR)
  3171. goto out;
  /*
   * No block found without the lock: take the map lock and start over so
   * that the locked branch above can reserve one.
   */
  3172. f2fs_put_dnode(&dn);
  3173. f2fs_map_lock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
  3174. WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
  3175. locked = true;
  3176. goto restart;
  3177. }
  3178. out:
  3179. if (!err) {
  3180. /* convert_inline_page can make node_changed */
  3181. *blk_addr = dn.data_blkaddr;
  3182. *node_changed = dn.node_changed;
  3183. }
  3184. f2fs_put_dnode(&dn);
  3185. unlock_out:
  3186. if (locked)
  3187. f2fs_map_unlock(sbi, &lc, flag);
  3188. return err;
  3189. }
  3190. static int __find_data_block(struct inode *inode, pgoff_t index,
  3191. block_t *blk_addr)
  3192. {
  3193. struct dnode_of_data dn;
  3194. struct folio *ifolio;
  3195. int err = 0;
  3196. ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino);
  3197. if (IS_ERR(ifolio))
  3198. return PTR_ERR(ifolio);
  3199. set_new_dnode(&dn, inode, ifolio, ifolio, 0);
  3200. if (!f2fs_lookup_read_extent_cache_block(inode, index,
  3201. &dn.data_blkaddr)) {
  3202. /* hole case */
  3203. err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
  3204. if (err) {
  3205. dn.data_blkaddr = NULL_ADDR;
  3206. err = 0;
  3207. }
  3208. }
  3209. *blk_addr = dn.data_blkaddr;
  3210. f2fs_put_dnode(&dn);
  3211. return err;
  3212. }
  3213. static int __reserve_data_block(struct inode *inode, pgoff_t index,
  3214. block_t *blk_addr, bool *node_changed)
  3215. {
  3216. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3217. struct dnode_of_data dn;
  3218. struct f2fs_lock_context lc;
  3219. struct folio *ifolio;
  3220. int err = 0;
  3221. f2fs_map_lock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
  3222. ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
  3223. if (IS_ERR(ifolio)) {
  3224. err = PTR_ERR(ifolio);
  3225. goto unlock_out;
  3226. }
  3227. set_new_dnode(&dn, inode, ifolio, ifolio, 0);
  3228. if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
  3229. &dn.data_blkaddr))
  3230. err = f2fs_reserve_block(&dn, index);
  3231. *blk_addr = dn.data_blkaddr;
  3232. *node_changed = dn.node_changed;
  3233. f2fs_put_dnode(&dn);
  3234. unlock_out:
  3235. f2fs_map_unlock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
  3236. return err;
  3237. }
  3238. static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
  3239. struct folio *folio, loff_t pos, unsigned int len,
  3240. block_t *blk_addr, bool *node_changed, bool *use_cow)
  3241. {
  3242. struct inode *inode = folio->mapping->host;
  3243. struct inode *cow_inode = F2FS_I(inode)->cow_inode;
  3244. pgoff_t index = folio->index;
  3245. int err = 0;
  3246. block_t ori_blk_addr = NULL_ADDR;
  3247. /* If pos is beyond the end of file, reserve a new block in COW inode */
  3248. if ((pos & PAGE_MASK) >= i_size_read(inode))
  3249. goto reserve_block;
  3250. /* Look for the block in COW inode first */
  3251. err = __find_data_block(cow_inode, index, blk_addr);
  3252. if (err) {
  3253. return err;
  3254. } else if (*blk_addr != NULL_ADDR) {
  3255. *use_cow = true;
  3256. return 0;
  3257. }
  3258. if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
  3259. goto reserve_block;
  3260. /* Look for the block in the original inode */
  3261. err = __find_data_block(inode, index, &ori_blk_addr);
  3262. if (err)
  3263. return err;
  3264. reserve_block:
  3265. /* Finally, we should reserve a new block in COW inode for the update */
  3266. err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
  3267. if (err)
  3268. return err;
  3269. inc_atomic_write_cnt(inode);
  3270. if (ori_blk_addr != NULL_ADDR)
  3271. *blk_addr = ori_blk_addr;
  3272. return 0;
  3273. }
  3274. static int f2fs_write_begin(const struct kiocb *iocb,
  3275. struct address_space *mapping,
  3276. loff_t pos, unsigned len, struct folio **foliop,
  3277. void **fsdata)
  3278. {
  3279. struct inode *inode = mapping->host;
  3280. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3281. struct folio *folio;
  3282. pgoff_t index = pos >> PAGE_SHIFT;
  3283. bool need_balance = false;
  3284. bool use_cow = false;
  3285. block_t blkaddr = NULL_ADDR;
  3286. int err = 0;
  3287. trace_f2fs_write_begin(inode, pos, len);
  3288. if (!f2fs_is_checkpoint_ready(sbi)) {
  3289. err = -ENOSPC;
  3290. goto fail;
  3291. }
  3292. /*
  3293. * We should check this at this moment to avoid deadlock on inode page
  3294. * and #0 page. The locking rule for inline_data conversion should be:
  3295. * folio_lock(folio #0) -> folio_lock(inode_page)
  3296. */
  3297. if (index != 0) {
  3298. err = f2fs_convert_inline_inode(inode);
  3299. if (err)
  3300. goto fail;
  3301. }
  3302. #ifdef CONFIG_F2FS_FS_COMPRESSION
  3303. if (f2fs_compressed_file(inode)) {
  3304. int ret;
  3305. struct page *page;
  3306. *fsdata = NULL;
  3307. if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
  3308. goto repeat;
  3309. ret = f2fs_prepare_compress_overwrite(inode, &page,
  3310. index, fsdata);
  3311. if (ret < 0) {
  3312. err = ret;
  3313. goto fail;
  3314. } else if (ret) {
  3315. *foliop = page_folio(page);
  3316. return 0;
  3317. }
  3318. }
  3319. #endif
  3320. repeat:
  3321. /*
  3322. * Do not use FGP_STABLE to avoid deadlock.
  3323. * Will wait that below with our IO control.
  3324. */
  3325. folio = f2fs_filemap_get_folio(mapping, index,
  3326. FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS,
  3327. mapping_gfp_mask(mapping));
  3328. if (IS_ERR(folio)) {
  3329. err = PTR_ERR(folio);
  3330. goto fail;
  3331. }
  3332. /* TODO: cluster can be compressed due to race with .writepage */
  3333. *foliop = folio;
  3334. if (f2fs_is_atomic_file(inode))
  3335. err = prepare_atomic_write_begin(sbi, folio, pos, len,
  3336. &blkaddr, &need_balance, &use_cow);
  3337. else
  3338. err = prepare_write_begin(sbi, folio, pos, len,
  3339. &blkaddr, &need_balance);
  3340. if (err)
  3341. goto put_folio;
  3342. if (need_balance && !IS_NOQUOTA(inode) &&
  3343. has_not_enough_free_secs(sbi, 0, 0)) {
  3344. folio_unlock(folio);
  3345. f2fs_balance_fs(sbi, true);
  3346. folio_lock(folio);
  3347. if (folio->mapping != mapping) {
  3348. /* The folio got truncated from under us */
  3349. folio_unlock(folio);
  3350. folio_put(folio);
  3351. goto repeat;
  3352. }
  3353. }
  3354. f2fs_folio_wait_writeback(folio, DATA, false, true);
  3355. if (len == folio_size(folio) || folio_test_uptodate(folio))
  3356. return 0;
  3357. if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
  3358. !f2fs_verity_in_progress(inode)) {
  3359. folio_zero_segment(folio, len, folio_size(folio));
  3360. return 0;
  3361. }
  3362. if (blkaddr == NEW_ADDR) {
  3363. folio_zero_segment(folio, 0, folio_size(folio));
  3364. folio_mark_uptodate(folio);
  3365. } else {
  3366. if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
  3367. DATA_GENERIC_ENHANCE_READ)) {
  3368. err = -EFSCORRUPTED;
  3369. goto put_folio;
  3370. }
  3371. f2fs_submit_page_read(use_cow ? F2FS_I(inode)->cow_inode :
  3372. inode,
  3373. NULL, /* can't write to fsverity files */
  3374. folio, blkaddr, 0, true);
  3375. folio_lock(folio);
  3376. if (unlikely(folio->mapping != mapping)) {
  3377. folio_unlock(folio);
  3378. folio_put(folio);
  3379. goto repeat;
  3380. }
  3381. if (unlikely(!folio_test_uptodate(folio))) {
  3382. err = -EIO;
  3383. goto put_folio;
  3384. }
  3385. }
  3386. return 0;
  3387. put_folio:
  3388. f2fs_folio_put(folio, true);
  3389. fail:
  3390. f2fs_write_failed(inode, pos + len);
  3391. return err;
  3392. }
  3393. static int f2fs_write_end(const struct kiocb *iocb,
  3394. struct address_space *mapping,
  3395. loff_t pos, unsigned len, unsigned copied,
  3396. struct folio *folio, void *fsdata)
  3397. {
  3398. struct inode *inode = folio->mapping->host;
  3399. trace_f2fs_write_end(inode, pos, len, copied);
  3400. /*
  3401. * This should be come from len == PAGE_SIZE, and we expect copied
  3402. * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
  3403. * let generic_perform_write() try to copy data again through copied=0.
  3404. */
  3405. if (!folio_test_uptodate(folio)) {
  3406. if (unlikely(copied != len))
  3407. copied = 0;
  3408. else
  3409. folio_mark_uptodate(folio);
  3410. }
  3411. #ifdef CONFIG_F2FS_FS_COMPRESSION
  3412. /* overwrite compressed file */
  3413. if (f2fs_compressed_file(inode) && fsdata) {
  3414. f2fs_compress_write_end(inode, fsdata, folio->index, copied);
  3415. f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
  3416. if (pos + copied > i_size_read(inode) &&
  3417. !f2fs_verity_in_progress(inode))
  3418. f2fs_i_size_write(inode, pos + copied);
  3419. return copied;
  3420. }
  3421. #endif
  3422. if (!copied)
  3423. goto unlock_out;
  3424. folio_mark_dirty(folio);
  3425. if (f2fs_is_atomic_file(inode))
  3426. folio_set_f2fs_atomic(folio);
  3427. if (pos + copied > i_size_read(inode) &&
  3428. !f2fs_verity_in_progress(inode)) {
  3429. f2fs_i_size_write(inode, pos + copied);
  3430. if (f2fs_is_atomic_file(inode))
  3431. f2fs_i_size_write(F2FS_I(inode)->cow_inode,
  3432. pos + copied);
  3433. }
  3434. unlock_out:
  3435. f2fs_folio_put(folio, true);
  3436. f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
  3437. return copied;
  3438. }
  3439. void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
  3440. {
  3441. struct inode *inode = folio->mapping->host;
  3442. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3443. if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
  3444. (offset || length != folio_size(folio)))
  3445. return;
  3446. if (folio_test_dirty(folio)) {
  3447. if (inode->i_ino == F2FS_META_INO(sbi)) {
  3448. dec_page_count(sbi, F2FS_DIRTY_META);
  3449. } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
  3450. dec_page_count(sbi, F2FS_DIRTY_NODES);
  3451. } else {
  3452. inode_dec_dirty_pages(inode);
  3453. f2fs_remove_dirty_inode(inode);
  3454. }
  3455. }
  3456. if (offset || length != folio_size(folio))
  3457. return;
  3458. folio_cancel_dirty(folio);
  3459. ffs_detach_free(folio);
  3460. }
  3461. bool f2fs_release_folio(struct folio *folio, gfp_t wait)
  3462. {
  3463. /* If this is dirty folio, keep private data */
  3464. if (folio_test_dirty(folio))
  3465. return false;
  3466. ffs_detach_free(folio);
  3467. return true;
  3468. }
  3469. static bool f2fs_dirty_data_folio(struct address_space *mapping,
  3470. struct folio *folio)
  3471. {
  3472. struct inode *inode = mapping->host;
  3473. trace_f2fs_set_page_dirty(folio, DATA);
  3474. if (!folio_test_uptodate(folio))
  3475. folio_mark_uptodate(folio);
  3476. BUG_ON(folio_test_swapcache(folio));
  3477. if (filemap_dirty_folio(mapping, folio)) {
  3478. f2fs_update_dirty_folio(inode, folio);
  3479. return true;
  3480. }
  3481. return false;
  3482. }
  3483. static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
  3484. {
  3485. #ifdef CONFIG_F2FS_FS_COMPRESSION
  3486. struct dnode_of_data dn;
  3487. sector_t start_idx, blknr = 0;
  3488. int ret;
  3489. start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
  3490. set_new_dnode(&dn, inode, NULL, NULL, 0);
  3491. ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
  3492. if (ret)
  3493. return 0;
  3494. if (dn.data_blkaddr != COMPRESS_ADDR) {
  3495. dn.ofs_in_node += block - start_idx;
  3496. blknr = f2fs_data_blkaddr(&dn);
  3497. if (!__is_valid_data_blkaddr(blknr))
  3498. blknr = 0;
  3499. }
  3500. f2fs_put_dnode(&dn);
  3501. return blknr;
  3502. #else
  3503. return 0;
  3504. #endif
  3505. }
  3506. static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
  3507. {
  3508. struct inode *inode = mapping->host;
  3509. sector_t blknr = 0;
  3510. if (f2fs_has_inline_data(inode))
  3511. goto out;
  3512. /* make sure allocating whole blocks */
  3513. if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
  3514. filemap_write_and_wait(mapping);
  3515. /* Block number less than F2FS MAX BLOCKS */
  3516. if (unlikely(block >= max_file_blocks(inode)))
  3517. goto out;
  3518. if (f2fs_compressed_file(inode)) {
  3519. blknr = f2fs_bmap_compress(inode, block);
  3520. } else {
  3521. struct f2fs_map_blocks map;
  3522. memset(&map, 0, sizeof(map));
  3523. map.m_lblk = block;
  3524. map.m_len = 1;
  3525. map.m_next_pgofs = NULL;
  3526. map.m_seg_type = NO_CHECK_TYPE;
  3527. if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
  3528. blknr = map.m_pblk;
  3529. }
  3530. out:
  3531. trace_f2fs_bmap(inode, block, blknr);
  3532. return blknr;
  3533. }
  3534. #ifdef CONFIG_SWAP
  3535. static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
  3536. unsigned int blkcnt)
  3537. {
  3538. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3539. unsigned int blkofs;
  3540. unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
  3541. unsigned int end_blk = start_blk + blkcnt - 1;
  3542. unsigned int secidx = start_blk / blk_per_sec;
  3543. unsigned int end_sec;
  3544. int ret = 0;
  3545. if (!blkcnt)
  3546. return 0;
  3547. end_sec = end_blk / blk_per_sec;
  3548. f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
  3549. filemap_invalidate_lock(inode->i_mapping);
  3550. set_inode_flag(inode, FI_ALIGNED_WRITE);
  3551. set_inode_flag(inode, FI_OPU_WRITE);
  3552. for (; secidx <= end_sec; secidx++) {
  3553. unsigned int blkofs_end = secidx == end_sec ?
  3554. end_blk % blk_per_sec : blk_per_sec - 1;
  3555. f2fs_down_write(&sbi->pin_sem);
  3556. ret = f2fs_allocate_pinning_section(sbi);
  3557. if (ret) {
  3558. f2fs_up_write(&sbi->pin_sem);
  3559. break;
  3560. }
  3561. set_inode_flag(inode, FI_SKIP_WRITES);
  3562. for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
  3563. struct folio *folio;
  3564. unsigned int blkidx = secidx * blk_per_sec + blkofs;
  3565. folio = f2fs_get_lock_data_folio(inode, blkidx, true);
  3566. if (IS_ERR(folio)) {
  3567. f2fs_up_write(&sbi->pin_sem);
  3568. ret = PTR_ERR(folio);
  3569. goto done;
  3570. }
  3571. folio_mark_dirty(folio);
  3572. f2fs_folio_put(folio, true);
  3573. }
  3574. clear_inode_flag(inode, FI_SKIP_WRITES);
  3575. ret = filemap_fdatawrite(inode->i_mapping);
  3576. f2fs_up_write(&sbi->pin_sem);
  3577. if (ret)
  3578. break;
  3579. }
  3580. done:
  3581. clear_inode_flag(inode, FI_SKIP_WRITES);
  3582. clear_inode_flag(inode, FI_OPU_WRITE);
  3583. clear_inode_flag(inode, FI_ALIGNED_WRITE);
  3584. filemap_invalidate_unlock(inode->i_mapping);
  3585. f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
  3586. return ret;
  3587. }
  3588. static int check_swap_activate(struct swap_info_struct *sis,
  3589. struct file *swap_file, sector_t *span)
  3590. {
  3591. struct address_space *mapping = swap_file->f_mapping;
  3592. struct inode *inode = mapping->host;
  3593. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3594. block_t cur_lblock;
  3595. block_t last_lblock;
  3596. block_t pblock;
  3597. block_t lowest_pblock = -1;
  3598. block_t highest_pblock = 0;
  3599. int nr_extents = 0;
  3600. unsigned int nr_pblocks;
  3601. unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
  3602. unsigned int not_aligned = 0;
  3603. int ret = 0;
  3604. /*
  3605. * Map all the blocks into the extent list. This code doesn't try
  3606. * to be very smart.
  3607. */
  3608. cur_lblock = 0;
  3609. last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode));
  3610. while (cur_lblock < last_lblock && cur_lblock < sis->max) {
  3611. struct f2fs_map_blocks map;
  3612. bool last_extent = false;
  3613. retry:
  3614. cond_resched();
  3615. memset(&map, 0, sizeof(map));
  3616. map.m_lblk = cur_lblock;
  3617. map.m_len = last_lblock - cur_lblock;
  3618. map.m_next_pgofs = NULL;
  3619. map.m_next_extent = NULL;
  3620. map.m_seg_type = NO_CHECK_TYPE;
  3621. map.m_may_create = false;
  3622. ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
  3623. if (ret)
  3624. goto out;
  3625. /* hole */
  3626. if (!(map.m_flags & F2FS_MAP_FLAGS)) {
  3627. f2fs_err(sbi, "Swapfile has holes");
  3628. ret = -EINVAL;
  3629. goto out;
  3630. }
  3631. pblock = map.m_pblk;
  3632. nr_pblocks = map.m_len;
  3633. if (!last_extent &&
  3634. ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
  3635. nr_pblocks % blks_per_sec ||
  3636. f2fs_is_sequential_zone_area(sbi, pblock))) {
  3637. not_aligned++;
  3638. nr_pblocks = roundup(nr_pblocks, blks_per_sec);
  3639. if (cur_lblock + nr_pblocks > sis->max)
  3640. nr_pblocks -= blks_per_sec;
  3641. /* this extent is last one */
  3642. if (!nr_pblocks) {
  3643. nr_pblocks = last_lblock - cur_lblock;
  3644. last_extent = true;
  3645. }
  3646. ret = f2fs_migrate_blocks(inode, cur_lblock,
  3647. nr_pblocks);
  3648. if (ret) {
  3649. if (ret == -ENOENT)
  3650. ret = -EINVAL;
  3651. goto out;
  3652. }
  3653. /* lookup block mapping info after block migration */
  3654. goto retry;
  3655. }
  3656. if (cur_lblock + nr_pblocks >= sis->max)
  3657. nr_pblocks = sis->max - cur_lblock;
  3658. if (cur_lblock) { /* exclude the header page */
  3659. if (pblock < lowest_pblock)
  3660. lowest_pblock = pblock;
  3661. if (pblock + nr_pblocks - 1 > highest_pblock)
  3662. highest_pblock = pblock + nr_pblocks - 1;
  3663. }
  3664. /*
  3665. * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
  3666. */
  3667. ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
  3668. if (ret < 0)
  3669. goto out;
  3670. nr_extents += ret;
  3671. cur_lblock += nr_pblocks;
  3672. }
  3673. ret = nr_extents;
  3674. *span = 1 + highest_pblock - lowest_pblock;
  3675. if (cur_lblock == 0)
  3676. cur_lblock = 1; /* force Empty message */
  3677. sis->max = cur_lblock;
  3678. sis->pages = cur_lblock - 1;
  3679. out:
  3680. if (not_aligned)
  3681. f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
  3682. not_aligned, blks_per_sec * F2FS_BLKSIZE);
  3683. return ret;
  3684. }
  3685. static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
  3686. sector_t *span)
  3687. {
  3688. struct inode *inode = file_inode(file);
  3689. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  3690. int ret;
  3691. if (!S_ISREG(inode->i_mode))
  3692. return -EINVAL;
  3693. if (f2fs_readonly(sbi->sb))
  3694. return -EROFS;
  3695. if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
  3696. f2fs_err(sbi, "Swapfile not supported in LFS mode");
  3697. return -EINVAL;
  3698. }
  3699. ret = f2fs_convert_inline_inode(inode);
  3700. if (ret)
  3701. return ret;
  3702. if (!f2fs_disable_compressed_file(inode))
  3703. return -EINVAL;
  3704. ret = filemap_fdatawrite(inode->i_mapping);
  3705. if (ret < 0)
  3706. return ret;
  3707. f2fs_precache_extents(inode);
  3708. ret = check_swap_activate(sis, file, span);
  3709. if (ret < 0)
  3710. return ret;
  3711. stat_inc_swapfile_inode(inode);
  3712. set_inode_flag(inode, FI_PIN_FILE);
  3713. f2fs_update_time(sbi, REQ_TIME);
  3714. return ret;
  3715. }
  3716. static void f2fs_swap_deactivate(struct file *file)
  3717. {
  3718. struct inode *inode = file_inode(file);
  3719. stat_dec_swapfile_inode(inode);
  3720. clear_inode_flag(inode, FI_PIN_FILE);
  3721. }
  3722. #else
  3723. static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
  3724. sector_t *span)
  3725. {
  3726. return -EOPNOTSUPP;
  3727. }
  /* ->swap_deactivate stub used when CONFIG_SWAP is not set: nothing to undo. */
  static void f2fs_swap_deactivate(struct file *file)
  {
  }
  3731. #endif
  3732. const struct address_space_operations f2fs_dblock_aops = {
  3733. .read_folio = f2fs_read_data_folio,
  3734. .readahead = f2fs_readahead,
  3735. .writepages = f2fs_write_data_pages,
  3736. .write_begin = f2fs_write_begin,
  3737. .write_end = f2fs_write_end,
  3738. .dirty_folio = f2fs_dirty_data_folio,
  3739. .migrate_folio = filemap_migrate_folio,
  3740. .invalidate_folio = f2fs_invalidate_folio,
  3741. .release_folio = f2fs_release_folio,
  3742. .bmap = f2fs_bmap,
  3743. .swap_activate = f2fs_swap_activate,
  3744. .swap_deactivate = f2fs_swap_deactivate,
  3745. };
  3746. void f2fs_clear_page_cache_dirty_tag(struct folio *folio)
  3747. {
  3748. struct address_space *mapping = folio->mapping;
  3749. unsigned long flags;
  3750. xa_lock_irqsave(&mapping->i_pages, flags);
  3751. __xa_clear_mark(&mapping->i_pages, folio->index,
  3752. PAGECACHE_TAG_DIRTY);
  3753. xa_unlock_irqrestore(&mapping->i_pages, flags);
  3754. }
  3755. int __init f2fs_init_post_read_processing(void)
  3756. {
  3757. bio_post_read_ctx_cache =
  3758. kmem_cache_create("f2fs_bio_post_read_ctx",
  3759. sizeof(struct bio_post_read_ctx), 0, 0, NULL);
  3760. if (!bio_post_read_ctx_cache)
  3761. goto fail;
  3762. bio_post_read_ctx_pool =
  3763. mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
  3764. bio_post_read_ctx_cache);
  3765. if (!bio_post_read_ctx_pool)
  3766. goto fail_free_cache;
  3767. return 0;
  3768. fail_free_cache:
  3769. kmem_cache_destroy(bio_post_read_ctx_cache);
  3770. fail:
  3771. return -ENOMEM;
  3772. }
  3773. void f2fs_destroy_post_read_processing(void)
  3774. {
  3775. mempool_destroy(bio_post_read_ctx_pool);
  3776. kmem_cache_destroy(bio_post_read_ctx_cache);
  3777. }
  3778. int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
  3779. {
  3780. if (!f2fs_sb_has_encrypt(sbi) &&
  3781. !f2fs_sb_has_verity(sbi) &&
  3782. !f2fs_sb_has_compression(sbi))
  3783. return 0;
  3784. sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
  3785. WQ_UNBOUND | WQ_HIGHPRI,
  3786. num_online_cpus());
  3787. return sbi->post_read_wq ? 0 : -ENOMEM;
  3788. }
  3789. void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
  3790. {
  3791. if (sbi->post_read_wq)
  3792. destroy_workqueue(sbi->post_read_wq);
  3793. }
  3794. int __init f2fs_init_bio_entry_cache(void)
  3795. {
  3796. bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
  3797. sizeof(struct bio_entry));
  3798. if (!bio_entry_slab)
  3799. return -ENOMEM;
  3800. ffs_entry_slab = f2fs_kmem_cache_create("f2fs_ffs_slab",
  3801. sizeof(struct f2fs_folio_state));
  3802. if (!ffs_entry_slab) {
  3803. kmem_cache_destroy(bio_entry_slab);
  3804. return -ENOMEM;
  3805. }
  3806. return 0;
  3807. }
  3808. void f2fs_destroy_bio_entry_cache(void)
  3809. {
  3810. kmem_cache_destroy(bio_entry_slab);
  3811. kmem_cache_destroy(ffs_entry_slab);
  3812. }
  3813. static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
  3814. unsigned int flags, struct iomap *iomap,
  3815. struct iomap *srcmap)
  3816. {
  3817. struct f2fs_map_blocks map = { NULL, };
  3818. pgoff_t next_pgofs = 0;
  3819. int err;
  3820. map.m_lblk = F2FS_BYTES_TO_BLK(offset);
  3821. map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1;
  3822. map.m_next_pgofs = &next_pgofs;
  3823. map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
  3824. inode->i_write_hint);
  3825. if (flags & IOMAP_WRITE && iomap->private) {
  3826. map.m_last_pblk = (unsigned long)iomap->private;
  3827. iomap->private = NULL;
  3828. }
  3829. /*
  3830. * If the blocks being overwritten are already allocated,
  3831. * f2fs_map_lock and f2fs_balance_fs are not necessary.
  3832. */
  3833. if ((flags & IOMAP_WRITE) &&
  3834. !__f2fs_overwrite_io(inode, offset, length, true))
  3835. map.m_may_create = true;
  3836. err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
  3837. if (err)
  3838. return err;
  3839. iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk);
  3840. /*
  3841. * When inline encryption is enabled, sometimes I/O to an encrypted file
  3842. * has to be broken up to guarantee DUN contiguity. Handle this by
  3843. * limiting the length of the mapping returned.
  3844. */
  3845. map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
  3846. /*
  3847. * We should never see delalloc or compressed extents here based on
  3848. * prior flushing and checks.
  3849. */
  3850. if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
  3851. return -EINVAL;
  3852. if (map.m_flags & F2FS_MAP_MAPPED) {
  3853. if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
  3854. return -EINVAL;
  3855. iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
  3856. iomap->type = IOMAP_MAPPED;
  3857. iomap->flags |= IOMAP_F_MERGED;
  3858. iomap->bdev = map.m_bdev;
  3859. iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);
  3860. if (flags & IOMAP_WRITE && map.m_last_pblk)
  3861. iomap->private = (void *)map.m_last_pblk;
  3862. } else {
  3863. if (flags & IOMAP_WRITE)
  3864. return -ENOTBLK;
  3865. if (map.m_pblk == NULL_ADDR) {
  3866. iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) -
  3867. iomap->offset;
  3868. iomap->type = IOMAP_HOLE;
  3869. } else if (map.m_pblk == NEW_ADDR) {
  3870. iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
  3871. iomap->type = IOMAP_UNWRITTEN;
  3872. } else {
  3873. f2fs_bug_on(F2FS_I_SB(inode), 1);
  3874. }
  3875. iomap->addr = IOMAP_NULL_ADDR;
  3876. }
  3877. if (map.m_flags & F2FS_MAP_NEW)
  3878. iomap->flags |= IOMAP_F_NEW;
  3879. if ((inode_state_read_once(inode) & I_DIRTY_DATASYNC) ||
  3880. offset + length > i_size_read(inode))
  3881. iomap->flags |= IOMAP_F_DIRTY;
  3882. return 0;
  3883. }
  3884. const struct iomap_ops f2fs_iomap_ops = {
  3885. .iomap_begin = f2fs_iomap_begin,
  3886. };