route.c 171 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
7677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909691069116912691369146915691669176918691969206921692269236924692569266927692869296930693169326933693469356936693769386939694069416942694369446945694669476948694969506951695269536954695569566957695869596960696169626963696469656966696769686969697069716972697369746975697669776978
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Linux INET6 implementation
  4. * FIB front-end.
  5. *
  6. * Authors:
  7. * Pedro Roque <roque@di.fc.ul.pt>
  8. */
  9. /* Changes:
  10. *
  11. * YOSHIFUJI Hideaki @USAGI
  12. * reworked default router selection.
  13. * - respect outgoing interface
  14. * - select from (probably) reachable routers (i.e.
  15. * routers in REACHABLE, STALE, DELAY or PROBE states).
  16. * - always select the same router if it is (probably)
  17. * reachable. otherwise, round-robin the list.
  18. * Ville Nuorvala
  19. * Fixed routing subtrees.
  20. */
  21. #define pr_fmt(fmt) "IPv6: " fmt
  22. #include <linux/capability.h>
  23. #include <linux/errno.h>
  24. #include <linux/export.h>
  25. #include <linux/types.h>
  26. #include <linux/times.h>
  27. #include <linux/socket.h>
  28. #include <linux/sockios.h>
  29. #include <linux/net.h>
  30. #include <linux/route.h>
  31. #include <linux/netdevice.h>
  32. #include <linux/in6.h>
  33. #include <linux/mroute6.h>
  34. #include <linux/init.h>
  35. #include <linux/if_arp.h>
  36. #include <linux/proc_fs.h>
  37. #include <linux/seq_file.h>
  38. #include <linux/nsproxy.h>
  39. #include <linux/slab.h>
  40. #include <linux/jhash.h>
  41. #include <linux/siphash.h>
  42. #include <net/net_namespace.h>
  43. #include <net/snmp.h>
  44. #include <net/ipv6.h>
  45. #include <net/ip6_fib.h>
  46. #include <net/ip6_route.h>
  47. #include <net/ndisc.h>
  48. #include <net/addrconf.h>
  49. #include <net/tcp.h>
  50. #include <linux/rtnetlink.h>
  51. #include <net/dst.h>
  52. #include <net/dst_metadata.h>
  53. #include <net/xfrm.h>
  54. #include <net/netevent.h>
  55. #include <net/netlink.h>
  56. #include <net/rtnh.h>
  57. #include <net/lwtunnel.h>
  58. #include <net/ip_tunnels.h>
  59. #include <net/l3mdev.h>
  60. #include <net/ip.h>
  61. #include <linux/uaccess.h>
  62. #include <linux/btf_ids.h>
  63. #ifdef CONFIG_SYSCTL
  64. #include <linux/sysctl.h>
  65. #endif
  66. static int ip6_rt_type_to_error(u8 fib6_type);
  67. #define CREATE_TRACE_POINTS
  68. #include <trace/events/fib6.h>
  69. EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  70. #undef CREATE_TRACE_POINTS
  71. enum rt6_nud_state {
  72. RT6_NUD_FAIL_HARD = -3,
  73. RT6_NUD_FAIL_PROBE = -2,
  74. RT6_NUD_FAIL_DO_RR = -1,
  75. RT6_NUD_SUCCEED = 1
  76. };
  77. INDIRECT_CALLABLE_SCOPE
  78. struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  79. static unsigned int ip6_default_advmss(const struct dst_entry *dst);
  80. INDIRECT_CALLABLE_SCOPE
  81. unsigned int ip6_mtu(const struct dst_entry *dst);
  82. static void ip6_negative_advice(struct sock *sk,
  83. struct dst_entry *dst);
  84. static void ip6_dst_destroy(struct dst_entry *);
  85. static void ip6_dst_ifdown(struct dst_entry *,
  86. struct net_device *dev);
  87. static void ip6_dst_gc(struct dst_ops *ops);
  88. static int ip6_pkt_discard(struct sk_buff *skb);
  89. static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  90. static int ip6_pkt_prohibit(struct sk_buff *skb);
  91. static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  92. static void ip6_link_failure(struct sk_buff *skb);
  93. static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  94. struct sk_buff *skb, u32 mtu,
  95. bool confirm_neigh);
  96. static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  97. struct sk_buff *skb);
  98. static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
  99. int strict);
  100. static size_t rt6_nlmsg_size(struct fib6_info *f6i);
  101. static int rt6_fill_node(struct net *net, struct sk_buff *skb,
  102. struct fib6_info *rt, struct dst_entry *dst,
  103. struct in6_addr *dest, struct in6_addr *src,
  104. int iif, int type, u32 portid, u32 seq,
  105. unsigned int flags);
  106. static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
  107. const struct in6_addr *daddr,
  108. const struct in6_addr *saddr);
  109. #ifdef CONFIG_IPV6_ROUTE_INFO
  110. static struct fib6_info *rt6_add_route_info(struct net *net,
  111. const struct in6_addr *prefix, int prefixlen,
  112. const struct in6_addr *gwaddr,
  113. struct net_device *dev,
  114. unsigned int pref);
  115. static struct fib6_info *rt6_get_route_info(struct net *net,
  116. const struct in6_addr *prefix, int prefixlen,
  117. const struct in6_addr *gwaddr,
  118. struct net_device *dev);
  119. #endif
  120. struct uncached_list {
  121. spinlock_t lock;
  122. struct list_head head;
  123. };
  124. static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
  125. void rt6_uncached_list_add(struct rt6_info *rt)
  126. {
  127. struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
  128. rt->dst.rt_uncached_list = ul;
  129. spin_lock_bh(&ul->lock);
  130. list_add_tail(&rt->dst.rt_uncached, &ul->head);
  131. spin_unlock_bh(&ul->lock);
  132. }
  133. void rt6_uncached_list_del(struct rt6_info *rt)
  134. {
  135. struct uncached_list *ul = rt->dst.rt_uncached_list;
  136. if (ul) {
  137. spin_lock_bh(&ul->lock);
  138. list_del_init(&rt->dst.rt_uncached);
  139. spin_unlock_bh(&ul->lock);
  140. }
  141. }
/*
 * Walk every possible CPU's uncached list and detach the entries that still
 * reference @dev: their inet6_dev and/or dst device are switched over to
 * blackhole_netdev, and each entry touched this way is unlinked from the list.
 */
static void rt6_uncached_list_flush_dev(struct net_device *dev)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt, *safe;

		/* Peek without the lock: skip CPUs whose list is empty. */
		if (list_empty(&ul->head))
			continue;

		spin_lock_bh(&ul->lock);
		/* _safe iteration because entries are unlinked inside the loop. */
		list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;
			bool handled = false;

			/* Take a blackhole inet6_dev reference, then drop the old one. */
			if (rt_idev && rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(blackhole_netdev);
				in6_dev_put(rt_idev);
				handled = true;
			}

			/* Repoint the dst device and move its tracked reference. */
			if (rt_dev == dev) {
				rt->dst.dev = blackhole_netdev;
				netdev_ref_replace(rt_dev, blackhole_netdev,
						   &rt->dst.dev_tracker,
						   GFP_ATOMIC);
				handled = true;
			}
			if (handled)
				list_del_init(&rt->dst.rt_uncached);
		}
		spin_unlock_bh(&ul->lock);
	}
}
  173. static inline const void *choose_neigh_daddr(const struct in6_addr *p,
  174. struct sk_buff *skb,
  175. const void *daddr)
  176. {
  177. if (!ipv6_addr_any(p))
  178. return (const void *) p;
  179. else if (skb)
  180. return &ipv6_hdr(skb)->daddr;
  181. return daddr;
  182. }
  183. struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
  184. struct net_device *dev,
  185. struct sk_buff *skb,
  186. const void *daddr)
  187. {
  188. struct neighbour *n;
  189. daddr = choose_neigh_daddr(gw, skb, daddr);
  190. n = __ipv6_neigh_lookup(dev, daddr);
  191. if (n)
  192. return n;
  193. n = neigh_create(&nd_tbl, daddr, dev);
  194. return IS_ERR(n) ? NULL : n;
  195. }
  196. static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
  197. struct sk_buff *skb,
  198. const void *daddr)
  199. {
  200. const struct rt6_info *rt = dst_rt6_info(dst);
  201. return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
  202. dst_dev(dst), skb, daddr);
  203. }
  204. static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
  205. {
  206. const struct rt6_info *rt = dst_rt6_info(dst);
  207. struct net_device *dev = dst_dev(dst);
  208. daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
  209. if (!daddr)
  210. return;
  211. if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
  212. return;
  213. if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
  214. return;
  215. __ipv6_confirm_neigh(dev, daddr);
  216. }
/*
 * dst_ops table for AF_INET6 routes, wiring this file's handlers into the
 * generic dst callbacks.
 * NOTE(review): the name suggests it is copied into each netns
 * (net->ipv6.ip6_dst_ops) at init time; that copy is not visible here.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	dst_cow_metrics_generic,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
/*
 * dst_ops table for IPv6 blackhole dsts: PMTU updates, redirects and MTU
 * queries go to the generic dst_blackhole_* helpers, while check, destroy,
 * advmss and neighbour lookup reuse the regular IPv6 handlers.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.default_advmss		= ip6_default_advmss,
	.neigh_lookup		= ip6_dst_neigh_lookup,
	.check			= ip6_dst_check,
	.destroy		= ip6_dst_destroy,
	.cow_metrics		= dst_cow_metrics_generic,
	.update_pmtu		= dst_blackhole_update_pmtu,
	.redirect		= dst_blackhole_redirect,
	.mtu			= dst_blackhole_mtu,
};
/*
 * Metrics array with every entry zero; the hop-limit slot is spelled out
 * explicitly, the remaining entries are zero-initialized implicitly.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
/*
 * Template for the FIB "null" entry: a kernel-owned reject route of type
 * RTN_UNREACHABLE without a nexthop, carrying the largest possible metric
 * and starting with a single reference.
 */
static const struct fib6_info fib6_null_entry_template = {
	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.fib6_protocol  = RTPROT_KERNEL,
	.fib6_metric	= ~(u32)0,
	.fib6_ref	= REFCOUNT_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	/* Cast drops the const qualifier of the shared default metrics. */
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};
/*
 * Template for the "null" dst: a reject route whose input and output
 * handlers discard the packet and whose error is -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__rcuref	= RCUREF_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
  268. #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/*
 * Template for the "prohibit" dst: a reject route handled by the
 * ip6_pkt_prohibit* functions, with error -EACCES.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__rcuref	= RCUREF_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
/*
 * Template for the "blackhole" dst: a reject route whose packets go to the
 * generic dst_discard handlers, with error -EINVAL.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__rcuref	= RCUREF_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
  291. #endif
/* Zero every field of @rt that is laid out after the embedded dst member. */
static void rt6_info_init(struct rt6_info *rt)
{
	memset_after(rt, 0, dst);
}
  296. /* allocate dst with ip6_dst_ops */
  297. struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
  298. int flags)
  299. {
  300. struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
  301. DST_OBSOLETE_FORCE_CHK, flags);
  302. if (rt) {
  303. rt6_info_init(rt);
  304. atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
  305. }
  306. return rt;
  307. }
  308. EXPORT_SYMBOL(ip6_dst_alloc);
/*
 * dst_ops.destroy handler: drop the metrics, unlink the route from its
 * uncached list (if any) and release the inet6_dev and fib6_info
 * references held by the rt6_info.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = dst_rt6_info(dst);
	struct fib6_info *from;
	struct inet6_dev *idev;

	ip_dst_metrics_put(dst);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		/* Clear the pointer before dropping the reference. */
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* Atomically take over rt->from, leaving NULL behind, then release it. */
	from = unrcu_pointer(xchg(&rt->from, NULL));
	fib6_info_release(from);
}
/*
 * dst_ops.ifdown handler: move the route's inet6_dev reference over to the
 * blackhole device and drop the route's fib6_info reference.
 * @dev is not consulted by this function.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	struct rt6_info *rt = dst_rt6_info(dst);
	struct inet6_dev *idev = rt->rt6i_idev;
	struct fib6_info *from;

	if (idev && idev->dev != blackhole_netdev) {
		struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);

		/* Only swap when the blackhole inet6_dev could be obtained. */
		if (blackhole_idev) {
			rt->rt6i_idev = blackhole_idev;
			in6_dev_put(idev);
		}
	}

	/* Atomically take over rt->from, leaving NULL behind, then release it. */
	from = unrcu_pointer(xchg(&rt->from, NULL));
	fib6_info_release(from);
}
  339. static bool __rt6_check_expired(const struct rt6_info *rt)
  340. {
  341. if (rt->rt6i_flags & RTF_EXPIRES)
  342. return time_after(jiffies, READ_ONCE(rt->dst.expires));
  343. return false;
  344. }
  345. static bool rt6_check_expired(const struct rt6_info *rt)
  346. {
  347. struct fib6_info *from;
  348. from = rcu_dereference(rt->from);
  349. if (rt->rt6i_flags & RTF_EXPIRES) {
  350. if (time_after(jiffies, READ_ONCE(rt->dst.expires)))
  351. return true;
  352. } else if (from) {
  353. return READ_ONCE(rt->dst.obsolete) != DST_OBSOLETE_FORCE_CHK ||
  354. fib6_check_expired(from);
  355. }
  356. return false;
  357. }
  358. static struct fib6_info *
  359. rt6_multipath_first_sibling_rcu(const struct fib6_info *rt)
  360. {
  361. struct fib6_info *iter;
  362. struct fib6_node *fn;
  363. fn = rcu_dereference(rt->fib6_node);
  364. if (!fn)
  365. goto out;
  366. iter = rcu_dereference(fn->leaf);
  367. if (!iter)
  368. goto out;
  369. while (iter) {
  370. if (iter->fib6_metric == rt->fib6_metric &&
  371. rt6_qualify_for_ecmp(iter))
  372. return iter;
  373. iter = rcu_dereference(iter->fib6_next);
  374. }
  375. out:
  376. return NULL;
  377. }
/*
 * Pick one path of a multipath route for this flow and store it in @res.
 *
 * @res->f6i is the matched entry on input. On every path that reaches the
 * "out" label, @res->f6i and @res->nh are rewritten to the chosen entry and
 * its fib6_nh. For routes using a nexthop object (match->nh set) @res is
 * either left untouched or filled in by nexthop_path_fib6_result().
 *
 * @fl6->mp_hash is computed here when it is still zero and the route either
 * has no nexthop object or its nexthop object is multipath. When @skb is
 * given it is flagged with IP6SKB_MULTIPATH.
 */
void fib6_select_path(const struct net *net, struct fib6_result *res,
		      struct flowi6 *fl6, int oif, bool have_oif_match,
		      const struct sk_buff *skb, int strict)
{
	struct fib6_info *first, *match = res->f6i;
	struct fib6_info *sibling;
	int hash;

	/* No nexthop object, and no siblings or oif already matched: keep match. */
	if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
		goto out;

	/* Nexthop object with oif matched and a nexthop already set: done. */
	if (match->nh && have_oif_match && res->nh)
		return;

	if (skb)
		IP6CB(skb)->flags |= IP6SKB_MULTIPATH;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash &&
	    (!match->nh || nexthop_is_multipath(match->nh)))
		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

	if (unlikely(match->nh)) {
		nexthop_path_fib6_result(res, fl6->mp_hash);
		return;
	}

	first = rt6_multipath_first_sibling_rcu(match);
	if (!first)
		goto out;

	hash = fl6->mp_hash;
	/* Hash falls in the first sibling's range: use it only if it scores
	 * non-negative, otherwise keep the original match.
	 */
	if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) {
		if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
				    strict) >= 0)
			match = first;
		goto out;
	}

	/* Otherwise stop at the first sibling whose upper bound covers the
	 * hash; a negative score for it leaves the original match in place.
	 */
	list_for_each_entry_rcu(sibling, &first->fib6_siblings,
				fib6_siblings) {
		const struct fib6_nh *nh = sibling->fib6_nh;
		int nh_upper_bound;

		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
		if (hash > nh_upper_bound)
			continue;
		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

out:
	res->f6i = match;
	res->nh = match->fib6_nh;
}
  427. /*
  428. * Route lookup. rcu_read_lock() should be held.
  429. */
  430. static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
  431. const struct in6_addr *saddr, int oif, int flags)
  432. {
  433. const struct net_device *dev;
  434. if (nh->fib_nh_flags & RTNH_F_DEAD)
  435. return false;
  436. dev = nh->fib_nh_dev;
  437. if (oif) {
  438. if (dev->ifindex == oif)
  439. return true;
  440. } else {
  441. if (ipv6_chk_addr(net, saddr, dev,
  442. flags & RT6_LOOKUP_F_IFACE))
  443. return true;
  444. }
  445. return false;
  446. }
/*
 * Argument bundle handed to __rt6_nh_dev_match() through
 * nexthop_for_each_fib6_nh(): the lookup parameters plus an output slot.
 */
struct fib6_nh_dm_arg {
	struct net		*net;
	const struct in6_addr	*saddr;
	int			oif;
	int			flags;
	/* Output: the fib6_nh most recently passed to the callback. */
	struct fib6_nh		*nh;
};
  454. static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
  455. {
  456. struct fib6_nh_dm_arg *arg = _arg;
  457. arg->nh = nh;
  458. return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
  459. arg->flags);
  460. }
  461. /* returns fib6_nh from nexthop or NULL */
  462. static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
  463. struct fib6_result *res,
  464. const struct in6_addr *saddr,
  465. int oif, int flags)
  466. {
  467. struct fib6_nh_dm_arg arg = {
  468. .net = net,
  469. .saddr = saddr,
  470. .oif = oif,
  471. .flags = flags,
  472. };
  473. if (nexthop_is_blackhole(nh))
  474. return NULL;
  475. if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
  476. return arg.nh;
  477. return NULL;
  478. }
/*
 * Starting at res->f6i, choose the entry on the fib6_next chain whose
 * nexthop matches @oif (or, with @oif == 0, whose device passes
 * ipv6_chk_addr() for @saddr). On return @res describes the choice:
 * via "out", nh/fib6_type/fib6_flags are copied from the selected f6i;
 * via "out_blackhole", the type is forced to RTN_BLACKHOLE and RTF_REJECT
 * is OR-ed into the flags while res->f6i is left as it was.
 */
static void rt6_device_match(struct net *net, struct fib6_result *res,
			     const struct in6_addr *saddr, int oif, int flags)
{
	struct fib6_info *f6i = res->f6i;
	struct fib6_info *spf6i;
	struct fib6_nh *nh;

	/* No oif and unspecified saddr: take the first entry unless its
	 * nexthop is dead.
	 */
	if (!oif && ipv6_addr_any(saddr)) {
		if (unlikely(f6i->nh)) {
			nh = nexthop_fib6_nh(f6i->nh);
			if (nexthop_is_blackhole(f6i->nh))
				goto out_blackhole;
		} else {
			nh = f6i->fib6_nh;
		}
		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
			goto out;
	}

	/* Walk the chain and stop at the first entry with a matching nexthop. */
	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
		bool matched = false;

		if (unlikely(spf6i->nh)) {
			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
					      oif, flags);
			if (nh)
				matched = true;
		} else {
			nh = spf6i->fib6_nh;
			if (__rt6_device_match(net, nh, saddr, oif, flags))
				matched = true;
		}
		if (matched) {
			res->f6i = spf6i;
			goto out;
		}
	}

	/* Nothing matched and a strict interface match was requested:
	 * answer with the null entry.
	 */
	if (oif && flags & RT6_LOOKUP_F_IFACE) {
		res->f6i = net->ipv6.fib6_null_entry;
		nh = res->f6i->fib6_nh;
		goto out;
	}

	/* Non-strict: fall back to the first entry ... */
	if (unlikely(f6i->nh)) {
		nh = nexthop_fib6_nh(f6i->nh);
		if (nexthop_is_blackhole(f6i->nh))
			goto out_blackhole;
	} else {
		nh = f6i->fib6_nh;
	}

	/* ... unless its nexthop is dead, in which case use the null entry. */
	if (nh->fib_nh_flags & RTNH_F_DEAD) {
		res->f6i = net->ipv6.fib6_null_entry;
		nh = res->f6i->fib6_nh;
	}
out:
	res->nh = nh;
	res->fib6_type = res->f6i->fib6_type;
	res->fib6_flags = res->f6i->fib6_flags;
	return;

out_blackhole:
	res->fib6_flags |= RTF_REJECT;
	res->fib6_type = RTN_BLACKHOLE;
	res->nh = nh;
}
  539. #ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred router probe request, queued by rt6_probe(). */
struct __rt6_probe_work {
	struct work_struct work;
	/* Address to solicit (the nexthop gateway). */
	struct in6_addr target;
	/* Device to send on; held via dev_tracker until the work has run. */
	struct net_device *dev;
	netdevice_tracker dev_tracker;
};
  546. static void rt6_probe_deferred(struct work_struct *w)
  547. {
  548. struct in6_addr mcaddr;
  549. struct __rt6_probe_work *work =
  550. container_of(w, struct __rt6_probe_work, work);
  551. addrconf_addr_solict_mult(&work->target, &mcaddr);
  552. ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
  553. netdev_put(work->dev, &work->dev_tracker);
  554. kfree(work);
  555. }
/*
 * Possibly queue a neighbour solicitation towards the gateway of @fib6_nh.
 * Nothing happens for nexthops without a gateway, for devices without an
 * inet6_dev, or while the neighbour entry is in a NUD_VALID state. The
 * send itself is deferred to rt6_probe_deferred() through schedule_work().
 */
static void rt6_probe(struct fib6_nh *fib6_nh)
{
	struct __rt6_probe_work *work = NULL;
	const struct in6_addr *nh_gw;
	unsigned long last_probe;
	struct neighbour *neigh;
	struct net_device *dev;
	struct inet6_dev *idev;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!fib6_nh->fib_nh_gw_family)
		return;

	nh_gw = &fib6_nh->fib_nh_gw6;
	dev = fib6_nh->fib_nh_dev;
	rcu_read_lock();
	/* Snapshot used below as the expected value for the cmpxchg(). */
	last_probe = READ_ONCE(fib6_nh->last_probe);
	idev = __in6_dev_get(dev);
	if (!idev)
		goto out;
	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
	if (neigh) {
		/* Lockless fast path: a valid neighbour needs no probe. */
		if (READ_ONCE(neigh->nud_state) & NUD_VALID)
			goto out;

		/* Re-check under the neighbour lock, and rate-limit against
		 * the time of the neighbour's last update.
		 */
		write_lock_bh(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       READ_ONCE(idev->cnf.rtr_probe_interval))) {
			work = kmalloc_obj(*work, GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock_bh(&neigh->lock);
	} else if (time_after(jiffies, last_probe +
				       READ_ONCE(idev->cnf.rtr_probe_interval))) {
		/* No neighbour entry: rate-limit against the nexthop's last probe. */
		work = kmalloc_obj(*work, GFP_ATOMIC);
	}

	/* Only the caller whose cmpxchg() on last_probe succeeds queues the
	 * work; otherwise the allocation (possibly NULL) is freed.
	 */
	if (!work || cmpxchg(&fib6_nh->last_probe,
			     last_probe, jiffies) != last_probe) {
		kfree(work);
	} else {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = *nh_gw;
		/* Device reference is dropped by rt6_probe_deferred(). */
		netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC);
		work->dev = dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock();
}
  612. #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF: no-op. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
  616. #endif
/*
 * Default Router Selection (RFC 2461 6.3.6)
 */

/*
 * Classify the reachability of @fib6_nh's gateway from its neighbour entry.
 *
 * With a neighbour entry: NUD_VALID yields RT6_NUD_SUCCEED. Otherwise,
 * with CONFIG_IPV6_ROUTER_PREF, anything but NUD_FAILED still succeeds and
 * NUD_FAILED yields RT6_NUD_FAIL_PROBE; without that option the initial
 * RT6_NUD_FAIL_HARD is returned.
 *
 * Without a neighbour entry: RT6_NUD_SUCCEED when CONFIG_IPV6_ROUTER_PREF
 * is enabled, RT6_NUD_FAIL_DO_RR otherwise.
 */
static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
					  &fib6_nh->fib_nh_gw6);
	if (neigh) {
		/* Read the state once so all tests see the same value. */
		u8 nud_state = READ_ONCE(neigh->nud_state);

		if (nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock();

	return ret;
}
  644. static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
  645. int strict)
  646. {
  647. int m = 0;
  648. if (!oif || nh->fib_nh_dev->ifindex == oif)
  649. m = 2;
  650. if (!m && (strict & RT6_LOOKUP_F_IFACE))
  651. return RT6_NUD_FAIL_HARD;
  652. #ifdef CONFIG_IPV6_ROUTER_PREF
  653. m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
  654. #endif
  655. if ((strict & RT6_LOOKUP_F_REACHABLE) &&
  656. !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
  657. int n = rt6_check_neigh(nh);
  658. if (n < 0)
  659. return n;
  660. }
  661. return m;
  662. }
/*
 * Score one nexthop and compare it with the best score so far.
 *
 * Dead nexthops are rejected, as are link-down nexthops when the device
 * ignores link-down routes and RT6_LOOKUP_F_IGNORE_LINKSTATE is not set.
 * Returns true when the score is strictly greater than *mpri; in that case
 * *mpri is updated and *do_rr records whether the score came from
 * RT6_NUD_FAIL_DO_RR.
 */
static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
		       int oif, int strict, int *mpri, bool *do_rr)
{
	bool match_do_rr = false;
	bool rc = false;
	int m;

	if (nh->fib_nh_flags & RTNH_F_DEAD)
		goto out;

	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	m = rt6_score_route(nh, fib6_flags, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	/* Probing is attempted for every nexthop that was not hard-rejected. */
	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(nh);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		rc = true;
	}
out:
	return rc;
}
/*
 * Argument bundle handed to rt6_nh_find_match() through
 * nexthop_for_each_fib6_nh(): find_match() parameters plus an output slot.
 */
struct fib6_nh_frl_arg {
	u32		flags;
	int		oif;
	int		strict;
	int		*mpri;
	bool		*do_rr;
	/* Output: the fib6_nh most recently passed to the callback. */
	struct fib6_nh	*nh;
};
  701. static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
  702. {
  703. struct fib6_nh_frl_arg *arg = _arg;
  704. arg->nh = nh;
  705. return find_match(nh, arg->flags, arg->oif, arg->strict,
  706. arg->mpri, arg->do_rr);
  707. }
/*
 * Scan the fib6_next chain from @f6i_start up to (not including) @nomatch
 * and store in @res the entry whose nexthop scores best against *mpri.
 *
 * When @cont is non-NULL the scan stops at the first entry whose metric
 * differs from @metric and stores that entry in *cont. Expired entries are
 * skipped. A blackhole nexthop object ends the scan at once, with @res
 * describing a RTN_BLACKHOLE reject.
 */
static void __find_rr_leaf(struct fib6_info *f6i_start,
			   struct fib6_info *nomatch, u32 metric,
			   struct fib6_result *res, struct fib6_info **cont,
			   int oif, int strict, bool *do_rr, int *mpri)
{
	struct fib6_info *f6i;

	for (f6i = f6i_start;
	     f6i && f6i != nomatch;
	     f6i = rcu_dereference(f6i->fib6_next)) {
		bool matched = false;
		struct fib6_nh *nh;

		if (cont && f6i->fib6_metric != metric) {
			*cont = f6i;
			return;
		}

		if (fib6_check_expired(f6i))
			continue;

		if (unlikely(f6i->nh)) {
			struct fib6_nh_frl_arg arg = {
				.flags  = f6i->fib6_flags,
				.oif    = oif,
				.strict = strict,
				.mpri   = mpri,
				.do_rr  = do_rr
			};

			if (nexthop_is_blackhole(f6i->nh)) {
				res->fib6_flags = RTF_REJECT;
				res->fib6_type = RTN_BLACKHOLE;
				res->f6i = f6i;
				res->nh = nexthop_fib6_nh(f6i->nh);
				return;
			}
			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
						     &arg)) {
				matched = true;
				nh = arg.nh;
			}
		} else {
			nh = f6i->fib6_nh;
			if (find_match(nh, f6i->fib6_flags, oif, strict,
				       mpri, do_rr))
				matched = true;
		}
		/* A later, better-scoring entry overwrites an earlier result. */
		if (matched) {
			res->f6i = f6i;
			res->nh = nh;
			res->fib6_flags = f6i->fib6_flags;
			res->fib6_type = f6i->fib6_type;
		}
	}
}
/*
 * Three-pass search for the best route among the entries of a node:
 *  1. from @rr_head to the end of the chain, same metric only;
 *  2. from @leaf up to @rr_head, same metric only;
 *  3. if nothing was found and a different-metric entry was recorded in
 *     "cont", from that entry on, without the metric restriction.
 * @fn is not used in this function.
 */
static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
			 struct fib6_info *rr_head, int oif, int strict,
			 bool *do_rr, struct fib6_result *res)
{
	u32 metric = rr_head->fib6_metric;
	struct fib6_info *cont = NULL;
	int mpri = -1;

	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
		       oif, strict, do_rr, &mpri);

	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
		       oif, strict, do_rr, &mpri);

	if (res->f6i || !cont)
		return;

	__find_rr_leaf(cont, NULL, metric, res, NULL,
		       oif, strict, do_rr, &mpri);
}
/*
 * Select the best route stored at node @fn and describe it in @res.
 * When nothing is selected, @res is filled from the netns fib6_null_entry.
 * If the search asks for round-robin (do_rr), fn->rr_ptr is advanced to
 * the next same-metric entry under the table lock.
 */
static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
		       struct fib6_result *res, int strict)
{
	struct fib6_info *leaf = rcu_dereference(fn->leaf);
	struct fib6_info *rt0;
	bool do_rr = false;
	int key_plen;

	/* make sure this function or its helpers sets f6i */
	res->f6i = NULL;

	if (!leaf || leaf == net->ipv6.fib6_null_entry)
		goto out;

	/* Start at the round-robin pointer, or at the leaf if it is unset. */
	rt0 = rcu_dereference(fn->rr_ptr);
	if (!rt0)
		rt0 = leaf;

	/* Double check to make sure fn is not an intermediate node
	 * and fn->leaf does not point to its child's leaf
	 * (This might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
	key_plen = rt0->fib6_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
	if (rt0->fib6_src.plen)
		key_plen = rt0->fib6_src.plen;
#endif
	if (fn->fn_bit != key_plen)
		goto out;

	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
	if (do_rr) {
		struct fib6_info *next = rcu_dereference(rt0->fib6_next);

		/* no entries matched; do round-robin */
		if (!next || next->fib6_metric != rt0->fib6_metric)
			next = leaf;

		if (next != rt0) {
			spin_lock_bh(&leaf->fib6_table->tb6_lock);
			/* make sure next is not being deleted from the tree */
			if (next->fib6_node)
				rcu_assign_pointer(fn->rr_ptr, next);
			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
		}
	}

out:
	if (!res->f6i) {
		res->f6i = net->ipv6.fib6_null_entry;
		res->nh = res->f6i->fib6_nh;
		res->fib6_flags = res->f6i->fib6_flags;
		res->fib6_type = res->f6i->fib6_type;
	}
}
  823. static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
  824. {
  825. return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
  826. res->nh->fib_nh_gw_family;
  827. }
  828. #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Adds, refreshes or deletes the corresponding route depending on the
 * advertised lifetime. Returns 0, or -EINVAL for a malformed option or an
 * invalid preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	struct fib6_table *table;
	unsigned int pref;
	unsigned long lifetime;
	struct fib6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	/* NOTE(review): presumably length counts 8-octet units, so longer
	 * prefixes need a larger length - confirm against struct route_info.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	/* With length 3 the prefix is used in place; otherwise only
	 * prefix_len bits are copied into a local buffer.
	 */
	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* A zero-length prefix refers to the default router entry. */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* Zero lifetime: remove an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(net, rt, false);
		rt = NULL;
	}

	/* Create a missing route, or refresh the preference of an existing one. */
	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->fib6_flags = RTF_ROUTEINFO |
				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		table = rt->fib6_table;
		spin_lock_bh(&table->tb6_lock);

		/* Infinite lifetime clears the expiry; a finite one (re)arms it. */
		if (!addrconf_finite_timeout(lifetime)) {
			fib6_clean_expires(rt);
			fib6_may_remove_gc_list(net, rt);
		} else {
			fib6_set_expires(rt, jiffies + HZ * lifetime);
			fib6_add_gc_list(rt);
		}

		spin_unlock_bh(&table->tb6_lock);

		fib6_info_release(rt);
	}
	return 0;
}
  899. #endif
  900. /*
  901. * Misc support functions
  902. */
/*
 * Choose the device a dst cloned from @res should reference. For routes
 * that are neither RTF_LOCAL nor RTF_ANYCAST this is the nexthop device.
 *
 * called with rcu_lock held
 */
static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
{
	struct net_device *dev = res->nh->fib_nh_dev;

	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
			/* Fall back to loopback when no master is found. */
			dev = l3mdev_master_dev_rcu(dev) ? :
			      dev_net(dev)->loopback_dev;
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}
/*
 * Error code associated with each route type, indexed by RTN_* value.
 * Deliverable types map to 0, the others to a negative errno.
 */
static const int fib6_prop[RTN_MAX + 1] = {
	[RTN_UNSPEC]	= 0,
	[RTN_UNICAST]	= 0,
	[RTN_LOCAL]	= 0,
	[RTN_BROADCAST]	= 0,
	[RTN_ANYCAST]	= 0,
	[RTN_MULTICAST]	= 0,
	[RTN_BLACKHOLE]	= -EINVAL,
	[RTN_UNREACHABLE] = -EHOSTUNREACH,
	[RTN_PROHIBIT]	= -EACCES,
	[RTN_THROW]	= -EAGAIN,
	[RTN_NAT]	= -EINVAL,
	[RTN_XRESOLVE]	= -EINVAL,
};
/*
 * Look up the error code for route type @fib6_type in fib6_prop[].
 * NOTE(review): the index is not range-checked here; presumably callers
 * only pass values <= RTN_MAX - confirm where fib6_type is validated.
 */
static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}
  942. static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
  943. {
  944. unsigned short flags = 0;
  945. if (rt->dst_nocount)
  946. flags |= DST_NOCOUNT;
  947. if (rt->dst_nopolicy)
  948. flags |= DST_NOPOLICY;
  949. return flags;
  950. }
/*
 * Set up @rt as a reject route of type @fib6_type: store the error code
 * for that type and install the matching input/output handlers.
 */
static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
	rt->dst.error = ip6_rt_type_to_error(fib6_type);

	switch (fib6_type) {
	case RTN_BLACKHOLE:
		rt->dst.output = dst_discard_out;
		rt->dst.input = dst_discard;
		break;
	case RTN_PROHIBIT:
		rt->dst.output = ip6_pkt_prohibit_out;
		rt->dst.input = ip6_pkt_prohibit;
		break;
	/* Throw, unreachable and every other type share the discard handlers. */
	case RTN_THROW:
	case RTN_UNREACHABLE:
	default:
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		break;
	}
}
/*
 * Initialise the generic dst part of @rt from lookup result @res: error
 * code, input/output handlers, lightweight tunnel state and lastuse.
 * Reject routes are delegated to ip6_rt_init_dst_reject() and get none of
 * the remaining setup.
 */
static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
{
	struct fib6_info *f6i = res->f6i;

	if (res->fib6_flags & RTF_REJECT) {
		ip6_rt_init_dst_reject(rt, res->fib6_type);
		return;
	}

	rt->dst.error = 0;
	rt->dst.output = ip6_output;

	/* Input handler: local delivery, multicast, or forwarding. */
	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
		rt->dst.input = ip6_input;
	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
		rt->dst.input = ip6_mc_input;
		/* Multicast destinations also replace the output handler. */
		rt->dst.output = ip6_mr_output;
	} else {
		rt->dst.input = ip6_forward;
	}

	if (res->nh->fib_nh_lws) {
		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
		lwtunnel_set_redirect(&rt->dst);
	}

	rt->dst.lastuse = jiffies;
}
/*
 * Attach @rt to its originating fib entry: clear RTF_EXPIRES, publish
 * @from in rt->from and initialise the dst metrics from the fib entry's.
 *
 * Caller must already hold reference to @from
 */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
	rt->rt6i_flags &= ~RTF_EXPIRES;
	rcu_assign_pointer(rt->from, from);
	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
/*
 * Fill a freshly allocated @rt from lookup result @res: dst handlers,
 * destination (and, with subtrees, source) prefix, inet6_dev reference,
 * flags, gateway and the link back to the fib entry.
 *
 * Caller must already hold reference to f6i in result
 */
static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
{
	const struct fib6_nh *nh = res->nh;
	const struct net_device *dev = nh->fib_nh_dev;
	struct fib6_info *f6i = res->f6i;

	ip6_rt_init_dst(rt, res);

	rt->rt6i_dst = f6i->fib6_dst;
	/* Takes an inet6_dev reference when the nexthop has a device. */
	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
	rt->rt6i_flags = res->fib6_flags;
	if (nh->fib_nh_gw_family) {
		rt->rt6i_gateway = nh->fib_nh_gw6;
		rt->rt6i_flags |= RTF_GATEWAY;
	}
	rt6_set_from(rt, f6i);
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = f6i->fib6_src;
#endif
}
  1020. static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
  1021. struct in6_addr *saddr)
  1022. {
  1023. struct fib6_node *pn, *sn;
  1024. while (1) {
  1025. if (fn->fn_flags & RTN_TL_ROOT)
  1026. return NULL;
  1027. pn = rcu_dereference(fn->parent);
  1028. sn = FIB6_SUBTREE(pn);
  1029. if (sn && sn != fn)
  1030. fn = fib6_node_lookup(sn, NULL, saddr);
  1031. else
  1032. fn = pn;
  1033. if (fn->fn_flags & RTN_RTINFO)
  1034. return fn;
  1035. }
  1036. }
  1037. static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
  1038. {
  1039. struct rt6_info *rt = *prt;
  1040. if (dst_hold_safe(&rt->dst))
  1041. return true;
  1042. if (net) {
  1043. rt = net->ipv6.ip6_null_entry;
  1044. dst_hold(&rt->dst);
  1045. } else {
  1046. rt = NULL;
  1047. }
  1048. *prt = rt;
  1049. return false;
  1050. }
/*
 * Build a new dst for lookup result @res. If a reference on the fib entry
 * cannot be taken or the allocation fails, the netns null entry is
 * returned instead, with a reference held.
 *
 * called with rcu_lock held
 */
static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
{
	struct net_device *dev = res->nh->fib_nh_dev;
	struct fib6_info *f6i = res->f6i;
	unsigned short flags;
	struct rt6_info *nrt;

	if (!fib6_info_hold_safe(f6i))
		goto fallback;

	flags = fib6_info_dst_flags(f6i);
	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
	if (!nrt) {
		/* Give back the reference taken above. */
		fib6_info_release(f6i);
		goto fallback;
	}

	/* The f6i reference is handed over to the new dst (rt->from). */
	ip6_rt_copy_init(nrt, res);
	return nrt;

fallback:
	nrt = dev_net(dev)->ipv6.ip6_null_entry;
	dst_hold(&nrt->dst);
	return nrt;
}
/*
 * Look up @fl6 in @table and return a dst with a reference held.
 *
 * The deepest matching node is tried first; whenever the device match
 * yields the null entry the search backtracks towards the root. Reject
 * results skip path selection and the exception cache and get a fresh dst.
 * Otherwise a cached exception route is preferred over creating a new dst.
 * The whole lookup runs inside rcu_read_lock().
 */
INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	struct fib6_result res = {};
	struct fib6_node *fn;
	struct rt6_info *rt;

	rcu_read_lock();
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	res.f6i = rcu_dereference(fn->leaf);
	if (!res.f6i)
		res.f6i = net->ipv6.fib6_null_entry;
	else
		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
				 flags);

	if (res.f6i == net->ipv6.fib6_null_entry) {
		/* Nothing usable at this node: try a less specific one. */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;

		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
		goto out;
	} else if (res.fib6_flags & RTF_REJECT) {
		goto do_create;
	}

	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
			 fl6->flowi6_oif != 0, skb, flags);

	/* Search through exception table */
	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
	if (rt) {
		/* On a failed hold, rt has been replaced by the null entry. */
		if (ip6_hold_safe(net, &rt))
			dst_use_noref(&rt->dst, jiffies);
	} else {
do_create:
		rt = ip6_create_rt_rcu(&res);
	}

out:
	trace_fib6_table_lookup(net, &res, table, fl6);

	rcu_read_unlock();

	return rt;
}
/*
 * Route lookup for @fl6 through the FIB rules, using
 * ip6_pol_route_lookup() as the per-table lookup function.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb, int flags)
{
	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
  1123. struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
  1124. const struct in6_addr *saddr, int oif,
  1125. const struct sk_buff *skb, int strict)
  1126. {
  1127. struct flowi6 fl6 = {
  1128. .flowi6_oif = oif,
  1129. .daddr = *daddr,
  1130. };
  1131. struct dst_entry *dst;
  1132. int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
  1133. if (saddr) {
  1134. memcpy(&fl6.saddr, saddr, sizeof(*saddr));
  1135. flags |= RT6_LOOKUP_F_HAS_SADDR;
  1136. }
  1137. dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
  1138. if (dst->error == 0)
  1139. return dst_rt6_info(dst);
  1140. dst_release(dst);
  1141. return NULL;
  1142. }
  1143. EXPORT_SYMBOL(rt6_lookup);
  1144. /* ip6_ins_rt is called with FREE table->tb6_lock.
  1145. * It takes new route entry, the addition fails by any reason the
  1146. * route is released.
  1147. * Caller must hold dst before calling it.
  1148. */
  1149. static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
  1150. struct netlink_ext_ack *extack)
  1151. {
  1152. int err;
  1153. struct fib6_table *table;
  1154. table = rt->fib6_table;
  1155. spin_lock_bh(&table->tb6_lock);
  1156. err = fib6_add(&table->tb6_root, rt, info, extack);
  1157. spin_unlock_bh(&table->tb6_lock);
  1158. return err;
  1159. }
  1160. int ip6_ins_rt(struct net *net, struct fib6_info *rt)
  1161. {
  1162. struct nl_info info = { .nl_net = net, };
  1163. return __ip6_ins_rt(rt, &info, NULL);
  1164. }
  1165. static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
  1166. const struct in6_addr *daddr,
  1167. const struct in6_addr *saddr)
  1168. {
  1169. struct fib6_info *f6i = res->f6i;
  1170. struct net_device *dev;
  1171. struct rt6_info *rt;
  1172. /*
  1173. * Clone the route.
  1174. */
  1175. if (!fib6_info_hold_safe(f6i))
  1176. return NULL;
  1177. dev = ip6_rt_get_dev_rcu(res);
  1178. rt = ip6_dst_alloc(dev_net(dev), dev, 0);
  1179. if (!rt) {
  1180. fib6_info_release(f6i);
  1181. return NULL;
  1182. }
  1183. ip6_rt_copy_init(rt, res);
  1184. rt->rt6i_flags |= RTF_CACHE;
  1185. rt->rt6i_dst.addr = *daddr;
  1186. rt->rt6i_dst.plen = 128;
  1187. if (!rt6_is_gw_or_nonexthop(res)) {
  1188. if (f6i->fib6_dst.plen != 128 &&
  1189. ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
  1190. rt->rt6i_flags |= RTF_ANYCAST;
  1191. #ifdef CONFIG_IPV6_SUBTREES
  1192. if (rt->rt6i_src.plen && saddr) {
  1193. rt->rt6i_src.addr = *saddr;
  1194. rt->rt6i_src.plen = 128;
  1195. }
  1196. #endif
  1197. }
  1198. return rt;
  1199. }
  1200. static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
  1201. {
  1202. struct fib6_info *f6i = res->f6i;
  1203. unsigned short flags = fib6_info_dst_flags(f6i);
  1204. struct net_device *dev;
  1205. struct rt6_info *pcpu_rt;
  1206. if (!fib6_info_hold_safe(f6i))
  1207. return NULL;
  1208. rcu_read_lock();
  1209. dev = ip6_rt_get_dev_rcu(res);
  1210. pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
  1211. rcu_read_unlock();
  1212. if (!pcpu_rt) {
  1213. fib6_info_release(f6i);
  1214. return NULL;
  1215. }
  1216. ip6_rt_copy_init(pcpu_rt, res);
  1217. pcpu_rt->rt6i_flags |= RTF_PCPU;
  1218. if (f6i->nh)
  1219. pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
  1220. return pcpu_rt;
  1221. }
  1222. static bool rt6_is_valid(const struct rt6_info *rt6)
  1223. {
  1224. return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
  1225. }
  1226. /* It should be called with rcu_read_lock() acquired */
  1227. static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
  1228. {
  1229. struct rt6_info *pcpu_rt;
  1230. pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
  1231. if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
  1232. struct rt6_info *prev, **p;
  1233. p = this_cpu_ptr(res->nh->rt6i_pcpu);
  1234. /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
  1235. prev = xchg(p, NULL);
  1236. if (prev) {
  1237. dst_dev_put(&prev->dst);
  1238. dst_release(&prev->dst);
  1239. }
  1240. pcpu_rt = NULL;
  1241. }
  1242. return pcpu_rt;
  1243. }
  1244. static struct rt6_info *rt6_make_pcpu_route(struct net *net,
  1245. const struct fib6_result *res)
  1246. {
  1247. struct rt6_info *pcpu_rt, *prev, **p;
  1248. pcpu_rt = ip6_rt_pcpu_alloc(res);
  1249. if (!pcpu_rt)
  1250. return NULL;
  1251. p = this_cpu_ptr(res->nh->rt6i_pcpu);
  1252. prev = cmpxchg(p, NULL, pcpu_rt);
  1253. if (unlikely(prev)) {
  1254. /*
  1255. * Another task on this CPU already installed a pcpu_rt.
  1256. * This can happen on PREEMPT_RT where preemption is possible.
  1257. * Free our allocation and return the existing one.
  1258. */
  1259. WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT));
  1260. dst_dev_put(&pcpu_rt->dst);
  1261. dst_release(&pcpu_rt->dst);
  1262. return prev;
  1263. }
  1264. if (res->f6i->fib6_destroying) {
  1265. struct fib6_info *from;
  1266. from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
  1267. fib6_info_release(from);
  1268. }
  1269. return pcpu_rt;
  1270. }
  1271. /* exception hash table implementation
  1272. */
  1273. static DEFINE_SPINLOCK(rt6_exception_lock);
  1274. /* Remove rt6_ex from hash table and free the memory
  1275. * Caller must hold rt6_exception_lock
  1276. */
  1277. static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
  1278. struct rt6_exception *rt6_ex)
  1279. {
  1280. struct net *net;
  1281. if (!bucket || !rt6_ex)
  1282. return;
  1283. net = dev_net(rt6_ex->rt6i->dst.dev);
  1284. net->ipv6.rt6_stats->fib_rt_cache--;
  1285. /* purge completely the exception to allow releasing the held resources:
  1286. * some [sk] cache may keep the dst around for unlimited time
  1287. */
  1288. dst_dev_put(&rt6_ex->rt6i->dst);
  1289. hlist_del_rcu(&rt6_ex->hlist);
  1290. dst_release(&rt6_ex->rt6i->dst);
  1291. kfree_rcu(rt6_ex, rcu);
  1292. WARN_ON_ONCE(!bucket->depth);
  1293. bucket->depth--;
  1294. }
  1295. /* Remove oldest rt6_ex in bucket and free the memory
  1296. * Caller must hold rt6_exception_lock
  1297. */
  1298. static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
  1299. {
  1300. struct rt6_exception *rt6_ex, *oldest = NULL;
  1301. if (!bucket)
  1302. return;
  1303. hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
  1304. if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
  1305. oldest = rt6_ex;
  1306. }
  1307. rt6_remove_exception(bucket, oldest);
  1308. }
  1309. static u32 rt6_exception_hash(const struct in6_addr *dst,
  1310. const struct in6_addr *src)
  1311. {
  1312. static siphash_aligned_key_t rt6_exception_key;
  1313. struct {
  1314. struct in6_addr dst;
  1315. struct in6_addr src;
  1316. } __aligned(SIPHASH_ALIGNMENT) combined = {
  1317. .dst = *dst,
  1318. };
  1319. u64 val;
  1320. net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
  1321. #ifdef CONFIG_IPV6_SUBTREES
  1322. if (src)
  1323. combined.src = *src;
  1324. #endif
  1325. val = siphash(&combined, sizeof(combined), &rt6_exception_key);
  1326. return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
  1327. }
  1328. /* Helper function to find the cached rt in the hash table
  1329. * and update bucket pointer to point to the bucket for this
  1330. * (daddr, saddr) pair
  1331. * Caller must hold rt6_exception_lock
  1332. */
  1333. static struct rt6_exception *
  1334. __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
  1335. const struct in6_addr *daddr,
  1336. const struct in6_addr *saddr)
  1337. {
  1338. struct rt6_exception *rt6_ex;
  1339. u32 hval;
  1340. if (!(*bucket) || !daddr)
  1341. return NULL;
  1342. hval = rt6_exception_hash(daddr, saddr);
  1343. *bucket += hval;
  1344. hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
  1345. struct rt6_info *rt6 = rt6_ex->rt6i;
  1346. bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
  1347. #ifdef CONFIG_IPV6_SUBTREES
  1348. if (matched && saddr)
  1349. matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
  1350. #endif
  1351. if (matched)
  1352. return rt6_ex;
  1353. }
  1354. return NULL;
  1355. }
  1356. /* Helper function to find the cached rt in the hash table
  1357. * and update bucket pointer to point to the bucket for this
  1358. * (daddr, saddr) pair
  1359. * Caller must hold rcu_read_lock()
  1360. */
  1361. static struct rt6_exception *
  1362. __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
  1363. const struct in6_addr *daddr,
  1364. const struct in6_addr *saddr)
  1365. {
  1366. struct rt6_exception *rt6_ex;
  1367. u32 hval;
  1368. WARN_ON_ONCE(!rcu_read_lock_held());
  1369. if (!(*bucket) || !daddr)
  1370. return NULL;
  1371. hval = rt6_exception_hash(daddr, saddr);
  1372. *bucket += hval;
  1373. hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
  1374. struct rt6_info *rt6 = rt6_ex->rt6i;
  1375. bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
  1376. #ifdef CONFIG_IPV6_SUBTREES
  1377. if (matched && saddr)
  1378. matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
  1379. #endif
  1380. if (matched)
  1381. return rt6_ex;
  1382. }
  1383. return NULL;
  1384. }
  1385. static unsigned int fib6_mtu(const struct fib6_result *res)
  1386. {
  1387. const struct fib6_nh *nh = res->nh;
  1388. unsigned int mtu;
  1389. if (res->f6i->fib6_pmtu) {
  1390. mtu = res->f6i->fib6_pmtu;
  1391. } else {
  1392. struct net_device *dev = nh->fib_nh_dev;
  1393. struct inet6_dev *idev;
  1394. rcu_read_lock();
  1395. idev = __in6_dev_get(dev);
  1396. mtu = READ_ONCE(idev->cnf.mtu6);
  1397. rcu_read_unlock();
  1398. }
  1399. mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
  1400. return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
  1401. }
  1402. #define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL
  1403. /* used when the flushed bit is not relevant, only access to the bucket
  1404. * (ie., all bucket users except rt6_insert_exception);
  1405. *
  1406. * called under rcu lock; sometimes called with rt6_exception_lock held
  1407. */
  1408. static
  1409. struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
  1410. spinlock_t *lock)
  1411. {
  1412. struct rt6_exception_bucket *bucket;
  1413. if (lock)
  1414. bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
  1415. lockdep_is_held(lock));
  1416. else
  1417. bucket = rcu_dereference(nh->rt6i_exception_bucket);
  1418. /* remove bucket flushed bit if set */
  1419. if (bucket) {
  1420. unsigned long p = (unsigned long)bucket;
  1421. p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
  1422. bucket = (struct rt6_exception_bucket *)p;
  1423. }
  1424. return bucket;
  1425. }
  1426. static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
  1427. {
  1428. unsigned long p = (unsigned long)bucket;
  1429. return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
  1430. }
  1431. /* called with rt6_exception_lock held */
  1432. static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
  1433. spinlock_t *lock)
  1434. {
  1435. struct rt6_exception_bucket *bucket;
  1436. unsigned long p;
  1437. bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
  1438. lockdep_is_held(lock));
  1439. p = (unsigned long)bucket;
  1440. p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
  1441. bucket = (struct rt6_exception_bucket *)p;
  1442. rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
  1443. }
  1444. static int rt6_insert_exception(struct rt6_info *nrt,
  1445. const struct fib6_result *res)
  1446. {
  1447. struct net *net = dev_net(nrt->dst.dev);
  1448. struct rt6_exception_bucket *bucket;
  1449. struct fib6_info *f6i = res->f6i;
  1450. struct in6_addr *src_key = NULL;
  1451. struct rt6_exception *rt6_ex;
  1452. struct fib6_nh *nh = res->nh;
  1453. int max_depth;
  1454. int err = 0;
  1455. spin_lock_bh(&rt6_exception_lock);
  1456. bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
  1457. lockdep_is_held(&rt6_exception_lock));
  1458. if (!bucket) {
  1459. bucket = kzalloc_objs(*bucket, FIB6_EXCEPTION_BUCKET_SIZE,
  1460. GFP_ATOMIC);
  1461. if (!bucket) {
  1462. err = -ENOMEM;
  1463. goto out;
  1464. }
  1465. rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
  1466. } else if (fib6_nh_excptn_bucket_flushed(bucket)) {
  1467. err = -EINVAL;
  1468. goto out;
  1469. }
  1470. #ifdef CONFIG_IPV6_SUBTREES
  1471. /* fib6_src.plen != 0 indicates f6i is in subtree
  1472. * and exception table is indexed by a hash of
  1473. * both fib6_dst and fib6_src.
  1474. * Otherwise, the exception table is indexed by
  1475. * a hash of only fib6_dst.
  1476. */
  1477. if (f6i->fib6_src.plen)
  1478. src_key = &nrt->rt6i_src.addr;
  1479. #endif
  1480. /* rt6_mtu_change() might lower mtu on f6i.
  1481. * Only insert this exception route if its mtu
  1482. * is less than f6i's mtu value.
  1483. */
  1484. if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
  1485. err = -EINVAL;
  1486. goto out;
  1487. }
  1488. rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
  1489. src_key);
  1490. if (rt6_ex)
  1491. rt6_remove_exception(bucket, rt6_ex);
  1492. rt6_ex = kzalloc_obj(*rt6_ex, GFP_ATOMIC);
  1493. if (!rt6_ex) {
  1494. err = -ENOMEM;
  1495. goto out;
  1496. }
  1497. rt6_ex->rt6i = nrt;
  1498. rt6_ex->stamp = jiffies;
  1499. hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
  1500. bucket->depth++;
  1501. net->ipv6.rt6_stats->fib_rt_cache++;
  1502. /* Randomize max depth to avoid some side channels attacks. */
  1503. max_depth = FIB6_MAX_DEPTH + get_random_u32_below(FIB6_MAX_DEPTH);
  1504. while (bucket->depth > max_depth)
  1505. rt6_exception_remove_oldest(bucket);
  1506. out:
  1507. spin_unlock_bh(&rt6_exception_lock);
  1508. /* Update fn->fn_sernum to invalidate all cached dst */
  1509. if (!err) {
  1510. spin_lock_bh(&f6i->fib6_table->tb6_lock);
  1511. fib6_update_sernum(net, f6i);
  1512. fib6_add_gc_list(f6i);
  1513. spin_unlock_bh(&f6i->fib6_table->tb6_lock);
  1514. fib6_force_start_gc(net);
  1515. }
  1516. return err;
  1517. }
  1518. static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
  1519. {
  1520. struct rt6_exception_bucket *bucket;
  1521. struct rt6_exception *rt6_ex;
  1522. struct hlist_node *tmp;
  1523. int i;
  1524. spin_lock_bh(&rt6_exception_lock);
  1525. bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
  1526. if (!bucket)
  1527. goto out;
  1528. /* Prevent rt6_insert_exception() to recreate the bucket list */
  1529. if (!from)
  1530. fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
  1531. for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
  1532. hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
  1533. if (!from ||
  1534. rcu_access_pointer(rt6_ex->rt6i->from) == from)
  1535. rt6_remove_exception(bucket, rt6_ex);
  1536. }
  1537. WARN_ON_ONCE(!from && bucket->depth);
  1538. bucket++;
  1539. }
  1540. out:
  1541. spin_unlock_bh(&rt6_exception_lock);
  1542. }
  1543. static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
  1544. {
  1545. struct fib6_info *f6i = arg;
  1546. fib6_nh_flush_exceptions(nh, f6i);
  1547. return 0;
  1548. }
  1549. void rt6_flush_exceptions(struct fib6_info *f6i)
  1550. {
  1551. if (f6i->nh) {
  1552. rcu_read_lock();
  1553. nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, f6i);
  1554. rcu_read_unlock();
  1555. } else {
  1556. fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
  1557. }
  1558. }
  1559. /* Find cached rt in the hash table inside passed in rt
  1560. * Caller has to hold rcu_read_lock()
  1561. */
  1562. static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
  1563. const struct in6_addr *daddr,
  1564. const struct in6_addr *saddr)
  1565. {
  1566. const struct in6_addr *src_key = NULL;
  1567. struct rt6_exception_bucket *bucket;
  1568. struct rt6_exception *rt6_ex;
  1569. struct rt6_info *ret = NULL;
  1570. #ifdef CONFIG_IPV6_SUBTREES
  1571. /* fib6i_src.plen != 0 indicates f6i is in subtree
  1572. * and exception table is indexed by a hash of
  1573. * both fib6_dst and fib6_src.
  1574. * However, the src addr used to create the hash
  1575. * might not be exactly the passed in saddr which
  1576. * is a /128 addr from the flow.
  1577. * So we need to use f6i->fib6_src to redo lookup
  1578. * if the passed in saddr does not find anything.
  1579. * (See the logic in ip6_rt_cache_alloc() on how
  1580. * rt->rt6i_src is updated.)
  1581. */
  1582. if (res->f6i->fib6_src.plen)
  1583. src_key = saddr;
  1584. find_ex:
  1585. #endif
  1586. bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
  1587. rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
  1588. if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
  1589. ret = rt6_ex->rt6i;
  1590. #ifdef CONFIG_IPV6_SUBTREES
  1591. /* Use fib6_src as src_key and redo lookup */
  1592. if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
  1593. src_key = &res->f6i->fib6_src.addr;
  1594. goto find_ex;
  1595. }
  1596. #endif
  1597. return ret;
  1598. }
  1599. /* Remove the passed in cached rt from the hash table that contains it */
  1600. static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
  1601. const struct rt6_info *rt)
  1602. {
  1603. const struct in6_addr *src_key = NULL;
  1604. struct rt6_exception_bucket *bucket;
  1605. struct rt6_exception *rt6_ex;
  1606. int err;
  1607. if (!rcu_access_pointer(nh->rt6i_exception_bucket))
  1608. return -ENOENT;
  1609. spin_lock_bh(&rt6_exception_lock);
  1610. bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
  1611. #ifdef CONFIG_IPV6_SUBTREES
  1612. /* rt6i_src.plen != 0 indicates 'from' is in subtree
  1613. * and exception table is indexed by a hash of
  1614. * both rt6i_dst and rt6i_src.
  1615. * Otherwise, the exception table is indexed by
  1616. * a hash of only rt6i_dst.
  1617. */
  1618. if (plen)
  1619. src_key = &rt->rt6i_src.addr;
  1620. #endif
  1621. rt6_ex = __rt6_find_exception_spinlock(&bucket,
  1622. &rt->rt6i_dst.addr,
  1623. src_key);
  1624. if (rt6_ex) {
  1625. rt6_remove_exception(bucket, rt6_ex);
  1626. err = 0;
  1627. } else {
  1628. err = -ENOENT;
  1629. }
  1630. spin_unlock_bh(&rt6_exception_lock);
  1631. return err;
  1632. }
  1633. struct fib6_nh_excptn_arg {
  1634. struct rt6_info *rt;
  1635. int plen;
  1636. };
  1637. static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
  1638. {
  1639. struct fib6_nh_excptn_arg *arg = _arg;
  1640. int err;
  1641. err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
  1642. if (err == 0)
  1643. return 1;
  1644. return 0;
  1645. }
  1646. static int rt6_remove_exception_rt(struct rt6_info *rt)
  1647. {
  1648. struct fib6_info *from;
  1649. from = rcu_dereference(rt->from);
  1650. if (!from || !(rt->rt6i_flags & RTF_CACHE))
  1651. return -EINVAL;
  1652. if (from->nh) {
  1653. struct fib6_nh_excptn_arg arg = {
  1654. .rt = rt,
  1655. .plen = from->fib6_src.plen
  1656. };
  1657. int rc;
  1658. /* rc = 1 means an entry was found */
  1659. rc = nexthop_for_each_fib6_nh(from->nh,
  1660. rt6_nh_remove_exception_rt,
  1661. &arg);
  1662. return rc ? 0 : -ENOENT;
  1663. }
  1664. return fib6_nh_remove_exception(from->fib6_nh,
  1665. from->fib6_src.plen, rt);
  1666. }
  1667. /* Find rt6_ex which contains the passed in rt cache and
  1668. * refresh its stamp
  1669. */
  1670. static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
  1671. const struct rt6_info *rt)
  1672. {
  1673. const struct in6_addr *src_key = NULL;
  1674. struct rt6_exception_bucket *bucket;
  1675. struct rt6_exception *rt6_ex;
  1676. bucket = fib6_nh_get_excptn_bucket(nh, NULL);
  1677. #ifdef CONFIG_IPV6_SUBTREES
  1678. /* rt6i_src.plen != 0 indicates 'from' is in subtree
  1679. * and exception table is indexed by a hash of
  1680. * both rt6i_dst and rt6i_src.
  1681. * Otherwise, the exception table is indexed by
  1682. * a hash of only rt6i_dst.
  1683. */
  1684. if (plen)
  1685. src_key = &rt->rt6i_src.addr;
  1686. #endif
  1687. rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
  1688. if (rt6_ex)
  1689. rt6_ex->stamp = jiffies;
  1690. }
  1691. struct fib6_nh_match_arg {
  1692. const struct net_device *dev;
  1693. const struct in6_addr *gw;
  1694. struct fib6_nh *match;
  1695. };
  1696. /* determine if fib6_nh has given device and gateway */
  1697. static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
  1698. {
  1699. struct fib6_nh_match_arg *arg = _arg;
  1700. if (arg->dev != nh->fib_nh_dev ||
  1701. (arg->gw && !nh->fib_nh_gw_family) ||
  1702. (!arg->gw && nh->fib_nh_gw_family) ||
  1703. (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
  1704. return 0;
  1705. arg->match = nh;
  1706. /* found a match, break the loop */
  1707. return 1;
  1708. }
  1709. static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
  1710. {
  1711. struct fib6_info *from;
  1712. struct fib6_nh *fib6_nh;
  1713. rcu_read_lock();
  1714. from = rcu_dereference(rt->from);
  1715. if (!from || !(rt->rt6i_flags & RTF_CACHE))
  1716. goto unlock;
  1717. if (from->nh) {
  1718. struct fib6_nh_match_arg arg = {
  1719. .dev = rt->dst.dev,
  1720. .gw = &rt->rt6i_gateway,
  1721. };
  1722. nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
  1723. if (!arg.match)
  1724. goto unlock;
  1725. fib6_nh = arg.match;
  1726. } else {
  1727. fib6_nh = from->fib6_nh;
  1728. }
  1729. fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
  1730. unlock:
  1731. rcu_read_unlock();
  1732. }
  1733. static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
  1734. struct rt6_info *rt, int mtu)
  1735. {
  1736. u32 dmtu = dst6_mtu(&rt->dst);
  1737. /* If the new MTU is lower than the route PMTU, this new MTU will be the
  1738. * lowest MTU in the path: always allow updating the route PMTU to
  1739. * reflect PMTU decreases.
  1740. *
  1741. * If the new MTU is higher, and the route PMTU is equal to the local
  1742. * MTU, this means the old MTU is the lowest in the path, so allow
  1743. * updating it: if other nodes now have lower MTUs, PMTU discovery will
  1744. * handle this.
  1745. */
  1746. if (dmtu >= mtu)
  1747. return true;
  1748. if (dmtu == idev->cnf.mtu6)
  1749. return true;
  1750. return false;
  1751. }
  1752. static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
  1753. const struct fib6_nh *nh, int mtu)
  1754. {
  1755. struct rt6_exception_bucket *bucket;
  1756. struct rt6_exception *rt6_ex;
  1757. int i;
  1758. bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
  1759. if (!bucket)
  1760. return;
  1761. for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
  1762. hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
  1763. struct rt6_info *entry = rt6_ex->rt6i;
  1764. /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
  1765. * route), the metrics of its rt->from have already
  1766. * been updated.
  1767. */
  1768. if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
  1769. rt6_mtu_change_route_allowed(idev, entry, mtu))
  1770. dst_metric_set(&entry->dst, RTAX_MTU, mtu);
  1771. }
  1772. bucket++;
  1773. }
  1774. }
  1775. #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
  1776. static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
  1777. const struct in6_addr *gateway)
  1778. {
  1779. struct rt6_exception_bucket *bucket;
  1780. struct rt6_exception *rt6_ex;
  1781. struct hlist_node *tmp;
  1782. int i;
  1783. if (!rcu_access_pointer(nh->rt6i_exception_bucket))
  1784. return;
  1785. spin_lock_bh(&rt6_exception_lock);
  1786. bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
  1787. if (bucket) {
  1788. for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
  1789. hlist_for_each_entry_safe(rt6_ex, tmp,
  1790. &bucket->chain, hlist) {
  1791. struct rt6_info *entry = rt6_ex->rt6i;
  1792. if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
  1793. RTF_CACHE_GATEWAY &&
  1794. ipv6_addr_equal(gateway,
  1795. &entry->rt6i_gateway)) {
  1796. rt6_remove_exception(bucket, rt6_ex);
  1797. }
  1798. }
  1799. bucket++;
  1800. }
  1801. }
  1802. spin_unlock_bh(&rt6_exception_lock);
  1803. }
  1804. static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
  1805. struct rt6_exception *rt6_ex,
  1806. struct fib6_gc_args *gc_args,
  1807. unsigned long now)
  1808. {
  1809. struct rt6_info *rt = rt6_ex->rt6i;
  1810. /* we are pruning and obsoleting aged-out and non gateway exceptions
  1811. * even if others have still references to them, so that on next
  1812. * dst_check() such references can be dropped.
  1813. * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
  1814. * expired, independently from their aging, as per RFC 8201 section 4
  1815. */
  1816. if (!(rt->rt6i_flags & RTF_EXPIRES)) {
  1817. if (time_after_eq(now, READ_ONCE(rt->dst.lastuse) +
  1818. gc_args->timeout)) {
  1819. pr_debug("aging clone %p\n", rt);
  1820. rt6_remove_exception(bucket, rt6_ex);
  1821. return;
  1822. }
  1823. } else if (time_after(jiffies, READ_ONCE(rt->dst.expires))) {
  1824. pr_debug("purging expired route %p\n", rt);
  1825. rt6_remove_exception(bucket, rt6_ex);
  1826. return;
  1827. }
  1828. if (rt->rt6i_flags & RTF_GATEWAY) {
  1829. struct neighbour *neigh;
  1830. neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
  1831. if (!(neigh && (neigh->flags & NTF_ROUTER))) {
  1832. pr_debug("purging route %p via non-router but gateway\n",
  1833. rt);
  1834. rt6_remove_exception(bucket, rt6_ex);
  1835. return;
  1836. }
  1837. }
  1838. gc_args->more++;
  1839. }
  1840. static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
  1841. struct fib6_gc_args *gc_args,
  1842. unsigned long now)
  1843. {
  1844. struct rt6_exception_bucket *bucket;
  1845. struct rt6_exception *rt6_ex;
  1846. struct hlist_node *tmp;
  1847. int i;
  1848. if (!rcu_access_pointer(nh->rt6i_exception_bucket))
  1849. return;
  1850. rcu_read_lock_bh();
  1851. spin_lock(&rt6_exception_lock);
  1852. bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
  1853. if (bucket) {
  1854. for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
  1855. hlist_for_each_entry_safe(rt6_ex, tmp,
  1856. &bucket->chain, hlist) {
  1857. rt6_age_examine_exception(bucket, rt6_ex,
  1858. gc_args, now);
  1859. }
  1860. bucket++;
  1861. }
  1862. }
  1863. spin_unlock(&rt6_exception_lock);
  1864. rcu_read_unlock_bh();
  1865. }
  1866. struct fib6_nh_age_excptn_arg {
  1867. struct fib6_gc_args *gc_args;
  1868. unsigned long now;
  1869. };
  1870. static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
  1871. {
  1872. struct fib6_nh_age_excptn_arg *arg = _arg;
  1873. fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
  1874. return 0;
  1875. }
  1876. void rt6_age_exceptions(struct fib6_info *f6i,
  1877. struct fib6_gc_args *gc_args,
  1878. unsigned long now)
  1879. {
  1880. if (f6i->nh) {
  1881. struct fib6_nh_age_excptn_arg arg = {
  1882. .gc_args = gc_args,
  1883. .now = now
  1884. };
  1885. nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
  1886. &arg);
  1887. } else {
  1888. fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
  1889. }
  1890. }
  1891. /* must be called with rcu lock held */
  1892. int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
  1893. struct flowi6 *fl6, struct fib6_result *res, int strict)
  1894. {
  1895. struct fib6_node *fn, *saved_fn;
  1896. fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  1897. saved_fn = fn;
  1898. redo_rt6_select:
  1899. rt6_select(net, fn, oif, res, strict);
  1900. if (res->f6i == net->ipv6.fib6_null_entry) {
  1901. fn = fib6_backtrack(fn, &fl6->saddr);
  1902. if (fn)
  1903. goto redo_rt6_select;
  1904. else if (strict & RT6_LOOKUP_F_REACHABLE) {
  1905. /* also consider unreachable route */
  1906. strict &= ~RT6_LOOKUP_F_REACHABLE;
  1907. fn = saved_fn;
  1908. goto redo_rt6_select;
  1909. }
  1910. }
  1911. trace_fib6_table_lookup(net, res, table, fl6);
  1912. return 0;
  1913. }
  1914. struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
  1915. int oif, struct flowi6 *fl6,
  1916. const struct sk_buff *skb, int flags)
  1917. {
  1918. struct fib6_result res = {};
  1919. struct rt6_info *rt = NULL;
  1920. int strict = 0;
  1921. WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
  1922. !rcu_read_lock_held());
  1923. strict |= flags & RT6_LOOKUP_F_IFACE;
  1924. strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
  1925. if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
  1926. strict |= RT6_LOOKUP_F_REACHABLE;
  1927. rcu_read_lock();
  1928. fib6_table_lookup(net, table, oif, fl6, &res, strict);
  1929. if (res.f6i == net->ipv6.fib6_null_entry)
  1930. goto out;
  1931. fib6_select_path(net, &res, fl6, oif, false, skb, strict);
  1932. /*Search through exception table */
  1933. rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
  1934. if (rt) {
  1935. goto out;
  1936. } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
  1937. !res.nh->fib_nh_gw_family)) {
  1938. /* Create a RTF_CACHE clone which will not be
  1939. * owned by the fib6 tree. It is for the special case where
  1940. * the daddr in the skb during the neighbor look-up is different
  1941. * from the fl6->daddr used to look-up route here.
  1942. */
  1943. rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
  1944. if (rt) {
  1945. /* 1 refcnt is taken during ip6_rt_cache_alloc().
  1946. * As rt6_uncached_list_add() does not consume refcnt,
  1947. * this refcnt is always returned to the caller even
  1948. * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
  1949. */
  1950. rt6_uncached_list_add(rt);
  1951. rcu_read_unlock();
  1952. return rt;
  1953. }
  1954. } else {
  1955. /* Get a percpu copy */
  1956. local_bh_disable();
  1957. rt = rt6_get_pcpu_route(&res);
  1958. if (!rt)
  1959. rt = rt6_make_pcpu_route(net, &res);
  1960. local_bh_enable();
  1961. }
  1962. out:
  1963. if (!rt)
  1964. rt = net->ipv6.ip6_null_entry;
  1965. if (!(flags & RT6_LOOKUP_F_DST_NOREF))
  1966. ip6_hold_safe(net, &rt);
  1967. rcu_read_unlock();
  1968. return rt;
  1969. }
  1970. EXPORT_SYMBOL_GPL(ip6_pol_route);
  1971. INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
  1972. struct fib6_table *table,
  1973. struct flowi6 *fl6,
  1974. const struct sk_buff *skb,
  1975. int flags)
  1976. {
  1977. return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
  1978. }
  1979. struct dst_entry *ip6_route_input_lookup(struct net *net,
  1980. struct net_device *dev,
  1981. struct flowi6 *fl6,
  1982. const struct sk_buff *skb,
  1983. int flags)
  1984. {
  1985. if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
  1986. flags |= RT6_LOOKUP_F_IFACE;
  1987. return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
  1988. }
  1989. EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
  1990. static void ip6_multipath_l3_keys(const struct sk_buff *skb,
  1991. struct flow_keys *keys,
  1992. struct flow_keys *flkeys)
  1993. {
  1994. const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
  1995. const struct ipv6hdr *key_iph = outer_iph;
  1996. struct flow_keys *_flkeys = flkeys;
  1997. const struct ipv6hdr *inner_iph;
  1998. const struct icmp6hdr *icmph;
  1999. struct ipv6hdr _inner_iph;
  2000. struct icmp6hdr _icmph;
  2001. if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
  2002. goto out;
  2003. icmph = skb_header_pointer(skb, skb_transport_offset(skb),
  2004. sizeof(_icmph), &_icmph);
  2005. if (!icmph)
  2006. goto out;
  2007. if (!icmpv6_is_err(icmph->icmp6_type))
  2008. goto out;
  2009. inner_iph = skb_header_pointer(skb,
  2010. skb_transport_offset(skb) + sizeof(*icmph),
  2011. sizeof(_inner_iph), &_inner_iph);
  2012. if (!inner_iph)
  2013. goto out;
  2014. key_iph = inner_iph;
  2015. _flkeys = NULL;
  2016. out:
  2017. if (_flkeys) {
  2018. keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
  2019. keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
  2020. keys->tags.flow_label = _flkeys->tags.flow_label;
  2021. keys->basic.ip_proto = _flkeys->basic.ip_proto;
  2022. } else {
  2023. keys->addrs.v6addrs.src = key_iph->saddr;
  2024. keys->addrs.v6addrs.dst = key_iph->daddr;
  2025. keys->tags.flow_label = ip6_flowlabel(key_iph);
  2026. keys->basic.ip_proto = key_iph->nexthdr;
  2027. }
  2028. }
  2029. static u32 rt6_multipath_custom_hash_outer(const struct net *net,
  2030. const struct sk_buff *skb,
  2031. bool *p_has_inner)
  2032. {
  2033. u32 hash_fields = ip6_multipath_hash_fields(net);
  2034. struct flow_keys keys, hash_keys;
  2035. if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
  2036. return 0;
  2037. memset(&hash_keys, 0, sizeof(hash_keys));
  2038. skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
  2039. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2040. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
  2041. hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
  2042. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
  2043. hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
  2044. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
  2045. hash_keys.basic.ip_proto = keys.basic.ip_proto;
  2046. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
  2047. hash_keys.tags.flow_label = keys.tags.flow_label;
  2048. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
  2049. hash_keys.ports.src = keys.ports.src;
  2050. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
  2051. hash_keys.ports.dst = keys.ports.dst;
  2052. *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
  2053. return fib_multipath_hash_from_keys(net, &hash_keys);
  2054. }
  2055. static u32 rt6_multipath_custom_hash_inner(const struct net *net,
  2056. const struct sk_buff *skb,
  2057. bool has_inner)
  2058. {
  2059. u32 hash_fields = ip6_multipath_hash_fields(net);
  2060. struct flow_keys keys, hash_keys;
  2061. /* We assume the packet carries an encapsulation, but if none was
  2062. * encountered during dissection of the outer flow, then there is no
  2063. * point in calling the flow dissector again.
  2064. */
  2065. if (!has_inner)
  2066. return 0;
  2067. if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
  2068. return 0;
  2069. memset(&hash_keys, 0, sizeof(hash_keys));
  2070. skb_flow_dissect_flow_keys(skb, &keys, 0);
  2071. if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
  2072. return 0;
  2073. if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
  2074. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
  2075. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
  2076. hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
  2077. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
  2078. hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
  2079. } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
  2080. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2081. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
  2082. hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
  2083. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
  2084. hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
  2085. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
  2086. hash_keys.tags.flow_label = keys.tags.flow_label;
  2087. }
  2088. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
  2089. hash_keys.basic.ip_proto = keys.basic.ip_proto;
  2090. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
  2091. hash_keys.ports.src = keys.ports.src;
  2092. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
  2093. hash_keys.ports.dst = keys.ports.dst;
  2094. return fib_multipath_hash_from_keys(net, &hash_keys);
  2095. }
  2096. static u32 rt6_multipath_custom_hash_skb(const struct net *net,
  2097. const struct sk_buff *skb)
  2098. {
  2099. u32 mhash, mhash_inner;
  2100. bool has_inner = true;
  2101. mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
  2102. mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
  2103. return jhash_2words(mhash, mhash_inner, 0);
  2104. }
  2105. static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
  2106. const struct flowi6 *fl6)
  2107. {
  2108. u32 hash_fields = ip6_multipath_hash_fields(net);
  2109. struct flow_keys hash_keys;
  2110. if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
  2111. return 0;
  2112. memset(&hash_keys, 0, sizeof(hash_keys));
  2113. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2114. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
  2115. hash_keys.addrs.v6addrs.src = fl6->saddr;
  2116. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
  2117. hash_keys.addrs.v6addrs.dst = fl6->daddr;
  2118. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
  2119. hash_keys.basic.ip_proto = fl6->flowi6_proto;
  2120. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
  2121. hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
  2122. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) {
  2123. if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT)
  2124. hash_keys.ports.src = (__force __be16)get_random_u16();
  2125. else
  2126. hash_keys.ports.src = fl6->fl6_sport;
  2127. }
  2128. if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
  2129. hash_keys.ports.dst = fl6->fl6_dport;
  2130. return fib_multipath_hash_from_keys(net, &hash_keys);
  2131. }
  2132. /* if skb is set it will be used and fl6 can be NULL */
  2133. u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
  2134. const struct sk_buff *skb, struct flow_keys *flkeys)
  2135. {
  2136. struct flow_keys hash_keys;
  2137. u32 mhash = 0;
  2138. switch (ip6_multipath_hash_policy(net)) {
  2139. case 0:
  2140. memset(&hash_keys, 0, sizeof(hash_keys));
  2141. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2142. if (skb) {
  2143. ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
  2144. } else {
  2145. hash_keys.addrs.v6addrs.src = fl6->saddr;
  2146. hash_keys.addrs.v6addrs.dst = fl6->daddr;
  2147. hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
  2148. hash_keys.basic.ip_proto = fl6->flowi6_proto;
  2149. }
  2150. mhash = fib_multipath_hash_from_keys(net, &hash_keys);
  2151. break;
  2152. case 1:
  2153. if (skb) {
  2154. unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
  2155. struct flow_keys keys;
  2156. /* short-circuit if we already have L4 hash present */
  2157. if (skb->l4_hash)
  2158. return skb_get_hash_raw(skb) >> 1;
  2159. memset(&hash_keys, 0, sizeof(hash_keys));
  2160. if (!flkeys) {
  2161. skb_flow_dissect_flow_keys(skb, &keys, flag);
  2162. flkeys = &keys;
  2163. }
  2164. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2165. hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
  2166. hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
  2167. hash_keys.ports.src = flkeys->ports.src;
  2168. hash_keys.ports.dst = flkeys->ports.dst;
  2169. hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
  2170. } else {
  2171. memset(&hash_keys, 0, sizeof(hash_keys));
  2172. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2173. hash_keys.addrs.v6addrs.src = fl6->saddr;
  2174. hash_keys.addrs.v6addrs.dst = fl6->daddr;
  2175. if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT)
  2176. hash_keys.ports.src = (__force __be16)get_random_u16();
  2177. else
  2178. hash_keys.ports.src = fl6->fl6_sport;
  2179. hash_keys.ports.dst = fl6->fl6_dport;
  2180. hash_keys.basic.ip_proto = fl6->flowi6_proto;
  2181. }
  2182. mhash = fib_multipath_hash_from_keys(net, &hash_keys);
  2183. break;
  2184. case 2:
  2185. memset(&hash_keys, 0, sizeof(hash_keys));
  2186. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2187. if (skb) {
  2188. struct flow_keys keys;
  2189. if (!flkeys) {
  2190. skb_flow_dissect_flow_keys(skb, &keys, 0);
  2191. flkeys = &keys;
  2192. }
  2193. /* Inner can be v4 or v6 */
  2194. if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
  2195. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
  2196. hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
  2197. hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
  2198. } else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
  2199. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2200. hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
  2201. hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
  2202. hash_keys.tags.flow_label = flkeys->tags.flow_label;
  2203. hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
  2204. } else {
  2205. /* Same as case 0 */
  2206. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2207. ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
  2208. }
  2209. } else {
  2210. /* Same as case 0 */
  2211. hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
  2212. hash_keys.addrs.v6addrs.src = fl6->saddr;
  2213. hash_keys.addrs.v6addrs.dst = fl6->daddr;
  2214. hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
  2215. hash_keys.basic.ip_proto = fl6->flowi6_proto;
  2216. }
  2217. mhash = fib_multipath_hash_from_keys(net, &hash_keys);
  2218. break;
  2219. case 3:
  2220. if (skb)
  2221. mhash = rt6_multipath_custom_hash_skb(net, skb);
  2222. else
  2223. mhash = rt6_multipath_custom_hash_fl6(net, fl6);
  2224. break;
  2225. }
  2226. return mhash >> 1;
  2227. }
  2228. /* Called with rcu held */
  2229. void ip6_route_input(struct sk_buff *skb)
  2230. {
  2231. const struct ipv6hdr *iph = ipv6_hdr(skb);
  2232. struct net *net = dev_net(skb->dev);
  2233. int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
  2234. struct ip_tunnel_info *tun_info;
  2235. struct flowi6 fl6 = {
  2236. .flowi6_iif = skb->dev->ifindex,
  2237. .daddr = iph->daddr,
  2238. .saddr = iph->saddr,
  2239. .flowlabel = ip6_flowinfo(iph),
  2240. .flowi6_mark = skb->mark,
  2241. .flowi6_proto = iph->nexthdr,
  2242. };
  2243. struct flow_keys *flkeys = NULL, _flkeys;
  2244. tun_info = skb_tunnel_info(skb);
  2245. if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
  2246. fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
  2247. if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
  2248. flkeys = &_flkeys;
  2249. if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
  2250. fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
  2251. skb_dst_drop(skb);
  2252. skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
  2253. &fl6, skb, flags));
  2254. }
  2255. INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
  2256. struct fib6_table *table,
  2257. struct flowi6 *fl6,
  2258. const struct sk_buff *skb,
  2259. int flags)
  2260. {
  2261. return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
  2262. }
  2263. static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
  2264. const struct sock *sk,
  2265. struct flowi6 *fl6,
  2266. int flags)
  2267. {
  2268. bool any_src;
  2269. if (ipv6_addr_type(&fl6->daddr) &
  2270. (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
  2271. struct dst_entry *dst;
  2272. /* This function does not take refcnt on the dst */
  2273. dst = l3mdev_link_scope_lookup(net, fl6);
  2274. if (dst)
  2275. return dst;
  2276. }
  2277. fl6->flowi6_iif = LOOPBACK_IFINDEX;
  2278. flags |= RT6_LOOKUP_F_DST_NOREF;
  2279. any_src = ipv6_addr_any(&fl6->saddr);
  2280. if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
  2281. (fl6->flowi6_oif && any_src))
  2282. flags |= RT6_LOOKUP_F_IFACE;
  2283. if (!any_src)
  2284. flags |= RT6_LOOKUP_F_HAS_SADDR;
  2285. else if (sk)
  2286. flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
  2287. return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
  2288. }
  2289. struct dst_entry *ip6_route_output_flags(struct net *net,
  2290. const struct sock *sk,
  2291. struct flowi6 *fl6,
  2292. int flags)
  2293. {
  2294. struct dst_entry *dst;
  2295. struct rt6_info *rt6;
  2296. rcu_read_lock();
  2297. dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
  2298. rt6 = dst_rt6_info(dst);
  2299. /* For dst cached in uncached_list, refcnt is already taken. */
  2300. if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
  2301. dst = &net->ipv6.ip6_null_entry->dst;
  2302. dst_hold(dst);
  2303. }
  2304. rcu_read_unlock();
  2305. return dst;
  2306. }
  2307. EXPORT_SYMBOL_GPL(ip6_route_output_flags);
  2308. struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
  2309. {
  2310. struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
  2311. struct net_device *loopback_dev = net->loopback_dev;
  2312. struct dst_entry *new = NULL;
  2313. rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
  2314. DST_OBSOLETE_DEAD, 0);
  2315. if (rt) {
  2316. rt6_info_init(rt);
  2317. atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
  2318. new = &rt->dst;
  2319. new->__use = 1;
  2320. new->input = dst_discard;
  2321. new->output = dst_discard_out;
  2322. dst_copy_metrics(new, &ort->dst);
  2323. rt->rt6i_idev = in6_dev_get(loopback_dev);
  2324. rt->rt6i_gateway = ort->rt6i_gateway;
  2325. rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
  2326. memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
  2327. #ifdef CONFIG_IPV6_SUBTREES
  2328. memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
  2329. #endif
  2330. }
  2331. dst_release(dst_orig);
  2332. return new ? new : ERR_PTR(-ENOMEM);
  2333. }
  2334. /*
  2335. * Destination cache support functions
  2336. */
  2337. static bool fib6_check(struct fib6_info *f6i, u32 cookie)
  2338. {
  2339. u32 rt_cookie = 0;
  2340. if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
  2341. return false;
  2342. if (fib6_check_expired(f6i))
  2343. return false;
  2344. return true;
  2345. }
  2346. static struct dst_entry *rt6_check(struct rt6_info *rt,
  2347. struct fib6_info *from,
  2348. u32 cookie)
  2349. {
  2350. u32 rt_cookie = 0;
  2351. if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
  2352. rt_cookie != cookie)
  2353. return NULL;
  2354. if (rt6_check_expired(rt))
  2355. return NULL;
  2356. return &rt->dst;
  2357. }
  2358. static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
  2359. struct fib6_info *from,
  2360. u32 cookie)
  2361. {
  2362. if (!__rt6_check_expired(rt) &&
  2363. READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK &&
  2364. fib6_check(from, cookie))
  2365. return &rt->dst;
  2366. return NULL;
  2367. }
  2368. INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
  2369. u32 cookie)
  2370. {
  2371. struct dst_entry *dst_ret;
  2372. struct fib6_info *from;
  2373. struct rt6_info *rt;
  2374. rt = dst_rt6_info(dst);
  2375. if (rt->sernum)
  2376. return rt6_is_valid(rt) ? dst : NULL;
  2377. rcu_read_lock();
  2378. /* All IPV6 dsts are created with ->obsolete set to the value
  2379. * DST_OBSOLETE_FORCE_CHK which forces validation calls down
  2380. * into this function always.
  2381. */
  2382. from = rcu_dereference(rt->from);
  2383. if (from && (rt->rt6i_flags & RTF_PCPU ||
  2384. unlikely(!list_empty(&rt->dst.rt_uncached))))
  2385. dst_ret = rt6_dst_from_check(rt, from, cookie);
  2386. else
  2387. dst_ret = rt6_check(rt, from, cookie);
  2388. rcu_read_unlock();
  2389. return dst_ret;
  2390. }
  2391. EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
  2392. static void ip6_negative_advice(struct sock *sk,
  2393. struct dst_entry *dst)
  2394. {
  2395. struct rt6_info *rt = dst_rt6_info(dst);
  2396. if (rt->rt6i_flags & RTF_CACHE) {
  2397. rcu_read_lock();
  2398. if (rt6_check_expired(rt)) {
  2399. /* rt/dst can not be destroyed yet,
  2400. * because of rcu_read_lock()
  2401. */
  2402. sk_dst_reset(sk);
  2403. rt6_remove_exception_rt(rt);
  2404. }
  2405. rcu_read_unlock();
  2406. return;
  2407. }
  2408. sk_dst_reset(sk);
  2409. }
  2410. static void ip6_link_failure(struct sk_buff *skb)
  2411. {
  2412. struct rt6_info *rt;
  2413. icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
  2414. rt = dst_rt6_info(skb_dst(skb));
  2415. if (rt) {
  2416. rcu_read_lock();
  2417. if (rt->rt6i_flags & RTF_CACHE) {
  2418. rt6_remove_exception_rt(rt);
  2419. } else {
  2420. struct fib6_info *from;
  2421. struct fib6_node *fn;
  2422. from = rcu_dereference(rt->from);
  2423. if (from) {
  2424. fn = rcu_dereference(from->fib6_node);
  2425. if (fn && (rt->rt6i_flags & RTF_DEFAULT))
  2426. WRITE_ONCE(fn->fn_sernum, -1);
  2427. }
  2428. }
  2429. rcu_read_unlock();
  2430. }
  2431. }
  2432. static void rt6_update_expires(struct rt6_info *rt0, int timeout)
  2433. {
  2434. if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
  2435. struct fib6_info *from;
  2436. rcu_read_lock();
  2437. from = rcu_dereference(rt0->from);
  2438. if (from)
  2439. WRITE_ONCE(rt0->dst.expires, from->expires);
  2440. rcu_read_unlock();
  2441. }
  2442. dst_set_expires(&rt0->dst, timeout);
  2443. rt0->rt6i_flags |= RTF_EXPIRES;
  2444. }
  2445. static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
  2446. {
  2447. struct net *net = dev_net(rt->dst.dev);
  2448. dst_metric_set(&rt->dst, RTAX_MTU, mtu);
  2449. rt->rt6i_flags |= RTF_MODIFIED;
  2450. rt6_update_expires(rt, READ_ONCE(net->ipv6.sysctl.ip6_rt_mtu_expires));
  2451. }
  2452. static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
  2453. {
  2454. return !(rt->rt6i_flags & RTF_CACHE) &&
  2455. (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
  2456. }
  2457. static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
  2458. const struct ipv6hdr *iph, u32 mtu,
  2459. bool confirm_neigh)
  2460. {
  2461. const struct in6_addr *daddr, *saddr;
  2462. struct rt6_info *rt6 = dst_rt6_info(dst);
  2463. /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
  2464. * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
  2465. * [see also comment in rt6_mtu_change_route()]
  2466. */
  2467. if (iph) {
  2468. daddr = &iph->daddr;
  2469. saddr = &iph->saddr;
  2470. } else if (sk) {
  2471. daddr = &sk->sk_v6_daddr;
  2472. saddr = &inet6_sk(sk)->saddr;
  2473. } else {
  2474. daddr = NULL;
  2475. saddr = NULL;
  2476. }
  2477. if (confirm_neigh)
  2478. dst_confirm_neigh(dst, daddr);
  2479. if (mtu < IPV6_MIN_MTU)
  2480. return;
  2481. if (mtu >= dst6_mtu(dst))
  2482. return;
  2483. if (!rt6_cache_allowed_for_pmtu(rt6)) {
  2484. rt6_do_update_pmtu(rt6, mtu);
  2485. /* update rt6_ex->stamp for cache */
  2486. if (rt6->rt6i_flags & RTF_CACHE)
  2487. rt6_update_exception_stamp_rt(rt6);
  2488. } else if (daddr) {
  2489. struct fib6_result res = {};
  2490. struct rt6_info *nrt6;
  2491. rcu_read_lock();
  2492. res.f6i = rcu_dereference(rt6->from);
  2493. if (!res.f6i)
  2494. goto out_unlock;
  2495. res.fib6_flags = res.f6i->fib6_flags;
  2496. res.fib6_type = res.f6i->fib6_type;
  2497. if (res.f6i->nh) {
  2498. struct fib6_nh_match_arg arg = {
  2499. .dev = dst_dev_rcu(dst),
  2500. .gw = &rt6->rt6i_gateway,
  2501. };
  2502. nexthop_for_each_fib6_nh(res.f6i->nh,
  2503. fib6_nh_find_match, &arg);
  2504. /* fib6_info uses a nexthop that does not have fib6_nh
  2505. * using the dst->dev + gw. Should be impossible.
  2506. */
  2507. if (!arg.match)
  2508. goto out_unlock;
  2509. res.nh = arg.match;
  2510. } else {
  2511. res.nh = res.f6i->fib6_nh;
  2512. }
  2513. nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
  2514. if (nrt6) {
  2515. rt6_do_update_pmtu(nrt6, mtu);
  2516. if (rt6_insert_exception(nrt6, &res))
  2517. dst_release_immediate(&nrt6->dst);
  2518. }
  2519. out_unlock:
  2520. rcu_read_unlock();
  2521. }
  2522. }
  2523. static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  2524. struct sk_buff *skb, u32 mtu,
  2525. bool confirm_neigh)
  2526. {
  2527. __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
  2528. confirm_neigh);
  2529. }
  2530. void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
  2531. int oif, u32 mark, kuid_t uid)
  2532. {
  2533. const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
  2534. struct dst_entry *dst;
  2535. struct flowi6 fl6 = {
  2536. .flowi6_oif = oif,
  2537. .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
  2538. .daddr = iph->daddr,
  2539. .saddr = iph->saddr,
  2540. .flowlabel = ip6_flowinfo(iph),
  2541. .flowi6_uid = uid,
  2542. };
  2543. dst = ip6_route_output(net, NULL, &fl6);
  2544. if (!dst->error)
  2545. __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
  2546. dst_release(dst);
  2547. }
  2548. EXPORT_SYMBOL_GPL(ip6_update_pmtu);
  2549. void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
  2550. {
  2551. int oif = sk->sk_bound_dev_if;
  2552. struct dst_entry *dst;
  2553. if (!oif && skb->dev)
  2554. oif = l3mdev_master_ifindex(skb->dev);
  2555. ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
  2556. sk_uid(sk));
  2557. dst = __sk_dst_get(sk);
  2558. if (!dst || !READ_ONCE(dst->obsolete) ||
  2559. dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
  2560. return;
  2561. bh_lock_sock(sk);
  2562. if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
  2563. ip6_datagram_dst_update(sk, false);
  2564. bh_unlock_sock(sk);
  2565. }
  2566. EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
  2567. void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
  2568. const struct flowi6 *fl6)
  2569. {
  2570. #ifdef CONFIG_IPV6_SUBTREES
  2571. struct ipv6_pinfo *np = inet6_sk(sk);
  2572. #endif
  2573. ip6_dst_store(sk, dst,
  2574. ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr),
  2575. #ifdef CONFIG_IPV6_SUBTREES
  2576. ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
  2577. true :
  2578. #endif
  2579. false);
  2580. }
  2581. static bool ip6_redirect_nh_match(const struct fib6_result *res,
  2582. struct flowi6 *fl6,
  2583. const struct in6_addr *gw,
  2584. struct rt6_info **ret)
  2585. {
  2586. const struct fib6_nh *nh = res->nh;
  2587. if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
  2588. fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
  2589. return false;
  2590. /* rt_cache's gateway might be different from its 'parent'
  2591. * in the case of an ip redirect.
  2592. * So we keep searching in the exception table if the gateway
  2593. * is different.
  2594. */
  2595. if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
  2596. struct rt6_info *rt_cache;
  2597. rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
  2598. if (rt_cache &&
  2599. ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
  2600. *ret = rt_cache;
  2601. return true;
  2602. }
  2603. return false;
  2604. }
  2605. return true;
  2606. }
  2607. struct fib6_nh_rd_arg {
  2608. struct fib6_result *res;
  2609. struct flowi6 *fl6;
  2610. const struct in6_addr *gw;
  2611. struct rt6_info **ret;
  2612. };
  2613. static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
  2614. {
  2615. struct fib6_nh_rd_arg *arg = _arg;
  2616. arg->res->nh = nh;
  2617. return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
  2618. }
  2619. /* Handle redirects */
  2620. struct ip6rd_flowi {
  2621. struct flowi6 fl6;
  2622. struct in6_addr gateway;
  2623. };
  2624. INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
  2625. struct fib6_table *table,
  2626. struct flowi6 *fl6,
  2627. const struct sk_buff *skb,
  2628. int flags)
  2629. {
  2630. struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
  2631. struct rt6_info *ret = NULL;
  2632. struct fib6_result res = {};
  2633. struct fib6_nh_rd_arg arg = {
  2634. .res = &res,
  2635. .fl6 = fl6,
  2636. .gw = &rdfl->gateway,
  2637. .ret = &ret
  2638. };
  2639. struct fib6_info *rt;
  2640. struct fib6_node *fn;
  2641. /* Get the "current" route for this destination and
  2642. * check if the redirect has come from appropriate router.
  2643. *
  2644. * RFC 4861 specifies that redirects should only be
  2645. * accepted if they come from the nexthop to the target.
  2646. * Due to the way the routes are chosen, this notion
  2647. * is a bit fuzzy and one might need to check all possible
  2648. * routes.
  2649. */
  2650. rcu_read_lock();
  2651. fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  2652. restart:
  2653. for_each_fib6_node_rt_rcu(fn) {
  2654. res.f6i = rt;
  2655. if (fib6_check_expired(rt))
  2656. continue;
  2657. if (rt->fib6_flags & RTF_REJECT)
  2658. break;
  2659. if (unlikely(rt->nh)) {
  2660. if (nexthop_is_blackhole(rt->nh))
  2661. continue;
  2662. /* on match, res->nh is filled in and potentially ret */
  2663. if (nexthop_for_each_fib6_nh(rt->nh,
  2664. fib6_nh_redirect_match,
  2665. &arg))
  2666. goto out;
  2667. } else {
  2668. res.nh = rt->fib6_nh;
  2669. if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
  2670. &ret))
  2671. goto out;
  2672. }
  2673. }
  2674. if (!rt)
  2675. rt = net->ipv6.fib6_null_entry;
  2676. else if (rt->fib6_flags & RTF_REJECT) {
  2677. ret = net->ipv6.ip6_null_entry;
  2678. goto out;
  2679. }
  2680. if (rt == net->ipv6.fib6_null_entry) {
  2681. fn = fib6_backtrack(fn, &fl6->saddr);
  2682. if (fn)
  2683. goto restart;
  2684. }
  2685. res.f6i = rt;
  2686. res.nh = rt->fib6_nh;
  2687. out:
  2688. if (ret) {
  2689. ip6_hold_safe(net, &ret);
  2690. } else {
  2691. res.fib6_flags = res.f6i->fib6_flags;
  2692. res.fib6_type = res.f6i->fib6_type;
  2693. ret = ip6_create_rt_rcu(&res);
  2694. }
  2695. rcu_read_unlock();
  2696. trace_fib6_table_lookup(net, &res, table, fl6);
  2697. return ret;
  2698. };
  2699. static struct dst_entry *ip6_route_redirect(struct net *net,
  2700. const struct flowi6 *fl6,
  2701. const struct sk_buff *skb,
  2702. const struct in6_addr *gateway)
  2703. {
  2704. int flags = RT6_LOOKUP_F_HAS_SADDR;
  2705. struct ip6rd_flowi rdfl;
  2706. rdfl.fl6 = *fl6;
  2707. rdfl.gateway = *gateway;
  2708. return fib6_rule_lookup(net, &rdfl.fl6, skb,
  2709. flags, __ip6_route_redirect);
  2710. }
  2711. void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
  2712. kuid_t uid)
  2713. {
  2714. const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
  2715. struct dst_entry *dst;
  2716. struct flowi6 fl6 = {
  2717. .flowi6_iif = LOOPBACK_IFINDEX,
  2718. .flowi6_oif = oif,
  2719. .flowi6_mark = mark,
  2720. .daddr = iph->daddr,
  2721. .saddr = iph->saddr,
  2722. .flowlabel = ip6_flowinfo(iph),
  2723. .flowi6_uid = uid,
  2724. };
  2725. dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
  2726. rt6_do_redirect(dst, NULL, skb);
  2727. dst_release(dst);
  2728. }
  2729. EXPORT_SYMBOL_GPL(ip6_redirect);
  2730. void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
  2731. {
  2732. const struct ipv6hdr *iph = ipv6_hdr(skb);
  2733. const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
  2734. struct dst_entry *dst;
  2735. struct flowi6 fl6 = {
  2736. .flowi6_iif = LOOPBACK_IFINDEX,
  2737. .flowi6_oif = oif,
  2738. .daddr = msg->dest,
  2739. .saddr = iph->daddr,
  2740. .flowi6_uid = sock_net_uid(net, NULL),
  2741. };
  2742. dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
  2743. rt6_do_redirect(dst, NULL, skb);
  2744. dst_release(dst);
  2745. }
  2746. void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
  2747. {
  2748. ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
  2749. READ_ONCE(sk->sk_mark), sk_uid(sk));
  2750. }
  2751. EXPORT_SYMBOL_GPL(ip6_sk_redirect);
  2752. static unsigned int ip6_default_advmss(const struct dst_entry *dst)
  2753. {
  2754. unsigned int mtu = dst6_mtu(dst);
  2755. struct net *net;
  2756. mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
  2757. rcu_read_lock();
  2758. net = dst_dev_net_rcu(dst);
  2759. mtu = max_t(unsigned int, mtu,
  2760. READ_ONCE(net->ipv6.sysctl.ip6_rt_min_advmss));
  2761. rcu_read_unlock();
  2762. /*
  2763. * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
  2764. * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
  2765. * IPV6_MAXPLEN is also valid and means: "any MSS,
  2766. * rely only on pmtu discovery"
  2767. */
  2768. if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
  2769. mtu = IPV6_MAXPLEN;
  2770. return mtu;
  2771. }
  2772. INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
  2773. {
  2774. return ip6_dst_mtu_maybe_forward(dst, false);
  2775. }
  2776. EXPORT_INDIRECT_CALLABLE(ip6_mtu);
  2777. /* MTU selection:
  2778. * 1. mtu on route is locked - use it
  2779. * 2. mtu from nexthop exception
  2780. * 3. mtu from egress device
  2781. *
  2782. * based on ip6_dst_mtu_forward and exception logic of
  2783. * rt6_find_cached_rt; called with rcu_read_lock
  2784. */
  2785. u32 ip6_mtu_from_fib6(const struct fib6_result *res,
  2786. const struct in6_addr *daddr,
  2787. const struct in6_addr *saddr)
  2788. {
  2789. const struct fib6_nh *nh = res->nh;
  2790. struct fib6_info *f6i = res->f6i;
  2791. struct inet6_dev *idev;
  2792. struct rt6_info *rt;
  2793. u32 mtu = 0;
  2794. if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
  2795. mtu = f6i->fib6_pmtu;
  2796. if (mtu)
  2797. goto out;
  2798. }
  2799. rt = rt6_find_cached_rt(res, daddr, saddr);
  2800. if (unlikely(rt)) {
  2801. mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
  2802. } else {
  2803. struct net_device *dev = nh->fib_nh_dev;
  2804. mtu = IPV6_MIN_MTU;
  2805. idev = __in6_dev_get(dev);
  2806. if (idev)
  2807. mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
  2808. }
  2809. mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
  2810. out:
  2811. return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
  2812. }
  2813. struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
  2814. struct flowi6 *fl6)
  2815. {
  2816. struct dst_entry *dst;
  2817. struct rt6_info *rt;
  2818. struct inet6_dev *idev = in6_dev_get(dev);
  2819. struct net *net = dev_net(dev);
  2820. if (unlikely(!idev))
  2821. return ERR_PTR(-ENODEV);
  2822. rt = ip6_dst_alloc(net, dev, 0);
  2823. if (unlikely(!rt)) {
  2824. in6_dev_put(idev);
  2825. dst = ERR_PTR(-ENOMEM);
  2826. goto out;
  2827. }
  2828. rt->dst.input = ip6_input;
  2829. rt->dst.output = ip6_output;
  2830. rt->rt6i_gateway = fl6->daddr;
  2831. rt->rt6i_dst.addr = fl6->daddr;
  2832. rt->rt6i_dst.plen = 128;
  2833. rt->rt6i_idev = idev;
  2834. dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
  2835. /* Add this dst into uncached_list so that rt6_disable_ip() can
  2836. * do proper release of the net_device
  2837. */
  2838. rt6_uncached_list_add(rt);
  2839. dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
  2840. out:
  2841. return dst;
  2842. }
  2843. static void ip6_dst_gc(struct dst_ops *ops)
  2844. {
  2845. struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
  2846. int rt_min_interval = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_min_interval);
  2847. int rt_elasticity = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_elasticity);
  2848. int rt_gc_timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_timeout);
  2849. unsigned long rt_last_gc = READ_ONCE(net->ipv6.ip6_rt_last_gc);
  2850. unsigned int val;
  2851. int entries;
  2852. if (time_after(rt_last_gc + rt_min_interval, jiffies))
  2853. goto out;
  2854. fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
  2855. entries = dst_entries_get_slow(ops);
  2856. if (entries < ops->gc_thresh)
  2857. atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
  2858. out:
  2859. val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
  2860. atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
  2861. }
  2862. static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
  2863. const struct in6_addr *gw_addr, u32 tbid,
  2864. int flags, struct fib6_result *res)
  2865. {
  2866. struct flowi6 fl6 = {
  2867. .flowi6_oif = cfg->fc_ifindex,
  2868. .daddr = *gw_addr,
  2869. .saddr = cfg->fc_prefsrc,
  2870. };
  2871. struct fib6_table *table;
  2872. int err;
  2873. table = fib6_get_table(net, tbid);
  2874. if (!table)
  2875. return -EINVAL;
  2876. if (!ipv6_addr_any(&cfg->fc_prefsrc))
  2877. flags |= RT6_LOOKUP_F_HAS_SADDR;
  2878. flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
  2879. err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
  2880. if (!err && res->f6i != net->ipv6.fib6_null_entry)
  2881. fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
  2882. cfg->fc_ifindex != 0, NULL, flags);
  2883. return err;
  2884. }
  2885. static int ip6_route_check_nh_onlink(struct net *net,
  2886. struct fib6_config *cfg,
  2887. const struct net_device *dev,
  2888. struct netlink_ext_ack *extack)
  2889. {
  2890. u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
  2891. const struct in6_addr *gw_addr = &cfg->fc_gateway;
  2892. struct fib6_result res = {};
  2893. int err;
  2894. err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
  2895. if (!err && !(res.fib6_flags & RTF_REJECT) &&
  2896. res.fib6_type != RTN_UNICAST) {
  2897. NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
  2898. err = -EINVAL;
  2899. }
  2900. return err;
  2901. }
  2902. static int ip6_route_check_nh(struct net *net,
  2903. struct fib6_config *cfg,
  2904. struct net_device **_dev,
  2905. netdevice_tracker *dev_tracker,
  2906. struct inet6_dev **idev)
  2907. {
  2908. const struct in6_addr *gw_addr = &cfg->fc_gateway;
  2909. struct net_device *dev = _dev ? *_dev : NULL;
  2910. int flags = RT6_LOOKUP_F_IFACE;
  2911. struct fib6_result res = {};
  2912. int err = -EHOSTUNREACH;
  2913. if (cfg->fc_table) {
  2914. err = ip6_nh_lookup_table(net, cfg, gw_addr,
  2915. cfg->fc_table, flags, &res);
  2916. /* gw_addr can not require a gateway or resolve to a reject
  2917. * route. If a device is given, it must match the result.
  2918. */
  2919. if (err || res.fib6_flags & RTF_REJECT ||
  2920. res.nh->fib_nh_gw_family ||
  2921. (dev && dev != res.nh->fib_nh_dev))
  2922. err = -EHOSTUNREACH;
  2923. }
  2924. if (err < 0) {
  2925. struct flowi6 fl6 = {
  2926. .flowi6_oif = cfg->fc_ifindex,
  2927. .daddr = *gw_addr,
  2928. };
  2929. err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
  2930. if (err || res.fib6_flags & RTF_REJECT ||
  2931. res.nh->fib_nh_gw_family)
  2932. err = -EHOSTUNREACH;
  2933. if (err)
  2934. return err;
  2935. fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
  2936. cfg->fc_ifindex != 0, NULL, flags);
  2937. }
  2938. err = 0;
  2939. if (dev) {
  2940. if (dev != res.nh->fib_nh_dev)
  2941. err = -EHOSTUNREACH;
  2942. } else {
  2943. *_dev = dev = res.nh->fib_nh_dev;
  2944. netdev_hold(dev, dev_tracker, GFP_ATOMIC);
  2945. *idev = in6_dev_get(dev);
  2946. }
  2947. return err;
  2948. }
  2949. static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
  2950. struct net_device **_dev,
  2951. netdevice_tracker *dev_tracker,
  2952. struct inet6_dev **idev,
  2953. struct netlink_ext_ack *extack)
  2954. {
  2955. const struct in6_addr *gw_addr = &cfg->fc_gateway;
  2956. int gwa_type = ipv6_addr_type(gw_addr);
  2957. bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
  2958. const struct net_device *dev = *_dev;
  2959. bool need_addr_check = !dev;
  2960. int err = -EINVAL;
  2961. /* if gw_addr is local we will fail to detect this in case
  2962. * address is still TENTATIVE (DAD in progress). rt6_lookup()
  2963. * will return already-added prefix route via interface that
  2964. * prefix route was assigned to, which might be non-loopback.
  2965. */
  2966. if (dev &&
  2967. ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
  2968. NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
  2969. goto out;
  2970. }
  2971. if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
  2972. /* IPv6 strictly inhibits using not link-local
  2973. * addresses as nexthop address.
  2974. * Otherwise, router will not able to send redirects.
  2975. * It is very good, but in some (rare!) circumstances
  2976. * (SIT, PtP, NBMA NOARP links) it is handy to allow
  2977. * some exceptions. --ANK
  2978. * We allow IPv4-mapped nexthops to support RFC4798-type
  2979. * addressing
  2980. */
  2981. if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
  2982. NL_SET_ERR_MSG(extack, "Invalid gateway address");
  2983. goto out;
  2984. }
  2985. rcu_read_lock();
  2986. if (cfg->fc_flags & RTNH_F_ONLINK)
  2987. err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
  2988. else
  2989. err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
  2990. idev);
  2991. rcu_read_unlock();
  2992. if (err)
  2993. goto out;
  2994. }
  2995. /* reload in case device was changed */
  2996. dev = *_dev;
  2997. err = -EINVAL;
  2998. if (!dev) {
  2999. NL_SET_ERR_MSG(extack, "Egress device not specified");
  3000. goto out;
  3001. } else if (dev->flags & IFF_LOOPBACK) {
  3002. NL_SET_ERR_MSG(extack,
  3003. "Egress device can not be loopback device for this route");
  3004. goto out;
  3005. }
  3006. /* if we did not check gw_addr above, do so now that the
  3007. * egress device has been resolved.
  3008. */
  3009. if (need_addr_check &&
  3010. ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
  3011. NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
  3012. goto out;
  3013. }
  3014. err = 0;
  3015. out:
  3016. return err;
  3017. }
  3018. static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
  3019. {
  3020. if ((flags & RTF_REJECT) ||
  3021. (dev && (dev->flags & IFF_LOOPBACK) &&
  3022. !(addr_type & IPV6_ADDR_LOOPBACK) &&
  3023. !(flags & (RTF_ANYCAST | RTF_LOCAL))))
  3024. return true;
  3025. return false;
  3026. }
  3027. int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
  3028. struct fib6_config *cfg, gfp_t gfp_flags,
  3029. struct netlink_ext_ack *extack)
  3030. {
  3031. netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
  3032. struct net_device *dev = NULL;
  3033. struct inet6_dev *idev = NULL;
  3034. int err;
  3035. fib6_nh->fib_nh_family = AF_INET6;
  3036. #ifdef CONFIG_IPV6_ROUTER_PREF
  3037. fib6_nh->last_probe = jiffies;
  3038. #endif
  3039. if (cfg->fc_is_fdb) {
  3040. fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
  3041. fib6_nh->fib_nh_gw_family = AF_INET6;
  3042. return 0;
  3043. }
  3044. err = -ENODEV;
  3045. if (cfg->fc_ifindex) {
  3046. dev = netdev_get_by_index(net, cfg->fc_ifindex,
  3047. dev_tracker, gfp_flags);
  3048. if (!dev)
  3049. goto out;
  3050. idev = in6_dev_get(dev);
  3051. if (!idev)
  3052. goto out;
  3053. }
  3054. if (cfg->fc_flags & RTNH_F_ONLINK) {
  3055. if (!dev) {
  3056. NL_SET_ERR_MSG(extack,
  3057. "Nexthop device required for onlink");
  3058. goto out;
  3059. }
  3060. if (!(dev->flags & IFF_UP)) {
  3061. NL_SET_ERR_MSG(extack, "Nexthop device is not up");
  3062. err = -ENETDOWN;
  3063. goto out;
  3064. }
  3065. fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
  3066. }
  3067. fib6_nh->fib_nh_weight = 1;
  3068. /* Reset the nexthop device to the loopback device in case of reject
  3069. * routes.
  3070. */
  3071. if (cfg->fc_flags & RTF_REJECT) {
  3072. /* hold loopback dev/idev if we haven't done so. */
  3073. if (dev != net->loopback_dev) {
  3074. if (dev) {
  3075. netdev_put(dev, dev_tracker);
  3076. in6_dev_put(idev);
  3077. }
  3078. dev = net->loopback_dev;
  3079. netdev_hold(dev, dev_tracker, gfp_flags);
  3080. idev = in6_dev_get(dev);
  3081. if (!idev) {
  3082. err = -ENODEV;
  3083. goto out;
  3084. }
  3085. }
  3086. goto pcpu_alloc;
  3087. }
  3088. if (cfg->fc_flags & RTF_GATEWAY) {
  3089. err = ip6_validate_gw(net, cfg, &dev, dev_tracker,
  3090. &idev, extack);
  3091. if (err)
  3092. goto out;
  3093. fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
  3094. fib6_nh->fib_nh_gw_family = AF_INET6;
  3095. }
  3096. err = -ENODEV;
  3097. if (!dev)
  3098. goto out;
  3099. if (!idev || idev->cnf.disable_ipv6) {
  3100. NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
  3101. err = -EACCES;
  3102. goto out;
  3103. }
  3104. if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
  3105. NL_SET_ERR_MSG(extack, "Nexthop device is not up");
  3106. err = -ENETDOWN;
  3107. goto out;
  3108. }
  3109. if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
  3110. !netif_carrier_ok(dev))
  3111. fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
  3112. err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
  3113. cfg->fc_encap_type, cfg, gfp_flags, extack);
  3114. if (err)
  3115. goto out;
  3116. pcpu_alloc:
  3117. fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
  3118. if (!fib6_nh->rt6i_pcpu) {
  3119. err = -ENOMEM;
  3120. goto out;
  3121. }
  3122. fib6_nh->fib_nh_dev = dev;
  3123. fib6_nh->fib_nh_oif = dev->ifindex;
  3124. err = 0;
  3125. out:
  3126. if (idev)
  3127. in6_dev_put(idev);
  3128. if (err) {
  3129. fib_nh_common_release(&fib6_nh->nh_common);
  3130. fib6_nh->nh_common.nhc_pcpu_rth_output = NULL;
  3131. fib6_nh->fib_nh_lws = NULL;
  3132. netdev_put(dev, dev_tracker);
  3133. }
  3134. return err;
  3135. }
  3136. void fib6_nh_release(struct fib6_nh *fib6_nh)
  3137. {
  3138. struct rt6_exception_bucket *bucket;
  3139. rcu_read_lock();
  3140. fib6_nh_flush_exceptions(fib6_nh, NULL);
  3141. bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
  3142. if (bucket) {
  3143. rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
  3144. kfree(bucket);
  3145. }
  3146. rcu_read_unlock();
  3147. fib6_nh_release_dsts(fib6_nh);
  3148. free_percpu(fib6_nh->rt6i_pcpu);
  3149. fib_nh_common_release(&fib6_nh->nh_common);
  3150. }
  3151. void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
  3152. {
  3153. int cpu;
  3154. if (!fib6_nh->rt6i_pcpu)
  3155. return;
  3156. for_each_possible_cpu(cpu) {
  3157. struct rt6_info *pcpu_rt, **ppcpu_rt;
  3158. ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
  3159. pcpu_rt = xchg(ppcpu_rt, NULL);
  3160. if (pcpu_rt) {
  3161. dst_dev_put(&pcpu_rt->dst);
  3162. dst_release(&pcpu_rt->dst);
  3163. }
  3164. }
  3165. }
  3166. static int fib6_config_validate(struct fib6_config *cfg,
  3167. struct netlink_ext_ack *extack)
  3168. {
  3169. /* RTF_PCPU is an internal flag; can not be set by userspace */
  3170. if (cfg->fc_flags & RTF_PCPU) {
  3171. NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
  3172. goto errout;
  3173. }
  3174. /* RTF_CACHE is an internal flag; can not be set by userspace */
  3175. if (cfg->fc_flags & RTF_CACHE) {
  3176. NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
  3177. goto errout;
  3178. }
  3179. if (cfg->fc_type > RTN_MAX) {
  3180. NL_SET_ERR_MSG(extack, "Invalid route type");
  3181. goto errout;
  3182. }
  3183. if (cfg->fc_dst_len > 128) {
  3184. NL_SET_ERR_MSG(extack, "Invalid prefix length");
  3185. goto errout;
  3186. }
  3187. #ifdef CONFIG_IPV6_SUBTREES
  3188. if (cfg->fc_src_len > 128) {
  3189. NL_SET_ERR_MSG(extack, "Invalid source address length");
  3190. goto errout;
  3191. }
  3192. if (cfg->fc_nh_id && cfg->fc_src_len) {
  3193. NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
  3194. goto errout;
  3195. }
  3196. #else
  3197. if (cfg->fc_src_len) {
  3198. NL_SET_ERR_MSG(extack,
  3199. "Specifying source address requires IPV6_SUBTREES to be enabled");
  3200. goto errout;
  3201. }
  3202. #endif
  3203. return 0;
  3204. errout:
  3205. return -EINVAL;
  3206. }
  3207. static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
  3208. gfp_t gfp_flags,
  3209. struct netlink_ext_ack *extack)
  3210. {
  3211. struct net *net = cfg->fc_nlinfo.nl_net;
  3212. struct fib6_table *table;
  3213. struct fib6_info *rt;
  3214. int err;
  3215. if (cfg->fc_nlinfo.nlh &&
  3216. !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
  3217. table = fib6_get_table(net, cfg->fc_table);
  3218. if (!table) {
  3219. pr_warn("NLM_F_CREATE should be specified when creating new route\n");
  3220. table = fib6_new_table(net, cfg->fc_table);
  3221. }
  3222. } else {
  3223. table = fib6_new_table(net, cfg->fc_table);
  3224. }
  3225. if (!table) {
  3226. err = -ENOBUFS;
  3227. goto err;
  3228. }
  3229. rt = fib6_info_alloc(gfp_flags, !cfg->fc_nh_id);
  3230. if (!rt) {
  3231. err = -ENOMEM;
  3232. goto err;
  3233. }
  3234. rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len,
  3235. extack);
  3236. if (IS_ERR(rt->fib6_metrics)) {
  3237. err = PTR_ERR(rt->fib6_metrics);
  3238. goto free;
  3239. }
  3240. if (cfg->fc_flags & RTF_ADDRCONF)
  3241. rt->dst_nocount = true;
  3242. if (cfg->fc_flags & RTF_EXPIRES)
  3243. fib6_set_expires(rt, jiffies +
  3244. clock_t_to_jiffies(cfg->fc_expires));
  3245. if (cfg->fc_protocol == RTPROT_UNSPEC)
  3246. cfg->fc_protocol = RTPROT_BOOT;
  3247. rt->fib6_protocol = cfg->fc_protocol;
  3248. rt->fib6_table = table;
  3249. rt->fib6_metric = cfg->fc_metric;
  3250. rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
  3251. rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
  3252. ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
  3253. rt->fib6_dst.plen = cfg->fc_dst_len;
  3254. #ifdef CONFIG_IPV6_SUBTREES
  3255. ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
  3256. rt->fib6_src.plen = cfg->fc_src_len;
  3257. #endif
  3258. return rt;
  3259. free:
  3260. kfree(rt);
  3261. err:
  3262. return ERR_PTR(err);
  3263. }
  3264. static int ip6_route_info_create_nh(struct fib6_info *rt,
  3265. struct fib6_config *cfg,
  3266. gfp_t gfp_flags,
  3267. struct netlink_ext_ack *extack)
  3268. {
  3269. struct net *net = cfg->fc_nlinfo.nl_net;
  3270. struct fib6_nh *fib6_nh;
  3271. int err;
  3272. if (cfg->fc_nh_id) {
  3273. struct nexthop *nh;
  3274. rcu_read_lock();
  3275. nh = nexthop_find_by_id(net, cfg->fc_nh_id);
  3276. if (!nh) {
  3277. err = -EINVAL;
  3278. NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
  3279. goto out_free;
  3280. }
  3281. err = fib6_check_nexthop(nh, cfg, extack);
  3282. if (err)
  3283. goto out_free;
  3284. if (!nexthop_get(nh)) {
  3285. NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
  3286. err = -ENOENT;
  3287. goto out_free;
  3288. }
  3289. rt->nh = nh;
  3290. fib6_nh = nexthop_fib6_nh(rt->nh);
  3291. rcu_read_unlock();
  3292. } else {
  3293. int addr_type;
  3294. err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
  3295. if (err)
  3296. goto out_release;
  3297. fib6_nh = rt->fib6_nh;
  3298. /* We cannot add true routes via loopback here, they would
  3299. * result in kernel looping; promote them to reject routes
  3300. */
  3301. addr_type = ipv6_addr_type(&cfg->fc_dst);
  3302. if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
  3303. addr_type))
  3304. rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
  3305. }
  3306. if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
  3307. struct net_device *dev = fib6_nh->fib_nh_dev;
  3308. if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
  3309. NL_SET_ERR_MSG(extack, "Invalid source address");
  3310. err = -EINVAL;
  3311. goto out_release;
  3312. }
  3313. rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
  3314. rt->fib6_prefsrc.plen = 128;
  3315. }
  3316. return 0;
  3317. out_release:
  3318. fib6_info_release(rt);
  3319. return err;
  3320. out_free:
  3321. rcu_read_unlock();
  3322. ip_fib_metrics_put(rt->fib6_metrics);
  3323. kfree(rt);
  3324. return err;
  3325. }
  3326. int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
  3327. struct netlink_ext_ack *extack)
  3328. {
  3329. struct fib6_info *rt;
  3330. int err;
  3331. err = fib6_config_validate(cfg, extack);
  3332. if (err)
  3333. return err;
  3334. rt = ip6_route_info_create(cfg, gfp_flags, extack);
  3335. if (IS_ERR(rt))
  3336. return PTR_ERR(rt);
  3337. err = ip6_route_info_create_nh(rt, cfg, gfp_flags, extack);
  3338. if (err)
  3339. return err;
  3340. err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
  3341. fib6_info_release(rt);
  3342. return err;
  3343. }
  3344. static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
  3345. {
  3346. struct net *net = info->nl_net;
  3347. struct fib6_table *table;
  3348. int err;
  3349. if (rt == net->ipv6.fib6_null_entry) {
  3350. err = -ENOENT;
  3351. goto out;
  3352. }
  3353. table = rt->fib6_table;
  3354. spin_lock_bh(&table->tb6_lock);
  3355. err = fib6_del(rt, info);
  3356. spin_unlock_bh(&table->tb6_lock);
  3357. out:
  3358. fib6_info_release(rt);
  3359. return err;
  3360. }
  3361. int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
  3362. {
  3363. struct nl_info info = {
  3364. .nl_net = net,
  3365. .skip_notify = skip_notify
  3366. };
  3367. return __ip6_del_rt(rt, &info);
  3368. }
  3369. static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
  3370. {
  3371. struct nl_info *info = &cfg->fc_nlinfo;
  3372. struct net *net = info->nl_net;
  3373. struct sk_buff *skb = NULL;
  3374. struct fib6_table *table;
  3375. int err = -ENOENT;
  3376. if (rt == net->ipv6.fib6_null_entry)
  3377. goto out_put;
  3378. table = rt->fib6_table;
  3379. spin_lock_bh(&table->tb6_lock);
  3380. if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
  3381. struct fib6_info *sibling, *next_sibling;
  3382. struct fib6_node *fn;
  3383. /* prefer to send a single notification with all hops */
  3384. skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
  3385. if (skb) {
  3386. u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
  3387. if (rt6_fill_node(net, skb, rt, NULL,
  3388. NULL, NULL, 0, RTM_DELROUTE,
  3389. info->portid, seq, 0) < 0) {
  3390. kfree_skb(skb);
  3391. skb = NULL;
  3392. } else
  3393. info->skip_notify = 1;
  3394. }
  3395. /* 'rt' points to the first sibling route. If it is not the
  3396. * leaf, then we do not need to send a notification. Otherwise,
  3397. * we need to check if the last sibling has a next route or not
  3398. * and emit a replace or delete notification, respectively.
  3399. */
  3400. info->skip_notify_kernel = 1;
  3401. fn = rcu_dereference_protected(rt->fib6_node,
  3402. lockdep_is_held(&table->tb6_lock));
  3403. if (rcu_access_pointer(fn->leaf) == rt) {
  3404. struct fib6_info *last_sibling, *replace_rt;
  3405. last_sibling = list_last_entry(&rt->fib6_siblings,
  3406. struct fib6_info,
  3407. fib6_siblings);
  3408. replace_rt = rcu_dereference_protected(
  3409. last_sibling->fib6_next,
  3410. lockdep_is_held(&table->tb6_lock));
  3411. if (replace_rt)
  3412. call_fib6_entry_notifiers_replace(net,
  3413. replace_rt);
  3414. else
  3415. call_fib6_multipath_entry_notifiers(net,
  3416. FIB_EVENT_ENTRY_DEL,
  3417. rt, rt->fib6_nsiblings,
  3418. NULL);
  3419. }
  3420. list_for_each_entry_safe(sibling, next_sibling,
  3421. &rt->fib6_siblings,
  3422. fib6_siblings) {
  3423. err = fib6_del(sibling, info);
  3424. if (err)
  3425. goto out_unlock;
  3426. }
  3427. }
  3428. err = fib6_del(rt, info);
  3429. out_unlock:
  3430. spin_unlock_bh(&table->tb6_lock);
  3431. out_put:
  3432. fib6_info_release(rt);
  3433. if (skb) {
  3434. rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
  3435. info->nlh, gfp_any());
  3436. }
  3437. return err;
  3438. }
  3439. static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
  3440. {
  3441. int rc = -ESRCH;
  3442. if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
  3443. goto out;
  3444. if (cfg->fc_flags & RTF_GATEWAY &&
  3445. !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
  3446. goto out;
  3447. rc = rt6_remove_exception_rt(rt);
  3448. out:
  3449. return rc;
  3450. }
  3451. static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
  3452. struct fib6_nh *nh)
  3453. {
  3454. struct fib6_result res = {
  3455. .f6i = rt,
  3456. .nh = nh,
  3457. };
  3458. struct rt6_info *rt_cache;
  3459. rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
  3460. if (rt_cache)
  3461. return __ip6_del_cached_rt(rt_cache, cfg);
  3462. return 0;
  3463. }
  /**
   * struct fib6_nh_del_cached_rt_arg - context for fib6_nh_del_cached_rt()
   * @cfg: the delete request being processed
   * @f6i: FIB entry whose nexthops are being walked
   */
  struct fib6_nh_del_cached_rt_arg {
  	struct fib6_config	*cfg;
  	struct fib6_info	*f6i;
  };
  3468. static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
  3469. {
  3470. struct fib6_nh_del_cached_rt_arg *arg = _arg;
  3471. int rc;
  3472. rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
  3473. return rc != -ESRCH ? rc : 0;
  3474. }
  3475. static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
  3476. {
  3477. struct fib6_nh_del_cached_rt_arg arg = {
  3478. .cfg = cfg,
  3479. .f6i = f6i
  3480. };
  3481. return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
  3482. }
  3483. static int ip6_route_del(struct fib6_config *cfg,
  3484. struct netlink_ext_ack *extack)
  3485. {
  3486. struct fib6_table *table;
  3487. struct fib6_info *rt;
  3488. struct fib6_node *fn;
  3489. int err = -ESRCH;
  3490. table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
  3491. if (!table) {
  3492. NL_SET_ERR_MSG(extack, "FIB table does not exist");
  3493. return err;
  3494. }
  3495. rcu_read_lock();
  3496. fn = fib6_locate(&table->tb6_root,
  3497. &cfg->fc_dst, cfg->fc_dst_len,
  3498. &cfg->fc_src, cfg->fc_src_len,
  3499. !(cfg->fc_flags & RTF_CACHE));
  3500. if (fn) {
  3501. for_each_fib6_node_rt_rcu(fn) {
  3502. struct fib6_nh *nh;
  3503. if (rt->nh && cfg->fc_nh_id &&
  3504. rt->nh->id != cfg->fc_nh_id)
  3505. continue;
  3506. if (cfg->fc_flags & RTF_CACHE) {
  3507. int rc = 0;
  3508. if (rt->nh) {
  3509. rc = ip6_del_cached_rt_nh(cfg, rt);
  3510. } else if (cfg->fc_nh_id) {
  3511. continue;
  3512. } else {
  3513. nh = rt->fib6_nh;
  3514. rc = ip6_del_cached_rt(cfg, rt, nh);
  3515. }
  3516. if (rc != -ESRCH) {
  3517. rcu_read_unlock();
  3518. return rc;
  3519. }
  3520. continue;
  3521. }
  3522. if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
  3523. continue;
  3524. if (cfg->fc_protocol &&
  3525. cfg->fc_protocol != rt->fib6_protocol)
  3526. continue;
  3527. if (rt->nh) {
  3528. if (!fib6_info_hold_safe(rt))
  3529. continue;
  3530. err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
  3531. break;
  3532. }
  3533. if (cfg->fc_nh_id)
  3534. continue;
  3535. nh = rt->fib6_nh;
  3536. if (cfg->fc_ifindex &&
  3537. (!nh->fib_nh_dev ||
  3538. nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
  3539. continue;
  3540. if (cfg->fc_flags & RTF_GATEWAY &&
  3541. !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
  3542. continue;
  3543. if (!fib6_info_hold_safe(rt))
  3544. continue;
  3545. /* if gateway was specified only delete the one hop */
  3546. if (cfg->fc_flags & RTF_GATEWAY)
  3547. err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
  3548. else
  3549. err = __ip6_del_rt_siblings(rt, cfg);
  3550. break;
  3551. }
  3552. }
  3553. rcu_read_unlock();
  3554. return err;
  3555. }
  3556. static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
  3557. {
  3558. struct netevent_redirect netevent;
  3559. struct rt6_info *rt, *nrt = NULL;
  3560. struct fib6_result res = {};
  3561. struct ndisc_options ndopts;
  3562. struct inet6_dev *in6_dev;
  3563. struct neighbour *neigh;
  3564. struct rd_msg *msg;
  3565. int optlen, on_link;
  3566. u8 *lladdr;
  3567. optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
  3568. optlen -= sizeof(*msg);
  3569. if (optlen < 0) {
  3570. net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
  3571. return;
  3572. }
  3573. msg = (struct rd_msg *)icmp6_hdr(skb);
  3574. if (ipv6_addr_is_multicast(&msg->dest)) {
  3575. net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
  3576. return;
  3577. }
  3578. on_link = 0;
  3579. if (ipv6_addr_equal(&msg->dest, &msg->target)) {
  3580. on_link = 1;
  3581. } else if (ipv6_addr_type(&msg->target) !=
  3582. (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
  3583. net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
  3584. return;
  3585. }
  3586. in6_dev = __in6_dev_get(skb->dev);
  3587. if (!in6_dev)
  3588. return;
  3589. if (READ_ONCE(in6_dev->cnf.forwarding) ||
  3590. !READ_ONCE(in6_dev->cnf.accept_redirects))
  3591. return;
  3592. /* RFC2461 8.1:
  3593. * The IP source address of the Redirect MUST be the same as the current
  3594. * first-hop router for the specified ICMP Destination Address.
  3595. */
  3596. if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
  3597. net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
  3598. return;
  3599. }
  3600. lladdr = NULL;
  3601. if (ndopts.nd_opts_tgt_lladdr) {
  3602. lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
  3603. skb->dev);
  3604. if (!lladdr) {
  3605. net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
  3606. return;
  3607. }
  3608. }
  3609. rt = dst_rt6_info(dst);
  3610. if (rt->rt6i_flags & RTF_REJECT) {
  3611. net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
  3612. return;
  3613. }
  3614. /* Redirect received -> path was valid.
  3615. * Look, redirects are sent only in response to data packets,
  3616. * so that this nexthop apparently is reachable. --ANK
  3617. */
  3618. dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
  3619. neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
  3620. if (!neigh)
  3621. return;
  3622. /*
  3623. * We have finally decided to accept it.
  3624. */
  3625. ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
  3626. NEIGH_UPDATE_F_WEAK_OVERRIDE|
  3627. NEIGH_UPDATE_F_OVERRIDE|
  3628. (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
  3629. NEIGH_UPDATE_F_ISROUTER)),
  3630. NDISC_REDIRECT, &ndopts);
  3631. rcu_read_lock();
  3632. res.f6i = rcu_dereference(rt->from);
  3633. if (!res.f6i)
  3634. goto out;
  3635. if (res.f6i->nh) {
  3636. struct fib6_nh_match_arg arg = {
  3637. .dev = dst_dev_rcu(dst),
  3638. .gw = &rt->rt6i_gateway,
  3639. };
  3640. nexthop_for_each_fib6_nh(res.f6i->nh,
  3641. fib6_nh_find_match, &arg);
  3642. /* fib6_info uses a nexthop that does not have fib6_nh
  3643. * using the dst->dev. Should be impossible
  3644. */
  3645. if (!arg.match)
  3646. goto out;
  3647. res.nh = arg.match;
  3648. } else {
  3649. res.nh = res.f6i->fib6_nh;
  3650. }
  3651. res.fib6_flags = res.f6i->fib6_flags;
  3652. res.fib6_type = res.f6i->fib6_type;
  3653. nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
  3654. if (!nrt)
  3655. goto out;
  3656. nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
  3657. if (on_link)
  3658. nrt->rt6i_flags &= ~RTF_GATEWAY;
  3659. nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
  3660. /* rt6_insert_exception() will take care of duplicated exceptions */
  3661. if (rt6_insert_exception(nrt, &res)) {
  3662. dst_release_immediate(&nrt->dst);
  3663. goto out;
  3664. }
  3665. netevent.old = &rt->dst;
  3666. netevent.new = &nrt->dst;
  3667. netevent.daddr = &msg->dest;
  3668. netevent.neigh = neigh;
  3669. call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
  3670. out:
  3671. rcu_read_unlock();
  3672. neigh_release(neigh);
  3673. }
  3674. #ifdef CONFIG_IPV6_ROUTE_INFO
  3675. static struct fib6_info *rt6_get_route_info(struct net *net,
  3676. const struct in6_addr *prefix, int prefixlen,
  3677. const struct in6_addr *gwaddr,
  3678. struct net_device *dev)
  3679. {
  3680. u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
  3681. int ifindex = dev->ifindex;
  3682. struct fib6_node *fn;
  3683. struct fib6_info *rt = NULL;
  3684. struct fib6_table *table;
  3685. table = fib6_get_table(net, tb_id);
  3686. if (!table)
  3687. return NULL;
  3688. rcu_read_lock();
  3689. fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
  3690. if (!fn)
  3691. goto out;
  3692. for_each_fib6_node_rt_rcu(fn) {
  3693. /* these routes do not use nexthops */
  3694. if (rt->nh)
  3695. continue;
  3696. if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
  3697. continue;
  3698. if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
  3699. !rt->fib6_nh->fib_nh_gw_family)
  3700. continue;
  3701. if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
  3702. continue;
  3703. if (!fib6_info_hold_safe(rt))
  3704. continue;
  3705. break;
  3706. }
  3707. out:
  3708. rcu_read_unlock();
  3709. return rt;
  3710. }
  3711. static struct fib6_info *rt6_add_route_info(struct net *net,
  3712. const struct in6_addr *prefix, int prefixlen,
  3713. const struct in6_addr *gwaddr,
  3714. struct net_device *dev,
  3715. unsigned int pref)
  3716. {
  3717. struct fib6_config cfg = {
  3718. .fc_metric = IP6_RT_PRIO_USER,
  3719. .fc_ifindex = dev->ifindex,
  3720. .fc_dst_len = prefixlen,
  3721. .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
  3722. RTF_UP | RTF_PREF(pref),
  3723. .fc_protocol = RTPROT_RA,
  3724. .fc_type = RTN_UNICAST,
  3725. .fc_nlinfo.portid = 0,
  3726. .fc_nlinfo.nlh = NULL,
  3727. .fc_nlinfo.nl_net = net,
  3728. };
  3729. cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
  3730. cfg.fc_dst = *prefix;
  3731. cfg.fc_gateway = *gwaddr;
  3732. /* We should treat it as a default route if prefix length is 0. */
  3733. if (!prefixlen)
  3734. cfg.fc_flags |= RTF_DEFAULT;
  3735. ip6_route_add(&cfg, GFP_ATOMIC, NULL);
  3736. return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
  3737. }
  3738. #endif
  3739. struct fib6_info *rt6_get_dflt_router(struct net *net,
  3740. const struct in6_addr *addr,
  3741. struct net_device *dev)
  3742. {
  3743. u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
  3744. struct fib6_info *rt;
  3745. struct fib6_table *table;
  3746. table = fib6_get_table(net, tb_id);
  3747. if (!table)
  3748. return NULL;
  3749. rcu_read_lock();
  3750. for_each_fib6_node_rt_rcu(&table->tb6_root) {
  3751. struct fib6_nh *nh;
  3752. /* RA routes do not use nexthops */
  3753. if (rt->nh)
  3754. continue;
  3755. nh = rt->fib6_nh;
  3756. if (dev == nh->fib_nh_dev &&
  3757. ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
  3758. ipv6_addr_equal(&nh->fib_nh_gw6, addr))
  3759. break;
  3760. }
  3761. if (rt && !fib6_info_hold_safe(rt))
  3762. rt = NULL;
  3763. rcu_read_unlock();
  3764. return rt;
  3765. }
  3766. struct fib6_info *rt6_add_dflt_router(struct net *net,
  3767. const struct in6_addr *gwaddr,
  3768. struct net_device *dev,
  3769. unsigned int pref,
  3770. u32 defrtr_usr_metric,
  3771. int lifetime)
  3772. {
  3773. struct fib6_config cfg = {
  3774. .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
  3775. .fc_metric = defrtr_usr_metric,
  3776. .fc_ifindex = dev->ifindex,
  3777. .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
  3778. RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
  3779. .fc_protocol = RTPROT_RA,
  3780. .fc_type = RTN_UNICAST,
  3781. .fc_nlinfo.portid = 0,
  3782. .fc_nlinfo.nlh = NULL,
  3783. .fc_nlinfo.nl_net = net,
  3784. .fc_expires = jiffies_to_clock_t(lifetime * HZ),
  3785. };
  3786. cfg.fc_gateway = *gwaddr;
  3787. if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
  3788. struct fib6_table *table;
  3789. table = fib6_get_table(dev_net(dev), cfg.fc_table);
  3790. if (table)
  3791. table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
  3792. }
  3793. return rt6_get_dflt_router(net, gwaddr, dev);
  3794. }
  3795. static void __rt6_purge_dflt_routers(struct net *net,
  3796. struct fib6_table *table)
  3797. {
  3798. struct fib6_info *rt;
  3799. restart:
  3800. rcu_read_lock();
  3801. for_each_fib6_node_rt_rcu(&table->tb6_root) {
  3802. struct net_device *dev = fib6_info_nh_dev(rt);
  3803. struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
  3804. if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
  3805. (!idev || idev->cnf.accept_ra != 2) &&
  3806. fib6_info_hold_safe(rt)) {
  3807. rcu_read_unlock();
  3808. ip6_del_rt(net, rt, false);
  3809. goto restart;
  3810. }
  3811. }
  3812. rcu_read_unlock();
  3813. table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
  3814. }
  3815. void rt6_purge_dflt_routers(struct net *net)
  3816. {
  3817. struct fib6_table *table;
  3818. struct hlist_head *head;
  3819. unsigned int h;
  3820. rcu_read_lock();
  3821. for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
  3822. head = &net->ipv6.fib_table_hash[h];
  3823. hlist_for_each_entry_rcu(table, head, tb6_hlist) {
  3824. if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
  3825. __rt6_purge_dflt_routers(net, table);
  3826. }
  3827. }
  3828. rcu_read_unlock();
  3829. }
  3830. static void rtmsg_to_fib6_config(struct net *net,
  3831. struct in6_rtmsg *rtmsg,
  3832. struct fib6_config *cfg)
  3833. {
  3834. *cfg = (struct fib6_config){
  3835. .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
  3836. : RT6_TABLE_MAIN,
  3837. .fc_ifindex = rtmsg->rtmsg_ifindex,
  3838. .fc_metric = rtmsg->rtmsg_metric,
  3839. .fc_expires = rtmsg->rtmsg_info,
  3840. .fc_dst_len = rtmsg->rtmsg_dst_len,
  3841. .fc_src_len = rtmsg->rtmsg_src_len,
  3842. .fc_flags = rtmsg->rtmsg_flags,
  3843. .fc_type = rtmsg->rtmsg_type,
  3844. .fc_nlinfo.nl_net = net,
  3845. .fc_dst = rtmsg->rtmsg_dst,
  3846. .fc_src = rtmsg->rtmsg_src,
  3847. .fc_gateway = rtmsg->rtmsg_gateway,
  3848. };
  3849. }
  3850. int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
  3851. {
  3852. struct fib6_config cfg;
  3853. int err;
  3854. if (cmd != SIOCADDRT && cmd != SIOCDELRT)
  3855. return -EINVAL;
  3856. if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
  3857. return -EPERM;
  3858. rtmsg_to_fib6_config(net, rtmsg, &cfg);
  3859. switch (cmd) {
  3860. case SIOCADDRT:
  3861. /* Only do the default setting of fc_metric in route adding */
  3862. if (cfg.fc_metric == 0)
  3863. cfg.fc_metric = IP6_RT_PRIO_USER;
  3864. err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
  3865. break;
  3866. case SIOCDELRT:
  3867. err = ip6_route_del(&cfg, NULL);
  3868. break;
  3869. }
  3870. return err;
  3871. }
  3872. /*
  3873. * Drop the packet on the floor
  3874. */
  3875. static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
  3876. {
  3877. struct dst_entry *dst = skb_dst(skb);
  3878. struct net_device *dev = dst_dev(dst);
  3879. struct net *net = dev_net(dev);
  3880. struct inet6_dev *idev;
  3881. SKB_DR(reason);
  3882. int type;
  3883. if (netif_is_l3_master(skb->dev) ||
  3884. dev == net->loopback_dev)
  3885. idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
  3886. else
  3887. idev = ip6_dst_idev(dst);
  3888. switch (ipstats_mib_noroutes) {
  3889. case IPSTATS_MIB_INNOROUTES:
  3890. type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
  3891. if (type == IPV6_ADDR_ANY) {
  3892. SKB_DR_SET(reason, IP_INADDRERRORS);
  3893. IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
  3894. break;
  3895. }
  3896. SKB_DR_SET(reason, IP_INNOROUTES);
  3897. fallthrough;
  3898. case IPSTATS_MIB_OUTNOROUTES:
  3899. SKB_DR_OR(reason, IP_OUTNOROUTES);
  3900. IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
  3901. break;
  3902. }
  3903. /* Start over by dropping the dst for l3mdev case */
  3904. if (netif_is_l3_master(skb->dev))
  3905. skb_dst_drop(skb);
  3906. icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
  3907. kfree_skb_reason(skb, reason);
  3908. return 0;
  3909. }
  3910. static int ip6_pkt_discard(struct sk_buff *skb)
  3911. {
  3912. return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
  3913. }
  3914. static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
  3915. {
  3916. skb->dev = skb_dst_dev(skb);
  3917. return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
  3918. }
  3919. static int ip6_pkt_prohibit(struct sk_buff *skb)
  3920. {
  3921. return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
  3922. }
  3923. static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
  3924. {
  3925. skb->dev = skb_dst_dev(skb);
  3926. return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
  3927. }
  3928. /*
  3929. * Allocate a dst for local (unicast / anycast) address.
  3930. */
  3931. struct fib6_info *addrconf_f6i_alloc(struct net *net,
  3932. struct inet6_dev *idev,
  3933. const struct in6_addr *addr,
  3934. bool anycast, gfp_t gfp_flags,
  3935. struct netlink_ext_ack *extack)
  3936. {
  3937. struct fib6_config cfg = {
  3938. .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
  3939. .fc_ifindex = idev->dev->ifindex,
  3940. .fc_flags = RTF_UP | RTF_NONEXTHOP,
  3941. .fc_dst = *addr,
  3942. .fc_dst_len = 128,
  3943. .fc_protocol = RTPROT_KERNEL,
  3944. .fc_nlinfo.nl_net = net,
  3945. .fc_ignore_dev_down = true,
  3946. };
  3947. struct fib6_info *f6i;
  3948. int err;
  3949. if (anycast) {
  3950. cfg.fc_type = RTN_ANYCAST;
  3951. cfg.fc_flags |= RTF_ANYCAST;
  3952. } else {
  3953. cfg.fc_type = RTN_LOCAL;
  3954. cfg.fc_flags |= RTF_LOCAL;
  3955. }
  3956. f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
  3957. if (IS_ERR(f6i))
  3958. return f6i;
  3959. err = ip6_route_info_create_nh(f6i, &cfg, gfp_flags, extack);
  3960. if (err)
  3961. return ERR_PTR(err);
  3962. f6i->dst_nocount = true;
  3963. if (!anycast &&
  3964. (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
  3965. READ_ONCE(idev->cnf.disable_policy)))
  3966. f6i->dst_nopolicy = true;
  3967. return f6i;
  3968. }
  3969. /* remove deleted ip from prefsrc entries */
  3970. struct arg_dev_net_ip {
  3971. struct net *net;
  3972. struct in6_addr *addr;
  3973. };
  3974. static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
  3975. {
  3976. struct net *net = ((struct arg_dev_net_ip *)arg)->net;
  3977. struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
  3978. if (!rt->nh &&
  3979. rt != net->ipv6.fib6_null_entry &&
  3980. ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
  3981. !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
  3982. spin_lock_bh(&rt6_exception_lock);
  3983. /* remove prefsrc entry */
  3984. rt->fib6_prefsrc.plen = 0;
  3985. spin_unlock_bh(&rt6_exception_lock);
  3986. }
  3987. return 0;
  3988. }
  3989. void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
  3990. {
  3991. struct net *net = dev_net(ifp->idev->dev);
  3992. struct arg_dev_net_ip adni = {
  3993. .net = net,
  3994. .addr = &ifp->addr,
  3995. };
  3996. fib6_clean_all(net, fib6_remove_prefsrc, &adni);
  3997. }
  3998. #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
  3999. /* Remove routers and update dst entries when gateway turn into host. */
  4000. static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
  4001. {
  4002. struct in6_addr *gateway = (struct in6_addr *)arg;
  4003. struct fib6_nh *nh;
  4004. /* RA routes do not use nexthops */
  4005. if (rt->nh)
  4006. return 0;
  4007. nh = rt->fib6_nh;
  4008. if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
  4009. nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
  4010. return -1;
  4011. /* Further clean up cached routes in exception table.
  4012. * This is needed because cached route may have a different
  4013. * gateway than its 'parent' in the case of an ip redirect.
  4014. */
  4015. fib6_nh_exceptions_clean_tohost(nh, gateway);
  4016. return 0;
  4017. }
  4018. void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
  4019. {
  4020. fib6_clean_all(net, fib6_clean_tohost, gateway);
  4021. }
  4022. struct arg_netdev_event {
  4023. const struct net_device *dev;
  4024. union {
  4025. unsigned char nh_flags;
  4026. unsigned long event;
  4027. };
  4028. };
  4029. static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
  4030. {
  4031. struct fib6_info *iter;
  4032. struct fib6_node *fn;
  4033. fn = rcu_dereference_protected(rt->fib6_node,
  4034. lockdep_is_held(&rt->fib6_table->tb6_lock));
  4035. iter = rcu_dereference_protected(fn->leaf,
  4036. lockdep_is_held(&rt->fib6_table->tb6_lock));
  4037. while (iter) {
  4038. if (iter->fib6_metric == rt->fib6_metric &&
  4039. rt6_qualify_for_ecmp(iter))
  4040. return iter;
  4041. iter = rcu_dereference_protected(iter->fib6_next,
  4042. lockdep_is_held(&rt->fib6_table->tb6_lock));
  4043. }
  4044. return NULL;
  4045. }
  4046. /* only called for fib entries with builtin fib6_nh */
  4047. static bool rt6_is_dead(const struct fib6_info *rt)
  4048. {
  4049. if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
  4050. (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
  4051. ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
  4052. return true;
  4053. return false;
  4054. }
  4055. static int rt6_multipath_total_weight(const struct fib6_info *rt)
  4056. {
  4057. struct fib6_info *iter;
  4058. int total = 0;
  4059. if (!rt6_is_dead(rt))
  4060. total += rt->fib6_nh->fib_nh_weight;
  4061. list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
  4062. if (!rt6_is_dead(iter))
  4063. total += iter->fib6_nh->fib_nh_weight;
  4064. }
  4065. return total;
  4066. }
  4067. static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
  4068. {
  4069. int upper_bound = -1;
  4070. if (!rt6_is_dead(rt)) {
  4071. *weight += rt->fib6_nh->fib_nh_weight;
  4072. upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
  4073. total) - 1;
  4074. }
  4075. atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
  4076. }
  4077. static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
  4078. {
  4079. struct fib6_info *iter;
  4080. int weight = 0;
  4081. rt6_upper_bound_set(rt, &weight, total);
  4082. list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
  4083. rt6_upper_bound_set(iter, &weight, total);
  4084. }
  4085. void rt6_multipath_rebalance(struct fib6_info *rt)
  4086. {
  4087. struct fib6_info *first;
  4088. int total;
  4089. /* In case the entire multipath route was marked for flushing,
  4090. * then there is no need to rebalance upon the removal of every
  4091. * sibling route.
  4092. */
  4093. if (!rt->fib6_nsiblings || rt->should_flush)
  4094. return;
  4095. /* During lookup routes are evaluated in order, so we need to
  4096. * make sure upper bounds are assigned from the first sibling
  4097. * onwards.
  4098. */
  4099. first = rt6_multipath_first_sibling(rt);
  4100. if (WARN_ON_ONCE(!first))
  4101. return;
  4102. total = rt6_multipath_total_weight(first);
  4103. rt6_multipath_upper_bound_set(first, total);
  4104. }
  4105. static int fib6_ifup(struct fib6_info *rt, void *p_arg)
  4106. {
  4107. const struct arg_netdev_event *arg = p_arg;
  4108. struct net *net = dev_net(arg->dev);
  4109. if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
  4110. rt->fib6_nh->fib_nh_dev == arg->dev) {
  4111. rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
  4112. fib6_update_sernum_upto_root(net, rt);
  4113. rt6_multipath_rebalance(rt);
  4114. }
  4115. return 0;
  4116. }
  4117. void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
  4118. {
  4119. struct arg_netdev_event arg = {
  4120. .dev = dev,
  4121. {
  4122. .nh_flags = nh_flags,
  4123. },
  4124. };
  4125. if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
  4126. arg.nh_flags |= RTNH_F_LINKDOWN;
  4127. fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
  4128. }
  4129. /* only called for fib entries with inline fib6_nh */
  4130. static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
  4131. const struct net_device *dev)
  4132. {
  4133. struct fib6_info *iter;
  4134. if (rt->fib6_nh->fib_nh_dev == dev)
  4135. return true;
  4136. list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
  4137. if (iter->fib6_nh->fib_nh_dev == dev)
  4138. return true;
  4139. return false;
  4140. }
  4141. static void rt6_multipath_flush(struct fib6_info *rt)
  4142. {
  4143. struct fib6_info *iter;
  4144. rt->should_flush = 1;
  4145. list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
  4146. iter->should_flush = 1;
  4147. }
  4148. static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
  4149. const struct net_device *down_dev)
  4150. {
  4151. struct fib6_info *iter;
  4152. unsigned int dead = 0;
  4153. if (rt->fib6_nh->fib_nh_dev == down_dev ||
  4154. rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
  4155. dead++;
  4156. list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
  4157. if (iter->fib6_nh->fib_nh_dev == down_dev ||
  4158. iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
  4159. dead++;
  4160. return dead;
  4161. }
  4162. static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
  4163. const struct net_device *dev,
  4164. unsigned char nh_flags)
  4165. {
  4166. struct fib6_info *iter;
  4167. if (rt->fib6_nh->fib_nh_dev == dev)
  4168. rt->fib6_nh->fib_nh_flags |= nh_flags;
  4169. list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
  4170. if (iter->fib6_nh->fib_nh_dev == dev)
  4171. iter->fib6_nh->fib_nh_flags |= nh_flags;
  4172. }
  4173. /* called with write lock held for table with rt */
  4174. static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
  4175. {
  4176. const struct arg_netdev_event *arg = p_arg;
  4177. const struct net_device *dev = arg->dev;
  4178. struct net *net = dev_net(dev);
  4179. if (rt == net->ipv6.fib6_null_entry || rt->nh)
  4180. return 0;
  4181. switch (arg->event) {
  4182. case NETDEV_UNREGISTER:
  4183. return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
  4184. case NETDEV_DOWN:
  4185. if (rt->should_flush)
  4186. return -1;
  4187. if (!rt->fib6_nsiblings)
  4188. return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
  4189. if (rt6_multipath_uses_dev(rt, dev)) {
  4190. unsigned int count;
  4191. count = rt6_multipath_dead_count(rt, dev);
  4192. if (rt->fib6_nsiblings + 1 == count) {
  4193. rt6_multipath_flush(rt);
  4194. return -1;
  4195. }
  4196. rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
  4197. RTNH_F_LINKDOWN);
  4198. fib6_update_sernum(net, rt);
  4199. rt6_multipath_rebalance(rt);
  4200. }
  4201. return -2;
  4202. case NETDEV_CHANGE:
  4203. if (rt->fib6_nh->fib_nh_dev != dev ||
  4204. rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
  4205. break;
  4206. rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
  4207. rt6_multipath_rebalance(rt);
  4208. break;
  4209. }
  4210. return 0;
  4211. }
  4212. void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
  4213. {
  4214. struct arg_netdev_event arg = {
  4215. .dev = dev,
  4216. {
  4217. .event = event,
  4218. },
  4219. };
  4220. struct net *net = dev_net(dev);
  4221. if (READ_ONCE(net->ipv6.sysctl.skip_notify_on_dev_down))
  4222. fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
  4223. else
  4224. fib6_clean_all(net, fib6_ifdown, &arg);
  4225. }
  4226. void rt6_disable_ip(struct net_device *dev, unsigned long event)
  4227. {
  4228. rt6_sync_down_dev(dev, event);
  4229. rt6_uncached_list_flush_dev(dev);
  4230. neigh_ifdown(&nd_tbl, dev);
  4231. }
  4232. struct rt6_mtu_change_arg {
  4233. struct net_device *dev;
  4234. unsigned int mtu;
  4235. struct fib6_info *f6i;
  4236. };
  4237. static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
  4238. {
  4239. struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
  4240. struct fib6_info *f6i = arg->f6i;
  4241. /* For administrative MTU increase, there is no way to discover
  4242. * IPv6 PMTU increase, so PMTU increase should be updated here.
  4243. * Since RFC 1981 doesn't include administrative MTU increase
  4244. * update PMTU increase is a MUST. (i.e. jumbo frame)
  4245. */
  4246. if (nh->fib_nh_dev == arg->dev) {
  4247. struct inet6_dev *idev = __in6_dev_get(arg->dev);
  4248. u32 mtu = f6i->fib6_pmtu;
  4249. if (mtu >= arg->mtu ||
  4250. (mtu < arg->mtu && mtu == idev->cnf.mtu6))
  4251. fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
  4252. spin_lock_bh(&rt6_exception_lock);
  4253. rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
  4254. spin_unlock_bh(&rt6_exception_lock);
  4255. }
  4256. return 0;
  4257. }
  4258. static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
  4259. {
  4260. struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
  4261. struct inet6_dev *idev;
  4262. /* In IPv6 pmtu discovery is not optional,
  4263. so that RTAX_MTU lock cannot disable it.
  4264. We still use this lock to block changes
  4265. caused by addrconf/ndisc.
  4266. */
  4267. idev = __in6_dev_get(arg->dev);
  4268. if (!idev)
  4269. return 0;
  4270. if (fib6_metric_locked(f6i, RTAX_MTU))
  4271. return 0;
  4272. arg->f6i = f6i;
  4273. if (f6i->nh) {
  4274. /* fib6_nh_mtu_change only returns 0, so this is safe */
  4275. return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
  4276. arg);
  4277. }
  4278. return fib6_nh_mtu_change(f6i->fib6_nh, arg);
  4279. }
  4280. void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
  4281. {
  4282. struct rt6_mtu_change_arg arg = {
  4283. .dev = dev,
  4284. .mtu = mtu,
  4285. };
  4286. fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
  4287. }
  4288. static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
  4289. [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
  4290. [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
  4291. [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
  4292. [RTA_OIF] = { .type = NLA_U32 },
  4293. [RTA_IIF] = { .type = NLA_U32 },
  4294. [RTA_PRIORITY] = { .type = NLA_U32 },
  4295. [RTA_METRICS] = { .type = NLA_NESTED },
  4296. [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
  4297. [RTA_PREF] = { .type = NLA_U8 },
  4298. [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
  4299. [RTA_ENCAP] = { .type = NLA_NESTED },
  4300. [RTA_EXPIRES] = { .type = NLA_U32 },
  4301. [RTA_UID] = { .type = NLA_U32 },
  4302. [RTA_MARK] = { .type = NLA_U32 },
  4303. [RTA_TABLE] = { .type = NLA_U32 },
  4304. [RTA_IP_PROTO] = { .type = NLA_U8 },
  4305. [RTA_SPORT] = { .type = NLA_U16 },
  4306. [RTA_DPORT] = { .type = NLA_U16 },
  4307. [RTA_NH_ID] = { .type = NLA_U32 },
  4308. [RTA_FLOWLABEL] = { .type = NLA_BE32 },
  4309. };
  4310. static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
  4311. struct netlink_ext_ack *extack,
  4312. bool newroute)
  4313. {
  4314. struct rtnexthop *rtnh;
  4315. int remaining;
  4316. remaining = cfg->fc_mp_len;
  4317. rtnh = (struct rtnexthop *)cfg->fc_mp;
  4318. if (!rtnh_ok(rtnh, remaining)) {
  4319. NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - no valid nexthops");
  4320. return -EINVAL;
  4321. }
  4322. do {
  4323. bool has_gateway = cfg->fc_flags & RTF_GATEWAY;
  4324. int attrlen = rtnh_attrlen(rtnh);
  4325. if (attrlen > 0) {
  4326. struct nlattr *nla, *attrs;
  4327. attrs = rtnh_attrs(rtnh);
  4328. nla = nla_find(attrs, attrlen, RTA_GATEWAY);
  4329. if (nla) {
  4330. if (nla_len(nla) < sizeof(cfg->fc_gateway)) {
  4331. NL_SET_ERR_MSG(extack,
  4332. "Invalid IPv6 address in RTA_GATEWAY");
  4333. return -EINVAL;
  4334. }
  4335. has_gateway = true;
  4336. }
  4337. }
  4338. if (newroute && (cfg->fc_nh_id || !has_gateway)) {
  4339. NL_SET_ERR_MSG(extack,
  4340. "Device only routes can not be added for IPv6 using the multipath API.");
  4341. return -EINVAL;
  4342. }
  4343. rtnh = rtnh_next(rtnh, &remaining);
  4344. } while (rtnh_ok(rtnh, remaining));
  4345. return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, extack);
  4346. }
  4347. static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
  4348. struct fib6_config *cfg,
  4349. struct netlink_ext_ack *extack)
  4350. {
  4351. bool newroute = nlh->nlmsg_type == RTM_NEWROUTE;
  4352. struct nlattr *tb[RTA_MAX+1];
  4353. struct rtmsg *rtm;
  4354. unsigned int pref;
  4355. int err;
  4356. err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
  4357. rtm_ipv6_policy, extack);
  4358. if (err < 0)
  4359. goto errout;
  4360. err = -EINVAL;
  4361. rtm = nlmsg_data(nlh);
  4362. if (rtm->rtm_tos) {
  4363. NL_SET_ERR_MSG(extack,
  4364. "Invalid dsfield (tos): option not available for IPv6");
  4365. goto errout;
  4366. }
  4367. if (tb[RTA_FLOWLABEL]) {
  4368. NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
  4369. "Flow label cannot be specified for this operation");
  4370. goto errout;
  4371. }
  4372. *cfg = (struct fib6_config){
  4373. .fc_table = rtm->rtm_table,
  4374. .fc_dst_len = rtm->rtm_dst_len,
  4375. .fc_src_len = rtm->rtm_src_len,
  4376. .fc_flags = RTF_UP,
  4377. .fc_protocol = rtm->rtm_protocol,
  4378. .fc_type = rtm->rtm_type,
  4379. .fc_nlinfo.portid = NETLINK_CB(skb).portid,
  4380. .fc_nlinfo.nlh = nlh,
  4381. .fc_nlinfo.nl_net = sock_net(skb->sk),
  4382. };
  4383. if (rtm->rtm_type == RTN_UNREACHABLE ||
  4384. rtm->rtm_type == RTN_BLACKHOLE ||
  4385. rtm->rtm_type == RTN_PROHIBIT ||
  4386. rtm->rtm_type == RTN_THROW)
  4387. cfg->fc_flags |= RTF_REJECT;
  4388. if (rtm->rtm_type == RTN_LOCAL)
  4389. cfg->fc_flags |= RTF_LOCAL;
  4390. if (rtm->rtm_flags & RTM_F_CLONED)
  4391. cfg->fc_flags |= RTF_CACHE;
  4392. cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
  4393. if (tb[RTA_NH_ID]) {
  4394. if (tb[RTA_GATEWAY] || tb[RTA_OIF] ||
  4395. tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
  4396. NL_SET_ERR_MSG(extack,
  4397. "Nexthop specification and nexthop id are mutually exclusive");
  4398. goto errout;
  4399. }
  4400. cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
  4401. }
  4402. if (tb[RTA_GATEWAY]) {
  4403. cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
  4404. cfg->fc_flags |= RTF_GATEWAY;
  4405. }
  4406. if (tb[RTA_VIA]) {
  4407. NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
  4408. goto errout;
  4409. }
  4410. if (tb[RTA_DST]) {
  4411. int plen = (rtm->rtm_dst_len + 7) >> 3;
  4412. if (nla_len(tb[RTA_DST]) < plen)
  4413. goto errout;
  4414. nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
  4415. }
  4416. if (tb[RTA_SRC]) {
  4417. int plen = (rtm->rtm_src_len + 7) >> 3;
  4418. if (nla_len(tb[RTA_SRC]) < plen)
  4419. goto errout;
  4420. nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
  4421. }
  4422. if (tb[RTA_PREFSRC])
  4423. cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
  4424. if (tb[RTA_OIF])
  4425. cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
  4426. if (tb[RTA_PRIORITY])
  4427. cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
  4428. if (tb[RTA_METRICS]) {
  4429. cfg->fc_mx = nla_data(tb[RTA_METRICS]);
  4430. cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
  4431. }
  4432. if (tb[RTA_TABLE])
  4433. cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
  4434. if (tb[RTA_MULTIPATH]) {
  4435. cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
  4436. cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
  4437. err = rtm_to_fib6_multipath_config(cfg, extack, newroute);
  4438. if (err < 0)
  4439. goto errout;
  4440. }
  4441. if (tb[RTA_PREF]) {
  4442. pref = nla_get_u8(tb[RTA_PREF]);
  4443. if (pref != ICMPV6_ROUTER_PREF_LOW &&
  4444. pref != ICMPV6_ROUTER_PREF_HIGH)
  4445. pref = ICMPV6_ROUTER_PREF_MEDIUM;
  4446. cfg->fc_flags |= RTF_PREF(pref);
  4447. }
  4448. if (tb[RTA_ENCAP])
  4449. cfg->fc_encap = tb[RTA_ENCAP];
  4450. if (tb[RTA_ENCAP_TYPE]) {
  4451. cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
  4452. err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
  4453. if (err < 0)
  4454. goto errout;
  4455. }
  4456. if (tb[RTA_EXPIRES]) {
  4457. unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
  4458. if (addrconf_finite_timeout(timeout)) {
  4459. cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
  4460. cfg->fc_flags |= RTF_EXPIRES;
  4461. }
  4462. }
  4463. err = 0;
  4464. errout:
  4465. return err;
  4466. }
  4467. struct rt6_nh {
  4468. struct fib6_info *fib6_info;
  4469. struct fib6_config r_cfg;
  4470. struct list_head list;
  4471. };
  4472. static int ip6_route_info_append(struct list_head *rt6_nh_list,
  4473. struct fib6_info *rt,
  4474. struct fib6_config *r_cfg)
  4475. {
  4476. struct rt6_nh *nh;
  4477. list_for_each_entry(nh, rt6_nh_list, list) {
  4478. /* check if fib6_info already exists */
  4479. if (rt6_duplicate_nexthop(nh->fib6_info, rt))
  4480. return -EEXIST;
  4481. }
  4482. nh = kzalloc_obj(*nh);
  4483. if (!nh)
  4484. return -ENOMEM;
  4485. nh->fib6_info = rt;
  4486. memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
  4487. list_add_tail(&nh->list, rt6_nh_list);
  4488. return 0;
  4489. }
  4490. static void ip6_route_mpath_notify(struct fib6_info *rt,
  4491. struct fib6_info *rt_last,
  4492. struct nl_info *info,
  4493. __u16 nlflags)
  4494. {
  4495. /* if this is an APPEND route, then rt points to the first route
  4496. * inserted and rt_last points to last route inserted. Userspace
  4497. * wants a consistent dump of the route which starts at the first
  4498. * nexthop. Since sibling routes are always added at the end of
  4499. * the list, find the first sibling of the last route appended
  4500. */
  4501. rcu_read_lock();
  4502. if ((nlflags & NLM_F_APPEND) && rt_last &&
  4503. READ_ONCE(rt_last->fib6_nsiblings)) {
  4504. rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
  4505. struct fib6_info,
  4506. fib6_siblings);
  4507. }
  4508. if (rt)
  4509. inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
  4510. rcu_read_unlock();
  4511. }
  4512. static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
  4513. {
  4514. bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
  4515. bool should_notify = false;
  4516. struct fib6_info *leaf;
  4517. struct fib6_node *fn;
  4518. rcu_read_lock();
  4519. fn = rcu_dereference(rt->fib6_node);
  4520. if (!fn)
  4521. goto out;
  4522. leaf = rcu_dereference(fn->leaf);
  4523. if (!leaf)
  4524. goto out;
  4525. if (rt == leaf ||
  4526. (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
  4527. rt6_qualify_for_ecmp(leaf)))
  4528. should_notify = true;
  4529. out:
  4530. rcu_read_unlock();
  4531. return should_notify;
  4532. }
  4533. static int ip6_route_multipath_add(struct fib6_config *cfg,
  4534. struct netlink_ext_ack *extack)
  4535. {
  4536. struct fib6_info *rt_notif = NULL, *rt_last = NULL;
  4537. struct nl_info *info = &cfg->fc_nlinfo;
  4538. struct rt6_nh *nh, *nh_safe;
  4539. struct fib6_config r_cfg;
  4540. struct rtnexthop *rtnh;
  4541. LIST_HEAD(rt6_nh_list);
  4542. struct rt6_nh *err_nh;
  4543. struct fib6_info *rt;
  4544. __u16 nlflags;
  4545. int remaining;
  4546. int attrlen;
  4547. int replace;
  4548. int nhn = 0;
  4549. int err;
  4550. err = fib6_config_validate(cfg, extack);
  4551. if (err)
  4552. return err;
  4553. replace = (cfg->fc_nlinfo.nlh &&
  4554. (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
  4555. nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
  4556. if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
  4557. nlflags |= NLM_F_APPEND;
  4558. remaining = cfg->fc_mp_len;
  4559. rtnh = (struct rtnexthop *)cfg->fc_mp;
  4560. /* Parse a Multipath Entry and build a list (rt6_nh_list) of
  4561. * fib6_info structs per nexthop
  4562. */
  4563. while (rtnh_ok(rtnh, remaining)) {
  4564. memcpy(&r_cfg, cfg, sizeof(*cfg));
  4565. if (rtnh->rtnh_ifindex)
  4566. r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
  4567. attrlen = rtnh_attrlen(rtnh);
  4568. if (attrlen > 0) {
  4569. struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
  4570. nla = nla_find(attrs, attrlen, RTA_GATEWAY);
  4571. if (nla) {
  4572. r_cfg.fc_gateway = nla_get_in6_addr(nla);
  4573. r_cfg.fc_flags |= RTF_GATEWAY;
  4574. }
  4575. r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
  4576. nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
  4577. if (nla)
  4578. r_cfg.fc_encap_type = nla_get_u16(nla);
  4579. }
  4580. r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
  4581. rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
  4582. if (IS_ERR(rt)) {
  4583. err = PTR_ERR(rt);
  4584. rt = NULL;
  4585. goto cleanup;
  4586. }
  4587. err = ip6_route_info_create_nh(rt, &r_cfg, GFP_KERNEL, extack);
  4588. if (err) {
  4589. rt = NULL;
  4590. goto cleanup;
  4591. }
  4592. rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
  4593. err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
  4594. if (err) {
  4595. fib6_info_release(rt);
  4596. goto cleanup;
  4597. }
  4598. rtnh = rtnh_next(rtnh, &remaining);
  4599. }
  4600. /* for add and replace send one notification with all nexthops.
  4601. * Skip the notification in fib6_add_rt2node and send one with
  4602. * the full route when done
  4603. */
  4604. info->skip_notify = 1;
  4605. /* For add and replace, send one notification with all nexthops. For
  4606. * append, send one notification with all appended nexthops.
  4607. */
  4608. info->skip_notify_kernel = 1;
  4609. err_nh = NULL;
  4610. list_for_each_entry(nh, &rt6_nh_list, list) {
  4611. err = __ip6_ins_rt(nh->fib6_info, info, extack);
  4612. if (err) {
  4613. if (replace && nhn)
  4614. NL_SET_ERR_MSG_MOD(extack,
  4615. "multipath route replace failed (check consistency of installed routes)");
  4616. err_nh = nh;
  4617. goto add_errout;
  4618. }
  4619. /* save reference to last route successfully inserted */
  4620. rt_last = nh->fib6_info;
  4621. /* save reference to first route for notification */
  4622. if (!rt_notif)
  4623. rt_notif = nh->fib6_info;
  4624. /* Because each route is added like a single route we remove
  4625. * these flags after the first nexthop: if there is a collision,
  4626. * we have already failed to add the first nexthop:
  4627. * fib6_add_rt2node() has rejected it; when replacing, old
  4628. * nexthops have been replaced by first new, the rest should
  4629. * be added to it.
  4630. */
  4631. if (cfg->fc_nlinfo.nlh) {
  4632. cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
  4633. NLM_F_REPLACE);
  4634. cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
  4635. }
  4636. nhn++;
  4637. }
  4638. /* An in-kernel notification should only be sent in case the new
  4639. * multipath route is added as the first route in the node, or if
  4640. * it was appended to it. We pass 'rt_notif' since it is the first
  4641. * sibling and might allow us to skip some checks in the replace case.
  4642. */
  4643. if (ip6_route_mpath_should_notify(rt_notif)) {
  4644. enum fib_event_type fib_event;
  4645. if (rt_notif->fib6_nsiblings != nhn - 1)
  4646. fib_event = FIB_EVENT_ENTRY_APPEND;
  4647. else
  4648. fib_event = FIB_EVENT_ENTRY_REPLACE;
  4649. err = call_fib6_multipath_entry_notifiers(info->nl_net,
  4650. fib_event, rt_notif,
  4651. nhn - 1, extack);
  4652. if (err) {
  4653. /* Delete all the siblings that were just added */
  4654. err_nh = NULL;
  4655. goto add_errout;
  4656. }
  4657. }
  4658. /* success ... tell user about new route */
  4659. ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
  4660. goto cleanup;
  4661. add_errout:
  4662. /* send notification for routes that were added so that
  4663. * the delete notifications sent by ip6_route_del are
  4664. * coherent
  4665. */
  4666. if (rt_notif)
  4667. ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
  4668. /* Delete routes that were already added */
  4669. list_for_each_entry(nh, &rt6_nh_list, list) {
  4670. if (err_nh == nh)
  4671. break;
  4672. ip6_route_del(&nh->r_cfg, extack);
  4673. }
  4674. cleanup:
  4675. list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) {
  4676. fib6_info_release(nh->fib6_info);
  4677. list_del(&nh->list);
  4678. kfree(nh);
  4679. }
  4680. return err;
  4681. }
  4682. static int ip6_route_multipath_del(struct fib6_config *cfg,
  4683. struct netlink_ext_ack *extack)
  4684. {
  4685. struct fib6_config r_cfg;
  4686. struct rtnexthop *rtnh;
  4687. int last_err = 0;
  4688. int remaining;
  4689. int attrlen;
  4690. int err;
  4691. remaining = cfg->fc_mp_len;
  4692. rtnh = (struct rtnexthop *)cfg->fc_mp;
  4693. /* Parse a Multipath Entry */
  4694. while (rtnh_ok(rtnh, remaining)) {
  4695. memcpy(&r_cfg, cfg, sizeof(*cfg));
  4696. if (rtnh->rtnh_ifindex)
  4697. r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
  4698. attrlen = rtnh_attrlen(rtnh);
  4699. if (attrlen > 0) {
  4700. struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
  4701. nla = nla_find(attrs, attrlen, RTA_GATEWAY);
  4702. if (nla) {
  4703. r_cfg.fc_gateway = nla_get_in6_addr(nla);
  4704. r_cfg.fc_flags |= RTF_GATEWAY;
  4705. }
  4706. }
  4707. err = ip6_route_del(&r_cfg, extack);
  4708. if (err)
  4709. last_err = err;
  4710. rtnh = rtnh_next(rtnh, &remaining);
  4711. }
  4712. return last_err;
  4713. }
  4714. static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
  4715. struct netlink_ext_ack *extack)
  4716. {
  4717. struct fib6_config cfg;
  4718. int err;
  4719. err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
  4720. if (err < 0)
  4721. return err;
  4722. if (cfg.fc_nh_id) {
  4723. rcu_read_lock();
  4724. err = !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id);
  4725. rcu_read_unlock();
  4726. if (err) {
  4727. NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
  4728. return -EINVAL;
  4729. }
  4730. }
  4731. if (cfg.fc_mp) {
  4732. return ip6_route_multipath_del(&cfg, extack);
  4733. } else {
  4734. cfg.fc_delete_all_nh = 1;
  4735. return ip6_route_del(&cfg, extack);
  4736. }
  4737. }
  4738. static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
  4739. struct netlink_ext_ack *extack)
  4740. {
  4741. struct fib6_config cfg;
  4742. int err;
  4743. err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
  4744. if (err < 0)
  4745. return err;
  4746. if (cfg.fc_metric == 0)
  4747. cfg.fc_metric = IP6_RT_PRIO_USER;
  4748. if (cfg.fc_mp)
  4749. return ip6_route_multipath_add(&cfg, extack);
  4750. else
  4751. return ip6_route_add(&cfg, GFP_KERNEL, extack);
  4752. }
  4753. /* add the overhead of this fib6_nh to nexthop_len */
  4754. static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
  4755. {
  4756. int *nexthop_len = arg;
  4757. *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */
  4758. + NLA_ALIGN(sizeof(struct rtnexthop))
  4759. + nla_total_size(16); /* RTA_GATEWAY */
  4760. if (nh->fib_nh_lws) {
  4761. /* RTA_ENCAP_TYPE */
  4762. *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
  4763. /* RTA_ENCAP */
  4764. *nexthop_len += nla_total_size(2);
  4765. }
  4766. return 0;
  4767. }
  4768. static size_t rt6_nlmsg_size(struct fib6_info *f6i)
  4769. {
  4770. struct fib6_info *sibling;
  4771. struct fib6_nh *nh;
  4772. int nexthop_len;
  4773. if (f6i->nh) {
  4774. nexthop_len = nla_total_size(4); /* RTA_NH_ID */
  4775. nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
  4776. &nexthop_len);
  4777. goto common;
  4778. }
  4779. rcu_read_lock();
  4780. retry:
  4781. nh = f6i->fib6_nh;
  4782. nexthop_len = 0;
  4783. if (READ_ONCE(f6i->fib6_nsiblings)) {
  4784. rt6_nh_nlmsg_size(nh, &nexthop_len);
  4785. list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
  4786. fib6_siblings) {
  4787. rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
  4788. if (!READ_ONCE(f6i->fib6_nsiblings))
  4789. goto retry;
  4790. }
  4791. }
  4792. rcu_read_unlock();
  4793. nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
  4794. common:
  4795. return NLMSG_ALIGN(sizeof(struct rtmsg))
  4796. + nla_total_size(16) /* RTA_SRC */
  4797. + nla_total_size(16) /* RTA_DST */
  4798. + nla_total_size(16) /* RTA_GATEWAY */
  4799. + nla_total_size(16) /* RTA_PREFSRC */
  4800. + nla_total_size(4) /* RTA_TABLE */
  4801. + nla_total_size(4) /* RTA_IIF */
  4802. + nla_total_size(4) /* RTA_OIF */
  4803. + nla_total_size(4) /* RTA_PRIORITY */
  4804. + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
  4805. + nla_total_size(sizeof(struct rta_cacheinfo))
  4806. + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
  4807. + nla_total_size(1) /* RTA_PREF */
  4808. + nexthop_len;
  4809. }
  4810. static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
  4811. unsigned char *flags)
  4812. {
  4813. if (nexthop_is_multipath(nh)) {
  4814. struct nlattr *mp;
  4815. mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
  4816. if (!mp)
  4817. goto nla_put_failure;
  4818. if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
  4819. goto nla_put_failure;
  4820. nla_nest_end(skb, mp);
  4821. } else {
  4822. struct fib6_nh *fib6_nh;
  4823. fib6_nh = nexthop_fib6_nh(nh);
  4824. if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
  4825. flags, false) < 0)
  4826. goto nla_put_failure;
  4827. }
  4828. return 0;
  4829. nla_put_failure:
  4830. return -EMSGSIZE;
  4831. }
/*
 * Fill @skb with one route message of netlink type @type describing @rt.
 *
 * @dst:  optional dst entry. When the rt6_info derived from it is non-NULL,
 *        its keys, flags, gateway, device and encap are reported instead of
 *        the fib entry's. NOTE(review): this relies on dst_rt6_info(NULL)
 *        yielding NULL - confirm.
 * @dest: if non-NULL, reported as RTA_DST with rtm_dst_len forced to 128.
 * @src:  if non-NULL, reported as RTA_SRC with rtm_src_len forced to 128
 *        (only when CONFIG_IPV6_SUBTREES is set).
 * @iif:  if non-zero, reported as RTA_IIF; with CONFIG_IPV6_MROUTE a
 *        multicast destination is handed to ip6mr_get_route() instead.
 * @portid, @seq, @flags: passed to nlmsg_put() for the message header.
 *
 * Returns 0 on success. Returns -EMSGSIZE when the header or any attribute
 * cannot be added; except for a failing nlmsg_put() the partially built
 * message is cancelled first.
 */
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	struct rt6_info *rt6 = dst_rt6_info(dst);
	struct rt6key *rt6_dst, *rt6_src;
	u32 *pmetrics, table, rt6_flags;
	unsigned char nh_flags = 0;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	long expires = 0;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Take keys and flags from the dst-derived entry when there is one */
	if (rt6) {
		rt6_dst = &rt6->rt6i_dst;
		rt6_src = &rt6->rt6i_src;
		rt6_flags = rt6->rt6i_flags;
	} else {
		rt6_dst = &rt->fib6_dst;
		rt6_src = &rt->fib6_src;
		rt6_flags = rt->fib6_flags;
	}

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt6_dst->plen;
	rtm->rtm_src_len = rt6_src->plen;
	rtm->rtm_tos = 0;
	if (rt->fib6_table)
		table = rt->fib6_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/*
	 * Table ids of 256 and above are replaced by RT_TABLE_COMPAT in the
	 * header; the unmodified id is always sent in RTA_TABLE.
	 */
	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	rtm->rtm_type = rt->fib6_type;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->fib6_protocol;

	if (rt6_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dest wins over the route's own destination prefix */
	if (dest) {
		if (nla_put_in6_addr(skb, RTA_DST, dest))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			/*
			 * 0 ends the fill here with success, a negative
			 * value is treated as a fill failure, a positive
			 * value continues below without adding RTA_IIF.
			 */
			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dest) {
		struct in6_addr saddr_buf;
		/* RTA_PREFSRC is added only if a source address was found */
		if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* The route's configured preferred source, if it has one */
	if (rt->fib6_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->fib6_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* Metrics come from @dst when given, else from the fib entry */
	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt6) {
		struct net_device *dev;

		/* dst-derived entry: report its own gateway, device, encap */
		if (rt6_flags & RTF_GATEWAY &&
		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
			goto nla_put_failure;

		dev = dst_dev(dst);
		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
			goto nla_put_failure;

		if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
			goto nla_put_failure;
	} else if (READ_ONCE(rt->fib6_nsiblings)) {
		struct fib6_info *sibling;
		struct nlattr *mp;

		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		/* The route itself is the first nexthop of the nest */
		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
				    rt->fib6_nh->fib_nh_weight, AF_INET6,
				    0) < 0)
			goto nla_put_failure;

		rcu_read_lock();

		list_for_each_entry_rcu(sibling, &rt->fib6_siblings,
					fib6_siblings) {
			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
					    sibling->fib6_nh->fib_nh_weight,
					    AF_INET6, 0) < 0) {
				/* drop the RCU read lock before bailing out */
				rcu_read_unlock();

				goto nla_put_failure;
			}
		}

		rcu_read_unlock();

		nla_nest_end(skb, mp);
	} else if (rt->nh) {
		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
			goto nla_put_failure;

		if (nexthop_is_blackhole(rt->nh))
			rtm->rtm_type = RTN_BLACKHOLE;

		/* Nexthop details are added only when compat mode is enabled */
		if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	} else {
		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
				     &nh_flags, false) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	}

	/* Expiry is reported relative to the current jiffies value */
	if (rt6_flags & RTF_EXPIRES) {
		expires = dst ? READ_ONCE(dst->expires) : rt->expires;
		expires -= jiffies;
	}

	/* Offload/trap state is reported only when no @dst was supplied */
	if (!dst) {
		if (READ_ONCE(rt->offload))
			rtm->rtm_flags |= RTM_F_OFFLOAD;
		if (READ_ONCE(rt->trap))
			rtm->rtm_flags |= RTM_F_TRAP;
		if (READ_ONCE(rt->offload_failed))
			rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
	}

	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	/* Discard the partially built message */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
  4992. static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
  4993. {
  4994. const struct net_device *dev = arg;
  4995. if (nh->fib_nh_dev == dev)
  4996. return 1;
  4997. return 0;
  4998. }
  4999. static bool fib6_info_uses_dev(const struct fib6_info *f6i,
  5000. const struct net_device *dev)
  5001. {
  5002. if (f6i->nh) {
  5003. struct net_device *_dev = (struct net_device *)dev;
  5004. return !!nexthop_for_each_fib6_nh(f6i->nh,
  5005. fib6_info_nh_uses_dev,
  5006. _dev);
  5007. }
  5008. if (f6i->fib6_nh->fib_nh_dev == dev)
  5009. return true;
  5010. if (READ_ONCE(f6i->fib6_nsiblings)) {
  5011. const struct fib6_info *sibling;
  5012. rcu_read_lock();
  5013. list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
  5014. fib6_siblings) {
  5015. if (sibling->fib6_nh->fib_nh_dev == dev) {
  5016. rcu_read_unlock();
  5017. return true;
  5018. }
  5019. if (!READ_ONCE(f6i->fib6_nsiblings))
  5020. break;
  5021. }
  5022. rcu_read_unlock();
  5023. }
  5024. return false;
  5025. }
/*
 * State shared between rt6_dump_route() and rt6_nh_dump_exceptions()
 * while the exception entries of one route are being dumped.
 */
struct fib6_nh_exception_dump_walker {
	struct rt6_rtnl_dump_arg *dump;	/* dump context: net, skb, callback */
	struct fib6_info *rt;		/* route passed to rt6_fill_node() */
	unsigned int flags;		/* netlink flags for each message */
	unsigned int skip;		/* entries still to be skipped */
	unsigned int count;		/* entries handled (dumped or expired) */
};
  5033. static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
  5034. {
  5035. struct fib6_nh_exception_dump_walker *w = arg;
  5036. struct rt6_rtnl_dump_arg *dump = w->dump;
  5037. struct rt6_exception_bucket *bucket;
  5038. struct rt6_exception *rt6_ex;
  5039. int i, err;
  5040. bucket = fib6_nh_get_excptn_bucket(nh, NULL);
  5041. if (!bucket)
  5042. return 0;
  5043. for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
  5044. hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
  5045. if (w->skip) {
  5046. w->skip--;
  5047. continue;
  5048. }
  5049. /* Expiration of entries doesn't bump sernum, insertion
  5050. * does. Removal is triggered by insertion, so we can
  5051. * rely on the fact that if entries change between two
  5052. * partial dumps, this node is scanned again completely,
  5053. * see rt6_insert_exception() and fib6_dump_table().
  5054. *
  5055. * Count expired entries we go through as handled
  5056. * entries that we'll skip next time, in case of partial
  5057. * node dump. Otherwise, if entries expire meanwhile,
  5058. * we'll skip the wrong amount.
  5059. */
  5060. if (rt6_check_expired(rt6_ex->rt6i)) {
  5061. w->count++;
  5062. continue;
  5063. }
  5064. err = rt6_fill_node(dump->net, dump->skb, w->rt,
  5065. &rt6_ex->rt6i->dst, NULL, NULL, 0,
  5066. RTM_NEWROUTE,
  5067. NETLINK_CB(dump->cb->skb).portid,
  5068. dump->cb->nlh->nlmsg_seq, w->flags);
  5069. if (err)
  5070. return err;
  5071. w->count++;
  5072. }
  5073. bucket++;
  5074. }
  5075. return 0;
  5076. }
  5077. /* Return -1 if done with node, number of handled routes on partial dump */
  5078. int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
  5079. {
  5080. struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
  5081. struct fib_dump_filter *filter = &arg->filter;
  5082. unsigned int flags = NLM_F_MULTI;
  5083. struct net *net = arg->net;
  5084. int count = 0;
  5085. if (rt == net->ipv6.fib6_null_entry)
  5086. return -1;
  5087. if ((filter->flags & RTM_F_PREFIX) &&
  5088. !(rt->fib6_flags & RTF_PREFIX_RT)) {
  5089. /* success since this is not a prefix route */
  5090. return -1;
  5091. }
  5092. if (filter->filter_set &&
  5093. ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
  5094. (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
  5095. (filter->protocol && rt->fib6_protocol != filter->protocol))) {
  5096. return -1;
  5097. }
  5098. if (filter->filter_set ||
  5099. !filter->dump_routes || !filter->dump_exceptions) {
  5100. flags |= NLM_F_DUMP_FILTERED;
  5101. }
  5102. if (filter->dump_routes) {
  5103. if (skip) {
  5104. skip--;
  5105. } else {
  5106. if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
  5107. 0, RTM_NEWROUTE,
  5108. NETLINK_CB(arg->cb->skb).portid,
  5109. arg->cb->nlh->nlmsg_seq, flags)) {
  5110. return 0;
  5111. }
  5112. count++;
  5113. }
  5114. }
  5115. if (filter->dump_exceptions) {
  5116. struct fib6_nh_exception_dump_walker w = { .dump = arg,
  5117. .rt = rt,
  5118. .flags = flags,
  5119. .skip = skip,
  5120. .count = 0 };
  5121. int err;
  5122. rcu_read_lock();
  5123. if (rt->nh) {
  5124. err = nexthop_for_each_fib6_nh(rt->nh,
  5125. rt6_nh_dump_exceptions,
  5126. &w);
  5127. } else {
  5128. err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
  5129. }
  5130. rcu_read_unlock();
  5131. if (err)
  5132. return count + w.count;
  5133. }
  5134. return -1;
  5135. }
  5136. static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
  5137. const struct nlmsghdr *nlh,
  5138. struct nlattr **tb,
  5139. struct netlink_ext_ack *extack)
  5140. {
  5141. struct rtmsg *rtm;
  5142. int i, err;
  5143. rtm = nlmsg_payload(nlh, sizeof(*rtm));
  5144. if (!rtm) {
  5145. NL_SET_ERR_MSG_MOD(extack,
  5146. "Invalid header for get route request");
  5147. return -EINVAL;
  5148. }
  5149. if (!netlink_strict_get_check(skb))
  5150. return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
  5151. rtm_ipv6_policy, extack);
  5152. if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
  5153. (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
  5154. rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
  5155. rtm->rtm_type) {
  5156. NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
  5157. return -EINVAL;
  5158. }
  5159. if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
  5160. NL_SET_ERR_MSG_MOD(extack,
  5161. "Invalid flags for get route request");
  5162. return -EINVAL;
  5163. }
  5164. err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
  5165. rtm_ipv6_policy, extack);
  5166. if (err)
  5167. return err;
  5168. if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
  5169. (tb[RTA_DST] && !rtm->rtm_dst_len)) {
  5170. NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
  5171. return -EINVAL;
  5172. }
  5173. if (tb[RTA_FLOWLABEL] &&
  5174. (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
  5175. NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
  5176. "Invalid flow label");
  5177. return -EINVAL;
  5178. }
  5179. for (i = 0; i <= RTA_MAX; i++) {
  5180. if (!tb[i])
  5181. continue;
  5182. switch (i) {
  5183. case RTA_SRC:
  5184. case RTA_DST:
  5185. case RTA_IIF:
  5186. case RTA_OIF:
  5187. case RTA_MARK:
  5188. case RTA_UID:
  5189. case RTA_SPORT:
  5190. case RTA_DPORT:
  5191. case RTA_IP_PROTO:
  5192. case RTA_FLOWLABEL:
  5193. break;
  5194. default:
  5195. NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
  5196. return -EINVAL;
  5197. }
  5198. }
  5199. return 0;
  5200. }
/*
 * Netlink handler answering a "get route" request.
 *
 * Builds a flow (flowi6) from the request attributes, performs an input
 * lookup when RTA_IIF is given and an output lookup otherwise, then sends
 * the result back to the requester as an RTM_NEWROUTE message. With
 * RTM_F_FIB_MATCH set in the request, the fib entry behind the lookup
 * result is reported rather than the lookup result itself.
 *
 * Returns the result of rtnl_unicast() on success or a negative errno.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct fib6_info *from;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6 = {};
	__be32 flowlabel;
	bool fibmatch;

	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	/* Default error for the attribute length checks below */
	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	/*
	 * Without RTA_UID: input lookups use INVALID_UID, output lookups
	 * use the caller's uid.
	 */
	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (tb[RTA_SPORT])
		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);

	if (tb[RTA_DPORT])
		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);

	if (tb[RTA_IP_PROTO]) {
		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
						  &fl6.flowi6_proto, AF_INET6,
						  extack);
		if (err)
			goto errout;
	}

	/* The flow label defaults to 0 when the attribute is absent */
	flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);

	if (iif) {
		struct net_device *dev;
		int flags = 0;

		/* The device lookup and the route lookup share one RCU section */
		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			rcu_read_unlock();
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);

		rcu_read_unlock();
	} else {
		fl6.flowi6_oif = oif;

		dst = ip6_route_output(net, NULL, &fl6);
	}


	rt = dst_rt6_info(dst);
	/* From here on every error path must release the lookup result */
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/*
	 * NOTE(review): after this call the error paths only free the skb
	 * and no longer call ip6_rt_put(); presumably the skb now owns the
	 * route reference - confirm.
	 */
	skb_dst_set(skb, &rt->dst);

	/* rt->from is read under the RCU read lock */
	rcu_read_lock();
	from = rcu_dereference(rt->from);
	if (from) {
		if (fibmatch)
			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
					    iif, RTM_NEWROUTE,
					    NETLINK_CB(in_skb).portid,
					    nlh->nlmsg_seq, 0);
		else
			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
					    &fl6.saddr, iif, RTM_NEWROUTE,
					    NETLINK_CB(in_skb).portid,
					    nlh->nlmsg_seq, 0);
	} else {
		/* No originating fib entry to report */
		err = -ENETUNREACH;
	}
	rcu_read_unlock();

	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
  5317. void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
  5318. unsigned int nlm_flags)
  5319. {
  5320. struct net *net = info->nl_net;
  5321. struct sk_buff *skb;
  5322. size_t sz;
  5323. u32 seq;
  5324. int err;
  5325. err = -ENOBUFS;
  5326. seq = info->nlh ? info->nlh->nlmsg_seq : 0;
  5327. rcu_read_lock();
  5328. sz = rt6_nlmsg_size(rt);
  5329. retry:
  5330. skb = nlmsg_new(sz, GFP_ATOMIC);
  5331. if (!skb)
  5332. goto errout;
  5333. err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
  5334. event, info->portid, seq, nlm_flags);
  5335. if (err < 0) {
  5336. kfree_skb(skb);
  5337. /* -EMSGSIZE implies needed space grew under us. */
  5338. if (err == -EMSGSIZE) {
  5339. sz = max(rt6_nlmsg_size(rt), sz << 1);
  5340. goto retry;
  5341. }
  5342. goto errout;
  5343. }
  5344. rcu_read_unlock();
  5345. rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
  5346. info->nlh, GFP_ATOMIC);
  5347. return;
  5348. errout:
  5349. rcu_read_unlock();
  5350. rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
  5351. }
  5352. void fib6_rt_update(struct net *net, struct fib6_info *rt,
  5353. struct nl_info *info)
  5354. {
  5355. u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
  5356. struct sk_buff *skb;
  5357. int err = -ENOBUFS;
  5358. skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
  5359. if (!skb)
  5360. goto errout;
  5361. err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
  5362. RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
  5363. if (err < 0) {
  5364. /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
  5365. WARN_ON(err == -EMSGSIZE);
  5366. kfree_skb(skb);
  5367. goto errout;
  5368. }
  5369. rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
  5370. info->nlh, gfp_any());
  5371. return;
  5372. errout:
  5373. rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
  5374. }
  5375. void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
  5376. bool offload, bool trap, bool offload_failed)
  5377. {
  5378. u8 fib_notify_on_flag_change;
  5379. struct sk_buff *skb;
  5380. int err;
  5381. if (READ_ONCE(f6i->offload) == offload &&
  5382. READ_ONCE(f6i->trap) == trap &&
  5383. READ_ONCE(f6i->offload_failed) == offload_failed)
  5384. return;
  5385. WRITE_ONCE(f6i->offload, offload);
  5386. WRITE_ONCE(f6i->trap, trap);
  5387. fib_notify_on_flag_change = READ_ONCE(net->ipv6.sysctl.fib_notify_on_flag_change);
  5388. /* 2 means send notifications only if offload_failed was changed. */
  5389. if (fib_notify_on_flag_change == 2 &&
  5390. READ_ONCE(f6i->offload_failed) == offload_failed)
  5391. return;
  5392. WRITE_ONCE(f6i->offload_failed, offload_failed);
  5393. if (!rcu_access_pointer(f6i->fib6_node))
  5394. /* The route was removed from the tree, do not send
  5395. * notification.
  5396. */
  5397. return;
  5398. if (!fib_notify_on_flag_change)
  5399. return;
  5400. skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
  5401. if (!skb) {
  5402. err = -ENOBUFS;
  5403. goto errout;
  5404. }
  5405. err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
  5406. 0, 0);
  5407. if (err < 0) {
  5408. /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
  5409. WARN_ON(err == -EMSGSIZE);
  5410. kfree_skb(skb);
  5411. goto errout;
  5412. }
  5413. rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
  5414. return;
  5415. errout:
  5416. rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
  5417. }
  5418. EXPORT_SYMBOL(fib6_info_hw_flags_set);
  5419. static int ip6_route_dev_notify(struct notifier_block *this,
  5420. unsigned long event, void *ptr)
  5421. {
  5422. struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  5423. struct net *net = dev_net(dev);
  5424. if (!(dev->flags & IFF_LOOPBACK))
  5425. return NOTIFY_OK;
  5426. if (event == NETDEV_REGISTER) {
  5427. net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
  5428. net->ipv6.ip6_null_entry->dst.dev = dev;
  5429. net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
  5430. #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  5431. net->ipv6.ip6_prohibit_entry->dst.dev = dev;
  5432. net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
  5433. net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
  5434. net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
  5435. #endif
  5436. } else if (event == NETDEV_UNREGISTER &&
  5437. dev->reg_state != NETREG_UNREGISTERED) {
  5438. /* NETDEV_UNREGISTER could be fired for multiple times by
  5439. * netdev_wait_allrefs(). Make sure we only call this once.
  5440. */
  5441. in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
  5442. #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  5443. in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
  5444. in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
  5445. #endif
  5446. }
  5447. return NOTIFY_OK;
  5448. }
  5449. /*
  5450. * /proc
  5451. */
  5452. #ifdef CONFIG_PROC_FS
  5453. static int rt6_stats_seq_show(struct seq_file *seq, void *v)
  5454. {
  5455. struct net *net = (struct net *)seq->private;
  5456. seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
  5457. net->ipv6.rt6_stats->fib_nodes,
  5458. net->ipv6.rt6_stats->fib_route_nodes,
  5459. atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
  5460. net->ipv6.rt6_stats->fib_rt_entries,
  5461. net->ipv6.rt6_stats->fib_rt_cache,
  5462. dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
  5463. net->ipv6.rt6_stats->fib_discarded_routes);
  5464. return 0;
  5465. }
  5466. #endif /* CONFIG_PROC_FS */
  5467. #ifdef CONFIG_SYSCTL
  5468. static int ipv6_sysctl_rtcache_flush(const struct ctl_table *ctl, int write,
  5469. void *buffer, size_t *lenp, loff_t *ppos)
  5470. {
  5471. struct net *net;
  5472. int delay;
  5473. int ret;
  5474. if (!write)
  5475. return -EINVAL;
  5476. ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
  5477. if (ret)
  5478. return ret;
  5479. net = (struct net *)ctl->extra1;
  5480. delay = READ_ONCE(net->ipv6.sysctl.flush_delay);
  5481. fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
  5482. return 0;
  5483. }
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * The .data pointers below refer to init_net (or to the dst ops template)
 * and are only placeholders: ipv6_route_sysctl_init() duplicates this
 * array and re-points each entry BY INDEX at the fields of the target
 * namespace. Keep the entry order in sync with that function.
 */
static struct ctl_table ipv6_route_table_template[] = {
	{	/* [0] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [2] write-only (mode 0200); writing triggers a GC run */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{	/* [3] */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [9] same variable as gc_min_interval, different handler */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{	/* [10] u8 value limited to 0 or 1 by extra1/extra2 */
		.procname	=	"skip_notify_on_dev_down",
		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
		.maxlen		=	sizeof(u8),
		.mode		=	0644,
		.proc_handler	=	proc_dou8vec_minmax,
		.extra1		=	SYSCTL_ZERO,
		.extra2		=	SYSCTL_ONE,
	},
};
  5565. struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
  5566. {
  5567. struct ctl_table *table;
  5568. table = kmemdup(ipv6_route_table_template,
  5569. sizeof(ipv6_route_table_template),
  5570. GFP_KERNEL);
  5571. if (table) {
  5572. table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
  5573. table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
  5574. table[2].data = &net->ipv6.sysctl.flush_delay;
  5575. table[2].extra1 = net;
  5576. table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
  5577. table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
  5578. table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
  5579. table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
  5580. table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
  5581. table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
  5582. table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
  5583. table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
  5584. }
  5585. return table;
  5586. }
  5587. size_t ipv6_route_sysctl_table_size(struct net *net)
  5588. {
  5589. /* Don't export sysctls to unprivileged users */
  5590. if (net->user_ns != &init_user_ns)
  5591. return 1;
  5592. return ARRAY_SIZE(ipv6_route_table_template);
  5593. }
  5594. #endif
  5595. static int __net_init ip6_route_net_init(struct net *net)
  5596. {
  5597. int ret = -ENOMEM;
  5598. memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
  5599. sizeof(net->ipv6.ip6_dst_ops));
  5600. if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
  5601. goto out_ip6_dst_ops;
  5602. net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
  5603. if (!net->ipv6.fib6_null_entry)
  5604. goto out_ip6_dst_entries;
  5605. memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
  5606. sizeof(*net->ipv6.fib6_null_entry));
  5607. net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
  5608. sizeof(*net->ipv6.ip6_null_entry),
  5609. GFP_KERNEL);
  5610. if (!net->ipv6.ip6_null_entry)
  5611. goto out_fib6_null_entry;
  5612. net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
  5613. dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
  5614. ip6_template_metrics, true);
  5615. INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);
  5616. #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  5617. net->ipv6.fib6_has_custom_rules = false;
  5618. net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
  5619. sizeof(*net->ipv6.ip6_prohibit_entry),
  5620. GFP_KERNEL);
  5621. if (!net->ipv6.ip6_prohibit_entry)
  5622. goto out_ip6_null_entry;
  5623. net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
  5624. dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
  5625. ip6_template_metrics, true);
  5626. INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);
  5627. net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
  5628. sizeof(*net->ipv6.ip6_blk_hole_entry),
  5629. GFP_KERNEL);
  5630. if (!net->ipv6.ip6_blk_hole_entry)
  5631. goto out_ip6_prohibit_entry;
  5632. net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
  5633. dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
  5634. ip6_template_metrics, true);
  5635. INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
  5636. #ifdef CONFIG_IPV6_SUBTREES
  5637. net->ipv6.fib6_routes_require_src = 0;
  5638. #endif
  5639. #endif
  5640. net->ipv6.sysctl.flush_delay = 0;
  5641. net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
  5642. net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
  5643. net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
  5644. net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
  5645. net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
  5646. net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
  5647. net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
  5648. net->ipv6.sysctl.skip_notify_on_dev_down = 0;
  5649. atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
  5650. ret = 0;
  5651. out:
  5652. return ret;
  5653. #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  5654. out_ip6_prohibit_entry:
  5655. kfree(net->ipv6.ip6_prohibit_entry);
  5656. out_ip6_null_entry:
  5657. kfree(net->ipv6.ip6_null_entry);
  5658. #endif
  5659. out_fib6_null_entry:
  5660. kfree(net->ipv6.fib6_null_entry);
  5661. out_ip6_dst_entries:
  5662. dst_entries_destroy(&net->ipv6.ip6_dst_ops);
  5663. out_ip6_dst_ops:
  5664. goto out;
  5665. }
  5666. static void __net_exit ip6_route_net_exit(struct net *net)
  5667. {
  5668. kfree(net->ipv6.fib6_null_entry);
  5669. kfree(net->ipv6.ip6_null_entry);
  5670. #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  5671. kfree(net->ipv6.ip6_prohibit_entry);
  5672. kfree(net->ipv6.ip6_blk_hole_entry);
  5673. #endif
  5674. dst_entries_destroy(&net->ipv6.ip6_dst_ops);
  5675. }
  5676. static int __net_init ip6_route_net_init_late(struct net *net)
  5677. {
  5678. #ifdef CONFIG_PROC_FS
  5679. if (!proc_create_net("ipv6_route", 0, net->proc_net,
  5680. &ipv6_route_seq_ops,
  5681. sizeof(struct ipv6_route_iter)))
  5682. return -ENOMEM;
  5683. if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
  5684. rt6_stats_seq_show, NULL)) {
  5685. remove_proc_entry("ipv6_route", net->proc_net);
  5686. return -ENOMEM;
  5687. }
  5688. #endif
  5689. return 0;
  5690. }
  5691. static void __net_exit ip6_route_net_exit_late(struct net *net)
  5692. {
  5693. #ifdef CONFIG_PROC_FS
  5694. remove_proc_entry("ipv6_route", net->proc_net);
  5695. remove_proc_entry("rt6_stats", net->proc_net);
  5696. #endif
  5697. }
  5698. static struct pernet_operations ip6_route_net_ops = {
  5699. .init = ip6_route_net_init,
  5700. .exit = ip6_route_net_exit,
  5701. };
  5702. static int __net_init ipv6_inetpeer_init(struct net *net)
  5703. {
  5704. struct inet_peer_base *bp = kmalloc_obj(*bp);
  5705. if (!bp)
  5706. return -ENOMEM;
  5707. inet_peer_base_init(bp);
  5708. net->ipv6.peers = bp;
  5709. return 0;
  5710. }
  5711. static void __net_exit ipv6_inetpeer_exit(struct net *net)
  5712. {
  5713. struct inet_peer_base *bp = net->ipv6.peers;
  5714. net->ipv6.peers = NULL;
  5715. inetpeer_invalidate_tree(bp);
  5716. kfree(bp);
  5717. }
  5718. static struct pernet_operations ipv6_inetpeer_ops = {
  5719. .init = ipv6_inetpeer_init,
  5720. .exit = ipv6_inetpeer_exit,
  5721. };
  5722. static struct pernet_operations ip6_route_net_late_ops = {
  5723. .init = ip6_route_net_init_late,
  5724. .exit = ip6_route_net_exit_late,
  5725. };
  5726. static struct notifier_block ip6_route_dev_notifier = {
  5727. .notifier_call = ip6_route_dev_notify,
  5728. .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
  5729. };
  5730. void __init ip6_route_init_special_entries(void)
  5731. {
  5732. /* Registering of the loopback is done before this portion of code,
  5733. * the loopback reference in rt6_info will not be taken, do it
  5734. * manually for init_net */
  5735. init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
  5736. init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
  5737. init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  5738. #ifdef CONFIG_IPV6_MULTIPLE_TABLES
  5739. init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
  5740. init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  5741. init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
  5742. init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  5743. #endif
  5744. }
  5745. #if IS_BUILTIN(CONFIG_IPV6)
  5746. #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
  5747. DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
  5748. BTF_ID_LIST_SINGLE(btf_fib6_info_id, struct, fib6_info)
  5749. static const struct bpf_iter_seq_info ipv6_route_seq_info = {
  5750. .seq_ops = &ipv6_route_seq_ops,
  5751. .init_seq_private = bpf_iter_init_seq_net,
  5752. .fini_seq_private = bpf_iter_fini_seq_net,
  5753. .seq_priv_size = sizeof(struct ipv6_route_iter),
  5754. };
  5755. static struct bpf_iter_reg ipv6_route_reg_info = {
  5756. .target = "ipv6_route",
  5757. .ctx_arg_info_size = 1,
  5758. .ctx_arg_info = {
  5759. { offsetof(struct bpf_iter__ipv6_route, rt),
  5760. PTR_TO_BTF_ID_OR_NULL },
  5761. },
  5762. .seq_info = &ipv6_route_seq_info,
  5763. };
  5764. static int __init bpf_iter_register(void)
  5765. {
  5766. ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
  5767. return bpf_iter_reg_target(&ipv6_route_reg_info);
  5768. }
  5769. static void bpf_iter_unregister(void)
  5770. {
  5771. bpf_iter_unreg_target(&ipv6_route_reg_info);
  5772. }
  5773. #endif
  5774. #endif
  5775. static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
  5776. {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
  5777. .doit = inet6_rtm_newroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
  5778. {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
  5779. .doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
  5780. {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
  5781. .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
  5782. };
  5783. int __init ip6_route_init(void)
  5784. {
  5785. int ret;
  5786. int cpu;
  5787. ret = -ENOMEM;
  5788. ip6_dst_ops_template.kmem_cachep =
  5789. kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
  5790. SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
  5791. if (!ip6_dst_ops_template.kmem_cachep)
  5792. goto out;
  5793. ret = dst_entries_init(&ip6_dst_blackhole_ops);
  5794. if (ret)
  5795. goto out_kmem_cache;
  5796. ret = register_pernet_subsys(&ipv6_inetpeer_ops);
  5797. if (ret)
  5798. goto out_dst_entries;
  5799. ret = register_pernet_subsys(&ip6_route_net_ops);
  5800. if (ret)
  5801. goto out_register_inetpeer;
  5802. ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
  5803. ret = fib6_init();
  5804. if (ret)
  5805. goto out_register_subsys;
  5806. ret = xfrm6_init();
  5807. if (ret)
  5808. goto out_fib6_init;
  5809. ret = fib6_rules_init();
  5810. if (ret)
  5811. goto xfrm6_init;
  5812. ret = register_pernet_subsys(&ip6_route_net_late_ops);
  5813. if (ret)
  5814. goto fib6_rules_init;
  5815. ret = rtnl_register_many(ip6_route_rtnl_msg_handlers);
  5816. if (ret < 0)
  5817. goto out_register_late_subsys;
  5818. ret = register_netdevice_notifier(&ip6_route_dev_notifier);
  5819. if (ret)
  5820. goto out_register_late_subsys;
  5821. #if IS_BUILTIN(CONFIG_IPV6)
  5822. #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
  5823. ret = bpf_iter_register();
  5824. if (ret)
  5825. goto out_register_late_subsys;
  5826. #endif
  5827. #endif
  5828. for_each_possible_cpu(cpu) {
  5829. struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
  5830. INIT_LIST_HEAD(&ul->head);
  5831. spin_lock_init(&ul->lock);
  5832. }
  5833. out:
  5834. return ret;
  5835. out_register_late_subsys:
  5836. rtnl_unregister_all(PF_INET6);
  5837. unregister_pernet_subsys(&ip6_route_net_late_ops);
  5838. fib6_rules_init:
  5839. fib6_rules_cleanup();
  5840. xfrm6_init:
  5841. xfrm6_fini();
  5842. out_fib6_init:
  5843. fib6_gc_cleanup();
  5844. out_register_subsys:
  5845. unregister_pernet_subsys(&ip6_route_net_ops);
  5846. out_register_inetpeer:
  5847. unregister_pernet_subsys(&ipv6_inetpeer_ops);
  5848. out_dst_entries:
  5849. dst_entries_destroy(&ip6_dst_blackhole_ops);
  5850. out_kmem_cache:
  5851. kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
  5852. goto out;
  5853. }
  5854. void ip6_route_cleanup(void)
  5855. {
  5856. #if IS_BUILTIN(CONFIG_IPV6)
  5857. #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
  5858. bpf_iter_unregister();
  5859. #endif
  5860. #endif
  5861. unregister_netdevice_notifier(&ip6_route_dev_notifier);
  5862. unregister_pernet_subsys(&ip6_route_net_late_ops);
  5863. fib6_rules_cleanup();
  5864. xfrm6_fini();
  5865. fib6_gc_cleanup();
  5866. unregister_pernet_subsys(&ipv6_inetpeer_ops);
  5867. unregister_pernet_subsys(&ip6_route_net_ops);
  5868. dst_entries_destroy(&ip6_dst_blackhole_ops);
  5869. kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
  5870. }