seccomp_bpf.c 133 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
  4. *
  5. * Test code for seccomp bpf.
  6. */
  7. #define _GNU_SOURCE
  8. #include <sys/types.h>
  9. /*
  10. * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
  11. * we need to use the kernel's siginfo.h file and trick glibc
  12. * into accepting it.
  13. */
  14. #if !__GLIBC_PREREQ(2, 26)
  15. # include <asm/siginfo.h>
  16. # define __have_siginfo_t 1
  17. # define __have_sigval_t 1
  18. # define __have_sigevent_t 1
  19. #endif
  20. #include <errno.h>
  21. #include <linux/filter.h>
  22. #include <sys/prctl.h>
  23. #include <sys/ptrace.h>
  24. #include <sys/time.h>
  25. #include <sys/user.h>
  26. #include <linux/prctl.h>
  27. #include <linux/ptrace.h>
  28. #include <linux/seccomp.h>
  29. #include <pthread.h>
  30. #include <semaphore.h>
  31. #include <signal.h>
  32. #include <stddef.h>
  33. #include <stdbool.h>
  34. #include <string.h>
  35. #include <time.h>
  36. #include <limits.h>
  37. #include <linux/elf.h>
  38. #include <sys/uio.h>
  39. #include <sys/utsname.h>
  40. #include <sys/fcntl.h>
  41. #include <sys/mman.h>
  42. #include <sys/times.h>
  43. #include <sys/socket.h>
  44. #include <sys/ioctl.h>
  45. #include <linux/kcmp.h>
  46. #include <sys/resource.h>
  47. #include <sys/capability.h>
  48. #include <linux/perf_event.h>
  49. #include <unistd.h>
  50. #include <sys/syscall.h>
  51. #include <poll.h>
  52. #include "kselftest_harness.h"
  53. #include "../clone3/clone3_selftests.h"
  /*
   * Compatibility shims. Every fallback below is defined only when no
   * earlier header has already supplied the name.
   */
  /* Attempt to de-conflict with the selftests tree. */
  #if !defined(SKIP)
  # define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
  #endif
  /* Note: evaluates whichever argument is smaller twice. */
  #if !defined(MIN)
  # define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
  #endif
  #if !defined(PR_SET_PTRACER)
  # define PR_SET_PTRACER 0x59616d61
  #endif
  /* Function attribute shorthands. */
  #if !defined(noinline)
  # define noinline __attribute__((noinline))
  #endif
  #if !defined(__nocf_check)
  # define __nocf_check __attribute__((nocf_check))
  #endif
  #if !defined(__naked)
  # define __naked __attribute__((__naked__))
  #endif
  /* The SET and GET options are provided as a pair. */
  #if !defined(PR_SET_NO_NEW_PRIVS)
  # define PR_SET_NO_NEW_PRIVS 38
  # define PR_GET_NO_NEW_PRIVS 39
  #endif
  #if !defined(PR_SECCOMP_EXT)
  # define PR_SECCOMP_EXT 43
  #endif
  #if !defined(SECCOMP_EXT_ACT)
  # define SECCOMP_EXT_ACT 1
  #endif
  #if !defined(SECCOMP_EXT_ACT_TSYNC)
  # define SECCOMP_EXT_ACT_TSYNC 1
  #endif
  /* Mode values passed to prctl(PR_SET_SECCOMP, ...) by the tests in this file. */
  #if !defined(SECCOMP_MODE_STRICT)
  # define SECCOMP_MODE_STRICT 1
  #endif
  #if !defined(SECCOMP_MODE_FILTER)
  # define SECCOMP_MODE_FILTER 2
  #endif
  92. #ifndef SECCOMP_RET_ALLOW
  93. struct seccomp_data {
  94. int nr;
  95. __u32 arch;
  96. __u64 instruction_pointer;
  97. __u64 args[6];
  98. };
  99. #endif
  /*
   * Fallback values for the seccomp filter return actions, grouped by the
   * macro whose absence triggers each group.
   */
  #if !defined(SECCOMP_RET_KILL_PROCESS)
  # define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
  # define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
  #endif
  #if !defined(SECCOMP_RET_KILL)
  /* SECCOMP_RET_KILL is an alias for the thread-killing action. */
  # define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
  # define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
  # define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
  # define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
  # define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
  #endif
  #if !defined(SECCOMP_RET_LOG)
  # define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
  #endif
  /*
   * Fallback syscall numbers for toolchains whose headers do not define
   * them yet. An unknown architecture gets a compile-time warning and the
   * placeholder number 0xffff.
   */
  #if !defined(__NR_seccomp)
  # if defined(__i386__)
  #  define __NR_seccomp 354
  # elif defined(__x86_64__)
  #  define __NR_seccomp 317
  # elif defined(__arm__)
  #  define __NR_seccomp 383
  # elif defined(__aarch64__) || defined(__riscv) || \
         defined(__csky__) || defined(__loongarch__)
  /* These four architectures all use the same number. */
  #  define __NR_seccomp 277
  # elif defined(__hppa__)
  #  define __NR_seccomp 338
  # elif defined(__powerpc__)
  #  define __NR_seccomp 358
  # elif defined(__s390__)
  #  define __NR_seccomp 348
  # elif defined(__xtensa__)
  #  define __NR_seccomp 337
  # elif defined(__sh__)
  #  define __NR_seccomp 372
  # elif defined(__mc68000__)
  #  define __NR_seccomp 380
  # else
  #  warning "seccomp syscall number unknown for this architecture"
  #  define __NR_seccomp 0xffff
  # endif
  #endif
  /* A fallback for uretprobe is supplied for x86_64 only. */
  #if !defined(__NR_uretprobe) && defined(__x86_64__)
  # define __NR_uretprobe 335
  #endif
  /* Fallback operation numbers for the seccomp() wrapper's first argument. */
  #if !defined(SECCOMP_SET_MODE_STRICT)
  # define SECCOMP_SET_MODE_STRICT 0
  #endif
  #if !defined(SECCOMP_SET_MODE_FILTER)
  # define SECCOMP_SET_MODE_FILTER 1
  #endif
  #if !defined(SECCOMP_GET_ACTION_AVAIL)
  # define SECCOMP_GET_ACTION_AVAIL 2
  #endif
  #if !defined(SECCOMP_GET_NOTIF_SIZES)
  # define SECCOMP_GET_NOTIF_SIZES 3
  #endif
  /* Fallback filter flag bits (one bit each, starting at bit 0). */
  #if !defined(SECCOMP_FILTER_FLAG_TSYNC)
  # define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
  #endif
  #if !defined(SECCOMP_FILTER_FLAG_LOG)
  # define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
  #endif
  #if !defined(SECCOMP_FILTER_FLAG_SPEC_ALLOW)
  # define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
  #endif
  172. #ifndef PTRACE_SECCOMP_GET_METADATA
  173. #define PTRACE_SECCOMP_GET_METADATA 0x420d
  174. struct seccomp_metadata {
  175. __u64 filter_off; /* Input: which filter */
  176. __u64 flags; /* Output: filter's flags */
  177. };
  178. #endif
  179. #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
  180. #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
  181. #endif
  182. #ifndef SECCOMP_RET_USER_NOTIF
  183. #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
  184. #define SECCOMP_IOC_MAGIC '!'
  185. #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
  186. #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
  187. #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type)
  188. #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type)
  189. /* Flags for seccomp notification fd ioctl. */
  190. #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
  191. #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
  192. struct seccomp_notif_resp)
  193. #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
  194. struct seccomp_notif {
  195. __u64 id;
  196. __u32 pid;
  197. __u32 flags;
  198. struct seccomp_data data;
  199. };
  200. struct seccomp_notif_resp {
  201. __u64 id;
  202. __s64 val;
  203. __s32 error;
  204. __u32 flags;
  205. };
  206. struct seccomp_notif_sizes {
  207. __u16 seccomp_notif;
  208. __u16 seccomp_notif_resp;
  209. __u16 seccomp_data;
  210. };
  211. #endif
  212. #ifndef SECCOMP_IOCTL_NOTIF_ADDFD
  213. /* On success, the return value is the remote process's added fd number */
  214. #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
  215. struct seccomp_notif_addfd)
  216. /* valid flags for seccomp_notif_addfd */
  217. #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
  218. struct seccomp_notif_addfd {
  219. __u64 id;
  220. __u32 flags;
  221. __u32 srcfd;
  222. __u32 newfd;
  223. __u32 newfd_flags;
  224. };
  225. #endif
  226. #ifndef SECCOMP_ADDFD_FLAG_SEND
  227. #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
  228. #endif
  229. struct seccomp_notif_addfd_small {
  230. __u64 id;
  231. char weird[4];
  232. };
  233. #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \
  234. SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
  235. struct seccomp_notif_addfd_big {
  236. union {
  237. struct seccomp_notif_addfd addfd;
  238. char buf[sizeof(struct seccomp_notif_addfd) + 8];
  239. };
  240. };
  241. #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \
  242. SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
  /* Fallback ptrace event message values; ENTRY and EXIT come as a pair. */
  #if !defined(PTRACE_EVENTMSG_SYSCALL_ENTRY)
  # define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
  # define PTRACE_EVENTMSG_SYSCALL_EXIT 2
  #endif
  #if !defined(SECCOMP_USER_NOTIF_FLAG_CONTINUE)
  # define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
  #endif
  /* Fallback filter flag bits 4 and 5. */
  #if !defined(SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
  # define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
  #endif
  #if !defined(SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
  # define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
  #endif
  #ifndef seccomp
  /*
   * Invoke the raw seccomp syscall.
   *
   * @op:    operation number passed through to the syscall
   * @flags: flags passed through to the syscall
   * @args:  operation-specific argument pointer passed through unchanged
   *
   * errno is cleared before the call. Returns the syscall() result
   * converted to int.
   */
  int seccomp(unsigned int op, unsigned int flags, void *args)
  {
      long rc;

      errno = 0;
      rc = syscall(__NR_seccomp, op, flags, args);
      return rc;
  }
  #endif
  /*
   * syscall_arg(n): byte offset inside struct seccomp_data of a 32-bit word
   * of args[n]. On little-endian that is the start of the 64-bit slot; on
   * big-endian it is sizeof(__u32) bytes further in, i.e. the same
   * low-order half.
   */
  #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  # define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
  #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  # define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
  #else
  # error "wut? Unknown __BYTE_ORDER__?!"
  #endif

  /* Distinct marker values for sibling exit states. */
  #define SIBLING_EXIT_UNKILLED 0xbadbeef
  #define SIBLING_EXIT_FAILURE 0xbadface
  #define SIBLING_EXIT_NEWPRIVS 0xbadfeed
  273. static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
  274. {
  275. #ifdef __NR_kcmp
  276. errno = 0;
  277. return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
  278. #else
  279. errno = ENOSYS;
  280. return -1;
  281. #endif
  282. }
  /*
   * Wrapper around __filecmp() written as a macro so that TH_LOG reports
   * the location where filecmp() is actually used. A missing kcmp()
   * syscall (negative result with errno == ENOSYS) is logged and then
   * reported as 0; every other result is passed through unchanged.
   */
  #define filecmp(pid1, pid2, fd1, fd2) ({ \
      int _ret = __filecmp(pid1, pid2, fd1, fd2); \
      \
      if (_ret < 0 && errno == ENOSYS) { \
          TH_LOG("kcmp() syscall missing (test is less accurate)");\
          _ret = 0; \
      } \
      _ret; })
  295. TEST(kcmp)
  296. {
  297. int ret;
  298. ret = __filecmp(getpid(), getpid(), 1, 1);
  299. EXPECT_EQ(ret, 0);
  300. if (ret != 0 && errno == ENOSYS)
  301. SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
  302. }
  303. TEST(mode_strict_support)
  304. {
  305. long ret;
  306. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
  307. ASSERT_EQ(0, ret) {
  308. TH_LOG("Kernel does not support CONFIG_SECCOMP");
  309. }
  310. syscall(__NR_exit, 0);
  311. }
  312. TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
  313. {
  314. long ret;
  315. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
  316. ASSERT_EQ(0, ret) {
  317. TH_LOG("Kernel does not support CONFIG_SECCOMP");
  318. }
  319. syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
  320. NULL, NULL, NULL);
  321. EXPECT_FALSE(true) {
  322. TH_LOG("Unreachable!");
  323. }
  324. }
  325. /* Note! This doesn't test no new privs behavior */
  326. TEST(no_new_privs_support)
  327. {
  328. long ret;
  329. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  330. EXPECT_EQ(0, ret) {
  331. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  332. }
  333. }
  334. /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
  335. TEST(mode_filter_support)
  336. {
  337. long ret;
  338. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
  339. ASSERT_EQ(0, ret) {
  340. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  341. }
  342. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
  343. EXPECT_EQ(-1, ret);
  344. EXPECT_EQ(EFAULT, errno) {
  345. TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
  346. }
  347. }
  348. TEST(mode_filter_without_nnp)
  349. {
  350. struct sock_filter filter[] = {
  351. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  352. };
  353. struct sock_fprog prog = {
  354. .len = (unsigned short)ARRAY_SIZE(filter),
  355. .filter = filter,
  356. };
  357. long ret;
  358. cap_t cap = cap_get_proc();
  359. cap_flag_value_t is_cap_sys_admin = 0;
  360. ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
  361. ASSERT_LE(0, ret) {
  362. TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
  363. }
  364. errno = 0;
  365. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  366. /* Succeeds with CAP_SYS_ADMIN, fails without */
  367. cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin);
  368. if (!is_cap_sys_admin) {
  369. EXPECT_EQ(-1, ret);
  370. EXPECT_EQ(EACCES, errno);
  371. } else {
  372. EXPECT_EQ(0, ret);
  373. }
  374. }
  375. #define MAX_INSNS_PER_PATH 32768
  376. TEST(filter_size_limits)
  377. {
  378. int i;
  379. int count = BPF_MAXINSNS + 1;
  380. struct sock_filter allow[] = {
  381. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  382. };
  383. struct sock_filter *filter;
  384. struct sock_fprog prog = { };
  385. long ret;
  386. filter = calloc(count, sizeof(*filter));
  387. ASSERT_NE(NULL, filter);
  388. for (i = 0; i < count; i++)
  389. filter[i] = allow[0];
  390. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  391. ASSERT_EQ(0, ret);
  392. prog.filter = filter;
  393. prog.len = count;
  394. /* Too many filter instructions in a single filter. */
  395. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  396. ASSERT_NE(0, ret) {
  397. TH_LOG("Installing %d insn filter was allowed", prog.len);
  398. }
  399. /* One less is okay, though. */
  400. prog.len -= 1;
  401. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  402. ASSERT_EQ(0, ret) {
  403. TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
  404. }
  405. }
  406. TEST(filter_chain_limits)
  407. {
  408. int i;
  409. int count = BPF_MAXINSNS;
  410. struct sock_filter allow[] = {
  411. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  412. };
  413. struct sock_filter *filter;
  414. struct sock_fprog prog = { };
  415. long ret;
  416. filter = calloc(count, sizeof(*filter));
  417. ASSERT_NE(NULL, filter);
  418. for (i = 0; i < count; i++)
  419. filter[i] = allow[0];
  420. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  421. ASSERT_EQ(0, ret);
  422. prog.filter = filter;
  423. prog.len = 1;
  424. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  425. ASSERT_EQ(0, ret);
  426. prog.len = count;
  427. /* Too many total filter instructions. */
  428. for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
  429. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  430. if (ret != 0)
  431. break;
  432. }
  433. ASSERT_NE(0, ret) {
  434. TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
  435. i, count, i * (count + 4));
  436. }
  437. }
  438. TEST(mode_filter_cannot_move_to_strict)
  439. {
  440. struct sock_filter filter[] = {
  441. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  442. };
  443. struct sock_fprog prog = {
  444. .len = (unsigned short)ARRAY_SIZE(filter),
  445. .filter = filter,
  446. };
  447. long ret;
  448. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  449. ASSERT_EQ(0, ret);
  450. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  451. ASSERT_EQ(0, ret);
  452. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
  453. EXPECT_EQ(-1, ret);
  454. EXPECT_EQ(EINVAL, errno);
  455. }
  456. TEST(mode_filter_get_seccomp)
  457. {
  458. struct sock_filter filter[] = {
  459. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  460. };
  461. struct sock_fprog prog = {
  462. .len = (unsigned short)ARRAY_SIZE(filter),
  463. .filter = filter,
  464. };
  465. long ret;
  466. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  467. ASSERT_EQ(0, ret);
  468. ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
  469. EXPECT_EQ(0, ret);
  470. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  471. ASSERT_EQ(0, ret);
  472. ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
  473. EXPECT_EQ(2, ret);
  474. }
  475. TEST(ALLOW_all)
  476. {
  477. struct sock_filter filter[] = {
  478. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  479. };
  480. struct sock_fprog prog = {
  481. .len = (unsigned short)ARRAY_SIZE(filter),
  482. .filter = filter,
  483. };
  484. long ret;
  485. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  486. ASSERT_EQ(0, ret);
  487. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  488. ASSERT_EQ(0, ret);
  489. }
  490. TEST(empty_prog)
  491. {
  492. struct sock_filter filter[] = {
  493. };
  494. struct sock_fprog prog = {
  495. .len = (unsigned short)ARRAY_SIZE(filter),
  496. .filter = filter,
  497. };
  498. long ret;
  499. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  500. ASSERT_EQ(0, ret);
  501. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  502. EXPECT_EQ(-1, ret);
  503. EXPECT_EQ(EINVAL, errno);
  504. }
  505. TEST(log_all)
  506. {
  507. struct sock_filter filter[] = {
  508. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
  509. };
  510. struct sock_fprog prog = {
  511. .len = (unsigned short)ARRAY_SIZE(filter),
  512. .filter = filter,
  513. };
  514. long ret;
  515. pid_t parent = getppid();
  516. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  517. ASSERT_EQ(0, ret);
  518. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  519. ASSERT_EQ(0, ret);
  520. /* getppid() should succeed and be logged (no check for logging) */
  521. EXPECT_EQ(parent, syscall(__NR_getppid));
  522. }
  523. TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
  524. {
  525. struct sock_filter filter[] = {
  526. BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
  527. };
  528. struct sock_fprog prog = {
  529. .len = (unsigned short)ARRAY_SIZE(filter),
  530. .filter = filter,
  531. };
  532. long ret;
  533. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  534. ASSERT_EQ(0, ret);
  535. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  536. ASSERT_EQ(0, ret);
  537. EXPECT_EQ(0, syscall(__NR_getpid)) {
  538. TH_LOG("getpid() shouldn't ever return");
  539. }
  540. }
  541. /* return code >= 0x80000000 is unused. */
  542. TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
  543. {
  544. struct sock_filter filter[] = {
  545. BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
  546. };
  547. struct sock_fprog prog = {
  548. .len = (unsigned short)ARRAY_SIZE(filter),
  549. .filter = filter,
  550. };
  551. long ret;
  552. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  553. ASSERT_EQ(0, ret);
  554. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  555. ASSERT_EQ(0, ret);
  556. EXPECT_EQ(0, syscall(__NR_getpid)) {
  557. TH_LOG("getpid() shouldn't ever return");
  558. }
  559. }
  560. TEST_SIGNAL(KILL_all, SIGSYS)
  561. {
  562. struct sock_filter filter[] = {
  563. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  564. };
  565. struct sock_fprog prog = {
  566. .len = (unsigned short)ARRAY_SIZE(filter),
  567. .filter = filter,
  568. };
  569. long ret;
  570. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  571. ASSERT_EQ(0, ret);
  572. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  573. ASSERT_EQ(0, ret);
  574. }
  575. TEST_SIGNAL(KILL_one, SIGSYS)
  576. {
  577. struct sock_filter filter[] = {
  578. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  579. offsetof(struct seccomp_data, nr)),
  580. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
  581. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  582. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  583. };
  584. struct sock_fprog prog = {
  585. .len = (unsigned short)ARRAY_SIZE(filter),
  586. .filter = filter,
  587. };
  588. long ret;
  589. pid_t parent = getppid();
  590. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  591. ASSERT_EQ(0, ret);
  592. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  593. ASSERT_EQ(0, ret);
  594. EXPECT_EQ(parent, syscall(__NR_getppid));
  595. /* getpid() should never return. */
  596. EXPECT_EQ(0, syscall(__NR_getpid));
  597. }
  598. TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
  599. {
  600. void *fatal_address;
  601. struct sock_filter filter[] = {
  602. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  603. offsetof(struct seccomp_data, nr)),
  604. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
  605. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  606. /* Only both with lower 32-bit for now. */
  607. BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
  608. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
  609. (unsigned long)&fatal_address, 0, 1),
  610. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  611. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  612. };
  613. struct sock_fprog prog = {
  614. .len = (unsigned short)ARRAY_SIZE(filter),
  615. .filter = filter,
  616. };
  617. long ret;
  618. pid_t parent = getppid();
  619. struct tms timebuf;
  620. clock_t clock = times(&timebuf);
  621. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  622. ASSERT_EQ(0, ret);
  623. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  624. ASSERT_EQ(0, ret);
  625. EXPECT_EQ(parent, syscall(__NR_getppid));
  626. EXPECT_LE(clock, syscall(__NR_times, &timebuf));
  627. /* times() should never return. */
  628. EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
  629. }
  630. TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
  631. {
  632. #ifndef __NR_mmap2
  633. int sysno = __NR_mmap;
  634. #else
  635. int sysno = __NR_mmap2;
  636. #endif
  637. struct sock_filter filter[] = {
  638. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  639. offsetof(struct seccomp_data, nr)),
  640. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
  641. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  642. /* Only both with lower 32-bit for now. */
  643. BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
  644. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
  645. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  646. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  647. };
  648. struct sock_fprog prog = {
  649. .len = (unsigned short)ARRAY_SIZE(filter),
  650. .filter = filter,
  651. };
  652. long ret;
  653. pid_t parent = getppid();
  654. int fd;
  655. void *map1, *map2;
  656. int page_size = sysconf(_SC_PAGESIZE);
  657. ASSERT_LT(0, page_size);
  658. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  659. ASSERT_EQ(0, ret);
  660. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  661. ASSERT_EQ(0, ret);
  662. fd = open("/dev/zero", O_RDONLY);
  663. ASSERT_NE(-1, fd);
  664. EXPECT_EQ(parent, syscall(__NR_getppid));
  665. map1 = (void *)syscall(sysno,
  666. NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
  667. EXPECT_NE(MAP_FAILED, map1);
  668. /* mmap2() should never return. */
  669. map2 = (void *)syscall(sysno,
  670. NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
  671. EXPECT_EQ(MAP_FAILED, map2);
  672. /* The test failed, so clean up the resources. */
  673. munmap(map1, page_size);
  674. munmap(map2, page_size);
  675. close(fd);
  676. }
  677. /* This is a thread task to die via seccomp filter violation. */
  678. void *kill_thread(void *data)
  679. {
  680. bool die = (bool)data;
  681. if (die) {
  682. syscall(__NR_getpid);
  683. return (void *)SIBLING_EXIT_FAILURE;
  684. }
  685. return (void *)SIBLING_EXIT_UNKILLED;
  686. }
  687. enum kill_t {
  688. KILL_THREAD,
  689. KILL_PROCESS,
  690. RET_UNKNOWN
  691. };
  692. /* Prepare a thread that will kill itself or both of us. */
  693. void kill_thread_or_group(struct __test_metadata *_metadata,
  694. enum kill_t kill_how)
  695. {
  696. pthread_t thread;
  697. void *status;
  698. /* Kill only when calling __NR_getpid. */
  699. struct sock_filter filter_thread[] = {
  700. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  701. offsetof(struct seccomp_data, nr)),
  702. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
  703. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
  704. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  705. };
  706. struct sock_fprog prog_thread = {
  707. .len = (unsigned short)ARRAY_SIZE(filter_thread),
  708. .filter = filter_thread,
  709. };
  710. int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
  711. struct sock_filter filter_process[] = {
  712. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  713. offsetof(struct seccomp_data, nr)),
  714. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
  715. BPF_STMT(BPF_RET|BPF_K, kill),
  716. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  717. };
  718. struct sock_fprog prog_process = {
  719. .len = (unsigned short)ARRAY_SIZE(filter_process),
  720. .filter = filter_process,
  721. };
  722. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  723. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  724. }
  725. ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
  726. kill_how == KILL_THREAD ? &prog_thread
  727. : &prog_process));
  728. /*
  729. * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
  730. * flag cannot be downgraded by a new filter.
  731. */
  732. if (kill_how == KILL_PROCESS)
  733. ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
  734. /* Start a thread that will exit immediately. */
  735. ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
  736. ASSERT_EQ(0, pthread_join(thread, &status));
  737. ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
  738. /* Start a thread that will die immediately. */
  739. ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
  740. ASSERT_EQ(0, pthread_join(thread, &status));
  741. ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
  742. /*
  743. * If we get here, only the spawned thread died. Let the parent know
  744. * the whole process didn't die (i.e. this thread, the spawner,
  745. * stayed running).
  746. */
  747. exit(42);
  748. }
  749. TEST(KILL_thread)
  750. {
  751. int status;
  752. pid_t child_pid;
  753. child_pid = fork();
  754. ASSERT_LE(0, child_pid);
  755. if (child_pid == 0) {
  756. kill_thread_or_group(_metadata, KILL_THREAD);
  757. _exit(38);
  758. }
  759. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  760. /* If only the thread was killed, we'll see exit 42. */
  761. ASSERT_TRUE(WIFEXITED(status));
  762. ASSERT_EQ(42, WEXITSTATUS(status));
  763. }
  764. TEST(KILL_process)
  765. {
  766. int status;
  767. pid_t child_pid;
  768. child_pid = fork();
  769. ASSERT_LE(0, child_pid);
  770. if (child_pid == 0) {
  771. kill_thread_or_group(_metadata, KILL_PROCESS);
  772. _exit(38);
  773. }
  774. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  775. /* If the entire process was killed, we'll see SIGSYS. */
  776. ASSERT_TRUE(WIFSIGNALED(status));
  777. ASSERT_EQ(SIGSYS, WTERMSIG(status));
  778. }
  779. TEST(KILL_unknown)
  780. {
  781. int status;
  782. pid_t child_pid;
  783. child_pid = fork();
  784. ASSERT_LE(0, child_pid);
  785. if (child_pid == 0) {
  786. kill_thread_or_group(_metadata, RET_UNKNOWN);
  787. _exit(38);
  788. }
  789. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  790. /* If the entire process was killed, we'll see SIGSYS. */
  791. EXPECT_TRUE(WIFSIGNALED(status)) {
  792. TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
  793. }
  794. ASSERT_EQ(SIGSYS, WTERMSIG(status));
  795. }
  796. /* TODO(wad) add 64-bit versus 32-bit arg tests. */
  797. TEST(arg_out_of_range)
  798. {
  799. struct sock_filter filter[] = {
  800. BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
  801. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  802. };
  803. struct sock_fprog prog = {
  804. .len = (unsigned short)ARRAY_SIZE(filter),
  805. .filter = filter,
  806. };
  807. long ret;
  808. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  809. ASSERT_EQ(0, ret);
  810. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  811. EXPECT_EQ(-1, ret);
  812. EXPECT_EQ(EINVAL, errno);
  813. }
  814. #define ERRNO_FILTER(name, errno) \
  815. struct sock_filter _read_filter_##name[] = { \
  816. BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \
  817. offsetof(struct seccomp_data, nr)), \
  818. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \
  819. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \
  820. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \
  821. }; \
  822. struct sock_fprog prog_##name = { \
  823. .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
  824. .filter = _read_filter_##name, \
  825. }
  826. /* Make sure basic errno values are correctly passed through a filter. */
  827. TEST(ERRNO_valid)
  828. {
  829. ERRNO_FILTER(valid, E2BIG);
  830. long ret;
  831. pid_t parent = getppid();
  832. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  833. ASSERT_EQ(0, ret);
  834. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
  835. ASSERT_EQ(0, ret);
  836. EXPECT_EQ(parent, syscall(__NR_getppid));
  837. EXPECT_EQ(-1, read(-1, NULL, 0));
  838. EXPECT_EQ(E2BIG, errno);
  839. }
  840. /* Make sure an errno of zero is correctly handled by the arch code. */
  841. TEST(ERRNO_zero)
  842. {
  843. ERRNO_FILTER(zero, 0);
  844. long ret;
  845. pid_t parent = getppid();
  846. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  847. ASSERT_EQ(0, ret);
  848. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
  849. ASSERT_EQ(0, ret);
  850. EXPECT_EQ(parent, syscall(__NR_getppid));
  851. /* "errno" of 0 is ok. */
  852. EXPECT_EQ(0, read(-1, NULL, 0));
  853. }
  854. /*
  855. * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
  856. * This tests that the errno value gets capped correctly, fixed by
  857. * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
  858. */
  859. TEST(ERRNO_capped)
  860. {
  861. ERRNO_FILTER(capped, 4096);
  862. long ret;
  863. pid_t parent = getppid();
  864. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  865. ASSERT_EQ(0, ret);
  866. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
  867. ASSERT_EQ(0, ret);
  868. EXPECT_EQ(parent, syscall(__NR_getppid));
  869. EXPECT_EQ(-1, read(-1, NULL, 0));
  870. EXPECT_EQ(4095, errno);
  871. }
  872. /*
  873. * Filters are processed in reverse order: last applied is executed first.
  874. * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
  875. * SECCOMP_RET_DATA mask results will follow the most recently applied
  876. * matching filter return (and not the lowest or highest value).
  877. */
  878. TEST(ERRNO_order)
  879. {
  880. ERRNO_FILTER(first, 11);
  881. ERRNO_FILTER(second, 13);
  882. ERRNO_FILTER(third, 12);
  883. long ret;
  884. pid_t parent = getppid();
  885. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  886. ASSERT_EQ(0, ret);
  887. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
  888. ASSERT_EQ(0, ret);
  889. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
  890. ASSERT_EQ(0, ret);
  891. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
  892. ASSERT_EQ(0, ret);
  893. EXPECT_EQ(parent, syscall(__NR_getppid));
  894. EXPECT_EQ(-1, read(-1, NULL, 0));
  895. EXPECT_EQ(12, errno);
  896. }
  897. FIXTURE(TRAP) {
  898. struct sock_fprog prog;
  899. };
  900. FIXTURE_SETUP(TRAP)
  901. {
  902. struct sock_filter filter[] = {
  903. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  904. offsetof(struct seccomp_data, nr)),
  905. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
  906. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
  907. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  908. };
  909. memset(&self->prog, 0, sizeof(self->prog));
  910. self->prog.filter = malloc(sizeof(filter));
  911. ASSERT_NE(NULL, self->prog.filter);
  912. memcpy(self->prog.filter, filter, sizeof(filter));
  913. self->prog.len = (unsigned short)ARRAY_SIZE(filter);
  914. }
  915. FIXTURE_TEARDOWN(TRAP)
  916. {
  917. if (self->prog.filter)
  918. free(self->prog.filter);
  919. }
  920. TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
  921. {
  922. long ret;
  923. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  924. ASSERT_EQ(0, ret);
  925. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
  926. ASSERT_EQ(0, ret);
  927. syscall(__NR_getpid);
  928. }
  929. /* Ensure that SIGSYS overrides SIG_IGN */
  930. TEST_F_SIGNAL(TRAP, ign, SIGSYS)
  931. {
  932. long ret;
  933. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  934. ASSERT_EQ(0, ret);
  935. signal(SIGSYS, SIG_IGN);
  936. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
  937. ASSERT_EQ(0, ret);
  938. syscall(__NR_getpid);
  939. }
  940. static siginfo_t TRAP_info;
  941. static volatile int TRAP_nr;
  942. static void TRAP_action(int nr, siginfo_t *info, void *void_context)
  943. {
  944. memcpy(&TRAP_info, info, sizeof(TRAP_info));
  945. TRAP_nr = nr;
  946. }
  947. TEST_F(TRAP, handler)
  948. {
  949. int ret, test;
  950. struct sigaction act;
  951. sigset_t mask;
  952. memset(&act, 0, sizeof(act));
  953. sigemptyset(&mask);
  954. sigaddset(&mask, SIGSYS);
  955. act.sa_sigaction = &TRAP_action;
  956. act.sa_flags = SA_SIGINFO;
  957. ret = sigaction(SIGSYS, &act, NULL);
  958. ASSERT_EQ(0, ret) {
  959. TH_LOG("sigaction failed");
  960. }
  961. ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
  962. ASSERT_EQ(0, ret) {
  963. TH_LOG("sigprocmask failed");
  964. }
  965. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  966. ASSERT_EQ(0, ret);
  967. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
  968. ASSERT_EQ(0, ret);
  969. TRAP_nr = 0;
  970. memset(&TRAP_info, 0, sizeof(TRAP_info));
  971. /* Expect the registers to be rolled back. (nr = error) may vary
  972. * based on arch. */
  973. ret = syscall(__NR_getpid);
  974. /* Silence gcc warning about volatile. */
  975. test = TRAP_nr;
  976. EXPECT_EQ(SIGSYS, test);
  977. struct local_sigsys {
  978. void *_call_addr; /* calling user insn */
  979. int _syscall; /* triggering system call number */
  980. unsigned int _arch; /* AUDIT_ARCH_* of syscall */
  981. } *sigsys = (struct local_sigsys *)
  982. #ifdef si_syscall
  983. &(TRAP_info.si_call_addr);
  984. #else
  985. &TRAP_info.si_pid;
  986. #endif
  987. EXPECT_EQ(__NR_getpid, sigsys->_syscall);
  988. /* Make sure arch is non-zero. */
  989. EXPECT_NE(0, sigsys->_arch);
  990. EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
  991. }
  992. FIXTURE(precedence) {
  993. struct sock_fprog allow;
  994. struct sock_fprog log;
  995. struct sock_fprog trace;
  996. struct sock_fprog error;
  997. struct sock_fprog trap;
  998. struct sock_fprog kill;
  999. };
  1000. FIXTURE_SETUP(precedence)
  1001. {
  1002. struct sock_filter allow_insns[] = {
  1003. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1004. };
  1005. struct sock_filter log_insns[] = {
  1006. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1007. offsetof(struct seccomp_data, nr)),
  1008. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
  1009. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1010. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
  1011. };
  1012. struct sock_filter trace_insns[] = {
  1013. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1014. offsetof(struct seccomp_data, nr)),
  1015. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
  1016. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1017. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
  1018. };
  1019. struct sock_filter error_insns[] = {
  1020. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1021. offsetof(struct seccomp_data, nr)),
  1022. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
  1023. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1024. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
  1025. };
  1026. struct sock_filter trap_insns[] = {
  1027. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1028. offsetof(struct seccomp_data, nr)),
  1029. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
  1030. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1031. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
  1032. };
  1033. struct sock_filter kill_insns[] = {
  1034. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1035. offsetof(struct seccomp_data, nr)),
  1036. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
  1037. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1038. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  1039. };
  1040. memset(self, 0, sizeof(*self));
  1041. #define FILTER_ALLOC(_x) \
  1042. self->_x.filter = malloc(sizeof(_x##_insns)); \
  1043. ASSERT_NE(NULL, self->_x.filter); \
  1044. memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
  1045. self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
  1046. FILTER_ALLOC(allow);
  1047. FILTER_ALLOC(log);
  1048. FILTER_ALLOC(trace);
  1049. FILTER_ALLOC(error);
  1050. FILTER_ALLOC(trap);
  1051. FILTER_ALLOC(kill);
  1052. }
  1053. FIXTURE_TEARDOWN(precedence)
  1054. {
  1055. #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
  1056. FILTER_FREE(allow);
  1057. FILTER_FREE(log);
  1058. FILTER_FREE(trace);
  1059. FILTER_FREE(error);
  1060. FILTER_FREE(trap);
  1061. FILTER_FREE(kill);
  1062. }
  1063. TEST_F(precedence, allow_ok)
  1064. {
  1065. pid_t parent, res = 0;
  1066. long ret;
  1067. parent = getppid();
  1068. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1069. ASSERT_EQ(0, ret);
  1070. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1071. ASSERT_EQ(0, ret);
  1072. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1073. ASSERT_EQ(0, ret);
  1074. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1075. ASSERT_EQ(0, ret);
  1076. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
  1077. ASSERT_EQ(0, ret);
  1078. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
  1079. ASSERT_EQ(0, ret);
  1080. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
  1081. ASSERT_EQ(0, ret);
  1082. /* Should work just fine. */
  1083. res = syscall(__NR_getppid);
  1084. EXPECT_EQ(parent, res);
  1085. }
  1086. TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
  1087. {
  1088. pid_t parent, res = 0;
  1089. long ret;
  1090. parent = getppid();
  1091. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1092. ASSERT_EQ(0, ret);
  1093. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1094. ASSERT_EQ(0, ret);
  1095. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1096. ASSERT_EQ(0, ret);
  1097. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1098. ASSERT_EQ(0, ret);
  1099. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
  1100. ASSERT_EQ(0, ret);
  1101. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
  1102. ASSERT_EQ(0, ret);
  1103. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
  1104. ASSERT_EQ(0, ret);
  1105. /* Should work just fine. */
  1106. res = syscall(__NR_getppid);
  1107. EXPECT_EQ(parent, res);
  1108. /* getpid() should never return. */
  1109. res = syscall(__NR_getpid);
  1110. EXPECT_EQ(0, res);
  1111. }
  1112. TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
  1113. {
  1114. pid_t parent;
  1115. long ret;
  1116. parent = getppid();
  1117. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1118. ASSERT_EQ(0, ret);
  1119. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1120. ASSERT_EQ(0, ret);
  1121. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
  1122. ASSERT_EQ(0, ret);
  1123. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
  1124. ASSERT_EQ(0, ret);
  1125. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1126. ASSERT_EQ(0, ret);
  1127. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1128. ASSERT_EQ(0, ret);
  1129. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
  1130. ASSERT_EQ(0, ret);
  1131. /* Should work just fine. */
  1132. EXPECT_EQ(parent, syscall(__NR_getppid));
  1133. /* getpid() should never return. */
  1134. EXPECT_EQ(0, syscall(__NR_getpid));
  1135. }
  1136. TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
  1137. {
  1138. pid_t parent;
  1139. long ret;
  1140. parent = getppid();
  1141. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1142. ASSERT_EQ(0, ret);
  1143. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1144. ASSERT_EQ(0, ret);
  1145. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1146. ASSERT_EQ(0, ret);
  1147. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1148. ASSERT_EQ(0, ret);
  1149. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
  1150. ASSERT_EQ(0, ret);
  1151. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
  1152. ASSERT_EQ(0, ret);
  1153. /* Should work just fine. */
  1154. EXPECT_EQ(parent, syscall(__NR_getppid));
  1155. /* getpid() should never return. */
  1156. EXPECT_EQ(0, syscall(__NR_getpid));
  1157. }
  1158. TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
  1159. {
  1160. pid_t parent;
  1161. long ret;
  1162. parent = getppid();
  1163. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1164. ASSERT_EQ(0, ret);
  1165. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1166. ASSERT_EQ(0, ret);
  1167. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
  1168. ASSERT_EQ(0, ret);
  1169. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1170. ASSERT_EQ(0, ret);
  1171. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1172. ASSERT_EQ(0, ret);
  1173. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
  1174. ASSERT_EQ(0, ret);
  1175. /* Should work just fine. */
  1176. EXPECT_EQ(parent, syscall(__NR_getppid));
  1177. /* getpid() should never return. */
  1178. EXPECT_EQ(0, syscall(__NR_getpid));
  1179. }
  1180. TEST_F(precedence, errno_is_third)
  1181. {
  1182. pid_t parent;
  1183. long ret;
  1184. parent = getppid();
  1185. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1186. ASSERT_EQ(0, ret);
  1187. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1188. ASSERT_EQ(0, ret);
  1189. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1190. ASSERT_EQ(0, ret);
  1191. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1192. ASSERT_EQ(0, ret);
  1193. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
  1194. ASSERT_EQ(0, ret);
  1195. /* Should work just fine. */
  1196. EXPECT_EQ(parent, syscall(__NR_getppid));
  1197. EXPECT_EQ(0, syscall(__NR_getpid));
  1198. }
  1199. TEST_F(precedence, errno_is_third_in_any_order)
  1200. {
  1201. pid_t parent;
  1202. long ret;
  1203. parent = getppid();
  1204. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1205. ASSERT_EQ(0, ret);
  1206. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1207. ASSERT_EQ(0, ret);
  1208. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
  1209. ASSERT_EQ(0, ret);
  1210. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1211. ASSERT_EQ(0, ret);
  1212. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1213. ASSERT_EQ(0, ret);
  1214. /* Should work just fine. */
  1215. EXPECT_EQ(parent, syscall(__NR_getppid));
  1216. EXPECT_EQ(0, syscall(__NR_getpid));
  1217. }
  1218. TEST_F(precedence, trace_is_fourth)
  1219. {
  1220. pid_t parent;
  1221. long ret;
  1222. parent = getppid();
  1223. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1224. ASSERT_EQ(0, ret);
  1225. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1226. ASSERT_EQ(0, ret);
  1227. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1228. ASSERT_EQ(0, ret);
  1229. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1230. ASSERT_EQ(0, ret);
  1231. /* Should work just fine. */
  1232. EXPECT_EQ(parent, syscall(__NR_getppid));
  1233. /* No ptracer */
  1234. EXPECT_EQ(-1, syscall(__NR_getpid));
  1235. }
  1236. TEST_F(precedence, trace_is_fourth_in_any_order)
  1237. {
  1238. pid_t parent;
  1239. long ret;
  1240. parent = getppid();
  1241. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1242. ASSERT_EQ(0, ret);
  1243. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
  1244. ASSERT_EQ(0, ret);
  1245. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1246. ASSERT_EQ(0, ret);
  1247. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1248. ASSERT_EQ(0, ret);
  1249. /* Should work just fine. */
  1250. EXPECT_EQ(parent, syscall(__NR_getppid));
  1251. /* No ptracer */
  1252. EXPECT_EQ(-1, syscall(__NR_getpid));
  1253. }
  1254. TEST_F(precedence, log_is_fifth)
  1255. {
  1256. pid_t mypid, parent;
  1257. long ret;
  1258. mypid = getpid();
  1259. parent = getppid();
  1260. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1261. ASSERT_EQ(0, ret);
  1262. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1263. ASSERT_EQ(0, ret);
  1264. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1265. ASSERT_EQ(0, ret);
  1266. /* Should work just fine. */
  1267. EXPECT_EQ(parent, syscall(__NR_getppid));
  1268. /* Should also work just fine */
  1269. EXPECT_EQ(mypid, syscall(__NR_getpid));
  1270. }
  1271. TEST_F(precedence, log_is_fifth_in_any_order)
  1272. {
  1273. pid_t mypid, parent;
  1274. long ret;
  1275. mypid = getpid();
  1276. parent = getppid();
  1277. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1278. ASSERT_EQ(0, ret);
  1279. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
  1280. ASSERT_EQ(0, ret);
  1281. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
  1282. ASSERT_EQ(0, ret);
  1283. /* Should work just fine. */
  1284. EXPECT_EQ(parent, syscall(__NR_getppid));
  1285. /* Should also work just fine */
  1286. EXPECT_EQ(mypid, syscall(__NR_getpid));
  1287. }
  1288. #ifndef PTRACE_O_TRACESECCOMP
  1289. #define PTRACE_O_TRACESECCOMP 0x00000080
  1290. #endif
  1291. /* Catch the Ubuntu 12.04 value error. */
  1292. #if PTRACE_EVENT_SECCOMP != 7
  1293. #undef PTRACE_EVENT_SECCOMP
  1294. #endif
  1295. #ifndef PTRACE_EVENT_SECCOMP
  1296. #define PTRACE_EVENT_SECCOMP 7
  1297. #endif
  1298. #define PTRACE_EVENT_MASK(status) ((status) >> 16)
  1299. bool tracer_running;
  1300. void tracer_stop(int sig)
  1301. {
  1302. tracer_running = false;
  1303. }
  1304. typedef void tracer_func_t(struct __test_metadata *_metadata,
  1305. pid_t tracee, int status, void *args);
  1306. void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
  1307. tracer_func_t tracer_func, void *args, bool ptrace_syscall)
  1308. {
  1309. int ret = -1;
  1310. struct sigaction action = {
  1311. .sa_handler = tracer_stop,
  1312. };
  1313. /* Allow external shutdown. */
  1314. tracer_running = true;
  1315. ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
  1316. errno = 0;
  1317. while (ret == -1 && errno != EINVAL)
  1318. ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
  1319. ASSERT_EQ(0, ret) {
  1320. kill(tracee, SIGKILL);
  1321. }
  1322. /* Wait for attach stop */
  1323. wait(NULL);
  1324. ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
  1325. PTRACE_O_TRACESYSGOOD :
  1326. PTRACE_O_TRACESECCOMP);
  1327. ASSERT_EQ(0, ret) {
  1328. TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
  1329. kill(tracee, SIGKILL);
  1330. }
  1331. ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
  1332. tracee, NULL, 0);
  1333. ASSERT_EQ(0, ret);
  1334. /* Unblock the tracee */
  1335. ASSERT_EQ(1, write(fd, "A", 1));
  1336. ASSERT_EQ(0, close(fd));
  1337. /* Run until we're shut down. Must assert to stop execution. */
  1338. while (tracer_running) {
  1339. int status;
  1340. if (wait(&status) != tracee)
  1341. continue;
  1342. if (WIFSIGNALED(status)) {
  1343. /* Child caught a fatal signal. */
  1344. return;
  1345. }
  1346. if (WIFEXITED(status)) {
  1347. /* Child exited with code. */
  1348. return;
  1349. }
  1350. /* Check if we got an expected event. */
  1351. ASSERT_EQ(WIFCONTINUED(status), false);
  1352. ASSERT_EQ(WIFSTOPPED(status), true);
  1353. ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) {
  1354. TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
  1355. }
  1356. tracer_func(_metadata, tracee, status, args);
  1357. ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
  1358. tracee, NULL, 0);
  1359. ASSERT_EQ(0, ret);
  1360. }
  1361. /* Directly report the status of our test harness results. */
  1362. syscall(__NR_exit, _metadata->exit_code);
  1363. }
  1364. /* Common tracer setup/teardown functions. */
  1365. void cont_handler(int num)
  1366. { }
  1367. pid_t setup_trace_fixture(struct __test_metadata *_metadata,
  1368. tracer_func_t func, void *args, bool ptrace_syscall)
  1369. {
  1370. char sync;
  1371. int pipefd[2];
  1372. pid_t tracer_pid;
  1373. pid_t tracee = getpid();
  1374. /* Setup a pipe for clean synchronization. */
  1375. ASSERT_EQ(0, pipe(pipefd));
  1376. /* Fork a child which we'll promote to tracer */
  1377. tracer_pid = fork();
  1378. ASSERT_LE(0, tracer_pid);
  1379. signal(SIGALRM, cont_handler);
  1380. if (tracer_pid == 0) {
  1381. close(pipefd[0]);
  1382. start_tracer(_metadata, pipefd[1], tracee, func, args,
  1383. ptrace_syscall);
  1384. syscall(__NR_exit, 0);
  1385. }
  1386. close(pipefd[1]);
  1387. prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
  1388. read(pipefd[0], &sync, 1);
  1389. close(pipefd[0]);
  1390. return tracer_pid;
  1391. }
  1392. void teardown_trace_fixture(struct __test_metadata *_metadata,
  1393. pid_t tracer)
  1394. {
  1395. if (tracer) {
  1396. int status;
  1397. ASSERT_EQ(0, kill(tracer, SIGUSR1));
  1398. ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
  1399. }
  1400. }
  1401. /* "poke" tracer arguments and function. */
  1402. struct tracer_args_poke_t {
  1403. unsigned long poke_addr;
  1404. };
  1405. void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
  1406. void *args)
  1407. {
  1408. int ret;
  1409. unsigned long msg;
  1410. struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
  1411. ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
  1412. EXPECT_EQ(0, ret);
  1413. /* If this fails, don't try to recover. */
  1414. ASSERT_EQ(0x1001, msg) {
  1415. kill(tracee, SIGKILL);
  1416. }
  1417. /*
  1418. * Poke in the message.
  1419. * Registers are not touched to try to keep this relatively arch
  1420. * agnostic.
  1421. */
  1422. ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
  1423. EXPECT_EQ(0, ret);
  1424. }
  1425. FIXTURE(TRACE_poke) {
  1426. struct sock_fprog prog;
  1427. pid_t tracer;
  1428. long poked;
  1429. struct tracer_args_poke_t tracer_args;
  1430. };
  1431. FIXTURE_SETUP(TRACE_poke)
  1432. {
  1433. struct sock_filter filter[] = {
  1434. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1435. offsetof(struct seccomp_data, nr)),
  1436. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
  1437. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
  1438. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1439. };
  1440. self->poked = 0;
  1441. memset(&self->prog, 0, sizeof(self->prog));
  1442. self->prog.filter = malloc(sizeof(filter));
  1443. ASSERT_NE(NULL, self->prog.filter);
  1444. memcpy(self->prog.filter, filter, sizeof(filter));
  1445. self->prog.len = (unsigned short)ARRAY_SIZE(filter);
  1446. /* Set up tracer args. */
  1447. self->tracer_args.poke_addr = (unsigned long)&self->poked;
  1448. /* Launch tracer. */
  1449. self->tracer = setup_trace_fixture(_metadata, tracer_poke,
  1450. &self->tracer_args, false);
  1451. }
  1452. FIXTURE_TEARDOWN(TRACE_poke)
  1453. {
  1454. teardown_trace_fixture(_metadata, self->tracer);
  1455. if (self->prog.filter)
  1456. free(self->prog.filter);
  1457. }
  1458. TEST_F(TRACE_poke, read_has_side_effects)
  1459. {
  1460. ssize_t ret;
  1461. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1462. ASSERT_EQ(0, ret);
  1463. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
  1464. ASSERT_EQ(0, ret);
  1465. EXPECT_EQ(0, self->poked);
  1466. ret = read(-1, NULL, 0);
  1467. EXPECT_EQ(-1, ret);
  1468. EXPECT_EQ(0x1001, self->poked);
  1469. }
  1470. TEST_F(TRACE_poke, getpid_runs_normally)
  1471. {
  1472. long ret;
  1473. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1474. ASSERT_EQ(0, ret);
  1475. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
  1476. ASSERT_EQ(0, ret);
  1477. EXPECT_EQ(0, self->poked);
  1478. EXPECT_NE(0, syscall(__NR_getpid));
  1479. EXPECT_EQ(0, self->poked);
  1480. }
  1481. #if defined(__x86_64__)
  1482. # define ARCH_REGS struct user_regs_struct
  1483. # define SYSCALL_NUM(_regs) (_regs).orig_rax
  1484. # define SYSCALL_RET(_regs) (_regs).rax
  1485. #elif defined(__i386__)
  1486. # define ARCH_REGS struct user_regs_struct
  1487. # define SYSCALL_NUM(_regs) (_regs).orig_eax
  1488. # define SYSCALL_RET(_regs) (_regs).eax
  1489. #elif defined(__arm__)
  1490. # define ARCH_REGS struct pt_regs
  1491. # define SYSCALL_NUM(_regs) (_regs).ARM_r7
  1492. # ifndef PTRACE_SET_SYSCALL
  1493. # define PTRACE_SET_SYSCALL 23
  1494. # endif
  1495. # define SYSCALL_NUM_SET(_regs, _nr) \
  1496. EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
  1497. # define SYSCALL_RET(_regs) (_regs).ARM_r0
  1498. #elif defined(__aarch64__)
  1499. # define ARCH_REGS struct user_pt_regs
  1500. # define SYSCALL_NUM(_regs) (_regs).regs[8]
  1501. # ifndef NT_ARM_SYSTEM_CALL
  1502. # define NT_ARM_SYSTEM_CALL 0x404
  1503. # endif
  1504. # define SYSCALL_NUM_SET(_regs, _nr) \
  1505. do { \
  1506. struct iovec __v; \
  1507. typeof(_nr) __nr = (_nr); \
  1508. __v.iov_base = &__nr; \
  1509. __v.iov_len = sizeof(__nr); \
  1510. EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
  1511. NT_ARM_SYSTEM_CALL, &__v)); \
  1512. } while (0)
  1513. # define SYSCALL_RET(_regs) (_regs).regs[0]
  1514. #elif defined(__loongarch__)
  1515. # define ARCH_REGS struct user_pt_regs
  1516. # define SYSCALL_NUM(_regs) (_regs).regs[11]
  1517. # define SYSCALL_RET(_regs) (_regs).regs[4]
  1518. #elif defined(__riscv) && __riscv_xlen == 64
  1519. # define ARCH_REGS struct user_regs_struct
  1520. # define SYSCALL_NUM(_regs) (_regs).a7
  1521. # define SYSCALL_RET(_regs) (_regs).a0
  1522. #elif defined(__csky__)
  1523. # define ARCH_REGS struct pt_regs
  1524. # if defined(__CSKYABIV2__)
  1525. # define SYSCALL_NUM(_regs) (_regs).regs[3]
  1526. # else
  1527. # define SYSCALL_NUM(_regs) (_regs).regs[9]
  1528. # endif
  1529. # define SYSCALL_RET(_regs) (_regs).a0
  1530. #elif defined(__hppa__)
  1531. # define ARCH_REGS struct user_regs_struct
  1532. # define SYSCALL_NUM(_regs) (_regs).gr[20]
  1533. # define SYSCALL_RET(_regs) (_regs).gr[28]
  1534. #elif defined(__powerpc__)
  1535. # define ARCH_REGS struct pt_regs
  1536. # define SYSCALL_NUM(_regs) (_regs).gpr[0]
  1537. # define SYSCALL_RET(_regs) (_regs).gpr[3]
  1538. # define SYSCALL_RET_SET(_regs, _val) \
  1539. do { \
  1540. typeof(_val) _result = (_val); \
  1541. if ((_regs.trap & 0xfff0) == 0x3000) { \
  1542. /* \
  1543. * scv 0 system call uses -ve result \
  1544. * for error, so no need to adjust. \
  1545. */ \
  1546. SYSCALL_RET(_regs) = _result; \
  1547. } else { \
  1548. /* \
  1549. * A syscall error is signaled by the \
  1550. * CR0 SO bit and the code is stored as \
  1551. * a positive value. \
  1552. */ \
  1553. if (_result < 0) { \
  1554. SYSCALL_RET(_regs) = -_result; \
  1555. (_regs).ccr |= 0x10000000; \
  1556. } else { \
  1557. SYSCALL_RET(_regs) = _result; \
  1558. (_regs).ccr &= ~0x10000000; \
  1559. } \
  1560. } \
  1561. } while (0)
  1562. # define SYSCALL_RET_SET_ON_PTRACE_EXIT
  1563. #elif defined(__s390__)
  1564. # define ARCH_REGS s390_regs
  1565. # define SYSCALL_NUM(_regs) (_regs).gprs[2]
  1566. # define SYSCALL_RET_SET(_regs, _val) \
  1567. TH_LOG("Can't modify syscall return on this architecture")
  1568. #elif defined(__mips__)
  1569. # include <asm/unistd_nr_n32.h>
  1570. # include <asm/unistd_nr_n64.h>
  1571. # include <asm/unistd_nr_o32.h>
  1572. # define ARCH_REGS struct pt_regs
  1573. # define SYSCALL_NUM(_regs) \
  1574. ({ \
  1575. typeof((_regs).regs[2]) _nr; \
  1576. if ((_regs).regs[2] == __NR_O32_Linux) \
  1577. _nr = (_regs).regs[4]; \
  1578. else \
  1579. _nr = (_regs).regs[2]; \
  1580. _nr; \
  1581. })
  1582. # define SYSCALL_NUM_SET(_regs, _nr) \
  1583. do { \
  1584. if ((_regs).regs[2] == __NR_O32_Linux) \
  1585. (_regs).regs[4] = _nr; \
  1586. else \
  1587. (_regs).regs[2] = _nr; \
  1588. } while (0)
  1589. # define SYSCALL_RET_SET(_regs, _val) \
  1590. TH_LOG("Can't modify syscall return on this architecture")
  1591. #elif defined(__xtensa__)
  1592. # define ARCH_REGS struct user_pt_regs
  1593. # define SYSCALL_NUM(_regs) (_regs).syscall
  1594. /*
  1595. * On xtensa syscall return value is in the register
  1596. * a2 of the current window which is not fixed.
  1597. */
  1598. #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
  1599. #elif defined(__sh__)
  1600. # define ARCH_REGS struct pt_regs
  1601. # define SYSCALL_NUM(_regs) (_regs).regs[3]
  1602. # define SYSCALL_RET(_regs) (_regs).regs[0]
  1603. #elif defined(__mc68000__)
  1604. # define ARCH_REGS struct user_regs_struct
  1605. # define SYSCALL_NUM(_regs) (_regs).orig_d0
  1606. # define SYSCALL_RET(_regs) (_regs).d0
  1607. #else
  1608. # error "Do not know how to find your architecture's registers and syscalls"
  1609. #endif
  1610. /*
  1611. * Most architectures can change the syscall by just updating the
  1612. * associated register. This is the default if not defined above.
  1613. */
  1614. #ifndef SYSCALL_NUM_SET
  1615. # define SYSCALL_NUM_SET(_regs, _nr) \
  1616. do { \
  1617. SYSCALL_NUM(_regs) = (_nr); \
  1618. } while (0)
  1619. #endif
  1620. /*
  1621. * Most architectures can change the syscall return value by just
  1622. * writing to the SYSCALL_RET register. This is the default if not
  1623. * defined above. If an architecture cannot set the return value
  1624. * (for example when the syscall and return value register is
  1625. * shared), report it with TH_LOG() in an arch-specific definition
  1626. * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
  1627. */
  1628. #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
  1629. # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
  1630. #endif
  1631. #ifndef SYSCALL_RET_SET
  1632. # define SYSCALL_RET_SET(_regs, _val) \
  1633. do { \
  1634. SYSCALL_RET(_regs) = (_val); \
  1635. } while (0)
  1636. #endif
  1637. /* When the syscall return can't be changed, stub out the tests for it. */
  1638. #ifndef SYSCALL_RET
  1639. # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
  1640. #else
  1641. # define EXPECT_SYSCALL_RETURN(val, action) \
  1642. do { \
  1643. errno = 0; \
  1644. if (val < 0) { \
  1645. EXPECT_EQ(-1, action); \
  1646. EXPECT_EQ(-(val), errno); \
  1647. } else { \
  1648. EXPECT_EQ(val, action); \
  1649. } \
  1650. } while (0)
  1651. #endif
  1652. /*
  1653. * Some architectures (e.g. powerpc) can only set syscall
  1654. * return values on syscall exit during ptrace.
  1655. */
  1656. const bool ptrace_entry_set_syscall_nr = true;
  1657. const bool ptrace_entry_set_syscall_ret =
  1658. #ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
  1659. true;
  1660. #else
  1661. false;
  1662. #endif
  1663. /*
  1664. * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  1665. * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  1666. */
  1667. #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__)
  1668. # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
  1669. # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
  1670. #else
  1671. # define ARCH_GETREGS(_regs) ({ \
  1672. struct iovec __v; \
  1673. __v.iov_base = &(_regs); \
  1674. __v.iov_len = sizeof(_regs); \
  1675. ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
  1676. })
  1677. # define ARCH_SETREGS(_regs) ({ \
  1678. struct iovec __v; \
  1679. __v.iov_base = &(_regs); \
  1680. __v.iov_len = sizeof(_regs); \
  1681. ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
  1682. })
  1683. #endif
  1684. /* Architecture-specific syscall fetching routine. */
  1685. int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
  1686. {
  1687. ARCH_REGS regs;
  1688. EXPECT_EQ(0, ARCH_GETREGS(regs)) {
  1689. return -1;
  1690. }
  1691. return SYSCALL_NUM(regs);
  1692. }
  1693. /* Architecture-specific syscall changing routine. */
  1694. void __change_syscall(struct __test_metadata *_metadata,
  1695. pid_t tracee, long *syscall, long *ret)
  1696. {
  1697. ARCH_REGS orig, regs;
  1698. /* Do not get/set registers if we have nothing to do. */
  1699. if (!syscall && !ret)
  1700. return;
  1701. EXPECT_EQ(0, ARCH_GETREGS(regs)) {
  1702. return;
  1703. }
  1704. orig = regs;
  1705. if (syscall)
  1706. SYSCALL_NUM_SET(regs, *syscall);
  1707. if (ret)
  1708. SYSCALL_RET_SET(regs, *ret);
  1709. /* Flush any register changes made. */
  1710. if (memcmp(&orig, &regs, sizeof(orig)) != 0)
  1711. EXPECT_EQ(0, ARCH_SETREGS(regs));
  1712. }
  1713. /* Change only syscall number. */
  1714. void change_syscall_nr(struct __test_metadata *_metadata,
  1715. pid_t tracee, long syscall)
  1716. {
  1717. __change_syscall(_metadata, tracee, &syscall, NULL);
  1718. }
  1719. /* Change syscall return value (and set syscall number to -1). */
  1720. void change_syscall_ret(struct __test_metadata *_metadata,
  1721. pid_t tracee, long ret)
  1722. {
  1723. long syscall = -1;
  1724. __change_syscall(_metadata, tracee, &syscall, &ret);
  1725. }
  1726. void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
  1727. int status, void *args)
  1728. {
  1729. int ret;
  1730. unsigned long msg;
  1731. EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) {
  1732. TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status));
  1733. return;
  1734. }
  1735. /* Make sure we got the right message. */
  1736. ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
  1737. EXPECT_EQ(0, ret);
  1738. /* Validate and take action on expected syscalls. */
  1739. switch (msg) {
  1740. case 0x1002:
  1741. /* change getpid to getppid. */
  1742. EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
  1743. change_syscall_nr(_metadata, tracee, __NR_getppid);
  1744. break;
  1745. case 0x1003:
  1746. /* skip gettid with valid return code. */
  1747. EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
  1748. change_syscall_ret(_metadata, tracee, 45000);
  1749. break;
  1750. case 0x1004:
  1751. /* skip openat with error. */
  1752. EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
  1753. change_syscall_ret(_metadata, tracee, -ESRCH);
  1754. break;
  1755. case 0x1005:
  1756. /* do nothing (allow getppid) */
  1757. EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
  1758. break;
  1759. default:
  1760. EXPECT_EQ(0, msg) {
  1761. TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
  1762. kill(tracee, SIGKILL);
  1763. }
  1764. }
  1765. }
  1766. FIXTURE(TRACE_syscall) {
  1767. struct sock_fprog prog;
  1768. pid_t tracer, mytid, mypid, parent;
  1769. long syscall_nr;
  1770. };
  1771. void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
  1772. int status, void *args)
  1773. {
  1774. int ret;
  1775. unsigned long msg;
  1776. static bool entry;
  1777. long syscall_nr_val, syscall_ret_val;
  1778. long *syscall_nr = NULL, *syscall_ret = NULL;
  1779. FIXTURE_DATA(TRACE_syscall) *self = args;
  1780. EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) {
  1781. TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
  1782. return;
  1783. }
  1784. /*
  1785. * The traditional way to tell PTRACE_SYSCALL entry/exit
  1786. * is by counting.
  1787. */
  1788. entry = !entry;
  1789. /* Make sure we got an appropriate message. */
  1790. ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
  1791. EXPECT_EQ(0, ret);
  1792. EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
  1793. : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
  1794. /*
  1795. * Some architectures only support setting return values during
  1796. * syscall exit under ptrace, and on exit the syscall number may
  1797. * no longer be available. Therefore, save the initial sycall
  1798. * number here, so it can be examined during both entry and exit
  1799. * phases.
  1800. */
  1801. if (entry)
  1802. self->syscall_nr = get_syscall(_metadata, tracee);
  1803. /*
  1804. * Depending on the architecture's syscall setting abilities, we
  1805. * pick which things to set during this phase (entry or exit).
  1806. */
  1807. if (entry == ptrace_entry_set_syscall_nr)
  1808. syscall_nr = &syscall_nr_val;
  1809. if (entry == ptrace_entry_set_syscall_ret)
  1810. syscall_ret = &syscall_ret_val;
  1811. /* Now handle the actual rewriting cases. */
  1812. switch (self->syscall_nr) {
  1813. case __NR_getpid:
  1814. syscall_nr_val = __NR_getppid;
  1815. /* Never change syscall return for this case. */
  1816. syscall_ret = NULL;
  1817. break;
  1818. case __NR_gettid:
  1819. syscall_nr_val = -1;
  1820. syscall_ret_val = 45000;
  1821. break;
  1822. case __NR_openat:
  1823. syscall_nr_val = -1;
  1824. syscall_ret_val = -ESRCH;
  1825. break;
  1826. default:
  1827. /* Unhandled, do nothing. */
  1828. return;
  1829. }
  1830. __change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
  1831. }
  1832. FIXTURE_VARIANT(TRACE_syscall) {
  1833. /*
  1834. * All of the SECCOMP_RET_TRACE behaviors can be tested with either
  1835. * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
  1836. * This indicates if we should use SECCOMP_RET_TRACE (false), or
  1837. * ptrace (true).
  1838. */
  1839. bool use_ptrace;
  1840. };
  1841. FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
  1842. .use_ptrace = true,
  1843. };
  1844. FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
  1845. .use_ptrace = false,
  1846. };
  1847. FIXTURE_SETUP(TRACE_syscall)
  1848. {
  1849. struct sock_filter filter[] = {
  1850. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1851. offsetof(struct seccomp_data, nr)),
  1852. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
  1853. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
  1854. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
  1855. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
  1856. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
  1857. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
  1858. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
  1859. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
  1860. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1861. };
  1862. struct sock_fprog prog = {
  1863. .len = (unsigned short)ARRAY_SIZE(filter),
  1864. .filter = filter,
  1865. };
  1866. long ret;
  1867. /* Prepare some testable syscall results. */
  1868. self->mytid = syscall(__NR_gettid);
  1869. ASSERT_GT(self->mytid, 0);
  1870. ASSERT_NE(self->mytid, 1) {
  1871. TH_LOG("Running this test as init is not supported. :)");
  1872. }
  1873. self->mypid = getpid();
  1874. ASSERT_GT(self->mypid, 0);
  1875. ASSERT_EQ(self->mytid, self->mypid);
  1876. self->parent = getppid();
  1877. ASSERT_GT(self->parent, 0);
  1878. ASSERT_NE(self->parent, self->mypid);
  1879. /* Launch tracer. */
  1880. self->tracer = setup_trace_fixture(_metadata,
  1881. variant->use_ptrace ? tracer_ptrace
  1882. : tracer_seccomp,
  1883. self, variant->use_ptrace);
  1884. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  1885. ASSERT_EQ(0, ret);
  1886. /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */
  1887. if (variant->use_ptrace)
  1888. return;
  1889. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  1890. ASSERT_EQ(0, ret);
  1891. }
  1892. FIXTURE_TEARDOWN(TRACE_syscall)
  1893. {
  1894. teardown_trace_fixture(_metadata, self->tracer);
  1895. }
  1896. TEST(negative_ENOSYS)
  1897. {
  1898. #if defined(__arm__)
  1899. SKIP(return, "arm32 does not support calling syscall -1");
  1900. #endif
  1901. /*
  1902. * There should be no difference between an "internal" skip
  1903. * and userspace asking for syscall "-1".
  1904. */
  1905. errno = 0;
  1906. EXPECT_EQ(-1, syscall(-1));
  1907. EXPECT_EQ(errno, ENOSYS);
  1908. /* And no difference for "still not valid but not -1". */
  1909. errno = 0;
  1910. EXPECT_EQ(-1, syscall(-101));
  1911. EXPECT_EQ(errno, ENOSYS);
  1912. }
  1913. TEST_F(TRACE_syscall, negative_ENOSYS)
  1914. {
  1915. negative_ENOSYS(_metadata);
  1916. }
  1917. TEST_F(TRACE_syscall, syscall_allowed)
  1918. {
  1919. /* getppid works as expected (no changes). */
  1920. EXPECT_EQ(self->parent, syscall(__NR_getppid));
  1921. EXPECT_NE(self->mypid, syscall(__NR_getppid));
  1922. }
  1923. TEST_F(TRACE_syscall, syscall_redirected)
  1924. {
  1925. /* getpid has been redirected to getppid as expected. */
  1926. EXPECT_EQ(self->parent, syscall(__NR_getpid));
  1927. EXPECT_NE(self->mypid, syscall(__NR_getpid));
  1928. }
  1929. TEST_F(TRACE_syscall, syscall_errno)
  1930. {
  1931. /* Tracer should skip the open syscall, resulting in ESRCH. */
  1932. EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
  1933. }
  1934. TEST_F(TRACE_syscall, syscall_faked)
  1935. {
  1936. /* Tracer skips the gettid syscall and store altered return value. */
  1937. EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
  1938. }
  1939. TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS)
  1940. {
  1941. struct sock_filter filter[] = {
  1942. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1943. offsetof(struct seccomp_data, nr)),
  1944. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1),
  1945. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
  1946. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1947. };
  1948. struct sock_fprog prog = {
  1949. .len = (unsigned short)ARRAY_SIZE(filter),
  1950. .filter = filter,
  1951. };
  1952. long ret;
  1953. /* Install "kill on mknodat" filter. */
  1954. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  1955. ASSERT_EQ(0, ret);
  1956. /* This should immediately die with SIGSYS, regardless of tracer. */
  1957. EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0));
  1958. }
  1959. TEST_F(TRACE_syscall, skip_after)
  1960. {
  1961. struct sock_filter filter[] = {
  1962. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1963. offsetof(struct seccomp_data, nr)),
  1964. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
  1965. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
  1966. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1967. };
  1968. struct sock_fprog prog = {
  1969. .len = (unsigned short)ARRAY_SIZE(filter),
  1970. .filter = filter,
  1971. };
  1972. long ret;
  1973. /* Install additional "errno on getppid" filter. */
  1974. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  1975. ASSERT_EQ(0, ret);
  1976. /* Tracer will redirect getpid to getppid, and we should see EPERM. */
  1977. errno = 0;
  1978. EXPECT_EQ(-1, syscall(__NR_getpid));
  1979. EXPECT_EQ(EPERM, errno);
  1980. }
  1981. TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
  1982. {
  1983. struct sock_filter filter[] = {
  1984. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  1985. offsetof(struct seccomp_data, nr)),
  1986. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
  1987. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  1988. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  1989. };
  1990. struct sock_fprog prog = {
  1991. .len = (unsigned short)ARRAY_SIZE(filter),
  1992. .filter = filter,
  1993. };
  1994. long ret;
  1995. /* Install additional "death on getppid" filter. */
  1996. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  1997. ASSERT_EQ(0, ret);
  1998. /* Tracer will redirect getpid to getppid, and we should die. */
  1999. EXPECT_NE(self->mypid, syscall(__NR_getpid));
  2000. }
  2001. TEST(seccomp_syscall)
  2002. {
  2003. struct sock_filter filter[] = {
  2004. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2005. };
  2006. struct sock_fprog prog = {
  2007. .len = (unsigned short)ARRAY_SIZE(filter),
  2008. .filter = filter,
  2009. };
  2010. long ret;
  2011. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  2012. ASSERT_EQ(0, ret) {
  2013. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2014. }
  2015. /* Reject insane operation. */
  2016. ret = seccomp(-1, 0, &prog);
  2017. ASSERT_NE(ENOSYS, errno) {
  2018. TH_LOG("Kernel does not support seccomp syscall!");
  2019. }
  2020. EXPECT_EQ(EINVAL, errno) {
  2021. TH_LOG("Did not reject crazy op value!");
  2022. }
  2023. /* Reject strict with flags or pointer. */
  2024. ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
  2025. EXPECT_EQ(EINVAL, errno) {
  2026. TH_LOG("Did not reject mode strict with flags!");
  2027. }
  2028. ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
  2029. EXPECT_EQ(EINVAL, errno) {
  2030. TH_LOG("Did not reject mode strict with uargs!");
  2031. }
  2032. /* Reject insane args for filter. */
  2033. ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
  2034. EXPECT_EQ(EINVAL, errno) {
  2035. TH_LOG("Did not reject crazy filter flags!");
  2036. }
  2037. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
  2038. EXPECT_EQ(EFAULT, errno) {
  2039. TH_LOG("Did not reject NULL filter!");
  2040. }
  2041. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
  2042. EXPECT_EQ(0, errno) {
  2043. TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
  2044. strerror(errno));
  2045. }
  2046. }
  2047. TEST(seccomp_syscall_mode_lock)
  2048. {
  2049. struct sock_filter filter[] = {
  2050. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2051. };
  2052. struct sock_fprog prog = {
  2053. .len = (unsigned short)ARRAY_SIZE(filter),
  2054. .filter = filter,
  2055. };
  2056. long ret;
  2057. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
  2058. ASSERT_EQ(0, ret) {
  2059. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2060. }
  2061. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
  2062. ASSERT_NE(ENOSYS, errno) {
  2063. TH_LOG("Kernel does not support seccomp syscall!");
  2064. }
  2065. EXPECT_EQ(0, ret) {
  2066. TH_LOG("Could not install filter!");
  2067. }
  2068. /* Make sure neither entry point will switch to strict. */
  2069. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
  2070. EXPECT_EQ(EINVAL, errno) {
  2071. TH_LOG("Switched to mode strict!");
  2072. }
  2073. ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
  2074. EXPECT_EQ(EINVAL, errno) {
  2075. TH_LOG("Switched to mode strict!");
  2076. }
  2077. }
  2078. /*
  2079. * Test detection of known and unknown filter flags. Userspace needs to be able
  2080. * to check if a filter flag is supported by the current kernel and a good way
  2081. * of doing that is by attempting to enter filter mode, with the flag bit in
  2082. * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
  2083. * that the flag is valid and EINVAL indicates that the flag is invalid.
  2084. */
  2085. TEST(detect_seccomp_filter_flags)
  2086. {
  2087. unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
  2088. SECCOMP_FILTER_FLAG_LOG,
  2089. SECCOMP_FILTER_FLAG_SPEC_ALLOW,
  2090. SECCOMP_FILTER_FLAG_NEW_LISTENER,
  2091. SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
  2092. unsigned int exclusive[] = {
  2093. SECCOMP_FILTER_FLAG_TSYNC,
  2094. SECCOMP_FILTER_FLAG_NEW_LISTENER };
  2095. unsigned int flag, all_flags, exclusive_mask;
  2096. int i;
  2097. long ret;
  2098. /* Test detection of individual known-good filter flags */
  2099. for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
  2100. int bits = 0;
  2101. flag = flags[i];
  2102. /* Make sure the flag is a single bit! */
  2103. while (flag) {
  2104. if (flag & 0x1)
  2105. bits ++;
  2106. flag >>= 1;
  2107. }
  2108. ASSERT_EQ(1, bits);
  2109. flag = flags[i];
  2110. ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
  2111. ASSERT_NE(ENOSYS, errno) {
  2112. TH_LOG("Kernel does not support seccomp syscall!");
  2113. }
  2114. EXPECT_EQ(-1, ret);
  2115. EXPECT_EQ(EFAULT, errno) {
  2116. TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
  2117. flag);
  2118. }
  2119. all_flags |= flag;
  2120. }
  2121. /*
  2122. * Test detection of all known-good filter flags combined. But
  2123. * for the exclusive flags we need to mask them out and try them
  2124. * individually for the "all flags" testing.
  2125. */
  2126. exclusive_mask = 0;
  2127. for (i = 0; i < ARRAY_SIZE(exclusive); i++)
  2128. exclusive_mask |= exclusive[i];
  2129. for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
  2130. flag = all_flags & ~exclusive_mask;
  2131. flag |= exclusive[i];
  2132. ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
  2133. EXPECT_EQ(-1, ret);
  2134. EXPECT_EQ(EFAULT, errno) {
  2135. TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
  2136. flag);
  2137. }
  2138. }
  2139. /* Test detection of an unknown filter flags, without exclusives. */
  2140. flag = -1;
  2141. flag &= ~exclusive_mask;
  2142. ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
  2143. EXPECT_EQ(-1, ret);
  2144. EXPECT_EQ(EINVAL, errno) {
  2145. TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
  2146. flag);
  2147. }
  2148. /*
  2149. * Test detection of an unknown filter flag that may simply need to be
  2150. * added to this test
  2151. */
  2152. flag = flags[ARRAY_SIZE(flags) - 1] << 1;
  2153. ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
  2154. EXPECT_EQ(-1, ret);
  2155. EXPECT_EQ(EINVAL, errno) {
  2156. TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
  2157. flag);
  2158. }
  2159. }
  2160. TEST(TSYNC_first)
  2161. {
  2162. struct sock_filter filter[] = {
  2163. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2164. };
  2165. struct sock_fprog prog = {
  2166. .len = (unsigned short)ARRAY_SIZE(filter),
  2167. .filter = filter,
  2168. };
  2169. long ret;
  2170. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
  2171. ASSERT_EQ(0, ret) {
  2172. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2173. }
  2174. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
  2175. &prog);
  2176. ASSERT_NE(ENOSYS, errno) {
  2177. TH_LOG("Kernel does not support seccomp syscall!");
  2178. }
  2179. EXPECT_EQ(0, ret) {
  2180. TH_LOG("Could not install initial filter with TSYNC!");
  2181. }
  2182. }
  2183. #define TSYNC_SIBLINGS 2
  2184. struct tsync_sibling {
  2185. pthread_t tid;
  2186. pid_t system_tid;
  2187. sem_t *started;
  2188. pthread_cond_t *cond;
  2189. pthread_mutex_t *mutex;
  2190. int diverge;
  2191. int num_waits;
  2192. struct sock_fprog *prog;
  2193. struct __test_metadata *metadata;
  2194. };
  2195. /*
  2196. * To avoid joining joined threads (which is not allowed by Bionic),
  2197. * make sure we both successfully join and clear the tid to skip a
  2198. * later join attempt during fixture teardown. Any remaining threads
  2199. * will be directly killed during teardown.
  2200. */
  2201. #define PTHREAD_JOIN(tid, status) \
  2202. do { \
  2203. int _rc = pthread_join(tid, status); \
  2204. if (_rc) { \
  2205. TH_LOG("pthread_join of tid %u failed: %d\n", \
  2206. (unsigned int)tid, _rc); \
  2207. } else { \
  2208. tid = 0; \
  2209. } \
  2210. } while (0)
  2211. FIXTURE(TSYNC) {
  2212. struct sock_fprog root_prog, apply_prog;
  2213. struct tsync_sibling sibling[TSYNC_SIBLINGS];
  2214. sem_t started;
  2215. pthread_cond_t cond;
  2216. pthread_mutex_t mutex;
  2217. int sibling_count;
  2218. };
  2219. FIXTURE_SETUP(TSYNC)
  2220. {
  2221. struct sock_filter root_filter[] = {
  2222. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2223. };
  2224. struct sock_filter apply_filter[] = {
  2225. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  2226. offsetof(struct seccomp_data, nr)),
  2227. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
  2228. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  2229. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2230. };
  2231. memset(&self->root_prog, 0, sizeof(self->root_prog));
  2232. memset(&self->apply_prog, 0, sizeof(self->apply_prog));
  2233. memset(&self->sibling, 0, sizeof(self->sibling));
  2234. self->root_prog.filter = malloc(sizeof(root_filter));
  2235. ASSERT_NE(NULL, self->root_prog.filter);
  2236. memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
  2237. self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
  2238. self->apply_prog.filter = malloc(sizeof(apply_filter));
  2239. ASSERT_NE(NULL, self->apply_prog.filter);
  2240. memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
  2241. self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
  2242. self->sibling_count = 0;
  2243. pthread_mutex_init(&self->mutex, NULL);
  2244. pthread_cond_init(&self->cond, NULL);
  2245. sem_init(&self->started, 0, 0);
  2246. self->sibling[0].tid = 0;
  2247. self->sibling[0].cond = &self->cond;
  2248. self->sibling[0].started = &self->started;
  2249. self->sibling[0].mutex = &self->mutex;
  2250. self->sibling[0].diverge = 0;
  2251. self->sibling[0].num_waits = 1;
  2252. self->sibling[0].prog = &self->root_prog;
  2253. self->sibling[0].metadata = _metadata;
  2254. self->sibling[1].tid = 0;
  2255. self->sibling[1].cond = &self->cond;
  2256. self->sibling[1].started = &self->started;
  2257. self->sibling[1].mutex = &self->mutex;
  2258. self->sibling[1].diverge = 0;
  2259. self->sibling[1].prog = &self->root_prog;
  2260. self->sibling[1].num_waits = 1;
  2261. self->sibling[1].metadata = _metadata;
  2262. }
  2263. FIXTURE_TEARDOWN(TSYNC)
  2264. {
  2265. int sib = 0;
  2266. if (self->root_prog.filter)
  2267. free(self->root_prog.filter);
  2268. if (self->apply_prog.filter)
  2269. free(self->apply_prog.filter);
  2270. for ( ; sib < self->sibling_count; ++sib) {
  2271. struct tsync_sibling *s = &self->sibling[sib];
  2272. if (!s->tid)
  2273. continue;
  2274. /*
  2275. * If a thread is still running, it may be stuck, so hit
  2276. * it over the head really hard.
  2277. */
  2278. pthread_kill(s->tid, 9);
  2279. }
  2280. pthread_mutex_destroy(&self->mutex);
  2281. pthread_cond_destroy(&self->cond);
  2282. sem_destroy(&self->started);
  2283. }
  2284. void *tsync_sibling(void *data)
  2285. {
  2286. long ret = 0;
  2287. struct tsync_sibling *me = data;
  2288. me->system_tid = syscall(__NR_gettid);
  2289. pthread_mutex_lock(me->mutex);
  2290. if (me->diverge) {
  2291. /* Just re-apply the root prog to fork the tree */
  2292. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
  2293. me->prog, 0, 0);
  2294. }
  2295. sem_post(me->started);
  2296. /* Return outside of started so parent notices failures. */
  2297. if (ret) {
  2298. pthread_mutex_unlock(me->mutex);
  2299. return (void *)SIBLING_EXIT_FAILURE;
  2300. }
  2301. do {
  2302. pthread_cond_wait(me->cond, me->mutex);
  2303. me->num_waits = me->num_waits - 1;
  2304. } while (me->num_waits);
  2305. pthread_mutex_unlock(me->mutex);
  2306. ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  2307. if (!ret)
  2308. return (void *)SIBLING_EXIT_NEWPRIVS;
  2309. read(-1, NULL, 0);
  2310. return (void *)SIBLING_EXIT_UNKILLED;
  2311. }
  2312. void tsync_start_sibling(struct tsync_sibling *sibling)
  2313. {
  2314. pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
  2315. }
  2316. TEST_F(TSYNC, siblings_fail_prctl)
  2317. {
  2318. long ret;
  2319. void *status;
  2320. struct sock_filter filter[] = {
  2321. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  2322. offsetof(struct seccomp_data, nr)),
  2323. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
  2324. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
  2325. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2326. };
  2327. struct sock_fprog prog = {
  2328. .len = (unsigned short)ARRAY_SIZE(filter),
  2329. .filter = filter,
  2330. };
  2331. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  2332. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2333. }
  2334. /* Check prctl failure detection by requesting sib 0 diverge. */
  2335. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
  2336. ASSERT_NE(ENOSYS, errno) {
  2337. TH_LOG("Kernel does not support seccomp syscall!");
  2338. }
  2339. ASSERT_EQ(0, ret) {
  2340. TH_LOG("setting filter failed");
  2341. }
  2342. self->sibling[0].diverge = 1;
  2343. tsync_start_sibling(&self->sibling[0]);
  2344. tsync_start_sibling(&self->sibling[1]);
  2345. while (self->sibling_count < TSYNC_SIBLINGS) {
  2346. sem_wait(&self->started);
  2347. self->sibling_count++;
  2348. }
  2349. /* Signal the threads to clean up*/
  2350. pthread_mutex_lock(&self->mutex);
  2351. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2352. TH_LOG("cond broadcast non-zero");
  2353. }
  2354. pthread_mutex_unlock(&self->mutex);
  2355. /* Ensure diverging sibling failed to call prctl. */
  2356. PTHREAD_JOIN(self->sibling[0].tid, &status);
  2357. EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
  2358. PTHREAD_JOIN(self->sibling[1].tid, &status);
  2359. EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
  2360. }
  2361. TEST_F(TSYNC, two_siblings_with_ancestor)
  2362. {
  2363. long ret;
  2364. void *status;
  2365. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  2366. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2367. }
  2368. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
  2369. ASSERT_NE(ENOSYS, errno) {
  2370. TH_LOG("Kernel does not support seccomp syscall!");
  2371. }
  2372. ASSERT_EQ(0, ret) {
  2373. TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
  2374. }
  2375. tsync_start_sibling(&self->sibling[0]);
  2376. tsync_start_sibling(&self->sibling[1]);
  2377. while (self->sibling_count < TSYNC_SIBLINGS) {
  2378. sem_wait(&self->started);
  2379. self->sibling_count++;
  2380. }
  2381. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
  2382. &self->apply_prog);
  2383. ASSERT_EQ(0, ret) {
  2384. TH_LOG("Could install filter on all threads!");
  2385. }
  2386. /* Tell the siblings to test the policy */
  2387. pthread_mutex_lock(&self->mutex);
  2388. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2389. TH_LOG("cond broadcast non-zero");
  2390. }
  2391. pthread_mutex_unlock(&self->mutex);
  2392. /* Ensure they are both killed and don't exit cleanly. */
  2393. PTHREAD_JOIN(self->sibling[0].tid, &status);
  2394. EXPECT_EQ(0x0, (long)status);
  2395. PTHREAD_JOIN(self->sibling[1].tid, &status);
  2396. EXPECT_EQ(0x0, (long)status);
  2397. }
  2398. TEST_F(TSYNC, two_sibling_want_nnp)
  2399. {
  2400. void *status;
  2401. /* start siblings before any prctl() operations */
  2402. tsync_start_sibling(&self->sibling[0]);
  2403. tsync_start_sibling(&self->sibling[1]);
  2404. while (self->sibling_count < TSYNC_SIBLINGS) {
  2405. sem_wait(&self->started);
  2406. self->sibling_count++;
  2407. }
  2408. /* Tell the siblings to test no policy */
  2409. pthread_mutex_lock(&self->mutex);
  2410. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2411. TH_LOG("cond broadcast non-zero");
  2412. }
  2413. pthread_mutex_unlock(&self->mutex);
  2414. /* Ensure they are both upset about lacking nnp. */
  2415. PTHREAD_JOIN(self->sibling[0].tid, &status);
  2416. EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
  2417. PTHREAD_JOIN(self->sibling[1].tid, &status);
  2418. EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
  2419. }
  2420. TEST_F(TSYNC, two_siblings_with_no_filter)
  2421. {
  2422. long ret;
  2423. void *status;
  2424. /* start siblings before any prctl() operations */
  2425. tsync_start_sibling(&self->sibling[0]);
  2426. tsync_start_sibling(&self->sibling[1]);
  2427. while (self->sibling_count < TSYNC_SIBLINGS) {
  2428. sem_wait(&self->started);
  2429. self->sibling_count++;
  2430. }
  2431. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  2432. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2433. }
  2434. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
  2435. &self->apply_prog);
  2436. ASSERT_NE(ENOSYS, errno) {
  2437. TH_LOG("Kernel does not support seccomp syscall!");
  2438. }
  2439. ASSERT_EQ(0, ret) {
  2440. TH_LOG("Could install filter on all threads!");
  2441. }
  2442. /* Tell the siblings to test the policy */
  2443. pthread_mutex_lock(&self->mutex);
  2444. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2445. TH_LOG("cond broadcast non-zero");
  2446. }
  2447. pthread_mutex_unlock(&self->mutex);
  2448. /* Ensure they are both killed and don't exit cleanly. */
  2449. PTHREAD_JOIN(self->sibling[0].tid, &status);
  2450. EXPECT_EQ(0x0, (long)status);
  2451. PTHREAD_JOIN(self->sibling[1].tid, &status);
  2452. EXPECT_EQ(0x0, (long)status);
  2453. }
  2454. TEST_F(TSYNC, two_siblings_with_one_divergence)
  2455. {
  2456. long ret;
  2457. void *status;
  2458. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  2459. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2460. }
  2461. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
  2462. ASSERT_NE(ENOSYS, errno) {
  2463. TH_LOG("Kernel does not support seccomp syscall!");
  2464. }
  2465. ASSERT_EQ(0, ret) {
  2466. TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
  2467. }
  2468. self->sibling[0].diverge = 1;
  2469. tsync_start_sibling(&self->sibling[0]);
  2470. tsync_start_sibling(&self->sibling[1]);
  2471. while (self->sibling_count < TSYNC_SIBLINGS) {
  2472. sem_wait(&self->started);
  2473. self->sibling_count++;
  2474. }
  2475. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
  2476. &self->apply_prog);
  2477. ASSERT_EQ(self->sibling[0].system_tid, ret) {
  2478. TH_LOG("Did not fail on diverged sibling.");
  2479. }
  2480. /* Wake the threads */
  2481. pthread_mutex_lock(&self->mutex);
  2482. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2483. TH_LOG("cond broadcast non-zero");
  2484. }
  2485. pthread_mutex_unlock(&self->mutex);
  2486. /* Ensure they are both unkilled. */
  2487. PTHREAD_JOIN(self->sibling[0].tid, &status);
  2488. EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
  2489. PTHREAD_JOIN(self->sibling[1].tid, &status);
  2490. EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
  2491. }
  2492. TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
  2493. {
  2494. long ret, flags;
  2495. void *status;
  2496. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  2497. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2498. }
  2499. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
  2500. ASSERT_NE(ENOSYS, errno) {
  2501. TH_LOG("Kernel does not support seccomp syscall!");
  2502. }
  2503. ASSERT_EQ(0, ret) {
  2504. TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
  2505. }
  2506. self->sibling[0].diverge = 1;
  2507. tsync_start_sibling(&self->sibling[0]);
  2508. tsync_start_sibling(&self->sibling[1]);
  2509. while (self->sibling_count < TSYNC_SIBLINGS) {
  2510. sem_wait(&self->started);
  2511. self->sibling_count++;
  2512. }
  2513. flags = SECCOMP_FILTER_FLAG_TSYNC | \
  2514. SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
  2515. ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
  2516. ASSERT_EQ(ESRCH, errno) {
  2517. TH_LOG("Did not return ESRCH for diverged sibling.");
  2518. }
  2519. ASSERT_EQ(-1, ret) {
  2520. TH_LOG("Did not fail on diverged sibling.");
  2521. }
  2522. /* Wake the threads */
  2523. pthread_mutex_lock(&self->mutex);
  2524. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2525. TH_LOG("cond broadcast non-zero");
  2526. }
  2527. pthread_mutex_unlock(&self->mutex);
  2528. /* Ensure they are both unkilled. */
  2529. PTHREAD_JOIN(self->sibling[0].tid, &status);
  2530. EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
  2531. PTHREAD_JOIN(self->sibling[1].tid, &status);
  2532. EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
  2533. }
  2534. TEST_F(TSYNC, two_siblings_not_under_filter)
  2535. {
  2536. long ret, sib;
  2537. void *status;
  2538. struct timespec delay = { .tv_nsec = 100000000 };
  2539. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  2540. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2541. }
  2542. /*
  2543. * Sibling 0 will have its own seccomp policy
  2544. * and Sibling 1 will not be under seccomp at
  2545. * all. Sibling 1 will enter seccomp and 0
  2546. * will cause failure.
  2547. */
  2548. self->sibling[0].diverge = 1;
  2549. tsync_start_sibling(&self->sibling[0]);
  2550. tsync_start_sibling(&self->sibling[1]);
  2551. while (self->sibling_count < TSYNC_SIBLINGS) {
  2552. sem_wait(&self->started);
  2553. self->sibling_count++;
  2554. }
  2555. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
  2556. ASSERT_NE(ENOSYS, errno) {
  2557. TH_LOG("Kernel does not support seccomp syscall!");
  2558. }
  2559. ASSERT_EQ(0, ret) {
  2560. TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
  2561. }
  2562. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
  2563. &self->apply_prog);
  2564. ASSERT_EQ(ret, self->sibling[0].system_tid) {
  2565. TH_LOG("Did not fail on diverged sibling.");
  2566. }
  2567. sib = 1;
  2568. if (ret == self->sibling[0].system_tid)
  2569. sib = 0;
  2570. pthread_mutex_lock(&self->mutex);
  2571. /* Increment the other siblings num_waits so we can clean up
  2572. * the one we just saw.
  2573. */
  2574. self->sibling[!sib].num_waits += 1;
  2575. /* Signal the thread to clean up*/
  2576. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2577. TH_LOG("cond broadcast non-zero");
  2578. }
  2579. pthread_mutex_unlock(&self->mutex);
  2580. PTHREAD_JOIN(self->sibling[sib].tid, &status);
  2581. EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
  2582. /* Poll for actual task death. pthread_join doesn't guarantee it. */
  2583. while (!kill(self->sibling[sib].system_tid, 0))
  2584. nanosleep(&delay, NULL);
  2585. /* Switch to the remaining sibling */
  2586. sib = !sib;
  2587. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
  2588. &self->apply_prog);
  2589. ASSERT_EQ(0, ret) {
  2590. TH_LOG("Expected the remaining sibling to sync");
  2591. };
  2592. pthread_mutex_lock(&self->mutex);
  2593. /* If remaining sibling didn't have a chance to wake up during
  2594. * the first broadcast, manually reduce the num_waits now.
  2595. */
  2596. if (self->sibling[sib].num_waits > 1)
  2597. self->sibling[sib].num_waits = 1;
  2598. ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
  2599. TH_LOG("cond broadcast non-zero");
  2600. }
  2601. pthread_mutex_unlock(&self->mutex);
  2602. PTHREAD_JOIN(self->sibling[sib].tid, &status);
  2603. EXPECT_EQ(0, (long)status);
  2604. /* Poll for actual task death. pthread_join doesn't guarantee it. */
  2605. while (!kill(self->sibling[sib].system_tid, 0))
  2606. nanosleep(&delay, NULL);
  2607. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
  2608. &self->apply_prog);
  2609. ASSERT_EQ(0, ret); /* just us chickens */
  2610. }
  2611. /* Make sure restarted syscalls are seen directly as "restart_syscall". */
  2612. TEST(syscall_restart)
  2613. {
  2614. long ret;
  2615. unsigned long msg;
  2616. pid_t child_pid;
  2617. int pipefd[2];
  2618. int status;
  2619. siginfo_t info = { };
  2620. struct sock_filter filter[] = {
  2621. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  2622. offsetof(struct seccomp_data, nr)),
  2623. #ifdef __NR_sigreturn
  2624. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
  2625. #endif
  2626. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
  2627. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
  2628. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
  2629. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
  2630. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
  2631. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
  2632. /* Allow __NR_write for easy logging. */
  2633. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
  2634. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2635. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  2636. /* The nanosleep jump target. */
  2637. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
  2638. /* The restart_syscall jump target. */
  2639. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
  2640. };
  2641. struct sock_fprog prog = {
  2642. .len = (unsigned short)ARRAY_SIZE(filter),
  2643. .filter = filter,
  2644. };
  2645. #if defined(__arm__)
  2646. struct utsname utsbuf;
  2647. #endif
  2648. ASSERT_EQ(0, pipe(pipefd));
  2649. child_pid = fork();
  2650. ASSERT_LE(0, child_pid);
  2651. if (child_pid == 0) {
  2652. /* Child uses EXPECT not ASSERT to deliver status correctly. */
  2653. char buf = ' ';
  2654. struct timespec timeout = { };
  2655. /* Attach parent as tracer and stop. */
  2656. EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
  2657. EXPECT_EQ(0, raise(SIGSTOP));
  2658. EXPECT_EQ(0, close(pipefd[1]));
  2659. EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
  2660. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2661. }
  2662. ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  2663. EXPECT_EQ(0, ret) {
  2664. TH_LOG("Failed to install filter!");
  2665. }
  2666. EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
  2667. TH_LOG("Failed to read() sync from parent");
  2668. }
  2669. EXPECT_EQ('.', buf) {
  2670. TH_LOG("Failed to get sync data from read()");
  2671. }
  2672. /* Start nanosleep to be interrupted. */
  2673. timeout.tv_sec = 1;
  2674. errno = 0;
  2675. EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
  2676. TH_LOG("Call to nanosleep() failed (errno %d: %s)",
  2677. errno, strerror(errno));
  2678. }
  2679. /* Read final sync from parent. */
  2680. EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
  2681. TH_LOG("Failed final read() from parent");
  2682. }
  2683. EXPECT_EQ('!', buf) {
  2684. TH_LOG("Failed to get final data from read()");
  2685. }
  2686. /* Directly report the status of our test harness results. */
  2687. syscall(__NR_exit, _metadata->exit_code);
  2688. }
  2689. EXPECT_EQ(0, close(pipefd[0]));
  2690. /* Attach to child, setup options, and release. */
  2691. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  2692. ASSERT_EQ(true, WIFSTOPPED(status));
  2693. ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
  2694. PTRACE_O_TRACESECCOMP));
  2695. ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
  2696. ASSERT_EQ(1, write(pipefd[1], ".", 1));
  2697. /* Wait for nanosleep() to start. */
  2698. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  2699. ASSERT_EQ(true, WIFSTOPPED(status));
  2700. ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
  2701. ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
  2702. ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
  2703. ASSERT_EQ(0x100, msg);
  2704. ret = get_syscall(_metadata, child_pid);
  2705. EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
  2706. /* Might as well check siginfo for sanity while we're here. */
  2707. ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
  2708. ASSERT_EQ(SIGTRAP, info.si_signo);
  2709. ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
  2710. EXPECT_EQ(0, info.si_errno);
  2711. EXPECT_EQ(getuid(), info.si_uid);
  2712. /* Verify signal delivery came from child (seccomp-triggered). */
  2713. EXPECT_EQ(child_pid, info.si_pid);
  2714. /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
  2715. ASSERT_EQ(0, kill(child_pid, SIGSTOP));
  2716. ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
  2717. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  2718. ASSERT_EQ(true, WIFSTOPPED(status));
  2719. ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
  2720. ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
  2721. /*
  2722. * There is no siginfo on SIGSTOP any more, so we can't verify
  2723. * signal delivery came from parent now (getpid() == info.si_pid).
  2724. * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
  2725. * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
  2726. */
  2727. EXPECT_EQ(SIGSTOP, info.si_signo);
  2728. /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
  2729. ASSERT_EQ(0, kill(child_pid, SIGCONT));
  2730. ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
  2731. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  2732. ASSERT_EQ(true, WIFSTOPPED(status));
  2733. ASSERT_EQ(SIGCONT, WSTOPSIG(status));
  2734. ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
  2735. /* Wait for restart_syscall() to start. */
  2736. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  2737. ASSERT_EQ(true, WIFSTOPPED(status));
  2738. ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
  2739. ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
  2740. ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
  2741. ASSERT_EQ(0x200, msg);
  2742. ret = get_syscall(_metadata, child_pid);
  2743. #if defined(__arm__)
  2744. /*
  2745. * - native ARM registers do NOT expose true syscall.
  2746. * - compat ARM registers on ARM64 DO expose true syscall.
  2747. * - values of utsbuf.machine include 'armv8l' or 'armb8b'
  2748. * for ARM64 running in compat mode.
  2749. */
  2750. ASSERT_EQ(0, uname(&utsbuf));
  2751. if ((strncmp(utsbuf.machine, "arm", 3) == 0) &&
  2752. (strncmp(utsbuf.machine, "armv8l", 6) != 0) &&
  2753. (strncmp(utsbuf.machine, "armv8b", 6) != 0)) {
  2754. EXPECT_EQ(__NR_nanosleep, ret);
  2755. } else
  2756. #endif
  2757. {
  2758. EXPECT_EQ(__NR_restart_syscall, ret);
  2759. }
  2760. /* Write again to end test. */
  2761. ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
  2762. ASSERT_EQ(1, write(pipefd[1], "!", 1));
  2763. EXPECT_EQ(0, close(pipefd[1]));
  2764. ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
  2765. if (WIFSIGNALED(status) || WEXITSTATUS(status))
  2766. _metadata->exit_code = KSFT_FAIL;
  2767. }
  2768. TEST_SIGNAL(filter_flag_log, SIGSYS)
  2769. {
  2770. struct sock_filter allow_filter[] = {
  2771. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2772. };
  2773. struct sock_filter kill_filter[] = {
  2774. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  2775. offsetof(struct seccomp_data, nr)),
  2776. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
  2777. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  2778. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2779. };
  2780. struct sock_fprog allow_prog = {
  2781. .len = (unsigned short)ARRAY_SIZE(allow_filter),
  2782. .filter = allow_filter,
  2783. };
  2784. struct sock_fprog kill_prog = {
  2785. .len = (unsigned short)ARRAY_SIZE(kill_filter),
  2786. .filter = kill_filter,
  2787. };
  2788. long ret;
  2789. pid_t parent = getppid();
  2790. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  2791. ASSERT_EQ(0, ret);
  2792. /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
  2793. ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
  2794. &allow_prog);
  2795. ASSERT_NE(ENOSYS, errno) {
  2796. TH_LOG("Kernel does not support seccomp syscall!");
  2797. }
  2798. EXPECT_NE(0, ret) {
  2799. TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
  2800. }
  2801. EXPECT_EQ(EINVAL, errno) {
  2802. TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
  2803. }
  2804. /* Verify that a simple, permissive filter can be added with no flags */
  2805. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
  2806. EXPECT_EQ(0, ret);
  2807. /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
  2808. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
  2809. &allow_prog);
  2810. ASSERT_NE(EINVAL, errno) {
  2811. TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
  2812. }
  2813. EXPECT_EQ(0, ret);
  2814. /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
  2815. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
  2816. &kill_prog);
  2817. EXPECT_EQ(0, ret);
  2818. EXPECT_EQ(parent, syscall(__NR_getppid));
  2819. /* getpid() should never return. */
  2820. EXPECT_EQ(0, syscall(__NR_getpid));
  2821. }
  2822. TEST(get_action_avail)
  2823. {
  2824. __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
  2825. SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
  2826. SECCOMP_RET_LOG, SECCOMP_RET_ALLOW };
  2827. __u32 unknown_action = 0x10000000U;
  2828. int i;
  2829. long ret;
  2830. ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
  2831. ASSERT_NE(ENOSYS, errno) {
  2832. TH_LOG("Kernel does not support seccomp syscall!");
  2833. }
  2834. ASSERT_NE(EINVAL, errno) {
  2835. TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
  2836. }
  2837. EXPECT_EQ(ret, 0);
  2838. for (i = 0; i < ARRAY_SIZE(actions); i++) {
  2839. ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
  2840. EXPECT_EQ(ret, 0) {
  2841. TH_LOG("Expected action (0x%X) not available!",
  2842. actions[i]);
  2843. }
  2844. }
  2845. /* Check that an unknown action is handled properly (EOPNOTSUPP) */
  2846. ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
  2847. EXPECT_EQ(ret, -1);
  2848. EXPECT_EQ(errno, EOPNOTSUPP);
  2849. }
  2850. TEST(get_metadata)
  2851. {
  2852. pid_t pid;
  2853. int pipefd[2];
  2854. char buf;
  2855. struct seccomp_metadata md;
  2856. long ret;
  2857. /* Only real root can get metadata. */
  2858. if (geteuid()) {
  2859. SKIP(return, "get_metadata requires real root");
  2860. return;
  2861. }
  2862. ASSERT_EQ(0, pipe(pipefd));
  2863. pid = fork();
  2864. ASSERT_GE(pid, 0);
  2865. if (pid == 0) {
  2866. struct sock_filter filter[] = {
  2867. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2868. };
  2869. struct sock_fprog prog = {
  2870. .len = (unsigned short)ARRAY_SIZE(filter),
  2871. .filter = filter,
  2872. };
  2873. /* one with log, one without */
  2874. EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
  2875. SECCOMP_FILTER_FLAG_LOG, &prog));
  2876. EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
  2877. EXPECT_EQ(0, close(pipefd[0]));
  2878. ASSERT_EQ(1, write(pipefd[1], "1", 1));
  2879. ASSERT_EQ(0, close(pipefd[1]));
  2880. while (1)
  2881. sleep(100);
  2882. }
  2883. ASSERT_EQ(0, close(pipefd[1]));
  2884. ASSERT_EQ(1, read(pipefd[0], &buf, 1));
  2885. ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
  2886. ASSERT_EQ(pid, waitpid(pid, NULL, 0));
  2887. /* Past here must not use ASSERT or child process is never killed. */
  2888. md.filter_off = 0;
  2889. errno = 0;
  2890. ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
  2891. EXPECT_EQ(sizeof(md), ret) {
  2892. if (errno == EINVAL)
  2893. SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
  2894. }
  2895. EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
  2896. EXPECT_EQ(md.filter_off, 0);
  2897. md.filter_off = 1;
  2898. ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
  2899. EXPECT_EQ(sizeof(md), ret);
  2900. EXPECT_EQ(md.flags, 0);
  2901. EXPECT_EQ(md.filter_off, 1);
  2902. skip:
  2903. ASSERT_EQ(0, kill(pid, SIGKILL));
  2904. }
  2905. static int user_notif_syscall(int nr, unsigned int flags)
  2906. {
  2907. struct sock_filter filter[] = {
  2908. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  2909. offsetof(struct seccomp_data, nr)),
  2910. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
  2911. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
  2912. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2913. };
  2914. struct sock_fprog prog = {
  2915. .len = (unsigned short)ARRAY_SIZE(filter),
  2916. .filter = filter,
  2917. };
  2918. return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
  2919. }
  2920. #define USER_NOTIF_MAGIC INT_MAX
  2921. TEST(user_notification_basic)
  2922. {
  2923. pid_t pid;
  2924. long ret;
  2925. int status, listener;
  2926. struct seccomp_notif req = {};
  2927. struct seccomp_notif_resp resp = {};
  2928. struct pollfd pollfd;
  2929. struct sock_filter filter[] = {
  2930. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  2931. };
  2932. struct sock_fprog prog = {
  2933. .len = (unsigned short)ARRAY_SIZE(filter),
  2934. .filter = filter,
  2935. };
  2936. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  2937. ASSERT_EQ(0, ret) {
  2938. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  2939. }
  2940. pid = fork();
  2941. ASSERT_GE(pid, 0);
  2942. /* Check that we get -ENOSYS with no listener attached */
  2943. if (pid == 0) {
  2944. if (user_notif_syscall(__NR_getppid, 0) < 0)
  2945. exit(1);
  2946. ret = syscall(__NR_getppid);
  2947. exit(ret >= 0 || errno != ENOSYS);
  2948. }
  2949. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  2950. EXPECT_EQ(true, WIFEXITED(status));
  2951. EXPECT_EQ(0, WEXITSTATUS(status));
  2952. /* Add some no-op filters for grins. */
  2953. EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
  2954. EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
  2955. EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
  2956. EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
  2957. /* Check that the basic notification machinery works */
  2958. listener = user_notif_syscall(__NR_getppid,
  2959. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  2960. ASSERT_GE(listener, 0);
  2961. /* Installing a second listener in the chain should EBUSY */
  2962. EXPECT_EQ(user_notif_syscall(__NR_getppid,
  2963. SECCOMP_FILTER_FLAG_NEW_LISTENER),
  2964. -1);
  2965. EXPECT_EQ(errno, EBUSY);
  2966. pid = fork();
  2967. ASSERT_GE(pid, 0);
  2968. if (pid == 0) {
  2969. ret = syscall(__NR_getppid);
  2970. exit(ret != USER_NOTIF_MAGIC);
  2971. }
  2972. pollfd.fd = listener;
  2973. pollfd.events = POLLIN | POLLOUT;
  2974. EXPECT_GT(poll(&pollfd, 1, -1), 0);
  2975. EXPECT_EQ(pollfd.revents, POLLIN);
  2976. /* Test that we can't pass garbage to the kernel. */
  2977. memset(&req, 0, sizeof(req));
  2978. req.pid = -1;
  2979. errno = 0;
  2980. ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
  2981. EXPECT_EQ(-1, ret);
  2982. EXPECT_EQ(EINVAL, errno);
  2983. if (ret) {
  2984. req.pid = 0;
  2985. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  2986. }
  2987. pollfd.fd = listener;
  2988. pollfd.events = POLLIN | POLLOUT;
  2989. EXPECT_GT(poll(&pollfd, 1, -1), 0);
  2990. EXPECT_EQ(pollfd.revents, POLLOUT);
  2991. EXPECT_EQ(req.data.nr, __NR_getppid);
  2992. resp.id = req.id;
  2993. resp.error = 0;
  2994. resp.val = USER_NOTIF_MAGIC;
  2995. /* check that we make sure flags == 0 */
  2996. resp.flags = 1;
  2997. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
  2998. EXPECT_EQ(errno, EINVAL);
  2999. resp.flags = 0;
  3000. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3001. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3002. EXPECT_EQ(true, WIFEXITED(status));
  3003. EXPECT_EQ(0, WEXITSTATUS(status));
  3004. }
  3005. TEST(user_notification_with_tsync)
  3006. {
  3007. int ret;
  3008. unsigned int flags;
  3009. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3010. ASSERT_EQ(0, ret) {
  3011. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3012. }
  3013. /* these were exclusive */
  3014. flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
  3015. SECCOMP_FILTER_FLAG_TSYNC;
  3016. ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
  3017. ASSERT_EQ(EINVAL, errno);
  3018. /* but now they're not */
  3019. flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
  3020. ret = user_notif_syscall(__NR_getppid, flags);
  3021. close(ret);
  3022. ASSERT_LE(0, ret);
  3023. }
  3024. TEST(user_notification_kill_in_middle)
  3025. {
  3026. pid_t pid;
  3027. long ret;
  3028. int listener;
  3029. struct seccomp_notif req = {};
  3030. struct seccomp_notif_resp resp = {};
  3031. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3032. ASSERT_EQ(0, ret) {
  3033. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3034. }
  3035. listener = user_notif_syscall(__NR_getppid,
  3036. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3037. ASSERT_GE(listener, 0);
  3038. /*
  3039. * Check that nothing bad happens when we kill the task in the middle
  3040. * of a syscall.
  3041. */
  3042. pid = fork();
  3043. ASSERT_GE(pid, 0);
  3044. if (pid == 0) {
  3045. ret = syscall(__NR_getppid);
  3046. exit(ret != USER_NOTIF_MAGIC);
  3047. }
  3048. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3049. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
  3050. EXPECT_EQ(kill(pid, SIGKILL), 0);
  3051. EXPECT_EQ(waitpid(pid, NULL, 0), pid);
  3052. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
  3053. resp.id = req.id;
  3054. ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
  3055. EXPECT_EQ(ret, -1);
  3056. EXPECT_EQ(errno, ENOENT);
  3057. }
  /* Descriptor signal_handler() writes to; -1 until a test stores one here. */
  static int handled = -1;

  /*
   * Signal handler that reports delivery by writing the single byte 'c' to
   * the descriptor stored in handled.
   *
   * Only write() is used, including on the failure path, because perror() is
   * not async-signal-safe. errno is saved and restored so the interrupted
   * code never observes a value set by this handler.
   */
  static void signal_handler(int signal)
  {
      static const char failure[] = "write from signal: failed\n";
      const int saved_errno = errno;

      (void)signal;

      if (write(handled, "c", 1) != 1) {
          /* best effort only: there is nowhere left to report to */
          ssize_t unused = write(STDERR_FILENO, failure,
                                 sizeof(failure) - 1);
          (void)unused;
      }

      errno = saved_errno;
  }
  /* Signal handler that does nothing with the signal it receives. */
  static void signal_handler_nop(int signal)
  {
      (void)signal;
  }
  3067. TEST(user_notification_signal)
  3068. {
  3069. pid_t pid;
  3070. long ret;
  3071. int status, listener, sk_pair[2];
  3072. struct seccomp_notif req = {};
  3073. struct seccomp_notif_resp resp = {};
  3074. char c;
  3075. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3076. ASSERT_EQ(0, ret) {
  3077. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3078. }
  3079. ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
  3080. listener = user_notif_syscall(__NR_gettid,
  3081. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3082. ASSERT_GE(listener, 0);
  3083. pid = fork();
  3084. ASSERT_GE(pid, 0);
  3085. if (pid == 0) {
  3086. close(sk_pair[0]);
  3087. handled = sk_pair[1];
  3088. if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
  3089. perror("signal");
  3090. exit(1);
  3091. }
  3092. /*
  3093. * ERESTARTSYS behavior is a bit hard to test, because we need
  3094. * to rely on a signal that has not yet been handled. Let's at
  3095. * least check that the error code gets propagated through, and
  3096. * hope that it doesn't break when there is actually a signal :)
  3097. */
  3098. ret = syscall(__NR_gettid);
  3099. exit(!(ret == -1 && errno == 512));
  3100. }
  3101. close(sk_pair[1]);
  3102. memset(&req, 0, sizeof(req));
  3103. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3104. EXPECT_EQ(kill(pid, SIGUSR1), 0);
  3105. /*
  3106. * Make sure the signal really is delivered, which means we're not
  3107. * stuck in the user notification code any more and the notification
  3108. * should be dead.
  3109. */
  3110. EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
  3111. resp.id = req.id;
  3112. resp.error = -EPERM;
  3113. resp.val = 0;
  3114. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
  3115. EXPECT_EQ(errno, ENOENT);
  3116. memset(&req, 0, sizeof(req));
  3117. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3118. resp.id = req.id;
  3119. resp.error = -512; /* -ERESTARTSYS */
  3120. resp.val = 0;
  3121. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3122. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3123. EXPECT_EQ(true, WIFEXITED(status));
  3124. EXPECT_EQ(0, WEXITSTATUS(status));
  3125. }
  3126. TEST(user_notification_closed_listener)
  3127. {
  3128. pid_t pid;
  3129. long ret;
  3130. int status, listener;
  3131. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3132. ASSERT_EQ(0, ret) {
  3133. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3134. }
  3135. listener = user_notif_syscall(__NR_getppid,
  3136. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3137. ASSERT_GE(listener, 0);
  3138. /*
  3139. * Check that we get an ENOSYS when the listener is closed.
  3140. */
  3141. pid = fork();
  3142. ASSERT_GE(pid, 0);
  3143. if (pid == 0) {
  3144. close(listener);
  3145. ret = syscall(__NR_getppid);
  3146. exit(ret != -1 && errno != ENOSYS);
  3147. }
  3148. close(listener);
  3149. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3150. EXPECT_EQ(true, WIFEXITED(status));
  3151. EXPECT_EQ(0, WEXITSTATUS(status));
  3152. }
  3153. /*
  3154. * Check that a pid in a child namespace still shows up as valid in ours.
  3155. */
  3156. TEST(user_notification_child_pid_ns)
  3157. {
  3158. pid_t pid;
  3159. int status, listener;
  3160. struct seccomp_notif req = {};
  3161. struct seccomp_notif_resp resp = {};
  3162. ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
  3163. if (errno == EINVAL)
  3164. SKIP(return, "kernel missing CLONE_NEWUSER support");
  3165. };
  3166. listener = user_notif_syscall(__NR_getppid,
  3167. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3168. ASSERT_GE(listener, 0);
  3169. pid = fork();
  3170. ASSERT_GE(pid, 0);
  3171. if (pid == 0)
  3172. exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
  3173. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3174. EXPECT_EQ(req.pid, pid);
  3175. resp.id = req.id;
  3176. resp.error = 0;
  3177. resp.val = USER_NOTIF_MAGIC;
  3178. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3179. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3180. EXPECT_EQ(true, WIFEXITED(status));
  3181. EXPECT_EQ(0, WEXITSTATUS(status));
  3182. close(listener);
  3183. }
  3184. /*
  3185. * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
  3186. * invalid.
  3187. */
  3188. TEST(user_notification_sibling_pid_ns)
  3189. {
  3190. pid_t pid, pid2;
  3191. int status, listener;
  3192. struct seccomp_notif req = {};
  3193. struct seccomp_notif_resp resp = {};
  3194. ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
  3195. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3196. }
  3197. listener = user_notif_syscall(__NR_getppid,
  3198. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3199. ASSERT_GE(listener, 0);
  3200. pid = fork();
  3201. ASSERT_GE(pid, 0);
  3202. if (pid == 0) {
  3203. ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
  3204. if (errno == EPERM)
  3205. SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
  3206. else if (errno == EINVAL)
  3207. SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
  3208. }
  3209. pid2 = fork();
  3210. ASSERT_GE(pid2, 0);
  3211. if (pid2 == 0)
  3212. exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
  3213. EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
  3214. EXPECT_EQ(true, WIFEXITED(status));
  3215. EXPECT_EQ(0, WEXITSTATUS(status));
  3216. exit(WEXITSTATUS(status));
  3217. }
  3218. /* Create the sibling ns, and sibling in it. */
  3219. ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
  3220. if (errno == EPERM)
  3221. SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
  3222. else if (errno == EINVAL)
  3223. SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
  3224. }
  3225. ASSERT_EQ(errno, 0);
  3226. pid2 = fork();
  3227. ASSERT_GE(pid2, 0);
  3228. if (pid2 == 0) {
  3229. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3230. /*
  3231. * The pid should be 0, i.e. the task is in some namespace that
  3232. * we can't "see".
  3233. */
  3234. EXPECT_EQ(req.pid, 0);
  3235. resp.id = req.id;
  3236. resp.error = 0;
  3237. resp.val = USER_NOTIF_MAGIC;
  3238. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3239. exit(0);
  3240. }
  3241. close(listener);
  3242. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3243. EXPECT_EQ(true, WIFEXITED(status));
  3244. EXPECT_EQ(0, WEXITSTATUS(status));
  3245. EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
  3246. EXPECT_EQ(true, WIFEXITED(status));
  3247. EXPECT_EQ(0, WEXITSTATUS(status));
  3248. }
  3249. TEST(user_notification_fault_recv)
  3250. {
  3251. pid_t pid;
  3252. int status, listener;
  3253. struct seccomp_notif req = {};
  3254. struct seccomp_notif_resp resp = {};
  3255. ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
  3256. if (errno == EINVAL)
  3257. SKIP(return, "kernel missing CLONE_NEWUSER support");
  3258. }
  3259. listener = user_notif_syscall(__NR_getppid,
  3260. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3261. ASSERT_GE(listener, 0);
  3262. pid = fork();
  3263. ASSERT_GE(pid, 0);
  3264. if (pid == 0)
  3265. exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
  3266. /* Do a bad recv() */
  3267. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
  3268. EXPECT_EQ(errno, EFAULT);
  3269. /* We should still be able to receive this notification, though. */
  3270. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3271. EXPECT_EQ(req.pid, pid);
  3272. resp.id = req.id;
  3273. resp.error = 0;
  3274. resp.val = USER_NOTIF_MAGIC;
  3275. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3276. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3277. EXPECT_EQ(true, WIFEXITED(status));
  3278. EXPECT_EQ(0, WEXITSTATUS(status));
  3279. }
  3280. TEST(seccomp_get_notif_sizes)
  3281. {
  3282. struct seccomp_notif_sizes sizes;
  3283. ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
  3284. EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
  3285. EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
  3286. }
  3287. TEST(user_notification_continue)
  3288. {
  3289. pid_t pid;
  3290. long ret;
  3291. int status, listener;
  3292. struct seccomp_notif req = {};
  3293. struct seccomp_notif_resp resp = {};
  3294. struct pollfd pollfd;
  3295. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3296. ASSERT_EQ(0, ret) {
  3297. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3298. }
  3299. listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3300. ASSERT_GE(listener, 0);
  3301. pid = fork();
  3302. ASSERT_GE(pid, 0);
  3303. if (pid == 0) {
  3304. int dup_fd, pipe_fds[2];
  3305. pid_t self;
  3306. ASSERT_GE(pipe(pipe_fds), 0);
  3307. dup_fd = dup(pipe_fds[0]);
  3308. ASSERT_GE(dup_fd, 0);
  3309. EXPECT_NE(pipe_fds[0], dup_fd);
  3310. self = getpid();
  3311. ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
  3312. exit(0);
  3313. }
  3314. pollfd.fd = listener;
  3315. pollfd.events = POLLIN | POLLOUT;
  3316. EXPECT_GT(poll(&pollfd, 1, -1), 0);
  3317. EXPECT_EQ(pollfd.revents, POLLIN);
  3318. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3319. pollfd.fd = listener;
  3320. pollfd.events = POLLIN | POLLOUT;
  3321. EXPECT_GT(poll(&pollfd, 1, -1), 0);
  3322. EXPECT_EQ(pollfd.revents, POLLOUT);
  3323. EXPECT_EQ(req.data.nr, __NR_dup);
  3324. resp.id = req.id;
  3325. resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
  3326. /*
  3327. * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
  3328. * args be set to 0.
  3329. */
  3330. resp.error = 0;
  3331. resp.val = USER_NOTIF_MAGIC;
  3332. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
  3333. EXPECT_EQ(errno, EINVAL);
  3334. resp.error = USER_NOTIF_MAGIC;
  3335. resp.val = 0;
  3336. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
  3337. EXPECT_EQ(errno, EINVAL);
  3338. resp.error = 0;
  3339. resp.val = 0;
  3340. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
  3341. if (errno == EINVAL)
  3342. SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
  3343. }
  3344. skip:
  3345. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3346. EXPECT_EQ(true, WIFEXITED(status));
  3347. EXPECT_EQ(0, WEXITSTATUS(status)) {
  3348. if (WEXITSTATUS(status) == 2) {
  3349. SKIP(return, "Kernel does not support kcmp() syscall");
  3350. return;
  3351. }
  3352. }
  3353. }
  3354. TEST(user_notification_filter_empty)
  3355. {
  3356. pid_t pid;
  3357. long ret;
  3358. int status;
  3359. struct pollfd pollfd;
  3360. struct __clone_args args = {
  3361. .flags = CLONE_FILES,
  3362. .exit_signal = SIGCHLD,
  3363. };
  3364. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3365. ASSERT_EQ(0, ret) {
  3366. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3367. }
  3368. if (__NR_clone3 < 0)
  3369. SKIP(return, "Test not built with clone3 support");
  3370. pid = sys_clone3(&args, sizeof(args));
  3371. ASSERT_GE(pid, 0);
  3372. if (pid == 0) {
  3373. int listener;
  3374. listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3375. if (listener < 0)
  3376. _exit(EXIT_FAILURE);
  3377. if (dup2(listener, 200) != 200)
  3378. _exit(EXIT_FAILURE);
  3379. close(listener);
  3380. _exit(EXIT_SUCCESS);
  3381. }
  3382. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3383. EXPECT_EQ(true, WIFEXITED(status));
  3384. EXPECT_EQ(0, WEXITSTATUS(status));
  3385. /*
  3386. * The seccomp filter has become unused so we should be notified once
  3387. * the kernel gets around to cleaning up task struct.
  3388. */
  3389. pollfd.fd = 200;
  3390. pollfd.events = POLLHUP;
  3391. EXPECT_GT(poll(&pollfd, 1, 2000), 0);
  3392. EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
  3393. }
  3394. TEST(user_ioctl_notification_filter_empty)
  3395. {
  3396. pid_t pid;
  3397. long ret;
  3398. int status, p[2];
  3399. struct __clone_args args = {
  3400. .flags = CLONE_FILES,
  3401. .exit_signal = SIGCHLD,
  3402. };
  3403. struct seccomp_notif req = {};
  3404. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3405. ASSERT_EQ(0, ret) {
  3406. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3407. }
  3408. if (__NR_clone3 < 0)
  3409. SKIP(return, "Test not built with clone3 support");
  3410. ASSERT_EQ(0, pipe(p));
  3411. pid = sys_clone3(&args, sizeof(args));
  3412. ASSERT_GE(pid, 0);
  3413. if (pid == 0) {
  3414. int listener;
  3415. listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3416. if (listener < 0)
  3417. _exit(EXIT_FAILURE);
  3418. if (dup2(listener, 200) != 200)
  3419. _exit(EXIT_FAILURE);
  3420. close(p[1]);
  3421. close(listener);
  3422. sleep(1);
  3423. _exit(EXIT_SUCCESS);
  3424. }
  3425. if (read(p[0], &status, 1) != 0)
  3426. _exit(EXIT_SUCCESS);
  3427. close(p[0]);
  3428. /*
  3429. * The seccomp filter has become unused so we should be notified once
  3430. * the kernel gets around to cleaning up task struct.
  3431. */
  3432. EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
  3433. EXPECT_EQ(errno, ENOENT);
  3434. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3435. EXPECT_EQ(true, WIFEXITED(status));
  3436. EXPECT_EQ(0, WEXITSTATUS(status));
  3437. }
  /* Thread entry point that ignores its argument and returns NULL at once. */
  static void *do_thread(void *data)
  {
      void *result = NULL;

      (void)data;
      return result;
  }
  3442. TEST(user_notification_filter_empty_threaded)
  3443. {
  3444. pid_t pid;
  3445. long ret;
  3446. int status;
  3447. struct pollfd pollfd;
  3448. struct __clone_args args = {
  3449. .flags = CLONE_FILES,
  3450. .exit_signal = SIGCHLD,
  3451. };
  3452. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3453. ASSERT_EQ(0, ret) {
  3454. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3455. }
  3456. if (__NR_clone3 < 0)
  3457. SKIP(return, "Test not built with clone3 support");
  3458. pid = sys_clone3(&args, sizeof(args));
  3459. ASSERT_GE(pid, 0);
  3460. if (pid == 0) {
  3461. pid_t pid1, pid2;
  3462. int listener, status;
  3463. pthread_t thread;
  3464. listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3465. if (listener < 0)
  3466. _exit(EXIT_FAILURE);
  3467. if (dup2(listener, 200) != 200)
  3468. _exit(EXIT_FAILURE);
  3469. close(listener);
  3470. pid1 = fork();
  3471. if (pid1 < 0)
  3472. _exit(EXIT_FAILURE);
  3473. if (pid1 == 0)
  3474. _exit(EXIT_SUCCESS);
  3475. pid2 = fork();
  3476. if (pid2 < 0)
  3477. _exit(EXIT_FAILURE);
  3478. if (pid2 == 0)
  3479. _exit(EXIT_SUCCESS);
  3480. if (pthread_create(&thread, NULL, do_thread, NULL) ||
  3481. pthread_join(thread, NULL))
  3482. _exit(EXIT_FAILURE);
  3483. if (pthread_create(&thread, NULL, do_thread, NULL) ||
  3484. pthread_join(thread, NULL))
  3485. _exit(EXIT_FAILURE);
  3486. if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
  3487. WEXITSTATUS(status))
  3488. _exit(EXIT_FAILURE);
  3489. if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
  3490. WEXITSTATUS(status))
  3491. _exit(EXIT_FAILURE);
  3492. exit(EXIT_SUCCESS);
  3493. }
  3494. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3495. EXPECT_EQ(true, WIFEXITED(status));
  3496. EXPECT_EQ(0, WEXITSTATUS(status));
  3497. /*
  3498. * The seccomp filter has become unused so we should be notified once
  3499. * the kernel gets around to cleaning up task struct.
  3500. */
  3501. pollfd.fd = 200;
  3502. pollfd.events = POLLHUP;
  3503. EXPECT_GT(poll(&pollfd, 1, 2000), 0);
  3504. EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
  3505. }
  /*
   * Return the lowest descriptor number above @prev_fd and below FD_SETSIZE
   * for which fcntl(F_GETFD) fails, i.e. one that is not currently open.
   * If no such number exists the process is ended with EXIT_FAILURE.
   */
  int get_next_fd(int prev_fd)
  {
      int candidate = prev_fd;

      while (++candidate < FD_SETSIZE) {
          /* a descriptor that answers F_GETFD is in use; keep looking */
          if (fcntl(candidate, F_GETFD) != -1)
              continue;
          return candidate;
      }

      _exit(EXIT_FAILURE);
  }
  3514. TEST(user_notification_addfd)
  3515. {
  3516. pid_t pid;
  3517. long ret;
  3518. int status, listener, memfd, fd, nextfd;
  3519. struct seccomp_notif_addfd addfd = {};
  3520. struct seccomp_notif_addfd_small small = {};
  3521. struct seccomp_notif_addfd_big big = {};
  3522. struct seccomp_notif req = {};
  3523. struct seccomp_notif_resp resp = {};
  3524. /* 100 ms */
  3525. struct timespec delay = { .tv_nsec = 100000000 };
  3526. /* There may be arbitrary already-open fds at test start. */
  3527. memfd = memfd_create("test", 0);
  3528. ASSERT_GE(memfd, 0);
  3529. nextfd = get_next_fd(memfd);
  3530. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3531. ASSERT_EQ(0, ret) {
  3532. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3533. }
  3534. /* fd: 4 */
  3535. /* Check that the basic notification machinery works */
  3536. listener = user_notif_syscall(__NR_getppid,
  3537. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3538. ASSERT_EQ(listener, nextfd);
  3539. nextfd = get_next_fd(nextfd);
  3540. pid = fork();
  3541. ASSERT_GE(pid, 0);
  3542. if (pid == 0) {
  3543. /* fds will be added and this value is expected */
  3544. if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
  3545. exit(1);
  3546. /* Atomic addfd+send is received here. Check it is a valid fd */
  3547. if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
  3548. exit(1);
  3549. exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
  3550. }
  3551. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3552. addfd.srcfd = memfd;
  3553. addfd.newfd = 0;
  3554. addfd.id = req.id;
  3555. addfd.flags = 0x0;
  3556. /* Verify bad newfd_flags cannot be set */
  3557. addfd.newfd_flags = ~O_CLOEXEC;
  3558. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
  3559. EXPECT_EQ(errno, EINVAL);
  3560. addfd.newfd_flags = O_CLOEXEC;
  3561. /* Verify bad flags cannot be set */
  3562. addfd.flags = 0xff;
  3563. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
  3564. EXPECT_EQ(errno, EINVAL);
  3565. addfd.flags = 0;
  3566. /* Verify that remote_fd cannot be set without setting flags */
  3567. addfd.newfd = 1;
  3568. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
  3569. EXPECT_EQ(errno, EINVAL);
  3570. addfd.newfd = 0;
  3571. /* Verify small size cannot be set */
  3572. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
  3573. EXPECT_EQ(errno, EINVAL);
  3574. /* Verify we can't send bits filled in unknown buffer area */
  3575. memset(&big, 0xAA, sizeof(big));
  3576. big.addfd = addfd;
  3577. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
  3578. EXPECT_EQ(errno, E2BIG);
  3579. /* Verify we can set an arbitrary remote fd */
  3580. fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
  3581. EXPECT_EQ(fd, nextfd);
  3582. nextfd = get_next_fd(nextfd);
  3583. EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
  3584. /* Verify we can set an arbitrary remote fd with large size */
  3585. memset(&big, 0x0, sizeof(big));
  3586. big.addfd = addfd;
  3587. fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
  3588. EXPECT_EQ(fd, nextfd);
  3589. nextfd = get_next_fd(nextfd);
  3590. /* Verify we can set a specific remote fd */
  3591. addfd.newfd = 42;
  3592. addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
  3593. fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
  3594. EXPECT_EQ(fd, 42);
  3595. EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
  3596. /* Resume syscall */
  3597. resp.id = req.id;
  3598. resp.error = 0;
  3599. resp.val = USER_NOTIF_MAGIC;
  3600. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3601. /*
  3602. * This sets the ID of the ADD FD to the last request plus 1. The
  3603. * notification ID increments 1 per notification.
  3604. */
  3605. addfd.id = req.id + 1;
  3606. /* This spins until the underlying notification is generated */
  3607. while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
  3608. errno != -EINPROGRESS)
  3609. nanosleep(&delay, NULL);
  3610. memset(&req, 0, sizeof(req));
  3611. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3612. ASSERT_EQ(addfd.id, req.id);
  3613. /* Verify we can do an atomic addfd and send */
  3614. addfd.newfd = 0;
  3615. addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
  3616. fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
  3617. /*
  3618. * Child has earlier "low" fds and now 42, so we expect the next
  3619. * lowest available fd to be assigned here.
  3620. */
  3621. EXPECT_EQ(fd, nextfd);
  3622. nextfd = get_next_fd(nextfd);
  3623. ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
  3624. /*
  3625. * This sets the ID of the ADD FD to the last request plus 1. The
  3626. * notification ID increments 1 per notification.
  3627. */
  3628. addfd.id = req.id + 1;
  3629. /* This spins until the underlying notification is generated */
  3630. while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
  3631. errno != -EINPROGRESS)
  3632. nanosleep(&delay, NULL);
  3633. memset(&req, 0, sizeof(req));
  3634. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3635. ASSERT_EQ(addfd.id, req.id);
  3636. resp.id = req.id;
  3637. resp.error = 0;
  3638. resp.val = USER_NOTIF_MAGIC;
  3639. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3640. /* Wait for child to finish. */
  3641. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3642. EXPECT_EQ(true, WIFEXITED(status));
  3643. EXPECT_EQ(0, WEXITSTATUS(status));
  3644. close(memfd);
  3645. }
  3646. TEST(user_notification_addfd_rlimit)
  3647. {
  3648. pid_t pid;
  3649. long ret;
  3650. int status, listener, memfd;
  3651. struct seccomp_notif_addfd addfd = {};
  3652. struct seccomp_notif req = {};
  3653. struct seccomp_notif_resp resp = {};
  3654. const struct rlimit lim = {
  3655. .rlim_cur = 0,
  3656. .rlim_max = 0,
  3657. };
  3658. memfd = memfd_create("test", 0);
  3659. ASSERT_GE(memfd, 0);
  3660. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3661. ASSERT_EQ(0, ret) {
  3662. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3663. }
  3664. /* Check that the basic notification machinery works */
  3665. listener = user_notif_syscall(__NR_getppid,
  3666. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3667. ASSERT_GE(listener, 0);
  3668. pid = fork();
  3669. ASSERT_GE(pid, 0);
  3670. if (pid == 0)
  3671. exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
  3672. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3673. ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
  3674. addfd.srcfd = memfd;
  3675. addfd.newfd_flags = O_CLOEXEC;
  3676. addfd.newfd = 0;
  3677. addfd.id = req.id;
  3678. addfd.flags = 0;
  3679. /* Should probably spot check /proc/sys/fs/file-nr */
  3680. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
  3681. EXPECT_EQ(errno, EMFILE);
  3682. addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
  3683. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
  3684. EXPECT_EQ(errno, EMFILE);
  3685. addfd.newfd = 100;
  3686. addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
  3687. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
  3688. EXPECT_EQ(errno, EBADF);
  3689. resp.id = req.id;
  3690. resp.error = 0;
  3691. resp.val = USER_NOTIF_MAGIC;
  3692. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3693. /* Wait for child to finish. */
  3694. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3695. EXPECT_EQ(true, WIFEXITED(status));
  3696. EXPECT_EQ(0, WEXITSTATUS(status));
  3697. close(memfd);
  3698. }
  3699. #ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
  3700. #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
  3701. #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
  3702. #endif
  3703. TEST(user_notification_sync)
  3704. {
  3705. struct seccomp_notif req = {};
  3706. struct seccomp_notif_resp resp = {};
  3707. int status, listener;
  3708. pid_t pid;
  3709. long ret;
  3710. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3711. ASSERT_EQ(0, ret) {
  3712. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3713. }
  3714. listener = user_notif_syscall(__NR_getppid,
  3715. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3716. ASSERT_GE(listener, 0);
  3717. /* Try to set invalid flags. */
  3718. EXPECT_SYSCALL_RETURN(-EINVAL,
  3719. ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
  3720. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
  3721. SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
  3722. pid = fork();
  3723. ASSERT_GE(pid, 0);
  3724. if (pid == 0) {
  3725. ret = syscall(__NR_getppid);
  3726. ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
  3727. _exit(1);
  3728. }
  3729. _exit(0);
  3730. }
  3731. req.pid = 0;
  3732. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3733. ASSERT_EQ(req.data.nr, __NR_getppid);
  3734. resp.id = req.id;
  3735. resp.error = 0;
  3736. resp.val = USER_NOTIF_MAGIC;
  3737. resp.flags = 0;
  3738. ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3739. ASSERT_EQ(waitpid(pid, &status, 0), pid);
  3740. ASSERT_EQ(status, 0);
  3741. }
  3742. /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
  3743. FIXTURE(O_SUSPEND_SECCOMP) {
  3744. pid_t pid;
  3745. };
  3746. FIXTURE_SETUP(O_SUSPEND_SECCOMP)
  3747. {
  3748. ERRNO_FILTER(block_read, E2BIG);
  3749. cap_value_t cap_list[] = { CAP_SYS_ADMIN };
  3750. cap_t caps;
  3751. self->pid = 0;
  3752. /* make sure we don't have CAP_SYS_ADMIN */
  3753. caps = cap_get_proc();
  3754. ASSERT_NE(NULL, caps);
  3755. ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
  3756. ASSERT_EQ(0, cap_set_proc(caps));
  3757. cap_free(caps);
  3758. ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  3759. ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));
  3760. self->pid = fork();
  3761. ASSERT_GE(self->pid, 0);
  3762. if (self->pid == 0) {
  3763. while (1)
  3764. pause();
  3765. _exit(127);
  3766. }
  3767. }
  3768. FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
  3769. {
  3770. if (self->pid)
  3771. kill(self->pid, SIGKILL);
  3772. }
  3773. TEST_F(O_SUSPEND_SECCOMP, setoptions)
  3774. {
  3775. int wstatus;
  3776. ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
  3777. ASSERT_EQ(self->pid, wait(&wstatus));
  3778. ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
  3779. if (errno == EINVAL)
  3780. SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
  3781. ASSERT_EQ(EPERM, errno);
  3782. }
  3783. TEST_F(O_SUSPEND_SECCOMP, seize)
  3784. {
  3785. int ret;
  3786. ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
  3787. ASSERT_EQ(-1, ret);
  3788. if (errno == EINVAL)
  3789. SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
  3790. ASSERT_EQ(EPERM, errno);
  3791. }
  3792. /*
  3793. * get_nth - Get the nth, space separated entry in a file.
  3794. *
  3795. * Returns the length of the read field.
  3796. * Throws error if field is zero-lengthed.
  3797. */
  3798. static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
  3799. const unsigned int position, char **entry)
  3800. {
  3801. char *line = NULL;
  3802. unsigned int i;
  3803. ssize_t nread;
  3804. size_t len = 0;
  3805. FILE *f;
  3806. f = fopen(path, "r");
  3807. ASSERT_NE(f, NULL) {
  3808. TH_LOG("Could not open %s: %s", path, strerror(errno));
  3809. }
  3810. for (i = 0; i < position; i++) {
  3811. nread = getdelim(&line, &len, ' ', f);
  3812. ASSERT_GE(nread, 0) {
  3813. TH_LOG("Failed to read %d entry in file %s", i, path);
  3814. }
  3815. }
  3816. fclose(f);
  3817. ASSERT_GT(nread, 0) {
  3818. TH_LOG("Entry in file %s had zero length", path);
  3819. }
  3820. *entry = line;
  3821. return nread - 1;
  3822. }
  3823. /* For a given PID, get the task state (D, R, etc...) */
  3824. static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
  3825. {
  3826. char proc_path[100] = {0};
  3827. char status;
  3828. char *line;
  3829. snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
  3830. ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);
  3831. status = *line;
  3832. free(line);
  3833. return status;
  3834. }
  3835. TEST(user_notification_fifo)
  3836. {
  3837. struct seccomp_notif_resp resp = {};
  3838. struct seccomp_notif req = {};
  3839. int i, status, listener;
  3840. pid_t pid, pids[3];
  3841. __u64 baseid;
  3842. long ret;
  3843. /* 100 ms */
  3844. struct timespec delay = { .tv_nsec = 100000000 };
  3845. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3846. ASSERT_EQ(0, ret) {
  3847. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3848. }
  3849. /* Setup a listener */
  3850. listener = user_notif_syscall(__NR_getppid,
  3851. SECCOMP_FILTER_FLAG_NEW_LISTENER);
  3852. ASSERT_GE(listener, 0);
  3853. pid = fork();
  3854. ASSERT_GE(pid, 0);
  3855. if (pid == 0) {
  3856. ret = syscall(__NR_getppid);
  3857. exit(ret != USER_NOTIF_MAGIC);
  3858. }
  3859. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3860. baseid = req.id + 1;
  3861. resp.id = req.id;
  3862. resp.error = 0;
  3863. resp.val = USER_NOTIF_MAGIC;
  3864. /* check that we make sure flags == 0 */
  3865. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3866. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3867. EXPECT_EQ(true, WIFEXITED(status));
  3868. EXPECT_EQ(0, WEXITSTATUS(status));
  3869. /* Start children, and generate notifications */
  3870. for (i = 0; i < ARRAY_SIZE(pids); i++) {
  3871. pid = fork();
  3872. if (pid == 0) {
  3873. ret = syscall(__NR_getppid);
  3874. exit(ret != USER_NOTIF_MAGIC);
  3875. }
  3876. pids[i] = pid;
  3877. }
  3878. /* This spins until all of the children are sleeping */
  3879. restart_wait:
  3880. for (i = 0; i < ARRAY_SIZE(pids); i++) {
  3881. if (get_proc_stat(_metadata, pids[i]) != 'S') {
  3882. nanosleep(&delay, NULL);
  3883. goto restart_wait;
  3884. }
  3885. }
  3886. /* Read the notifications in order (and respond) */
  3887. for (i = 0; i < ARRAY_SIZE(pids); i++) {
  3888. memset(&req, 0, sizeof(req));
  3889. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  3890. EXPECT_EQ(req.id, baseid + i);
  3891. resp.id = req.id;
  3892. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  3893. }
  3894. /* Make sure notifications were received */
  3895. for (i = 0; i < ARRAY_SIZE(pids); i++) {
  3896. EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
  3897. EXPECT_EQ(true, WIFEXITED(status));
  3898. EXPECT_EQ(0, WEXITSTATUS(status));
  3899. }
  3900. }
  3901. /* get_proc_syscall - Get the syscall in progress for a given pid
  3902. *
  3903. * Returns the current syscall number for a given process
  3904. * Returns -1 if not in syscall (running or blocked)
  3905. */
  3906. static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
  3907. {
  3908. char proc_path[100] = {0};
  3909. long ret = -1;
  3910. ssize_t nread;
  3911. char *line;
  3912. snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
  3913. nread = get_nth(_metadata, proc_path, 1, &line);
  3914. ASSERT_GT(nread, 0);
  3915. if (!strncmp("running", line, MIN(7, nread)))
  3916. ret = strtol(line, NULL, 16);
  3917. free(line);
  3918. return ret;
  3919. }
  3920. /* Ensure non-fatal signals prior to receive are unmodified */
  3921. TEST(user_notification_wait_killable_pre_notification)
  3922. {
  3923. struct sigaction new_action = {
  3924. .sa_handler = signal_handler,
  3925. };
  3926. int listener, status, sk_pair[2];
  3927. pid_t pid;
  3928. long ret;
  3929. char c;
  3930. /* 100 ms */
  3931. struct timespec delay = { .tv_nsec = 100000000 };
  3932. ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
  3933. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3934. ASSERT_EQ(0, ret)
  3935. {
  3936. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3937. }
  3938. ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
  3939. listener = user_notif_syscall(
  3940. __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
  3941. SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
  3942. ASSERT_GE(listener, 0);
  3943. /*
  3944. * Check that we can kill the process with SIGUSR1 prior to receiving
  3945. * the notification. SIGUSR1 is wired up to a custom signal handler,
  3946. * and make sure it gets called.
  3947. */
  3948. pid = fork();
  3949. ASSERT_GE(pid, 0);
  3950. if (pid == 0) {
  3951. close(sk_pair[0]);
  3952. handled = sk_pair[1];
  3953. /* Setup the non-fatal sigaction without SA_RESTART */
  3954. if (sigaction(SIGUSR1, &new_action, NULL)) {
  3955. perror("sigaction");
  3956. exit(1);
  3957. }
  3958. ret = syscall(__NR_getppid);
  3959. /* Make sure we got a return from a signal interruption */
  3960. exit(ret != -1 || errno != EINTR);
  3961. }
  3962. /*
  3963. * Make sure we've gotten to the seccomp user notification wait
  3964. * from getppid prior to sending any signals
  3965. */
  3966. while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
  3967. get_proc_stat(_metadata, pid) != 'S')
  3968. nanosleep(&delay, NULL);
  3969. /* Send non-fatal kill signal */
  3970. EXPECT_EQ(kill(pid, SIGUSR1), 0);
  3971. /* wait for process to exit (exit checks for EINTR) */
  3972. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  3973. EXPECT_EQ(true, WIFEXITED(status));
  3974. EXPECT_EQ(0, WEXITSTATUS(status));
  3975. EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
  3976. }
  3977. /* Ensure non-fatal signals after receive are blocked */
  3978. TEST(user_notification_wait_killable)
  3979. {
  3980. struct sigaction new_action = {
  3981. .sa_handler = signal_handler,
  3982. };
  3983. struct seccomp_notif_resp resp = {};
  3984. struct seccomp_notif req = {};
  3985. int listener, status, sk_pair[2];
  3986. pid_t pid;
  3987. long ret;
  3988. char c;
  3989. /* 100 ms */
  3990. struct timespec delay = { .tv_nsec = 100000000 };
  3991. ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
  3992. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  3993. ASSERT_EQ(0, ret)
  3994. {
  3995. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  3996. }
  3997. ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
  3998. listener = user_notif_syscall(
  3999. __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
  4000. SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
  4001. ASSERT_GE(listener, 0);
  4002. pid = fork();
  4003. ASSERT_GE(pid, 0);
  4004. if (pid == 0) {
  4005. close(sk_pair[0]);
  4006. handled = sk_pair[1];
  4007. /* Setup the sigaction without SA_RESTART */
  4008. if (sigaction(SIGUSR1, &new_action, NULL)) {
  4009. perror("sigaction");
  4010. exit(1);
  4011. }
  4012. /* Make sure that the syscall is completed (no EINTR) */
  4013. ret = syscall(__NR_getppid);
  4014. exit(ret != USER_NOTIF_MAGIC);
  4015. }
  4016. /*
  4017. * Get the notification, to make move the notifying process into a
  4018. * non-preemptible (TASK_KILLABLE) state.
  4019. */
  4020. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  4021. /* Send non-fatal kill signal */
  4022. EXPECT_EQ(kill(pid, SIGUSR1), 0);
  4023. /*
  4024. * Make sure the task enters moves to TASK_KILLABLE by waiting for
  4025. * D (Disk Sleep) state after receiving non-fatal signal.
  4026. */
  4027. while (get_proc_stat(_metadata, pid) != 'D')
  4028. nanosleep(&delay, NULL);
  4029. resp.id = req.id;
  4030. resp.val = USER_NOTIF_MAGIC;
  4031. /* Make sure the notification is found and able to be replied to */
  4032. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
  4033. /*
  4034. * Make sure that the signal handler does get called once we're back in
  4035. * userspace.
  4036. */
  4037. EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
  4038. /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
  4039. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  4040. EXPECT_EQ(true, WIFEXITED(status));
  4041. EXPECT_EQ(0, WEXITSTATUS(status));
  4042. }
  4043. /* Ensure fatal signals after receive are not blocked */
  4044. TEST(user_notification_wait_killable_fatal)
  4045. {
  4046. struct seccomp_notif req = {};
  4047. int listener, status;
  4048. pid_t pid;
  4049. long ret;
  4050. /* 100 ms */
  4051. struct timespec delay = { .tv_nsec = 100000000 };
  4052. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  4053. ASSERT_EQ(0, ret)
  4054. {
  4055. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  4056. }
  4057. listener = user_notif_syscall(
  4058. __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
  4059. SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
  4060. ASSERT_GE(listener, 0);
  4061. pid = fork();
  4062. ASSERT_GE(pid, 0);
  4063. if (pid == 0) {
  4064. /* This should never complete as it should get a SIGTERM */
  4065. syscall(__NR_getppid);
  4066. exit(1);
  4067. }
  4068. while (get_proc_stat(_metadata, pid) != 'S')
  4069. nanosleep(&delay, NULL);
  4070. /*
  4071. * Get the notification, to make move the notifying process into a
  4072. * non-preemptible (TASK_KILLABLE) state.
  4073. */
  4074. EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
  4075. /* Kill the process with a fatal signal */
  4076. EXPECT_EQ(kill(pid, SIGTERM), 0);
  4077. /*
  4078. * Wait for the process to exit, and make sure the process terminated
  4079. * due to the SIGTERM signal.
  4080. */
  4081. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  4082. EXPECT_EQ(true, WIFSIGNALED(status));
  4083. EXPECT_EQ(SIGTERM, WTERMSIG(status));
  4084. }
  4085. /* Ensure signals after the reply do not interrupt */
  4086. TEST(user_notification_wait_killable_after_reply)
  4087. {
  4088. int i, max_iter = 100000;
  4089. int listener, status;
  4090. int pipe_fds[2];
  4091. pid_t pid;
  4092. long ret;
  4093. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  4094. ASSERT_EQ(0, ret)
  4095. {
  4096. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  4097. }
  4098. listener = user_notif_syscall(
  4099. __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER |
  4100. SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
  4101. ASSERT_GE(listener, 0);
  4102. /*
  4103. * Used to count invocations. One token is transferred from the child
  4104. * to the parent per syscall invocation, the parent tries to take
  4105. * one token per successful RECV. If the syscall is restarted after
  4106. * RECV the parent will try to get two tokens while the child only
  4107. * provided one.
  4108. */
  4109. ASSERT_EQ(pipe(pipe_fds), 0);
  4110. pid = fork();
  4111. ASSERT_GE(pid, 0);
  4112. if (pid == 0) {
  4113. struct sigaction new_action = {
  4114. .sa_handler = signal_handler_nop,
  4115. .sa_flags = SA_RESTART,
  4116. };
  4117. struct itimerval timer = {
  4118. .it_value = { .tv_usec = 1000 },
  4119. .it_interval = { .tv_usec = 1000 },
  4120. };
  4121. char c = 'a';
  4122. close(pipe_fds[0]);
  4123. /* Setup the sigaction with SA_RESTART */
  4124. if (sigaction(SIGALRM, &new_action, NULL)) {
  4125. perror("sigaction");
  4126. exit(1);
  4127. }
  4128. /*
  4129. * Kill with SIGALRM repeatedly, to try to hit the race when
  4130. * handling the syscall.
  4131. */
  4132. if (setitimer(ITIMER_REAL, &timer, NULL) < 0)
  4133. perror("setitimer");
  4134. for (i = 0; i < max_iter; ++i) {
  4135. int fd;
  4136. /* Send one token per iteration to catch repeats. */
  4137. if (write(pipe_fds[1], &c, sizeof(c)) != 1) {
  4138. perror("write");
  4139. exit(1);
  4140. }
  4141. fd = syscall(__NR_dup, 0);
  4142. if (fd < 0) {
  4143. perror("dup");
  4144. exit(1);
  4145. }
  4146. close(fd);
  4147. }
  4148. exit(0);
  4149. }
  4150. close(pipe_fds[1]);
  4151. for (i = 0; i < max_iter; ++i) {
  4152. struct seccomp_notif req = {};
  4153. struct seccomp_notif_addfd addfd = {};
  4154. struct pollfd pfd = {
  4155. .fd = pipe_fds[0],
  4156. .events = POLLIN,
  4157. };
  4158. char c;
  4159. /*
  4160. * Try to receive one token. If it failed, one child syscall
  4161. * was restarted after RECV and needed to be handled twice.
  4162. */
  4163. ASSERT_EQ(poll(&pfd, 1, 1000), 1)
  4164. kill(pid, SIGKILL);
  4165. ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1)
  4166. kill(pid, SIGKILL);
  4167. /*
  4168. * Get the notification, reply to it as fast as possible to test
  4169. * whether the child wrongly skips going into the non-preemptible
  4170. * (TASK_KILLABLE) state.
  4171. */
  4172. do
  4173. ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
  4174. while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */
  4175. ASSERT_EQ(ret, 0)
  4176. kill(pid, SIGKILL);
  4177. addfd.id = req.id;
  4178. addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
  4179. addfd.srcfd = 0;
  4180. ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0)
  4181. kill(pid, SIGKILL);
  4182. }
  4183. /*
  4184. * Wait for the process to exit, and make sure the process terminated
  4185. * with a zero exit code..
  4186. */
  4187. EXPECT_EQ(waitpid(pid, &status, 0), pid);
  4188. EXPECT_EQ(true, WIFEXITED(status));
  4189. EXPECT_EQ(0, WEXITSTATUS(status));
  4190. }
/*
 * Argument block handed to tsync_vs_dead_thread_leader_sibling(): identifies
 * the leader thread the sibling joins before installing its filter.
 */
struct tsync_vs_thread_leader_args {
	pthread_t leader;
};
  4194. static void *tsync_vs_dead_thread_leader_sibling(void *_args)
  4195. {
  4196. struct sock_filter allow_filter[] = {
  4197. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  4198. };
  4199. struct sock_fprog allow_prog = {
  4200. .len = (unsigned short)ARRAY_SIZE(allow_filter),
  4201. .filter = allow_filter,
  4202. };
  4203. struct tsync_vs_thread_leader_args *args = _args;
  4204. void *retval;
  4205. long ret;
  4206. ret = pthread_join(args->leader, &retval);
  4207. if (ret)
  4208. exit(1);
  4209. if (retval != _args)
  4210. exit(2);
  4211. ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog);
  4212. if (ret)
  4213. exit(3);
  4214. exit(0);
  4215. }
  4216. /*
  4217. * Ensure that a dead thread leader doesn't prevent installing new filters with
  4218. * SECCOMP_FILTER_FLAG_TSYNC from other threads.
  4219. */
  4220. TEST(tsync_vs_dead_thread_leader)
  4221. {
  4222. int status;
  4223. pid_t pid;
  4224. long ret;
  4225. ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  4226. ASSERT_EQ(0, ret) {
  4227. TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
  4228. }
  4229. pid = fork();
  4230. ASSERT_GE(pid, 0);
  4231. if (pid == 0) {
  4232. struct sock_filter allow_filter[] = {
  4233. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  4234. };
  4235. struct sock_fprog allow_prog = {
  4236. .len = (unsigned short)ARRAY_SIZE(allow_filter),
  4237. .filter = allow_filter,
  4238. };
  4239. struct tsync_vs_thread_leader_args *args;
  4240. pthread_t sibling;
  4241. args = malloc(sizeof(*args));
  4242. ASSERT_NE(NULL, args);
  4243. args->leader = pthread_self();
  4244. ret = pthread_create(&sibling, NULL,
  4245. tsync_vs_dead_thread_leader_sibling, args);
  4246. ASSERT_EQ(0, ret);
  4247. /* Install a new filter just to the leader thread. */
  4248. ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
  4249. ASSERT_EQ(0, ret);
  4250. pthread_exit(args);
  4251. exit(1);
  4252. }
  4253. EXPECT_EQ(pid, waitpid(pid, &status, 0));
  4254. EXPECT_EQ(0, status);
  4255. }
#ifdef __x86_64__
/*
 * Probe site for the uprobe variants on x86_64.
 *
 * We need a naked probed_uprobe function. __nocf_check is used to skip a
 * possible endbr64 instruction, and -Wattributes is ignored because
 * otherwise the compilation might fail.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wattributes"
__naked __nocf_check noinline int probed_uprobe(void)
{
	/*
	 * Optimized uprobe is possible only on top of nop5 instruction, so
	 * the function body is that instruction (emitted as raw bytes)
	 * followed by ret.
	 */
	asm volatile (" \n"
		".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
		"ret \n"
	);
}
#pragma GCC diagnostic pop
#else
/* Probe site for the uprobe variants on other architectures. */
noinline int probed_uprobe(void)
{
	return 1;
}
#endif
/*
 * Probe site for the uretprobe variant. Kept out of line (noinline) so the
 * function has an address of its own that FIXTURE_SETUP(UPROBE) can resolve.
 */
noinline int probed_uretprobe(void)
{
	return 1;
}
  4285. static int parse_uint_from_file(const char *file, const char *fmt)
  4286. {
  4287. int err = -1, ret;
  4288. FILE *f;
  4289. f = fopen(file, "re");
  4290. if (f) {
  4291. err = fscanf(f, fmt, &ret);
  4292. fclose(f);
  4293. }
  4294. return err == 1 ? ret : err;
  4295. }
  4296. static int determine_uprobe_perf_type(void)
  4297. {
  4298. const char *file = "/sys/bus/event_source/devices/uprobe/type";
  4299. return parse_uint_from_file(file, "%d\n");
  4300. }
  4301. static int determine_uprobe_retprobe_bit(void)
  4302. {
  4303. const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
  4304. return parse_uint_from_file(file, "config:%d\n");
  4305. }
  4306. static ssize_t get_uprobe_offset(const void *addr)
  4307. {
  4308. size_t start, base, end;
  4309. bool found = false;
  4310. char buf[256];
  4311. FILE *f;
  4312. f = fopen("/proc/self/maps", "r");
  4313. if (!f)
  4314. return -1;
  4315. while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
  4316. if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
  4317. found = true;
  4318. break;
  4319. }
  4320. }
  4321. fclose(f);
  4322. return found ? (uintptr_t)addr - start + base : -1;
  4323. }
/*
 * Fixture state for the UPROBE tests: the descriptor returned by
 * perf_event_open in FIXTURE_SETUP for the variants that attach a probe.
 */
FIXTURE(UPROBE) {
	int fd;
};
/* Per-variant knobs read by FIXTURE_SETUP(UPROBE). */
FIXTURE_VARIANT(UPROBE) {
	/*
	 * All of the U(RET)PROBE behaviors can be tested with either
	 * u(ret)probe attached or not. When false, setup returns without
	 * opening a perf event.
	 */
	bool attach;
	/*
	 * Test both uprobe and uretprobe: when true, setup targets
	 * probed_uretprobe and sets the retprobe config bit; otherwise it
	 * targets probed_uprobe.
	 */
	bool uretprobe;
};
  4338. FIXTURE_VARIANT_ADD(UPROBE, not_attached) {
  4339. .attach = false,
  4340. .uretprobe = false,
  4341. };
  4342. FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) {
  4343. .attach = true,
  4344. .uretprobe = false,
  4345. };
  4346. FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) {
  4347. .attach = true,
  4348. .uretprobe = true,
  4349. };
  4350. FIXTURE_SETUP(UPROBE)
  4351. {
  4352. const size_t attr_sz = sizeof(struct perf_event_attr);
  4353. struct perf_event_attr attr;
  4354. ssize_t offset;
  4355. int type, bit;
  4356. #if !defined(__NR_uprobe) || !defined(__NR_uretprobe)
  4357. SKIP(return, "__NR_uprobe ot __NR_uretprobe syscalls not defined");
  4358. #endif
  4359. if (!variant->attach)
  4360. return;
  4361. memset(&attr, 0, attr_sz);
  4362. type = determine_uprobe_perf_type();
  4363. ASSERT_GE(type, 0);
  4364. if (variant->uretprobe) {
  4365. bit = determine_uprobe_retprobe_bit();
  4366. ASSERT_GE(bit, 0);
  4367. }
  4368. offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe);
  4369. ASSERT_GE(offset, 0);
  4370. if (variant->uretprobe)
  4371. attr.config |= 1 << bit;
  4372. attr.size = attr_sz;
  4373. attr.type = type;
  4374. attr.config1 = ptr_to_u64("/proc/self/exe");
  4375. attr.config2 = offset;
  4376. self->fd = syscall(__NR_perf_event_open, &attr,
  4377. getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */,
  4378. PERF_FLAG_FD_CLOEXEC);
  4379. }
/* Intentionally empty: the perf event descriptor is not closed here. */
FIXTURE_TEARDOWN(UPROBE)
{
	/* we could call close(self->fd), but we'd need extra filter for
	 * that and since we are calling _exit right away..
	 */
}
  4386. static int run_probed_with_filter(struct sock_fprog *prog)
  4387. {
  4388. if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
  4389. seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) {
  4390. return -1;
  4391. }
  4392. /*
  4393. * Uprobe is optimized after first hit, so let's hit twice.
  4394. */
  4395. probed_uprobe();
  4396. probed_uprobe();
  4397. probed_uretprobe();
  4398. return 0;
  4399. }
  4400. TEST_F(UPROBE, uprobe_default_allow)
  4401. {
  4402. struct sock_filter filter[] = {
  4403. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  4404. };
  4405. struct sock_fprog prog = {
  4406. .len = (unsigned short)ARRAY_SIZE(filter),
  4407. .filter = filter,
  4408. };
  4409. ASSERT_EQ(0, run_probed_with_filter(&prog));
  4410. }
  4411. TEST_F(UPROBE, uprobe_default_block)
  4412. {
  4413. struct sock_filter filter[] = {
  4414. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  4415. offsetof(struct seccomp_data, nr)),
  4416. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
  4417. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  4418. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  4419. };
  4420. struct sock_fprog prog = {
  4421. .len = (unsigned short)ARRAY_SIZE(filter),
  4422. .filter = filter,
  4423. };
  4424. ASSERT_EQ(0, run_probed_with_filter(&prog));
  4425. }
  4426. TEST_F(UPROBE, uprobe_block_syscall)
  4427. {
  4428. struct sock_filter filter[] = {
  4429. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  4430. offsetof(struct seccomp_data, nr)),
  4431. #ifdef __NR_uprobe
  4432. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2),
  4433. #endif
  4434. #ifdef __NR_uretprobe
  4435. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1),
  4436. #endif
  4437. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  4438. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  4439. };
  4440. struct sock_fprog prog = {
  4441. .len = (unsigned short)ARRAY_SIZE(filter),
  4442. .filter = filter,
  4443. };
  4444. ASSERT_EQ(0, run_probed_with_filter(&prog));
  4445. }
  4446. TEST_F(UPROBE, uprobe_default_block_with_syscall)
  4447. {
  4448. struct sock_filter filter[] = {
  4449. BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
  4450. offsetof(struct seccomp_data, nr)),
  4451. #ifdef __NR_uprobe
  4452. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0),
  4453. #endif
  4454. #ifdef __NR_uretprobe
  4455. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0),
  4456. #endif
  4457. BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
  4458. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
  4459. BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
  4460. };
  4461. struct sock_fprog prog = {
  4462. .len = (unsigned short)ARRAY_SIZE(filter),
  4463. .filter = filter,
  4464. };
  4465. ASSERT_EQ(0, run_probed_with_filter(&prog));
  4466. }
  4467. /*
  4468. * TODO:
  4469. * - expand NNP testing
  4470. * - better arch-specific TRACE and TRAP handlers.
  4471. * - endianness checking when appropriate
  4472. * - 64-bit arg prodding
  4473. * - arch value testing (x86 modes especially)
  4474. * - verify that FILTER_FLAG_LOG filters generate log messages
  4475. * - verify that RET_LOG generates log messages
  4476. */
  4477. TEST_HARNESS_MAIN