af_unix.c 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * NET4: Implementation of BSD Unix domain sockets.
  4. *
  5. * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
  6. *
  7. * Fixes:
  8. * Linus Torvalds : Assorted bug cures.
  9. * Niibe Yutaka : async I/O support.
  10. * Carsten Paeth : PF_UNIX check, address fixes.
  11. * Alan Cox : Limit size of allocated blocks.
  12. * Alan Cox : Fixed the stupid socketpair bug.
  13. * Alan Cox : BSD compatibility fine tuning.
  14. * Alan Cox : Fixed a bug in connect when interrupted.
  15. * Alan Cox : Sorted out a proper draft version of
  16. * file descriptor passing hacked up from
  17. * Mike Shaver's work.
  18. * Marty Leisner : Fixes to fd passing
  19. * Nick Nevin : recvmsg bugfix.
  20. * Alan Cox : Started proper garbage collector
  21. * Heiko EiBfeldt : Missing verify_area check
  22. * Alan Cox : Started POSIXisms
  23. * Andreas Schwab : Replace inode by dentry for proper
  24. * reference counting
  25. * Kirk Petersen : Made this a module
  26. * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
  27. * Lots of bug fixes.
  28. * Alexey Kuznetsov : Repaired (I hope) bugs introduced
  29. * by above two patches.
  30. * Andrea Arcangeli : If possible we block in connect(2)
  31. * if the max backlog of the listen socket
  32. * has been reached. This won't break
  33. * old apps and it will avoid huge amount
  34. * of socks hashed (this for unix_gc()
  35. * performance reasons).
  36. * Security fix that limits the max
  37. * number of socks to 2*max_files and
  38. * the number of skb queueable in the
  39. * dgram receiver.
  40. * Artur Skawina : Hash function optimizations
  41. * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
  42. * Malcolm Beattie : Set peercred for socketpair
  43. * Michal Ostrowski : Module initialization cleanup.
  44. * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
  45. * the core infrastructure is doing that
  46. * for all net proto families now (2.5.69+)
  47. *
  48. * Known differences from reference BSD that was tested:
  49. *
  50. * [TO FIX]
  51. * ECONNREFUSED is not returned from one end of a connected() socket to the
  52. * other the moment one end closes.
  53. * fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54. * and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55. * [NOT TO FIX]
  56. * accept() returns a path name even if the connecting socket has closed
  57. * in the meantime (BSD loses the path and gives up).
  58. * accept() returns 0 length path for an unbound connector. BSD returns 16
  59. * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60. * socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61. * BSD af_unix apparently has connect forgetting to block properly.
  62. * (need to check this with the POSIX spec in detail)
  63. *
  64. * Differences from 2.0.0-11-... (ANK)
  65. * Bug fixes and improvements.
  66. * - client shutdown killed server socket.
  67. * - removed all useless cli/sti pairs.
  68. *
  69. * Semantic changes/extensions.
  70. * - generic control message passing.
  71. * - SCM_CREDENTIALS control message.
  72. * - "Abstract" (not FS based) socket bindings.
  73. * Abstract names are sequences of bytes (not zero terminated)
  74. * started by 0, so that this name space does not intersect
  75. * with BSD names.
  76. */
  77. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  78. #include <linux/bpf-cgroup.h>
  79. #include <linux/btf_ids.h>
  80. #include <linux/dcache.h>
  81. #include <linux/errno.h>
  82. #include <linux/fcntl.h>
  83. #include <linux/file.h>
  84. #include <linux/filter.h>
  85. #include <linux/fs.h>
  86. #include <linux/fs_struct.h>
  87. #include <linux/init.h>
  88. #include <linux/kernel.h>
  89. #include <linux/mount.h>
  90. #include <linux/namei.h>
  91. #include <linux/net.h>
  92. #include <linux/pidfs.h>
  93. #include <linux/poll.h>
  94. #include <linux/proc_fs.h>
  95. #include <linux/sched/signal.h>
  96. #include <linux/security.h>
  97. #include <linux/seq_file.h>
  98. #include <linux/skbuff.h>
  99. #include <linux/slab.h>
  100. #include <linux/socket.h>
  101. #include <linux/splice.h>
  102. #include <linux/string.h>
  103. #include <linux/uaccess.h>
  104. #include <net/af_unix.h>
  105. #include <net/net_namespace.h>
  106. #include <net/scm.h>
  107. #include <net/tcp_states.h>
  108. #include <uapi/linux/sockios.h>
  109. #include <uapi/linux/termios.h>
  110. #include "af_unix.h"
  111. static atomic_long_t unix_nr_socks;
  112. static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
  113. static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
  114. /* SMP locking strategy:
  115. * hash table is protected with spinlock.
  116. * each socket state is protected by separate spinlock.
  117. */
  118. #ifdef CONFIG_PROVE_LOCKING
  119. #define cmp_ptr(l, r) (((l) > (r)) - ((l) < (r)))
  120. static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
  121. const struct lockdep_map *b)
  122. {
  123. return cmp_ptr(a, b);
  124. }
  125. static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
  126. const struct lockdep_map *_b)
  127. {
  128. const struct unix_sock *a, *b;
  129. a = container_of(_a, struct unix_sock, lock.dep_map);
  130. b = container_of(_b, struct unix_sock, lock.dep_map);
  131. if (a->sk.sk_state == TCP_LISTEN) {
  132. /* unix_stream_connect(): Before the 2nd unix_state_lock(),
  133. *
  134. * 1. a is TCP_LISTEN.
  135. * 2. b is not a.
  136. * 3. concurrent connect(b -> a) must fail.
  137. *
  138. * Except for 2. & 3., the b's state can be any possible
  139. * value due to concurrent connect() or listen().
  140. *
  141. * 2. is detected in debug_spin_lock_before(), and 3. cannot
  142. * be expressed as lock_cmp_fn.
  143. */
  144. switch (b->sk.sk_state) {
  145. case TCP_CLOSE:
  146. case TCP_ESTABLISHED:
  147. case TCP_LISTEN:
  148. return -1;
  149. default:
  150. /* Invalid case. */
  151. return 0;
  152. }
  153. }
  154. /* Should never happen. Just to be symmetric. */
  155. if (b->sk.sk_state == TCP_LISTEN) {
  156. switch (b->sk.sk_state) {
  157. case TCP_CLOSE:
  158. case TCP_ESTABLISHED:
  159. return 1;
  160. default:
  161. return 0;
  162. }
  163. }
  164. /* unix_state_double_lock(): ascending address order. */
  165. return cmp_ptr(a, b);
  166. }
  167. static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
  168. const struct lockdep_map *_b)
  169. {
  170. const struct sock *a, *b;
  171. a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
  172. b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
  173. /* unix_collect_skb(): listener -> embryo order. */
  174. if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
  175. return -1;
  176. /* Should never happen. Just to be symmetric. */
  177. if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
  178. return 1;
  179. return 0;
  180. }
  181. #endif
  182. static unsigned int unix_unbound_hash(struct sock *sk)
  183. {
  184. unsigned long hash = (unsigned long)sk;
  185. hash ^= hash >> 16;
  186. hash ^= hash >> 8;
  187. hash ^= sk->sk_type;
  188. return hash & UNIX_HASH_MOD;
  189. }
  190. static unsigned int unix_bsd_hash(struct inode *i)
  191. {
  192. return i->i_ino & UNIX_HASH_MOD;
  193. }
  194. static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
  195. int addr_len, int type)
  196. {
  197. __wsum csum = csum_partial(sunaddr, addr_len, 0);
  198. unsigned int hash;
  199. hash = (__force unsigned int)csum_fold(csum);
  200. hash ^= hash >> 8;
  201. hash ^= type;
  202. return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
  203. }
  204. static void unix_table_double_lock(struct net *net,
  205. unsigned int hash1, unsigned int hash2)
  206. {
  207. if (hash1 == hash2) {
  208. spin_lock(&net->unx.table.locks[hash1]);
  209. return;
  210. }
  211. if (hash1 > hash2)
  212. swap(hash1, hash2);
  213. spin_lock(&net->unx.table.locks[hash1]);
  214. spin_lock(&net->unx.table.locks[hash2]);
  215. }
  216. static void unix_table_double_unlock(struct net *net,
  217. unsigned int hash1, unsigned int hash2)
  218. {
  219. if (hash1 == hash2) {
  220. spin_unlock(&net->unx.table.locks[hash1]);
  221. return;
  222. }
  223. spin_unlock(&net->unx.table.locks[hash1]);
  224. spin_unlock(&net->unx.table.locks[hash2]);
  225. }
  226. #ifdef CONFIG_SECURITY_NETWORK
  227. static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  228. {
  229. UNIXCB(skb).secid = scm->secid;
  230. }
  231. static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  232. {
  233. scm->secid = UNIXCB(skb).secid;
  234. }
  235. static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
  236. {
  237. return (scm->secid == UNIXCB(skb).secid);
  238. }
  239. #else
  240. static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  241. { }
  242. static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  243. { }
  244. static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
  245. {
  246. return true;
  247. }
  248. #endif /* CONFIG_SECURITY_NETWORK */
  249. static inline int unix_may_send(struct sock *sk, struct sock *osk)
  250. {
  251. return !unix_peer(osk) || unix_peer(osk) == sk;
  252. }
  253. static inline int unix_recvq_full_lockless(const struct sock *sk)
  254. {
  255. return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
  256. }
  257. struct sock *unix_peer_get(struct sock *s)
  258. {
  259. struct sock *peer;
  260. unix_state_lock(s);
  261. peer = unix_peer(s);
  262. if (peer)
  263. sock_hold(peer);
  264. unix_state_unlock(s);
  265. return peer;
  266. }
  267. EXPORT_SYMBOL_GPL(unix_peer_get);
  268. static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
  269. int addr_len)
  270. {
  271. struct unix_address *addr;
  272. addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
  273. if (!addr)
  274. return NULL;
  275. refcount_set(&addr->refcnt, 1);
  276. addr->len = addr_len;
  277. memcpy(addr->name, sunaddr, addr_len);
  278. return addr;
  279. }
  280. static inline void unix_release_addr(struct unix_address *addr)
  281. {
  282. if (refcount_dec_and_test(&addr->refcnt))
  283. kfree(addr);
  284. }
  285. /*
  286. * Check unix socket name:
  287. * - should be not zero length.
  288. * - if started by not zero, should be NULL terminated (FS object)
  289. * - if started by zero, it is abstract name.
  290. */
  291. static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
  292. {
  293. if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
  294. addr_len > sizeof(*sunaddr))
  295. return -EINVAL;
  296. if (sunaddr->sun_family != AF_UNIX)
  297. return -EINVAL;
  298. return 0;
  299. }
  300. static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
  301. {
  302. struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
  303. short offset = offsetof(struct sockaddr_storage, __data);
  304. BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
  305. /* This may look like an off by one error but it is a bit more
  306. * subtle. 108 is the longest valid AF_UNIX path for a binding.
  307. * sun_path[108] doesn't as such exist. However in kernel space
  308. * we are guaranteed that it is a valid memory location in our
  309. * kernel address buffer because syscall functions always pass
  310. * a pointer of struct sockaddr_storage which has a bigger buffer
  311. * than 108. Also, we must terminate sun_path for strlen() in
  312. * getname_kernel().
  313. */
  314. addr->__data[addr_len - offset] = 0;
  315. /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
  316. * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
  317. * know the actual buffer.
  318. */
  319. return strlen(addr->__data) + offset + 1;
  320. }
  321. static void __unix_remove_socket(struct sock *sk)
  322. {
  323. sk_del_node_init(sk);
  324. }
  325. static void __unix_insert_socket(struct net *net, struct sock *sk)
  326. {
  327. DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
  328. sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
  329. }
  330. static void __unix_set_addr_hash(struct net *net, struct sock *sk,
  331. struct unix_address *addr, unsigned int hash)
  332. {
  333. __unix_remove_socket(sk);
  334. smp_store_release(&unix_sk(sk)->addr, addr);
  335. sk->sk_hash = hash;
  336. __unix_insert_socket(net, sk);
  337. }
  338. static void unix_remove_socket(struct net *net, struct sock *sk)
  339. {
  340. spin_lock(&net->unx.table.locks[sk->sk_hash]);
  341. __unix_remove_socket(sk);
  342. spin_unlock(&net->unx.table.locks[sk->sk_hash]);
  343. }
  344. static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
  345. {
  346. spin_lock(&net->unx.table.locks[sk->sk_hash]);
  347. __unix_insert_socket(net, sk);
  348. spin_unlock(&net->unx.table.locks[sk->sk_hash]);
  349. }
  350. static void unix_insert_bsd_socket(struct sock *sk)
  351. {
  352. spin_lock(&bsd_socket_locks[sk->sk_hash]);
  353. sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
  354. spin_unlock(&bsd_socket_locks[sk->sk_hash]);
  355. }
  356. static void unix_remove_bsd_socket(struct sock *sk)
  357. {
  358. if (!hlist_unhashed(&sk->sk_bind_node)) {
  359. spin_lock(&bsd_socket_locks[sk->sk_hash]);
  360. __sk_del_bind_node(sk);
  361. spin_unlock(&bsd_socket_locks[sk->sk_hash]);
  362. sk_node_init(&sk->sk_bind_node);
  363. }
  364. }
  365. static struct sock *__unix_find_socket_byname(struct net *net,
  366. struct sockaddr_un *sunname,
  367. int len, unsigned int hash)
  368. {
  369. struct sock *s;
  370. sk_for_each(s, &net->unx.table.buckets[hash]) {
  371. struct unix_sock *u = unix_sk(s);
  372. if (u->addr->len == len &&
  373. !memcmp(u->addr->name, sunname, len))
  374. return s;
  375. }
  376. return NULL;
  377. }
  378. static inline struct sock *unix_find_socket_byname(struct net *net,
  379. struct sockaddr_un *sunname,
  380. int len, unsigned int hash)
  381. {
  382. struct sock *s;
  383. spin_lock(&net->unx.table.locks[hash]);
  384. s = __unix_find_socket_byname(net, sunname, len, hash);
  385. if (s)
  386. sock_hold(s);
  387. spin_unlock(&net->unx.table.locks[hash]);
  388. return s;
  389. }
  390. static struct sock *unix_find_socket_byinode(struct inode *i)
  391. {
  392. unsigned int hash = unix_bsd_hash(i);
  393. struct sock *s;
  394. spin_lock(&bsd_socket_locks[hash]);
  395. sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
  396. struct dentry *dentry = unix_sk(s)->path.dentry;
  397. if (dentry && d_backing_inode(dentry) == i) {
  398. sock_hold(s);
  399. spin_unlock(&bsd_socket_locks[hash]);
  400. return s;
  401. }
  402. }
  403. spin_unlock(&bsd_socket_locks[hash]);
  404. return NULL;
  405. }
  406. /* Support code for asymmetrically connected dgram sockets
  407. *
  408. * If a datagram socket is connected to a socket not itself connected
  409. * to the first socket (eg, /dev/log), clients may only enqueue more
  410. * messages if the present receive queue of the server socket is not
  411. * "too large". This means there's a second writeability condition
  412. * poll and sendmsg need to test. The dgram recv code will do a wake
  413. * up on the peer_wait wait queue of a socket upon reception of a
  414. * datagram which needs to be propagated to sleeping would-be writers
  415. * since these might not have sent anything so far. This can't be
  416. * accomplished via poll_wait because the lifetime of the server
  417. * socket might be less than that of its clients if these break their
  418. * association with it or if the server socket is closed while clients
  419. * are still connected to it and there's no way to inform "a polling
  420. * implementation" that it should let go of a certain wait queue
  421. *
  422. * In order to propagate a wake up, a wait_queue_entry_t of the client
  423. * socket is enqueued on the peer_wait queue of the server socket
  424. * whose wake function does a wake_up on the ordinary client socket
  425. * wait queue. This connection is established whenever a write (or
  426. * poll for write) hit the flow control condition and broken when the
  427. * association to the server socket is dissolved or after a wake up
  428. * was relayed.
  429. */
  430. static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
  431. void *key)
  432. {
  433. struct unix_sock *u;
  434. wait_queue_head_t *u_sleep;
  435. u = container_of(q, struct unix_sock, peer_wake);
  436. __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
  437. q);
  438. u->peer_wake.private = NULL;
  439. /* relaying can only happen while the wq still exists */
  440. u_sleep = sk_sleep(&u->sk);
  441. if (u_sleep)
  442. wake_up_interruptible_poll(u_sleep, key_to_poll(key));
  443. return 0;
  444. }
  445. static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
  446. {
  447. struct unix_sock *u, *u_other;
  448. int rc;
  449. u = unix_sk(sk);
  450. u_other = unix_sk(other);
  451. rc = 0;
  452. spin_lock(&u_other->peer_wait.lock);
  453. if (!u->peer_wake.private) {
  454. u->peer_wake.private = other;
  455. __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
  456. rc = 1;
  457. }
  458. spin_unlock(&u_other->peer_wait.lock);
  459. return rc;
  460. }
  461. static void unix_dgram_peer_wake_disconnect(struct sock *sk,
  462. struct sock *other)
  463. {
  464. struct unix_sock *u, *u_other;
  465. u = unix_sk(sk);
  466. u_other = unix_sk(other);
  467. spin_lock(&u_other->peer_wait.lock);
  468. if (u->peer_wake.private == other) {
  469. __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
  470. u->peer_wake.private = NULL;
  471. }
  472. spin_unlock(&u_other->peer_wait.lock);
  473. }
  474. static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
  475. struct sock *other)
  476. {
  477. unix_dgram_peer_wake_disconnect(sk, other);
  478. wake_up_interruptible_poll(sk_sleep(sk),
  479. EPOLLOUT |
  480. EPOLLWRNORM |
  481. EPOLLWRBAND);
  482. }
  483. /* preconditions:
  484. * - unix_peer(sk) == other
  485. * - association is stable
  486. */
  487. static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
  488. {
  489. int connected;
  490. connected = unix_dgram_peer_wake_connect(sk, other);
  491. /* If other is SOCK_DEAD, we want to make sure we signal
  492. * POLLOUT, such that a subsequent write() can get a
  493. * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
  494. * to other and its full, we will hang waiting for POLLOUT.
  495. */
  496. if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
  497. return 1;
  498. if (connected)
  499. unix_dgram_peer_wake_disconnect(sk, other);
  500. return 0;
  501. }
  502. static int unix_writable(const struct sock *sk, unsigned char state)
  503. {
  504. return state != TCP_LISTEN &&
  505. (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
  506. }
  507. static void unix_write_space(struct sock *sk)
  508. {
  509. struct socket_wq *wq;
  510. rcu_read_lock();
  511. if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
  512. wq = rcu_dereference(sk->sk_wq);
  513. if (skwq_has_sleeper(wq))
  514. wake_up_interruptible_sync_poll(&wq->wait,
  515. EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
  516. sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
  517. }
  518. rcu_read_unlock();
  519. }
  520. /* When dgram socket disconnects (or changes its peer), we clear its receive
  521. * queue of packets arrived from previous peer. First, it allows to do
  522. * flow control based only on wmem_alloc; second, sk connected to peer
  523. * may receive messages only from that peer. */
  524. static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
  525. {
  526. if (!skb_queue_empty(&sk->sk_receive_queue)) {
  527. skb_queue_purge_reason(&sk->sk_receive_queue,
  528. SKB_DROP_REASON_UNIX_DISCONNECT);
  529. wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
  530. /* If one link of bidirectional dgram pipe is disconnected,
  531. * we signal error. Messages are lost. Do not make this,
  532. * when peer was not connected to us.
  533. */
  534. if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
  535. WRITE_ONCE(other->sk_err, ECONNRESET);
  536. sk_error_report(other);
  537. }
  538. }
  539. }
  540. static void unix_sock_destructor(struct sock *sk)
  541. {
  542. struct unix_sock *u = unix_sk(sk);
  543. skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE);
  544. DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
  545. DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
  546. DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
  547. if (!sock_flag(sk, SOCK_DEAD)) {
  548. pr_info("Attempt to release alive unix socket: %p\n", sk);
  549. return;
  550. }
  551. if (u->addr)
  552. unix_release_addr(u->addr);
  553. atomic_long_dec(&unix_nr_socks);
  554. sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  555. #ifdef UNIX_REFCNT_DEBUG
  556. pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
  557. atomic_long_read(&unix_nr_socks));
  558. #endif
  559. }
  560. static unsigned int unix_skb_len(const struct sk_buff *skb)
  561. {
  562. return skb->len - UNIXCB(skb).consumed;
  563. }
/* Tear down a unix socket: unhash it, mark it shut down and closed, detach
 * it from its peer, drain its receive queue and drop one reference on it.
 *
 * @sk:      socket being released
 * @embrion: non-zero when called recursively for the socket owning an skb
 *           that was still queued on a listening socket (see the flush loop
 *           below); for stream/seqpacket sockets it makes the peer see
 *           ECONNRESET even if sk's receive queue is empty
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	/* Snapshot the bound path; it is put after the lock is dropped. */
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	/* Remember the pre-close state: the flush loop needs to know whether
	 * this was a listening socket.
	 */
	state = sk->sk_state;
	WRITE_ONCE(sk->sk_state, TCP_CLOSE);

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	u->oob_skb = NULL;
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
			/* A head skb with no remaining length is skipped when
			 * deciding whether unread data is left.
			 */
			if (skb && !unix_skb_len(skb))
				skb = skb_peek_next(skb, &sk->sk_receive_queue);
#endif
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			/* Queued data (or an embrion release) is reported to
			 * the peer as a connection reset.
			 */
			if (skb || embrion)
				WRITE_ONCE(skpair->sk_err, ECONNRESET);
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* skbs queued on a listener belong to sockets that are
		 * released here as embrions.
		 */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);

		/* passed fds are erased in the kfree_skb hook */
		kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	unix_schedule_gc(NULL);
}
/* Pair of peer credentials handled together: prepare_peercred() fills it
 * with references, update_peercred() swaps it with a socket's current
 * values, and drop_peercred() releases whatever it holds.
 */
struct unix_peercred {
	struct pid *peer_pid;		/* stored into sk->sk_peer_pid */
	const struct cred *peer_cred;	/* stored into sk->sk_peer_cred */
};
  625. static inline int prepare_peercred(struct unix_peercred *peercred)
  626. {
  627. struct pid *pid;
  628. int err;
  629. pid = task_tgid(current);
  630. err = pidfs_register_pid(pid);
  631. if (likely(!err)) {
  632. peercred->peer_pid = get_pid(pid);
  633. peercred->peer_cred = get_current_cred();
  634. }
  635. return err;
  636. }
  637. static void drop_peercred(struct unix_peercred *peercred)
  638. {
  639. const struct cred *cred = NULL;
  640. struct pid *pid = NULL;
  641. might_sleep();
  642. swap(peercred->peer_pid, pid);
  643. swap(peercred->peer_cred, cred);
  644. put_pid(pid);
  645. put_cred(cred);
  646. }
/* Store the pid/cred pointers from @peercred into @sk.  No references are
 * taken and no lock is acquired here.
 */
static inline void init_peercred(struct sock *sk,
				 const struct unix_peercred *peercred)
{
	sk->sk_peer_pid = peercred->peer_pid;
	sk->sk_peer_cred = peercred->peer_cred;
}
  653. static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
  654. {
  655. const struct cred *old_cred;
  656. struct pid *old_pid;
  657. spin_lock(&sk->sk_peer_lock);
  658. old_pid = sk->sk_peer_pid;
  659. old_cred = sk->sk_peer_cred;
  660. init_peercred(sk, peercred);
  661. spin_unlock(&sk->sk_peer_lock);
  662. peercred->peer_pid = old_pid;
  663. peercred->peer_cred = old_cred;
  664. }
  665. static void copy_peercred(struct sock *sk, struct sock *peersk)
  666. {
  667. lockdep_assert_held(&unix_sk(peersk)->lock);
  668. spin_lock(&sk->sk_peer_lock);
  669. sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
  670. sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
  671. spin_unlock(&sk->sk_peer_lock);
  672. }
  673. static bool unix_may_passcred(const struct sock *sk)
  674. {
  675. return sk->sk_scm_credentials || sk->sk_scm_pidfd;
  676. }
  677. static int unix_listen(struct socket *sock, int backlog)
  678. {
  679. int err;
  680. struct sock *sk = sock->sk;
  681. struct unix_sock *u = unix_sk(sk);
  682. struct unix_peercred peercred = {};
  683. err = -EOPNOTSUPP;
  684. if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
  685. goto out; /* Only stream/seqpacket sockets accept */
  686. err = -EINVAL;
  687. if (!READ_ONCE(u->addr))
  688. goto out; /* No listens on an unbound socket */
  689. err = prepare_peercred(&peercred);
  690. if (err)
  691. goto out;
  692. unix_state_lock(sk);
  693. if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
  694. goto out_unlock;
  695. if (backlog > sk->sk_max_ack_backlog)
  696. wake_up_interruptible_all(&u->peer_wait);
  697. sk->sk_max_ack_backlog = backlog;
  698. WRITE_ONCE(sk->sk_state, TCP_LISTEN);
  699. /* set credentials so connect can copy them */
  700. update_peercred(sk, &peercred);
  701. err = 0;
  702. out_unlock:
  703. unix_state_unlock(sk);
  704. drop_peercred(&peercred);
  705. out:
  706. return err;
  707. }
/* Forward declarations of the handlers referenced by the proto_ops tables
 * (unix_stream_ops, unix_dgram_ops, unix_seqpacket_ops) defined below.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr_unsized *, int);
static int unix_stream_connect(struct socket *, struct sockaddr_unsized *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr_unsized *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);
  737. #ifdef CONFIG_PROC_FS
  738. static int unix_count_nr_fds(struct sock *sk)
  739. {
  740. struct sk_buff *skb;
  741. struct unix_sock *u;
  742. int nr_fds = 0;
  743. spin_lock(&sk->sk_receive_queue.lock);
  744. skb = skb_peek(&sk->sk_receive_queue);
  745. while (skb) {
  746. u = unix_sk(skb->sk);
  747. nr_fds += atomic_read(&u->scm_stat.nr_fds);
  748. skb = skb_peek_next(skb, &sk->sk_receive_queue);
  749. }
  750. spin_unlock(&sk->sk_receive_queue.lock);
  751. return nr_fds;
  752. }
  753. static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
  754. {
  755. struct sock *sk = sock->sk;
  756. unsigned char s_state;
  757. struct unix_sock *u;
  758. int nr_fds = 0;
  759. if (sk) {
  760. s_state = READ_ONCE(sk->sk_state);
  761. u = unix_sk(sk);
  762. /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
  763. * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
  764. * SOCK_DGRAM is ordinary. So, no lock is needed.
  765. */
  766. if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
  767. nr_fds = atomic_read(&u->scm_stat.nr_fds);
  768. else if (s_state == TCP_LISTEN)
  769. nr_fds = unix_count_nr_fds(sk);
  770. seq_printf(m, "scm_fds: %u\n", nr_fds);
  771. }
  772. }
  773. #else
  774. #define unix_show_fdinfo NULL
  775. #endif
  776. static bool unix_custom_sockopt(int optname)
  777. {
  778. switch (optname) {
  779. case SO_INQ:
  780. return true;
  781. default:
  782. return false;
  783. }
  784. }
  785. static int unix_setsockopt(struct socket *sock, int level, int optname,
  786. sockptr_t optval, unsigned int optlen)
  787. {
  788. struct unix_sock *u = unix_sk(sock->sk);
  789. struct sock *sk = sock->sk;
  790. int val;
  791. if (level != SOL_SOCKET)
  792. return -EOPNOTSUPP;
  793. if (!unix_custom_sockopt(optname))
  794. return sock_setsockopt(sock, level, optname, optval, optlen);
  795. if (optlen != sizeof(int))
  796. return -EINVAL;
  797. if (copy_from_sockptr(&val, optval, sizeof(val)))
  798. return -EFAULT;
  799. switch (optname) {
  800. case SO_INQ:
  801. if (sk->sk_type != SOCK_STREAM)
  802. return -EINVAL;
  803. if (val > 1 || val < 0)
  804. return -EINVAL;
  805. WRITE_ONCE(u->recvmsg_inq, val);
  806. break;
  807. default:
  808. return -ENOPROTOOPT;
  809. }
  810. return 0;
  811. }
  812. static const struct proto_ops unix_stream_ops = {
  813. .family = PF_UNIX,
  814. .owner = THIS_MODULE,
  815. .release = unix_release,
  816. .bind = unix_bind,
  817. .connect = unix_stream_connect,
  818. .socketpair = unix_socketpair,
  819. .accept = unix_accept,
  820. .getname = unix_getname,
  821. .poll = unix_poll,
  822. .ioctl = unix_ioctl,
  823. #ifdef CONFIG_COMPAT
  824. .compat_ioctl = unix_compat_ioctl,
  825. #endif
  826. .listen = unix_listen,
  827. .shutdown = unix_shutdown,
  828. .setsockopt = unix_setsockopt,
  829. .sendmsg = unix_stream_sendmsg,
  830. .recvmsg = unix_stream_recvmsg,
  831. .read_skb = unix_stream_read_skb,
  832. .mmap = sock_no_mmap,
  833. .splice_read = unix_stream_splice_read,
  834. .set_peek_off = sk_set_peek_off,
  835. .show_fdinfo = unix_show_fdinfo,
  836. };
  837. static const struct proto_ops unix_dgram_ops = {
  838. .family = PF_UNIX,
  839. .owner = THIS_MODULE,
  840. .release = unix_release,
  841. .bind = unix_bind,
  842. .connect = unix_dgram_connect,
  843. .socketpair = unix_socketpair,
  844. .accept = sock_no_accept,
  845. .getname = unix_getname,
  846. .poll = unix_dgram_poll,
  847. .ioctl = unix_ioctl,
  848. #ifdef CONFIG_COMPAT
  849. .compat_ioctl = unix_compat_ioctl,
  850. #endif
  851. .listen = sock_no_listen,
  852. .shutdown = unix_shutdown,
  853. .sendmsg = unix_dgram_sendmsg,
  854. .read_skb = unix_read_skb,
  855. .recvmsg = unix_dgram_recvmsg,
  856. .mmap = sock_no_mmap,
  857. .set_peek_off = sk_set_peek_off,
  858. .show_fdinfo = unix_show_fdinfo,
  859. };
  860. static const struct proto_ops unix_seqpacket_ops = {
  861. .family = PF_UNIX,
  862. .owner = THIS_MODULE,
  863. .release = unix_release,
  864. .bind = unix_bind,
  865. .connect = unix_stream_connect,
  866. .socketpair = unix_socketpair,
  867. .accept = unix_accept,
  868. .getname = unix_getname,
  869. .poll = unix_dgram_poll,
  870. .ioctl = unix_ioctl,
  871. #ifdef CONFIG_COMPAT
  872. .compat_ioctl = unix_compat_ioctl,
  873. #endif
  874. .listen = unix_listen,
  875. .shutdown = unix_shutdown,
  876. .sendmsg = unix_seqpacket_sendmsg,
  877. .recvmsg = unix_seqpacket_recvmsg,
  878. .mmap = sock_no_mmap,
  879. .set_peek_off = sk_set_peek_off,
  880. .show_fdinfo = unix_show_fdinfo,
  881. };
/* Deliberately empty ->close() hook, wired into both struct proto instances
 * below and invoked by unix_release() before unix_release_sock().
 */
static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}
  888. static bool unix_bpf_bypass_getsockopt(int level, int optname)
  889. {
  890. if (level == SOL_SOCKET) {
  891. switch (optname) {
  892. case SO_PEERPIDFD:
  893. return true;
  894. default:
  895. return false;
  896. }
  897. }
  898. return false;
  899. }
  900. struct proto unix_dgram_proto = {
  901. .name = "UNIX",
  902. .owner = THIS_MODULE,
  903. .obj_size = sizeof(struct unix_sock),
  904. .close = unix_close,
  905. .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
  906. #ifdef CONFIG_BPF_SYSCALL
  907. .psock_update_sk_prot = unix_dgram_bpf_update_proto,
  908. #endif
  909. };
  910. struct proto unix_stream_proto = {
  911. .name = "UNIX-STREAM",
  912. .owner = THIS_MODULE,
  913. .obj_size = sizeof(struct unix_sock),
  914. .close = unix_close,
  915. .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
  916. #ifdef CONFIG_BPF_SYSCALL
  917. .psock_update_sk_prot = unix_stream_bpf_update_proto,
  918. #endif
  919. };
  920. static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
  921. {
  922. struct unix_sock *u;
  923. struct sock *sk;
  924. int err;
  925. atomic_long_inc(&unix_nr_socks);
  926. if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
  927. err = -ENFILE;
  928. goto err;
  929. }
  930. if (type == SOCK_STREAM)
  931. sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
  932. else /*dgram and seqpacket */
  933. sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
  934. if (!sk) {
  935. err = -ENOMEM;
  936. goto err;
  937. }
  938. sock_init_data(sock, sk);
  939. sk->sk_scm_rights = 1;
  940. sk->sk_hash = unix_unbound_hash(sk);
  941. sk->sk_allocation = GFP_KERNEL_ACCOUNT;
  942. sk->sk_write_space = unix_write_space;
  943. sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
  944. sk->sk_destruct = unix_sock_destructor;
  945. lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
  946. u = unix_sk(sk);
  947. u->listener = NULL;
  948. u->vertex = NULL;
  949. u->path.dentry = NULL;
  950. u->path.mnt = NULL;
  951. spin_lock_init(&u->lock);
  952. lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
  953. mutex_init(&u->iolock); /* single task reading lock */
  954. mutex_init(&u->bindlock); /* single task binding lock */
  955. init_waitqueue_head(&u->peer_wait);
  956. init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
  957. memset(&u->scm_stat, 0, sizeof(struct scm_stat));
  958. unix_insert_unbound_socket(net, sk);
  959. sock_prot_inuse_add(net, sk->sk_prot, 1);
  960. return sk;
  961. err:
  962. atomic_long_dec(&unix_nr_socks);
  963. return ERR_PTR(err);
  964. }
  965. static int unix_create(struct net *net, struct socket *sock, int protocol,
  966. int kern)
  967. {
  968. struct sock *sk;
  969. if (protocol && protocol != PF_UNIX)
  970. return -EPROTONOSUPPORT;
  971. sock->state = SS_UNCONNECTED;
  972. switch (sock->type) {
  973. case SOCK_STREAM:
  974. set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
  975. sock->ops = &unix_stream_ops;
  976. break;
  977. /*
  978. * Believe it or not BSD has AF_UNIX, SOCK_RAW though
  979. * nothing uses it.
  980. */
  981. case SOCK_RAW:
  982. sock->type = SOCK_DGRAM;
  983. fallthrough;
  984. case SOCK_DGRAM:
  985. sock->ops = &unix_dgram_ops;
  986. break;
  987. case SOCK_SEQPACKET:
  988. sock->ops = &unix_seqpacket_ops;
  989. break;
  990. default:
  991. return -ESOCKTNOSUPPORT;
  992. }
  993. sk = unix_create1(net, sock, kern, sock->type);
  994. if (IS_ERR(sk))
  995. return PTR_ERR(sk);
  996. return 0;
  997. }
  998. static int unix_release(struct socket *sock)
  999. {
  1000. struct sock *sk = sock->sk;
  1001. if (!sk)
  1002. return 0;
  1003. sk->sk_prot->close(sk, 0);
  1004. unix_release_sock(sk, 0);
  1005. sock->sk = NULL;
  1006. return 0;
  1007. }
/* Find the socket bound to the filesystem path in @sunaddr.
 *
 * @sunaddr:  address holding the path (passed through unix_mkname_bsd())
 * @addr_len: length of @sunaddr
 * @type:     socket type the result must have
 * @flags:    SOCK_COREDUMP selects the restricted lookup below
 *
 * Returns the socket found by unix_find_socket_byinode(), or an ERR_PTR:
 * the path lookup / permission error, -ECONNREFUSED if the inode is not a
 * socket or no socket is bound to it, -EPROTOTYPE on a type mismatch.
 * NOTE(review): the mismatch path drops the socket with sock_put(), so the
 * returned socket presumably carries a reference the caller must drop.
 */
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type, int flags)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);

	if (flags & SOCK_COREDUMP) {
		struct path root;

		/* Resolve relative to init_task's fs root, with kernel
		 * credentials and without leaving that root or following
		 * symlinks/magic links.  No path_permission() check is made
		 * in this branch.
		 */
		task_lock(&init_task);
		get_fs_root(init_task.fs, &root);
		task_unlock(&init_task);

		scoped_with_kernel_creds()
			err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
					      LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
					      LOOKUP_NO_MAGICLINKS, &path);
		path_put(&root);
		if (err)
			goto fail;
	} else {
		err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;

		/* The caller needs write permission on the socket node */
		err = path_permission(&path, MAY_WRITE);
		if (err)
			goto path_put;
	}

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		goto sock_put;

	path_put(&path);

	return sk;

sock_put:
	sock_put(sk);
path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}
  1057. static struct sock *unix_find_abstract(struct net *net,
  1058. struct sockaddr_un *sunaddr,
  1059. int addr_len, int type)
  1060. {
  1061. unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
  1062. struct dentry *dentry;
  1063. struct sock *sk;
  1064. sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
  1065. if (!sk)
  1066. return ERR_PTR(-ECONNREFUSED);
  1067. dentry = unix_sk(sk)->path.dentry;
  1068. if (dentry)
  1069. touch_atime(&unix_sk(sk)->path);
  1070. return sk;
  1071. }
  1072. static struct sock *unix_find_other(struct net *net,
  1073. struct sockaddr_un *sunaddr,
  1074. int addr_len, int type, int flags)
  1075. {
  1076. struct sock *sk;
  1077. if (sunaddr->sun_path[0])
  1078. sk = unix_find_bsd(sunaddr, addr_len, type, flags);
  1079. else
  1080. sk = unix_find_abstract(net, sunaddr, addr_len, type);
  1081. return sk;
  1082. }
/* Bind @sk to an automatically chosen abstract name: a leading NUL byte
 * followed by five hex digits.
 *
 * Returns 0 on success or if the socket already has an address, the error
 * from mutex_lock_interruptible(), -ENOMEM if the address cannot be
 * allocated, or -ENOSPC when every candidate name is taken.
 */
static int unix_autobind(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	u32 lastnum, ordernum;
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	/* Already bound: err is still 0 here, so this reports success */
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) +
		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	/* Name length: the zeroed sun_path[0] plus five hex digits */
	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

	old_hash = sk->sk_hash;
	/* Start at a random point of the 20-bit name space; lastnum marks
	 * where the search has wrapped all the way around.
	 */
	ordernum = get_random_u32();
	lastnum = ordernum & 0xFFFFF;
retry:
	ordernum = (ordernum + 1) & 0xFFFFF;
	sprintf(addr->name->sun_path + 1, "%05x", ordernum);

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		unix_table_double_unlock(net, old_hash, new_hash);

		/* __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();

		if (ordernum == lastnum) {
			/* Give up if all names seems to be in use. */
			err = -ENOSPC;
			unix_release_addr(addr);
			goto out;
		}

		goto retry;
	}

	/* Name is free: install it while the table locks are still held */
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
/* Bind @sk to a filesystem path: create the socket node, then record the
 * path and address on the socket.
 *
 * Returns 0 on success, -ENOMEM if the address cannot be allocated, -EINVAL
 * if the socket is already bound, or the error from path creation, the
 * security hook, vfs_mknod() or mutex_lock_interruptible().  -EEXIST is
 * reported to the caller as -EADDRINUSE.
 */
static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
			 int addr_len)
{
	/* Node mode: the socket inode's mode with the umask applied */
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	struct unix_sock *u = unix_sk(sk);
	unsigned int new_hash, old_hash;
	struct net *net = sock_net(sk);
	struct mnt_idmap *idmap;
	struct unix_address *addr;
	struct dentry *dentry;
	struct path parent;
	int err;

	addr_len = unix_mkname_bsd(sunaddr, addr_len);
	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = start_creating_path(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out;
	}

	/*
	 * All right, let's create it.
	 */
	idmap = mnt_idmap(parent.mnt);
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0, NULL);
	if (err)
		goto out_path;
	/* The node exists from here on; failures below must unlink it */
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	old_hash = sk->sk_hash;
	new_hash = unix_bsd_hash(d_backing_inode(dentry));
	unix_table_double_lock(net, old_hash, new_hash);
	/* The socket takes its own references on the mount and dentry */
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	unix_insert_bsd_socket(sk);
	mutex_unlock(&u->bindlock);
	end_creating_path(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod?  unlink what we'd created... */
	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
	end_creating_path(&parent, dentry);
out:
	unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}
  1195. static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
  1196. int addr_len)
  1197. {
  1198. struct unix_sock *u = unix_sk(sk);
  1199. unsigned int new_hash, old_hash;
  1200. struct net *net = sock_net(sk);
  1201. struct unix_address *addr;
  1202. int err;
  1203. addr = unix_create_addr(sunaddr, addr_len);
  1204. if (!addr)
  1205. return -ENOMEM;
  1206. err = mutex_lock_interruptible(&u->bindlock);
  1207. if (err)
  1208. goto out;
  1209. if (u->addr) {
  1210. err = -EINVAL;
  1211. goto out_mutex;
  1212. }
  1213. old_hash = sk->sk_hash;
  1214. new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
  1215. unix_table_double_lock(net, old_hash, new_hash);
  1216. if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
  1217. goto out_spin;
  1218. __unix_set_addr_hash(net, sk, addr, new_hash);
  1219. unix_table_double_unlock(net, old_hash, new_hash);
  1220. mutex_unlock(&u->bindlock);
  1221. return 0;
  1222. out_spin:
  1223. unix_table_double_unlock(net, old_hash, new_hash);
  1224. err = -EADDRINUSE;
  1225. out_mutex:
  1226. mutex_unlock(&u->bindlock);
  1227. out:
  1228. unix_release_addr(addr);
  1229. return err;
  1230. }
  1231. static int unix_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
  1232. {
  1233. struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
  1234. struct sock *sk = sock->sk;
  1235. int err;
  1236. if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
  1237. sunaddr->sun_family == AF_UNIX)
  1238. return unix_autobind(sk);
  1239. err = unix_validate_addr(sunaddr, addr_len);
  1240. if (err)
  1241. return err;
  1242. if (sunaddr->sun_path[0])
  1243. err = unix_bind_bsd(sk, sunaddr, addr_len);
  1244. else
  1245. err = unix_bind_abstract(sk, sunaddr, addr_len);
  1246. return err;
  1247. }
  1248. static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
  1249. {
  1250. if (unlikely(sk1 == sk2) || !sk2) {
  1251. unix_state_lock(sk1);
  1252. return;
  1253. }
  1254. if (sk1 > sk2)
  1255. swap(sk1, sk2);
  1256. unix_state_lock(sk1);
  1257. unix_state_lock(sk2);
  1258. }
  1259. static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
  1260. {
  1261. if (unlikely(sk1 == sk2) || !sk2) {
  1262. unix_state_unlock(sk1);
  1263. return;
  1264. }
  1265. unix_state_unlock(sk1);
  1266. unix_state_unlock(sk2);
  1267. }
/* ->connect handler for datagram sockets: set, replace or (with AF_UNSPEC)
 * clear the socket's peer.
 *
 * Returns 0 on success, -EINVAL if @alen is too short to hold the family
 * field, -EPERM if unix_may_send() refuses the peer, or the error from
 * address validation, the BPF cgroup hook, autobind, the peer lookup or the
 * security hook.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr_unsized *addr,
			      int alen, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *sk = sock->sk;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
		if (err)
			goto out;

		/* A socket that may pass credentials gets autobound if it
		 * has no address yet.
		 */
		if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		/* Both ends are marked established */
		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
		/* Drop the wake relay on the old peer and wake writers */
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer) {
			/* Purge what the old peer queued on sk, then close
			 * the old peer's state if it has no peer itself.
			 */
			unix_dgram_disconnected(sk, old_peer);

			unix_state_lock(old_peer);
			if (!unix_peer(old_peer))
				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
			unix_state_unlock(old_peer);
		}

		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
/* Sleep until woken on other's peer_wait queue or until @timeo expires, but
 * only while @other is alive, not receive-shutdown, and has a full receive
 * queue.
 *
 * Must be called with other's state lock held: the lock is released here
 * and is NOT re-acquired.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	/* Queue ourselves before testing the condition and dropping the
	 * lock; the wake-up this ordering guards against is not visible in
	 * this function.
	 */
	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
  /*
   * connect() for connection-oriented AF_UNIX sockets.
   *
   * A new sock (@newsk) is created up front to become the peer of @sk,
   * together with a one-byte skb allocated against it.  Once the listener
   * has been found and all checks passed, @sk and @newsk are wired to each
   * other and the skb is queued on the listener's receive queue.
   *
   * Returns 0 on success or a negative errno.
   */
  static int unix_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
  			       int addr_len, int flags)
  {
  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
  	struct unix_peercred peercred = {};
  	struct sock *sk = sock->sk;
  	struct unix_sock *usk = unix_sk(sk);
  	struct net *net = sock_net(sk);
  	struct unix_sock *new_usk, *listen_usk;
  	struct sock *newsk = NULL;
  	struct sock *other = NULL;
  	struct sk_buff *skb = NULL;
  	unsigned char cur_state;
  	long timeo;
  	int err;

  	err = unix_validate_addr(sunaddr, addr_len);
  	if (err)
  		goto out;

  	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
  	if (err)
  		goto out;

  	/* A credential-passing socket without an address gets autobound. */
  	if (unix_may_passcred(sk) && !READ_ONCE(usk->addr)) {
  		err = unix_autobind(sk);
  		if (err)
  			goto out;
  	}

  	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

  	err = prepare_peercred(&peercred);
  	if (err)
  		goto out;

  	/* The sock that will represent the completed connection. */
  	newsk = unix_create1(net, NULL, 0, sock->type);
  	if (IS_ERR(newsk)) {
  		err = PTR_ERR(newsk);
  		goto out;
  	}

  	/* The skb that gets queued on the listening sock. */
  	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
  	if (!skb) {
  		err = -ENOMEM;
  		goto out_free_sk;
  	}

  restart:
  	/* Look up the listening sock. */
  	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
  	if (IS_ERR(other)) {
  		err = PTR_ERR(other);
  		goto out_free_skb;
  	}

  	unix_state_lock(other);

  	/* The lookup returned a sock that has died meanwhile: look again. */
  	if (sock_flag(other, SOCK_DEAD)) {
  		unix_state_unlock(other);
  		sock_put(other);
  		goto restart;
  	}

  	if (other->sk_state != TCP_LISTEN ||
  	    (other->sk_shutdown & RCV_SHUTDOWN)) {
  		err = -ECONNREFUSED;
  		goto out_unlock;
  	}

  	if (unix_recvq_full_lockless(other)) {
  		if (!timeo) {
  			err = -EAGAIN;
  			goto out_unlock;
  		}

  		/* Drops other's state lock. */
  		timeo = unix_wait_for_peer(other, timeo);
  		sock_put(other);

  		err = sock_intr_errno(timeo);
  		if (!signal_pending(current))
  			goto restart;
  		goto out_free_skb;
  	}

  	/* Self connect and simultaneous connect are ruled out by
  	 * refusing a TCP_LISTEN socket here, which avoids a deadlock.
  	 */
  	cur_state = READ_ONCE(sk->sk_state);
  	if (unlikely(cur_state != TCP_CLOSE)) {
  		if (cur_state == TCP_ESTABLISHED)
  			err = -EISCONN;
  		else
  			err = -EINVAL;
  		goto out_unlock;
  	}

  	unix_state_lock(sk);

  	/* Re-check now that sk's own state lock is held. */
  	if (unlikely(sk->sk_state != TCP_CLOSE)) {
  		if (sk->sk_state == TCP_ESTABLISHED)
  			err = -EISCONN;
  		else
  			err = -EINVAL;
  		unix_state_unlock(sk);
  		goto out_unlock;
  	}

  	err = security_unix_stream_connect(sk, other, newsk);
  	if (err) {
  		unix_state_unlock(sk);
  		goto out_unlock;
  	}

  	/* All checks passed: fill in the new sock. */
  	sock_hold(sk);
  	unix_peer(newsk) = sk;
  	newsk->sk_state = TCP_ESTABLISHED;
  	newsk->sk_type = sk->sk_type;
  	newsk->sk_scm_recv_flags = other->sk_scm_recv_flags;
  	init_peercred(newsk, &peercred);

  	new_usk = unix_sk(newsk);
  	new_usk->listener = other;
  	RCU_INIT_POINTER(newsk->sk_wq, &new_usk->peer_wq);
  	listen_usk = unix_sk(other);

  	/* Copy the address information from the listener to the new sock.
  	 *
  	 * *(listen_usk->addr) and listen_usk->path are seen fully set up
  	 * here because the listener was found in the hash under its lock:
  	 * it was inserted into that hash chain in an earlier critical
  	 * section protected by the chain's lock, the same section that
  	 * set the contents of *(listen_usk->addr), listen_usk->path and
  	 * listen_usk->addr itself.
  	 *
  	 * Publishing new_usk->addr with smp_store_release() is enough to
  	 * make those stores, and the store to new_usk->path, visible to
  	 * anyone who fetches new_usk->addr with smp_load_acquire().  That
  	 * is the same guarantee given for unix_sock instances bound in
  	 * unix_bind() or unix_autobind().
  	 */
  	if (listen_usk->path.dentry) {
  		path_get(&listen_usk->path);
  		new_usk->path = listen_usk->path;
  	}
  	refcount_inc(&listen_usk->addr->refcnt);
  	smp_store_release(&new_usk->addr, listen_usk->addr);

  	/* Set credentials */
  	copy_peercred(sk, other);

  	sock->state = SS_CONNECTED;
  	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
  	sock_hold(newsk);

  	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
  	unix_peer(sk) = newsk;

  	unix_state_unlock(sk);

  	/* Hand the skb over to the listening sock. */
  	spin_lock(&other->sk_receive_queue.lock);
  	__skb_queue_tail(&other->sk_receive_queue, skb);
  	spin_unlock(&other->sk_receive_queue.lock);
  	unix_state_unlock(other);
  	READ_ONCE(other->sk_data_ready)(other);
  	sock_put(other);
  	return 0;

  out_unlock:
  	unix_state_unlock(other);
  	sock_put(other);
  out_free_skb:
  	consume_skb(skb);
  out_free_sk:
  	unix_release_sock(newsk, 0);
  out:
  	drop_peercred(&peercred);
  	return err;
  }
  1512. static int unix_socketpair(struct socket *socka, struct socket *sockb)
  1513. {
  1514. struct unix_peercred ska_peercred = {}, skb_peercred = {};
  1515. struct sock *ska = socka->sk, *skb = sockb->sk;
  1516. int err;
  1517. err = prepare_peercred(&ska_peercred);
  1518. if (err)
  1519. return err;
  1520. err = prepare_peercred(&skb_peercred);
  1521. if (err) {
  1522. drop_peercred(&ska_peercred);
  1523. return err;
  1524. }
  1525. /* Join our sockets back to back */
  1526. sock_hold(ska);
  1527. sock_hold(skb);
  1528. unix_peer(ska) = skb;
  1529. unix_peer(skb) = ska;
  1530. init_peercred(ska, &ska_peercred);
  1531. init_peercred(skb, &skb_peercred);
  1532. ska->sk_state = TCP_ESTABLISHED;
  1533. skb->sk_state = TCP_ESTABLISHED;
  1534. socka->state = SS_CONNECTED;
  1535. sockb->state = SS_CONNECTED;
  1536. return 0;
  1537. }
  1538. static int unix_accept(struct socket *sock, struct socket *newsock,
  1539. struct proto_accept_arg *arg)
  1540. {
  1541. struct sock *sk = sock->sk;
  1542. struct sk_buff *skb;
  1543. struct sock *tsk;
  1544. arg->err = -EOPNOTSUPP;
  1545. if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
  1546. goto out;
  1547. arg->err = -EINVAL;
  1548. if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
  1549. goto out;
  1550. /* If socket state is TCP_LISTEN it cannot change (for now...),
  1551. * so that no locks are necessary.
  1552. */
  1553. skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
  1554. &arg->err);
  1555. if (!skb) {
  1556. /* This means receive shutdown. */
  1557. if (arg->err == 0)
  1558. arg->err = -EINVAL;
  1559. goto out;
  1560. }
  1561. tsk = skb->sk;
  1562. skb_free_datagram(sk, skb);
  1563. wake_up_interruptible(&unix_sk(sk)->peer_wait);
  1564. if (tsk->sk_type == SOCK_STREAM)
  1565. set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
  1566. /* attach accepted sock to socket */
  1567. unix_state_lock(tsk);
  1568. unix_update_edges(unix_sk(tsk));
  1569. newsock->state = SS_CONNECTED;
  1570. sock_graft(tsk, newsock);
  1571. unix_state_unlock(tsk);
  1572. return 0;
  1573. out:
  1574. return arg->err;
  1575. }
  1576. static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
  1577. {
  1578. struct sock *sk = sock->sk;
  1579. struct unix_address *addr;
  1580. DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
  1581. int err = 0;
  1582. if (peer) {
  1583. sk = unix_peer_get(sk);
  1584. err = -ENOTCONN;
  1585. if (!sk)
  1586. goto out;
  1587. err = 0;
  1588. } else {
  1589. sock_hold(sk);
  1590. }
  1591. addr = smp_load_acquire(&unix_sk(sk)->addr);
  1592. if (!addr) {
  1593. sunaddr->sun_family = AF_UNIX;
  1594. sunaddr->sun_path[0] = 0;
  1595. err = offsetof(struct sockaddr_un, sun_path);
  1596. } else {
  1597. err = addr->len;
  1598. memcpy(sunaddr, addr->name, addr->len);
  1599. if (peer)
  1600. BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
  1601. CGROUP_UNIX_GETPEERNAME);
  1602. else
  1603. BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
  1604. CGROUP_UNIX_GETSOCKNAME);
  1605. }
  1606. sock_put(sk);
  1607. out:
  1608. return err;
  1609. }
  1610. /* The "user->unix_inflight" variable is protected by the garbage
  1611. * collection lock, and we just read it locklessly here. If you go
  1612. * over the limit, there might be a tiny race in actually noticing
  1613. * it across threads. Tough.
  1614. */
  1615. static inline bool too_many_unix_fds(struct task_struct *p)
  1616. {
  1617. struct user_struct *user = current_user();
  1618. if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
  1619. return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
  1620. return false;
  1621. }
  1622. static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
  1623. {
  1624. if (too_many_unix_fds(current))
  1625. return -ETOOMANYREFS;
  1626. UNIXCB(skb).fp = scm->fp;
  1627. scm->fp = NULL;
  1628. if (unix_prepare_fpl(UNIXCB(skb).fp))
  1629. return -ENOMEM;
  1630. return 0;
  1631. }
  1632. static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
  1633. {
  1634. scm->fp = UNIXCB(skb).fp;
  1635. UNIXCB(skb).fp = NULL;
  1636. unix_destroy_fpl(scm->fp);
  1637. }
  1638. static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
  1639. {
  1640. scm->fp = scm_fp_dup(UNIXCB(skb).fp);
  1641. unix_peek_fpl(scm->fp);
  1642. }
  1643. static void unix_destruct_scm(struct sk_buff *skb)
  1644. {
  1645. struct scm_cookie scm;
  1646. memset(&scm, 0, sizeof(scm));
  1647. scm.pid = UNIXCB(skb).pid;
  1648. if (UNIXCB(skb).fp)
  1649. unix_detach_fds(&scm, skb);
  1650. /* Alas, it calls VFS */
  1651. /* So fscking what? fput() had been SMP-safe since the last Summer */
  1652. scm_destroy(&scm);
  1653. sock_wfree(skb);
  1654. }
  1655. static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
  1656. {
  1657. int err = 0;
  1658. UNIXCB(skb).pid = get_pid(scm->pid);
  1659. UNIXCB(skb).uid = scm->creds.uid;
  1660. UNIXCB(skb).gid = scm->creds.gid;
  1661. UNIXCB(skb).fp = NULL;
  1662. unix_get_secdata(scm, skb);
  1663. if (scm->fp && send_fds)
  1664. err = unix_attach_fds(scm, skb);
  1665. skb->destructor = unix_destruct_scm;
  1666. return err;
  1667. }
  1668. static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
  1669. {
  1670. scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
  1671. unix_set_secdata(scm, skb);
  1672. }
  1673. /**
  1674. * unix_maybe_add_creds() - Adds current task uid/gid and struct pid to skb if needed.
  1675. * @skb: skb to attach creds to.
  1676. * @sk: Sender sock.
  1677. * @other: Receiver sock.
  1678. *
  1679. * Some apps rely on write() giving SCM_CREDENTIALS
  1680. * We include credentials if source or destination socket
  1681. * asserted SOCK_PASSCRED.
  1682. *
  1683. * Context: May sleep.
  1684. * Return: On success zero, on error a negative error code is returned.
  1685. */
  1686. static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
  1687. const struct sock *other)
  1688. {
  1689. if (UNIXCB(skb).pid)
  1690. return 0;
  1691. if (unix_may_passcred(sk) || unix_may_passcred(other) ||
  1692. !other->sk_socket) {
  1693. struct pid *pid;
  1694. int err;
  1695. pid = task_tgid(current);
  1696. err = pidfs_register_pid(pid);
  1697. if (unlikely(err))
  1698. return err;
  1699. UNIXCB(skb).pid = get_pid(pid);
  1700. current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
  1701. }
  1702. return 0;
  1703. }
  1704. static bool unix_skb_scm_eq(struct sk_buff *skb,
  1705. struct scm_cookie *scm)
  1706. {
  1707. return UNIXCB(skb).pid == scm->pid &&
  1708. uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
  1709. gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
  1710. unix_secdata_eq(scm, skb);
  1711. }
  1712. static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
  1713. {
  1714. struct scm_fp_list *fp = UNIXCB(skb).fp;
  1715. struct unix_sock *u = unix_sk(sk);
  1716. if (unlikely(fp && fp->count)) {
  1717. atomic_add(fp->count, &u->scm_stat.nr_fds);
  1718. unix_add_edges(fp, u);
  1719. }
  1720. }
  1721. static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
  1722. {
  1723. struct scm_fp_list *fp = UNIXCB(skb).fp;
  1724. struct unix_sock *u = unix_sk(sk);
  1725. if (unlikely(fp && fp->count)) {
  1726. atomic_sub(fp->count, &u->scm_stat.nr_fds);
  1727. unix_del_edges(fp);
  1728. }
  1729. }
  1730. /*
  1731. * Send AF_UNIX data.
  1732. */
  1733. static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
  1734. size_t len)
  1735. {
  1736. struct sock *sk = sock->sk, *other = NULL;
  1737. struct unix_sock *u = unix_sk(sk);
  1738. struct scm_cookie scm;
  1739. struct sk_buff *skb;
  1740. int data_len = 0;
  1741. int sk_locked;
  1742. long timeo;
  1743. int err;
  1744. err = scm_send(sock, msg, &scm, false);
  1745. if (err < 0)
  1746. return err;
  1747. if (msg->msg_flags & MSG_OOB) {
  1748. err = -EOPNOTSUPP;
  1749. goto out;
  1750. }
  1751. if (msg->msg_namelen) {
  1752. err = unix_validate_addr(msg->msg_name, msg->msg_namelen);
  1753. if (err)
  1754. goto out;
  1755. err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
  1756. msg->msg_name,
  1757. &msg->msg_namelen,
  1758. NULL);
  1759. if (err)
  1760. goto out;
  1761. }
  1762. if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
  1763. err = unix_autobind(sk);
  1764. if (err)
  1765. goto out;
  1766. }
  1767. if (len > READ_ONCE(sk->sk_sndbuf) - 32) {
  1768. err = -EMSGSIZE;
  1769. goto out;
  1770. }
  1771. if (len > SKB_MAX_ALLOC) {
  1772. data_len = min_t(size_t,
  1773. len - SKB_MAX_ALLOC,
  1774. MAX_SKB_FRAGS * PAGE_SIZE);
  1775. data_len = PAGE_ALIGN(data_len);
  1776. BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
  1777. }
  1778. skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
  1779. msg->msg_flags & MSG_DONTWAIT, &err,
  1780. PAGE_ALLOC_COSTLY_ORDER);
  1781. if (!skb)
  1782. goto out;
  1783. err = unix_scm_to_skb(&scm, skb, true);
  1784. if (err < 0)
  1785. goto out_free;
  1786. skb_put(skb, len - data_len);
  1787. skb->data_len = data_len;
  1788. skb->len = len;
  1789. err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
  1790. if (err)
  1791. goto out_free;
  1792. timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
  1793. if (msg->msg_namelen) {
  1794. lookup:
  1795. other = unix_find_other(sock_net(sk), msg->msg_name,
  1796. msg->msg_namelen, sk->sk_type, 0);
  1797. if (IS_ERR(other)) {
  1798. err = PTR_ERR(other);
  1799. goto out_free;
  1800. }
  1801. } else {
  1802. other = unix_peer_get(sk);
  1803. if (!other) {
  1804. err = -ENOTCONN;
  1805. goto out_free;
  1806. }
  1807. }
  1808. if (sk_filter(other, skb) < 0) {
  1809. /* Toss the packet but do not return any error to the sender */
  1810. err = len;
  1811. goto out_sock_put;
  1812. }
  1813. err = unix_maybe_add_creds(skb, sk, other);
  1814. if (err)
  1815. goto out_sock_put;
  1816. restart:
  1817. sk_locked = 0;
  1818. unix_state_lock(other);
  1819. restart_locked:
  1820. if (!unix_may_send(sk, other)) {
  1821. err = -EPERM;
  1822. goto out_unlock;
  1823. }
  1824. if (unlikely(sock_flag(other, SOCK_DEAD))) {
  1825. /* Check with 1003.1g - what should datagram error */
  1826. unix_state_unlock(other);
  1827. if (sk->sk_type == SOCK_SEQPACKET) {
  1828. /* We are here only when racing with unix_release_sock()
  1829. * is clearing @other. Never change state to TCP_CLOSE
  1830. * unlike SOCK_DGRAM wants.
  1831. */
  1832. err = -EPIPE;
  1833. goto out_sock_put;
  1834. }
  1835. if (!sk_locked)
  1836. unix_state_lock(sk);
  1837. if (unix_peer(sk) == other) {
  1838. unix_peer(sk) = NULL;
  1839. unix_dgram_peer_wake_disconnect_wakeup(sk, other);
  1840. WRITE_ONCE(sk->sk_state, TCP_CLOSE);
  1841. unix_state_unlock(sk);
  1842. unix_dgram_disconnected(sk, other);
  1843. sock_put(other);
  1844. err = -ECONNREFUSED;
  1845. goto out_sock_put;
  1846. }
  1847. unix_state_unlock(sk);
  1848. if (!msg->msg_namelen) {
  1849. err = -ECONNRESET;
  1850. goto out_sock_put;
  1851. }
  1852. sock_put(other);
  1853. goto lookup;
  1854. }
  1855. if (other->sk_shutdown & RCV_SHUTDOWN) {
  1856. err = -EPIPE;
  1857. goto out_unlock;
  1858. }
  1859. if (UNIXCB(skb).fp && !other->sk_scm_rights) {
  1860. err = -EPERM;
  1861. goto out_unlock;
  1862. }
  1863. if (sk->sk_type != SOCK_SEQPACKET) {
  1864. err = security_unix_may_send(sk->sk_socket, other->sk_socket);
  1865. if (err)
  1866. goto out_unlock;
  1867. }
  1868. /* other == sk && unix_peer(other) != sk if
  1869. * - unix_peer(sk) == NULL, destination address bound to sk
  1870. * - unix_peer(sk) == sk by time of get but disconnected before lock
  1871. */
  1872. if (other != sk &&
  1873. unlikely(unix_peer(other) != sk &&
  1874. unix_recvq_full_lockless(other))) {
  1875. if (timeo) {
  1876. timeo = unix_wait_for_peer(other, timeo);
  1877. err = sock_intr_errno(timeo);
  1878. if (signal_pending(current))
  1879. goto out_sock_put;
  1880. goto restart;
  1881. }
  1882. if (!sk_locked) {
  1883. unix_state_unlock(other);
  1884. unix_state_double_lock(sk, other);
  1885. }
  1886. if (unix_peer(sk) != other ||
  1887. unix_dgram_peer_wake_me(sk, other)) {
  1888. err = -EAGAIN;
  1889. sk_locked = 1;
  1890. goto out_unlock;
  1891. }
  1892. if (!sk_locked) {
  1893. sk_locked = 1;
  1894. goto restart_locked;
  1895. }
  1896. }
  1897. if (unlikely(sk_locked))
  1898. unix_state_unlock(sk);
  1899. if (sock_flag(other, SOCK_RCVTSTAMP))
  1900. __net_timestamp(skb);
  1901. scm_stat_add(other, skb);
  1902. skb_queue_tail(&other->sk_receive_queue, skb);
  1903. unix_state_unlock(other);
  1904. READ_ONCE(other->sk_data_ready)(other);
  1905. sock_put(other);
  1906. scm_destroy(&scm);
  1907. return len;
  1908. out_unlock:
  1909. if (sk_locked)
  1910. unix_state_unlock(sk);
  1911. unix_state_unlock(other);
  1912. out_sock_put:
  1913. sock_put(other);
  1914. out_free:
  1915. consume_skb(skb);
  1916. out:
  1917. scm_destroy(&scm);
  1918. return err;
  1919. }
  /* Stream sockets use paged skbs.  The paged part of one skb is capped
   * at 32768 bytes, but is never smaller than one full page.
   */
  #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
  1924. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  1925. static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
  1926. struct scm_cookie *scm, bool fds_sent)
  1927. {
  1928. struct unix_sock *ousk = unix_sk(other);
  1929. struct sk_buff *skb;
  1930. int err;
  1931. skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
  1932. if (!skb)
  1933. return err;
  1934. err = unix_scm_to_skb(scm, skb, !fds_sent);
  1935. if (err < 0)
  1936. goto out;
  1937. err = unix_maybe_add_creds(skb, sk, other);
  1938. if (err)
  1939. goto out;
  1940. skb_put(skb, 1);
  1941. err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
  1942. if (err)
  1943. goto out;
  1944. unix_state_lock(other);
  1945. if (sock_flag(other, SOCK_DEAD) ||
  1946. (other->sk_shutdown & RCV_SHUTDOWN)) {
  1947. err = -EPIPE;
  1948. goto out_unlock;
  1949. }
  1950. if (UNIXCB(skb).fp && !other->sk_scm_rights) {
  1951. err = -EPERM;
  1952. goto out_unlock;
  1953. }
  1954. scm_stat_add(other, skb);
  1955. spin_lock(&other->sk_receive_queue.lock);
  1956. WRITE_ONCE(ousk->oob_skb, skb);
  1957. WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
  1958. __skb_queue_tail(&other->sk_receive_queue, skb);
  1959. spin_unlock(&other->sk_receive_queue.lock);
  1960. sk_send_sigurg(other);
  1961. unix_state_unlock(other);
  1962. READ_ONCE(other->sk_data_ready)(other);
  1963. return 0;
  1964. out_unlock:
  1965. unix_state_unlock(other);
  1966. out:
  1967. consume_skb(skb);
  1968. return err;
  1969. }
  1970. #endif
  1971. static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
  1972. size_t len)
  1973. {
  1974. struct sock *sk = sock->sk;
  1975. struct sk_buff *skb = NULL;
  1976. struct sock *other = NULL;
  1977. struct unix_sock *otheru;
  1978. struct scm_cookie scm;
  1979. bool fds_sent = false;
  1980. int err, sent = 0;
  1981. err = scm_send(sock, msg, &scm, false);
  1982. if (err < 0)
  1983. return err;
  1984. if (msg->msg_flags & MSG_OOB) {
  1985. err = -EOPNOTSUPP;
  1986. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  1987. if (len)
  1988. len--;
  1989. else
  1990. #endif
  1991. goto out_err;
  1992. }
  1993. if (msg->msg_namelen) {
  1994. err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
  1995. goto out_err;
  1996. }
  1997. other = unix_peer(sk);
  1998. if (!other) {
  1999. err = -ENOTCONN;
  2000. goto out_err;
  2001. }
  2002. otheru = unix_sk(other);
  2003. if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
  2004. goto out_pipe;
  2005. while (sent < len) {
  2006. int size = len - sent;
  2007. int data_len;
  2008. if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
  2009. skb = sock_alloc_send_pskb(sk, 0, 0,
  2010. msg->msg_flags & MSG_DONTWAIT,
  2011. &err, 0);
  2012. } else {
  2013. /* Keep two messages in the pipe so it schedules better */
  2014. size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
  2015. /* allow fallback to order-0 allocations */
  2016. size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
  2017. data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
  2018. data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
  2019. skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
  2020. msg->msg_flags & MSG_DONTWAIT, &err,
  2021. get_order(UNIX_SKB_FRAGS_SZ));
  2022. }
  2023. if (!skb)
  2024. goto out_err;
  2025. /* Only send the fds in the first buffer */
  2026. err = unix_scm_to_skb(&scm, skb, !fds_sent);
  2027. if (err < 0)
  2028. goto out_free;
  2029. fds_sent = true;
  2030. err = unix_maybe_add_creds(skb, sk, other);
  2031. if (err)
  2032. goto out_free;
  2033. if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
  2034. skb->ip_summed = CHECKSUM_UNNECESSARY;
  2035. err = skb_splice_from_iter(skb, &msg->msg_iter, size);
  2036. if (err < 0)
  2037. goto out_free;
  2038. size = err;
  2039. refcount_add(size, &sk->sk_wmem_alloc);
  2040. } else {
  2041. skb_put(skb, size - data_len);
  2042. skb->data_len = data_len;
  2043. skb->len = size;
  2044. err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
  2045. if (err)
  2046. goto out_free;
  2047. }
  2048. unix_state_lock(other);
  2049. if (sock_flag(other, SOCK_DEAD) ||
  2050. (other->sk_shutdown & RCV_SHUTDOWN))
  2051. goto out_pipe_unlock;
  2052. if (UNIXCB(skb).fp && !other->sk_scm_rights) {
  2053. unix_state_unlock(other);
  2054. err = -EPERM;
  2055. goto out_free;
  2056. }
  2057. scm_stat_add(other, skb);
  2058. spin_lock(&other->sk_receive_queue.lock);
  2059. WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
  2060. __skb_queue_tail(&other->sk_receive_queue, skb);
  2061. spin_unlock(&other->sk_receive_queue.lock);
  2062. unix_state_unlock(other);
  2063. READ_ONCE(other->sk_data_ready)(other);
  2064. sent += size;
  2065. }
  2066. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  2067. if (msg->msg_flags & MSG_OOB) {
  2068. err = queue_oob(sk, msg, other, &scm, fds_sent);
  2069. if (err)
  2070. goto out_err;
  2071. sent++;
  2072. }
  2073. #endif
  2074. scm_destroy(&scm);
  2075. return sent;
  2076. out_pipe_unlock:
  2077. unix_state_unlock(other);
  2078. out_pipe:
  2079. if (!sent && !(msg->msg_flags & MSG_NOSIGNAL))
  2080. send_sig(SIGPIPE, current, 0);
  2081. err = -EPIPE;
  2082. out_free:
  2083. consume_skb(skb);
  2084. out_err:
  2085. scm_destroy(&scm);
  2086. return sent ? : err;
  2087. }
  2088. static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
  2089. size_t len)
  2090. {
  2091. int err;
  2092. struct sock *sk = sock->sk;
  2093. err = sock_error(sk);
  2094. if (err)
  2095. return err;
  2096. if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
  2097. return -ENOTCONN;
  2098. if (msg->msg_namelen)
  2099. msg->msg_namelen = 0;
  2100. return unix_dgram_sendmsg(sock, msg, len);
  2101. }
  2102. static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
  2103. size_t size, int flags)
  2104. {
  2105. struct sock *sk = sock->sk;
  2106. if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
  2107. return -ENOTCONN;
  2108. return unix_dgram_recvmsg(sock, msg, size, flags);
  2109. }
  2110. static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
  2111. {
  2112. struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
  2113. if (addr) {
  2114. msg->msg_namelen = addr->len;
  2115. memcpy(msg->msg_name, addr->name, addr->len);
  2116. }
  2117. }
  2118. int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
  2119. int flags)
  2120. {
  2121. struct scm_cookie scm;
  2122. struct socket *sock = sk->sk_socket;
  2123. struct unix_sock *u = unix_sk(sk);
  2124. struct sk_buff *skb, *last;
  2125. long timeo;
  2126. int skip;
  2127. int err;
  2128. err = -EOPNOTSUPP;
  2129. if (flags&MSG_OOB)
  2130. goto out;
  2131. timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  2132. do {
  2133. mutex_lock(&u->iolock);
  2134. skip = sk_peek_offset(sk, flags);
  2135. skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
  2136. &skip, &err, &last);
  2137. if (skb) {
  2138. if (!(flags & MSG_PEEK))
  2139. scm_stat_del(sk, skb);
  2140. break;
  2141. }
  2142. mutex_unlock(&u->iolock);
  2143. if (err != -EAGAIN)
  2144. break;
  2145. } while (timeo &&
  2146. !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
  2147. &err, &timeo, last));
  2148. if (!skb) { /* implies iolock unlocked */
  2149. /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
  2150. if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
  2151. (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN))
  2152. err = 0;
  2153. goto out;
  2154. }
  2155. if (wq_has_sleeper(&u->peer_wait))
  2156. wake_up_interruptible_sync_poll(&u->peer_wait,
  2157. EPOLLOUT | EPOLLWRNORM |
  2158. EPOLLWRBAND);
  2159. if (msg->msg_name) {
  2160. unix_copy_addr(msg, skb->sk);
  2161. BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
  2162. msg->msg_name,
  2163. &msg->msg_namelen);
  2164. }
  2165. if (size > skb->len - skip)
  2166. size = skb->len - skip;
  2167. else if (size < skb->len - skip)
  2168. msg->msg_flags |= MSG_TRUNC;
  2169. err = skb_copy_datagram_msg(skb, skip, msg, size);
  2170. if (err)
  2171. goto out_free;
  2172. if (sock_flag(sk, SOCK_RCVTSTAMP))
  2173. __sock_recv_timestamp(msg, sk, skb);
  2174. memset(&scm, 0, sizeof(scm));
  2175. unix_skb_to_scm(skb, &scm);
  2176. if (!(flags & MSG_PEEK)) {
  2177. if (UNIXCB(skb).fp)
  2178. unix_detach_fds(&scm, skb);
  2179. sk_peek_offset_bwd(sk, skb->len);
  2180. } else {
  2181. /* It is questionable: on PEEK we could:
  2182. - do not return fds - good, but too simple 8)
  2183. - return fds, and do not return them on read (old strategy,
  2184. apparently wrong)
  2185. - clone fds (I chose it for now, it is the most universal
  2186. solution)
  2187. POSIX 1003.1g does not actually define this clearly
  2188. at all. POSIX 1003.1g doesn't define a lot of things
  2189. clearly however!
  2190. */
  2191. sk_peek_offset_fwd(sk, size);
  2192. if (UNIXCB(skb).fp)
  2193. unix_peek_fds(&scm, skb);
  2194. }
  2195. err = (flags & MSG_TRUNC) ? skb->len - skip : size;
  2196. scm_recv_unix(sock, msg, &scm, flags);
  2197. out_free:
  2198. skb_free_datagram(sk, skb);
  2199. mutex_unlock(&u->iolock);
  2200. out:
  2201. return err;
  2202. }
  2203. static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
  2204. int flags)
  2205. {
  2206. struct sock *sk = sock->sk;
  2207. #ifdef CONFIG_BPF_SYSCALL
  2208. const struct proto *prot = READ_ONCE(sk->sk_prot);
  2209. if (prot != &unix_dgram_proto)
  2210. return prot->recvmsg(sk, msg, size, flags, NULL);
  2211. #endif
  2212. return __unix_dgram_recvmsg(sk, msg, size, flags);
  2213. }
  2214. static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
  2215. {
  2216. struct unix_sock *u = unix_sk(sk);
  2217. struct sk_buff *skb;
  2218. int err;
  2219. mutex_lock(&u->iolock);
  2220. skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
  2221. mutex_unlock(&u->iolock);
  2222. if (!skb)
  2223. return err;
  2224. return recv_actor(sk, skb);
  2225. }
  2226. /*
  2227. * Sleep until more data has arrived. But check for races..
  2228. */
  2229. static long unix_stream_data_wait(struct sock *sk, long timeo,
  2230. struct sk_buff *last, unsigned int last_len,
  2231. bool freezable)
  2232. {
  2233. unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
  2234. struct sk_buff *tail;
  2235. DEFINE_WAIT(wait);
  2236. unix_state_lock(sk);
  2237. for (;;) {
  2238. prepare_to_wait(sk_sleep(sk), &wait, state);
  2239. tail = skb_peek_tail(&sk->sk_receive_queue);
  2240. if (tail != last ||
  2241. (tail && tail->len != last_len) ||
  2242. sk->sk_err ||
  2243. (sk->sk_shutdown & RCV_SHUTDOWN) ||
  2244. signal_pending(current) ||
  2245. !timeo)
  2246. break;
  2247. sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
  2248. unix_state_unlock(sk);
  2249. timeo = schedule_timeout(timeo);
  2250. unix_state_lock(sk);
  2251. if (sock_flag(sk, SOCK_DEAD))
  2252. break;
  2253. sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
  2254. }
  2255. finish_wait(sk_sleep(sk), &wait);
  2256. unix_state_unlock(sk);
  2257. return timeo;
  2258. }
  2259. struct unix_stream_read_state {
  2260. int (*recv_actor)(struct sk_buff *, int, int,
  2261. struct unix_stream_read_state *);
  2262. struct socket *socket;
  2263. struct msghdr *msg;
  2264. struct pipe_inode_info *pipe;
  2265. size_t size;
  2266. int flags;
  2267. unsigned int splice_flags;
  2268. };
  2269. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  2270. static int unix_stream_recv_urg(struct unix_stream_read_state *state)
  2271. {
  2272. struct sk_buff *oob_skb, *read_skb = NULL;
  2273. struct socket *sock = state->socket;
  2274. struct sock *sk = sock->sk;
  2275. struct unix_sock *u = unix_sk(sk);
  2276. int chunk = 1;
  2277. mutex_lock(&u->iolock);
  2278. unix_state_lock(sk);
  2279. spin_lock(&sk->sk_receive_queue.lock);
  2280. if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
  2281. spin_unlock(&sk->sk_receive_queue.lock);
  2282. unix_state_unlock(sk);
  2283. mutex_unlock(&u->iolock);
  2284. return -EINVAL;
  2285. }
  2286. oob_skb = u->oob_skb;
  2287. if (!(state->flags & MSG_PEEK)) {
  2288. WRITE_ONCE(u->oob_skb, NULL);
  2289. WRITE_ONCE(u->inq_len, u->inq_len - 1);
  2290. if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
  2291. !unix_skb_len(oob_skb->prev)) {
  2292. read_skb = oob_skb->prev;
  2293. __skb_unlink(read_skb, &sk->sk_receive_queue);
  2294. }
  2295. }
  2296. spin_unlock(&sk->sk_receive_queue.lock);
  2297. unix_state_unlock(sk);
  2298. chunk = state->recv_actor(oob_skb, 0, chunk, state);
  2299. if (!(state->flags & MSG_PEEK))
  2300. UNIXCB(oob_skb).consumed += 1;
  2301. mutex_unlock(&u->iolock);
  2302. consume_skb(read_skb);
  2303. if (chunk < 0)
  2304. return -EFAULT;
  2305. state->msg->msg_flags |= MSG_OOB;
  2306. return 1;
  2307. }
  2308. static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
  2309. int flags, int copied)
  2310. {
  2311. struct sk_buff *read_skb = NULL, *unread_skb = NULL;
  2312. struct unix_sock *u = unix_sk(sk);
  2313. if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
  2314. return skb;
  2315. spin_lock(&sk->sk_receive_queue.lock);
  2316. if (!unix_skb_len(skb)) {
  2317. if (copied && (!u->oob_skb || skb == u->oob_skb)) {
  2318. skb = NULL;
  2319. } else if (flags & MSG_PEEK) {
  2320. skb = skb_peek_next(skb, &sk->sk_receive_queue);
  2321. } else {
  2322. read_skb = skb;
  2323. skb = skb_peek_next(skb, &sk->sk_receive_queue);
  2324. __skb_unlink(read_skb, &sk->sk_receive_queue);
  2325. }
  2326. if (!skb)
  2327. goto unlock;
  2328. }
  2329. if (skb != u->oob_skb)
  2330. goto unlock;
  2331. if (copied) {
  2332. skb = NULL;
  2333. } else if (!(flags & MSG_PEEK)) {
  2334. WRITE_ONCE(u->oob_skb, NULL);
  2335. if (!sock_flag(sk, SOCK_URGINLINE)) {
  2336. __skb_unlink(skb, &sk->sk_receive_queue);
  2337. unread_skb = skb;
  2338. skb = skb_peek(&sk->sk_receive_queue);
  2339. }
  2340. } else if (!sock_flag(sk, SOCK_URGINLINE)) {
  2341. skb = skb_peek_next(skb, &sk->sk_receive_queue);
  2342. }
  2343. unlock:
  2344. spin_unlock(&sk->sk_receive_queue.lock);
  2345. consume_skb(read_skb);
  2346. kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
  2347. return skb;
  2348. }
  2349. #endif
  2350. static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
  2351. {
  2352. struct sk_buff_head *queue = &sk->sk_receive_queue;
  2353. struct unix_sock *u = unix_sk(sk);
  2354. struct sk_buff *skb;
  2355. int err;
  2356. if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
  2357. return -ENOTCONN;
  2358. err = sock_error(sk);
  2359. if (err)
  2360. return err;
  2361. mutex_lock(&u->iolock);
  2362. spin_lock(&queue->lock);
  2363. skb = __skb_dequeue(queue);
  2364. if (!skb) {
  2365. spin_unlock(&queue->lock);
  2366. mutex_unlock(&u->iolock);
  2367. return -EAGAIN;
  2368. }
  2369. WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
  2370. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  2371. if (skb == u->oob_skb) {
  2372. WRITE_ONCE(u->oob_skb, NULL);
  2373. spin_unlock(&queue->lock);
  2374. mutex_unlock(&u->iolock);
  2375. kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
  2376. return -EAGAIN;
  2377. }
  2378. #endif
  2379. spin_unlock(&queue->lock);
  2380. mutex_unlock(&u->iolock);
  2381. return recv_actor(sk, skb);
  2382. }
  2383. static int unix_stream_read_generic(struct unix_stream_read_state *state,
  2384. bool freezable)
  2385. {
  2386. int noblock = state->flags & MSG_DONTWAIT;
  2387. struct socket *sock = state->socket;
  2388. struct msghdr *msg = state->msg;
  2389. struct sock *sk = sock->sk;
  2390. size_t size = state->size;
  2391. int flags = state->flags;
  2392. bool check_creds = false;
  2393. struct scm_cookie scm;
  2394. unsigned int last_len;
  2395. struct unix_sock *u;
  2396. int copied = 0;
  2397. int err = 0;
  2398. long timeo;
  2399. int target;
  2400. int skip;
  2401. if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
  2402. err = -EINVAL;
  2403. goto out;
  2404. }
  2405. if (unlikely(flags & MSG_OOB)) {
  2406. err = -EOPNOTSUPP;
  2407. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  2408. err = unix_stream_recv_urg(state);
  2409. #endif
  2410. goto out;
  2411. }
  2412. target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
  2413. timeo = sock_rcvtimeo(sk, noblock);
  2414. memset(&scm, 0, sizeof(scm));
  2415. u = unix_sk(sk);
  2416. redo:
  2417. /* Lock the socket to prevent queue disordering
  2418. * while sleeps in memcpy_tomsg
  2419. */
  2420. mutex_lock(&u->iolock);
  2421. skip = max(sk_peek_offset(sk, flags), 0);
  2422. do {
  2423. struct sk_buff *skb, *last;
  2424. int chunk;
  2425. unix_state_lock(sk);
  2426. if (sock_flag(sk, SOCK_DEAD)) {
  2427. err = -ECONNRESET;
  2428. goto unlock;
  2429. }
  2430. last = skb = skb_peek(&sk->sk_receive_queue);
  2431. last_len = last ? last->len : 0;
  2432. again:
  2433. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  2434. if (skb) {
  2435. skb = manage_oob(skb, sk, flags, copied);
  2436. if (!skb && copied) {
  2437. unix_state_unlock(sk);
  2438. break;
  2439. }
  2440. }
  2441. #endif
  2442. if (skb == NULL) {
  2443. if (copied >= target)
  2444. goto unlock;
  2445. /*
  2446. * POSIX 1003.1g mandates this order.
  2447. */
  2448. err = sock_error(sk);
  2449. if (err)
  2450. goto unlock;
  2451. if (sk->sk_shutdown & RCV_SHUTDOWN)
  2452. goto unlock;
  2453. unix_state_unlock(sk);
  2454. if (!timeo) {
  2455. err = -EAGAIN;
  2456. break;
  2457. }
  2458. mutex_unlock(&u->iolock);
  2459. timeo = unix_stream_data_wait(sk, timeo, last,
  2460. last_len, freezable);
  2461. if (signal_pending(current)) {
  2462. err = sock_intr_errno(timeo);
  2463. scm_destroy(&scm);
  2464. goto out;
  2465. }
  2466. goto redo;
  2467. unlock:
  2468. unix_state_unlock(sk);
  2469. break;
  2470. }
  2471. while (skip >= unix_skb_len(skb)) {
  2472. skip -= unix_skb_len(skb);
  2473. last = skb;
  2474. last_len = skb->len;
  2475. skb = skb_peek_next(skb, &sk->sk_receive_queue);
  2476. if (!skb)
  2477. goto again;
  2478. }
  2479. unix_state_unlock(sk);
  2480. if (check_creds) {
  2481. /* Never glue messages from different writers */
  2482. if (!unix_skb_scm_eq(skb, &scm))
  2483. break;
  2484. } else if (unix_may_passcred(sk)) {
  2485. /* Copy credentials */
  2486. unix_skb_to_scm(skb, &scm);
  2487. check_creds = true;
  2488. }
  2489. /* Copy address just once */
  2490. if (msg && msg->msg_name) {
  2491. DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
  2492. unix_copy_addr(msg, skb->sk);
  2493. BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name,
  2494. &msg->msg_namelen);
  2495. sunaddr = NULL;
  2496. }
  2497. chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
  2498. chunk = state->recv_actor(skb, skip, chunk, state);
  2499. if (chunk < 0) {
  2500. if (copied == 0)
  2501. copied = -EFAULT;
  2502. break;
  2503. }
  2504. copied += chunk;
  2505. size -= chunk;
  2506. /* Mark read part of skb as used */
  2507. if (!(flags & MSG_PEEK)) {
  2508. UNIXCB(skb).consumed += chunk;
  2509. sk_peek_offset_bwd(sk, chunk);
  2510. if (UNIXCB(skb).fp) {
  2511. scm_stat_del(sk, skb);
  2512. unix_detach_fds(&scm, skb);
  2513. }
  2514. if (unix_skb_len(skb))
  2515. break;
  2516. spin_lock(&sk->sk_receive_queue.lock);
  2517. WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
  2518. __skb_unlink(skb, &sk->sk_receive_queue);
  2519. spin_unlock(&sk->sk_receive_queue.lock);
  2520. consume_skb(skb);
  2521. if (scm.fp)
  2522. break;
  2523. } else {
  2524. /* It is questionable, see note in unix_dgram_recvmsg.
  2525. */
  2526. if (UNIXCB(skb).fp)
  2527. unix_peek_fds(&scm, skb);
  2528. sk_peek_offset_fwd(sk, chunk);
  2529. if (UNIXCB(skb).fp)
  2530. break;
  2531. skip = 0;
  2532. last = skb;
  2533. last_len = skb->len;
  2534. unix_state_lock(sk);
  2535. skb = skb_peek_next(skb, &sk->sk_receive_queue);
  2536. if (skb)
  2537. goto again;
  2538. unix_state_unlock(sk);
  2539. break;
  2540. }
  2541. } while (size);
  2542. mutex_unlock(&u->iolock);
  2543. if (msg) {
  2544. bool do_cmsg = READ_ONCE(u->recvmsg_inq);
  2545. scm_recv_unix(sock, msg, &scm, flags);
  2546. if ((do_cmsg | msg->msg_get_inq) && (copied ?: err) >= 0) {
  2547. msg->msg_inq = READ_ONCE(u->inq_len);
  2548. if (do_cmsg)
  2549. put_cmsg(msg, SOL_SOCKET, SCM_INQ,
  2550. sizeof(msg->msg_inq), &msg->msg_inq);
  2551. }
  2552. } else {
  2553. scm_destroy(&scm);
  2554. }
  2555. out:
  2556. return copied ? : err;
  2557. }
  2558. static int unix_stream_read_actor(struct sk_buff *skb,
  2559. int skip, int chunk,
  2560. struct unix_stream_read_state *state)
  2561. {
  2562. int ret;
  2563. ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
  2564. state->msg, chunk);
  2565. return ret ?: chunk;
  2566. }
  2567. int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
  2568. size_t size, int flags)
  2569. {
  2570. struct unix_stream_read_state state = {
  2571. .recv_actor = unix_stream_read_actor,
  2572. .socket = sk->sk_socket,
  2573. .msg = msg,
  2574. .size = size,
  2575. .flags = flags
  2576. };
  2577. return unix_stream_read_generic(&state, true);
  2578. }
  2579. static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
  2580. size_t size, int flags)
  2581. {
  2582. struct unix_stream_read_state state = {
  2583. .recv_actor = unix_stream_read_actor,
  2584. .socket = sock,
  2585. .msg = msg,
  2586. .size = size,
  2587. .flags = flags
  2588. };
  2589. #ifdef CONFIG_BPF_SYSCALL
  2590. struct sock *sk = sock->sk;
  2591. const struct proto *prot = READ_ONCE(sk->sk_prot);
  2592. if (prot != &unix_stream_proto)
  2593. return prot->recvmsg(sk, msg, size, flags, NULL);
  2594. #endif
  2595. return unix_stream_read_generic(&state, true);
  2596. }
  2597. static int unix_stream_splice_actor(struct sk_buff *skb,
  2598. int skip, int chunk,
  2599. struct unix_stream_read_state *state)
  2600. {
  2601. return skb_splice_bits(skb, state->socket->sk,
  2602. UNIXCB(skb).consumed + skip,
  2603. state->pipe, chunk, state->splice_flags);
  2604. }
  2605. static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
  2606. struct pipe_inode_info *pipe,
  2607. size_t size, unsigned int flags)
  2608. {
  2609. struct unix_stream_read_state state = {
  2610. .recv_actor = unix_stream_splice_actor,
  2611. .socket = sock,
  2612. .pipe = pipe,
  2613. .size = size,
  2614. .splice_flags = flags,
  2615. };
  2616. if (unlikely(*ppos))
  2617. return -ESPIPE;
  2618. if (sock->file->f_flags & O_NONBLOCK ||
  2619. flags & SPLICE_F_NONBLOCK)
  2620. state.flags = MSG_DONTWAIT;
  2621. return unix_stream_read_generic(&state, false);
  2622. }
  2623. static int unix_shutdown(struct socket *sock, int mode)
  2624. {
  2625. struct sock *sk = sock->sk;
  2626. struct sock *other;
  2627. if (mode < SHUT_RD || mode > SHUT_RDWR)
  2628. return -EINVAL;
  2629. /* This maps:
  2630. * SHUT_RD (0) -> RCV_SHUTDOWN (1)
  2631. * SHUT_WR (1) -> SEND_SHUTDOWN (2)
  2632. * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
  2633. */
  2634. ++mode;
  2635. unix_state_lock(sk);
  2636. WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
  2637. other = unix_peer(sk);
  2638. if (other)
  2639. sock_hold(other);
  2640. unix_state_unlock(sk);
  2641. sk->sk_state_change(sk);
  2642. if (other &&
  2643. (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
  2644. int peer_mode = 0;
  2645. const struct proto *prot = READ_ONCE(other->sk_prot);
  2646. if (prot->unhash)
  2647. prot->unhash(other);
  2648. if (mode&RCV_SHUTDOWN)
  2649. peer_mode |= SEND_SHUTDOWN;
  2650. if (mode&SEND_SHUTDOWN)
  2651. peer_mode |= RCV_SHUTDOWN;
  2652. unix_state_lock(other);
  2653. WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
  2654. unix_state_unlock(other);
  2655. other->sk_state_change(other);
  2656. if (peer_mode == SHUTDOWN_MASK)
  2657. sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
  2658. else if (peer_mode & RCV_SHUTDOWN)
  2659. sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
  2660. }
  2661. if (other)
  2662. sock_put(other);
  2663. return 0;
  2664. }
  2665. long unix_inq_len(struct sock *sk)
  2666. {
  2667. struct sk_buff *skb;
  2668. long amount = 0;
  2669. if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
  2670. return -EINVAL;
  2671. if (sk->sk_type == SOCK_STREAM)
  2672. return READ_ONCE(unix_sk(sk)->inq_len);
  2673. spin_lock(&sk->sk_receive_queue.lock);
  2674. if (sk->sk_type == SOCK_SEQPACKET) {
  2675. skb_queue_walk(&sk->sk_receive_queue, skb)
  2676. amount += unix_skb_len(skb);
  2677. } else {
  2678. skb = skb_peek(&sk->sk_receive_queue);
  2679. if (skb)
  2680. amount = skb->len;
  2681. }
  2682. spin_unlock(&sk->sk_receive_queue.lock);
  2683. return amount;
  2684. }
  2685. EXPORT_SYMBOL_GPL(unix_inq_len);
  /* Report the socket's outgoing queue size as given by sk_wmem_alloc_get(). */
  long unix_outq_len(struct sock *sk)
  {
  	long pending = sk_wmem_alloc_get(sk);

  	return pending;
  }
  EXPORT_SYMBOL_GPL(unix_outq_len);
  2691. static int unix_open_file(struct sock *sk)
  2692. {
  2693. if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
  2694. return -EPERM;
  2695. if (!smp_load_acquire(&unix_sk(sk)->addr))
  2696. return -ENOENT;
  2697. if (!unix_sk(sk)->path.dentry)
  2698. return -ENOENT;
  2699. return FD_ADD(O_CLOEXEC, dentry_open(&unix_sk(sk)->path, O_PATH, current_cred()));
  2700. }
  2701. static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
  2702. {
  2703. struct sock *sk = sock->sk;
  2704. long amount = 0;
  2705. int err;
  2706. switch (cmd) {
  2707. case SIOCOUTQ:
  2708. amount = unix_outq_len(sk);
  2709. err = put_user(amount, (int __user *)arg);
  2710. break;
  2711. case SIOCINQ:
  2712. amount = unix_inq_len(sk);
  2713. if (amount < 0)
  2714. err = amount;
  2715. else
  2716. err = put_user(amount, (int __user *)arg);
  2717. break;
  2718. case SIOCUNIXFILE:
  2719. err = unix_open_file(sk);
  2720. break;
  2721. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  2722. case SIOCATMARK:
  2723. {
  2724. struct unix_sock *u = unix_sk(sk);
  2725. struct sk_buff *skb;
  2726. int answ = 0;
  2727. mutex_lock(&u->iolock);
  2728. skb = skb_peek(&sk->sk_receive_queue);
  2729. if (skb) {
  2730. struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
  2731. struct sk_buff *next_skb;
  2732. next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
  2733. if (skb == oob_skb ||
  2734. (!unix_skb_len(skb) &&
  2735. (!oob_skb || next_skb == oob_skb)))
  2736. answ = 1;
  2737. }
  2738. mutex_unlock(&u->iolock);
  2739. err = put_user(answ, (int __user *)arg);
  2740. }
  2741. break;
  2742. #endif
  2743. default:
  2744. err = -ENOIOCTLCMD;
  2745. break;
  2746. }
  2747. return err;
  2748. }
  2749. #ifdef CONFIG_COMPAT
  /*
   * Compat ioctl entry: convert @arg with compat_ptr() and forward to
   * unix_ioctl().
   */
  static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
  {
  	unsigned long native_arg = (unsigned long)compat_ptr(arg);

  	return unix_ioctl(sock, cmd, native_arg);
  }
  2754. #endif
  2755. static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
  2756. {
  2757. struct sock *sk = sock->sk;
  2758. unsigned char state;
  2759. __poll_t mask;
  2760. u8 shutdown;
  2761. sock_poll_wait(file, sock, wait);
  2762. mask = 0;
  2763. shutdown = READ_ONCE(sk->sk_shutdown);
  2764. state = READ_ONCE(sk->sk_state);
  2765. /* exceptional events? */
  2766. if (READ_ONCE(sk->sk_err))
  2767. mask |= EPOLLERR;
  2768. if (shutdown == SHUTDOWN_MASK)
  2769. mask |= EPOLLHUP;
  2770. if (shutdown & RCV_SHUTDOWN)
  2771. mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
  2772. /* readable? */
  2773. if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
  2774. mask |= EPOLLIN | EPOLLRDNORM;
  2775. if (sk_is_readable(sk))
  2776. mask |= EPOLLIN | EPOLLRDNORM;
  2777. #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
  2778. if (READ_ONCE(unix_sk(sk)->oob_skb))
  2779. mask |= EPOLLPRI;
  2780. #endif
  2781. /* Connection-based need to check for termination and startup */
  2782. if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
  2783. state == TCP_CLOSE)
  2784. mask |= EPOLLHUP;
  2785. /*
  2786. * we set writable also when the other side has shut down the
  2787. * connection. This prevents stuck sockets.
  2788. */
  2789. if (unix_writable(sk, state))
  2790. mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
  2791. return mask;
  2792. }
  2793. static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
  2794. poll_table *wait)
  2795. {
  2796. struct sock *sk = sock->sk, *other;
  2797. unsigned int writable;
  2798. unsigned char state;
  2799. __poll_t mask;
  2800. u8 shutdown;
  2801. sock_poll_wait(file, sock, wait);
  2802. mask = 0;
  2803. shutdown = READ_ONCE(sk->sk_shutdown);
  2804. state = READ_ONCE(sk->sk_state);
  2805. /* exceptional events? */
  2806. if (READ_ONCE(sk->sk_err) ||
  2807. !skb_queue_empty_lockless(&sk->sk_error_queue))
  2808. mask |= EPOLLERR |
  2809. (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
  2810. if (shutdown & RCV_SHUTDOWN)
  2811. mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
  2812. if (shutdown == SHUTDOWN_MASK)
  2813. mask |= EPOLLHUP;
  2814. /* readable? */
  2815. if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
  2816. mask |= EPOLLIN | EPOLLRDNORM;
  2817. if (sk_is_readable(sk))
  2818. mask |= EPOLLIN | EPOLLRDNORM;
  2819. /* Connection-based need to check for termination and startup */
  2820. if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
  2821. mask |= EPOLLHUP;
  2822. /* No write status requested, avoid expensive OUT tests. */
  2823. if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
  2824. return mask;
  2825. writable = unix_writable(sk, state);
  2826. if (writable) {
  2827. unix_state_lock(sk);
  2828. other = unix_peer(sk);
  2829. if (other && unix_peer(other) != sk &&
  2830. unix_recvq_full_lockless(other) &&
  2831. unix_dgram_peer_wake_me(sk, other))
  2832. writable = 0;
  2833. unix_state_unlock(sk);
  2834. }
  2835. if (writable)
  2836. mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
  2837. else
  2838. sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  2839. return mask;
  2840. }
  2841. #ifdef CONFIG_PROC_FS
  /*
   * A seq_file position packs a hash bucket index into the high bits and a
   * 1-based offset within that bucket into the low BUCKET_SPACE bits.
   */
  #define BUCKET_SPACE		(BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

  /* Bucket index stored in position x. */
  #define get_bucket(x)		((x) >> BUCKET_SPACE)
  /* Offset within the bucket stored in position x. */
  #define get_offset(x)		((x) & ((1UL << BUCKET_SPACE) - 1))
  /* Build a position from bucket index b and in-bucket offset o. */
  #define set_bucket_offset(b, o)	(((b) << BUCKET_SPACE) | (o))
  2846. static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
  2847. {
  2848. unsigned long offset = get_offset(*pos);
  2849. unsigned long bucket = get_bucket(*pos);
  2850. unsigned long count = 0;
  2851. struct sock *sk;
  2852. for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
  2853. sk; sk = sk_next(sk)) {
  2854. if (++count == offset)
  2855. break;
  2856. }
  2857. return sk;
  2858. }
  2859. static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
  2860. {
  2861. unsigned long bucket = get_bucket(*pos);
  2862. struct net *net = seq_file_net(seq);
  2863. struct sock *sk;
  2864. while (bucket < UNIX_HASH_SIZE) {
  2865. spin_lock(&net->unx.table.locks[bucket]);
  2866. sk = unix_from_bucket(seq, pos);
  2867. if (sk)
  2868. return sk;
  2869. spin_unlock(&net->unx.table.locks[bucket]);
  2870. *pos = set_bucket_offset(++bucket, 1);
  2871. }
  2872. return NULL;
  2873. }
  2874. static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
  2875. loff_t *pos)
  2876. {
  2877. unsigned long bucket = get_bucket(*pos);
  2878. sk = sk_next(sk);
  2879. if (sk)
  2880. return sk;
  2881. spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
  2882. *pos = set_bucket_offset(++bucket, 1);
  2883. return unix_get_first(seq, pos);
  2884. }
  2885. static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
  2886. {
  2887. if (!*pos)
  2888. return SEQ_START_TOKEN;
  2889. return unix_get_first(seq, pos);
  2890. }
  2891. static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  2892. {
  2893. ++*pos;
  2894. if (v == SEQ_START_TOKEN)
  2895. return unix_get_first(seq, pos);
  2896. return unix_get_next(seq, v, pos);
  2897. }
  2898. static void unix_seq_stop(struct seq_file *seq, void *v)
  2899. {
  2900. struct sock *sk = v;
  2901. if (sk)
  2902. spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
  2903. }
  2904. static int unix_seq_show(struct seq_file *seq, void *v)
  2905. {
  2906. if (v == SEQ_START_TOKEN)
  2907. seq_puts(seq, "Num RefCount Protocol Flags Type St "
  2908. "Inode Path\n");
  2909. else {
  2910. struct sock *s = v;
  2911. struct unix_sock *u = unix_sk(s);
  2912. unix_state_lock(s);
  2913. seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
  2914. s,
  2915. refcount_read(&s->sk_refcnt),
  2916. 0,
  2917. s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
  2918. s->sk_type,
  2919. s->sk_socket ?
  2920. (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
  2921. (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
  2922. sock_i_ino(s));
  2923. if (u->addr) { // under a hash table lock here
  2924. int i, len;
  2925. seq_putc(seq, ' ');
  2926. i = 0;
  2927. len = u->addr->len -
  2928. offsetof(struct sockaddr_un, sun_path);
  2929. if (u->addr->name->sun_path[0]) {
  2930. len--;
  2931. } else {
  2932. seq_putc(seq, '@');
  2933. i++;
  2934. }
  2935. for ( ; i < len; i++)
  2936. seq_putc(seq, u->addr->name->sun_path[i] ?:
  2937. '@');
  2938. }
  2939. unix_state_unlock(s);
  2940. seq_putc(seq, '\n');
  2941. }
  2942. return 0;
  2943. }
  2944. static const struct seq_operations unix_seq_ops = {
  2945. .start = unix_seq_start,
  2946. .next = unix_seq_next,
  2947. .stop = unix_seq_stop,
  2948. .show = unix_seq_show,
  2949. };
  2950. #ifdef CONFIG_BPF_SYSCALL
  2951. struct bpf_unix_iter_state {
  2952. struct seq_net_private p;
  2953. unsigned int cur_sk;
  2954. unsigned int end_sk;
  2955. unsigned int max_sk;
  2956. struct sock **batch;
  2957. bool st_bucket_done;
  2958. };
  2959. struct bpf_iter__unix {
  2960. __bpf_md_ptr(struct bpf_iter_meta *, meta);
  2961. __bpf_md_ptr(struct unix_sock *, unix_sk);
  2962. uid_t uid __aligned(8);
  2963. };
  2964. static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
  2965. struct unix_sock *unix_sk, uid_t uid)
  2966. {
  2967. struct bpf_iter__unix ctx;
  2968. meta->seq_num--; /* skip SEQ_START_TOKEN */
  2969. ctx.meta = meta;
  2970. ctx.unix_sk = unix_sk;
  2971. ctx.uid = uid;
  2972. return bpf_iter_run_prog(prog, &ctx);
  2973. }
  2974. static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
  2975. {
  2976. struct bpf_unix_iter_state *iter = seq->private;
  2977. unsigned int expected = 1;
  2978. struct sock *sk;
  2979. sock_hold(start_sk);
  2980. iter->batch[iter->end_sk++] = start_sk;
  2981. for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
  2982. if (iter->end_sk < iter->max_sk) {
  2983. sock_hold(sk);
  2984. iter->batch[iter->end_sk++] = sk;
  2985. }
  2986. expected++;
  2987. }
  2988. spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
  2989. return expected;
  2990. }
  2991. static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
  2992. {
  2993. while (iter->cur_sk < iter->end_sk)
  2994. sock_put(iter->batch[iter->cur_sk++]);
  2995. }
  2996. static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
  2997. unsigned int new_batch_sz)
  2998. {
  2999. struct sock **new_batch;
  3000. new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
  3001. GFP_USER | __GFP_NOWARN);
  3002. if (!new_batch)
  3003. return -ENOMEM;
  3004. bpf_iter_unix_put_batch(iter);
  3005. kvfree(iter->batch);
  3006. iter->batch = new_batch;
  3007. iter->max_sk = new_batch_sz;
  3008. return 0;
  3009. }
  3010. static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
  3011. loff_t *pos)
  3012. {
  3013. struct bpf_unix_iter_state *iter = seq->private;
  3014. unsigned int expected;
  3015. bool resized = false;
  3016. struct sock *sk;
  3017. if (iter->st_bucket_done)
  3018. *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
  3019. again:
  3020. /* Get a new batch */
  3021. iter->cur_sk = 0;
  3022. iter->end_sk = 0;
  3023. sk = unix_get_first(seq, pos);
  3024. if (!sk)
  3025. return NULL; /* Done */
  3026. expected = bpf_iter_unix_hold_batch(seq, sk);
  3027. if (iter->end_sk == expected) {
  3028. iter->st_bucket_done = true;
  3029. return sk;
  3030. }
  3031. if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
  3032. resized = true;
  3033. goto again;
  3034. }
  3035. return sk;
  3036. }
  3037. static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
  3038. {
  3039. if (!*pos)
  3040. return SEQ_START_TOKEN;
  3041. /* bpf iter does not support lseek, so it always
  3042. * continue from where it was stop()-ped.
  3043. */
  3044. return bpf_iter_unix_batch(seq, pos);
  3045. }
  3046. static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  3047. {
  3048. struct bpf_unix_iter_state *iter = seq->private;
  3049. struct sock *sk;
  3050. /* Whenever seq_next() is called, the iter->cur_sk is
  3051. * done with seq_show(), so advance to the next sk in
  3052. * the batch.
  3053. */
  3054. if (iter->cur_sk < iter->end_sk)
  3055. sock_put(iter->batch[iter->cur_sk++]);
  3056. ++*pos;
  3057. if (iter->cur_sk < iter->end_sk)
  3058. sk = iter->batch[iter->cur_sk];
  3059. else
  3060. sk = bpf_iter_unix_batch(seq, pos);
  3061. return sk;
  3062. }
  3063. static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
  3064. {
  3065. struct bpf_iter_meta meta;
  3066. struct bpf_prog *prog;
  3067. struct sock *sk = v;
  3068. uid_t uid;
  3069. bool slow;
  3070. int ret;
  3071. if (v == SEQ_START_TOKEN)
  3072. return 0;
  3073. slow = lock_sock_fast(sk);
  3074. if (unlikely(sk_unhashed(sk))) {
  3075. ret = SEQ_SKIP;
  3076. goto unlock;
  3077. }
  3078. uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
  3079. meta.seq = seq;
  3080. prog = bpf_iter_get_info(&meta, false);
  3081. ret = unix_prog_seq_show(prog, &meta, v, uid);
  3082. unlock:
  3083. unlock_sock_fast(sk, slow);
  3084. return ret;
  3085. }
  3086. static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
  3087. {
  3088. struct bpf_unix_iter_state *iter = seq->private;
  3089. struct bpf_iter_meta meta;
  3090. struct bpf_prog *prog;
  3091. if (!v) {
  3092. meta.seq = seq;
  3093. prog = bpf_iter_get_info(&meta, true);
  3094. if (prog)
  3095. (void)unix_prog_seq_show(prog, &meta, v, 0);
  3096. }
  3097. if (iter->cur_sk < iter->end_sk)
  3098. bpf_iter_unix_put_batch(iter);
  3099. }
  3100. static const struct seq_operations bpf_iter_unix_seq_ops = {
  3101. .start = bpf_iter_unix_seq_start,
  3102. .next = bpf_iter_unix_seq_next,
  3103. .stop = bpf_iter_unix_seq_stop,
  3104. .show = bpf_iter_unix_seq_show,
  3105. };
  3106. #endif
  3107. #endif
  3108. static const struct net_proto_family unix_family_ops = {
  3109. .family = PF_UNIX,
  3110. .create = unix_create,
  3111. .owner = THIS_MODULE,
  3112. };
  3113. static int __net_init unix_net_init(struct net *net)
  3114. {
  3115. int i;
  3116. net->unx.sysctl_max_dgram_qlen = 10;
  3117. if (unix_sysctl_register(net))
  3118. goto out;
  3119. #ifdef CONFIG_PROC_FS
  3120. if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
  3121. sizeof(struct seq_net_private)))
  3122. goto err_sysctl;
  3123. #endif
  3124. net->unx.table.locks = kvmalloc_objs(spinlock_t, UNIX_HASH_SIZE);
  3125. if (!net->unx.table.locks)
  3126. goto err_proc;
  3127. net->unx.table.buckets = kvmalloc_objs(struct hlist_head,
  3128. UNIX_HASH_SIZE);
  3129. if (!net->unx.table.buckets)
  3130. goto free_locks;
  3131. for (i = 0; i < UNIX_HASH_SIZE; i++) {
  3132. spin_lock_init(&net->unx.table.locks[i]);
  3133. lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
  3134. INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
  3135. }
  3136. return 0;
  3137. free_locks:
  3138. kvfree(net->unx.table.locks);
  3139. err_proc:
  3140. #ifdef CONFIG_PROC_FS
  3141. remove_proc_entry("unix", net->proc_net);
  3142. err_sysctl:
  3143. #endif
  3144. unix_sysctl_unregister(net);
  3145. out:
  3146. return -ENOMEM;
  3147. }
  3148. static void __net_exit unix_net_exit(struct net *net)
  3149. {
  3150. kvfree(net->unx.table.buckets);
  3151. kvfree(net->unx.table.locks);
  3152. unix_sysctl_unregister(net);
  3153. remove_proc_entry("unix", net->proc_net);
  3154. }
  3155. static struct pernet_operations unix_net_ops = {
  3156. .init = unix_net_init,
  3157. .exit = unix_net_exit,
  3158. };
  3159. #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
  3160. DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
  3161. struct unix_sock *unix_sk, uid_t uid)
  3162. #define INIT_BATCH_SZ 16
  3163. static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
  3164. {
  3165. struct bpf_unix_iter_state *iter = priv_data;
  3166. int err;
  3167. err = bpf_iter_init_seq_net(priv_data, aux);
  3168. if (err)
  3169. return err;
  3170. err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
  3171. if (err) {
  3172. bpf_iter_fini_seq_net(priv_data);
  3173. return err;
  3174. }
  3175. return 0;
  3176. }
  3177. static void bpf_iter_fini_unix(void *priv_data)
  3178. {
  3179. struct bpf_unix_iter_state *iter = priv_data;
  3180. bpf_iter_fini_seq_net(priv_data);
  3181. kvfree(iter->batch);
  3182. }
  3183. static const struct bpf_iter_seq_info unix_seq_info = {
  3184. .seq_ops = &bpf_iter_unix_seq_ops,
  3185. .init_seq_private = bpf_iter_init_unix,
  3186. .fini_seq_private = bpf_iter_fini_unix,
  3187. .seq_priv_size = sizeof(struct bpf_unix_iter_state),
  3188. };
  3189. static const struct bpf_func_proto *
  3190. bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
  3191. const struct bpf_prog *prog)
  3192. {
  3193. switch (func_id) {
  3194. case BPF_FUNC_setsockopt:
  3195. return &bpf_sk_setsockopt_proto;
  3196. case BPF_FUNC_getsockopt:
  3197. return &bpf_sk_getsockopt_proto;
  3198. default:
  3199. return NULL;
  3200. }
  3201. }
  3202. static struct bpf_iter_reg unix_reg_info = {
  3203. .target = "unix",
  3204. .ctx_arg_info_size = 1,
  3205. .ctx_arg_info = {
  3206. { offsetof(struct bpf_iter__unix, unix_sk),
  3207. PTR_TO_BTF_ID_OR_NULL },
  3208. },
  3209. .get_func_proto = bpf_iter_unix_get_func_proto,
  3210. .seq_info = &unix_seq_info,
  3211. };
  3212. static void __init bpf_iter_register(void)
  3213. {
  3214. unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
  3215. if (bpf_iter_reg_target(&unix_reg_info))
  3216. pr_warn("Warning: could not register bpf iterator unix\n");
  3217. }
  3218. #endif
  3219. static int __init af_unix_init(void)
  3220. {
  3221. int i, rc = -1;
  3222. BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
  3223. for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
  3224. spin_lock_init(&bsd_socket_locks[i]);
  3225. INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
  3226. }
  3227. rc = proto_register(&unix_dgram_proto, 1);
  3228. if (rc != 0) {
  3229. pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
  3230. goto out;
  3231. }
  3232. rc = proto_register(&unix_stream_proto, 1);
  3233. if (rc != 0) {
  3234. pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
  3235. proto_unregister(&unix_dgram_proto);
  3236. goto out;
  3237. }
  3238. sock_register(&unix_family_ops);
  3239. register_pernet_subsys(&unix_net_ops);
  3240. unix_bpf_build_proto();
  3241. #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
  3242. bpf_iter_register();
  3243. #endif
  3244. out:
  3245. return rc;
  3246. }
  3247. /* Later than subsys_initcall() because we depend on stuff initialised there */
  3248. fs_initcall(af_unix_init);