xprtsock.c 98 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * linux/net/sunrpc/xprtsock.c
  4. *
  5. * Client-side transport implementation for sockets.
  6. *
  7. * TCP callback races fixes (C) 1998 Red Hat
  8. * TCP send fixes (C) 1998 Red Hat
  9. * TCP NFS related read + write fixes
  10. * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
  11. *
  12. * Rewrite of large parts of the code in order to stabilize TCP stuff.
  13. * Fix behaviour when socket buffer is full.
  14. * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
  15. *
  16. * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
  17. *
  18. * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
  19. * <gilles.quillard@bull.net>
  20. */
  21. #include <linux/types.h>
  22. #include <linux/string.h>
  23. #include <linux/slab.h>
  24. #include <linux/module.h>
  25. #include <linux/capability.h>
  26. #include <linux/pagemap.h>
  27. #include <linux/errno.h>
  28. #include <linux/socket.h>
  29. #include <linux/in.h>
  30. #include <linux/net.h>
  31. #include <linux/mm.h>
  32. #include <linux/un.h>
  33. #include <linux/udp.h>
  34. #include <linux/tcp.h>
  35. #include <linux/sunrpc/clnt.h>
  36. #include <linux/sunrpc/addr.h>
  37. #include <linux/sunrpc/sched.h>
  38. #include <linux/sunrpc/svcsock.h>
  39. #include <linux/sunrpc/xprtsock.h>
  40. #include <linux/file.h>
  41. #ifdef CONFIG_SUNRPC_BACKCHANNEL
  42. #include <linux/sunrpc/bc_xprt.h>
  43. #endif
  44. #include <net/sock.h>
  45. #include <net/checksum.h>
  46. #include <net/udp.h>
  47. #include <net/tcp.h>
  48. #include <net/tls_prot.h>
  49. #include <net/handshake.h>
  50. #include <linux/bvec.h>
  51. #include <linux/highmem.h>
  52. #include <linux/uio.h>
  53. #include <linux/sched/mm.h>
  54. #include <trace/events/sock.h>
  55. #include <trace/events/sunrpc.h>
  56. #include "socklib.h"
  57. #include "sunrpc.h"
  /*
   * Prototypes for file-local helpers that are declared here ahead of
   * their definitions.
   */
  static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
                                         struct socket *sock);
  static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
  static void xs_reset_srcport(struct sock_xprt *transport);
  static void xs_close(struct rpc_xprt *xprt);
  63. /*
  64. * xprtsock tunables
  65. */
  66. static unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
  67. static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
  68. static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
  69. static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
  70. static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
  71. #define XS_TCP_LINGER_TO (15U * HZ)
  72. static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
  73. /*
  74. * We can register our own files under /proc/sys/sunrpc by
  75. * calling register_sysctl() again. The files in that
  76. * directory become the union of all files registered there.
  77. *
  78. * We simply need to make sure that we don't collide with
  79. * someone else's file names!
  80. */
  81. static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
  82. static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
  83. static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
  84. static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
  85. static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
  86. static struct ctl_table_header *sunrpc_table_header;
  87. static struct xprt_class xs_local_transport;
  88. static struct xprt_class xs_udp_transport;
  89. static struct xprt_class xs_tcp_transport;
  90. static struct xprt_class xs_tcp_tls_transport;
  91. static struct xprt_class xs_bc_tcp_transport;
  92. /*
  93. * FIXME: changing the UDP slot table size should also resize the UDP
  94. * socket buffers for existing UDP transports
  95. */
  96. static struct ctl_table xs_tunables_table[] = {
  97. {
  98. .procname = "udp_slot_table_entries",
  99. .data = &xprt_udp_slot_table_entries,
  100. .maxlen = sizeof(unsigned int),
  101. .mode = 0644,
  102. .proc_handler = proc_dointvec_minmax,
  103. .extra1 = &min_slot_table_size,
  104. .extra2 = &max_slot_table_size
  105. },
  106. {
  107. .procname = "tcp_slot_table_entries",
  108. .data = &xprt_tcp_slot_table_entries,
  109. .maxlen = sizeof(unsigned int),
  110. .mode = 0644,
  111. .proc_handler = proc_dointvec_minmax,
  112. .extra1 = &min_slot_table_size,
  113. .extra2 = &max_slot_table_size
  114. },
  115. {
  116. .procname = "tcp_max_slot_table_entries",
  117. .data = &xprt_max_tcp_slot_table_entries,
  118. .maxlen = sizeof(unsigned int),
  119. .mode = 0644,
  120. .proc_handler = proc_dointvec_minmax,
  121. .extra1 = &min_slot_table_size,
  122. .extra2 = &max_tcp_slot_table_limit
  123. },
  124. {
  125. .procname = "min_resvport",
  126. .data = &xprt_min_resvport,
  127. .maxlen = sizeof(unsigned int),
  128. .mode = 0644,
  129. .proc_handler = proc_dointvec_minmax,
  130. .extra1 = &xprt_min_resvport_limit,
  131. .extra2 = &xprt_max_resvport_limit
  132. },
  133. {
  134. .procname = "max_resvport",
  135. .data = &xprt_max_resvport,
  136. .maxlen = sizeof(unsigned int),
  137. .mode = 0644,
  138. .proc_handler = proc_dointvec_minmax,
  139. .extra1 = &xprt_min_resvport_limit,
  140. .extra2 = &xprt_max_resvport_limit
  141. },
  142. {
  143. .procname = "tcp_fin_timeout",
  144. .data = &xs_tcp_fin_timeout,
  145. .maxlen = sizeof(xs_tcp_fin_timeout),
  146. .mode = 0644,
  147. .proc_handler = proc_dointvec_jiffies,
  148. },
  149. };
  150. /*
  151. * Wait duration for a reply from the RPC portmapper.
  152. */
  153. #define XS_BIND_TO (60U * HZ)
  154. /*
  155. * Delay if a UDP socket connect error occurs. This is most likely some
  156. * kind of resource problem on the local host.
  157. */
  158. #define XS_UDP_REEST_TO (2U * HZ)
  159. /*
  160. * The reestablish timeout allows clients to delay for a bit before attempting
  161. * to reconnect to a server that just dropped our connection.
  162. *
  163. * We implement an exponential backoff when trying to reestablish a TCP
  164. * transport connection with the server. Some servers like to drop a TCP
  165. * connection when they are overworked, so we start with a short timeout and
  166. * increase over time if the server is down or not responding.
  167. */
  168. #define XS_TCP_INIT_REEST_TO (3U * HZ)
  169. /*
  170. * TCP idle timeout; client drops the transport socket if it is idle
  171. * for this long. Note that we also timeout UDP sockets to prevent
  172. * holding port numbers when there is no RPC traffic.
  173. */
  174. #define XS_IDLE_DISC_TO (5U * 60 * HZ)
  175. /*
  176. * TLS handshake timeout.
  177. */
  178. #define XS_TLS_HANDSHAKE_TO (10U * HZ)
  179. #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  180. # undef RPC_DEBUG_DATA
  181. # define RPCDBG_FACILITY RPCDBG_TRANS
  182. #endif
  183. #ifdef RPC_DEBUG_DATA
  184. static void xs_pktdump(char *msg, u32 *packet, unsigned int count)
  185. {
  186. u8 *buf = (u8 *) packet;
  187. int j;
  188. dprintk("RPC: %s\n", msg);
  189. for (j = 0; j < count && j < 128; j += 4) {
  190. if (!(j & 31)) {
  191. if (j)
  192. dprintk("\n");
  193. dprintk("0x%04x ", j);
  194. }
  195. dprintk("%02x%02x%02x%02x ",
  196. buf[j], buf[j+1], buf[j+2], buf[j+3]);
  197. }
  198. dprintk("\n");
  199. }
  200. #else
  201. static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
  202. {
  203. /* NOP */
  204. }
  205. #endif
  206. static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
  207. {
  208. return (struct rpc_xprt *) sk->sk_user_data;
  209. }
  210. static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
  211. {
  212. return (struct sockaddr *) &xprt->addr;
  213. }
  214. static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt)
  215. {
  216. return (struct sockaddr_un *) &xprt->addr;
  217. }
  218. static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
  219. {
  220. return (struct sockaddr_in *) &xprt->addr;
  221. }
  222. static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
  223. {
  224. return (struct sockaddr_in6 *) &xprt->addr;
  225. }
  226. static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
  227. {
  228. struct sockaddr *sap = xs_addr(xprt);
  229. struct sockaddr_in6 *sin6;
  230. struct sockaddr_in *sin;
  231. struct sockaddr_un *sun;
  232. char buf[128];
  233. switch (sap->sa_family) {
  234. case AF_LOCAL:
  235. sun = xs_addr_un(xprt);
  236. if (sun->sun_path[0]) {
  237. strscpy(buf, sun->sun_path, sizeof(buf));
  238. } else {
  239. buf[0] = '@';
  240. strscpy(buf+1, sun->sun_path+1, sizeof(buf)-1);
  241. }
  242. xprt->address_strings[RPC_DISPLAY_ADDR] =
  243. kstrdup(buf, GFP_KERNEL);
  244. break;
  245. case AF_INET:
  246. (void)rpc_ntop(sap, buf, sizeof(buf));
  247. xprt->address_strings[RPC_DISPLAY_ADDR] =
  248. kstrdup(buf, GFP_KERNEL);
  249. sin = xs_addr_in(xprt);
  250. snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
  251. break;
  252. case AF_INET6:
  253. (void)rpc_ntop(sap, buf, sizeof(buf));
  254. xprt->address_strings[RPC_DISPLAY_ADDR] =
  255. kstrdup(buf, GFP_KERNEL);
  256. sin6 = xs_addr_in6(xprt);
  257. snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
  258. break;
  259. default:
  260. BUG();
  261. }
  262. xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
  263. }
  264. static void xs_format_common_peer_ports(struct rpc_xprt *xprt)
  265. {
  266. struct sockaddr *sap = xs_addr(xprt);
  267. char buf[128];
  268. snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
  269. xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
  270. snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
  271. xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
  272. }
  273. static void xs_format_peer_addresses(struct rpc_xprt *xprt,
  274. const char *protocol,
  275. const char *netid)
  276. {
  277. xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
  278. xprt->address_strings[RPC_DISPLAY_NETID] = netid;
  279. xs_format_common_peer_addresses(xprt);
  280. xs_format_common_peer_ports(xprt);
  281. }
  282. static void xs_update_peer_port(struct rpc_xprt *xprt)
  283. {
  284. kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
  285. kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
  286. xs_format_common_peer_ports(xprt);
  287. }
  288. static void xs_free_peer_addresses(struct rpc_xprt *xprt)
  289. {
  290. unsigned int i;
  291. for (i = 0; i < RPC_DISPLAY_MAX; i++)
  292. switch (i) {
  293. case RPC_DISPLAY_PROTO:
  294. case RPC_DISPLAY_NETID:
  295. continue;
  296. default:
  297. kfree(xprt->address_strings[i]);
  298. }
  299. }
  300. static size_t
  301. xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
  302. {
  303. size_t i,n;
  304. if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
  305. return want;
  306. n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
  307. for (i = 0; i < n; i++) {
  308. if (buf->pages[i])
  309. continue;
  310. buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
  311. if (!buf->pages[i]) {
  312. i *= PAGE_SIZE;
  313. return i > buf->page_base ? i - buf->page_base : 0;
  314. }
  315. }
  316. return want;
  317. }
  318. static int
  319. xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
  320. unsigned int *msg_flags, struct cmsghdr *cmsg, int ret)
  321. {
  322. u8 content_type = tls_get_record_type(sock->sk, cmsg);
  323. u8 level, description;
  324. switch (content_type) {
  325. case 0:
  326. break;
  327. case TLS_RECORD_TYPE_DATA:
  328. /* TLS sets EOR at the end of each application data
  329. * record, even though there might be more frames
  330. * waiting to be decrypted.
  331. */
  332. *msg_flags &= ~MSG_EOR;
  333. break;
  334. case TLS_RECORD_TYPE_ALERT:
  335. tls_alert_recv(sock->sk, msg, &level, &description);
  336. ret = (level == TLS_ALERT_LEVEL_FATAL) ?
  337. -EACCES : -EAGAIN;
  338. break;
  339. default:
  340. /* discard this record type */
  341. ret = -EAGAIN;
  342. }
  343. return ret;
  344. }
  345. static int
  346. xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags)
  347. {
  348. union {
  349. struct cmsghdr cmsg;
  350. u8 buf[CMSG_SPACE(sizeof(u8))];
  351. } u;
  352. u8 alert[2];
  353. struct kvec alert_kvec = {
  354. .iov_base = alert,
  355. .iov_len = sizeof(alert),
  356. };
  357. struct msghdr msg = {
  358. .msg_flags = *msg_flags,
  359. .msg_control = &u,
  360. .msg_controllen = sizeof(u),
  361. };
  362. int ret;
  363. iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
  364. alert_kvec.iov_len);
  365. ret = sock_recvmsg(sock, &msg, flags);
  366. if (ret > 0) {
  367. if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT)
  368. iov_iter_revert(&msg.msg_iter, ret);
  369. ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg,
  370. -EAGAIN);
  371. }
  372. return ret;
  373. }
  374. static ssize_t
  375. xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
  376. {
  377. ssize_t ret;
  378. if (seek != 0)
  379. iov_iter_advance(&msg->msg_iter, seek);
  380. ret = sock_recvmsg(sock, msg, flags);
  381. /* Handle TLS inband control message lazily */
  382. if (msg->msg_flags & MSG_CTRUNC) {
  383. msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
  384. if (ret == 0 || ret == -EIO)
  385. ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags);
  386. }
  387. return ret > 0 ? ret + seek : ret;
  388. }
  389. static ssize_t
  390. xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
  391. struct kvec *kvec, size_t count, size_t seek)
  392. {
  393. iov_iter_kvec(&msg->msg_iter, ITER_DEST, kvec, 1, count);
  394. return xs_sock_recvmsg(sock, msg, flags, seek);
  395. }
  396. static ssize_t
  397. xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
  398. struct bio_vec *bvec, unsigned long nr, size_t count,
  399. size_t seek)
  400. {
  401. iov_iter_bvec(&msg->msg_iter, ITER_DEST, bvec, nr, count);
  402. return xs_sock_recvmsg(sock, msg, flags, seek);
  403. }
  404. static ssize_t
  405. xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
  406. size_t count)
  407. {
  408. iov_iter_discard(&msg->msg_iter, ITER_DEST, count);
  409. return xs_sock_recvmsg(sock, msg, flags, 0);
  410. }
  #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
  /*
   * xs_flush_bvec - call flush_dcache_page() on pages of a bio_vec array
   * @bvec: array to walk
   * @count: size given to the iterator (bi_size)
   * @seek: starting offset; masked with PAGE_MASK before the iterator
   *        is advanced by it
   */
  static void
  xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek)
  {
      struct bio_vec seg;
      struct bvec_iter iter = {
          .bi_size = count,
      };

      bvec_iter_advance(bvec, &iter, seek & PAGE_MASK);
      for_each_bvec(seg, bvec, iter, iter) {
          flush_dcache_page(seg.bv_page);
      }
  }
  #else
  /* Nothing to do when ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is zero. */
  static inline void
  xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek)
  {
  }
  #endif
  429. static ssize_t
  430. xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
  431. struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
  432. {
  433. size_t want, seek_init = seek, offset = 0;
  434. ssize_t ret;
  435. want = min_t(size_t, count, buf->head[0].iov_len);
  436. if (seek < want) {
  437. ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
  438. if (ret <= 0)
  439. goto sock_err;
  440. offset += ret;
  441. if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
  442. goto out;
  443. if (ret != want)
  444. goto out;
  445. seek = 0;
  446. } else {
  447. seek -= want;
  448. offset += want;
  449. }
  450. want = xs_alloc_sparse_pages(
  451. buf, min_t(size_t, count - offset, buf->page_len),
  452. GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
  453. if (seek < want) {
  454. ret = xs_read_bvec(sock, msg, flags, buf->bvec,
  455. xdr_buf_pagecount(buf),
  456. want + buf->page_base,
  457. seek + buf->page_base);
  458. if (ret <= 0)
  459. goto sock_err;
  460. xs_flush_bvec(buf->bvec, ret, seek + buf->page_base);
  461. ret -= buf->page_base;
  462. offset += ret;
  463. if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
  464. goto out;
  465. if (ret != want)
  466. goto out;
  467. seek = 0;
  468. } else {
  469. seek -= want;
  470. offset += want;
  471. }
  472. want = min_t(size_t, count - offset, buf->tail[0].iov_len);
  473. if (seek < want) {
  474. ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
  475. if (ret <= 0)
  476. goto sock_err;
  477. offset += ret;
  478. if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
  479. goto out;
  480. if (ret != want)
  481. goto out;
  482. } else if (offset < seek_init)
  483. offset = seek_init;
  484. ret = -EMSGSIZE;
  485. out:
  486. *read = offset - seek_init;
  487. return ret;
  488. sock_err:
  489. offset += seek;
  490. goto out;
  491. }
  492. static void
  493. xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
  494. {
  495. if (!transport->recv.copied) {
  496. if (buf->head[0].iov_len >= transport->recv.offset)
  497. memcpy(buf->head[0].iov_base,
  498. &transport->recv.xid,
  499. transport->recv.offset);
  500. transport->recv.copied = transport->recv.offset;
  501. }
  502. }
  503. static bool
  504. xs_read_stream_request_done(struct sock_xprt *transport)
  505. {
  506. return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
  507. }
  508. static void
  509. xs_read_stream_check_eor(struct sock_xprt *transport,
  510. struct msghdr *msg)
  511. {
  512. if (xs_read_stream_request_done(transport))
  513. msg->msg_flags |= MSG_EOR;
  514. }
  515. static ssize_t
  516. xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
  517. int flags, struct rpc_rqst *req)
  518. {
  519. struct xdr_buf *buf = &req->rq_private_buf;
  520. size_t want, read;
  521. ssize_t ret;
  522. xs_read_header(transport, buf);
  523. want = transport->recv.len - transport->recv.offset;
  524. if (want != 0) {
  525. ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
  526. transport->recv.copied + want,
  527. transport->recv.copied,
  528. &read);
  529. transport->recv.offset += read;
  530. transport->recv.copied += read;
  531. }
  532. if (transport->recv.offset == transport->recv.len)
  533. xs_read_stream_check_eor(transport, msg);
  534. if (want == 0)
  535. return 0;
  536. switch (ret) {
  537. default:
  538. break;
  539. case -EFAULT:
  540. case -EMSGSIZE:
  541. msg->msg_flags |= MSG_TRUNC;
  542. return read;
  543. case 0:
  544. return -ESHUTDOWN;
  545. }
  546. return ret < 0 ? ret : read;
  547. }
  /*
   * Number of header bytes to read before the payload: a single 32-bit
   * word when @isfrag is true, three 32-bit words otherwise.
   */
  static size_t
  xs_read_stream_headersize(bool isfrag)
  {
  	return (isfrag ? 1 : 3) * sizeof(__be32);
  }
  555. static ssize_t
  556. xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
  557. int flags, size_t want, size_t seek)
  558. {
  559. struct kvec kvec = {
  560. .iov_base = &transport->recv.fraghdr,
  561. .iov_len = want,
  562. };
  563. return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
  564. }
  565. #if defined(CONFIG_SUNRPC_BACKCHANNEL)
  566. static ssize_t
  567. xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
  568. {
  569. struct rpc_xprt *xprt = &transport->xprt;
  570. struct rpc_rqst *req;
  571. ssize_t ret;
  572. /* Is this transport associated with the backchannel? */
  573. if (!xprt->bc_serv)
  574. return -ESHUTDOWN;
  575. /* Look up and lock the request corresponding to the given XID */
  576. req = xprt_lookup_bc_request(xprt, transport->recv.xid);
  577. if (!req) {
  578. printk(KERN_WARNING "Callback slot table overflowed\n");
  579. return -ESHUTDOWN;
  580. }
  581. if (transport->recv.copied && !req->rq_private_buf.len)
  582. return -ESHUTDOWN;
  583. ret = xs_read_stream_request(transport, msg, flags, req);
  584. if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
  585. xprt_complete_bc_request(req, transport->recv.copied);
  586. else
  587. req->rq_private_buf.len = transport->recv.copied;
  588. return ret;
  589. }
  590. #else /* CONFIG_SUNRPC_BACKCHANNEL */
  591. static ssize_t
  592. xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
  593. {
  594. return -ESHUTDOWN;
  595. }
  596. #endif /* CONFIG_SUNRPC_BACKCHANNEL */
  597. static ssize_t
  598. xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
  599. {
  600. struct rpc_xprt *xprt = &transport->xprt;
  601. struct rpc_rqst *req;
  602. ssize_t ret = 0;
  603. /* Look up and lock the request corresponding to the given XID */
  604. spin_lock(&xprt->queue_lock);
  605. req = xprt_lookup_rqst(xprt, transport->recv.xid);
  606. if (!req || (transport->recv.copied && !req->rq_private_buf.len)) {
  607. msg->msg_flags |= MSG_TRUNC;
  608. goto out;
  609. }
  610. xprt_pin_rqst(req);
  611. spin_unlock(&xprt->queue_lock);
  612. ret = xs_read_stream_request(transport, msg, flags, req);
  613. spin_lock(&xprt->queue_lock);
  614. if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
  615. xprt_complete_rqst(req->rq_task, transport->recv.copied);
  616. else
  617. req->rq_private_buf.len = transport->recv.copied;
  618. xprt_unpin_rqst(req);
  619. out:
  620. spin_unlock(&xprt->queue_lock);
  621. return ret;
  622. }
  623. static ssize_t
  624. xs_read_stream(struct sock_xprt *transport, int flags)
  625. {
  626. struct msghdr msg = { 0 };
  627. size_t want, read = 0;
  628. ssize_t ret = 0;
  629. if (transport->recv.len == 0) {
  630. want = xs_read_stream_headersize(transport->recv.copied != 0);
  631. ret = xs_read_stream_header(transport, &msg, flags, want,
  632. transport->recv.offset);
  633. if (ret <= 0)
  634. goto out_err;
  635. transport->recv.offset = ret;
  636. if (transport->recv.offset != want)
  637. return transport->recv.offset;
  638. transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
  639. RPC_FRAGMENT_SIZE_MASK;
  640. transport->recv.offset -= sizeof(transport->recv.fraghdr);
  641. read = ret;
  642. }
  643. switch (be32_to_cpu(transport->recv.calldir)) {
  644. default:
  645. msg.msg_flags |= MSG_TRUNC;
  646. break;
  647. case RPC_CALL:
  648. ret = xs_read_stream_call(transport, &msg, flags);
  649. break;
  650. case RPC_REPLY:
  651. ret = xs_read_stream_reply(transport, &msg, flags);
  652. }
  653. if (msg.msg_flags & MSG_TRUNC) {
  654. transport->recv.calldir = cpu_to_be32(-1);
  655. transport->recv.copied = -1;
  656. }
  657. if (ret < 0)
  658. goto out_err;
  659. read += ret;
  660. if (transport->recv.offset < transport->recv.len) {
  661. if (!(msg.msg_flags & MSG_TRUNC))
  662. return read;
  663. msg.msg_flags = 0;
  664. ret = xs_read_discard(transport->sock, &msg, flags,
  665. transport->recv.len - transport->recv.offset);
  666. if (ret <= 0)
  667. goto out_err;
  668. transport->recv.offset += ret;
  669. read += ret;
  670. if (transport->recv.offset != transport->recv.len)
  671. return read;
  672. }
  673. if (xs_read_stream_request_done(transport)) {
  674. trace_xs_stream_read_request(transport);
  675. transport->recv.copied = 0;
  676. }
  677. transport->recv.offset = 0;
  678. transport->recv.len = 0;
  679. return read;
  680. out_err:
  681. return ret != 0 ? ret : -ESHUTDOWN;
  682. }
  683. static __poll_t xs_poll_socket(struct sock_xprt *transport)
  684. {
  685. return transport->sock->ops->poll(transport->file, transport->sock,
  686. NULL);
  687. }
  688. static bool xs_poll_socket_readable(struct sock_xprt *transport)
  689. {
  690. __poll_t events = xs_poll_socket(transport);
  691. return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
  692. }
  693. static void xs_poll_check_readable(struct sock_xprt *transport)
  694. {
  695. clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
  696. if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
  697. return;
  698. if (!xs_poll_socket_readable(transport))
  699. return;
  700. if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
  701. queue_work(xprtiod_workqueue, &transport->recv_worker);
  702. }
  703. static void xs_stream_data_receive(struct sock_xprt *transport)
  704. {
  705. size_t read = 0;
  706. ssize_t ret = 0;
  707. mutex_lock(&transport->recv_mutex);
  708. if (transport->sock == NULL)
  709. goto out;
  710. for (;;) {
  711. ret = xs_read_stream(transport, MSG_DONTWAIT);
  712. if (ret < 0)
  713. break;
  714. read += ret;
  715. cond_resched();
  716. }
  717. if (ret == -ESHUTDOWN)
  718. kernel_sock_shutdown(transport->sock, SHUT_RDWR);
  719. else if (ret == -EACCES)
  720. xprt_wake_pending_tasks(&transport->xprt, -EACCES);
  721. else
  722. xs_poll_check_readable(transport);
  723. out:
  724. mutex_unlock(&transport->recv_mutex);
  725. trace_xs_stream_read_data(&transport->xprt, ret, read);
  726. }
  727. static void xs_stream_data_receive_workfn(struct work_struct *work)
  728. {
  729. struct sock_xprt *transport =
  730. container_of(work, struct sock_xprt, recv_worker);
  731. unsigned int pflags = memalloc_nofs_save();
  732. xs_stream_data_receive(transport);
  733. memalloc_nofs_restore(pflags);
  734. }
  735. static void
  736. xs_stream_reset_connect(struct sock_xprt *transport)
  737. {
  738. transport->recv.offset = 0;
  739. transport->recv.len = 0;
  740. transport->recv.copied = 0;
  741. transport->xmit.offset = 0;
  742. }
  743. static void
  744. xs_stream_start_connect(struct sock_xprt *transport)
  745. {
  746. transport->xprt.stat.connect_count++;
  747. transport->xprt.stat.connect_start = jiffies;
  748. }
  749. #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
  750. /**
  751. * xs_nospace - handle transmit was incomplete
  752. * @req: pointer to RPC request
  753. * @transport: pointer to struct sock_xprt
  754. *
  755. */
  756. static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
  757. {
  758. struct rpc_xprt *xprt = &transport->xprt;
  759. struct sock *sk = transport->inet;
  760. int ret = -EAGAIN;
  761. trace_rpc_socket_nospace(req, transport);
  762. /* Protect against races with write_space */
  763. spin_lock(&xprt->transport_lock);
  764. /* Don't race with disconnect */
  765. if (xprt_connected(xprt)) {
  766. /* wait for more buffer space */
  767. set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
  768. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  769. sk->sk_write_pending++;
  770. xprt_wait_for_buffer_space(xprt);
  771. } else
  772. ret = -ENOTCONN;
  773. spin_unlock(&xprt->transport_lock);
  774. return ret;
  775. }
  776. static int xs_sock_nospace(struct rpc_rqst *req)
  777. {
  778. struct sock_xprt *transport =
  779. container_of(req->rq_xprt, struct sock_xprt, xprt);
  780. struct sock *sk = transport->inet;
  781. int ret = -EAGAIN;
  782. lock_sock(sk);
  783. if (!sock_writeable(sk))
  784. ret = xs_nospace(req, transport);
  785. release_sock(sk);
  786. return ret;
  787. }
  788. static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait)
  789. {
  790. struct sock_xprt *transport =
  791. container_of(req->rq_xprt, struct sock_xprt, xprt);
  792. struct sock *sk = transport->inet;
  793. int ret = -EAGAIN;
  794. if (vm_wait)
  795. return -ENOBUFS;
  796. lock_sock(sk);
  797. if (!sk_stream_memory_free(sk))
  798. ret = xs_nospace(req, transport);
  799. release_sock(sk);
  800. return ret;
  801. }
  802. static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
  803. {
  804. return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
  805. }
  806. static void xs_stream_abort_send_request(struct rpc_rqst *req)
  807. {
  808. struct rpc_xprt *xprt = req->rq_xprt;
  809. struct sock_xprt *transport =
  810. container_of(xprt, struct sock_xprt, xprt);
  811. if (transport->xmit.offset != 0 &&
  812. !test_bit(XPRT_CLOSE_WAIT, &xprt->state))
  813. xprt_force_disconnect(xprt);
  814. }
  815. /*
  816. * Determine if the previous message in the stream was aborted before it
  817. * could complete transmission.
  818. */
  819. static bool
  820. xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
  821. {
  822. return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
  823. }
  824. /*
  825. * Return the stream record marker field for a record of length < 2^31-1
  826. */
  827. static rpc_fraghdr
  828. xs_stream_record_marker(struct xdr_buf *xdr)
  829. {
  830. if (!xdr->len)
  831. return 0;
  832. return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
  833. }
  834. /**
  835. * xs_local_send_request - write an RPC request to an AF_LOCAL socket
  836. * @req: pointer to RPC request
  837. *
  838. * Return values:
  839. * 0: The request has been sent
  840. * EAGAIN: The socket was blocked, please call again later to
  841. * complete the request
  842. * ENOTCONN: Caller needs to invoke connect logic then call again
  843. * other: Some other error occurred, the request was not sent
  844. */
  845. static int xs_local_send_request(struct rpc_rqst *req)
  846. {
  847. struct rpc_xprt *xprt = req->rq_xprt;
  848. struct sock_xprt *transport =
  849. container_of(xprt, struct sock_xprt, xprt);
  850. struct xdr_buf *xdr = &req->rq_snd_buf;
  851. rpc_fraghdr rm = xs_stream_record_marker(xdr);
  852. unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
  853. struct msghdr msg = {
  854. .msg_flags = XS_SENDMSG_FLAGS,
  855. };
  856. bool vm_wait;
  857. unsigned int sent;
  858. int status;
  859. /* Close the stream if the previous transmission was incomplete */
  860. if (xs_send_request_was_aborted(transport, req)) {
  861. xprt_force_disconnect(xprt);
  862. return -ENOTCONN;
  863. }
  864. xs_pktdump("packet data:",
  865. req->rq_svec->iov_base, req->rq_svec->iov_len);
  866. vm_wait = sk_stream_is_writeable(transport->inet) ? true : false;
  867. req->rq_xtime = ktime_get();
  868. status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
  869. transport->xmit.offset, rm, &sent);
  870. dprintk("RPC: %s(%u) = %d\n",
  871. __func__, xdr->len - transport->xmit.offset, status);
  872. if (likely(sent > 0) || status == 0) {
  873. transport->xmit.offset += sent;
  874. req->rq_bytes_sent = transport->xmit.offset;
  875. if (likely(req->rq_bytes_sent >= msglen)) {
  876. req->rq_xmit_bytes_sent += transport->xmit.offset;
  877. transport->xmit.offset = 0;
  878. return 0;
  879. }
  880. status = -EAGAIN;
  881. vm_wait = false;
  882. }
  883. switch (status) {
  884. case -EAGAIN:
  885. status = xs_stream_nospace(req, vm_wait);
  886. break;
  887. default:
  888. dprintk("RPC: sendmsg returned unrecognized error %d\n",
  889. -status);
  890. fallthrough;
  891. case -EPIPE:
  892. xprt_force_disconnect(xprt);
  893. status = -ENOTCONN;
  894. }
  895. return status;
  896. }
  /**
   * xs_udp_send_request - write an RPC request to a UDP socket
   * @req: pointer to RPC request
   *
   * Return values:
   *        0:	The request has been sent
   *  -EAGAIN:	The socket was blocked, please call again later to
   *		complete the request
   * -ENOTCONN:	Caller needs to invoke connect logic then call again
   *    other:	Some other error occurred, the request was not sent
   */
  static int xs_udp_send_request(struct rpc_rqst *req)
  {
  	struct rpc_xprt *xprt = req->rq_xprt;
  	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  	struct xdr_buf *xdr = &req->rq_snd_buf;
  	struct msghdr msg = {
  		.msg_flags	= XS_SENDMSG_FLAGS,
  		.msg_name	= xs_addr(xprt),
  		.msg_namelen	= xprt->addrlen,
  	};
  	unsigned int sent;
  	int status;

  	xs_pktdump("packet data:",
  			req->rq_svec->iov_base,
  			req->rq_svec->iov_len);

  	if (!xprt_bound(xprt))
  		return -ENOTCONN;

  	if (!xprt_request_get_cong(xprt, req))
  		return -EBADSLT;

  	status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
  	if (status < 0)
  		return status;

  	req->rq_xtime = ktime_get();
  	status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);

  	dprintk("RPC: xs_udp_send_request(%u) = %d\n",
  			xdr->len, status);

  	/* firewall is blocking us, don't return -EAGAIN or we end up looping */
  	if (status != -EPERM) {
  		if (status == -EAGAIN && sock_writeable(transport->inet))
  			status = -ENOBUFS;

  		if (sent > 0 || status == 0) {
  			req->rq_xmit_bytes_sent += sent;
  			if (sent >= req->rq_slen)
  				return 0;
  			/* Still some bytes left; set up for a retry later. */
  			status = -EAGAIN;
  		}
  	}

  	switch (status) {
  	case -EAGAIN:
  		status = xs_sock_nospace(req);
  		break;
  	case -ENOTSOCK:
  		/* Should we call xs_close() here? */
  		status = -ENOTCONN;
  		break;
  	case -ECONNREFUSED:
  		/* When the server has died, an ICMP port unreachable message
  		 * prompts ECONNREFUSED. */
  	case -ENETUNREACH:
  	case -ENOBUFS:
  	case -EPIPE:
  	case -EPERM:
  		break;
  	default:
  		dprintk("RPC: sendmsg returned unrecognized error %d\n",
  			-status);
  	}

  	return status;
  }
  969. /**
  970. * xs_tcp_send_request - write an RPC request to a TCP socket
  971. * @req: pointer to RPC request
  972. *
  973. * Return values:
  974. * 0: The request has been sent
  975. * EAGAIN: The socket was blocked, please call again later to
  976. * complete the request
  977. * ENOTCONN: Caller needs to invoke connect logic then call again
  978. * other: Some other error occurred, the request was not sent
  979. *
  980. * XXX: In the case of soft timeouts, should we eventually give up
  981. * if sendmsg is not able to make progress?
  982. */
  983. static int xs_tcp_send_request(struct rpc_rqst *req)
  984. {
  985. struct rpc_xprt *xprt = req->rq_xprt;
  986. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  987. struct xdr_buf *xdr = &req->rq_snd_buf;
  988. rpc_fraghdr rm = xs_stream_record_marker(xdr);
  989. unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
  990. struct msghdr msg = {
  991. .msg_flags = XS_SENDMSG_FLAGS,
  992. };
  993. bool vm_wait;
  994. unsigned int sent;
  995. int status;
  996. /* Close the stream if the previous transmission was incomplete */
  997. if (xs_send_request_was_aborted(transport, req)) {
  998. if (transport->sock != NULL)
  999. kernel_sock_shutdown(transport->sock, SHUT_RDWR);
  1000. return -ENOTCONN;
  1001. }
  1002. if (!transport->inet)
  1003. return -ENOTCONN;
  1004. xs_pktdump("packet data:",
  1005. req->rq_svec->iov_base,
  1006. req->rq_svec->iov_len);
  1007. if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
  1008. xs_tcp_set_socket_timeouts(xprt, transport->sock);
  1009. xs_set_srcport(transport, transport->sock);
  1010. /* Continue transmitting the packet/record. We must be careful
  1011. * to cope with writespace callbacks arriving _after_ we have
  1012. * called sendmsg(). */
  1013. req->rq_xtime = ktime_get();
  1014. tcp_sock_set_cork(transport->inet, true);
  1015. vm_wait = sk_stream_is_writeable(transport->inet) ? true : false;
  1016. do {
  1017. status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
  1018. transport->xmit.offset, rm, &sent);
  1019. dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
  1020. xdr->len - transport->xmit.offset, status);
  1021. /* If we've sent the entire packet, immediately
  1022. * reset the count of bytes sent. */
  1023. transport->xmit.offset += sent;
  1024. req->rq_bytes_sent = transport->xmit.offset;
  1025. if (likely(req->rq_bytes_sent >= msglen)) {
  1026. req->rq_xmit_bytes_sent += transport->xmit.offset;
  1027. transport->xmit.offset = 0;
  1028. if (atomic_long_read(&xprt->xmit_queuelen) == 1)
  1029. tcp_sock_set_cork(transport->inet, false);
  1030. return 0;
  1031. }
  1032. WARN_ON_ONCE(sent == 0 && status == 0);
  1033. if (sent > 0)
  1034. vm_wait = false;
  1035. } while (status == 0);
  1036. switch (status) {
  1037. case -ENOTSOCK:
  1038. status = -ENOTCONN;
  1039. /* Should we call xs_close() here? */
  1040. break;
  1041. case -EAGAIN:
  1042. status = xs_stream_nospace(req, vm_wait);
  1043. break;
  1044. case -ECONNRESET:
  1045. case -ECONNREFUSED:
  1046. case -ENOTCONN:
  1047. case -EADDRINUSE:
  1048. case -ENOBUFS:
  1049. case -EPIPE:
  1050. break;
  1051. default:
  1052. dprintk("RPC: sendmsg returned unrecognized error %d\n",
  1053. -status);
  1054. }
  1055. return status;
  1056. }
  1057. static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
  1058. {
  1059. transport->old_data_ready = sk->sk_data_ready;
  1060. transport->old_state_change = sk->sk_state_change;
  1061. transport->old_write_space = sk->sk_write_space;
  1062. transport->old_error_report = sk->sk_error_report;
  1063. }
  1064. static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
  1065. {
  1066. sk->sk_data_ready = transport->old_data_ready;
  1067. sk->sk_state_change = transport->old_state_change;
  1068. sk->sk_write_space = transport->old_write_space;
  1069. sk->sk_error_report = transport->old_error_report;
  1070. }
  1071. static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
  1072. {
  1073. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  1074. transport->xprt_err = 0;
  1075. clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
  1076. clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
  1077. clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
  1078. clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
  1079. clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
  1080. clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
  1081. }
  1082. static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
  1083. {
  1084. set_bit(nr, &transport->sock_state);
  1085. queue_work(xprtiod_workqueue, &transport->error_worker);
  1086. }
  1087. static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
  1088. {
  1089. xprt->connect_cookie++;
  1090. smp_mb__before_atomic();
  1091. clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
  1092. clear_bit(XPRT_CLOSING, &xprt->state);
  1093. xs_sock_reset_state_flags(xprt);
  1094. smp_mb__after_atomic();
  1095. }
  1096. /**
  1097. * xs_error_report - callback to handle TCP socket state errors
  1098. * @sk: socket
  1099. *
  1100. * Note: we don't call sock_error() since there may be a rpc_task
  1101. * using the socket, and so we don't want to clear sk->sk_err.
  1102. */
  1103. static void xs_error_report(struct sock *sk)
  1104. {
  1105. struct sock_xprt *transport;
  1106. struct rpc_xprt *xprt;
  1107. if (!(xprt = xprt_from_sock(sk)))
  1108. return;
  1109. transport = container_of(xprt, struct sock_xprt, xprt);
  1110. transport->xprt_err = -sk->sk_err;
  1111. if (transport->xprt_err == 0)
  1112. return;
  1113. dprintk("RPC: xs_error_report client %p, error=%d...\n",
  1114. xprt, -transport->xprt_err);
  1115. trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err);
  1116. /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */
  1117. smp_mb__before_atomic();
  1118. xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
  1119. }
  1120. static void xs_reset_transport(struct sock_xprt *transport)
  1121. {
  1122. struct socket *sock = transport->sock;
  1123. struct sock *sk = transport->inet;
  1124. struct rpc_xprt *xprt = &transport->xprt;
  1125. struct file *filp = transport->file;
  1126. if (sk == NULL)
  1127. return;
  1128. /*
  1129. * Make sure we're calling this in a context from which it is safe
  1130. * to call __fput_sync(). In practice that means rpciod and the
  1131. * system workqueue.
  1132. */
  1133. if (!(current->flags & PF_WQ_WORKER)) {
  1134. WARN_ON_ONCE(1);
  1135. set_bit(XPRT_CLOSE_WAIT, &xprt->state);
  1136. return;
  1137. }
  1138. if (atomic_read(&transport->xprt.swapper))
  1139. sk_clear_memalloc(sk);
  1140. tls_handshake_cancel(sk);
  1141. kernel_sock_shutdown(sock, SHUT_RDWR);
  1142. mutex_lock(&transport->recv_mutex);
  1143. lock_sock(sk);
  1144. transport->inet = NULL;
  1145. transport->sock = NULL;
  1146. transport->file = NULL;
  1147. sk->sk_user_data = NULL;
  1148. sk->sk_sndtimeo = 0;
  1149. xs_restore_old_callbacks(transport, sk);
  1150. xprt_clear_connected(xprt);
  1151. xs_sock_reset_connection_flags(xprt);
  1152. /* Reset stream record info */
  1153. xs_stream_reset_connect(transport);
  1154. release_sock(sk);
  1155. mutex_unlock(&transport->recv_mutex);
  1156. trace_rpc_socket_close(xprt, sock);
  1157. __fput_sync(filp);
  1158. xprt_disconnect_done(xprt);
  1159. }
  1160. /**
  1161. * xs_close - close a socket
  1162. * @xprt: transport
  1163. *
  1164. * This is used when all requests are complete; ie, no DRC state remains
  1165. * on the server we want to save.
  1166. *
  1167. * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
  1168. * xs_reset_transport() zeroing the socket from underneath a writer.
  1169. */
  1170. static void xs_close(struct rpc_xprt *xprt)
  1171. {
  1172. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  1173. dprintk("RPC: xs_close xprt %p\n", xprt);
  1174. if (transport->sock)
  1175. tls_handshake_close(transport->sock);
  1176. xs_reset_transport(transport);
  1177. xprt->reestablish_timeout = 0;
  1178. }
  /* Log the injected disconnect, then call xprt_disconnect_done(). */
  static void xs_inject_disconnect(struct rpc_xprt *xprt)
  {
  	dprintk("RPC: injecting transport disconnect on xprt=%p\n",
  		xprt);

  	xprt_disconnect_done(xprt);
  }
  /* Free the transport's peer address data, then the transport itself. */
  static void xs_xprt_free(struct rpc_xprt *xprt)
  {
  	xs_free_peer_addresses(xprt);

  	xprt_free(xprt);
  }
  1190. /**
  1191. * xs_destroy - prepare to shutdown a transport
  1192. * @xprt: doomed transport
  1193. *
  1194. */
  1195. static void xs_destroy(struct rpc_xprt *xprt)
  1196. {
  1197. struct sock_xprt *transport = container_of(xprt,
  1198. struct sock_xprt, xprt);
  1199. dprintk("RPC: xs_destroy xprt %p\n", xprt);
  1200. cancel_delayed_work_sync(&transport->connect_worker);
  1201. xs_close(xprt);
  1202. cancel_work_sync(&transport->recv_worker);
  1203. cancel_work_sync(&transport->error_worker);
  1204. xs_xprt_free(xprt);
  1205. module_put(THIS_MODULE);
  1206. }
  1207. /**
  1208. * xs_udp_data_read_skb - receive callback for UDP sockets
  1209. * @xprt: transport
  1210. * @sk: socket
  1211. * @skb: skbuff
  1212. *
  1213. */
  1214. static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
  1215. struct sock *sk,
  1216. struct sk_buff *skb)
  1217. {
  1218. struct rpc_task *task;
  1219. struct rpc_rqst *rovr;
  1220. int repsize, copied;
  1221. u32 _xid;
  1222. __be32 *xp;
  1223. repsize = skb->len;
  1224. if (repsize < 4) {
  1225. dprintk("RPC: impossible RPC reply size %d!\n", repsize);
  1226. return;
  1227. }
  1228. /* Copy the XID from the skb... */
  1229. xp = skb_header_pointer(skb, 0, sizeof(_xid), &_xid);
  1230. if (xp == NULL)
  1231. return;
  1232. /* Look up and lock the request corresponding to the given XID */
  1233. spin_lock(&xprt->queue_lock);
  1234. rovr = xprt_lookup_rqst(xprt, *xp);
  1235. if (!rovr)
  1236. goto out_unlock;
  1237. xprt_pin_rqst(rovr);
  1238. xprt_update_rtt(rovr->rq_task);
  1239. spin_unlock(&xprt->queue_lock);
  1240. task = rovr->rq_task;
  1241. if ((copied = rovr->rq_private_buf.buflen) > repsize)
  1242. copied = repsize;
  1243. /* Suck it into the iovec, verify checksum if not done by hw. */
  1244. if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
  1245. spin_lock(&xprt->queue_lock);
  1246. __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
  1247. goto out_unpin;
  1248. }
  1249. spin_lock(&xprt->transport_lock);
  1250. xprt_adjust_cwnd(xprt, task, copied);
  1251. spin_unlock(&xprt->transport_lock);
  1252. spin_lock(&xprt->queue_lock);
  1253. xprt_complete_rqst(task, copied);
  1254. __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
  1255. out_unpin:
  1256. xprt_unpin_rqst(rovr);
  1257. out_unlock:
  1258. spin_unlock(&xprt->queue_lock);
  1259. }
  1260. static void xs_udp_data_receive(struct sock_xprt *transport)
  1261. {
  1262. struct sk_buff *skb;
  1263. struct sock *sk;
  1264. int err;
  1265. mutex_lock(&transport->recv_mutex);
  1266. sk = transport->inet;
  1267. if (sk == NULL)
  1268. goto out;
  1269. for (;;) {
  1270. skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
  1271. if (skb == NULL)
  1272. break;
  1273. xs_udp_data_read_skb(&transport->xprt, sk, skb);
  1274. consume_skb(skb);
  1275. cond_resched();
  1276. }
  1277. xs_poll_check_readable(transport);
  1278. out:
  1279. mutex_unlock(&transport->recv_mutex);
  1280. }
  1281. static void xs_udp_data_receive_workfn(struct work_struct *work)
  1282. {
  1283. struct sock_xprt *transport =
  1284. container_of(work, struct sock_xprt, recv_worker);
  1285. unsigned int pflags = memalloc_nofs_save();
  1286. xs_udp_data_receive(transport);
  1287. memalloc_nofs_restore(pflags);
  1288. }
  1289. /**
  1290. * xs_data_ready - "data ready" callback for sockets
  1291. * @sk: socket with data to read
  1292. *
  1293. */
  1294. static void xs_data_ready(struct sock *sk)
  1295. {
  1296. struct rpc_xprt *xprt;
  1297. trace_sk_data_ready(sk);
  1298. xprt = xprt_from_sock(sk);
  1299. if (xprt != NULL) {
  1300. struct sock_xprt *transport = container_of(xprt,
  1301. struct sock_xprt, xprt);
  1302. trace_xs_data_ready(xprt);
  1303. transport->old_data_ready(sk);
  1304. if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
  1305. return;
  1306. /* Any data means we had a useful conversation, so
  1307. * then we don't need to delay the next reconnect
  1308. */
  1309. if (xprt->reestablish_timeout)
  1310. xprt->reestablish_timeout = 0;
  1311. if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
  1312. queue_work(xprtiod_workqueue, &transport->recv_worker);
  1313. }
  1314. }
  /*
   * Helper used to force a TCP close when the server is sending junk
   * and/or has put us in CLOSE_WAIT. It simply forces a disconnect of
   * the transport.
   */
  static void xs_tcp_force_close(struct rpc_xprt *xprt)
  {
  	xprt_force_disconnect(xprt);
  }
  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
  /* Backchannel maximum payload: always PAGE_SIZE, whatever @xprt is. */
  static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
  {
  	return PAGE_SIZE;
  }
  #endif /* CONFIG_SUNRPC_BACKCHANNEL */
  1329. /**
  1330. * xs_local_state_change - callback to handle AF_LOCAL socket state changes
  1331. * @sk: socket whose state has changed
  1332. *
  1333. */
  1334. static void xs_local_state_change(struct sock *sk)
  1335. {
  1336. struct rpc_xprt *xprt;
  1337. struct sock_xprt *transport;
  1338. if (!(xprt = xprt_from_sock(sk)))
  1339. return;
  1340. transport = container_of(xprt, struct sock_xprt, xprt);
  1341. if (sk->sk_shutdown & SHUTDOWN_MASK) {
  1342. clear_bit(XPRT_CONNECTED, &xprt->state);
  1343. /* Trigger the socket release */
  1344. xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
  1345. }
  1346. }
  1347. /**
  1348. * xs_tcp_state_change - callback to handle TCP socket state changes
  1349. * @sk: socket whose state has changed
  1350. *
  1351. */
  1352. static void xs_tcp_state_change(struct sock *sk)
  1353. {
  1354. struct rpc_xprt *xprt;
  1355. struct sock_xprt *transport;
  1356. if (!(xprt = xprt_from_sock(sk)))
  1357. return;
  1358. dprintk("RPC: xs_tcp_state_change client %p...\n", xprt);
  1359. dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n",
  1360. sk->sk_state, xprt_connected(xprt),
  1361. sock_flag(sk, SOCK_DEAD),
  1362. sock_flag(sk, SOCK_ZAPPED),
  1363. sk->sk_shutdown);
  1364. transport = container_of(xprt, struct sock_xprt, xprt);
  1365. trace_rpc_socket_state_change(xprt, sk->sk_socket);
  1366. switch (sk->sk_state) {
  1367. case TCP_ESTABLISHED:
  1368. if (!xprt_test_and_set_connected(xprt)) {
  1369. xprt->connect_cookie++;
  1370. clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
  1371. xprt_clear_connecting(xprt);
  1372. xprt->stat.connect_count++;
  1373. xprt->stat.connect_time += (long)jiffies -
  1374. xprt->stat.connect_start;
  1375. xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING);
  1376. }
  1377. break;
  1378. case TCP_FIN_WAIT1:
  1379. /* The client initiated a shutdown of the socket */
  1380. xprt->connect_cookie++;
  1381. xprt->reestablish_timeout = 0;
  1382. set_bit(XPRT_CLOSING, &xprt->state);
  1383. smp_mb__before_atomic();
  1384. clear_bit(XPRT_CONNECTED, &xprt->state);
  1385. clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
  1386. smp_mb__after_atomic();
  1387. break;
  1388. case TCP_CLOSE_WAIT:
  1389. /* The server initiated a shutdown of the socket */
  1390. xprt->connect_cookie++;
  1391. clear_bit(XPRT_CONNECTED, &xprt->state);
  1392. xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
  1393. fallthrough;
  1394. case TCP_CLOSING:
  1395. /*
  1396. * If the server closed down the connection, make sure that
  1397. * we back off before reconnecting
  1398. */
  1399. if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
  1400. xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
  1401. break;
  1402. case TCP_LAST_ACK:
  1403. set_bit(XPRT_CLOSING, &xprt->state);
  1404. smp_mb__before_atomic();
  1405. clear_bit(XPRT_CONNECTED, &xprt->state);
  1406. smp_mb__after_atomic();
  1407. break;
  1408. case TCP_CLOSE:
  1409. if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
  1410. &transport->sock_state)) {
  1411. xs_reset_srcport(transport);
  1412. xprt_clear_connecting(xprt);
  1413. }
  1414. clear_bit(XPRT_CLOSING, &xprt->state);
  1415. /* Trigger the socket release */
  1416. xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
  1417. }
  1418. }
  1419. static void xs_write_space(struct sock *sk)
  1420. {
  1421. struct sock_xprt *transport;
  1422. struct rpc_xprt *xprt;
  1423. if (!sk->sk_socket)
  1424. return;
  1425. clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  1426. if (unlikely(!(xprt = xprt_from_sock(sk))))
  1427. return;
  1428. transport = container_of(xprt, struct sock_xprt, xprt);
  1429. if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state))
  1430. return;
  1431. xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE);
  1432. sk->sk_write_pending--;
  1433. }
  /**
   * xs_udp_write_space - write-space callback for datagram sockets
   * @sk: socket whose state has changed
   *
   * Invoked when more output buffer space becomes available on @sk.
   * Writers are left asleep until sock_writeable() is satisfied, so that
   * they can make "significant" progress once woken; waking them any
   * earlier would only thrash kernel_sendmsg with small requests.
   */
  static void xs_udp_write_space(struct sock *sk)
  {
      /* same test as net/core/sock.c:sock_def_write_space */
      if (!sock_writeable(sk))
          return;

      xs_write_space(sk);
  }
  /**
   * xs_tcp_write_space - write-space callback for stream sockets
   * @sk: socket whose state has changed
   *
   * Invoked when more output buffer space becomes available on @sk.
   * Writers are left asleep until sk_stream_is_writeable() is satisfied,
   * so that they can make "significant" progress once woken; waking them
   * any earlier would only thrash kernel_sendmsg with small requests.
   */
  static void xs_tcp_write_space(struct sock *sk)
  {
      /* same test as net/core/stream.c:sk_stream_write_space */
      if (!sk_stream_is_writeable(sk))
          return;

      xs_write_space(sk);
  }
  1466. static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
  1467. {
  1468. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  1469. struct sock *sk = transport->inet;
  1470. if (transport->rcvsize) {
  1471. sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
  1472. sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2;
  1473. }
  1474. if (transport->sndsize) {
  1475. sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
  1476. sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2;
  1477. sk->sk_write_space(sk);
  1478. }
  1479. }
  1480. /**
  1481. * xs_udp_set_buffer_size - set send and receive limits
  1482. * @xprt: generic transport
  1483. * @sndsize: requested size of send buffer, in bytes
  1484. * @rcvsize: requested size of receive buffer, in bytes
  1485. *
  1486. * Set socket send and receive buffer size limits.
  1487. */
  1488. static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize)
  1489. {
  1490. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  1491. transport->sndsize = 0;
  1492. if (sndsize)
  1493. transport->sndsize = sndsize + 1024;
  1494. transport->rcvsize = 0;
  1495. if (rcvsize)
  1496. transport->rcvsize = rcvsize + 1024;
  1497. xs_udp_do_set_buffer_size(xprt);
  1498. }
  1499. /**
  1500. * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport
  1501. * @xprt: controlling transport
  1502. * @task: task that timed out
  1503. *
  1504. * Adjust the congestion window after a retransmit timeout has occurred.
  1505. */
  1506. static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
  1507. {
  1508. spin_lock(&xprt->transport_lock);
  1509. xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
  1510. spin_unlock(&xprt->transport_lock);
  1511. }
  1512. static int xs_get_random_port(void)
  1513. {
  1514. unsigned short min = xprt_min_resvport, max = xprt_max_resvport;
  1515. unsigned short range;
  1516. unsigned short rand;
  1517. if (max < min)
  1518. return -EADDRINUSE;
  1519. range = max - min + 1;
  1520. rand = get_random_u32_below(range);
  1521. return rand + min;
  1522. }
  1523. static unsigned short xs_sock_getport(struct socket *sock)
  1524. {
  1525. struct sockaddr_storage buf;
  1526. unsigned short port = 0;
  1527. if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
  1528. goto out;
  1529. switch (buf.ss_family) {
  1530. case AF_INET6:
  1531. port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port);
  1532. break;
  1533. case AF_INET:
  1534. port = ntohs(((struct sockaddr_in *)&buf)->sin_port);
  1535. }
  1536. out:
  1537. return port;
  1538. }
  /**
   * xs_set_port - reset the port number in the remote endpoint address
   * @xprt: generic transport
   * @port: new port number
   *
   * Stores @port in the transport's peer address and then refreshes the
   * derived peer-port state through xs_update_peer_port().
   */
  static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
  {
      struct sockaddr *peer = xs_addr(xprt);

      dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);

      rpc_set_port(peer, port);
      xs_update_peer_port(xprt);
  }
  1551. static void xs_reset_srcport(struct sock_xprt *transport)
  1552. {
  1553. transport->srcport = 0;
  1554. }
  1555. static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
  1556. {
  1557. if (transport->srcport == 0 && transport->xprt.reuseport)
  1558. transport->srcport = xs_sock_getport(sock);
  1559. }
  1560. static int xs_get_srcport(struct sock_xprt *transport)
  1561. {
  1562. int port = transport->srcport;
  1563. if (port == 0 && transport->xprt.resvport)
  1564. port = xs_get_random_port();
  1565. return port;
  1566. }
  1567. static unsigned short xs_sock_srcport(struct rpc_xprt *xprt)
  1568. {
  1569. struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
  1570. unsigned short ret = 0;
  1571. mutex_lock(&sock->recv_mutex);
  1572. if (sock->sock)
  1573. ret = xs_sock_getport(sock->sock);
  1574. mutex_unlock(&sock->recv_mutex);
  1575. return ret;
  1576. }
  1577. static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen)
  1578. {
  1579. struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
  1580. union {
  1581. struct sockaddr sa;
  1582. struct sockaddr_storage st;
  1583. } saddr;
  1584. int ret = -ENOTCONN;
  1585. mutex_lock(&sock->recv_mutex);
  1586. if (sock->sock) {
  1587. ret = kernel_getsockname(sock->sock, &saddr.sa);
  1588. if (ret >= 0)
  1589. ret = snprintf(buf, buflen, "%pISc", &saddr.sa);
  1590. }
  1591. mutex_unlock(&sock->recv_mutex);
  1592. return ret;
  1593. }
  1594. static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
  1595. {
  1596. if (transport->srcport != 0)
  1597. transport->srcport = 0;
  1598. if (!transport->xprt.resvport)
  1599. return 0;
  1600. if (port <= xprt_min_resvport || port > xprt_max_resvport)
  1601. return xprt_max_resvport;
  1602. return --port;
  1603. }
  1604. static int xs_bind(struct sock_xprt *transport, struct socket *sock)
  1605. {
  1606. struct sockaddr_storage myaddr;
  1607. int err, nloop = 0;
  1608. int port = xs_get_srcport(transport);
  1609. unsigned short last;
  1610. /*
  1611. * If we are asking for any ephemeral port (i.e. port == 0 &&
  1612. * transport->xprt.resvport == 0), don't bind. Let the local
  1613. * port selection happen implicitly when the socket is used
  1614. * (for example at connect time).
  1615. *
  1616. * This ensures that we can continue to establish TCP
  1617. * connections even when all local ephemeral ports are already
  1618. * a part of some TCP connection. This makes no difference
  1619. * for UDP sockets, but also doesn't harm them.
  1620. *
  1621. * If we're asking for any reserved port (i.e. port == 0 &&
  1622. * transport->xprt.resvport == 1) xs_get_srcport above will
  1623. * ensure that port is non-zero and we will bind as needed.
  1624. */
  1625. if (port <= 0)
  1626. return port;
  1627. memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
  1628. do {
  1629. rpc_set_port((struct sockaddr *)&myaddr, port);
  1630. err = kernel_bind(sock, (struct sockaddr_unsized *)&myaddr,
  1631. transport->xprt.addrlen);
  1632. if (err == 0) {
  1633. if (transport->xprt.reuseport)
  1634. transport->srcport = port;
  1635. break;
  1636. }
  1637. last = port;
  1638. port = xs_next_srcport(transport, port);
  1639. if (port > last)
  1640. nloop++;
  1641. } while (err == -EADDRINUSE && nloop != 2);
  1642. if (myaddr.ss_family == AF_INET)
  1643. dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__,
  1644. &((struct sockaddr_in *)&myaddr)->sin_addr,
  1645. port, err ? "failed" : "ok", err);
  1646. else
  1647. dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__,
  1648. &((struct sockaddr_in6 *)&myaddr)->sin6_addr,
  1649. port, err ? "failed" : "ok", err);
  1650. return err;
  1651. }
  1652. /*
  1653. * We don't support autobind on AF_LOCAL sockets
  1654. */
  1655. static void xs_local_rpcbind(struct rpc_task *task)
  1656. {
  1657. xprt_set_bound(task->tk_xprt);
  1658. }
  /* Deliberately empty: this set_port variant for AF_LOCAL does nothing. */
  static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
  {
  }
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  /*
   * Separate lock classes for RPC sockets, one pair of keys per address
   * family: index 0 is AF_LOCAL, 1 is AF_INET, 2 is AF_INET6.
   */
  static struct lock_class_key xs_key[3];
  static struct lock_class_key xs_slock_key[3];

  /* Re-initialise the socket locks of an AF_LOCAL socket with RPC classes */
  static inline void xs_reclassify_socketu(struct socket *sock)
  {
      struct sock *sk = sock->sk;

      sock_lock_init_class_and_name(sk,
              "slock-AF_LOCAL-RPC", &xs_slock_key[0],
              "sk_lock-AF_LOCAL-RPC", &xs_key[0]);
  }

  /* Re-initialise the socket locks of an AF_INET socket with RPC classes */
  static inline void xs_reclassify_socket4(struct socket *sock)
  {
      struct sock *sk = sock->sk;

      sock_lock_init_class_and_name(sk,
              "slock-AF_INET-RPC", &xs_slock_key[1],
              "sk_lock-AF_INET-RPC", &xs_key[1]);
  }

  /* Re-initialise the socket locks of an AF_INET6 socket with RPC classes */
  static inline void xs_reclassify_socket6(struct socket *sock)
  {
      struct sock *sk = sock->sk;

      sock_lock_init_class_and_name(sk,
              "slock-AF_INET6-RPC", &xs_slock_key[2],
              "sk_lock-AF_INET6-RPC", &xs_key[2]);
  }

  /*
   * Dispatch to the per-family helper.  Warns once and does nothing when
   * sock_allow_reclassification() refuses; unknown families are ignored.
   */
  static inline void xs_reclassify_socket(int family, struct socket *sock)
  {
      if (WARN_ON_ONCE(!sock_allow_reclassification(sock->sk)))
          return;

      if (family == AF_LOCAL)
          xs_reclassify_socketu(sock);
      else if (family == AF_INET)
          xs_reclassify_socket4(sock);
      else if (family == AF_INET6)
          xs_reclassify_socket6(sock);
  }
  #else
  /* Without CONFIG_DEBUG_LOCK_ALLOC there is nothing to reclassify */
  static inline void xs_reclassify_socket(int family, struct socket *sock)
  {
  }
  #endif
  /* Work function that intentionally does nothing. */
  static void xs_dummy_setup_socket(struct work_struct *work)
  {
  }
  1707. static struct socket *xs_create_sock(struct rpc_xprt *xprt,
  1708. struct sock_xprt *transport, int family, int type,
  1709. int protocol, bool reuseport)
  1710. {
  1711. struct file *filp;
  1712. struct socket *sock;
  1713. int err;
  1714. err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1);
  1715. if (err < 0) {
  1716. dprintk("RPC: can't create %d transport socket (%d).\n",
  1717. protocol, -err);
  1718. goto out;
  1719. }
  1720. xs_reclassify_socket(family, sock);
  1721. if (reuseport)
  1722. sock_set_reuseport(sock->sk);
  1723. err = xs_bind(transport, sock);
  1724. if (err) {
  1725. sock_release(sock);
  1726. goto out;
  1727. }
  1728. if (protocol == IPPROTO_TCP)
  1729. sk_net_refcnt_upgrade(sock->sk);
  1730. filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
  1731. if (IS_ERR(filp))
  1732. return ERR_CAST(filp);
  1733. transport->file = filp;
  1734. return sock;
  1735. out:
  1736. return ERR_PTR(err);
  1737. }
  1738. static int xs_local_finish_connecting(struct rpc_xprt *xprt,
  1739. struct socket *sock)
  1740. {
  1741. struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
  1742. xprt);
  1743. if (!transport->inet) {
  1744. struct sock *sk = sock->sk;
  1745. lock_sock(sk);
  1746. xs_save_old_callbacks(transport, sk);
  1747. sk->sk_user_data = xprt;
  1748. sk->sk_data_ready = xs_data_ready;
  1749. sk->sk_write_space = xs_udp_write_space;
  1750. sk->sk_state_change = xs_local_state_change;
  1751. sk->sk_error_report = xs_error_report;
  1752. sk->sk_use_task_frag = false;
  1753. xprt_clear_connected(xprt);
  1754. /* Reset to new socket */
  1755. transport->sock = sock;
  1756. transport->inet = sk;
  1757. release_sock(sk);
  1758. }
  1759. xs_stream_start_connect(transport);
  1760. return kernel_connect(sock, (struct sockaddr_unsized *)xs_addr(xprt), xprt->addrlen, 0);
  1761. }
  /**
   * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
   * @transport: socket transport to connect
   *
   * Creates an AF_LOCAL stream socket, gives it a struct file and
   * connects it.  Whatever the outcome, the connecting state is cleared
   * and pending tasks are woken with the final status.
   *
   * Return: 0 on success, otherwise the negative errno from socket
   * creation, file allocation or the connect attempt.
   */
  static int xs_local_setup_socket(struct sock_xprt *transport)
  {
      struct rpc_xprt *xprt = &transport->xprt;
      struct socket *sock;
      struct file *filp;
      int status;

      status = __sock_create(xprt->xprt_net, AF_LOCAL,
                             SOCK_STREAM, 0, &sock, 1);
      if (status < 0) {
          dprintk("RPC: can't create AF_LOCAL transport socket (%d).\n",
                  -status);
          goto out;
      }
      xs_reclassify_socket(AF_LOCAL, sock);

      filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
      if (IS_ERR(filp)) {
          status = PTR_ERR(filp);
          goto out;
      }
      transport->file = filp;

      dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
              xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);

      status = xs_local_finish_connecting(xprt, sock);
      trace_rpc_socket_connect(xprt, sock, status);

      switch (status) {
      case 0:
          dprintk("RPC: xprt %p connected to %s\n",
                  xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
          /* Account for the connect before marking the xprt connected */
          xprt->stat.connect_count++;
          xprt->stat.connect_time += (long)jiffies -
                                     xprt->stat.connect_start;
          xprt_set_connected(xprt);
          break;
      case -ENOBUFS:
          /* Passed through without logging */
          break;
      case -ENOENT:
          dprintk("RPC: xprt %p: socket %s does not exist\n",
                  xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
          break;
      case -ECONNREFUSED:
          dprintk("RPC: xprt %p: connection refused for %s\n",
                  xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
          break;
      default:
          printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
                 __func__, -status,
                 xprt->address_strings[RPC_DISPLAY_ADDR]);
      }

  out:
      xprt_clear_connecting(xprt);
      xprt_wake_pending_tasks(xprt, status);
      return status;
  }
  1819. static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
  1820. {
  1821. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  1822. int ret;
  1823. if (transport->file)
  1824. goto force_disconnect;
  1825. if (RPC_IS_ASYNC(task)) {
  1826. /*
  1827. * We want the AF_LOCAL connect to be resolved in the
  1828. * filesystem namespace of the process making the rpc
  1829. * call. Thus we connect synchronously.
  1830. *
  1831. * If we want to support asynchronous AF_LOCAL calls,
  1832. * we'll need to figure out how to pass a namespace to
  1833. * connect.
  1834. */
  1835. rpc_task_set_rpc_status(task, -ENOTCONN);
  1836. goto out_wake;
  1837. }
  1838. ret = xs_local_setup_socket(transport);
  1839. if (ret && !RPC_IS_SOFTCONN(task))
  1840. msleep_interruptible(15000);
  1841. return;
  1842. force_disconnect:
  1843. xprt_force_disconnect(xprt);
  1844. out_wake:
  1845. xprt_clear_connecting(xprt);
  1846. xprt_wake_pending_tasks(xprt, -ENOTCONN);
  1847. }
  1848. #if IS_ENABLED(CONFIG_SUNRPC_SWAP)
  1849. /*
  1850. * Note that this should be called with XPRT_LOCKED held, or recv_mutex
  1851. * held, or when we otherwise know that we have exclusive access to the
  1852. * socket, to guard against races with xs_reset_transport.
  1853. */
  1854. static void xs_set_memalloc(struct rpc_xprt *xprt)
  1855. {
  1856. struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
  1857. xprt);
  1858. /*
  1859. * If there's no sock, then we have nothing to set. The
  1860. * reconnecting process will get it for us.
  1861. */
  1862. if (!transport->inet)
  1863. return;
  1864. if (atomic_read(&xprt->swapper))
  1865. sk_set_memalloc(transport->inet);
  1866. }
  1867. /**
  1868. * xs_enable_swap - Tag this transport as being used for swap.
  1869. * @xprt: transport to tag
  1870. *
  1871. * Take a reference to this transport on behalf of the rpc_clnt, and
  1872. * optionally mark it for swapping if it wasn't already.
  1873. */
  1874. static int
  1875. xs_enable_swap(struct rpc_xprt *xprt)
  1876. {
  1877. struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
  1878. mutex_lock(&xs->recv_mutex);
  1879. if (atomic_inc_return(&xprt->swapper) == 1 &&
  1880. xs->inet)
  1881. sk_set_memalloc(xs->inet);
  1882. mutex_unlock(&xs->recv_mutex);
  1883. return 0;
  1884. }
  1885. /**
  1886. * xs_disable_swap - Untag this transport as being used for swap.
  1887. * @xprt: transport to tag
  1888. *
  1889. * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the
  1890. * swapper refcount goes to 0, untag the socket as a memalloc socket.
  1891. */
  1892. static void
  1893. xs_disable_swap(struct rpc_xprt *xprt)
  1894. {
  1895. struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
  1896. mutex_lock(&xs->recv_mutex);
  1897. if (atomic_dec_and_test(&xprt->swapper) &&
  1898. xs->inet)
  1899. sk_clear_memalloc(xs->inet);
  1900. mutex_unlock(&xs->recv_mutex);
  1901. }
  1902. #else
  1903. static void xs_set_memalloc(struct rpc_xprt *xprt)
  1904. {
  1905. }
  1906. static int
  1907. xs_enable_swap(struct rpc_xprt *xprt)
  1908. {
  1909. return -EINVAL;
  1910. }
  1911. static void
  1912. xs_disable_swap(struct rpc_xprt *xprt)
  1913. {
  1914. }
  1915. #endif
  1916. static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
  1917. {
  1918. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  1919. if (!transport->inet) {
  1920. struct sock *sk = sock->sk;
  1921. lock_sock(sk);
  1922. xs_save_old_callbacks(transport, sk);
  1923. sk->sk_user_data = xprt;
  1924. sk->sk_data_ready = xs_data_ready;
  1925. sk->sk_write_space = xs_udp_write_space;
  1926. sk->sk_use_task_frag = false;
  1927. xprt_set_connected(xprt);
  1928. /* Reset to new socket */
  1929. transport->sock = sock;
  1930. transport->inet = sk;
  1931. xs_set_memalloc(xprt);
  1932. release_sock(sk);
  1933. }
  1934. xs_udp_do_set_buffer_size(xprt);
  1935. xprt->stat.connect_start = jiffies;
  1936. }
  1937. static void xs_udp_setup_socket(struct work_struct *work)
  1938. {
  1939. struct sock_xprt *transport =
  1940. container_of(work, struct sock_xprt, connect_worker.work);
  1941. struct rpc_xprt *xprt = &transport->xprt;
  1942. struct socket *sock;
  1943. int status = -EIO;
  1944. unsigned int pflags = current->flags;
  1945. if (atomic_read(&xprt->swapper))
  1946. current->flags |= PF_MEMALLOC;
  1947. sock = xs_create_sock(xprt, transport,
  1948. xs_addr(xprt)->sa_family, SOCK_DGRAM,
  1949. IPPROTO_UDP, false);
  1950. if (IS_ERR(sock))
  1951. goto out;
  1952. dprintk("RPC: worker connecting xprt %p via %s to "
  1953. "%s (port %s)\n", xprt,
  1954. xprt->address_strings[RPC_DISPLAY_PROTO],
  1955. xprt->address_strings[RPC_DISPLAY_ADDR],
  1956. xprt->address_strings[RPC_DISPLAY_PORT]);
  1957. xs_udp_finish_connecting(xprt, sock);
  1958. trace_rpc_socket_connect(xprt, sock, 0);
  1959. status = 0;
  1960. out:
  1961. xprt_clear_connecting(xprt);
  1962. xprt_unlock_connect(xprt, transport);
  1963. xprt_wake_pending_tasks(xprt, status);
  1964. current_restore_flags(pflags, PF_MEMALLOC);
  1965. }
  1966. /**
  1967. * xs_tcp_shutdown - gracefully shut down a TCP socket
  1968. * @xprt: transport
  1969. *
  1970. * Initiates a graceful shutdown of the TCP socket by calling the
  1971. * equivalent of shutdown(SHUT_RDWR);
  1972. */
  1973. static void xs_tcp_shutdown(struct rpc_xprt *xprt)
  1974. {
  1975. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  1976. struct socket *sock = transport->sock;
  1977. int skst = transport->inet ? transport->inet->sk_state : TCP_CLOSE;
  1978. if (sock == NULL)
  1979. return;
  1980. if (!xprt->reuseport) {
  1981. xs_close(xprt);
  1982. return;
  1983. }
  1984. switch (skst) {
  1985. case TCP_FIN_WAIT1:
  1986. case TCP_FIN_WAIT2:
  1987. case TCP_LAST_ACK:
  1988. break;
  1989. case TCP_ESTABLISHED:
  1990. case TCP_CLOSE_WAIT:
  1991. kernel_sock_shutdown(sock, SHUT_RDWR);
  1992. trace_rpc_socket_shutdown(xprt, sock);
  1993. break;
  1994. default:
  1995. xs_reset_transport(transport);
  1996. }
  1997. }
  1998. static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
  1999. struct socket *sock)
  2000. {
  2001. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  2002. struct net *net = sock_net(sock->sk);
  2003. unsigned long connect_timeout;
  2004. unsigned long syn_retries;
  2005. unsigned int keepidle;
  2006. unsigned int keepcnt;
  2007. unsigned int timeo;
  2008. unsigned long t;
  2009. spin_lock(&xprt->transport_lock);
  2010. keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
  2011. keepcnt = xprt->timeout->to_retries + 1;
  2012. timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
  2013. (xprt->timeout->to_retries + 1);
  2014. clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
  2015. spin_unlock(&xprt->transport_lock);
  2016. /* TCP Keepalive options */
  2017. sock_set_keepalive(sock->sk);
  2018. tcp_sock_set_keepidle(sock->sk, keepidle);
  2019. tcp_sock_set_keepintvl(sock->sk, keepidle);
  2020. tcp_sock_set_keepcnt(sock->sk, keepcnt);
  2021. /* TCP user timeout (see RFC5482) */
  2022. tcp_sock_set_user_timeout(sock->sk, timeo);
  2023. /* Connect timeout */
  2024. connect_timeout = max_t(unsigned long,
  2025. DIV_ROUND_UP(xprt->connect_timeout, HZ), 1);
  2026. syn_retries = max_t(unsigned long,
  2027. READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1);
  2028. for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++)
  2029. ;
  2030. if (t <= syn_retries)
  2031. tcp_sock_set_syncnt(sock->sk, t - 1);
  2032. }
  2033. static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt,
  2034. unsigned long connect_timeout)
  2035. {
  2036. struct sock_xprt *transport =
  2037. container_of(xprt, struct sock_xprt, xprt);
  2038. struct rpc_timeout to;
  2039. unsigned long initval;
  2040. memcpy(&to, xprt->timeout, sizeof(to));
  2041. /* Arbitrary lower limit */
  2042. initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO);
  2043. to.to_initval = initval;
  2044. to.to_maxval = initval;
  2045. to.to_retries = 0;
  2046. memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout));
  2047. xprt->timeout = &transport->tcp_timeout;
  2048. xprt->connect_timeout = connect_timeout;
  2049. }
  2050. static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
  2051. unsigned long connect_timeout,
  2052. unsigned long reconnect_timeout)
  2053. {
  2054. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  2055. spin_lock(&xprt->transport_lock);
  2056. if (reconnect_timeout < xprt->max_reconnect_timeout)
  2057. xprt->max_reconnect_timeout = reconnect_timeout;
  2058. if (connect_timeout < xprt->connect_timeout)
  2059. xs_tcp_do_set_connect_timeout(xprt, connect_timeout);
  2060. set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
  2061. spin_unlock(&xprt->transport_lock);
  2062. }
  2063. static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
  2064. {
  2065. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  2066. if (!transport->inet) {
  2067. struct sock *sk = sock->sk;
  2068. /* Avoid temporary address, they are bad for long-lived
  2069. * connections such as NFS mounts.
  2070. * RFC4941, section 3.6 suggests that:
  2071. * Individual applications, which have specific
  2072. * knowledge about the normal duration of connections,
  2073. * MAY override this as appropriate.
  2074. */
  2075. if (xs_addr(xprt)->sa_family == PF_INET6) {
  2076. ip6_sock_set_addr_preferences(sk,
  2077. IPV6_PREFER_SRC_PUBLIC);
  2078. }
  2079. xs_tcp_set_socket_timeouts(xprt, sock);
  2080. tcp_sock_set_nodelay(sk);
  2081. lock_sock(sk);
  2082. xs_save_old_callbacks(transport, sk);
  2083. sk->sk_user_data = xprt;
  2084. sk->sk_data_ready = xs_data_ready;
  2085. sk->sk_state_change = xs_tcp_state_change;
  2086. sk->sk_write_space = xs_tcp_write_space;
  2087. sk->sk_error_report = xs_error_report;
  2088. sk->sk_use_task_frag = false;
  2089. /* socket options */
  2090. sock_reset_flag(sk, SOCK_LINGER);
  2091. xprt_clear_connected(xprt);
  2092. /* Reset to new socket */
  2093. transport->sock = sock;
  2094. transport->inet = sk;
  2095. release_sock(sk);
  2096. }
  2097. if (!xprt_bound(xprt))
  2098. return -ENOTCONN;
  2099. xs_set_memalloc(xprt);
  2100. xs_stream_start_connect(transport);
  2101. /* Tell the socket layer to start connecting... */
  2102. set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
  2103. return kernel_connect(sock, (struct sockaddr_unsized *)xs_addr(xprt),
  2104. xprt->addrlen, O_NONBLOCK);
  2105. }
  2106. /**
  2107. * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
  2108. * @work: queued work item
  2109. *
  2110. * Invoked by a work queue tasklet.
  2111. */
  2112. static void xs_tcp_setup_socket(struct work_struct *work)
  2113. {
  2114. struct sock_xprt *transport =
  2115. container_of(work, struct sock_xprt, connect_worker.work);
  2116. struct socket *sock = transport->sock;
  2117. struct rpc_xprt *xprt = &transport->xprt;
  2118. int status;
  2119. unsigned int pflags = current->flags;
  2120. if (atomic_read(&xprt->swapper))
  2121. current->flags |= PF_MEMALLOC;
  2122. if (xprt_connected(xprt))
  2123. goto out;
  2124. if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT,
  2125. &transport->sock_state) ||
  2126. !sock) {
  2127. xs_reset_transport(transport);
  2128. sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family,
  2129. SOCK_STREAM, IPPROTO_TCP, true);
  2130. if (IS_ERR(sock)) {
  2131. xprt_wake_pending_tasks(xprt, PTR_ERR(sock));
  2132. goto out;
  2133. }
  2134. }
  2135. dprintk("RPC: worker connecting xprt %p via %s to "
  2136. "%s (port %s)\n", xprt,
  2137. xprt->address_strings[RPC_DISPLAY_PROTO],
  2138. xprt->address_strings[RPC_DISPLAY_ADDR],
  2139. xprt->address_strings[RPC_DISPLAY_PORT]);
  2140. status = xs_tcp_finish_connecting(xprt, sock);
  2141. trace_rpc_socket_connect(xprt, sock, status);
  2142. dprintk("RPC: %p connect status %d connected %d sock state %d\n",
  2143. xprt, -status, xprt_connected(xprt),
  2144. sock->sk->sk_state);
  2145. switch (status) {
  2146. case 0:
  2147. case -EINPROGRESS:
  2148. /* SYN_SENT! */
  2149. set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state);
  2150. if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
  2151. xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
  2152. fallthrough;
  2153. case -EALREADY:
  2154. goto out_unlock;
  2155. case -EADDRNOTAVAIL:
  2156. /* Source port number is unavailable. Try a new one! */
  2157. transport->srcport = 0;
  2158. status = -EAGAIN;
  2159. break;
  2160. case -EPERM:
  2161. /* Happens, for instance, if a BPF program is preventing
  2162. * the connect. Remap the error so upper layers can better
  2163. * deal with it.
  2164. */
  2165. status = -ECONNREFUSED;
  2166. fallthrough;
  2167. case -EINVAL:
  2168. /* Happens, for instance, if the user specified a link
  2169. * local IPv6 address without a scope-id.
  2170. */
  2171. case -ECONNREFUSED:
  2172. case -ECONNRESET:
  2173. case -ENETDOWN:
  2174. case -ENETUNREACH:
  2175. case -EHOSTUNREACH:
  2176. case -EADDRINUSE:
  2177. case -ENOBUFS:
  2178. case -ENOTCONN:
  2179. break;
  2180. default:
  2181. printk("%s: connect returned unhandled error %d\n",
  2182. __func__, status);
  2183. status = -EAGAIN;
  2184. }
  2185. /* xs_tcp_force_close() wakes tasks with a fixed error code.
  2186. * We need to wake them first to ensure the correct error code.
  2187. */
  2188. xprt_wake_pending_tasks(xprt, status);
  2189. xs_tcp_force_close(xprt);
  2190. out:
  2191. xprt_clear_connecting(xprt);
  2192. out_unlock:
  2193. xprt_unlock_connect(xprt, transport);
  2194. current_restore_flags(pflags, PF_MEMALLOC);
  2195. }
  2196. /*
  2197. * Transfer the connected socket to @upper_transport, then mark that
  2198. * xprt CONNECTED.
  2199. */
  2200. static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt,
  2201. struct sock_xprt *upper_transport)
  2202. {
  2203. struct sock_xprt *lower_transport =
  2204. container_of(lower_xprt, struct sock_xprt, xprt);
  2205. struct rpc_xprt *upper_xprt = &upper_transport->xprt;
  2206. if (!upper_transport->inet) {
  2207. struct socket *sock = lower_transport->sock;
  2208. struct sock *sk = sock->sk;
  2209. /* Avoid temporary address, they are bad for long-lived
  2210. * connections such as NFS mounts.
  2211. * RFC4941, section 3.6 suggests that:
  2212. * Individual applications, which have specific
  2213. * knowledge about the normal duration of connections,
  2214. * MAY override this as appropriate.
  2215. */
  2216. if (xs_addr(upper_xprt)->sa_family == PF_INET6)
  2217. ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC);
  2218. xs_tcp_set_socket_timeouts(upper_xprt, sock);
  2219. tcp_sock_set_nodelay(sk);
  2220. lock_sock(sk);
  2221. /* @sk is already connected, so it now has the RPC callbacks.
  2222. * Reach into @lower_transport to save the original ones.
  2223. */
  2224. upper_transport->old_data_ready = lower_transport->old_data_ready;
  2225. upper_transport->old_state_change = lower_transport->old_state_change;
  2226. upper_transport->old_write_space = lower_transport->old_write_space;
  2227. upper_transport->old_error_report = lower_transport->old_error_report;
  2228. sk->sk_user_data = upper_xprt;
  2229. /* socket options */
  2230. sock_reset_flag(sk, SOCK_LINGER);
  2231. xprt_clear_connected(upper_xprt);
  2232. upper_transport->sock = sock;
  2233. upper_transport->inet = sk;
  2234. upper_transport->file = lower_transport->file;
  2235. release_sock(sk);
  2236. /* Reset lower_transport before shutting down its clnt */
  2237. mutex_lock(&lower_transport->recv_mutex);
  2238. lower_transport->inet = NULL;
  2239. lower_transport->sock = NULL;
  2240. lower_transport->file = NULL;
  2241. xprt_clear_connected(lower_xprt);
  2242. xs_sock_reset_connection_flags(lower_xprt);
  2243. xs_stream_reset_connect(lower_transport);
  2244. mutex_unlock(&lower_transport->recv_mutex);
  2245. }
  2246. if (!xprt_bound(upper_xprt))
  2247. return -ENOTCONN;
  2248. xs_set_memalloc(upper_xprt);
  2249. if (!xprt_test_and_set_connected(upper_xprt)) {
  2250. upper_xprt->connect_cookie++;
  2251. clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state);
  2252. xprt_clear_connecting(upper_xprt);
  2253. upper_xprt->stat.connect_count++;
  2254. upper_xprt->stat.connect_time += (long)jiffies -
  2255. upper_xprt->stat.connect_start;
  2256. xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING);
  2257. }
  2258. return 0;
  2259. }
  2260. /**
  2261. * xs_tls_handshake_done - TLS handshake completion handler
  2262. * @data: address of xprt to wake
  2263. * @status: status of handshake
  2264. * @peerid: serial number of key containing the remote's identity
  2265. *
  2266. */
  2267. static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid)
  2268. {
  2269. struct rpc_xprt *lower_xprt = data;
  2270. struct sock_xprt *lower_transport =
  2271. container_of(lower_xprt, struct sock_xprt, xprt);
  2272. switch (status) {
  2273. case 0:
  2274. case -EACCES:
  2275. case -ETIMEDOUT:
  2276. lower_transport->xprt_err = status;
  2277. break;
  2278. default:
  2279. lower_transport->xprt_err = -EACCES;
  2280. }
  2281. complete(&lower_transport->handshake_done);
  2282. xprt_put(lower_xprt);
  2283. }
  2284. static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec)
  2285. {
  2286. struct sock_xprt *lower_transport =
  2287. container_of(lower_xprt, struct sock_xprt, xprt);
  2288. struct tls_handshake_args args = {
  2289. .ta_sock = lower_transport->sock,
  2290. .ta_done = xs_tls_handshake_done,
  2291. .ta_data = xprt_get(lower_xprt),
  2292. .ta_peername = lower_xprt->servername,
  2293. };
  2294. struct sock *sk = lower_transport->inet;
  2295. int rc;
  2296. init_completion(&lower_transport->handshake_done);
  2297. set_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state);
  2298. lower_transport->xprt_err = -ETIMEDOUT;
  2299. switch (xprtsec->policy) {
  2300. case RPC_XPRTSEC_TLS_ANON:
  2301. rc = tls_client_hello_anon(&args, GFP_KERNEL);
  2302. if (rc)
  2303. goto out_put_xprt;
  2304. break;
  2305. case RPC_XPRTSEC_TLS_X509:
  2306. args.ta_my_cert = xprtsec->cert_serial;
  2307. args.ta_my_privkey = xprtsec->privkey_serial;
  2308. rc = tls_client_hello_x509(&args, GFP_KERNEL);
  2309. if (rc)
  2310. goto out_put_xprt;
  2311. break;
  2312. default:
  2313. rc = -EACCES;
  2314. goto out_put_xprt;
  2315. }
  2316. rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done,
  2317. XS_TLS_HANDSHAKE_TO);
  2318. if (rc <= 0) {
  2319. tls_handshake_cancel(sk);
  2320. if (rc == 0)
  2321. rc = -ETIMEDOUT;
  2322. goto out_put_xprt;
  2323. }
  2324. rc = lower_transport->xprt_err;
  2325. out:
  2326. xs_stream_reset_connect(lower_transport);
  2327. clear_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state);
  2328. return rc;
  2329. out_put_xprt:
  2330. xprt_put(lower_xprt);
  2331. goto out;
  2332. }
  2333. /**
  2334. * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket
  2335. * @work: queued work item
  2336. *
  2337. * Invoked by a work queue tasklet.
  2338. *
  2339. * For RPC-with-TLS, there is a two-stage connection process.
  2340. *
  2341. * The "upper-layer xprt" is visible to the RPC consumer. Once it has
  2342. * been marked connected, the consumer knows that a TCP connection and
  2343. * a TLS session have been established.
  2344. *
  2345. * A "lower-layer xprt", created in this function, handles the mechanics
  2346. * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and
  2347. * then driving the TLS handshake. Once all that is complete, the upper
  2348. * layer xprt is marked connected.
  2349. */
  2350. static void xs_tcp_tls_setup_socket(struct work_struct *work)
  2351. {
  2352. struct sock_xprt *upper_transport =
  2353. container_of(work, struct sock_xprt, connect_worker.work);
  2354. struct rpc_clnt *upper_clnt = upper_transport->clnt;
  2355. struct rpc_xprt *upper_xprt = &upper_transport->xprt;
  2356. struct rpc_create_args args = {
  2357. .net = upper_xprt->xprt_net,
  2358. .protocol = upper_xprt->prot,
  2359. .address = (struct sockaddr *)&upper_xprt->addr,
  2360. .addrsize = upper_xprt->addrlen,
  2361. .timeout = upper_clnt->cl_timeout,
  2362. .servername = upper_xprt->servername,
  2363. .program = upper_clnt->cl_program,
  2364. .prognumber = upper_clnt->cl_prog,
  2365. .version = upper_clnt->cl_vers,
  2366. .authflavor = RPC_AUTH_TLS,
  2367. .cred = upper_clnt->cl_cred,
  2368. .xprtsec = {
  2369. .policy = RPC_XPRTSEC_NONE,
  2370. },
  2371. .stats = upper_clnt->cl_stats,
  2372. };
  2373. unsigned int pflags = current->flags;
  2374. struct rpc_clnt *lower_clnt;
  2375. struct rpc_xprt *lower_xprt;
  2376. int status;
  2377. if (atomic_read(&upper_xprt->swapper))
  2378. current->flags |= PF_MEMALLOC;
  2379. xs_stream_start_connect(upper_transport);
  2380. /* This implicitly sends an RPC_AUTH_TLS probe */
  2381. lower_clnt = rpc_create(&args);
  2382. if (IS_ERR(lower_clnt)) {
  2383. trace_rpc_tls_unavailable(upper_clnt, upper_xprt);
  2384. clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state);
  2385. xprt_clear_connecting(upper_xprt);
  2386. xprt_wake_pending_tasks(upper_xprt, PTR_ERR(lower_clnt));
  2387. xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING);
  2388. goto out_unlock;
  2389. }
  2390. /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on
  2391. * the lower xprt.
  2392. */
  2393. rcu_read_lock();
  2394. lower_xprt = rcu_dereference(lower_clnt->cl_xprt);
  2395. rcu_read_unlock();
  2396. if (wait_on_bit_lock(&lower_xprt->state, XPRT_LOCKED, TASK_KILLABLE))
  2397. goto out_unlock;
  2398. status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec);
  2399. if (status) {
  2400. trace_rpc_tls_not_started(upper_clnt, upper_xprt);
  2401. goto out_close;
  2402. }
  2403. status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport);
  2404. if (status)
  2405. goto out_close;
  2406. xprt_release_write(lower_xprt, NULL);
  2407. trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0);
  2408. rpc_shutdown_client(lower_clnt);
  2409. /* Check for ingress data that arrived before the socket's
  2410. * ->data_ready callback was set up.
  2411. */
  2412. xs_poll_check_readable(upper_transport);
  2413. out_unlock:
  2414. current_restore_flags(pflags, PF_MEMALLOC);
  2415. upper_transport->clnt = NULL;
  2416. xprt_unlock_connect(upper_xprt, upper_transport);
  2417. return;
  2418. out_close:
  2419. xprt_release_write(lower_xprt, NULL);
  2420. rpc_shutdown_client(lower_clnt);
  2421. /* xprt_force_disconnect() wakes tasks with a fixed tk_status code.
  2422. * Wake them first here to ensure they get our tk_status code.
  2423. */
  2424. xprt_wake_pending_tasks(upper_xprt, status);
  2425. xs_tcp_force_close(upper_xprt);
  2426. xprt_clear_connecting(upper_xprt);
  2427. goto out_unlock;
  2428. }
  2429. /**
  2430. * xs_connect - connect a socket to a remote endpoint
  2431. * @xprt: pointer to transport structure
  2432. * @task: address of RPC task that manages state of connect request
  2433. *
  2434. * TCP: If the remote end dropped the connection, delay reconnecting.
  2435. *
  2436. * UDP socket connects are synchronous, but we use a work queue anyway
  2437. * to guarantee that even unprivileged user processes can set up a
  2438. * socket on a privileged port.
  2439. *
  2440. * If a UDP socket connect fails, the delay behavior here prevents
  2441. * retry floods (hard mounts).
  2442. */
  2443. static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
  2444. {
  2445. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  2446. unsigned long delay = 0;
  2447. WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
  2448. if (transport->sock != NULL) {
  2449. dprintk("RPC: xs_connect delayed xprt %p for %lu "
  2450. "seconds\n", xprt, xprt->reestablish_timeout / HZ);
  2451. delay = xprt_reconnect_delay(xprt);
  2452. xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
  2453. } else
  2454. dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
  2455. transport->clnt = task->tk_client;
  2456. queue_delayed_work(xprtiod_workqueue,
  2457. &transport->connect_worker,
  2458. delay);
  2459. }
  2460. static void xs_wake_disconnect(struct sock_xprt *transport)
  2461. {
  2462. if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state))
  2463. xs_tcp_force_close(&transport->xprt);
  2464. }
  2465. static void xs_wake_write(struct sock_xprt *transport)
  2466. {
  2467. if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state))
  2468. xprt_write_space(&transport->xprt);
  2469. }
  2470. static void xs_wake_error(struct sock_xprt *transport)
  2471. {
  2472. int sockerr;
  2473. if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
  2474. return;
  2475. sockerr = xchg(&transport->xprt_err, 0);
  2476. if (sockerr < 0) {
  2477. xprt_wake_pending_tasks(&transport->xprt, sockerr);
  2478. xs_tcp_force_close(&transport->xprt);
  2479. }
  2480. }
  2481. static void xs_wake_pending(struct sock_xprt *transport)
  2482. {
  2483. if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state))
  2484. xprt_wake_pending_tasks(&transport->xprt, -EAGAIN);
  2485. }
  2486. static void xs_error_handle(struct work_struct *work)
  2487. {
  2488. struct sock_xprt *transport = container_of(work,
  2489. struct sock_xprt, error_worker);
  2490. xs_wake_disconnect(transport);
  2491. xs_wake_write(transport);
  2492. xs_wake_error(transport);
  2493. xs_wake_pending(transport);
  2494. }
  2495. /**
  2496. * xs_local_print_stats - display AF_LOCAL socket-specific stats
  2497. * @xprt: rpc_xprt struct containing statistics
  2498. * @seq: output file
  2499. *
  2500. */
  2501. static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
  2502. {
  2503. long idle_time = 0;
  2504. if (xprt_connected(xprt))
  2505. idle_time = (long)(jiffies - xprt->last_used) / HZ;
  2506. seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu "
  2507. "%llu %llu %lu %llu %llu\n",
  2508. xprt->stat.bind_count,
  2509. xprt->stat.connect_count,
  2510. xprt->stat.connect_time / HZ,
  2511. idle_time,
  2512. xprt->stat.sends,
  2513. xprt->stat.recvs,
  2514. xprt->stat.bad_xids,
  2515. xprt->stat.req_u,
  2516. xprt->stat.bklog_u,
  2517. xprt->stat.max_slots,
  2518. xprt->stat.sending_u,
  2519. xprt->stat.pending_u);
  2520. }
  2521. /**
  2522. * xs_udp_print_stats - display UDP socket-specific stats
  2523. * @xprt: rpc_xprt struct containing statistics
  2524. * @seq: output file
  2525. *
  2526. */
  2527. static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
  2528. {
  2529. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  2530. seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu "
  2531. "%lu %llu %llu\n",
  2532. transport->srcport,
  2533. xprt->stat.bind_count,
  2534. xprt->stat.sends,
  2535. xprt->stat.recvs,
  2536. xprt->stat.bad_xids,
  2537. xprt->stat.req_u,
  2538. xprt->stat.bklog_u,
  2539. xprt->stat.max_slots,
  2540. xprt->stat.sending_u,
  2541. xprt->stat.pending_u);
  2542. }
  2543. /**
  2544. * xs_tcp_print_stats - display TCP socket-specific stats
  2545. * @xprt: rpc_xprt struct containing statistics
  2546. * @seq: output file
  2547. *
  2548. */
  2549. static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
  2550. {
  2551. struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
  2552. long idle_time = 0;
  2553. if (xprt_connected(xprt))
  2554. idle_time = (long)(jiffies - xprt->last_used) / HZ;
  2555. seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu "
  2556. "%llu %llu %lu %llu %llu\n",
  2557. transport->srcport,
  2558. xprt->stat.bind_count,
  2559. xprt->stat.connect_count,
  2560. xprt->stat.connect_time / HZ,
  2561. idle_time,
  2562. xprt->stat.sends,
  2563. xprt->stat.recvs,
  2564. xprt->stat.bad_xids,
  2565. xprt->stat.req_u,
  2566. xprt->stat.bklog_u,
  2567. xprt->stat.max_slots,
  2568. xprt->stat.sending_u,
  2569. xprt->stat.pending_u);
  2570. }
  2571. /*
  2572. * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason
  2573. * we allocate pages instead doing a kmalloc like rpc_malloc is because we want
  2574. * to use the server side send routines.
  2575. */
  2576. static int bc_malloc(struct rpc_task *task)
  2577. {
  2578. struct rpc_rqst *rqst = task->tk_rqstp;
  2579. size_t size = rqst->rq_callsize;
  2580. struct page *page;
  2581. struct rpc_buffer *buf;
  2582. if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) {
  2583. WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n",
  2584. size);
  2585. return -EINVAL;
  2586. }
  2587. page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
  2588. if (!page)
  2589. return -ENOMEM;
  2590. buf = page_address(page);
  2591. buf->len = PAGE_SIZE;
  2592. rqst->rq_buffer = buf->data;
  2593. rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
  2594. return 0;
  2595. }
  2596. /*
  2597. * Free the space allocated in the bc_alloc routine
  2598. */
  2599. static void bc_free(struct rpc_task *task)
  2600. {
  2601. void *buffer = task->tk_rqstp->rq_buffer;
  2602. struct rpc_buffer *buf;
  2603. buf = container_of(buffer, struct rpc_buffer, data);
  2604. free_page((unsigned long)buf);
  2605. }
  2606. static int bc_sendto(struct rpc_rqst *req)
  2607. {
  2608. struct xdr_buf *xdr = &req->rq_snd_buf;
  2609. struct sock_xprt *transport =
  2610. container_of(req->rq_xprt, struct sock_xprt, xprt);
  2611. struct msghdr msg = {
  2612. .msg_flags = 0,
  2613. };
  2614. rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
  2615. (u32)xdr->len);
  2616. unsigned int sent = 0;
  2617. int err;
  2618. req->rq_xtime = ktime_get();
  2619. err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
  2620. if (err < 0)
  2621. return err;
  2622. err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);
  2623. xdr_free_bvec(xdr);
  2624. if (err < 0 || sent != (xdr->len + sizeof(marker)))
  2625. return -EAGAIN;
  2626. return sent;
  2627. }
  2628. /**
  2629. * bc_send_request - Send a backchannel Call on a TCP socket
  2630. * @req: rpc_rqst containing Call message to be sent
  2631. *
  2632. * xpt_mutex ensures @rqstp's whole message is written to the socket
  2633. * without interruption.
  2634. *
  2635. * Return values:
  2636. * %0 if the message was sent successfully
  2637. * %ENOTCONN if the message was not sent
  2638. */
  2639. static int bc_send_request(struct rpc_rqst *req)
  2640. {
  2641. struct svc_xprt *xprt;
  2642. int len;
  2643. /*
  2644. * Get the server socket associated with this callback xprt
  2645. */
  2646. xprt = req->rq_xprt->bc_xprt;
  2647. /*
  2648. * Grab the mutex to serialize data as the connection is shared
  2649. * with the fore channel
  2650. */
  2651. mutex_lock(&xprt->xpt_mutex);
  2652. if (test_bit(XPT_DEAD, &xprt->xpt_flags))
  2653. len = -ENOTCONN;
  2654. else
  2655. len = bc_sendto(req);
  2656. mutex_unlock(&xprt->xpt_mutex);
  2657. if (len > 0)
  2658. len = 0;
  2659. return len;
  2660. }
  /*
   * ->close method of bc_tcp_ops: nothing is torn down here, the
   * disconnect is simply reported as done.
   */
  static void bc_close(struct rpc_xprt *xprt)
  {
  	xprt_disconnect_done(xprt);
  }
  2665. static void bc_destroy(struct rpc_xprt *xprt)
  2666. {
  2667. dprintk("RPC: bc_destroy xprt %p\n", xprt);
  2668. xs_xprt_free(xprt);
  2669. module_put(THIS_MODULE);
  2670. }
  2671. static const struct rpc_xprt_ops xs_local_ops = {
  2672. .reserve_xprt = xprt_reserve_xprt,
  2673. .release_xprt = xprt_release_xprt,
  2674. .alloc_slot = xprt_alloc_slot,
  2675. .free_slot = xprt_free_slot,
  2676. .rpcbind = xs_local_rpcbind,
  2677. .set_port = xs_local_set_port,
  2678. .connect = xs_local_connect,
  2679. .buf_alloc = rpc_malloc,
  2680. .buf_free = rpc_free,
  2681. .prepare_request = xs_stream_prepare_request,
  2682. .send_request = xs_local_send_request,
  2683. .abort_send_request = xs_stream_abort_send_request,
  2684. .wait_for_reply_request = xprt_wait_for_reply_request_def,
  2685. .close = xs_close,
  2686. .destroy = xs_destroy,
  2687. .print_stats = xs_local_print_stats,
  2688. .enable_swap = xs_enable_swap,
  2689. .disable_swap = xs_disable_swap,
  2690. };
  2691. static const struct rpc_xprt_ops xs_udp_ops = {
  2692. .set_buffer_size = xs_udp_set_buffer_size,
  2693. .reserve_xprt = xprt_reserve_xprt_cong,
  2694. .release_xprt = xprt_release_xprt_cong,
  2695. .alloc_slot = xprt_alloc_slot,
  2696. .free_slot = xprt_free_slot,
  2697. .rpcbind = rpcb_getport_async,
  2698. .set_port = xs_set_port,
  2699. .connect = xs_connect,
  2700. .get_srcaddr = xs_sock_srcaddr,
  2701. .get_srcport = xs_sock_srcport,
  2702. .buf_alloc = rpc_malloc,
  2703. .buf_free = rpc_free,
  2704. .send_request = xs_udp_send_request,
  2705. .wait_for_reply_request = xprt_wait_for_reply_request_rtt,
  2706. .timer = xs_udp_timer,
  2707. .release_request = xprt_release_rqst_cong,
  2708. .close = xs_close,
  2709. .destroy = xs_destroy,
  2710. .print_stats = xs_udp_print_stats,
  2711. .enable_swap = xs_enable_swap,
  2712. .disable_swap = xs_disable_swap,
  2713. .inject_disconnect = xs_inject_disconnect,
  2714. };
  2715. static const struct rpc_xprt_ops xs_tcp_ops = {
  2716. .reserve_xprt = xprt_reserve_xprt,
  2717. .release_xprt = xprt_release_xprt,
  2718. .alloc_slot = xprt_alloc_slot,
  2719. .free_slot = xprt_free_slot,
  2720. .rpcbind = rpcb_getport_async,
  2721. .set_port = xs_set_port,
  2722. .connect = xs_connect,
  2723. .get_srcaddr = xs_sock_srcaddr,
  2724. .get_srcport = xs_sock_srcport,
  2725. .buf_alloc = rpc_malloc,
  2726. .buf_free = rpc_free,
  2727. .prepare_request = xs_stream_prepare_request,
  2728. .send_request = xs_tcp_send_request,
  2729. .abort_send_request = xs_stream_abort_send_request,
  2730. .wait_for_reply_request = xprt_wait_for_reply_request_def,
  2731. .close = xs_tcp_shutdown,
  2732. .destroy = xs_destroy,
  2733. .set_connect_timeout = xs_tcp_set_connect_timeout,
  2734. .print_stats = xs_tcp_print_stats,
  2735. .enable_swap = xs_enable_swap,
  2736. .disable_swap = xs_disable_swap,
  2737. .inject_disconnect = xs_inject_disconnect,
  2738. #ifdef CONFIG_SUNRPC_BACKCHANNEL
  2739. .bc_setup = xprt_setup_bc,
  2740. .bc_maxpayload = xs_tcp_bc_maxpayload,
  2741. .bc_num_slots = xprt_bc_max_slots,
  2742. .bc_free_rqst = xprt_free_bc_rqst,
  2743. .bc_destroy = xprt_destroy_bc,
  2744. #endif
  2745. };
  2746. /*
  2747. * The rpc_xprt_ops for the server backchannel
  2748. */
  2749. static const struct rpc_xprt_ops bc_tcp_ops = {
  2750. .reserve_xprt = xprt_reserve_xprt,
  2751. .release_xprt = xprt_release_xprt,
  2752. .alloc_slot = xprt_alloc_slot,
  2753. .free_slot = xprt_free_slot,
  2754. .buf_alloc = bc_malloc,
  2755. .buf_free = bc_free,
  2756. .send_request = bc_send_request,
  2757. .wait_for_reply_request = xprt_wait_for_reply_request_def,
  2758. .close = bc_close,
  2759. .destroy = bc_destroy,
  2760. .print_stats = xs_tcp_print_stats,
  2761. .enable_swap = xs_enable_swap,
  2762. .disable_swap = xs_disable_swap,
  2763. .inject_disconnect = xs_inject_disconnect,
  2764. };
  2765. static int xs_init_anyaddr(const int family, struct sockaddr *sap)
  2766. {
  2767. static const struct sockaddr_in sin = {
  2768. .sin_family = AF_INET,
  2769. .sin_addr.s_addr = htonl(INADDR_ANY),
  2770. };
  2771. static const struct sockaddr_in6 sin6 = {
  2772. .sin6_family = AF_INET6,
  2773. .sin6_addr = IN6ADDR_ANY_INIT,
  2774. };
  2775. switch (family) {
  2776. case AF_LOCAL:
  2777. break;
  2778. case AF_INET:
  2779. memcpy(sap, &sin, sizeof(sin));
  2780. break;
  2781. case AF_INET6:
  2782. memcpy(sap, &sin6, sizeof(sin6));
  2783. break;
  2784. default:
  2785. dprintk("RPC: %s: Bad address family\n", __func__);
  2786. return -EAFNOSUPPORT;
  2787. }
  2788. return 0;
  2789. }
  2790. static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
  2791. unsigned int slot_table_size,
  2792. unsigned int max_slot_table_size)
  2793. {
  2794. struct rpc_xprt *xprt;
  2795. struct sock_xprt *new;
  2796. if (args->addrlen > sizeof(xprt->addr)) {
  2797. dprintk("RPC: xs_setup_xprt: address too large\n");
  2798. return ERR_PTR(-EBADF);
  2799. }
  2800. xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
  2801. max_slot_table_size);
  2802. if (xprt == NULL) {
  2803. dprintk("RPC: xs_setup_xprt: couldn't allocate "
  2804. "rpc_xprt\n");
  2805. return ERR_PTR(-ENOMEM);
  2806. }
  2807. new = container_of(xprt, struct sock_xprt, xprt);
  2808. mutex_init(&new->recv_mutex);
  2809. memcpy(&xprt->addr, args->dstaddr, args->addrlen);
  2810. xprt->addrlen = args->addrlen;
  2811. if (args->srcaddr)
  2812. memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
  2813. else {
  2814. int err;
  2815. err = xs_init_anyaddr(args->dstaddr->sa_family,
  2816. (struct sockaddr *)&new->srcaddr);
  2817. if (err != 0) {
  2818. xprt_free(xprt);
  2819. return ERR_PTR(err);
  2820. }
  2821. }
  2822. return xprt;
  2823. }
  2824. static const struct rpc_timeout xs_local_default_timeout = {
  2825. .to_initval = 10 * HZ,
  2826. .to_maxval = 10 * HZ,
  2827. .to_retries = 2,
  2828. };
  2829. /**
  2830. * xs_setup_local - Set up transport to use an AF_LOCAL socket
  2831. * @args: rpc transport creation arguments
  2832. *
  2833. * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP
  2834. */
  2835. static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
  2836. {
  2837. struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr;
  2838. struct sock_xprt *transport;
  2839. struct rpc_xprt *xprt;
  2840. struct rpc_xprt *ret;
  2841. xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
  2842. xprt_max_tcp_slot_table_entries);
  2843. if (IS_ERR(xprt))
  2844. return xprt;
  2845. transport = container_of(xprt, struct sock_xprt, xprt);
  2846. xprt->prot = 0;
  2847. xprt->xprt_class = &xs_local_transport;
  2848. xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
  2849. xprt->bind_timeout = XS_BIND_TO;
  2850. xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
  2851. xprt->idle_timeout = XS_IDLE_DISC_TO;
  2852. xprt->ops = &xs_local_ops;
  2853. xprt->timeout = &xs_local_default_timeout;
  2854. INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
  2855. INIT_WORK(&transport->error_worker, xs_error_handle);
  2856. INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
  2857. switch (sun->sun_family) {
  2858. case AF_LOCAL:
  2859. if (sun->sun_path[0] != '/' && sun->sun_path[0] != '\0') {
  2860. dprintk("RPC: bad AF_LOCAL address: %s\n",
  2861. sun->sun_path);
  2862. ret = ERR_PTR(-EINVAL);
  2863. goto out_err;
  2864. }
  2865. xprt_set_bound(xprt);
  2866. xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
  2867. break;
  2868. default:
  2869. ret = ERR_PTR(-EAFNOSUPPORT);
  2870. goto out_err;
  2871. }
  2872. dprintk("RPC: set up xprt to %s via AF_LOCAL\n",
  2873. xprt->address_strings[RPC_DISPLAY_ADDR]);
  2874. if (try_module_get(THIS_MODULE))
  2875. return xprt;
  2876. ret = ERR_PTR(-EINVAL);
  2877. out_err:
  2878. xs_xprt_free(xprt);
  2879. return ret;
  2880. }
  2881. static const struct rpc_timeout xs_udp_default_timeout = {
  2882. .to_initval = 5 * HZ,
  2883. .to_maxval = 30 * HZ,
  2884. .to_increment = 5 * HZ,
  2885. .to_retries = 5,
  2886. };
  2887. /**
  2888. * xs_setup_udp - Set up transport to use a UDP socket
  2889. * @args: rpc transport creation arguments
  2890. *
  2891. */
  2892. static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
  2893. {
  2894. struct sockaddr *addr = args->dstaddr;
  2895. struct rpc_xprt *xprt;
  2896. struct sock_xprt *transport;
  2897. struct rpc_xprt *ret;
  2898. xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
  2899. xprt_udp_slot_table_entries);
  2900. if (IS_ERR(xprt))
  2901. return xprt;
  2902. transport = container_of(xprt, struct sock_xprt, xprt);
  2903. xprt->prot = IPPROTO_UDP;
  2904. xprt->xprt_class = &xs_udp_transport;
  2905. /* XXX: header size can vary due to auth type, IPv6, etc. */
  2906. xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
  2907. xprt->bind_timeout = XS_BIND_TO;
  2908. xprt->reestablish_timeout = XS_UDP_REEST_TO;
  2909. xprt->idle_timeout = XS_IDLE_DISC_TO;
  2910. xprt->ops = &xs_udp_ops;
  2911. xprt->timeout = &xs_udp_default_timeout;
  2912. INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
  2913. INIT_WORK(&transport->error_worker, xs_error_handle);
  2914. INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
  2915. switch (addr->sa_family) {
  2916. case AF_INET:
  2917. if (((struct sockaddr_in *)addr)->sin_port != htons(0))
  2918. xprt_set_bound(xprt);
  2919. xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
  2920. break;
  2921. case AF_INET6:
  2922. if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
  2923. xprt_set_bound(xprt);
  2924. xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
  2925. break;
  2926. default:
  2927. ret = ERR_PTR(-EAFNOSUPPORT);
  2928. goto out_err;
  2929. }
  2930. if (xprt_bound(xprt))
  2931. dprintk("RPC: set up xprt to %s (port %s) via %s\n",
  2932. xprt->address_strings[RPC_DISPLAY_ADDR],
  2933. xprt->address_strings[RPC_DISPLAY_PORT],
  2934. xprt->address_strings[RPC_DISPLAY_PROTO]);
  2935. else
  2936. dprintk("RPC: set up xprt to %s (autobind) via %s\n",
  2937. xprt->address_strings[RPC_DISPLAY_ADDR],
  2938. xprt->address_strings[RPC_DISPLAY_PROTO]);
  2939. if (try_module_get(THIS_MODULE))
  2940. return xprt;
  2941. ret = ERR_PTR(-EINVAL);
  2942. out_err:
  2943. xs_xprt_free(xprt);
  2944. return ret;
  2945. }
  2946. static const struct rpc_timeout xs_tcp_default_timeout = {
  2947. .to_initval = 60 * HZ,
  2948. .to_maxval = 60 * HZ,
  2949. .to_retries = 2,
  2950. };
  2951. /**
  2952. * xs_setup_tcp - Set up transport to use a TCP socket
  2953. * @args: rpc transport creation arguments
  2954. *
  2955. */
  2956. static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
  2957. {
  2958. struct sockaddr *addr = args->dstaddr;
  2959. struct rpc_xprt *xprt;
  2960. struct sock_xprt *transport;
  2961. struct rpc_xprt *ret;
  2962. unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
  2963. if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
  2964. max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
  2965. xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
  2966. max_slot_table_size);
  2967. if (IS_ERR(xprt))
  2968. return xprt;
  2969. transport = container_of(xprt, struct sock_xprt, xprt);
  2970. xprt->prot = IPPROTO_TCP;
  2971. xprt->xprt_class = &xs_tcp_transport;
  2972. xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
  2973. xprt->bind_timeout = XS_BIND_TO;
  2974. xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
  2975. xprt->idle_timeout = XS_IDLE_DISC_TO;
  2976. xprt->ops = &xs_tcp_ops;
  2977. xprt->timeout = &xs_tcp_default_timeout;
  2978. xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
  2979. if (args->reconnect_timeout)
  2980. xprt->max_reconnect_timeout = args->reconnect_timeout;
  2981. xprt->connect_timeout = xprt->timeout->to_initval *
  2982. (xprt->timeout->to_retries + 1);
  2983. if (args->connect_timeout)
  2984. xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout);
  2985. INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
  2986. INIT_WORK(&transport->error_worker, xs_error_handle);
  2987. INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
  2988. switch (addr->sa_family) {
  2989. case AF_INET:
  2990. if (((struct sockaddr_in *)addr)->sin_port != htons(0))
  2991. xprt_set_bound(xprt);
  2992. xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
  2993. break;
  2994. case AF_INET6:
  2995. if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
  2996. xprt_set_bound(xprt);
  2997. xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
  2998. break;
  2999. default:
  3000. ret = ERR_PTR(-EAFNOSUPPORT);
  3001. goto out_err;
  3002. }
  3003. if (xprt_bound(xprt))
  3004. dprintk("RPC: set up xprt to %s (port %s) via %s\n",
  3005. xprt->address_strings[RPC_DISPLAY_ADDR],
  3006. xprt->address_strings[RPC_DISPLAY_PORT],
  3007. xprt->address_strings[RPC_DISPLAY_PROTO]);
  3008. else
  3009. dprintk("RPC: set up xprt to %s (autobind) via %s\n",
  3010. xprt->address_strings[RPC_DISPLAY_ADDR],
  3011. xprt->address_strings[RPC_DISPLAY_PROTO]);
  3012. if (try_module_get(THIS_MODULE))
  3013. return xprt;
  3014. ret = ERR_PTR(-EINVAL);
  3015. out_err:
  3016. xs_xprt_free(xprt);
  3017. return ret;
  3018. }
  3019. /**
  3020. * xs_setup_tcp_tls - Set up transport to use a TCP with TLS
  3021. * @args: rpc transport creation arguments
  3022. *
  3023. */
  3024. static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args)
  3025. {
  3026. struct sockaddr *addr = args->dstaddr;
  3027. struct rpc_xprt *xprt;
  3028. struct sock_xprt *transport;
  3029. struct rpc_xprt *ret;
  3030. unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
  3031. if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
  3032. max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
  3033. xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
  3034. max_slot_table_size);
  3035. if (IS_ERR(xprt))
  3036. return xprt;
  3037. transport = container_of(xprt, struct sock_xprt, xprt);
  3038. xprt->prot = IPPROTO_TCP;
  3039. xprt->xprt_class = &xs_tcp_transport;
  3040. xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
  3041. xprt->bind_timeout = XS_BIND_TO;
  3042. xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
  3043. xprt->idle_timeout = XS_IDLE_DISC_TO;
  3044. xprt->ops = &xs_tcp_ops;
  3045. xprt->timeout = &xs_tcp_default_timeout;
  3046. xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
  3047. xprt->connect_timeout = xprt->timeout->to_initval *
  3048. (xprt->timeout->to_retries + 1);
  3049. INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
  3050. INIT_WORK(&transport->error_worker, xs_error_handle);
  3051. switch (args->xprtsec.policy) {
  3052. case RPC_XPRTSEC_TLS_ANON:
  3053. case RPC_XPRTSEC_TLS_X509:
  3054. xprt->xprtsec = args->xprtsec;
  3055. INIT_DELAYED_WORK(&transport->connect_worker,
  3056. xs_tcp_tls_setup_socket);
  3057. break;
  3058. default:
  3059. ret = ERR_PTR(-EACCES);
  3060. goto out_err;
  3061. }
  3062. switch (addr->sa_family) {
  3063. case AF_INET:
  3064. if (((struct sockaddr_in *)addr)->sin_port != htons(0))
  3065. xprt_set_bound(xprt);
  3066. xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
  3067. break;
  3068. case AF_INET6:
  3069. if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
  3070. xprt_set_bound(xprt);
  3071. xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
  3072. break;
  3073. default:
  3074. ret = ERR_PTR(-EAFNOSUPPORT);
  3075. goto out_err;
  3076. }
  3077. if (xprt_bound(xprt))
  3078. dprintk("RPC: set up xprt to %s (port %s) via %s\n",
  3079. xprt->address_strings[RPC_DISPLAY_ADDR],
  3080. xprt->address_strings[RPC_DISPLAY_PORT],
  3081. xprt->address_strings[RPC_DISPLAY_PROTO]);
  3082. else
  3083. dprintk("RPC: set up xprt to %s (autobind) via %s\n",
  3084. xprt->address_strings[RPC_DISPLAY_ADDR],
  3085. xprt->address_strings[RPC_DISPLAY_PROTO]);
  3086. if (try_module_get(THIS_MODULE))
  3087. return xprt;
  3088. ret = ERR_PTR(-EINVAL);
  3089. out_err:
  3090. xs_xprt_free(xprt);
  3091. return ret;
  3092. }
  3093. /**
  3094. * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
  3095. * @args: rpc transport creation arguments
  3096. *
  3097. */
  3098. static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
  3099. {
  3100. struct sockaddr *addr = args->dstaddr;
  3101. struct rpc_xprt *xprt;
  3102. struct sock_xprt *transport;
  3103. struct svc_sock *bc_sock;
  3104. struct rpc_xprt *ret;
  3105. xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
  3106. xprt_tcp_slot_table_entries);
  3107. if (IS_ERR(xprt))
  3108. return xprt;
  3109. transport = container_of(xprt, struct sock_xprt, xprt);
  3110. xprt->prot = IPPROTO_TCP;
  3111. xprt->xprt_class = &xs_bc_tcp_transport;
  3112. xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
  3113. xprt->timeout = &xs_tcp_default_timeout;
  3114. /* backchannel */
  3115. xprt_set_bound(xprt);
  3116. xprt->bind_timeout = 0;
  3117. xprt->reestablish_timeout = 0;
  3118. xprt->idle_timeout = 0;
  3119. xprt->ops = &bc_tcp_ops;
  3120. switch (addr->sa_family) {
  3121. case AF_INET:
  3122. xs_format_peer_addresses(xprt, "tcp",
  3123. RPCBIND_NETID_TCP);
  3124. break;
  3125. case AF_INET6:
  3126. xs_format_peer_addresses(xprt, "tcp",
  3127. RPCBIND_NETID_TCP6);
  3128. break;
  3129. default:
  3130. ret = ERR_PTR(-EAFNOSUPPORT);
  3131. goto out_err;
  3132. }
  3133. dprintk("RPC: set up xprt to %s (port %s) via %s\n",
  3134. xprt->address_strings[RPC_DISPLAY_ADDR],
  3135. xprt->address_strings[RPC_DISPLAY_PORT],
  3136. xprt->address_strings[RPC_DISPLAY_PROTO]);
  3137. /*
  3138. * Once we've associated a backchannel xprt with a connection,
  3139. * we want to keep it around as long as the connection lasts,
  3140. * in case we need to start using it for a backchannel again;
  3141. * this reference won't be dropped until bc_xprt is destroyed.
  3142. */
  3143. xprt_get(xprt);
  3144. args->bc_xprt->xpt_bc_xprt = xprt;
  3145. xprt->bc_xprt = args->bc_xprt;
  3146. bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
  3147. transport->sock = bc_sock->sk_sock;
  3148. transport->inet = bc_sock->sk_sk;
  3149. /*
  3150. * Since we don't want connections for the backchannel, we set
  3151. * the xprt status to connected
  3152. */
  3153. xprt_set_connected(xprt);
  3154. if (try_module_get(THIS_MODULE))
  3155. return xprt;
  3156. args->bc_xprt->xpt_bc_xprt = NULL;
  3157. args->bc_xprt->xpt_bc_xps = NULL;
  3158. xprt_put(xprt);
  3159. ret = ERR_PTR(-EINVAL);
  3160. out_err:
  3161. xs_xprt_free(xprt);
  3162. return ret;
  3163. }
  3164. static struct xprt_class xs_local_transport = {
  3165. .list = LIST_HEAD_INIT(xs_local_transport.list),
  3166. .name = "named UNIX socket",
  3167. .owner = THIS_MODULE,
  3168. .ident = XPRT_TRANSPORT_LOCAL,
  3169. .setup = xs_setup_local,
  3170. .netid = { "" },
  3171. };
  3172. static struct xprt_class xs_udp_transport = {
  3173. .list = LIST_HEAD_INIT(xs_udp_transport.list),
  3174. .name = "udp",
  3175. .owner = THIS_MODULE,
  3176. .ident = XPRT_TRANSPORT_UDP,
  3177. .setup = xs_setup_udp,
  3178. .netid = { "udp", "udp6", "" },
  3179. };
  3180. static struct xprt_class xs_tcp_transport = {
  3181. .list = LIST_HEAD_INIT(xs_tcp_transport.list),
  3182. .name = "tcp",
  3183. .owner = THIS_MODULE,
  3184. .ident = XPRT_TRANSPORT_TCP,
  3185. .setup = xs_setup_tcp,
  3186. .netid = { "tcp", "tcp6", "" },
  3187. };
  3188. static struct xprt_class xs_tcp_tls_transport = {
  3189. .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list),
  3190. .name = "tcp-with-tls",
  3191. .owner = THIS_MODULE,
  3192. .ident = XPRT_TRANSPORT_TCP_TLS,
  3193. .setup = xs_setup_tcp_tls,
  3194. .netid = { "tcp", "tcp6", "" },
  3195. };
  3196. static struct xprt_class xs_bc_tcp_transport = {
  3197. .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
  3198. .name = "tcp NFSv4.1 backchannel",
  3199. .owner = THIS_MODULE,
  3200. .ident = XPRT_TRANSPORT_BC_TCP,
  3201. .setup = xs_setup_bc_tcp,
  3202. .netid = { "" },
  3203. };
  3204. /**
  3205. * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
  3206. *
  3207. */
  3208. int init_socket_xprt(void)
  3209. {
  3210. if (!sunrpc_table_header)
  3211. sunrpc_table_header = register_sysctl("sunrpc", xs_tunables_table);
  3212. xprt_register_transport(&xs_local_transport);
  3213. xprt_register_transport(&xs_udp_transport);
  3214. xprt_register_transport(&xs_tcp_transport);
  3215. xprt_register_transport(&xs_tcp_tls_transport);
  3216. xprt_register_transport(&xs_bc_tcp_transport);
  3217. return 0;
  3218. }
  3219. /**
  3220. * cleanup_socket_xprt - remove xprtsock's sysctls, unregister
  3221. *
  3222. */
  3223. void cleanup_socket_xprt(void)
  3224. {
  3225. if (sunrpc_table_header) {
  3226. unregister_sysctl_table(sunrpc_table_header);
  3227. sunrpc_table_header = NULL;
  3228. }
  3229. xprt_unregister_transport(&xs_local_transport);
  3230. xprt_unregister_transport(&xs_udp_transport);
  3231. xprt_unregister_transport(&xs_tcp_transport);
  3232. xprt_unregister_transport(&xs_tcp_tls_transport);
  3233. xprt_unregister_transport(&xs_bc_tcp_transport);
  3234. }
  3235. static int param_set_portnr(const char *val, const struct kernel_param *kp)
  3236. {
  3237. return param_set_uint_minmax(val, kp,
  3238. RPC_MIN_RESVPORT,
  3239. RPC_MAX_RESVPORT);
  3240. }
  3241. static const struct kernel_param_ops param_ops_portnr = {
  3242. .set = param_set_portnr,
  3243. .get = param_get_uint,
  3244. };
  3245. #define param_check_portnr(name, p) \
  3246. __param_check(name, p, unsigned int);
  3247. module_param_named(min_resvport, xprt_min_resvport, portnr, 0644);
  3248. module_param_named(max_resvport, xprt_max_resvport, portnr, 0644);
  3249. static int param_set_slot_table_size(const char *val,
  3250. const struct kernel_param *kp)
  3251. {
  3252. return param_set_uint_minmax(val, kp,
  3253. RPC_MIN_SLOT_TABLE,
  3254. RPC_MAX_SLOT_TABLE);
  3255. }
  3256. static const struct kernel_param_ops param_ops_slot_table_size = {
  3257. .set = param_set_slot_table_size,
  3258. .get = param_get_uint,
  3259. };
  3260. #define param_check_slot_table_size(name, p) \
  3261. __param_check(name, p, unsigned int);
  3262. static int param_set_max_slot_table_size(const char *val,
  3263. const struct kernel_param *kp)
  3264. {
  3265. return param_set_uint_minmax(val, kp,
  3266. RPC_MIN_SLOT_TABLE,
  3267. RPC_MAX_SLOT_TABLE_LIMIT);
  3268. }
  3269. static const struct kernel_param_ops param_ops_max_slot_table_size = {
  3270. .set = param_set_max_slot_table_size,
  3271. .get = param_get_uint,
  3272. };
  3273. #define param_check_max_slot_table_size(name, p) \
  3274. __param_check(name, p, unsigned int);
  3275. module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
  3276. slot_table_size, 0644);
  3277. module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
  3278. max_slot_table_size, 0644);
  3279. module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
  3280. slot_table_size, 0644);