af_smc.c 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Shared Memory Communications over RDMA (SMC-R) and RoCE
  4. *
  5. * AF_SMC protocol family socket handler keeping the AF_INET sock address type
  6. * applies to SOCK_STREAM sockets only
  7. * offers an alternative communication option for TCP-protocol sockets
  8. * applicable with RoCE-cards only
  9. *
  10. * Initial restrictions:
  11. * - support for alternate links postponed
  12. *
  13. * Copyright IBM Corp. 2016, 2018
  14. *
  15. * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
  16. * based on prototype from Frank Blaschka
  17. */
  18. #define pr_fmt(fmt) "smc: " fmt
  19. #include <linux/module.h>
  20. #include <linux/socket.h>
  21. #include <linux/workqueue.h>
  22. #include <linux/in.h>
  23. #include <linux/sched/signal.h>
  24. #include <linux/if_vlan.h>
  25. #include <linux/rcupdate_wait.h>
  26. #include <linux/ctype.h>
  27. #include <linux/splice.h>
  28. #include <net/sock.h>
  29. #include <net/inet_common.h>
  30. #if IS_ENABLED(CONFIG_IPV6)
  31. #include <net/ipv6.h>
  32. #endif
  33. #include <net/tcp.h>
  34. #include <net/smc.h>
  35. #include <asm/ioctls.h>
  36. #include <net/net_namespace.h>
  37. #include <net/netns/generic.h>
  38. #include "smc_netns.h"
  39. #include "smc.h"
  40. #include "smc_clc.h"
  41. #include "smc_llc.h"
  42. #include "smc_cdc.h"
  43. #include "smc_core.h"
  44. #include "smc_ib.h"
  45. #include "smc_ism.h"
  46. #include "smc_pnet.h"
  47. #include "smc_netlink.h"
  48. #include "smc_tx.h"
  49. #include "smc_rx.h"
  50. #include "smc_close.h"
  51. #include "smc_stats.h"
  52. #include "smc_tracepoint.h"
  53. #include "smc_sysctl.h"
  54. #include "smc_inet.h"
  55. #include "smc_hs_bpf.h"
  56. static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
  57. * creation on server
  58. */
  59. static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
  60. * creation on client
  61. */
  62. static struct workqueue_struct *smc_tcp_ls_wq; /* wq for tcp listen work */
  63. struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
  64. struct workqueue_struct *smc_close_wq; /* wq for close work */
  65. static void smc_tcp_listen_work(struct work_struct *);
  66. static void smc_connect_work(struct work_struct *);
  67. int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb)
  68. {
  69. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  70. void *hdr;
  71. if (cb_ctx->pos[0])
  72. goto out;
  73. hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  74. &smc_gen_nl_family, NLM_F_MULTI,
  75. SMC_NETLINK_DUMP_HS_LIMITATION);
  76. if (!hdr)
  77. return -ENOMEM;
  78. if (nla_put_u8(skb, SMC_NLA_HS_LIMITATION_ENABLED,
  79. sock_net(skb->sk)->smc.limit_smc_hs))
  80. goto err;
  81. genlmsg_end(skb, hdr);
  82. cb_ctx->pos[0] = 1;
  83. out:
  84. return skb->len;
  85. err:
  86. genlmsg_cancel(skb, hdr);
  87. return -EMSGSIZE;
  88. }
  89. int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
  90. {
  91. sock_net(skb->sk)->smc.limit_smc_hs = true;
  92. return 0;
  93. }
  94. int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
  95. {
  96. sock_net(skb->sk)->smc.limit_smc_hs = false;
  97. return 0;
  98. }
  99. static void smc_set_keepalive(struct sock *sk, int val)
  100. {
  101. struct smc_sock *smc = smc_sk(sk);
  102. smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
  103. }
  104. static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
  105. struct sk_buff *skb,
  106. struct request_sock *req,
  107. struct dst_entry *dst,
  108. struct request_sock *req_unhash,
  109. bool *own_req,
  110. void (*opt_child_init)(struct sock *newsk,
  111. const struct sock *sk))
  112. {
  113. struct smc_sock *smc;
  114. struct sock *child;
  115. rcu_read_lock();
  116. smc = smc_clcsock_user_data_rcu(sk);
  117. if (!smc || !refcount_inc_not_zero(&smc->sk.sk_refcnt)) {
  118. rcu_read_unlock();
  119. smc = NULL;
  120. goto drop;
  121. }
  122. rcu_read_unlock();
  123. if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) >
  124. sk->sk_max_ack_backlog)
  125. goto drop;
  126. if (sk_acceptq_is_full(&smc->sk)) {
  127. NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
  128. goto drop;
  129. }
  130. /* passthrough to original syn recv sock fct */
  131. child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
  132. own_req, opt_child_init);
  133. /* child must not inherit smc or its ops */
  134. if (child) {
  135. rcu_assign_sk_user_data(child, NULL);
  136. /* v4-mapped sockets don't inherit parent ops. Don't restore. */
  137. if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops)
  138. inet_csk(child)->icsk_af_ops = smc->ori_af_ops;
  139. }
  140. sock_put(&smc->sk);
  141. return child;
  142. drop:
  143. dst_release(dst);
  144. tcp_listendrop(sk);
  145. if (smc)
  146. sock_put(&smc->sk);
  147. return NULL;
  148. }
  149. static bool smc_hs_congested(const struct sock *sk)
  150. {
  151. const struct smc_sock *smc;
  152. smc = smc_clcsock_user_data(sk);
  153. if (!smc)
  154. return true;
  155. if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
  156. return true;
  157. return false;
  158. }
  159. struct smc_hashinfo smc_v4_hashinfo = {
  160. .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
  161. };
  162. struct smc_hashinfo smc_v6_hashinfo = {
  163. .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
  164. };
  165. int smc_hash_sk(struct sock *sk)
  166. {
  167. struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  168. struct hlist_head *head;
  169. head = &h->ht;
  170. write_lock_bh(&h->lock);
  171. sk_add_node(sk, head);
  172. write_unlock_bh(&h->lock);
  173. sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  174. return 0;
  175. }
  176. void smc_unhash_sk(struct sock *sk)
  177. {
  178. struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  179. write_lock_bh(&h->lock);
  180. if (sk_del_node_init(sk))
  181. sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  182. write_unlock_bh(&h->lock);
  183. }
  184. /* This will be called before user really release sock_lock. So do the
  185. * work which we didn't do because of user hold the sock_lock in the
  186. * BH context
  187. */
  188. void smc_release_cb(struct sock *sk)
  189. {
  190. struct smc_sock *smc = smc_sk(sk);
  191. if (smc->conn.tx_in_release_sock) {
  192. smc_tx_pending(&smc->conn);
  193. smc->conn.tx_in_release_sock = false;
  194. }
  195. }
  196. struct proto smc_proto = {
  197. .name = "SMC",
  198. .owner = THIS_MODULE,
  199. .keepalive = smc_set_keepalive,
  200. .hash = smc_hash_sk,
  201. .unhash = smc_unhash_sk,
  202. .release_cb = smc_release_cb,
  203. .obj_size = sizeof(struct smc_sock),
  204. .h.smc_hash = &smc_v4_hashinfo,
  205. .slab_flags = SLAB_TYPESAFE_BY_RCU,
  206. };
  207. EXPORT_SYMBOL_GPL(smc_proto);
  208. struct proto smc_proto6 = {
  209. .name = "SMC6",
  210. .owner = THIS_MODULE,
  211. .keepalive = smc_set_keepalive,
  212. .hash = smc_hash_sk,
  213. .unhash = smc_unhash_sk,
  214. .release_cb = smc_release_cb,
  215. .obj_size = sizeof(struct smc_sock),
  216. .h.smc_hash = &smc_v6_hashinfo,
  217. .slab_flags = SLAB_TYPESAFE_BY_RCU,
  218. };
  219. EXPORT_SYMBOL_GPL(smc_proto6);
  220. static void smc_fback_restore_callbacks(struct smc_sock *smc)
  221. {
  222. struct sock *clcsk = smc->clcsock->sk;
  223. write_lock_bh(&clcsk->sk_callback_lock);
  224. rcu_assign_sk_user_data(clcsk, NULL);
  225. smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
  226. smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
  227. smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
  228. smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
  229. write_unlock_bh(&clcsk->sk_callback_lock);
  230. }
  231. static void smc_restore_fallback_changes(struct smc_sock *smc)
  232. {
  233. if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
  234. smc->clcsock->file->private_data = smc->sk.sk_socket;
  235. smc->clcsock->file = NULL;
  236. smc_fback_restore_callbacks(smc);
  237. }
  238. }
  239. static int __smc_release(struct smc_sock *smc)
  240. {
  241. struct sock *sk = &smc->sk;
  242. int rc = 0;
  243. if (!smc->use_fallback) {
  244. rc = smc_close_active(smc);
  245. smc_sock_set_flag(sk, SOCK_DEAD);
  246. sk->sk_shutdown |= SHUTDOWN_MASK;
  247. } else {
  248. if (sk->sk_state != SMC_CLOSED) {
  249. if (sk->sk_state != SMC_LISTEN &&
  250. sk->sk_state != SMC_INIT)
  251. sock_put(sk); /* passive closing */
  252. if (sk->sk_state == SMC_LISTEN) {
  253. /* wake up clcsock accept */
  254. rc = kernel_sock_shutdown(smc->clcsock,
  255. SHUT_RDWR);
  256. }
  257. sk->sk_state = SMC_CLOSED;
  258. sk->sk_state_change(sk);
  259. }
  260. smc_restore_fallback_changes(smc);
  261. }
  262. sk->sk_prot->unhash(sk);
  263. if (sk->sk_state == SMC_CLOSED) {
  264. if (smc->clcsock) {
  265. release_sock(sk);
  266. smc_clcsock_release(smc);
  267. lock_sock(sk);
  268. }
  269. if (!smc->use_fallback)
  270. smc_conn_free(&smc->conn);
  271. }
  272. return rc;
  273. }
  274. int smc_release(struct socket *sock)
  275. {
  276. struct sock *sk = sock->sk;
  277. struct smc_sock *smc;
  278. int old_state, rc = 0;
  279. if (!sk)
  280. goto out;
  281. sock_hold(sk); /* sock_put below */
  282. smc = smc_sk(sk);
  283. old_state = sk->sk_state;
  284. /* cleanup for a dangling non-blocking connect */
  285. if (smc->connect_nonblock && old_state == SMC_INIT)
  286. tcp_abort(smc->clcsock->sk, ECONNABORTED);
  287. if (cancel_work_sync(&smc->connect_work))
  288. sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */
  289. if (sk->sk_state == SMC_LISTEN)
  290. /* smc_close_non_accepted() is called and acquires
  291. * sock lock for child sockets again
  292. */
  293. lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
  294. else
  295. lock_sock(sk);
  296. if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
  297. !smc->use_fallback)
  298. smc_close_active_abort(smc);
  299. rc = __smc_release(smc);
  300. /* detach socket */
  301. sock_orphan(sk);
  302. sock->sk = NULL;
  303. release_sock(sk);
  304. sock_put(sk); /* sock_hold above */
  305. sock_put(sk); /* final sock_put */
  306. out:
  307. return rc;
  308. }
  309. static void smc_destruct(struct sock *sk)
  310. {
  311. if (sk->sk_state != SMC_CLOSED)
  312. return;
  313. if (!sock_flag(sk, SOCK_DEAD))
  314. return;
  315. switch (sk->sk_family) {
  316. case AF_INET:
  317. inet_sock_destruct(sk);
  318. break;
  319. #if IS_ENABLED(CONFIG_IPV6)
  320. case AF_INET6:
  321. inet6_sock_destruct(sk);
  322. break;
  323. #endif
  324. }
  325. }
  326. static struct lock_class_key smc_key;
  327. static struct lock_class_key smc_slock_key;
  328. void smc_sk_init(struct net *net, struct sock *sk, int protocol)
  329. {
  330. struct smc_sock *smc = smc_sk(sk);
  331. sk->sk_state = SMC_INIT;
  332. sk->sk_destruct = smc_destruct;
  333. sk->sk_protocol = protocol;
  334. WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
  335. WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
  336. INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
  337. INIT_WORK(&smc->connect_work, smc_connect_work);
  338. INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
  339. INIT_LIST_HEAD(&smc->accept_q);
  340. sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key,
  341. "sk_lock-AF_SMC", &smc_key);
  342. spin_lock_init(&smc->accept_q_lock);
  343. spin_lock_init(&smc->conn.send_lock);
  344. sk->sk_prot->hash(sk);
  345. mutex_init(&smc->clcsock_release_lock);
  346. smc_init_saved_callbacks(smc);
  347. smc->limit_smc_hs = net->smc.limit_smc_hs;
  348. smc->use_fallback = false; /* assume rdma capability first */
  349. smc->fallback_rsn = 0;
  350. smc_close_init(smc);
  351. }
  352. static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
  353. int protocol)
  354. {
  355. struct proto *prot;
  356. struct sock *sk;
  357. prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
  358. sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
  359. if (!sk)
  360. return NULL;
  361. sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
  362. smc_sk_init(net, sk, protocol);
  363. return sk;
  364. }
  365. int smc_bind(struct socket *sock, struct sockaddr_unsized *uaddr,
  366. int addr_len)
  367. {
  368. struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
  369. struct sock *sk = sock->sk;
  370. struct smc_sock *smc;
  371. int rc;
  372. smc = smc_sk(sk);
  373. /* replicate tests from inet_bind(), to be safe wrt. future changes */
  374. rc = -EINVAL;
  375. if (addr_len < sizeof(struct sockaddr_in))
  376. goto out;
  377. rc = -EAFNOSUPPORT;
  378. if (addr->sin_family != AF_INET &&
  379. addr->sin_family != AF_INET6 &&
  380. addr->sin_family != AF_UNSPEC)
  381. goto out;
  382. /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
  383. if (addr->sin_family == AF_UNSPEC &&
  384. addr->sin_addr.s_addr != htonl(INADDR_ANY))
  385. goto out;
  386. lock_sock(sk);
  387. /* Check if socket is already active */
  388. rc = -EINVAL;
  389. if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
  390. goto out_rel;
  391. smc->clcsock->sk->sk_reuse = sk->sk_reuse;
  392. smc->clcsock->sk->sk_reuseport = sk->sk_reuseport;
  393. rc = kernel_bind(smc->clcsock, uaddr, addr_len);
  394. out_rel:
  395. release_sock(sk);
  396. out:
  397. return rc;
  398. }
  399. /* copy only relevant settings and flags of SOL_SOCKET level from smc to
  400. * clc socket (since smc is not called for these options from net/core)
  401. */
  402. #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
  403. (1UL << SOCK_KEEPOPEN) | \
  404. (1UL << SOCK_LINGER) | \
  405. (1UL << SOCK_BROADCAST) | \
  406. (1UL << SOCK_TIMESTAMP) | \
  407. (1UL << SOCK_DBG) | \
  408. (1UL << SOCK_RCVTSTAMP) | \
  409. (1UL << SOCK_RCVTSTAMPNS) | \
  410. (1UL << SOCK_LOCALROUTE) | \
  411. (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
  412. (1UL << SOCK_RXQ_OVFL) | \
  413. (1UL << SOCK_WIFI_STATUS) | \
  414. (1UL << SOCK_NOFCS) | \
  415. (1UL << SOCK_FILTER_LOCKED) | \
  416. (1UL << SOCK_TSTAMP_NEW))
  417. /* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
  418. static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
  419. unsigned long mask)
  420. {
  421. nsk->sk_userlocks = osk->sk_userlocks;
  422. if (osk->sk_userlocks & SOCK_SNDBUF_LOCK)
  423. nsk->sk_sndbuf = osk->sk_sndbuf;
  424. if (osk->sk_userlocks & SOCK_RCVBUF_LOCK)
  425. nsk->sk_rcvbuf = osk->sk_rcvbuf;
  426. }
  427. static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
  428. unsigned long mask)
  429. {
  430. /* options we don't get control via setsockopt for */
  431. nsk->sk_type = osk->sk_type;
  432. nsk->sk_sndtimeo = READ_ONCE(osk->sk_sndtimeo);
  433. nsk->sk_rcvtimeo = READ_ONCE(osk->sk_rcvtimeo);
  434. nsk->sk_mark = READ_ONCE(osk->sk_mark);
  435. nsk->sk_priority = READ_ONCE(osk->sk_priority);
  436. nsk->sk_rcvlowat = osk->sk_rcvlowat;
  437. nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
  438. nsk->sk_err = osk->sk_err;
  439. nsk->sk_flags &= ~mask;
  440. nsk->sk_flags |= osk->sk_flags & mask;
  441. smc_adjust_sock_bufsizes(nsk, osk, mask);
  442. }
  443. static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
  444. {
  445. smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
  446. }
  447. #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
  448. (1UL << SOCK_KEEPOPEN) | \
  449. (1UL << SOCK_LINGER) | \
  450. (1UL << SOCK_DBG))
  451. /* copy only settings and flags relevant for smc from clc to smc socket */
  452. static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
  453. {
  454. smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
  455. }
  456. /* register the new vzalloced sndbuf on all links */
  457. static int smcr_lgr_reg_sndbufs(struct smc_link *link,
  458. struct smc_buf_desc *snd_desc)
  459. {
  460. struct smc_link_group *lgr = link->lgr;
  461. int i, rc = 0;
  462. if (!snd_desc->is_vm)
  463. return -EINVAL;
  464. /* protect against parallel smcr_link_reg_buf() */
  465. down_write(&lgr->llc_conf_mutex);
  466. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  467. if (!smc_link_active(&lgr->lnk[i]))
  468. continue;
  469. rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
  470. if (rc)
  471. break;
  472. }
  473. up_write(&lgr->llc_conf_mutex);
  474. return rc;
  475. }
  476. /* register the new rmb on all links */
  477. static int smcr_lgr_reg_rmbs(struct smc_link *link,
  478. struct smc_buf_desc *rmb_desc)
  479. {
  480. struct smc_link_group *lgr = link->lgr;
  481. bool do_slow = false;
  482. int i, rc = 0;
  483. rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
  484. if (rc)
  485. return rc;
  486. down_read(&lgr->llc_conf_mutex);
  487. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  488. if (!smc_link_active(&lgr->lnk[i]))
  489. continue;
  490. if (!rmb_desc->is_reg_mr[link->link_idx]) {
  491. up_read(&lgr->llc_conf_mutex);
  492. goto slow_path;
  493. }
  494. }
  495. /* mr register already */
  496. goto fast_path;
  497. slow_path:
  498. do_slow = true;
  499. /* protect against parallel smc_llc_cli_rkey_exchange() and
  500. * parallel smcr_link_reg_buf()
  501. */
  502. down_write(&lgr->llc_conf_mutex);
  503. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  504. if (!smc_link_active(&lgr->lnk[i]))
  505. continue;
  506. rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
  507. if (rc)
  508. goto out;
  509. }
  510. fast_path:
  511. /* exchange confirm_rkey msg with peer */
  512. rc = smc_llc_do_confirm_rkey(link, rmb_desc);
  513. if (rc) {
  514. rc = -EFAULT;
  515. goto out;
  516. }
  517. rmb_desc->is_conf_rkey = true;
  518. out:
  519. do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
  520. smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
  521. return rc;
  522. }
  523. static int smcr_clnt_conf_first_link(struct smc_sock *smc)
  524. {
  525. struct smc_link *link = smc->conn.lnk;
  526. struct smc_llc_qentry *qentry;
  527. int rc;
  528. /* Receive CONFIRM LINK request from server over RoCE fabric.
  529. * Increasing the client's timeout by twice as much as the server's
  530. * timeout by default can temporarily avoid decline messages of
  531. * both sides crossing or colliding
  532. */
  533. qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
  534. SMC_LLC_CONFIRM_LINK);
  535. if (!qentry) {
  536. struct smc_clc_msg_decline dclc;
  537. rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
  538. SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
  539. return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
  540. }
  541. smc_llc_save_peer_uid(qentry);
  542. rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
  543. smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
  544. if (rc)
  545. return SMC_CLC_DECL_RMBE_EC;
  546. rc = smc_ib_modify_qp_rts(link);
  547. if (rc)
  548. return SMC_CLC_DECL_ERR_RDYLNK;
  549. smc_wr_remember_qp_attr(link);
  550. /* reg the sndbuf if it was vzalloced */
  551. if (smc->conn.sndbuf_desc->is_vm) {
  552. if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
  553. return SMC_CLC_DECL_ERR_REGBUF;
  554. }
  555. /* reg the rmb */
  556. if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
  557. return SMC_CLC_DECL_ERR_REGBUF;
  558. /* confirm_rkey is implicit on 1st contact */
  559. smc->conn.rmb_desc->is_conf_rkey = true;
  560. /* send CONFIRM LINK response over RoCE fabric */
  561. rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
  562. if (rc < 0)
  563. return SMC_CLC_DECL_TIMEOUT_CL;
  564. smc_llc_link_active(link);
  565. smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
  566. if (link->lgr->max_links > 1) {
  567. /* optional 2nd link, receive ADD LINK request from server */
  568. qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
  569. SMC_LLC_ADD_LINK);
  570. if (!qentry) {
  571. struct smc_clc_msg_decline dclc;
  572. rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
  573. SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
  574. if (rc == -EAGAIN)
  575. rc = 0; /* no DECLINE received, go with one link */
  576. return rc;
  577. }
  578. smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
  579. smc_llc_cli_add_link(link, qentry);
  580. }
  581. return 0;
  582. }
  583. static bool smc_isascii(char *hostname)
  584. {
  585. int i;
  586. for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
  587. if (!isascii(hostname[i]))
  588. return false;
  589. return true;
  590. }
  591. static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
  592. struct smc_clc_msg_accept_confirm *clc)
  593. {
  594. struct smc_clc_first_contact_ext *fce;
  595. int clc_v2_len;
  596. if (clc->hdr.version == SMC_V1 ||
  597. !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
  598. return;
  599. if (smc->conn.lgr->is_smcd) {
  600. memcpy(smc->conn.lgr->negotiated_eid, clc->d1.eid,
  601. SMC_MAX_EID_LEN);
  602. clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, d1);
  603. } else {
  604. memcpy(smc->conn.lgr->negotiated_eid, clc->r1.eid,
  605. SMC_MAX_EID_LEN);
  606. clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, r1);
  607. }
  608. fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len);
  609. smc->conn.lgr->peer_os = fce->os_type;
  610. smc->conn.lgr->peer_smc_release = fce->release;
  611. if (smc_isascii(fce->hostname))
  612. memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
  613. SMC_MAX_HOSTNAME_LEN);
  614. }
  615. static void smcr_conn_save_peer_info(struct smc_sock *smc,
  616. struct smc_clc_msg_accept_confirm *clc)
  617. {
  618. int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
  619. smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
  620. smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
  621. smc->conn.peer_rmbe_size = bufsize;
  622. atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
  623. smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
  624. }
  625. static void smcd_conn_save_peer_info(struct smc_sock *smc,
  626. struct smc_clc_msg_accept_confirm *clc)
  627. {
  628. int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
  629. smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
  630. smc->conn.peer_token = ntohll(clc->d0.token);
  631. /* msg header takes up space in the buffer */
  632. smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
  633. atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
  634. smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
  635. }
  636. static void smc_conn_save_peer_info(struct smc_sock *smc,
  637. struct smc_clc_msg_accept_confirm *clc)
  638. {
  639. if (smc->conn.lgr->is_smcd)
  640. smcd_conn_save_peer_info(smc, clc);
  641. else
  642. smcr_conn_save_peer_info(smc, clc);
  643. smc_conn_save_peer_info_fce(smc, clc);
  644. }
  645. static void smc_link_save_peer_info(struct smc_link *link,
  646. struct smc_clc_msg_accept_confirm *clc,
  647. struct smc_init_info *ini)
  648. {
  649. link->peer_qpn = ntoh24(clc->r0.qpn);
  650. memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
  651. memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
  652. link->peer_psn = ntoh24(clc->r0.psn);
  653. link->peer_mtu = clc->r0.qp_mtu;
  654. }
  655. static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
  656. struct smc_stats_fback *fback_arr)
  657. {
  658. int cnt;
  659. for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
  660. if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
  661. fback_arr[cnt].count++;
  662. break;
  663. }
  664. if (!fback_arr[cnt].fback_code) {
  665. fback_arr[cnt].fback_code = smc->fallback_rsn;
  666. fback_arr[cnt].count++;
  667. break;
  668. }
  669. }
  670. }
  671. static void smc_stat_fallback(struct smc_sock *smc)
  672. {
  673. struct net *net = sock_net(&smc->sk);
  674. mutex_lock(&net->smc.mutex_fback_rsn);
  675. if (smc->listen_smc) {
  676. smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
  677. net->smc.fback_rsn->srv_fback_cnt++;
  678. } else {
  679. smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
  680. net->smc.fback_rsn->clnt_fback_cnt++;
  681. }
  682. mutex_unlock(&net->smc.mutex_fback_rsn);
  683. }
  684. /* must be called under rcu read lock */
  685. static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
  686. {
  687. struct socket_wq *wq;
  688. __poll_t flags;
  689. wq = rcu_dereference(smc->sk.sk_wq);
  690. if (!skwq_has_sleeper(wq))
  691. return;
  692. /* wake up smc sk->sk_wq */
  693. if (!key) {
  694. /* sk_state_change */
  695. wake_up_interruptible_all(&wq->wait);
  696. } else {
  697. flags = key_to_poll(key);
  698. if (flags & (EPOLLIN | EPOLLOUT))
  699. /* sk_data_ready or sk_write_space */
  700. wake_up_interruptible_sync_poll(&wq->wait, flags);
  701. else if (flags & EPOLLERR)
  702. /* sk_error_report */
  703. wake_up_interruptible_poll(&wq->wait, flags);
  704. }
  705. }
  706. static int smc_fback_mark_woken(wait_queue_entry_t *wait,
  707. unsigned int mode, int sync, void *key)
  708. {
  709. struct smc_mark_woken *mark =
  710. container_of(wait, struct smc_mark_woken, wait_entry);
  711. mark->woken = true;
  712. mark->key = key;
  713. return 0;
  714. }
  715. static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
  716. void (*clcsock_callback)(struct sock *sk))
  717. {
  718. struct smc_mark_woken mark = { .woken = false };
  719. struct socket_wq *wq;
  720. init_waitqueue_func_entry(&mark.wait_entry,
  721. smc_fback_mark_woken);
  722. rcu_read_lock();
  723. wq = rcu_dereference(clcsk->sk_wq);
  724. if (!wq)
  725. goto out;
  726. add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
  727. clcsock_callback(clcsk);
  728. remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
  729. if (mark.woken)
  730. smc_fback_wakeup_waitqueue(smc, mark.key);
  731. out:
  732. rcu_read_unlock();
  733. }
  734. static void smc_fback_state_change(struct sock *clcsk)
  735. {
  736. struct smc_sock *smc;
  737. read_lock_bh(&clcsk->sk_callback_lock);
  738. smc = smc_clcsock_user_data(clcsk);
  739. if (smc)
  740. smc_fback_forward_wakeup(smc, clcsk,
  741. smc->clcsk_state_change);
  742. read_unlock_bh(&clcsk->sk_callback_lock);
  743. }
  744. static void smc_fback_data_ready(struct sock *clcsk)
  745. {
  746. struct smc_sock *smc;
  747. read_lock_bh(&clcsk->sk_callback_lock);
  748. smc = smc_clcsock_user_data(clcsk);
  749. if (smc)
  750. smc_fback_forward_wakeup(smc, clcsk,
  751. smc->clcsk_data_ready);
  752. read_unlock_bh(&clcsk->sk_callback_lock);
  753. }
  754. static void smc_fback_write_space(struct sock *clcsk)
  755. {
  756. struct smc_sock *smc;
  757. read_lock_bh(&clcsk->sk_callback_lock);
  758. smc = smc_clcsock_user_data(clcsk);
  759. if (smc)
  760. smc_fback_forward_wakeup(smc, clcsk,
  761. smc->clcsk_write_space);
  762. read_unlock_bh(&clcsk->sk_callback_lock);
  763. }
  764. static void smc_fback_error_report(struct sock *clcsk)
  765. {
  766. struct smc_sock *smc;
  767. read_lock_bh(&clcsk->sk_callback_lock);
  768. smc = smc_clcsock_user_data(clcsk);
  769. if (smc)
  770. smc_fback_forward_wakeup(smc, clcsk,
  771. smc->clcsk_error_report);
  772. read_unlock_bh(&clcsk->sk_callback_lock);
  773. }
  774. static void smc_fback_replace_callbacks(struct smc_sock *smc)
  775. {
  776. struct sock *clcsk = smc->clcsock->sk;
  777. write_lock_bh(&clcsk->sk_callback_lock);
  778. __rcu_assign_sk_user_data_with_flags(clcsk, smc, SK_USER_DATA_NOCOPY);
  779. smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
  780. &smc->clcsk_state_change);
  781. smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
  782. &smc->clcsk_data_ready);
  783. smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
  784. &smc->clcsk_write_space);
  785. smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
  786. &smc->clcsk_error_report);
  787. write_unlock_bh(&clcsk->sk_callback_lock);
  788. }
  789. static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
  790. {
  791. int rc = 0;
  792. mutex_lock(&smc->clcsock_release_lock);
  793. if (!smc->clcsock) {
  794. rc = -EBADF;
  795. goto out;
  796. }
  797. smc->use_fallback = true;
  798. smc->fallback_rsn = reason_code;
  799. smc_stat_fallback(smc);
  800. trace_smc_switch_to_fallback(smc, reason_code);
  801. if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
  802. smc->clcsock->file = smc->sk.sk_socket->file;
  803. smc->clcsock->file->private_data = smc->clcsock;
  804. smc->clcsock->wq.fasync_list =
  805. smc->sk.sk_socket->wq.fasync_list;
  806. smc->sk.sk_socket->wq.fasync_list = NULL;
  807. /* There might be some wait entries remaining
  808. * in smc sk->sk_wq and they should be woken up
  809. * as clcsock's wait queue is woken up.
  810. */
  811. smc_fback_replace_callbacks(smc);
  812. }
  813. out:
  814. mutex_unlock(&smc->clcsock_release_lock);
  815. return rc;
  816. }
  817. /* fall back during connect */
  818. static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
  819. {
  820. struct net *net = sock_net(&smc->sk);
  821. int rc = 0;
  822. rc = smc_switch_to_fallback(smc, reason_code);
  823. if (rc) { /* fallback fails */
  824. this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
  825. if (smc->sk.sk_state == SMC_INIT)
  826. sock_put(&smc->sk); /* passive closing */
  827. return rc;
  828. }
  829. smc_copy_sock_settings_to_clc(smc);
  830. smc->connect_nonblock = 0;
  831. if (smc->sk.sk_state == SMC_INIT)
  832. smc->sk.sk_state = SMC_ACTIVE;
  833. return 0;
  834. }
  835. /* decline and fall back during connect */
  836. static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
  837. u8 version)
  838. {
  839. struct net *net = sock_net(&smc->sk);
  840. int rc;
  841. if (reason_code < 0) { /* error, fallback is not possible */
  842. this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
  843. if (smc->sk.sk_state == SMC_INIT)
  844. sock_put(&smc->sk); /* passive closing */
  845. return reason_code;
  846. }
  847. if (reason_code != SMC_CLC_DECL_PEERDECL) {
  848. rc = smc_clc_send_decline(smc, reason_code, version);
  849. if (rc < 0) {
  850. this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
  851. if (smc->sk.sk_state == SMC_INIT)
  852. sock_put(&smc->sk); /* passive closing */
  853. return rc;
  854. }
  855. }
  856. return smc_connect_fallback(smc, reason_code);
  857. }
  858. static void smc_conn_abort(struct smc_sock *smc, int local_first)
  859. {
  860. struct smc_connection *conn = &smc->conn;
  861. struct smc_link_group *lgr = conn->lgr;
  862. bool lgr_valid = false;
  863. if (smc_conn_lgr_valid(conn))
  864. lgr_valid = true;
  865. smc_conn_free(conn);
  866. if (local_first && lgr_valid)
  867. smc_lgr_cleanup_early(lgr);
  868. }
  869. /* check if there is a rdma device available for this connection. */
  870. /* called for connect and listen */
  871. static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
  872. {
  873. /* PNET table look up: search active ib_device and port
  874. * within same PNETID that also contains the ethernet device
  875. * used for the internal TCP socket
  876. */
  877. smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
  878. if (!ini->check_smcrv2 && !ini->ib_dev)
  879. return SMC_CLC_DECL_NOSMCRDEV;
  880. if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
  881. return SMC_CLC_DECL_NOSMCRDEV;
  882. return 0;
  883. }
  884. /* check if there is an ISM device available for this connection. */
  885. /* called for connect and listen */
  886. static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
  887. {
  888. /* Find ISM device with same PNETID as connecting interface */
  889. smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
  890. if (!ini->ism_dev[0])
  891. return SMC_CLC_DECL_NOSMCDDEV;
  892. else
  893. ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
  894. return 0;
  895. }
  896. /* is chid unique for the ism devices that are already determined? */
  897. static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
  898. int cnt)
  899. {
  900. int i = (!ini->ism_dev[0]) ? 1 : 0;
  901. for (; i < cnt; i++)
  902. if (ini->ism_chid[i] == chid)
  903. return false;
  904. return true;
  905. }
  906. /* determine possible V2 ISM devices (either without PNETID or with PNETID plus
  907. * PNETID matching net_device)
  908. */
  909. static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
  910. struct smc_init_info *ini)
  911. {
  912. int rc = SMC_CLC_DECL_NOSMCDDEV;
  913. struct smcd_dev *smcd;
  914. int i = 1, entry = 1;
  915. bool is_emulated;
  916. u16 chid;
  917. if (smcd_indicated(ini->smc_type_v1))
  918. rc = 0; /* already initialized for V1 */
  919. mutex_lock(&smcd_dev_list.mutex);
  920. list_for_each_entry(smcd, &smcd_dev_list.list, list) {
  921. if (smcd->going_away || smcd == ini->ism_dev[0])
  922. continue;
  923. chid = smc_ism_get_chid(smcd);
  924. if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
  925. continue;
  926. is_emulated = __smc_ism_is_emulated(chid);
  927. if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
  928. smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
  929. if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
  930. /* It's the last GID-CHID entry left in CLC
  931. * Proposal SMC-Dv2 extension, but an Emulated-
  932. * ISM device will take two entries. So give
  933. * up it and try the next potential ISM device.
  934. */
  935. continue;
  936. ini->ism_dev[i] = smcd;
  937. ini->ism_chid[i] = chid;
  938. ini->is_smcd = true;
  939. rc = 0;
  940. i++;
  941. entry = is_emulated ? entry + 2 : entry + 1;
  942. if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
  943. break;
  944. }
  945. }
  946. mutex_unlock(&smcd_dev_list.mutex);
  947. ini->ism_offered_cnt = i - 1;
  948. if (!ini->ism_dev[0] && !ini->ism_dev[1])
  949. ini->smcd_version = 0;
  950. return rc;
  951. }
  952. /* Check for VLAN ID and register it on ISM device just for CLC handshake */
  953. static int smc_connect_ism_vlan_setup(struct smc_init_info *ini)
  954. {
  955. if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
  956. return SMC_CLC_DECL_ISMVLANERR;
  957. return 0;
  958. }
  959. static int smc_find_proposal_devices(struct smc_sock *smc,
  960. struct smc_init_info *ini)
  961. {
  962. int rc = 0;
  963. /* check if there is an ism device available */
  964. if (!(ini->smcd_version & SMC_V1) ||
  965. smc_find_ism_device(smc, ini) ||
  966. smc_connect_ism_vlan_setup(ini))
  967. ini->smcd_version &= ~SMC_V1;
  968. /* else ISM V1 is supported for this connection */
  969. /* check if there is an rdma device available */
  970. if (!(ini->smcr_version & SMC_V1) ||
  971. smc_find_rdma_device(smc, ini))
  972. ini->smcr_version &= ~SMC_V1;
  973. /* else RDMA is supported for this connection */
  974. ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
  975. ini->smcr_version & SMC_V1);
  976. /* check if there is an ism v2 device available */
  977. if (!(ini->smcd_version & SMC_V2) ||
  978. !smc_ism_is_v2_capable() ||
  979. smc_find_ism_v2_device_clnt(smc, ini))
  980. ini->smcd_version &= ~SMC_V2;
  981. /* check if there is an rdma v2 device available */
  982. ini->check_smcrv2 = true;
  983. ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
  984. if (!(ini->smcr_version & SMC_V2) ||
  985. #if IS_ENABLED(CONFIG_IPV6)
  986. (smc->clcsock->sk->sk_family == AF_INET6 &&
  987. !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) ||
  988. #endif
  989. !smc_clc_ueid_count() ||
  990. smc_find_rdma_device(smc, ini))
  991. ini->smcr_version &= ~SMC_V2;
  992. ini->check_smcrv2 = false;
  993. ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
  994. ini->smcr_version & SMC_V2);
  995. /* if neither ISM nor RDMA are supported, fallback */
  996. if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
  997. rc = SMC_CLC_DECL_NOSMCDEV;
  998. return rc;
  999. }
  1000. /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
  1001. * used, the VLAN ID will be registered again during the connection setup.
  1002. */
  1003. static int smc_connect_ism_vlan_cleanup(struct smc_init_info *ini)
  1004. {
  1005. if (!smcd_indicated(ini->smc_type_v1))
  1006. return 0;
  1007. if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
  1008. return SMC_CLC_DECL_CNFERR;
  1009. return 0;
  1010. }
/* Size of the buffer used to receive a CLC accept message: the
 * accept/confirm structure plus the v2.x first contact extension plus the
 * message trailer.
 */
#define SMC_CLC_MAX_ACCEPT_LEN \
	(sizeof(struct smc_clc_msg_accept_confirm) + \
	 sizeof(struct smc_clc_first_contact_ext_v2x) + \
	 sizeof(struct smc_clc_msg_trail))
  1015. /* CLC handshake during connect */
  1016. static int smc_connect_clc(struct smc_sock *smc,
  1017. struct smc_clc_msg_accept_confirm *aclc,
  1018. struct smc_init_info *ini)
  1019. {
  1020. int rc = 0;
  1021. /* do inband token exchange */
  1022. rc = smc_clc_send_proposal(smc, ini);
  1023. if (rc)
  1024. return rc;
  1025. /* receive SMC Accept CLC message */
  1026. return smc_clc_wait_msg(smc, aclc, SMC_CLC_MAX_ACCEPT_LEN,
  1027. SMC_CLC_ACCEPT, CLC_WAIT_TIME);
  1028. }
  1029. void smc_fill_gid_list(struct smc_link_group *lgr,
  1030. struct smc_gidlist *gidlist,
  1031. struct smc_ib_device *known_dev, u8 *known_gid)
  1032. {
  1033. struct smc_init_info *alt_ini = NULL;
  1034. memset(gidlist, 0, sizeof(*gidlist));
  1035. memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
  1036. alt_ini = kzalloc_obj(*alt_ini);
  1037. if (!alt_ini)
  1038. goto out;
  1039. alt_ini->vlan_id = lgr->vlan_id;
  1040. alt_ini->check_smcrv2 = true;
  1041. alt_ini->smcrv2.saddr = lgr->saddr;
  1042. smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
  1043. if (!alt_ini->smcrv2.ib_dev_v2)
  1044. goto out;
  1045. memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
  1046. SMC_GID_SIZE);
  1047. out:
  1048. kfree(alt_ini);
  1049. }
  1050. static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
  1051. struct smc_clc_msg_accept_confirm *aclc,
  1052. struct smc_init_info *ini)
  1053. {
  1054. struct smc_clc_first_contact_ext *fce =
  1055. smc_get_clc_first_contact_ext(aclc, false);
  1056. struct net *net = sock_net(&smc->sk);
  1057. int rc;
  1058. if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
  1059. return 0;
  1060. if (fce->v2_direct) {
  1061. memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
  1062. ini->smcrv2.uses_gateway = false;
  1063. } else {
  1064. if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
  1065. smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
  1066. ini->smcrv2.nexthop_mac,
  1067. &ini->smcrv2.uses_gateway))
  1068. return SMC_CLC_DECL_NOROUTE;
  1069. if (!ini->smcrv2.uses_gateway) {
  1070. /* mismatch: peer claims indirect, but its direct */
  1071. return SMC_CLC_DECL_NOINDIRECT;
  1072. }
  1073. }
  1074. ini->release_nr = fce->release;
  1075. rc = smc_clc_clnt_v2x_features_validate(fce, ini);
  1076. if (rc)
  1077. return rc;
  1078. return 0;
  1079. }
/* Set up the RDMA (SMC-R) connection on the client side after a CLC accept
 * has been received.
 *
 * @smc:  the connecting smc socket
 * @aclc: the received CLC accept message
 * @ini:  init info; filled with the peer's data from @aclc
 *
 * Connection creation through to the optional first-link confirmation runs
 * under smc_client_lgr_pending. On any failure after the connection was
 * created, the connection is aborted via smc_conn_abort().
 *
 * Return: 0 on success, otherwise a reason code (SMC_CLC_DECL_* or the
 * result of a failing helper).
 */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;
	u8 *eid = NULL;

	/* take over the peer's identification from the accept message */
	ini->is_smcd = false;
	ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
	memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
	memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
	ini->max_conns = SMC_CONN_PER_LGR_MAX;
	ini->max_links = SMC_LINKS_ADD_LNK_MAX;

	reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
	if (reason_code)
		return reason_code;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		/* no connection was created, so there is nothing to abort */
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->first_contact_local) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server: match on peer QP
		 * number and GID, and for V1 additionally on the peer MAC
		 */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
			    !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
				    SMC_GID_SIZE) &&
			    (aclc->hdr.version > SMC_V1 ||
			     !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
				     sizeof(l->peer_mac)))) {
				link = l;
				break;
			}
		}
		if (!link) {
			reason_code = SMC_CLC_DECL_NOSRVLINK;
			goto connect_abort;
		}
		smc_switch_link_and_count(&smc->conn, link);
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false)) {
		reason_code = SMC_CLC_DECL_MEM;
		goto connect_abort;
	}

	if (ini->first_contact_local)
		smc_link_save_peer_info(link, aclc, ini);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto connect_abort;
	}

	smc_rx_init(smc);

	if (ini->first_contact_local) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto connect_abort;
		}
	} else {
		/* reg sendbufs if they were vzalloced */
		if (smc->conn.sndbuf_desc->is_vm) {
			if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
				reason_code = SMC_CLC_DECL_ERR_REGBUF;
				goto connect_abort;
			}
		}
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
			reason_code = SMC_CLC_DECL_ERR_REGBUF;
			goto connect_abort;
		}
	}

	if (aclc->hdr.version > SMC_V1) {
		/* the EID of the accept is passed on to the confirm */
		eid = aclc->r1.eid;
		if (ini->first_contact_local)
			smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
					  link->smcibdev, link->gid);
	}

	reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
					   aclc->hdr.version, eid, ini);
	if (reason_code)
		goto connect_abort;

	smc_tx_init(smc);

	if (ini->first_contact_local) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto connect_abort;
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
connect_abort:
	/* undo the connection setup while still holding the pending mutex */
	smc_conn_abort(smc, ini->first_contact_local);
	mutex_unlock(&smc_client_lgr_pending);
	smc->connect_nonblock = 0;

	return reason_code;
}
  1190. /* The server has chosen one of the proposed ISM devices for the communication.
  1191. * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
  1192. */
  1193. static int
  1194. smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc,
  1195. struct smc_init_info *ini)
  1196. {
  1197. int i;
  1198. for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
  1199. if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
  1200. ini->ism_selected = i;
  1201. return 0;
  1202. }
  1203. }
  1204. return -EPROTO;
  1205. }
/* Set up the ISM (SMC-D) connection on the client side after a CLC accept
 * has been received.
 *
 * @smc:  the connecting smc socket
 * @aclc: the received CLC accept message
 * @ini:  init info; updated with the selected device and the peer GID
 *
 * Connection creation through to sending the CLC confirm runs under
 * smc_server_lgr_pending. On any failure after the connection was created,
 * the connection is aborted via smc_conn_abort().
 *
 * Return: 0 on success, otherwise an error or reason code.
 */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	u8 *eid = NULL;
	int rc = 0;

	ini->is_smcd = true;
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;

	if (aclc->hdr.version == SMC_V2) {
		if (ini->first_contact_peer) {
			struct smc_clc_first_contact_ext *fce =
				smc_get_clc_first_contact_ext(aclc, true);

			ini->release_nr = fce->release;
			rc = smc_clc_clnt_v2x_features_validate(fce, ini);
			if (rc)
				return rc;
		}

		/* sets ini->ism_selected to the device the server chose */
		rc = smc_v2_determine_accepted_chid(aclc, ini);
		if (rc)
			return rc;

		if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected]))
			ini->ism_peer_gid[ini->ism_selected].gid_ext =
						ntohll(aclc->d1.gid_ext);
		/* for non-Emulated-ISM devices, peer gid_ext remains 0. */
	}
	ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		/* no connection was created, so there is nothing to abort */
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	rc = smc_buf_create(smc, true);
	if (rc) {
		/* map the error to a CLC decline reason code */
		rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
		goto connect_abort;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd)) {
		rc = smcd_buf_attach(smc);
		if (rc) {
			rc = SMC_CLC_DECL_MEM;	/* try to fallback */
			goto connect_abort;
		}
	}
	smc_rx_init(smc);
	smc_tx_init(smc);

	/* the EID of a non-V1 accept is passed on to the confirm */
	if (aclc->hdr.version > SMC_V1)
		eid = aclc->d1.eid;

	rc = smc_clc_send_confirm(smc, ini->first_contact_local,
				  aclc->hdr.version, eid, ini);
	if (rc)
		goto connect_abort;
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
connect_abort:
	/* undo the connection setup while still holding the pending mutex */
	smc_conn_abort(smc, ini->first_contact_local);
	mutex_unlock(&smc_server_lgr_pending);
	smc->connect_nonblock = 0;

	return rc;
}
  1274. /* check if received accept type and version matches a proposed one */
  1275. static int smc_connect_check_aclc(struct smc_init_info *ini,
  1276. struct smc_clc_msg_accept_confirm *aclc)
  1277. {
  1278. if (aclc->hdr.version >= SMC_V2) {
  1279. if ((aclc->hdr.typev1 == SMC_TYPE_R &&
  1280. !smcr_indicated(ini->smc_type_v2)) ||
  1281. (aclc->hdr.typev1 == SMC_TYPE_D &&
  1282. !smcd_indicated(ini->smc_type_v2)))
  1283. return SMC_CLC_DECL_MODEUNSUPP;
  1284. } else {
  1285. if ((aclc->hdr.typev1 == SMC_TYPE_R &&
  1286. !smcr_indicated(ini->smc_type_v1)) ||
  1287. (aclc->hdr.typev1 == SMC_TYPE_D &&
  1288. !smcd_indicated(ini->smc_type_v1)))
  1289. return SMC_CLC_DECL_MODEUNSUPP;
  1290. }
  1291. return 0;
  1292. }
/* Perform the SMC part of connect: decide between fallback and SMC, run the
 * CLC handshake and set up the RDMA or ISM connection.
 *
 * Any failure from device selection onwards ends in
 * smc_connect_decline_fallback() with the collected reason code.
 *
 * Return: 0 on success, otherwise the result of smc_connect_fallback() or
 * smc_connect_decline_fallback().
 */
static int __smc_connect(struct smc_sock *smc)
{
	u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
	struct smc_clc_msg_accept_confirm *aclc;
	struct smc_init_info *ini = NULL;
	u8 *buf = NULL;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
						    version);

	ini = kzalloc_obj(*ini);
	if (!ini)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
						    version);

	/* start out by proposing both versions and both types */
	ini->smcd_version = SMC_V1 | SMC_V2;
	ini->smcr_version = SMC_V1 | SMC_V2;
	ini->smc_type_v1 = SMC_TYPE_B;
	ini->smc_type_v2 = SMC_TYPE_B;

	/* get vlan id from IP device; on failure ISM V1 and all of RDMA are
	 * taken out of the proposal
	 */
	if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
		ini->smcd_version &= ~SMC_V1;
		ini->smcr_version = 0;
		ini->smc_type_v1 = SMC_TYPE_N;
	}

	rc = smc_find_proposal_devices(smc, ini);
	if (rc)
		goto fallback;

	/* buffer receiving the CLC accept message */
	buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
	if (!buf) {
		rc = SMC_CLC_DECL_MEM;
		goto fallback;
	}
	aclc = (struct smc_clc_msg_accept_confirm *)buf;

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, aclc, ini);
	if (rc) {
		/* -EAGAIN on timeout, see tcp_recvmsg() */
		if (rc == -EAGAIN) {
			rc = -ETIMEDOUT;
			smc->sk.sk_err = ETIMEDOUT;
		}
		goto vlan_cleanup;
	}

	/* check if smc modes and versions of CLC proposal and accept match */
	rc = smc_connect_check_aclc(ini, aclc);
	/* from here on use the version of the accept, also for a decline */
	version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
	if (rc)
		goto vlan_cleanup;

	/* depending on previous steps, connect using rdma or ism */
	if (aclc->hdr.typev1 == SMC_TYPE_R) {
		ini->smcr_version = version;
		rc = smc_connect_rdma(smc, aclc, ini);
	} else if (aclc->hdr.typev1 == SMC_TYPE_D) {
		ini->smcd_version = version;
		rc = smc_connect_ism(smc, aclc, ini);
	}
	if (rc)
		goto vlan_cleanup;

	SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
	smc_connect_ism_vlan_cleanup(ini);
	kfree(buf);
	kfree(ini);
	return 0;

vlan_cleanup:
	smc_connect_ism_vlan_cleanup(ini);
	kfree(buf);
fallback:
	/* buf is not allocated (or was not obtained) on paths jumping here */
	kfree(ini);
	return smc_connect_decline_fallback(smc, rc, version);
}
/* Work function for connect_work of an smc socket.
 *
 * Waits (limited by the socket's send timeout) for the clcsock to leave the
 * SYN_SENT/SYN_RECV states, then runs __smc_connect() under the smc socket
 * lock. Errors are recorded in smc->sk.sk_err. Finally the state_change or
 * write_space callbacks are invoked unless the socket is already dead.
 */
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = READ_ONCE(smc->sk.sk_sndtimeo);
	int rc = 0;

	/* a send timeout of 0 is replaced by MAX_SCHEDULE_TIMEOUT */
	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		/* propagate the clcsock error to the smc socket */
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		/* -EPIPE is not treated as an error if the clcsock is in
		 * state ESTABLISHED or CLOSE_WAIT afterwards
		 */
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (rc == -ECONNREFUSED)
			smc->sk.sk_err = ECONNREFUSED;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		sock_put(&smc->sk); /* passive closing */
		goto out;
	}

	rc = __smc_connect(smc);
	/* only negative results are stored as socket error */
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
		if (smc->sk.sk_err) {
			smc->sk.sk_state_change(&smc->sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			smc->sk.sk_write_space(&smc->sk);
		}
	}
	release_sock(&smc->sk);
}
/* Connect an SMC socket to the peer given by @addr.
 *
 * @sock:  the SMC socket
 * @addr:  peer address; only AF_INET and AF_INET6 are accepted
 * @alen:  length of @addr
 * @flags: file flags; O_NONBLOCK selects the deferred (worker based) path
 *
 * Checks the socket state, starts the TCP connect on the internal CLC
 * socket and then either runs the SMC part of the connect inline via
 * __smc_connect() or, for O_NONBLOCK, queues connect_work and returns
 * -EINPROGRESS.
 *
 * Returns 0 once connected, otherwise the error code of the failing step.
 */
int smc_connect(struct socket *sock, struct sockaddr_unsized *addr,
		int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	/* first check the state of the struct socket ... */
	switch (sock->state) {
	default:
		rc = -EINVAL;
		goto out;
	case SS_CONNECTED:
		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
		goto out;
	case SS_CONNECTING:
		/* repeated call while connecting: done if SMC is active */
		if (sk->sk_state == SMC_ACTIVE)
			goto connected;
		break;
	case SS_UNCONNECTED:
		sock->state = SS_CONNECTING;
		break;
	}

	/* ... then the state of the SMC sock itself */
	switch (sk->sk_state) {
	default:
		/* rc still holds the initial -EINVAL */
		goto out;
	case SMC_CLOSED:
		/* report a pending socket error, else -ECONNABORTED */
		rc = sock_error(sk) ? : -ECONNABORTED;
		sock->state = SS_UNCONNECTED;
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	/* flag the CLC socket's TCP connect as SMC-capable */
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	/* a non-blocking connect has already been queued */
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, (struct sockaddr_unsized *)addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	/* in fallback mode only the TCP connect result matters */
	if (smc->use_fallback) {
		sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
		goto out;
	}
	sock_hold(&smc->sk); /* sock put in passive closing */
	if (flags & O_NONBLOCK) {
		/* defer the SMC part of the connect to smc_connect_work() */
		if (queue_work(smc_hs_wq, &smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
		goto out;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
	}

connected:
	rc = 0;
	sock->state = SS_CONNECTED;
out:
	release_sock(sk);
out_err:
	return rc;
}
  1489. static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
  1490. {
  1491. struct socket *new_clcsock = NULL;
  1492. struct sock *lsk = &lsmc->sk;
  1493. struct sock *new_sk;
  1494. int rc = -EINVAL;
  1495. release_sock(lsk);
  1496. new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
  1497. if (!new_sk) {
  1498. rc = -ENOMEM;
  1499. lsk->sk_err = ENOMEM;
  1500. *new_smc = NULL;
  1501. lock_sock(lsk);
  1502. goto out;
  1503. }
  1504. *new_smc = smc_sk(new_sk);
  1505. mutex_lock(&lsmc->clcsock_release_lock);
  1506. if (lsmc->clcsock)
  1507. rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
  1508. mutex_unlock(&lsmc->clcsock_release_lock);
  1509. lock_sock(lsk);
  1510. if (rc < 0 && rc != -EAGAIN)
  1511. lsk->sk_err = -rc;
  1512. if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
  1513. new_sk->sk_prot->unhash(new_sk);
  1514. if (new_clcsock)
  1515. sock_release(new_clcsock);
  1516. new_sk->sk_state = SMC_CLOSED;
  1517. smc_sock_set_flag(new_sk, SOCK_DEAD);
  1518. sock_put(new_sk); /* final */
  1519. *new_smc = NULL;
  1520. goto out;
  1521. }
  1522. /* new clcsock has inherited the smc listen-specific sk_data_ready
  1523. * function; switch it back to the original sk_data_ready function
  1524. */
  1525. new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
  1526. /* if new clcsock has also inherited the fallback-specific callback
  1527. * functions, switch them back to the original ones.
  1528. */
  1529. if (lsmc->use_fallback) {
  1530. if (lsmc->clcsk_state_change)
  1531. new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
  1532. if (lsmc->clcsk_write_space)
  1533. new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
  1534. if (lsmc->clcsk_error_report)
  1535. new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
  1536. }
  1537. (*new_smc)->clcsock = new_clcsock;
  1538. out:
  1539. return rc;
  1540. }
  1541. /* add a just created sock to the accept queue of the listen sock as
  1542. * candidate for a following socket accept call from user space
  1543. */
  1544. static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
  1545. {
  1546. struct smc_sock *par = smc_sk(parent);
  1547. sock_hold(sk); /* sock_put in smc_accept_unlink () */
  1548. spin_lock(&par->accept_q_lock);
  1549. list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
  1550. spin_unlock(&par->accept_q_lock);
  1551. sk_acceptq_added(parent);
  1552. }
  1553. /* remove a socket from the accept queue of its parental listening socket */
  1554. static void smc_accept_unlink(struct sock *sk)
  1555. {
  1556. struct smc_sock *par = smc_sk(sk)->listen_smc;
  1557. spin_lock(&par->accept_q_lock);
  1558. list_del_init(&smc_sk(sk)->accept_q);
  1559. spin_unlock(&par->accept_q_lock);
  1560. sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
  1561. sock_put(sk); /* sock_hold in smc_accept_enqueue */
  1562. }
  1563. /* remove a sock from the accept queue to bind it to a new socket created
  1564. * for a socket accept call from user space
  1565. */
  1566. struct sock *smc_accept_dequeue(struct sock *parent,
  1567. struct socket *new_sock)
  1568. {
  1569. struct smc_sock *isk, *n;
  1570. struct sock *new_sk;
  1571. list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
  1572. new_sk = (struct sock *)isk;
  1573. smc_accept_unlink(new_sk);
  1574. if (new_sk->sk_state == SMC_CLOSED) {
  1575. new_sk->sk_prot->unhash(new_sk);
  1576. if (isk->clcsock) {
  1577. sock_release(isk->clcsock);
  1578. isk->clcsock = NULL;
  1579. }
  1580. sock_put(new_sk); /* final */
  1581. continue;
  1582. }
  1583. if (new_sock) {
  1584. sock_graft(new_sk, new_sock);
  1585. new_sock->state = SS_CONNECTED;
  1586. if (isk->use_fallback) {
  1587. smc_sk(new_sk)->clcsock->file = new_sock->file;
  1588. isk->clcsock->file->private_data = isk->clcsock;
  1589. }
  1590. }
  1591. return new_sk;
  1592. }
  1593. return NULL;
  1594. }
  1595. /* clean up for a created but never accepted sock */
  1596. void smc_close_non_accepted(struct sock *sk)
  1597. {
  1598. struct smc_sock *smc = smc_sk(sk);
  1599. sock_hold(sk); /* sock_put below */
  1600. lock_sock(sk);
  1601. if (!sk->sk_lingertime)
  1602. /* wait for peer closing */
  1603. WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
  1604. __smc_release(smc);
  1605. release_sock(sk);
  1606. sock_put(sk); /* sock_hold above */
  1607. sock_put(sk); /* final sock_put */
  1608. }
  1609. static int smcr_serv_conf_first_link(struct smc_sock *smc)
  1610. {
  1611. struct smc_link *link = smc->conn.lnk;
  1612. struct smc_llc_qentry *qentry;
  1613. int rc;
  1614. /* reg the sndbuf if it was vzalloced*/
  1615. if (smc->conn.sndbuf_desc->is_vm) {
  1616. if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
  1617. return SMC_CLC_DECL_ERR_REGBUF;
  1618. }
  1619. /* reg the rmb */
  1620. if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
  1621. return SMC_CLC_DECL_ERR_REGBUF;
  1622. /* send CONFIRM LINK request to client over the RoCE fabric */
  1623. rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
  1624. if (rc < 0)
  1625. return SMC_CLC_DECL_TIMEOUT_CL;
  1626. /* receive CONFIRM LINK response from client over the RoCE fabric */
  1627. qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
  1628. SMC_LLC_CONFIRM_LINK);
  1629. if (!qentry) {
  1630. struct smc_clc_msg_decline dclc;
  1631. rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
  1632. SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
  1633. return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
  1634. }
  1635. smc_llc_save_peer_uid(qentry);
  1636. rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
  1637. smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
  1638. if (rc)
  1639. return SMC_CLC_DECL_RMBE_EC;
  1640. /* confirm_rkey is implicit on 1st contact */
  1641. smc->conn.rmb_desc->is_conf_rkey = true;
  1642. smc_llc_link_active(link);
  1643. smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
  1644. if (link->lgr->max_links > 1) {
  1645. down_write(&link->lgr->llc_conf_mutex);
  1646. /* initial contact - try to establish second link */
  1647. smc_llc_srv_add_link(link, NULL);
  1648. up_write(&link->lgr->llc_conf_mutex);
  1649. }
  1650. return 0;
  1651. }
  1652. /* listen worker: finish */
  1653. static void smc_listen_out(struct smc_sock *new_smc)
  1654. {
  1655. struct smc_sock *lsmc = new_smc->listen_smc;
  1656. struct sock *newsmcsk = &new_smc->sk;
  1657. if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
  1658. atomic_dec(&lsmc->queued_smc_hs);
  1659. release_sock(newsmcsk); /* lock in smc_listen_work() */
  1660. if (lsmc->sk.sk_state == SMC_LISTEN) {
  1661. lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
  1662. smc_accept_enqueue(&lsmc->sk, newsmcsk);
  1663. release_sock(&lsmc->sk);
  1664. } else { /* no longer listening */
  1665. smc_close_non_accepted(newsmcsk);
  1666. }
  1667. /* Wake up accept */
  1668. lsmc->sk.sk_data_ready(&lsmc->sk);
  1669. sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
  1670. }
  1671. /* listen worker: finish in state connected */
  1672. static void smc_listen_out_connected(struct smc_sock *new_smc)
  1673. {
  1674. struct sock *newsmcsk = &new_smc->sk;
  1675. if (newsmcsk->sk_state == SMC_INIT)
  1676. newsmcsk->sk_state = SMC_ACTIVE;
  1677. smc_listen_out(new_smc);
  1678. }
  1679. /* listen worker: finish in error state */
  1680. static void smc_listen_out_err(struct smc_sock *new_smc)
  1681. {
  1682. struct sock *newsmcsk = &new_smc->sk;
  1683. struct net *net = sock_net(newsmcsk);
  1684. this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
  1685. if (newsmcsk->sk_state == SMC_INIT)
  1686. sock_put(&new_smc->sk); /* passive closing */
  1687. newsmcsk->sk_state = SMC_CLOSED;
  1688. smc_listen_out(new_smc);
  1689. }
  1690. /* listen worker: decline and fall back if possible */
  1691. static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
  1692. int local_first, u8 version)
  1693. {
  1694. /* RDMA setup failed, switch back to TCP */
  1695. smc_conn_abort(new_smc, local_first);
  1696. if (reason_code < 0 ||
  1697. smc_switch_to_fallback(new_smc, reason_code)) {
  1698. /* error, no fallback possible */
  1699. smc_listen_out_err(new_smc);
  1700. return;
  1701. }
  1702. if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
  1703. if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
  1704. smc_listen_out_err(new_smc);
  1705. return;
  1706. }
  1707. }
  1708. smc_listen_out_connected(new_smc);
  1709. }
  1710. /* listen worker: version checking */
  1711. static int smc_listen_v2_check(struct smc_sock *new_smc,
  1712. struct smc_clc_msg_proposal *pclc,
  1713. struct smc_init_info *ini)
  1714. {
  1715. struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
  1716. struct smc_clc_v2_extension *pclc_v2_ext;
  1717. int rc = SMC_CLC_DECL_PEERNOSMC;
  1718. ini->smc_type_v1 = pclc->hdr.typev1;
  1719. ini->smc_type_v2 = pclc->hdr.typev2;
  1720. ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
  1721. ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
  1722. if (pclc->hdr.version > SMC_V1) {
  1723. if (smcd_indicated(ini->smc_type_v2))
  1724. ini->smcd_version |= SMC_V2;
  1725. if (smcr_indicated(ini->smc_type_v2))
  1726. ini->smcr_version |= SMC_V2;
  1727. }
  1728. if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
  1729. rc = SMC_CLC_DECL_PEERNOSMC;
  1730. goto out;
  1731. }
  1732. pclc_v2_ext = smc_get_clc_v2_ext(pclc);
  1733. if (!pclc_v2_ext) {
  1734. ini->smcd_version &= ~SMC_V2;
  1735. ini->smcr_version &= ~SMC_V2;
  1736. rc = SMC_CLC_DECL_NOV2EXT;
  1737. goto out;
  1738. }
  1739. pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
  1740. if (ini->smcd_version & SMC_V2) {
  1741. if (!smc_ism_is_v2_capable()) {
  1742. ini->smcd_version &= ~SMC_V2;
  1743. rc = SMC_CLC_DECL_NOISM2SUPP;
  1744. } else if (!pclc_smcd_v2_ext) {
  1745. ini->smcd_version &= ~SMC_V2;
  1746. rc = SMC_CLC_DECL_NOV2DEXT;
  1747. } else if (!pclc_v2_ext->hdr.eid_cnt &&
  1748. !pclc_v2_ext->hdr.flag.seid) {
  1749. ini->smcd_version &= ~SMC_V2;
  1750. rc = SMC_CLC_DECL_NOUEID;
  1751. }
  1752. }
  1753. if (ini->smcr_version & SMC_V2) {
  1754. if (!pclc_v2_ext->hdr.eid_cnt) {
  1755. ini->smcr_version &= ~SMC_V2;
  1756. rc = SMC_CLC_DECL_NOUEID;
  1757. }
  1758. }
  1759. ini->release_nr = pclc_v2_ext->hdr.flag.release;
  1760. if (pclc_v2_ext->hdr.flag.release > SMC_RELEASE)
  1761. ini->release_nr = SMC_RELEASE;
  1762. out:
  1763. if (!ini->smcd_version && !ini->smcr_version)
  1764. return rc;
  1765. return 0;
  1766. }
  1767. /* listen worker: check prefixes */
  1768. static int smc_listen_prfx_check(struct smc_sock *new_smc,
  1769. struct smc_clc_msg_proposal *pclc)
  1770. {
  1771. struct smc_clc_msg_proposal_prefix *pclc_prfx;
  1772. struct socket *newclcsock = new_smc->clcsock;
  1773. if (pclc->hdr.typev1 == SMC_TYPE_N)
  1774. return 0;
  1775. pclc_prfx = smc_clc_proposal_get_prefix(pclc);
  1776. if (!pclc_prfx)
  1777. return -EPROTO;
  1778. if (smc_clc_prfx_match(newclcsock, pclc_prfx))
  1779. return SMC_CLC_DECL_DIFFPREFIX;
  1780. return 0;
  1781. }
  1782. /* listen worker: initialize connection and buffers */
  1783. static int smc_listen_rdma_init(struct smc_sock *new_smc,
  1784. struct smc_init_info *ini)
  1785. {
  1786. int rc;
  1787. /* allocate connection / link group */
  1788. rc = smc_conn_create(new_smc, ini);
  1789. if (rc)
  1790. return rc;
  1791. /* create send buffer and rmb */
  1792. if (smc_buf_create(new_smc, false)) {
  1793. smc_conn_abort(new_smc, ini->first_contact_local);
  1794. return SMC_CLC_DECL_MEM;
  1795. }
  1796. return 0;
  1797. }
  1798. /* listen worker: initialize connection and buffers for SMC-D */
  1799. static int smc_listen_ism_init(struct smc_sock *new_smc,
  1800. struct smc_init_info *ini)
  1801. {
  1802. int rc;
  1803. rc = smc_conn_create(new_smc, ini);
  1804. if (rc)
  1805. return rc;
  1806. /* Create send and receive buffers */
  1807. rc = smc_buf_create(new_smc, true);
  1808. if (rc) {
  1809. smc_conn_abort(new_smc, ini->first_contact_local);
  1810. return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
  1811. SMC_CLC_DECL_MEM;
  1812. }
  1813. return 0;
  1814. }
  1815. static bool smc_is_already_selected(struct smcd_dev *smcd,
  1816. struct smc_init_info *ini,
  1817. int matches)
  1818. {
  1819. int i;
  1820. for (i = 0; i < matches; i++)
  1821. if (smcd == ini->ism_dev[i])
  1822. return true;
  1823. return false;
  1824. }
  1825. /* check for ISM devices matching proposed ISM devices */
  1826. static void smc_check_ism_v2_match(struct smc_init_info *ini,
  1827. u16 proposed_chid,
  1828. struct smcd_gid *proposed_gid,
  1829. unsigned int *matches)
  1830. {
  1831. struct smcd_dev *smcd;
  1832. list_for_each_entry(smcd, &smcd_dev_list.list, list) {
  1833. if (smcd->going_away)
  1834. continue;
  1835. if (smc_is_already_selected(smcd, ini, *matches))
  1836. continue;
  1837. if (smc_ism_get_chid(smcd) == proposed_chid &&
  1838. !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
  1839. ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
  1840. if (__smc_ism_is_emulated(proposed_chid))
  1841. ini->ism_peer_gid[*matches].gid_ext =
  1842. proposed_gid->gid_ext;
  1843. /* non-Emulated-ISM's peer gid_ext remains 0. */
  1844. ini->ism_dev[*matches] = smcd;
  1845. (*matches)++;
  1846. break;
  1847. }
  1848. }
  1849. }
  1850. static void smc_init_info_store_rc(u32 rc, struct smc_init_info *ini)
  1851. {
  1852. if (!ini->rc)
  1853. ini->rc = rc;
  1854. }
/* Server side: find a local ISM device usable for the SMC-D V2 proposal.
 *
 * Collects matching local devices for the proposed GID/CHID entries into
 * ini->ism_dev[] (under smcd_dev_list.mutex), matches the EID, and then
 * tries smc_listen_ism_init() on each collected device in turn.
 *
 * On success the function returns with ini->ism_selected pointing at the
 * device in use, ini->is_smcd set and ini->smcd_version == SMC_V2. On any
 * failure the V2 bit is cleared from ini->smcd_version, ini->ism_dev[0] is
 * set to NULL and ini->is_smcd is cleared; callers test ini->ism_dev[0].
 */
static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
	struct smc_clc_v2_extension *smc_v2_ext;
	struct smc_clc_msg_smcd *pclc_smcd;
	unsigned int matches = 0;
	struct smcd_gid smcd_gid;
	u8 smcd_version;
	u8 *eid = NULL;
	int i, rc;
	u16 chid;

	/* SMC-D V2 must still be a candidate and be indicated by the peer */
	if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
		goto not_found;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	/* NOTE(review): called before smc_v2_ext is NULL-checked; presumably
	 * the helper tolerates a NULL argument - confirm in its definition
	 */
	smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
	if (!pclc_smcd || !smc_v2_ext || !smcd_v2_ext)
		goto not_found;

	mutex_lock(&smcd_dev_list.mutex);
	if (pclc_smcd->ism.chid) {
		/* check for ISM device matching proposed native ISM device */
		smcd_gid.gid = ntohll(pclc_smcd->ism.gid);
		smcd_gid.gid_ext = 0;
		smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
				       &smcd_gid, &matches);
	}
	for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) {
		/* check for ISM devices matching proposed non-native ISM
		 * devices
		 */
		smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
		smcd_gid.gid_ext = 0;
		chid = ntohs(smcd_v2_ext->gidchid[i].chid);
		if (__smc_ism_is_emulated(chid)) {
			if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
			    chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
				/* each Emulated-ISM device takes two GID-CHID
				 * entries and CHID of the second entry repeats
				 * that of the first entry.
				 *
				 * So check if the next GID-CHID entry exists
				 * and both two entries' CHIDs are the same.
				 */
				continue;
			/* the second entry carries the extended GID; ++i
			 * consumes it so the loop does not visit it again
			 */
			smcd_gid.gid_ext =
				ntohll(smcd_v2_ext->gidchid[++i].gid);
		}
		smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches);
	}
	mutex_unlock(&smcd_dev_list.mutex);

	/* no local device matched any proposed entry */
	if (!ini->ism_dev[0]) {
		smc_init_info_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
		goto not_found;
	}

	smc_ism_get_system_eid(&eid);
	if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
			       smcd_v2_ext->system_eid, eid))
		goto not_found;

	/* separate - outside the smcd_dev_list.mutex */
	smcd_version = ini->smcd_version;
	for (i = 0; i < matches; i++) {
		ini->smcd_version = SMC_V2;
		ini->is_smcd = true;
		ini->ism_selected = i;
		rc = smc_listen_ism_init(new_smc, ini);
		if (rc) {
			smc_init_info_store_rc(rc, ini);
			/* try next active ISM device */
			continue;
		}
		return; /* matching and usable V2 ISM device found */
	}
	/* no V2 ISM device could be initialized */
	ini->smcd_version = smcd_version;	/* restore original value */
	ini->negotiated_eid[0] = 0;

not_found:
	/* leave no trace of a V2 SMC-D selection behind */
	ini->smcd_version &= ~SMC_V2;
	ini->ism_dev[0] = NULL;
	ini->is_smcd = false;
}
  1937. static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
  1938. struct smc_clc_msg_proposal *pclc,
  1939. struct smc_init_info *ini)
  1940. {
  1941. struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
  1942. int rc = 0;
  1943. /* check if ISM V1 is available */
  1944. if (!(ini->smcd_version & SMC_V1) ||
  1945. !smcd_indicated(ini->smc_type_v1) ||
  1946. !pclc_smcd)
  1947. goto not_found;
  1948. ini->is_smcd = true; /* prepare ISM check */
  1949. ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid);
  1950. ini->ism_peer_gid[0].gid_ext = 0;
  1951. rc = smc_find_ism_device(new_smc, ini);
  1952. if (rc)
  1953. goto not_found;
  1954. ini->ism_selected = 0;
  1955. rc = smc_listen_ism_init(new_smc, ini);
  1956. if (!rc)
  1957. return; /* V1 ISM device found */
  1958. not_found:
  1959. smc_init_info_store_rc(rc, ini);
  1960. ini->smcd_version &= ~SMC_V1;
  1961. ini->ism_dev[0] = NULL;
  1962. ini->is_smcd = false;
  1963. }
  1964. /* listen worker: register buffers */
  1965. static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
  1966. {
  1967. struct smc_connection *conn = &new_smc->conn;
  1968. if (!local_first) {
  1969. /* reg sendbufs if they were vzalloced */
  1970. if (conn->sndbuf_desc->is_vm) {
  1971. if (smcr_lgr_reg_sndbufs(conn->lnk,
  1972. conn->sndbuf_desc))
  1973. return SMC_CLC_DECL_ERR_REGBUF;
  1974. }
  1975. if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
  1976. return SMC_CLC_DECL_ERR_REGBUF;
  1977. }
  1978. return 0;
  1979. }
  1980. static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
  1981. struct smc_clc_msg_proposal *pclc,
  1982. struct smc_init_info *ini)
  1983. {
  1984. struct smc_clc_v2_extension *smc_v2_ext;
  1985. u8 smcr_version;
  1986. int rc;
  1987. if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
  1988. goto not_found;
  1989. smc_v2_ext = smc_get_clc_v2_ext(pclc);
  1990. if (!smc_v2_ext ||
  1991. !smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
  1992. goto not_found;
  1993. /* prepare RDMA check */
  1994. memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
  1995. memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
  1996. memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
  1997. ini->check_smcrv2 = true;
  1998. ini->smcrv2.clc_sk = new_smc->clcsock->sk;
  1999. ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
  2000. ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
  2001. rc = smc_find_rdma_device(new_smc, ini);
  2002. if (rc) {
  2003. smc_init_info_store_rc(rc, ini);
  2004. goto not_found;
  2005. }
  2006. if (!ini->smcrv2.uses_gateway)
  2007. memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
  2008. smcr_version = ini->smcr_version;
  2009. ini->smcr_version = SMC_V2;
  2010. rc = smc_listen_rdma_init(new_smc, ini);
  2011. if (!rc) {
  2012. rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
  2013. if (rc)
  2014. smc_conn_abort(new_smc, ini->first_contact_local);
  2015. }
  2016. if (!rc)
  2017. return;
  2018. ini->smcr_version = smcr_version;
  2019. smc_init_info_store_rc(rc, ini);
  2020. not_found:
  2021. ini->smcr_version &= ~SMC_V2;
  2022. ini->smcrv2.ib_dev_v2 = NULL;
  2023. ini->check_smcrv2 = false;
  2024. }
  2025. static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
  2026. struct smc_clc_msg_proposal *pclc,
  2027. struct smc_init_info *ini)
  2028. {
  2029. int rc;
  2030. if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
  2031. return SMC_CLC_DECL_NOSMCDEV;
  2032. /* prepare RDMA check */
  2033. memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
  2034. memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
  2035. memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
  2036. rc = smc_find_rdma_device(new_smc, ini);
  2037. if (rc) {
  2038. /* no RDMA device found */
  2039. return SMC_CLC_DECL_NOSMCDEV;
  2040. }
  2041. rc = smc_listen_rdma_init(new_smc, ini);
  2042. if (rc)
  2043. return rc;
  2044. return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
  2045. }
  2046. /* determine the local device matching to proposal */
  2047. static int smc_listen_find_device(struct smc_sock *new_smc,
  2048. struct smc_clc_msg_proposal *pclc,
  2049. struct smc_init_info *ini)
  2050. {
  2051. int prfx_rc;
  2052. /* check for ISM device matching V2 proposed device */
  2053. smc_find_ism_v2_device_serv(new_smc, pclc, ini);
  2054. if (ini->ism_dev[0])
  2055. return 0;
  2056. /* check for matching IP prefix and subnet length (V1) */
  2057. prfx_rc = smc_listen_prfx_check(new_smc, pclc);
  2058. if (prfx_rc)
  2059. smc_init_info_store_rc(prfx_rc, ini);
  2060. /* get vlan id from IP device */
  2061. if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
  2062. return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
  2063. /* check for ISM device matching V1 proposed device */
  2064. if (!prfx_rc)
  2065. smc_find_ism_v1_device_serv(new_smc, pclc, ini);
  2066. if (ini->ism_dev[0])
  2067. return 0;
  2068. if (!smcr_indicated(pclc->hdr.typev1) &&
  2069. !smcr_indicated(pclc->hdr.typev2))
  2070. /* skip RDMA and decline */
  2071. return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
  2072. /* check if RDMA V2 is available */
  2073. smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
  2074. if (ini->smcrv2.ib_dev_v2)
  2075. return 0;
  2076. /* check if RDMA V1 is available */
  2077. if (!prfx_rc) {
  2078. int rc;
  2079. rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
  2080. smc_init_info_store_rc(rc, ini);
  2081. return (!rc) ? 0 : ini->rc;
  2082. }
  2083. return prfx_rc;
  2084. }
  2085. /* listen worker: finish RDMA setup */
  2086. static int smc_listen_rdma_finish(struct smc_sock *new_smc,
  2087. struct smc_clc_msg_accept_confirm *cclc,
  2088. bool local_first,
  2089. struct smc_init_info *ini)
  2090. {
  2091. struct smc_link *link = new_smc->conn.lnk;
  2092. int reason_code = 0;
  2093. if (local_first)
  2094. smc_link_save_peer_info(link, cclc, ini);
  2095. if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
  2096. return SMC_CLC_DECL_ERR_RTOK;
  2097. if (local_first) {
  2098. if (smc_ib_ready_link(link))
  2099. return SMC_CLC_DECL_ERR_RDYLNK;
  2100. /* QP confirmation over RoCE fabric */
  2101. smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
  2102. reason_code = smcr_serv_conf_first_link(new_smc);
  2103. smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
  2104. }
  2105. return reason_code;
  2106. }
/* setup for connection of server
 *
 * Worker run for every accepted clcsock (new_smc->smc_listen_work). It
 * performs the server side of the CLC handshake: receive the proposal,
 * pick a device, send the accept, receive the confirm and finish the
 * connection. Every exit goes through one of the smc_listen_out*() helpers
 * (directly or via smc_listen_decline()), which release the sock lock taken
 * at entry.
 *
 * smc_server_lgr_pending is held from device selection onwards; for SMC-D
 * it is dropped right after the accept was sent, for SMC-R only after
 * smc_listen_rdma_finish(). The out_unlock label is therefore used only on
 * paths that still hold the mutex, out_decl on paths that do not.
 */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm *cclc;
	struct smc_clc_msg_proposal_area *buf;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info *ini = NULL;
	u8 proposal_version = SMC_V1;
	u8 accept_version;
	int rc = 0;

	lock_sock(&new_smc->sk); /* release in smc_listen_out() */
	/* the listener went away while this work was queued */
	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
		return smc_listen_out_err(new_smc);

	/* fallback mode (copied from the listener): nothing to negotiate */
	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
		if (rc)
			smc_listen_out_err(new_smc);
		else
			smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	buf = kzalloc_obj(*buf);
	if (!buf) {
		rc = SMC_CLC_DECL_MEM;
		goto out_decl;
	}
	pclc = (struct smc_clc_msg_proposal *)buf;
	rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (rc)
		goto out_decl;

	/* remember the proposed version; it is passed on when declining */
	if (pclc->hdr.version > SMC_V1)
		proposal_version = SMC_V2;

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(new_smc)) {
		rc = SMC_CLC_DECL_IPSEC;
		goto out_decl;
	}

	ini = kzalloc_obj(*ini);
	if (!ini) {
		rc = SMC_CLC_DECL_MEM;
		goto out_decl;
	}

	/* initial version checking */
	rc = smc_listen_v2_check(new_smc, pclc, ini);
	if (rc)
		goto out_decl;

	rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini);
	if (rc)
		goto out_decl;

	mutex_lock(&smc_server_lgr_pending);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* determine ISM or RoCE device used for connection */
	rc = smc_listen_find_device(new_smc, pclc, ini);
	if (rc)
		goto out_unlock;

	/* send SMC Accept CLC message */
	accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
	rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
				 accept_version, ini->negotiated_eid, ini);
	if (rc)
		goto out_unlock;

	/* SMC-D does not need this lock any more */
	if (ini->is_smcd)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	/* the proposal buffer is reused for the confirm message, so pclc
	 * must not be used from here on
	 */
	memset(buf, 0, sizeof(*buf));
	cclc = (struct smc_clc_msg_accept_confirm *)buf;
	rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
			      SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (rc) {
		/* only SMC-R still holds smc_server_lgr_pending here */
		if (!ini->is_smcd)
			goto out_unlock;
		goto out_decl;
	}

	rc = smc_clc_v2x_features_confirm_check(cclc, ini);
	if (rc) {
		/* only SMC-R still holds smc_server_lgr_pending here */
		if (!ini->is_smcd)
			goto out_unlock;
		goto out_decl;
	}

	/* fce smc release version is needed in smc_listen_rdma_finish,
	 * so save fce info here.
	 */
	smc_conn_save_peer_info_fce(new_smc, cclc);

	/* finish worker */
	if (!ini->is_smcd) {
		rc = smc_listen_rdma_finish(new_smc, cclc,
					    ini->first_contact_local, ini);
		if (rc)
			goto out_unlock;
		mutex_unlock(&smc_server_lgr_pending);
	}
	smc_conn_save_peer_info(new_smc, cclc);

	/* SMC-D only: attach buffers if the device supports DMB nocopy */
	if (ini->is_smcd &&
	    smc_ism_support_dmb_nocopy(new_smc->conn.lgr->smcd)) {
		rc = smcd_buf_attach(new_smc);
		if (rc)
			goto out_decl;
	}

	SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
	/* smc_listen_out() will release smcsk */
	smc_listen_out_connected(new_smc);
	goto out_free;

out_unlock:
	mutex_unlock(&smc_server_lgr_pending);
out_decl:
	/* ini is still NULL if the failure happened before its allocation */
	smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
			   proposal_version);
out_free:
	kfree(ini);
	kfree(buf);
}
  2232. static void smc_tcp_listen_work(struct work_struct *work)
  2233. {
  2234. struct smc_sock *lsmc = container_of(work, struct smc_sock,
  2235. tcp_listen_work);
  2236. struct sock *lsk = &lsmc->sk;
  2237. struct smc_sock *new_smc;
  2238. int rc = 0;
  2239. lock_sock(lsk);
  2240. while (lsk->sk_state == SMC_LISTEN) {
  2241. rc = smc_clcsock_accept(lsmc, &new_smc);
  2242. if (rc) /* clcsock accept queue empty or error */
  2243. goto out;
  2244. if (!new_smc)
  2245. continue;
  2246. if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
  2247. atomic_inc(&lsmc->queued_smc_hs);
  2248. new_smc->listen_smc = lsmc;
  2249. new_smc->use_fallback = lsmc->use_fallback;
  2250. new_smc->fallback_rsn = lsmc->fallback_rsn;
  2251. sock_hold(lsk); /* sock_put in smc_listen_work */
  2252. INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
  2253. smc_copy_sock_settings_to_smc(new_smc);
  2254. sock_hold(&new_smc->sk); /* sock_put in passive closing */
  2255. if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
  2256. sock_put(&new_smc->sk);
  2257. }
  2258. out:
  2259. release_sock(lsk);
  2260. sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
  2261. }
  2262. static void smc_clcsock_data_ready(struct sock *listen_clcsock)
  2263. {
  2264. struct smc_sock *lsmc;
  2265. read_lock_bh(&listen_clcsock->sk_callback_lock);
  2266. lsmc = smc_clcsock_user_data(listen_clcsock);
  2267. if (!lsmc)
  2268. goto out;
  2269. lsmc->clcsk_data_ready(listen_clcsock);
  2270. if (lsmc->sk.sk_state == SMC_LISTEN) {
  2271. sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
  2272. if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
  2273. sock_put(&lsmc->sk);
  2274. }
  2275. out:
  2276. read_unlock_bh(&listen_clcsock->sk_callback_lock);
  2277. }
  2278. int smc_listen(struct socket *sock, int backlog)
  2279. {
  2280. struct sock *sk = sock->sk;
  2281. struct smc_sock *smc;
  2282. int rc;
  2283. smc = smc_sk(sk);
  2284. lock_sock(sk);
  2285. rc = -EINVAL;
  2286. if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
  2287. smc->connect_nonblock || sock->state != SS_UNCONNECTED)
  2288. goto out;
  2289. rc = 0;
  2290. if (sk->sk_state == SMC_LISTEN) {
  2291. sk->sk_max_ack_backlog = backlog;
  2292. goto out;
  2293. }
  2294. /* some socket options are handled in core, so we could not apply
  2295. * them to the clc socket -- copy smc socket options to clc socket
  2296. */
  2297. smc_copy_sock_settings_to_clc(smc);
  2298. if (!smc->use_fallback)
  2299. tcp_sk(smc->clcsock->sk)->syn_smc = 1;
  2300. /* save original sk_data_ready function and establish
  2301. * smc-specific sk_data_ready function
  2302. */
  2303. write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
  2304. __rcu_assign_sk_user_data_with_flags(smc->clcsock->sk, smc,
  2305. SK_USER_DATA_NOCOPY);
  2306. smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
  2307. smc_clcsock_data_ready, &smc->clcsk_data_ready);
  2308. write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
  2309. /* save original ops */
  2310. smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
  2311. smc->af_ops = *smc->ori_af_ops;
  2312. smc->af_ops.syn_recv_sock = smc_tcp_syn_recv_sock;
  2313. inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;
  2314. if (smc->limit_smc_hs)
  2315. tcp_sk(smc->clcsock->sk)->smc_hs_congested = smc_hs_congested;
  2316. rc = kernel_listen(smc->clcsock, backlog);
  2317. if (rc) {
  2318. write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
  2319. smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
  2320. &smc->clcsk_data_ready);
  2321. rcu_assign_sk_user_data(smc->clcsock->sk, NULL);
  2322. write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
  2323. goto out;
  2324. }
  2325. sock_set_flag(sk, SOCK_RCU_FREE);
  2326. sk->sk_max_ack_backlog = backlog;
  2327. sk->sk_ack_backlog = 0;
  2328. sk->sk_state = SMC_LISTEN;
  2329. out:
  2330. release_sock(sk);
  2331. return rc;
  2332. }
  2333. int smc_accept(struct socket *sock, struct socket *new_sock,
  2334. struct proto_accept_arg *arg)
  2335. {
  2336. struct sock *sk = sock->sk, *nsk;
  2337. DECLARE_WAITQUEUE(wait, current);
  2338. struct smc_sock *lsmc;
  2339. long timeo;
  2340. int rc = 0;
  2341. lsmc = smc_sk(sk);
  2342. sock_hold(sk); /* sock_put below */
  2343. lock_sock(sk);
  2344. if (lsmc->sk.sk_state != SMC_LISTEN) {
  2345. rc = -EINVAL;
  2346. release_sock(sk);
  2347. goto out;
  2348. }
  2349. /* Wait for an incoming connection */
  2350. timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
  2351. add_wait_queue_exclusive(sk_sleep(sk), &wait);
  2352. while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
  2353. set_current_state(TASK_INTERRUPTIBLE);
  2354. if (!timeo) {
  2355. rc = -EAGAIN;
  2356. break;
  2357. }
  2358. release_sock(sk);
  2359. timeo = schedule_timeout(timeo);
  2360. /* wakeup by sk_data_ready in smc_listen_work() */
  2361. sched_annotate_sleep();
  2362. lock_sock(sk);
  2363. if (signal_pending(current)) {
  2364. rc = sock_intr_errno(timeo);
  2365. break;
  2366. }
  2367. }
  2368. set_current_state(TASK_RUNNING);
  2369. remove_wait_queue(sk_sleep(sk), &wait);
  2370. if (!rc)
  2371. rc = sock_error(nsk);
  2372. release_sock(sk);
  2373. if (rc)
  2374. goto out;
  2375. if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) {
  2376. /* wait till data arrives on the socket */
  2377. timeo = secs_to_jiffies(lsmc->sockopt_defer_accept);
  2378. if (smc_sk(nsk)->use_fallback) {
  2379. struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
  2380. lock_sock(clcsk);
  2381. if (skb_queue_empty(&clcsk->sk_receive_queue))
  2382. sk_wait_data(clcsk, &timeo, NULL);
  2383. release_sock(clcsk);
  2384. } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
  2385. lock_sock(nsk);
  2386. smc_rx_wait(smc_sk(nsk), &timeo, 0, smc_rx_data_available);
  2387. release_sock(nsk);
  2388. }
  2389. }
  2390. out:
  2391. sock_put(sk); /* sock_hold above */
  2392. return rc;
  2393. }
  2394. int smc_getname(struct socket *sock, struct sockaddr *addr,
  2395. int peer)
  2396. {
  2397. struct smc_sock *smc;
  2398. if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
  2399. (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
  2400. return -ENOTCONN;
  2401. smc = smc_sk(sock->sk);
  2402. return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
  2403. }
  2404. int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
  2405. {
  2406. struct sock *sk = sock->sk;
  2407. struct smc_sock *smc;
  2408. int rc;
  2409. smc = smc_sk(sk);
  2410. lock_sock(sk);
  2411. /* SMC does not support connect with fastopen */
  2412. if (msg->msg_flags & MSG_FASTOPEN) {
  2413. /* not connected yet, fallback */
  2414. if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
  2415. rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
  2416. if (rc)
  2417. goto out;
  2418. } else {
  2419. rc = -EINVAL;
  2420. goto out;
  2421. }
  2422. } else if ((sk->sk_state != SMC_ACTIVE) &&
  2423. (sk->sk_state != SMC_APPCLOSEWAIT1) &&
  2424. (sk->sk_state != SMC_INIT)) {
  2425. rc = -EPIPE;
  2426. goto out;
  2427. }
  2428. if (smc->use_fallback) {
  2429. rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
  2430. } else {
  2431. rc = smc_tx_sendmsg(smc, msg, len);
  2432. SMC_STAT_TX_PAYLOAD(smc, len, rc);
  2433. }
  2434. out:
  2435. release_sock(sk);
  2436. return rc;
  2437. }
  2438. int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
  2439. int flags)
  2440. {
  2441. struct sock *sk = sock->sk;
  2442. struct smc_sock *smc;
  2443. int rc = -ENOTCONN;
  2444. smc = smc_sk(sk);
  2445. lock_sock(sk);
  2446. if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
  2447. /* socket was connected before, no more data to read */
  2448. rc = 0;
  2449. goto out;
  2450. }
  2451. if ((sk->sk_state == SMC_INIT) ||
  2452. (sk->sk_state == SMC_LISTEN) ||
  2453. (sk->sk_state == SMC_CLOSED))
  2454. goto out;
  2455. if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
  2456. rc = 0;
  2457. goto out;
  2458. }
  2459. if (smc->use_fallback) {
  2460. rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
  2461. } else {
  2462. msg->msg_namelen = 0;
  2463. rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
  2464. SMC_STAT_RX_PAYLOAD(smc, rc, rc);
  2465. }
  2466. out:
  2467. release_sock(sk);
  2468. return rc;
  2469. }
  2470. static __poll_t smc_accept_poll(struct sock *parent)
  2471. {
  2472. struct smc_sock *isk = smc_sk(parent);
  2473. __poll_t mask = 0;
  2474. spin_lock(&isk->accept_q_lock);
  2475. if (!list_empty(&isk->accept_q))
  2476. mask = EPOLLIN | EPOLLRDNORM;
  2477. spin_unlock(&isk->accept_q_lock);
  2478. return mask;
  2479. }
  2480. __poll_t smc_poll(struct file *file, struct socket *sock,
  2481. poll_table *wait)
  2482. {
  2483. struct sock *sk = sock->sk;
  2484. struct smc_sock *smc;
  2485. __poll_t mask = 0;
  2486. if (!sk)
  2487. return EPOLLNVAL;
  2488. smc = smc_sk(sock->sk);
  2489. if (smc->use_fallback) {
  2490. /* delegate to CLC child sock */
  2491. mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
  2492. sk->sk_err = smc->clcsock->sk->sk_err;
  2493. } else {
  2494. if (sk->sk_state != SMC_CLOSED)
  2495. sock_poll_wait(file, sock, wait);
  2496. if (sk->sk_err)
  2497. mask |= EPOLLERR;
  2498. if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
  2499. (sk->sk_state == SMC_CLOSED))
  2500. mask |= EPOLLHUP;
  2501. if (sk->sk_state == SMC_LISTEN) {
  2502. /* woken up by sk_data_ready in smc_listen_work() */
  2503. mask |= smc_accept_poll(sk);
  2504. } else if (smc->use_fallback) { /* as result of connect_work()*/
  2505. mask |= smc->clcsock->ops->poll(file, smc->clcsock,
  2506. wait);
  2507. sk->sk_err = smc->clcsock->sk->sk_err;
  2508. } else {
  2509. if ((sk->sk_state != SMC_INIT &&
  2510. atomic_read(&smc->conn.sndbuf_space)) ||
  2511. sk->sk_shutdown & SEND_SHUTDOWN) {
  2512. mask |= EPOLLOUT | EPOLLWRNORM;
  2513. } else {
  2514. sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  2515. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  2516. if (sk->sk_state != SMC_INIT) {
  2517. /* Race breaker the same way as tcp_poll(). */
  2518. smp_mb__after_atomic();
  2519. if (atomic_read(&smc->conn.sndbuf_space))
  2520. mask |= EPOLLOUT | EPOLLWRNORM;
  2521. }
  2522. }
  2523. if (atomic_read(&smc->conn.bytes_to_rcv))
  2524. mask |= EPOLLIN | EPOLLRDNORM;
  2525. if (sk->sk_shutdown & RCV_SHUTDOWN)
  2526. mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
  2527. if (sk->sk_state == SMC_APPCLOSEWAIT1)
  2528. mask |= EPOLLIN;
  2529. if (smc->conn.urg_state == SMC_URG_VALID)
  2530. mask |= EPOLLPRI;
  2531. }
  2532. }
  2533. return mask;
  2534. }
  2535. int smc_shutdown(struct socket *sock, int how)
  2536. {
  2537. struct sock *sk = sock->sk;
  2538. bool do_shutdown = true;
  2539. struct smc_sock *smc;
  2540. int rc = -EINVAL;
  2541. int old_state;
  2542. int rc1 = 0;
  2543. smc = smc_sk(sk);
  2544. if ((how < SHUT_RD) || (how > SHUT_RDWR))
  2545. return rc;
  2546. lock_sock(sk);
  2547. if (sock->state == SS_CONNECTING) {
  2548. if (sk->sk_state == SMC_ACTIVE)
  2549. sock->state = SS_CONNECTED;
  2550. else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
  2551. sk->sk_state == SMC_PEERCLOSEWAIT2 ||
  2552. sk->sk_state == SMC_APPCLOSEWAIT1 ||
  2553. sk->sk_state == SMC_APPCLOSEWAIT2 ||
  2554. sk->sk_state == SMC_APPFINCLOSEWAIT)
  2555. sock->state = SS_DISCONNECTING;
  2556. }
  2557. rc = -ENOTCONN;
  2558. if ((sk->sk_state != SMC_ACTIVE) &&
  2559. (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
  2560. (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
  2561. (sk->sk_state != SMC_APPCLOSEWAIT1) &&
  2562. (sk->sk_state != SMC_APPCLOSEWAIT2) &&
  2563. (sk->sk_state != SMC_APPFINCLOSEWAIT))
  2564. goto out;
  2565. if (smc->use_fallback) {
  2566. rc = kernel_sock_shutdown(smc->clcsock, how);
  2567. sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
  2568. if (sk->sk_shutdown == SHUTDOWN_MASK) {
  2569. sk->sk_state = SMC_CLOSED;
  2570. sk->sk_socket->state = SS_UNCONNECTED;
  2571. sock_put(sk);
  2572. }
  2573. goto out;
  2574. }
  2575. switch (how) {
  2576. case SHUT_RDWR: /* shutdown in both directions */
  2577. old_state = sk->sk_state;
  2578. rc = smc_close_active(smc);
  2579. if (old_state == SMC_ACTIVE &&
  2580. sk->sk_state == SMC_PEERCLOSEWAIT1)
  2581. do_shutdown = false;
  2582. break;
  2583. case SHUT_WR:
  2584. rc = smc_close_shutdown_write(smc);
  2585. break;
  2586. case SHUT_RD:
  2587. rc = 0;
  2588. /* nothing more to do because peer is not involved */
  2589. break;
  2590. }
  2591. if (do_shutdown && smc->clcsock)
  2592. rc1 = kernel_sock_shutdown(smc->clcsock, how);
  2593. /* map sock_shutdown_cmd constants to sk_shutdown value range */
  2594. sk->sk_shutdown |= how + 1;
  2595. if (sk->sk_state == SMC_CLOSED)
  2596. sock->state = SS_UNCONNECTED;
  2597. else
  2598. sock->state = SS_DISCONNECTING;
  2599. out:
  2600. release_sock(sk);
  2601. return rc ? rc : rc1;
  2602. }
  2603. static int __smc_getsockopt(struct socket *sock, int level, int optname,
  2604. char __user *optval, int __user *optlen)
  2605. {
  2606. struct smc_sock *smc;
  2607. int val, len;
  2608. smc = smc_sk(sock->sk);
  2609. if (get_user(len, optlen))
  2610. return -EFAULT;
  2611. len = min_t(int, len, sizeof(int));
  2612. if (len < 0)
  2613. return -EINVAL;
  2614. switch (optname) {
  2615. case SMC_LIMIT_HS:
  2616. val = smc->limit_smc_hs;
  2617. break;
  2618. default:
  2619. return -EOPNOTSUPP;
  2620. }
  2621. if (put_user(len, optlen))
  2622. return -EFAULT;
  2623. if (copy_to_user(optval, &val, len))
  2624. return -EFAULT;
  2625. return 0;
  2626. }
  2627. static int __smc_setsockopt(struct socket *sock, int level, int optname,
  2628. sockptr_t optval, unsigned int optlen)
  2629. {
  2630. struct sock *sk = sock->sk;
  2631. struct smc_sock *smc;
  2632. int val, rc;
  2633. smc = smc_sk(sk);
  2634. lock_sock(sk);
  2635. switch (optname) {
  2636. case SMC_LIMIT_HS:
  2637. if (optlen < sizeof(int)) {
  2638. rc = -EINVAL;
  2639. break;
  2640. }
  2641. if (copy_from_sockptr(&val, optval, sizeof(int))) {
  2642. rc = -EFAULT;
  2643. break;
  2644. }
  2645. smc->limit_smc_hs = !!val;
  2646. rc = 0;
  2647. break;
  2648. default:
  2649. rc = -EOPNOTSUPP;
  2650. break;
  2651. }
  2652. release_sock(sk);
  2653. return rc;
  2654. }
  2655. int smc_setsockopt(struct socket *sock, int level, int optname,
  2656. sockptr_t optval, unsigned int optlen)
  2657. {
  2658. struct sock *sk = sock->sk;
  2659. struct smc_sock *smc;
  2660. int val, rc;
  2661. if (level == SOL_TCP && optname == TCP_ULP)
  2662. return -EOPNOTSUPP;
  2663. else if (level == SOL_SMC)
  2664. return __smc_setsockopt(sock, level, optname, optval, optlen);
  2665. smc = smc_sk(sk);
  2666. /* generic setsockopts reaching us here always apply to the
  2667. * CLC socket
  2668. */
  2669. mutex_lock(&smc->clcsock_release_lock);
  2670. if (!smc->clcsock) {
  2671. mutex_unlock(&smc->clcsock_release_lock);
  2672. return -EBADF;
  2673. }
  2674. if (unlikely(!smc->clcsock->ops->setsockopt))
  2675. rc = -EOPNOTSUPP;
  2676. else
  2677. rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
  2678. optval, optlen);
  2679. if (smc->clcsock->sk->sk_err) {
  2680. sk->sk_err = smc->clcsock->sk->sk_err;
  2681. sk_error_report(sk);
  2682. }
  2683. mutex_unlock(&smc->clcsock_release_lock);
  2684. if (optlen < sizeof(int))
  2685. return -EINVAL;
  2686. if (copy_from_sockptr(&val, optval, sizeof(int)))
  2687. return -EFAULT;
  2688. lock_sock(sk);
  2689. if (rc || smc->use_fallback)
  2690. goto out;
  2691. switch (optname) {
  2692. case TCP_FASTOPEN:
  2693. case TCP_FASTOPEN_CONNECT:
  2694. case TCP_FASTOPEN_KEY:
  2695. case TCP_FASTOPEN_NO_COOKIE:
  2696. /* option not supported by SMC */
  2697. if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
  2698. rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
  2699. } else {
  2700. rc = -EINVAL;
  2701. }
  2702. break;
  2703. case TCP_NODELAY:
  2704. if (sk->sk_state != SMC_INIT &&
  2705. sk->sk_state != SMC_LISTEN &&
  2706. sk->sk_state != SMC_CLOSED) {
  2707. if (val) {
  2708. SMC_STAT_INC(smc, ndly_cnt);
  2709. smc_tx_pending(&smc->conn);
  2710. cancel_delayed_work(&smc->conn.tx_work);
  2711. }
  2712. }
  2713. break;
  2714. case TCP_CORK:
  2715. if (sk->sk_state != SMC_INIT &&
  2716. sk->sk_state != SMC_LISTEN &&
  2717. sk->sk_state != SMC_CLOSED) {
  2718. if (!val) {
  2719. SMC_STAT_INC(smc, cork_cnt);
  2720. smc_tx_pending(&smc->conn);
  2721. cancel_delayed_work(&smc->conn.tx_work);
  2722. }
  2723. }
  2724. break;
  2725. case TCP_DEFER_ACCEPT:
  2726. smc->sockopt_defer_accept = val;
  2727. break;
  2728. default:
  2729. break;
  2730. }
  2731. out:
  2732. release_sock(sk);
  2733. return rc;
  2734. }
  2735. int smc_getsockopt(struct socket *sock, int level, int optname,
  2736. char __user *optval, int __user *optlen)
  2737. {
  2738. struct smc_sock *smc;
  2739. int rc;
  2740. if (level == SOL_SMC)
  2741. return __smc_getsockopt(sock, level, optname, optval, optlen);
  2742. smc = smc_sk(sock->sk);
  2743. mutex_lock(&smc->clcsock_release_lock);
  2744. if (!smc->clcsock) {
  2745. mutex_unlock(&smc->clcsock_release_lock);
  2746. return -EBADF;
  2747. }
  2748. /* socket options apply to the CLC socket */
  2749. if (unlikely(!smc->clcsock->ops->getsockopt)) {
  2750. mutex_unlock(&smc->clcsock_release_lock);
  2751. return -EOPNOTSUPP;
  2752. }
  2753. rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
  2754. optval, optlen);
  2755. mutex_unlock(&smc->clcsock_release_lock);
  2756. return rc;
  2757. }
  2758. int smc_ioctl(struct socket *sock, unsigned int cmd,
  2759. unsigned long arg)
  2760. {
  2761. union smc_host_cursor cons, urg;
  2762. struct smc_connection *conn;
  2763. struct smc_sock *smc;
  2764. int answ;
  2765. smc = smc_sk(sock->sk);
  2766. conn = &smc->conn;
  2767. lock_sock(&smc->sk);
  2768. if (smc->use_fallback) {
  2769. if (!smc->clcsock) {
  2770. release_sock(&smc->sk);
  2771. return -EBADF;
  2772. }
  2773. answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
  2774. release_sock(&smc->sk);
  2775. return answ;
  2776. }
  2777. switch (cmd) {
  2778. case SIOCINQ: /* same as FIONREAD */
  2779. if (smc->sk.sk_state == SMC_LISTEN) {
  2780. release_sock(&smc->sk);
  2781. return -EINVAL;
  2782. }
  2783. if (smc->sk.sk_state == SMC_INIT ||
  2784. smc->sk.sk_state == SMC_CLOSED)
  2785. answ = 0;
  2786. else
  2787. answ = atomic_read(&smc->conn.bytes_to_rcv);
  2788. break;
  2789. case SIOCOUTQ:
  2790. /* output queue size (not send + not acked) */
  2791. if (smc->sk.sk_state == SMC_LISTEN) {
  2792. release_sock(&smc->sk);
  2793. return -EINVAL;
  2794. }
  2795. if (smc->sk.sk_state == SMC_INIT ||
  2796. smc->sk.sk_state == SMC_CLOSED)
  2797. answ = 0;
  2798. else
  2799. answ = smc->conn.sndbuf_desc->len -
  2800. atomic_read(&smc->conn.sndbuf_space);
  2801. break;
  2802. case SIOCOUTQNSD:
  2803. /* output queue size (not send only) */
  2804. if (smc->sk.sk_state == SMC_LISTEN) {
  2805. release_sock(&smc->sk);
  2806. return -EINVAL;
  2807. }
  2808. if (smc->sk.sk_state == SMC_INIT ||
  2809. smc->sk.sk_state == SMC_CLOSED)
  2810. answ = 0;
  2811. else
  2812. answ = smc_tx_prepared_sends(&smc->conn);
  2813. break;
  2814. case SIOCATMARK:
  2815. if (smc->sk.sk_state == SMC_LISTEN) {
  2816. release_sock(&smc->sk);
  2817. return -EINVAL;
  2818. }
  2819. if (smc->sk.sk_state == SMC_INIT ||
  2820. smc->sk.sk_state == SMC_CLOSED) {
  2821. answ = 0;
  2822. } else {
  2823. smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
  2824. smc_curs_copy(&urg, &conn->urg_curs, conn);
  2825. answ = smc_curs_diff(conn->rmb_desc->len,
  2826. &cons, &urg) == 1;
  2827. }
  2828. break;
  2829. default:
  2830. release_sock(&smc->sk);
  2831. return -ENOIOCTLCMD;
  2832. }
  2833. release_sock(&smc->sk);
  2834. return put_user(answ, (int __user *)arg);
  2835. }
  2836. /* Map the affected portions of the rmbe into an spd, note the number of bytes
  2837. * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
  2838. * updates till whenever a respective page has been fully processed.
  2839. * Note that subsequent recv() calls have to wait till all splice() processing
  2840. * completed.
  2841. */
  2842. ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
  2843. struct pipe_inode_info *pipe, size_t len,
  2844. unsigned int flags)
  2845. {
  2846. struct sock *sk = sock->sk;
  2847. struct smc_sock *smc;
  2848. int rc = -ENOTCONN;
  2849. smc = smc_sk(sk);
  2850. lock_sock(sk);
  2851. if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
  2852. /* socket was connected before, no more data to read */
  2853. rc = 0;
  2854. goto out;
  2855. }
  2856. if (sk->sk_state == SMC_INIT ||
  2857. sk->sk_state == SMC_LISTEN ||
  2858. sk->sk_state == SMC_CLOSED)
  2859. goto out;
  2860. if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
  2861. rc = 0;
  2862. goto out;
  2863. }
  2864. if (smc->use_fallback) {
  2865. rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
  2866. pipe, len, flags);
  2867. } else {
  2868. if (*ppos) {
  2869. rc = -ESPIPE;
  2870. goto out;
  2871. }
  2872. if (flags & SPLICE_F_NONBLOCK)
  2873. flags = MSG_DONTWAIT;
  2874. else
  2875. flags = 0;
  2876. SMC_STAT_INC(smc, splice_cnt);
  2877. rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
  2878. }
  2879. out:
  2880. release_sock(sk);
  2881. return rc;
  2882. }
  /* Socket operations installed on PF_SMC sockets by smc_create().
   * must look like tcp
   */
  static const struct proto_ops smc_sock_ops = {
  	.family		= PF_SMC,
  	.owner		= THIS_MODULE,
  	.release	= smc_release,
  	.bind		= smc_bind,
  	.connect	= smc_connect,
  	.socketpair	= sock_no_socketpair,	/* generic "not supported" stub */
  	.accept		= smc_accept,
  	.getname	= smc_getname,
  	.poll		= smc_poll,
  	.ioctl		= smc_ioctl,
  	.listen		= smc_listen,
  	.shutdown	= smc_shutdown,
  	.setsockopt	= smc_setsockopt,
  	.getsockopt	= smc_getsockopt,
  	.sendmsg	= smc_sendmsg,
  	.recvmsg	= smc_recvmsg,
  	.mmap		= sock_no_mmap,		/* generic "not supported" stub */
  	.splice_read	= smc_splice_read,
  };
  2904. int smc_create_clcsk(struct net *net, struct sock *sk, int family)
  2905. {
  2906. struct smc_sock *smc = smc_sk(sk);
  2907. int rc;
  2908. rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
  2909. &smc->clcsock);
  2910. if (rc)
  2911. return rc;
  2912. /* smc_clcsock_release() does not wait smc->clcsock->sk's
  2913. * destruction; its sk_state might not be TCP_CLOSE after
  2914. * smc->sk is close()d, and TCP timers can be fired later,
  2915. * which need net ref.
  2916. */
  2917. sk = smc->clcsock->sk;
  2918. sk_net_refcnt_upgrade(sk);
  2919. return 0;
  2920. }
  2921. static int smc_create(struct net *net, struct socket *sock, int protocol,
  2922. int kern)
  2923. {
  2924. int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
  2925. struct sock *sk;
  2926. int rc;
  2927. rc = -ESOCKTNOSUPPORT;
  2928. if (sock->type != SOCK_STREAM)
  2929. goto out;
  2930. rc = -EPROTONOSUPPORT;
  2931. if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
  2932. goto out;
  2933. rc = -ENOBUFS;
  2934. sock->ops = &smc_sock_ops;
  2935. sock->state = SS_UNCONNECTED;
  2936. sk = smc_sock_alloc(net, sock, protocol);
  2937. if (!sk)
  2938. goto out;
  2939. rc = smc_create_clcsk(net, sk, family);
  2940. if (rc) {
  2941. sk_common_release(sk);
  2942. sock->sk = NULL;
  2943. }
  2944. out:
  2945. return rc;
  2946. }
  /* Protocol family descriptor for PF_SMC; registered with sock_register()
   * in smc_init() and removed with sock_unregister() in smc_exit().
   */
  static const struct net_proto_family smc_sock_family_ops = {
  	.family	= PF_SMC,
  	.owner	= THIS_MODULE,
  	.create	= smc_create,	/* called to set up a new PF_SMC socket */
  };
  /* pernet id of the SMC per-namespace data; referenced by smc_net_ops.id */
  unsigned int smc_net_id;
  2953. static __net_init int smc_net_init(struct net *net)
  2954. {
  2955. int rc;
  2956. rc = smc_sysctl_net_init(net);
  2957. if (rc)
  2958. return rc;
  2959. return smc_pnet_net_init(net);
  2960. }
  /* Per network namespace teardown, counterpart of smc_net_init(): releases
   * the sysctl state and the pnet state of @net.
   */
  static void __net_exit smc_net_exit(struct net *net)
  {
  	smc_sysctl_net_exit(net);
  	smc_pnet_net_exit(net);
  }
  /* Per network namespace init hook of smc_net_stat_ops: forwards to
   * smc_stats_init() and returns its result.
   */
  static __net_init int smc_net_stat_init(struct net *net)
  {
  	return smc_stats_init(net);
  }
  /* Per network namespace exit hook of smc_net_stat_ops: forwards to
   * smc_stats_exit().
   */
  static void __net_exit smc_net_stat_exit(struct net *net)
  {
  	smc_stats_exit(net);
  }
  /* Per-namespace operations for the sysctl and pnet state. Supplying .id
   * and .size requests a per-namespace area of sizeof(struct smc_net).
   */
  static struct pernet_operations smc_net_ops = {
  	.init = smc_net_init,
  	.exit = smc_net_exit,
  	.id   = &smc_net_id,
  	.size = sizeof(struct smc_net),
  };
  /* Per-namespace operations for the SMC statistics; registered separately
   * from smc_net_ops in smc_init().
   */
  static struct pernet_operations smc_net_stat_ops = {
  	.init = smc_net_stat_init,
  	.exit = smc_net_stat_exit,
  };
  /* Module initialization.
   *
   * Brings up the SMC subsystems step by step: pernet operations, ISM, CLC,
   * netlink, pnet, the three workqueues, core, LLC, CDC, the v4/v6 protos,
   * the PF_SMC socket family, the IB client, smc_inet and the BPF handshake
   * control. The tcp_have_smc static branch is enabled only after every
   * step succeeded.
   *
   * On failure the labels at the end undo the steps completed so far, in
   * reverse order, and the error code of the failing step is returned
   * (-ENOMEM for a failed workqueue allocation).
   */
  static int __init smc_init(void)
  {
  	int rc;

  	rc = register_pernet_subsys(&smc_net_ops);
  	if (rc)
  		return rc;

  	rc = register_pernet_subsys(&smc_net_stat_ops);
  	if (rc)
  		goto out_pernet_subsys;

  	rc = smc_ism_init();
  	if (rc)
  		goto out_pernet_subsys_stat;
  	smc_clc_init();

  	rc = smc_nl_init();
  	if (rc)
  		goto out_ism;

  	rc = smc_pnet_init();
  	if (rc)
  		goto out_nl;

  	/* the workqueue allocations below report failure as -ENOMEM */
  	rc = -ENOMEM;

  	smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", WQ_PERCPU, 0);
  	if (!smc_tcp_ls_wq)
  		goto out_pnet;

  	smc_hs_wq = alloc_workqueue("smc_hs_wq", WQ_PERCPU, 0);
  	if (!smc_hs_wq)
  		goto out_alloc_tcp_ls_wq;

  	smc_close_wq = alloc_workqueue("smc_close_wq", WQ_PERCPU, 0);
  	if (!smc_close_wq)
  		goto out_alloc_hs_wq;

  	rc = smc_core_init();
  	if (rc) {
  		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
  		goto out_alloc_wqs;
  	}

  	/* LLC and CDC init have no unwind label of their own: a failure
  	 * here or in the first proto_register() unwinds from out_core
  	 */
  	rc = smc_llc_init();
  	if (rc) {
  		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
  		goto out_core;
  	}

  	rc = smc_cdc_init();
  	if (rc) {
  		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
  		goto out_core;
  	}

  	rc = proto_register(&smc_proto, 1);
  	if (rc) {
  		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
  		goto out_core;
  	}

  	rc = proto_register(&smc_proto6, 1);
  	if (rc) {
  		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
  		goto out_proto;
  	}

  	rc = sock_register(&smc_sock_family_ops);
  	if (rc) {
  		pr_err("%s: sock_register fails with %d\n", __func__, rc);
  		goto out_proto6;
  	}
  	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
  	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

  	rc = smc_ib_register_client();
  	if (rc) {
  		pr_err("%s: ib_register fails with %d\n", __func__, rc);
  		goto out_sock;
  	}

  	rc = smc_inet_init();
  	if (rc) {
  		pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
  		goto out_ib;
  	}
  	rc = bpf_smc_hs_ctrl_init();
  	if (rc) {
  		pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__,
  		       rc);
  		goto out_inet;
  	}
  	static_branch_enable(&tcp_have_smc);
  	return 0;

  	/* error unwinding: each label undoes one earlier step */
  out_inet:
  	smc_inet_exit();
  out_ib:
  	smc_ib_unregister_client();
  out_sock:
  	sock_unregister(PF_SMC);
  out_proto6:
  	proto_unregister(&smc_proto6);
  out_proto:
  	proto_unregister(&smc_proto);
  out_core:
  	smc_core_exit();
  out_alloc_wqs:
  	destroy_workqueue(smc_close_wq);
  out_alloc_hs_wq:
  	destroy_workqueue(smc_hs_wq);
  out_alloc_tcp_ls_wq:
  	destroy_workqueue(smc_tcp_ls_wq);
  out_pnet:
  	smc_pnet_exit();
  out_nl:
  	smc_nl_exit();
  out_ism:
  	smc_clc_exit();
  	smc_ism_exit();
  out_pernet_subsys_stat:
  	unregister_pernet_subsys(&smc_net_stat_ops);
  out_pernet_subsys:
  	unregister_pernet_subsys(&smc_net_ops);

  	return rc;
  }
  /* Module teardown: disables the tcp_have_smc static branch first, then
   * shuts down what smc_init() set up (inet, socket family, core, IB client,
   * ISM, workqueues, protos, pnet, netlink, CLC, pernet operations) and
   * finally calls rcu_barrier().
   */
  static void __exit smc_exit(void)
  {
  	static_branch_disable(&tcp_have_smc);
  	smc_inet_exit();
  	sock_unregister(PF_SMC);
  	smc_core_exit();
  	smc_ib_unregister_client();
  	smc_ism_exit();
  	destroy_workqueue(smc_close_wq);
  	destroy_workqueue(smc_tcp_ls_wq);
  	destroy_workqueue(smc_hs_wq);
  	proto_unregister(&smc_proto6);
  	proto_unregister(&smc_proto);
  	smc_pnet_exit();
  	smc_nl_exit();
  	smc_clc_exit();
  	unregister_pernet_subsys(&smc_net_stat_ops);
  	unregister_pernet_subsys(&smc_net_ops);
  	/* NOTE(review): presumably waits for outstanding RCU callbacks
  	 * before the module text goes away -- confirm
  	 */
  	rcu_barrier();
  }
  /* module entry and exit points */
  module_init(smc_init);
  module_exit(smc_exit);

  MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
  MODULE_DESCRIPTION("smc socket address family");
  MODULE_LICENSE("GPL");
  /* module aliases for the PF_SMC family and for the inet protocol below */
  MODULE_ALIAS_NETPROTO(PF_SMC);
  /* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */
  MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1);
  #if IS_ENABLED(CONFIG_IPV6)
  MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1);
  #endif /* CONFIG_IPV6 */
  MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);