smc_core.c 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Shared Memory Communications over RDMA (SMC-R) and RoCE
  4. *
  5. * Basic Transport Functions exploiting Infiniband API
  6. *
  7. * Copyright IBM Corp. 2016
  8. *
  9. * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
  10. */
  11. #include <linux/socket.h>
  12. #include <linux/if_vlan.h>
  13. #include <linux/random.h>
  14. #include <linux/workqueue.h>
  15. #include <linux/wait.h>
  16. #include <linux/reboot.h>
  17. #include <linux/mutex.h>
  18. #include <linux/list.h>
  19. #include <linux/smc.h>
  20. #include <net/tcp.h>
  21. #include <net/sock.h>
  22. #include <rdma/ib_verbs.h>
  23. #include <rdma/ib_cache.h>
  24. #include "smc.h"
  25. #include "smc_clc.h"
  26. #include "smc_core.h"
  27. #include "smc_ib.h"
  28. #include "smc_wr.h"
  29. #include "smc_llc.h"
  30. #include "smc_cdc.h"
  31. #include "smc_close.h"
  32. #include "smc_ism.h"
  33. #include "smc_netlink.h"
  34. #include "smc_stats.h"
  35. #include "smc_tracepoint.h"
/* NOTE(review): SMC_LGR_NUM_INCR is not referenced in this part of the file;
 * presumably the step used when numbering link groups - confirm.
 */
#define SMC_LGR_NUM_INCR 256
/* delay handed to mod_delayed_work() for lgr->free_work, see
 * smc_lgr_schedule_free_work()
 */
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
/* delay used for SMC-R client link groups: 10 * HZ longer than the above */
#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
/* Global link group list; smc_lgr_list_head() hands out this list and its
 * lock for every link group that is not SMC-D.
 */
struct smc_lgr_list smc_lgr_list = { /* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
/* NOTE(review): presumably woken once link groups have been deleted; the
 * users of this wait queue are outside the visible part of the file - confirm.
 */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

/* forward declarations */
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
static void smc_link_down_work(struct work_struct *work);
  50. /* return head of link group list and its lock for a given link group */
  51. static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
  52. spinlock_t **lgr_lock)
  53. {
  54. if (lgr->is_smcd) {
  55. *lgr_lock = &lgr->smcd->lgr_lock;
  56. return &lgr->smcd->lgr_list;
  57. }
  58. *lgr_lock = &smc_lgr_list.lock;
  59. return &smc_lgr_list.list;
  60. }
  61. static void smc_ibdev_cnt_inc(struct smc_link *lnk)
  62. {
  63. atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
  64. }
  65. static void smc_ibdev_cnt_dec(struct smc_link *lnk)
  66. {
  67. atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
  68. }
  69. static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
  70. {
  71. /* client link group creation always follows the server link group
  72. * creation. For client use a somewhat higher removal delay time,
  73. * otherwise there is a risk of out-of-sync link groups.
  74. */
  75. if (!lgr->freeing) {
  76. mod_delayed_work(system_percpu_wq, &lgr->free_work,
  77. (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
  78. SMC_LGR_FREE_DELAY_CLNT :
  79. SMC_LGR_FREE_DELAY_SERV);
  80. }
  81. }
  82. /* Register connection's alert token in our lookup structure.
  83. * To use rbtrees we have to implement our own insert core.
  84. * Requires @conns_lock
  85. * @smc connection to register
  86. * Returns 0 on success, != otherwise.
  87. */
  88. static void smc_lgr_add_alert_token(struct smc_connection *conn)
  89. {
  90. struct rb_node **link, *parent = NULL;
  91. u32 token = conn->alert_token_local;
  92. link = &conn->lgr->conns_all.rb_node;
  93. while (*link) {
  94. struct smc_connection *cur = rb_entry(*link,
  95. struct smc_connection, alert_node);
  96. parent = *link;
  97. if (cur->alert_token_local > token)
  98. link = &parent->rb_left;
  99. else
  100. link = &parent->rb_right;
  101. }
  102. /* Put the new node there */
  103. rb_link_node(&conn->alert_node, parent, link);
  104. rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
  105. }
  106. /* assign an SMC-R link to the connection */
  107. static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
  108. {
  109. enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
  110. SMC_LNK_ACTIVE;
  111. int i, j;
  112. /* do link balancing */
  113. conn->lnk = NULL; /* reset conn->lnk first */
  114. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  115. struct smc_link *lnk = &conn->lgr->lnk[i];
  116. if (lnk->state != expected || lnk->link_is_asym)
  117. continue;
  118. if (conn->lgr->role == SMC_CLNT) {
  119. conn->lnk = lnk; /* temporary, SMC server assigns link*/
  120. break;
  121. }
  122. if (conn->lgr->conns_num % 2) {
  123. for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
  124. struct smc_link *lnk2;
  125. lnk2 = &conn->lgr->lnk[j];
  126. if (lnk2->state == expected &&
  127. !lnk2->link_is_asym) {
  128. conn->lnk = lnk2;
  129. break;
  130. }
  131. }
  132. }
  133. if (!conn->lnk)
  134. conn->lnk = lnk;
  135. break;
  136. }
  137. if (!conn->lnk)
  138. return SMC_CLC_DECL_NOACTLINK;
  139. atomic_inc(&conn->lnk->conn_cnt);
  140. return 0;
  141. }
  142. /* Register connection in link group by assigning an alert token
  143. * registered in a search tree.
  144. * Requires @conns_lock
  145. * Note that '0' is a reserved value and not assigned.
  146. */
  147. static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
  148. {
  149. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  150. static atomic_t nexttoken = ATOMIC_INIT(0);
  151. int rc;
  152. if (!conn->lgr->is_smcd) {
  153. rc = smcr_lgr_conn_assign_link(conn, first);
  154. if (rc) {
  155. conn->lgr = NULL;
  156. return rc;
  157. }
  158. }
  159. /* find a new alert_token_local value not yet used by some connection
  160. * in this link group
  161. */
  162. sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
  163. while (!conn->alert_token_local) {
  164. conn->alert_token_local = atomic_inc_return(&nexttoken);
  165. if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
  166. conn->alert_token_local = 0;
  167. }
  168. smc_lgr_add_alert_token(conn);
  169. conn->lgr->conns_num++;
  170. return 0;
  171. }
  172. /* Unregister connection and reset the alert token of the given connection<
  173. */
  174. static void __smc_lgr_unregister_conn(struct smc_connection *conn)
  175. {
  176. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  177. struct smc_link_group *lgr = conn->lgr;
  178. rb_erase(&conn->alert_node, &lgr->conns_all);
  179. if (conn->lnk)
  180. atomic_dec(&conn->lnk->conn_cnt);
  181. lgr->conns_num--;
  182. conn->alert_token_local = 0;
  183. sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
  184. }
  185. /* Unregister connection from lgr
  186. */
  187. static void smc_lgr_unregister_conn(struct smc_connection *conn)
  188. {
  189. struct smc_link_group *lgr = conn->lgr;
  190. if (!smc_conn_lgr_valid(conn))
  191. return;
  192. write_lock_bh(&lgr->conns_lock);
  193. if (conn->alert_token_local) {
  194. __smc_lgr_unregister_conn(conn);
  195. }
  196. write_unlock_bh(&lgr->conns_lock);
  197. }
  198. static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
  199. bool is_rmb,
  200. struct list_head *buf_list,
  201. struct smc_buf_desc *buf_desc)
  202. {
  203. list_add(&buf_desc->list, buf_list);
  204. if (is_rmb) {
  205. lgr->alloc_rmbs += buf_desc->len;
  206. lgr->alloc_rmbs +=
  207. lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
  208. } else {
  209. lgr->alloc_sndbufs += buf_desc->len;
  210. }
  211. }
  212. static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
  213. bool is_rmb,
  214. struct smc_buf_desc *buf_desc)
  215. {
  216. list_del(&buf_desc->list);
  217. if (is_rmb) {
  218. lgr->alloc_rmbs -= buf_desc->len;
  219. lgr->alloc_rmbs -=
  220. lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
  221. } else {
  222. lgr->alloc_sndbufs -= buf_desc->len;
  223. }
  224. }
  225. int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
  226. {
  227. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  228. char hostname[SMC_MAX_HOSTNAME_LEN + 1];
  229. char smc_seid[SMC_MAX_EID_LEN + 1];
  230. struct nlattr *attrs;
  231. u8 *seid = NULL;
  232. u8 *host = NULL;
  233. void *nlh;
  234. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  235. &smc_gen_nl_family, NLM_F_MULTI,
  236. SMC_NETLINK_GET_SYS_INFO);
  237. if (!nlh)
  238. goto errmsg;
  239. if (cb_ctx->pos[0])
  240. goto errout;
  241. attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
  242. if (!attrs)
  243. goto errout;
  244. if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
  245. goto errattr;
  246. if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
  247. goto errattr;
  248. if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
  249. goto errattr;
  250. if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
  251. goto errattr;
  252. smc_clc_get_hostname(&host);
  253. if (host) {
  254. memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
  255. hostname[SMC_MAX_HOSTNAME_LEN] = 0;
  256. if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
  257. goto errattr;
  258. }
  259. if (smc_ism_is_v2_capable()) {
  260. smc_ism_get_system_eid(&seid);
  261. memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
  262. smc_seid[SMC_MAX_EID_LEN] = 0;
  263. if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
  264. goto errattr;
  265. }
  266. nla_nest_end(skb, attrs);
  267. genlmsg_end(skb, nlh);
  268. cb_ctx->pos[0] = 1;
  269. return skb->len;
  270. errattr:
  271. nla_nest_cancel(skb, attrs);
  272. errout:
  273. genlmsg_cancel(skb, nlh);
  274. errmsg:
  275. return skb->len;
  276. }
  277. /* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
  278. static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
  279. struct sk_buff *skb,
  280. struct netlink_callback *cb,
  281. struct nlattr *v2_attrs)
  282. {
  283. char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
  284. char smc_eid[SMC_MAX_EID_LEN + 1];
  285. if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
  286. goto errv2attr;
  287. if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
  288. goto errv2attr;
  289. if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
  290. goto errv2attr;
  291. memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
  292. smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
  293. if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
  294. goto errv2attr;
  295. memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
  296. smc_eid[SMC_MAX_EID_LEN] = 0;
  297. if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
  298. goto errv2attr;
  299. nla_nest_end(skb, v2_attrs);
  300. return 0;
  301. errv2attr:
  302. nla_nest_cancel(skb, v2_attrs);
  303. return -EMSGSIZE;
  304. }
  305. static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
  306. struct sk_buff *skb,
  307. struct netlink_callback *cb)
  308. {
  309. struct nlattr *v2_attrs;
  310. v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
  311. if (!v2_attrs)
  312. goto errattr;
  313. if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
  314. goto errv2attr;
  315. if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns))
  316. goto errv2attr;
  317. if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links))
  318. goto errv2attr;
  319. nla_nest_end(skb, v2_attrs);
  320. return 0;
  321. errv2attr:
  322. nla_nest_cancel(skb, v2_attrs);
  323. errattr:
  324. return -EMSGSIZE;
  325. }
  326. static int smc_nl_fill_lgr(struct smc_link_group *lgr,
  327. struct sk_buff *skb,
  328. struct netlink_callback *cb)
  329. {
  330. char smc_target[SMC_MAX_PNETID_LEN + 1];
  331. struct nlattr *attrs, *v2_attrs;
  332. attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
  333. if (!attrs)
  334. goto errout;
  335. if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
  336. goto errattr;
  337. if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
  338. goto errattr;
  339. if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
  340. goto errattr;
  341. if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
  342. goto errattr;
  343. if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
  344. goto errattr;
  345. if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
  346. goto errattr;
  347. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
  348. lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
  349. goto errattr;
  350. memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
  351. smc_target[SMC_MAX_PNETID_LEN] = 0;
  352. if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
  353. goto errattr;
  354. if (nla_put_uint(skb, SMC_NLA_LGR_R_SNDBUF_ALLOC, lgr->alloc_sndbufs))
  355. goto errattr;
  356. if (nla_put_uint(skb, SMC_NLA_LGR_R_RMB_ALLOC, lgr->alloc_rmbs))
  357. goto errattr;
  358. if (lgr->smc_version > SMC_V1) {
  359. v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
  360. if (!v2_attrs)
  361. goto errattr;
  362. if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
  363. goto errattr;
  364. if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
  365. goto errattr;
  366. }
  367. nla_nest_end(skb, attrs);
  368. return 0;
  369. errattr:
  370. nla_nest_cancel(skb, attrs);
  371. errout:
  372. return -EMSGSIZE;
  373. }
  374. static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
  375. struct smc_link *link,
  376. struct sk_buff *skb,
  377. struct netlink_callback *cb)
  378. {
  379. char smc_ibname[IB_DEVICE_NAME_MAX];
  380. u8 smc_gid_target[41];
  381. struct nlattr *attrs;
  382. u32 link_uid = 0;
  383. void *nlh;
  384. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  385. &smc_gen_nl_family, NLM_F_MULTI,
  386. SMC_NETLINK_GET_LINK_SMCR);
  387. if (!nlh)
  388. goto errmsg;
  389. attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
  390. if (!attrs)
  391. goto errout;
  392. if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
  393. goto errattr;
  394. if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
  395. goto errattr;
  396. if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
  397. atomic_read(&link->conn_cnt)))
  398. goto errattr;
  399. if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
  400. goto errattr;
  401. if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
  402. goto errattr;
  403. snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
  404. if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
  405. goto errattr;
  406. memcpy(&link_uid, link->link_uid, sizeof(link_uid));
  407. if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
  408. goto errattr;
  409. memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
  410. if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
  411. goto errattr;
  412. memset(smc_gid_target, 0, sizeof(smc_gid_target));
  413. smc_gid_be16_convert(smc_gid_target, link->gid);
  414. if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
  415. goto errattr;
  416. memset(smc_gid_target, 0, sizeof(smc_gid_target));
  417. smc_gid_be16_convert(smc_gid_target, link->peer_gid);
  418. if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
  419. goto errattr;
  420. nla_nest_end(skb, attrs);
  421. genlmsg_end(skb, nlh);
  422. return 0;
  423. errattr:
  424. nla_nest_cancel(skb, attrs);
  425. errout:
  426. genlmsg_cancel(skb, nlh);
  427. errmsg:
  428. return -EMSGSIZE;
  429. }
  430. static int smc_nl_handle_lgr(struct smc_link_group *lgr,
  431. struct sk_buff *skb,
  432. struct netlink_callback *cb,
  433. bool list_links)
  434. {
  435. void *nlh;
  436. int i;
  437. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  438. &smc_gen_nl_family, NLM_F_MULTI,
  439. SMC_NETLINK_GET_LGR_SMCR);
  440. if (!nlh)
  441. goto errmsg;
  442. if (smc_nl_fill_lgr(lgr, skb, cb))
  443. goto errout;
  444. genlmsg_end(skb, nlh);
  445. if (!list_links)
  446. goto out;
  447. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  448. if (!smc_link_usable(&lgr->lnk[i]))
  449. continue;
  450. if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
  451. goto errout;
  452. }
  453. out:
  454. return 0;
  455. errout:
  456. genlmsg_cancel(skb, nlh);
  457. errmsg:
  458. return -EMSGSIZE;
  459. }
  460. static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
  461. struct sk_buff *skb,
  462. struct netlink_callback *cb,
  463. bool list_links)
  464. {
  465. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  466. struct smc_link_group *lgr;
  467. int snum = cb_ctx->pos[0];
  468. int num = 0;
  469. spin_lock_bh(&smc_lgr->lock);
  470. list_for_each_entry(lgr, &smc_lgr->list, list) {
  471. if (num < snum)
  472. goto next;
  473. if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
  474. goto errout;
  475. next:
  476. num++;
  477. }
  478. errout:
  479. spin_unlock_bh(&smc_lgr->lock);
  480. cb_ctx->pos[0] = num;
  481. }
  482. static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
  483. struct sk_buff *skb,
  484. struct netlink_callback *cb)
  485. {
  486. char smc_pnet[SMC_MAX_PNETID_LEN + 1];
  487. struct smcd_dev *smcd = lgr->smcd;
  488. struct smcd_gid smcd_gid;
  489. struct nlattr *attrs;
  490. void *nlh;
  491. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  492. &smc_gen_nl_family, NLM_F_MULTI,
  493. SMC_NETLINK_GET_LGR_SMCD);
  494. if (!nlh)
  495. goto errmsg;
  496. attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
  497. if (!attrs)
  498. goto errout;
  499. if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
  500. goto errattr;
  501. copy_to_smcdgid(&smcd_gid, &smcd->dibs->gid);
  502. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
  503. smcd_gid.gid, SMC_NLA_LGR_D_PAD))
  504. goto errattr;
  505. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID,
  506. smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD))
  507. goto errattr;
  508. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid,
  509. SMC_NLA_LGR_D_PAD))
  510. goto errattr;
  511. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID,
  512. lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD))
  513. goto errattr;
  514. if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
  515. goto errattr;
  516. if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
  517. goto errattr;
  518. if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
  519. goto errattr;
  520. if (nla_put_uint(skb, SMC_NLA_LGR_D_SNDBUF_ALLOC, lgr->alloc_sndbufs))
  521. goto errattr;
  522. if (nla_put_uint(skb, SMC_NLA_LGR_D_DMB_ALLOC, lgr->alloc_rmbs))
  523. goto errattr;
  524. memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
  525. smc_pnet[SMC_MAX_PNETID_LEN] = 0;
  526. if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
  527. goto errattr;
  528. if (lgr->smc_version > SMC_V1) {
  529. struct nlattr *v2_attrs;
  530. v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
  531. if (!v2_attrs)
  532. goto errattr;
  533. if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
  534. goto errattr;
  535. }
  536. nla_nest_end(skb, attrs);
  537. genlmsg_end(skb, nlh);
  538. return 0;
  539. errattr:
  540. nla_nest_cancel(skb, attrs);
  541. errout:
  542. genlmsg_cancel(skb, nlh);
  543. errmsg:
  544. return -EMSGSIZE;
  545. }
  546. static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
  547. struct sk_buff *skb,
  548. struct netlink_callback *cb)
  549. {
  550. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  551. struct smc_link_group *lgr;
  552. int snum = cb_ctx->pos[1];
  553. int rc = 0, num = 0;
  554. spin_lock_bh(&dev->lgr_lock);
  555. list_for_each_entry(lgr, &dev->lgr_list, list) {
  556. if (!lgr->is_smcd)
  557. continue;
  558. if (num < snum)
  559. goto next;
  560. rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
  561. if (rc)
  562. goto errout;
  563. next:
  564. num++;
  565. }
  566. errout:
  567. spin_unlock_bh(&dev->lgr_lock);
  568. cb_ctx->pos[1] = num;
  569. return rc;
  570. }
  571. static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
  572. struct sk_buff *skb,
  573. struct netlink_callback *cb)
  574. {
  575. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  576. struct smcd_dev *smcd_dev;
  577. int snum = cb_ctx->pos[0];
  578. int rc = 0, num = 0;
  579. mutex_lock(&dev_list->mutex);
  580. list_for_each_entry(smcd_dev, &dev_list->list, list) {
  581. if (list_empty(&smcd_dev->lgr_list))
  582. continue;
  583. if (num < snum)
  584. goto next;
  585. rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
  586. if (rc)
  587. goto errout;
  588. next:
  589. num++;
  590. }
  591. errout:
  592. mutex_unlock(&dev_list->mutex);
  593. cb_ctx->pos[0] = num;
  594. return rc;
  595. }
  596. int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
  597. {
  598. bool list_links = false;
  599. smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
  600. return skb->len;
  601. }
  602. int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
  603. {
  604. bool list_links = true;
  605. smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
  606. return skb->len;
  607. }
  608. int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
  609. {
  610. smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
  611. return skb->len;
  612. }
  613. void smc_lgr_cleanup_early(struct smc_link_group *lgr)
  614. {
  615. spinlock_t *lgr_lock;
  616. if (!lgr)
  617. return;
  618. smc_lgr_list_head(lgr, &lgr_lock);
  619. spin_lock_bh(lgr_lock);
  620. /* do not use this link group for new connections */
  621. if (!list_empty(&lgr->list))
  622. list_del_init(&lgr->list);
  623. spin_unlock_bh(lgr_lock);
  624. __smc_lgr_terminate(lgr, true);
  625. }
  626. static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
  627. {
  628. int i;
  629. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  630. struct smc_link *lnk = &lgr->lnk[i];
  631. if (smc_link_sendable(lnk))
  632. lnk->state = SMC_LNK_INACTIVE;
  633. }
  634. wake_up_all(&lgr->llc_msg_waiter);
  635. wake_up_all(&lgr->llc_flow_waiter);
  636. }
  637. static void smc_lgr_free(struct smc_link_group *lgr);
  638. static void smc_lgr_free_work(struct work_struct *work)
  639. {
  640. struct smc_link_group *lgr = container_of(to_delayed_work(work),
  641. struct smc_link_group,
  642. free_work);
  643. spinlock_t *lgr_lock;
  644. bool conns;
  645. smc_lgr_list_head(lgr, &lgr_lock);
  646. spin_lock_bh(lgr_lock);
  647. if (lgr->freeing) {
  648. spin_unlock_bh(lgr_lock);
  649. return;
  650. }
  651. read_lock_bh(&lgr->conns_lock);
  652. conns = RB_EMPTY_ROOT(&lgr->conns_all);
  653. read_unlock_bh(&lgr->conns_lock);
  654. if (!conns) { /* number of lgr connections is no longer zero */
  655. spin_unlock_bh(lgr_lock);
  656. return;
  657. }
  658. list_del_init(&lgr->list); /* remove from smc_lgr_list */
  659. lgr->freeing = 1; /* this instance does the freeing, no new schedule */
  660. spin_unlock_bh(lgr_lock);
  661. cancel_delayed_work(&lgr->free_work);
  662. if (!lgr->is_smcd && !lgr->terminating)
  663. smc_llc_send_link_delete_all(lgr, true,
  664. SMC_LLC_DEL_PROG_INIT_TERM);
  665. if (lgr->is_smcd && !lgr->terminating)
  666. smc_ism_signal_shutdown(lgr);
  667. if (!lgr->is_smcd)
  668. smcr_lgr_link_deactivate_all(lgr);
  669. smc_lgr_free(lgr);
  670. }
  671. static void smc_lgr_terminate_work(struct work_struct *work)
  672. {
  673. struct smc_link_group *lgr = container_of(work, struct smc_link_group,
  674. terminate_work);
  675. __smc_lgr_terminate(lgr, true);
  676. }
  677. /* return next unique link id for the lgr */
  678. static u8 smcr_next_link_id(struct smc_link_group *lgr)
  679. {
  680. u8 link_id;
  681. int i;
  682. while (1) {
  683. again:
  684. link_id = ++lgr->next_link_id;
  685. if (!link_id) /* skip zero as link_id */
  686. link_id = ++lgr->next_link_id;
  687. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  688. if (smc_link_usable(&lgr->lnk[i]) &&
  689. lgr->lnk[i].link_id == link_id)
  690. goto again;
  691. }
  692. break;
  693. }
  694. return link_id;
  695. }
  696. static void smcr_copy_dev_info_to_link(struct smc_link *link)
  697. {
  698. struct smc_ib_device *smcibdev = link->smcibdev;
  699. snprintf(link->ibname, sizeof(link->ibname), "%s",
  700. smcibdev->ibdev->name);
  701. link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
  702. }
  703. int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
  704. u8 link_idx, struct smc_init_info *ini)
  705. {
  706. struct smc_ib_device *smcibdev;
  707. u8 rndvec[3];
  708. int rc;
  709. if (lgr->smc_version == SMC_V2) {
  710. lnk->smcibdev = ini->smcrv2.ib_dev_v2;
  711. lnk->ibport = ini->smcrv2.ib_port_v2;
  712. lnk->wr_rx_sge_cnt = lnk->smcibdev->ibdev->attrs.max_recv_sge < 2 ? 1 : 2;
  713. lnk->wr_rx_buflen = smc_link_shared_v2_rxbuf(lnk) ?
  714. SMC_WR_BUF_SIZE : SMC_WR_BUF_V2_SIZE;
  715. } else {
  716. lnk->smcibdev = ini->ib_dev;
  717. lnk->ibport = ini->ib_port;
  718. lnk->wr_rx_sge_cnt = 1;
  719. lnk->wr_rx_buflen = SMC_WR_BUF_SIZE;
  720. }
  721. get_device(&lnk->smcibdev->ibdev->dev);
  722. atomic_inc(&lnk->smcibdev->lnk_cnt);
  723. refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
  724. lnk->clearing = 0;
  725. lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
  726. lnk->link_id = smcr_next_link_id(lgr);
  727. lnk->max_send_wr = lgr->max_send_wr;
  728. lnk->max_recv_wr = lgr->max_recv_wr;
  729. lnk->lgr = lgr;
  730. smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
  731. lnk->link_idx = link_idx;
  732. lnk->wr_rx_id_compl = 0;
  733. smc_ibdev_cnt_inc(lnk);
  734. smcr_copy_dev_info_to_link(lnk);
  735. atomic_set(&lnk->conn_cnt, 0);
  736. smc_llc_link_set_uid(lnk);
  737. INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
  738. if (!lnk->smcibdev->initialized) {
  739. rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
  740. if (rc)
  741. goto out;
  742. }
  743. get_random_bytes(rndvec, sizeof(rndvec));
  744. lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
  745. (rndvec[2] << 16);
  746. rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
  747. ini->vlan_id, lnk->gid, &lnk->sgid_index,
  748. lgr->smc_version == SMC_V2 ?
  749. &ini->smcrv2 : NULL);
  750. if (rc)
  751. goto out;
  752. rc = smc_llc_link_init(lnk);
  753. if (rc)
  754. goto out;
  755. rc = smc_ib_create_protection_domain(lnk);
  756. if (rc)
  757. goto clear_llc_lnk;
  758. do {
  759. rc = smc_ib_create_queue_pair(lnk);
  760. if (rc)
  761. goto dealloc_pd;
  762. rc = smc_wr_alloc_link_mem(lnk);
  763. if (!rc)
  764. break;
  765. else if (rc != -ENOMEM) /* give up */
  766. goto destroy_qp;
  767. /* retry with smaller ... */
  768. lnk->max_send_wr /= 2;
  769. lnk->max_recv_wr /= 2;
  770. /* ... unless droping below old SMC_WR_BUF_SIZE */
  771. if (lnk->max_send_wr < 16 || lnk->max_recv_wr < 48)
  772. goto destroy_qp;
  773. smc_ib_destroy_queue_pair(lnk);
  774. } while (1);
  775. rc = smc_wr_create_link(lnk);
  776. if (rc)
  777. goto free_link_mem;
  778. lnk->state = SMC_LNK_ACTIVATING;
  779. return 0;
  780. free_link_mem:
  781. smc_wr_free_link_mem(lnk);
  782. destroy_qp:
  783. smc_ib_destroy_queue_pair(lnk);
  784. dealloc_pd:
  785. smc_ib_dealloc_protection_domain(lnk);
  786. clear_llc_lnk:
  787. smc_llc_link_clear(lnk, false);
  788. out:
  789. smc_ibdev_cnt_dec(lnk);
  790. put_device(&lnk->smcibdev->ibdev->dev);
  791. smcibdev = lnk->smcibdev;
  792. memset(lnk, 0, sizeof(struct smc_link));
  793. lnk->state = SMC_LNK_UNUSED;
  794. if (!atomic_dec_return(&smcibdev->lnk_cnt))
  795. wake_up(&smcibdev->lnks_deleted);
  796. smc_lgr_put(lgr); /* lgr_hold above */
  797. return rc;
  798. }
  799. /* create a new SMC link group */
  800. static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
  801. {
  802. struct smc_link_group *lgr;
  803. struct list_head *lgr_list;
  804. struct smcd_dev *smcd;
  805. struct smc_link *lnk;
  806. spinlock_t *lgr_lock;
  807. u8 link_idx;
  808. int rc = 0;
  809. int i;
  810. if (ini->is_smcd && ini->vlan_id) {
  811. if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
  812. ini->vlan_id)) {
  813. rc = SMC_CLC_DECL_ISMVLANERR;
  814. goto out;
  815. }
  816. }
  817. lgr = kzalloc_obj(*lgr);
  818. if (!lgr) {
  819. rc = SMC_CLC_DECL_MEM;
  820. goto ism_put_vlan;
  821. }
  822. lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", WQ_PERCPU, 0,
  823. SMC_LGR_ID_SIZE, &lgr->id);
  824. if (!lgr->tx_wq) {
  825. rc = -ENOMEM;
  826. goto free_lgr;
  827. }
  828. lgr->is_smcd = ini->is_smcd;
  829. lgr->sync_err = 0;
  830. lgr->terminating = 0;
  831. lgr->freeing = 0;
  832. lgr->vlan_id = ini->vlan_id;
  833. refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
  834. init_rwsem(&lgr->sndbufs_lock);
  835. init_rwsem(&lgr->rmbs_lock);
  836. rwlock_init(&lgr->conns_lock);
  837. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  838. INIT_LIST_HEAD(&lgr->sndbufs[i]);
  839. INIT_LIST_HEAD(&lgr->rmbs[i]);
  840. }
  841. lgr->next_link_id = 0;
  842. smc_lgr_list.num += SMC_LGR_NUM_INCR;
  843. memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
  844. INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
  845. INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
  846. lgr->conns_all = RB_ROOT;
  847. if (ini->is_smcd) {
  848. /* SMC-D specific settings */
  849. smcd = ini->ism_dev[ini->ism_selected];
  850. get_device(&smcd->dibs->dev);
  851. lgr->peer_gid.gid =
  852. ini->ism_peer_gid[ini->ism_selected].gid;
  853. lgr->peer_gid.gid_ext =
  854. ini->ism_peer_gid[ini->ism_selected].gid_ext;
  855. lgr->smcd = ini->ism_dev[ini->ism_selected];
  856. lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
  857. lgr_lock = &lgr->smcd->lgr_lock;
  858. lgr->smc_version = ini->smcd_version;
  859. lgr->peer_shutdown = 0;
  860. atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
  861. } else {
  862. /* SMC-R specific settings */
  863. struct smc_ib_device *ibdev;
  864. int ibport;
  865. lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
  866. lgr->smc_version = ini->smcr_version;
  867. memcpy(lgr->peer_systemid, ini->peer_systemid,
  868. SMC_SYSTEMID_LEN);
  869. if (lgr->smc_version == SMC_V2) {
  870. ibdev = ini->smcrv2.ib_dev_v2;
  871. ibport = ini->smcrv2.ib_port_v2;
  872. lgr->saddr = ini->smcrv2.saddr;
  873. lgr->uses_gateway = ini->smcrv2.uses_gateway;
  874. memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
  875. ETH_ALEN);
  876. lgr->max_conns = ini->max_conns;
  877. lgr->max_links = ini->max_links;
  878. } else {
  879. ibdev = ini->ib_dev;
  880. ibport = ini->ib_port;
  881. lgr->max_conns = SMC_CONN_PER_LGR_MAX;
  882. lgr->max_links = SMC_LINKS_ADD_LNK_MAX;
  883. }
  884. memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
  885. SMC_MAX_PNETID_LEN);
  886. rc = smc_wr_alloc_lgr_mem(lgr);
  887. if (rc)
  888. goto free_wq;
  889. smc_llc_lgr_init(lgr, smc);
  890. link_idx = SMC_SINGLE_LINK;
  891. lnk = &lgr->lnk[link_idx];
  892. rc = smcr_link_init(lgr, lnk, link_idx, ini);
  893. if (rc) {
  894. smc_wr_free_lgr_mem(lgr);
  895. goto free_wq;
  896. }
  897. lgr->net = smc_ib_net(lnk->smcibdev);
  898. lgr_list = &smc_lgr_list.list;
  899. lgr_lock = &smc_lgr_list.lock;
  900. lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
  901. atomic_inc(&lgr_cnt);
  902. }
  903. smc->conn.lgr = lgr;
  904. spin_lock_bh(lgr_lock);
  905. list_add_tail(&lgr->list, lgr_list);
  906. spin_unlock_bh(lgr_lock);
  907. return 0;
  908. free_wq:
  909. destroy_workqueue(lgr->tx_wq);
  910. free_lgr:
  911. kfree(lgr);
  912. ism_put_vlan:
  913. if (ini->is_smcd && ini->vlan_id)
  914. smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
  915. out:
  916. if (rc < 0) {
  917. if (rc == -ENOMEM)
  918. rc = SMC_CLC_DECL_MEM;
  919. else
  920. rc = SMC_CLC_DECL_INTERR;
  921. }
  922. return rc;
  923. }
  924. static int smc_write_space(struct smc_connection *conn)
  925. {
  926. int buffer_len = conn->peer_rmbe_size;
  927. union smc_host_cursor prod;
  928. union smc_host_cursor cons;
  929. int space;
  930. smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
  931. smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
  932. /* determine rx_buf space */
  933. space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
  934. return space;
  935. }
  936. static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
  937. struct smc_wr_buf *wr_buf)
  938. {
  939. struct smc_connection *conn = &smc->conn;
  940. union smc_host_cursor cons, fin;
  941. int rc = 0;
  942. int diff;
  943. smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
  944. smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
  945. /* set prod cursor to old state, enforce tx_rdma_writes() */
  946. smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
  947. smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
  948. if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
  949. /* cons cursor advanced more than fin, and prod was set
  950. * fin above, so now prod is smaller than cons. Fix that.
  951. */
  952. diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
  953. smc_curs_add(conn->sndbuf_desc->len,
  954. &conn->tx_curs_sent, diff);
  955. smc_curs_add(conn->sndbuf_desc->len,
  956. &conn->tx_curs_fin, diff);
  957. smp_mb__before_atomic();
  958. atomic_add(diff, &conn->sndbuf_space);
  959. smp_mb__after_atomic();
  960. smc_curs_add(conn->peer_rmbe_size,
  961. &conn->local_tx_ctrl.prod, diff);
  962. smc_curs_add(conn->peer_rmbe_size,
  963. &conn->local_tx_ctrl_fin, diff);
  964. }
  965. /* recalculate, value is used by tx_rdma_writes() */
  966. atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
  967. if (smc->sk.sk_state != SMC_INIT &&
  968. smc->sk.sk_state != SMC_CLOSED) {
  969. rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
  970. if (!rc) {
  971. queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
  972. smc->sk.sk_data_ready(&smc->sk);
  973. }
  974. } else {
  975. smc_wr_tx_put_slot(conn->lnk,
  976. (struct smc_wr_tx_pend_priv *)pend);
  977. }
  978. return rc;
  979. }
  980. void smc_switch_link_and_count(struct smc_connection *conn,
  981. struct smc_link *to_lnk)
  982. {
  983. atomic_dec(&conn->lnk->conn_cnt);
  984. /* link_hold in smc_conn_create() */
  985. smcr_link_put(conn->lnk);
  986. conn->lnk = to_lnk;
  987. atomic_inc(&conn->lnk->conn_cnt);
  988. /* link_put in smc_conn_free() */
  989. smcr_link_hold(conn->lnk);
  990. }
  991. struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
  992. struct smc_link *from_lnk, bool is_dev_err)
  993. {
  994. struct smc_link *to_lnk = NULL;
  995. struct smc_cdc_tx_pend *pend;
  996. struct smc_connection *conn;
  997. struct smc_wr_buf *wr_buf;
  998. struct smc_sock *smc;
  999. struct rb_node *node;
  1000. int i, rc = 0;
  1001. /* link is inactive, wake up tx waiters */
  1002. smc_wr_wakeup_tx_wait(from_lnk);
  1003. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  1004. if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
  1005. continue;
  1006. if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
  1007. from_lnk->ibport == lgr->lnk[i].ibport) {
  1008. continue;
  1009. }
  1010. to_lnk = &lgr->lnk[i];
  1011. break;
  1012. }
  1013. if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
  1014. smc_lgr_terminate_sched(lgr);
  1015. return NULL;
  1016. }
  1017. again:
  1018. read_lock_bh(&lgr->conns_lock);
  1019. for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
  1020. conn = rb_entry(node, struct smc_connection, alert_node);
  1021. if (conn->lnk != from_lnk)
  1022. continue;
  1023. smc = container_of(conn, struct smc_sock, conn);
  1024. /* conn->lnk not yet set in SMC_INIT state */
  1025. if (smc->sk.sk_state == SMC_INIT)
  1026. continue;
  1027. if (smc->sk.sk_state == SMC_CLOSED ||
  1028. smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
  1029. smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
  1030. smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
  1031. smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
  1032. smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
  1033. smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
  1034. smc->sk.sk_state == SMC_PEERABORTWAIT ||
  1035. smc->sk.sk_state == SMC_PROCESSABORT) {
  1036. spin_lock_bh(&conn->send_lock);
  1037. smc_switch_link_and_count(conn, to_lnk);
  1038. spin_unlock_bh(&conn->send_lock);
  1039. continue;
  1040. }
  1041. sock_hold(&smc->sk);
  1042. read_unlock_bh(&lgr->conns_lock);
  1043. /* pre-fetch buffer outside of send_lock, might sleep */
  1044. rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
  1045. if (rc)
  1046. goto err_out;
  1047. /* avoid race with smcr_tx_sndbuf_nonempty() */
  1048. spin_lock_bh(&conn->send_lock);
  1049. smc_switch_link_and_count(conn, to_lnk);
  1050. rc = smc_switch_cursor(smc, pend, wr_buf);
  1051. spin_unlock_bh(&conn->send_lock);
  1052. sock_put(&smc->sk);
  1053. if (rc)
  1054. goto err_out;
  1055. goto again;
  1056. }
  1057. read_unlock_bh(&lgr->conns_lock);
  1058. smc_wr_tx_link_put(to_lnk);
  1059. return to_lnk;
  1060. err_out:
  1061. smcr_link_down_cond_sched(to_lnk);
  1062. smc_wr_tx_link_put(to_lnk);
  1063. return NULL;
  1064. }
  1065. static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
  1066. struct smc_link_group *lgr)
  1067. {
  1068. struct rw_semaphore *lock; /* lock buffer list */
  1069. int rc;
  1070. if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
  1071. /* unregister rmb with peer */
  1072. rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
  1073. if (!rc) {
  1074. /* protect against smc_llc_cli_rkey_exchange() */
  1075. down_read(&lgr->llc_conf_mutex);
  1076. smc_llc_do_delete_rkey(lgr, buf_desc);
  1077. buf_desc->is_conf_rkey = false;
  1078. up_read(&lgr->llc_conf_mutex);
  1079. smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
  1080. }
  1081. }
  1082. if (buf_desc->is_reg_err) {
  1083. /* buf registration failed, reuse not possible */
  1084. lock = is_rmb ? &lgr->rmbs_lock :
  1085. &lgr->sndbufs_lock;
  1086. down_write(lock);
  1087. smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
  1088. up_write(lock);
  1089. smc_buf_free(lgr, is_rmb, buf_desc);
  1090. } else {
  1091. /* memzero_explicit provides potential memory barrier semantics */
  1092. memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
  1093. WRITE_ONCE(buf_desc->used, 0);
  1094. }
  1095. }
  1096. static void smcd_buf_detach(struct smc_connection *conn)
  1097. {
  1098. struct smcd_dev *smcd = conn->lgr->smcd;
  1099. u64 peer_token = conn->peer_token;
  1100. if (!conn->sndbuf_desc)
  1101. return;
  1102. smc_ism_detach_dmb(smcd, peer_token);
  1103. kfree(conn->sndbuf_desc);
  1104. conn->sndbuf_desc = NULL;
  1105. }
  1106. static void smc_buf_unuse(struct smc_connection *conn,
  1107. struct smc_link_group *lgr)
  1108. {
  1109. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  1110. bool is_smcd = lgr->is_smcd;
  1111. int bufsize;
  1112. if (conn->sndbuf_desc) {
  1113. bufsize = conn->sndbuf_desc->len;
  1114. if (!is_smcd && conn->sndbuf_desc->is_vm) {
  1115. smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
  1116. } else {
  1117. memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
  1118. WRITE_ONCE(conn->sndbuf_desc->used, 0);
  1119. }
  1120. SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
  1121. }
  1122. if (conn->rmb_desc) {
  1123. bufsize = conn->rmb_desc->len;
  1124. if (!is_smcd) {
  1125. smcr_buf_unuse(conn->rmb_desc, true, lgr);
  1126. } else {
  1127. bufsize += sizeof(struct smcd_cdc_msg);
  1128. memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
  1129. WRITE_ONCE(conn->rmb_desc->used, 0);
  1130. }
  1131. SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
  1132. }
  1133. }
  1134. /* remove a finished connection from its link group */
  1135. void smc_conn_free(struct smc_connection *conn)
  1136. {
  1137. struct smc_link_group *lgr = conn->lgr;
  1138. if (!lgr || conn->freed)
  1139. /* Connection has never been registered in a
  1140. * link group, or has already been freed.
  1141. */
  1142. return;
  1143. conn->freed = 1;
  1144. if (!smc_conn_lgr_valid(conn))
  1145. /* Connection has already unregistered from
  1146. * link group.
  1147. */
  1148. goto lgr_put;
  1149. if (lgr->is_smcd) {
  1150. if (!list_empty(&lgr->list))
  1151. smc_ism_unset_conn(conn);
  1152. if (smc_ism_support_dmb_nocopy(lgr->smcd))
  1153. smcd_buf_detach(conn);
  1154. tasklet_kill(&conn->rx_tsklet);
  1155. } else {
  1156. smc_cdc_wait_pend_tx_wr(conn);
  1157. if (current_work() != &conn->abort_work)
  1158. cancel_work_sync(&conn->abort_work);
  1159. }
  1160. if (!list_empty(&lgr->list)) {
  1161. smc_buf_unuse(conn, lgr); /* allow buffer reuse */
  1162. smc_lgr_unregister_conn(conn);
  1163. }
  1164. if (!lgr->conns_num)
  1165. smc_lgr_schedule_free_work(lgr);
  1166. lgr_put:
  1167. if (!lgr->is_smcd)
  1168. smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
  1169. smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
  1170. }
  1171. /* unregister a link from a buf_desc */
  1172. static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
  1173. struct smc_link *lnk)
  1174. {
  1175. if (is_rmb || buf_desc->is_vm)
  1176. buf_desc->is_reg_mr[lnk->link_idx] = false;
  1177. if (!buf_desc->is_map_ib[lnk->link_idx])
  1178. return;
  1179. if ((is_rmb || buf_desc->is_vm) &&
  1180. buf_desc->mr[lnk->link_idx]) {
  1181. smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
  1182. buf_desc->mr[lnk->link_idx] = NULL;
  1183. }
  1184. if (is_rmb)
  1185. smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
  1186. else
  1187. smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
  1188. sg_free_table(&buf_desc->sgt[lnk->link_idx]);
  1189. buf_desc->is_map_ib[lnk->link_idx] = false;
  1190. }
  1191. /* unmap all buffers of lgr for a deleted link */
  1192. static void smcr_buf_unmap_lgr(struct smc_link *lnk)
  1193. {
  1194. struct smc_link_group *lgr = lnk->lgr;
  1195. struct smc_buf_desc *buf_desc, *bf;
  1196. int i;
  1197. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  1198. down_write(&lgr->rmbs_lock);
  1199. list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
  1200. smcr_buf_unmap_link(buf_desc, true, lnk);
  1201. up_write(&lgr->rmbs_lock);
  1202. down_write(&lgr->sndbufs_lock);
  1203. list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
  1204. list)
  1205. smcr_buf_unmap_link(buf_desc, false, lnk);
  1206. up_write(&lgr->sndbufs_lock);
  1207. }
  1208. }
  1209. static void smcr_rtoken_clear_link(struct smc_link *lnk)
  1210. {
  1211. struct smc_link_group *lgr = lnk->lgr;
  1212. int i;
  1213. for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
  1214. lgr->rtokens[i][lnk->link_idx].rkey = 0;
  1215. lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
  1216. }
  1217. }
  1218. static void __smcr_link_clear(struct smc_link *lnk)
  1219. {
  1220. struct smc_link_group *lgr = lnk->lgr;
  1221. struct smc_ib_device *smcibdev;
  1222. smc_wr_free_link_mem(lnk);
  1223. smc_ibdev_cnt_dec(lnk);
  1224. put_device(&lnk->smcibdev->ibdev->dev);
  1225. smcibdev = lnk->smcibdev;
  1226. memset(lnk, 0, sizeof(struct smc_link));
  1227. lnk->state = SMC_LNK_UNUSED;
  1228. if (!atomic_dec_return(&smcibdev->lnk_cnt))
  1229. wake_up(&smcibdev->lnks_deleted);
  1230. smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
  1231. }
  1232. /* must be called under lgr->llc_conf_mutex lock */
  1233. void smcr_link_clear(struct smc_link *lnk, bool log)
  1234. {
  1235. if (!lnk->lgr || lnk->clearing ||
  1236. lnk->state == SMC_LNK_UNUSED)
  1237. return;
  1238. lnk->clearing = 1;
  1239. lnk->peer_qpn = 0;
  1240. smc_llc_link_clear(lnk, log);
  1241. smcr_buf_unmap_lgr(lnk);
  1242. smcr_rtoken_clear_link(lnk);
  1243. smc_ib_modify_qp_error(lnk);
  1244. smc_wr_free_link(lnk);
  1245. smc_ib_destroy_queue_pair(lnk);
  1246. smc_ib_dealloc_protection_domain(lnk);
  1247. smcr_link_put(lnk); /* theoretically last link_put */
  1248. }
  1249. void smcr_link_hold(struct smc_link *lnk)
  1250. {
  1251. refcount_inc(&lnk->refcnt);
  1252. }
  1253. void smcr_link_put(struct smc_link *lnk)
  1254. {
  1255. if (refcount_dec_and_test(&lnk->refcnt))
  1256. __smcr_link_clear(lnk);
  1257. }
  1258. static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
  1259. struct smc_buf_desc *buf_desc)
  1260. {
  1261. int i;
  1262. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
  1263. smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
  1264. if (!buf_desc->is_vm && buf_desc->pages)
  1265. __free_pages(buf_desc->pages, buf_desc->order);
  1266. else if (buf_desc->is_vm && buf_desc->cpu_addr)
  1267. vfree(buf_desc->cpu_addr);
  1268. kfree(buf_desc);
  1269. }
  1270. static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
  1271. struct smc_buf_desc *buf_desc)
  1272. {
  1273. if (is_dmb) {
  1274. /* restore original buf len */
  1275. buf_desc->len += sizeof(struct smcd_cdc_msg);
  1276. smc_ism_unregister_dmb(lgr->smcd, buf_desc);
  1277. } else {
  1278. kfree(buf_desc->cpu_addr);
  1279. }
  1280. kfree(buf_desc);
  1281. }
  1282. static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
  1283. struct smc_buf_desc *buf_desc)
  1284. {
  1285. if (lgr->is_smcd)
  1286. smcd_buf_free(lgr, is_rmb, buf_desc);
  1287. else
  1288. smcr_buf_free(lgr, is_rmb, buf_desc);
  1289. }
  1290. static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
  1291. {
  1292. struct smc_buf_desc *buf_desc, *bf_desc;
  1293. struct list_head *buf_list;
  1294. int i;
  1295. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  1296. if (is_rmb)
  1297. buf_list = &lgr->rmbs[i];
  1298. else
  1299. buf_list = &lgr->sndbufs[i];
  1300. list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
  1301. list) {
  1302. smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
  1303. smc_buf_free(lgr, is_rmb, buf_desc);
  1304. }
  1305. }
  1306. }
  1307. static void smc_lgr_free_bufs(struct smc_link_group *lgr)
  1308. {
  1309. /* free send buffers */
  1310. __smc_lgr_free_bufs(lgr, false);
  1311. /* free rmbs */
  1312. __smc_lgr_free_bufs(lgr, true);
  1313. }
  1314. /* won't be freed until no one accesses to lgr anymore */
  1315. static void __smc_lgr_free(struct smc_link_group *lgr)
  1316. {
  1317. smc_lgr_free_bufs(lgr);
  1318. if (lgr->is_smcd) {
  1319. if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
  1320. wake_up(&lgr->smcd->lgrs_deleted);
  1321. } else {
  1322. smc_wr_free_lgr_mem(lgr);
  1323. if (!atomic_dec_return(&lgr_cnt))
  1324. wake_up(&lgrs_deleted);
  1325. }
  1326. kfree(lgr);
  1327. }
  1328. /* remove a link group */
  1329. static void smc_lgr_free(struct smc_link_group *lgr)
  1330. {
  1331. int i;
  1332. if (!lgr->is_smcd) {
  1333. down_write(&lgr->llc_conf_mutex);
  1334. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  1335. if (lgr->lnk[i].state != SMC_LNK_UNUSED)
  1336. smcr_link_clear(&lgr->lnk[i], false);
  1337. }
  1338. up_write(&lgr->llc_conf_mutex);
  1339. smc_llc_lgr_clear(lgr);
  1340. }
  1341. destroy_workqueue(lgr->tx_wq);
  1342. if (lgr->is_smcd) {
  1343. smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
  1344. put_device(&lgr->smcd->dibs->dev);
  1345. }
  1346. smc_lgr_put(lgr); /* theoretically last lgr_put */
  1347. }
  1348. void smc_lgr_hold(struct smc_link_group *lgr)
  1349. {
  1350. refcount_inc(&lgr->refcnt);
  1351. }
  1352. void smc_lgr_put(struct smc_link_group *lgr)
  1353. {
  1354. if (refcount_dec_and_test(&lgr->refcnt))
  1355. __smc_lgr_free(lgr);
  1356. }
  1357. static void smc_sk_wake_ups(struct smc_sock *smc)
  1358. {
  1359. smc->sk.sk_write_space(&smc->sk);
  1360. smc->sk.sk_data_ready(&smc->sk);
  1361. smc->sk.sk_state_change(&smc->sk);
  1362. }
  1363. /* kill a connection */
  1364. static void smc_conn_kill(struct smc_connection *conn, bool soft)
  1365. {
  1366. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  1367. if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
  1368. conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
  1369. else
  1370. smc_close_abort(conn);
  1371. conn->killed = 1;
  1372. smc->sk.sk_err = ECONNABORTED;
  1373. smc_sk_wake_ups(smc);
  1374. if (conn->lgr->is_smcd) {
  1375. smc_ism_unset_conn(conn);
  1376. if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
  1377. smcd_buf_detach(conn);
  1378. if (soft)
  1379. tasklet_kill(&conn->rx_tsklet);
  1380. else
  1381. tasklet_unlock_wait(&conn->rx_tsklet);
  1382. } else {
  1383. smc_cdc_wait_pend_tx_wr(conn);
  1384. }
  1385. smc_lgr_unregister_conn(conn);
  1386. smc_close_active_abort(smc);
  1387. }
  1388. static void smc_lgr_cleanup(struct smc_link_group *lgr)
  1389. {
  1390. if (lgr->is_smcd) {
  1391. smc_ism_signal_shutdown(lgr);
  1392. } else {
  1393. u32 rsn = lgr->llc_termination_rsn;
  1394. if (!rsn)
  1395. rsn = SMC_LLC_DEL_PROG_INIT_TERM;
  1396. smc_llc_send_link_delete_all(lgr, false, rsn);
  1397. smcr_lgr_link_deactivate_all(lgr);
  1398. }
  1399. }
  1400. /* terminate link group
  1401. * @soft: true if link group shutdown can take its time
  1402. * false if immediate link group shutdown is required
  1403. */
  1404. static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
  1405. {
  1406. struct smc_connection *conn;
  1407. struct smc_sock *smc;
  1408. struct rb_node *node;
  1409. if (lgr->terminating)
  1410. return; /* lgr already terminating */
  1411. /* cancel free_work sync, will terminate when lgr->freeing is set */
  1412. cancel_delayed_work(&lgr->free_work);
  1413. lgr->terminating = 1;
  1414. /* kill remaining link group connections */
  1415. read_lock_bh(&lgr->conns_lock);
  1416. node = rb_first(&lgr->conns_all);
  1417. while (node) {
  1418. read_unlock_bh(&lgr->conns_lock);
  1419. conn = rb_entry(node, struct smc_connection, alert_node);
  1420. smc = container_of(conn, struct smc_sock, conn);
  1421. sock_hold(&smc->sk); /* sock_put below */
  1422. lock_sock(&smc->sk);
  1423. smc_conn_kill(conn, soft);
  1424. release_sock(&smc->sk);
  1425. sock_put(&smc->sk); /* sock_hold above */
  1426. read_lock_bh(&lgr->conns_lock);
  1427. node = rb_first(&lgr->conns_all);
  1428. }
  1429. read_unlock_bh(&lgr->conns_lock);
  1430. smc_lgr_cleanup(lgr);
  1431. smc_lgr_free(lgr);
  1432. }
  1433. /* unlink link group and schedule termination */
  1434. void smc_lgr_terminate_sched(struct smc_link_group *lgr)
  1435. {
  1436. spinlock_t *lgr_lock;
  1437. smc_lgr_list_head(lgr, &lgr_lock);
  1438. spin_lock_bh(lgr_lock);
  1439. if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
  1440. spin_unlock_bh(lgr_lock);
  1441. return; /* lgr already terminating */
  1442. }
  1443. list_del_init(&lgr->list);
  1444. lgr->freeing = 1;
  1445. spin_unlock_bh(lgr_lock);
  1446. schedule_work(&lgr->terminate_work);
  1447. }
  1448. /* Called when peer lgr shutdown (regularly or abnormally) is received */
  1449. void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
  1450. unsigned short vlan)
  1451. {
  1452. struct smc_link_group *lgr, *l;
  1453. LIST_HEAD(lgr_free_list);
  1454. /* run common cleanup function and build free list */
  1455. spin_lock_bh(&dev->lgr_lock);
  1456. list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
  1457. if ((!peer_gid->gid ||
  1458. (lgr->peer_gid.gid == peer_gid->gid &&
  1459. !smc_ism_is_emulated(dev) ? 1 :
  1460. lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
  1461. (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
  1462. if (peer_gid->gid) /* peer triggered termination */
  1463. lgr->peer_shutdown = 1;
  1464. list_move(&lgr->list, &lgr_free_list);
  1465. lgr->freeing = 1;
  1466. }
  1467. }
  1468. spin_unlock_bh(&dev->lgr_lock);
  1469. /* cancel the regular free workers and actually free lgrs */
  1470. list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
  1471. list_del_init(&lgr->list);
  1472. schedule_work(&lgr->terminate_work);
  1473. }
  1474. }
  1475. /* Called when an SMCD device is removed or the smc module is unloaded */
  1476. void smc_smcd_terminate_all(struct smcd_dev *smcd)
  1477. {
  1478. struct smc_link_group *lgr, *lg;
  1479. LIST_HEAD(lgr_free_list);
  1480. spin_lock_bh(&smcd->lgr_lock);
  1481. list_splice_init(&smcd->lgr_list, &lgr_free_list);
  1482. list_for_each_entry(lgr, &lgr_free_list, list)
  1483. lgr->freeing = 1;
  1484. spin_unlock_bh(&smcd->lgr_lock);
  1485. list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
  1486. list_del_init(&lgr->list);
  1487. __smc_lgr_terminate(lgr, false);
  1488. }
  1489. if (atomic_read(&smcd->lgr_cnt))
  1490. wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
  1491. }
  1492. /* Called when an SMCR device is removed or the smc module is unloaded.
  1493. * If smcibdev is given, all SMCR link groups using this device are terminated.
  1494. * If smcibdev is NULL, all SMCR link groups are terminated.
  1495. */
  1496. void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
  1497. {
  1498. struct smc_link_group *lgr, *lg;
  1499. LIST_HEAD(lgr_free_list);
  1500. int i;
  1501. spin_lock_bh(&smc_lgr_list.lock);
  1502. if (!smcibdev) {
  1503. list_splice_init(&smc_lgr_list.list, &lgr_free_list);
  1504. list_for_each_entry(lgr, &lgr_free_list, list)
  1505. lgr->freeing = 1;
  1506. } else {
  1507. list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
  1508. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  1509. if (lgr->lnk[i].smcibdev == smcibdev)
  1510. smcr_link_down_cond_sched(&lgr->lnk[i]);
  1511. }
  1512. }
  1513. }
  1514. spin_unlock_bh(&smc_lgr_list.lock);
  1515. list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
  1516. list_del_init(&lgr->list);
  1517. smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
  1518. __smc_lgr_terminate(lgr, false);
  1519. }
  1520. if (smcibdev) {
  1521. if (atomic_read(&smcibdev->lnk_cnt))
  1522. wait_event(smcibdev->lnks_deleted,
  1523. !atomic_read(&smcibdev->lnk_cnt));
  1524. } else {
  1525. if (atomic_read(&lgr_cnt))
  1526. wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
  1527. }
  1528. }
  1529. /* set new lgr type and clear all asymmetric link tagging */
  1530. void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
  1531. {
  1532. char *lgr_type = "";
  1533. int i;
  1534. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
  1535. if (smc_link_usable(&lgr->lnk[i]))
  1536. lgr->lnk[i].link_is_asym = false;
  1537. if (lgr->type == new_type)
  1538. return;
  1539. lgr->type = new_type;
  1540. switch (lgr->type) {
  1541. case SMC_LGR_NONE:
  1542. lgr_type = "NONE";
  1543. break;
  1544. case SMC_LGR_SINGLE:
  1545. lgr_type = "SINGLE";
  1546. break;
  1547. case SMC_LGR_SYMMETRIC:
  1548. lgr_type = "SYMMETRIC";
  1549. break;
  1550. case SMC_LGR_ASYMMETRIC_PEER:
  1551. lgr_type = "ASYMMETRIC_PEER";
  1552. break;
  1553. case SMC_LGR_ASYMMETRIC_LOCAL:
  1554. lgr_type = "ASYMMETRIC_LOCAL";
  1555. break;
  1556. }
  1557. pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
  1558. "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
  1559. lgr->net->net_cookie, lgr_type, lgr->pnet_id);
  1560. }
  1561. /* set new lgr type and tag a link as asymmetric */
  1562. void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
  1563. enum smc_lgr_type new_type, int asym_lnk_idx)
  1564. {
  1565. smcr_lgr_set_type(lgr, new_type);
  1566. lgr->lnk[asym_lnk_idx].link_is_asym = true;
  1567. }
  1568. /* abort connection, abort_work scheduled from tasklet context */
  1569. static void smc_conn_abort_work(struct work_struct *work)
  1570. {
  1571. struct smc_connection *conn = container_of(work,
  1572. struct smc_connection,
  1573. abort_work);
  1574. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  1575. lock_sock(&smc->sk);
  1576. smc_conn_kill(conn, true);
  1577. release_sock(&smc->sk);
  1578. sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
  1579. }
  1580. void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
  1581. {
  1582. struct smc_link_group *lgr, *n;
  1583. spin_lock_bh(&smc_lgr_list.lock);
  1584. list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
  1585. struct smc_link *link;
  1586. if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
  1587. SMC_MAX_PNETID_LEN) ||
  1588. lgr->type == SMC_LGR_SYMMETRIC ||
  1589. lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
  1590. !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
  1591. continue;
  1592. if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
  1593. continue;
  1594. /* trigger local add link processing */
  1595. link = smc_llc_usable_link(lgr);
  1596. if (link)
  1597. smc_llc_add_link_local(link);
  1598. }
  1599. spin_unlock_bh(&smc_lgr_list.lock);
  1600. }
  1601. /* link is down - switch connections to alternate link,
  1602. * must be called under lgr->llc_conf_mutex lock
  1603. */
  1604. static void smcr_link_down(struct smc_link *lnk)
  1605. {
  1606. struct smc_link_group *lgr = lnk->lgr;
  1607. struct smc_link *to_lnk;
  1608. int del_link_id;
  1609. if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
  1610. return;
  1611. to_lnk = smc_switch_conns(lgr, lnk, true);
  1612. if (!to_lnk) { /* no backup link available */
  1613. smcr_link_clear(lnk, true);
  1614. return;
  1615. }
  1616. smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
  1617. del_link_id = lnk->link_id;
  1618. if (lgr->role == SMC_SERV) {
  1619. /* trigger local delete link processing */
  1620. smc_llc_srv_delete_link_local(to_lnk, del_link_id);
  1621. } else {
  1622. if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
  1623. /* another llc task is ongoing */
  1624. up_write(&lgr->llc_conf_mutex);
  1625. wait_event_timeout(lgr->llc_flow_waiter,
  1626. (list_empty(&lgr->list) ||
  1627. lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
  1628. SMC_LLC_WAIT_TIME);
  1629. down_write(&lgr->llc_conf_mutex);
  1630. }
  1631. if (!list_empty(&lgr->list)) {
  1632. smc_llc_send_delete_link(to_lnk, del_link_id,
  1633. SMC_LLC_REQ, true,
  1634. SMC_LLC_DEL_LOST_PATH);
  1635. smcr_link_clear(lnk, true);
  1636. }
  1637. wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
  1638. }
  1639. }
  1640. /* must be called under lgr->llc_conf_mutex lock */
  1641. void smcr_link_down_cond(struct smc_link *lnk)
  1642. {
  1643. if (smc_link_downing(&lnk->state)) {
  1644. trace_smcr_link_down(lnk, __builtin_return_address(0));
  1645. smcr_link_down(lnk);
  1646. }
  1647. }
  1648. /* will get the lgr->llc_conf_mutex lock */
  1649. void smcr_link_down_cond_sched(struct smc_link *lnk)
  1650. {
  1651. if (smc_link_downing(&lnk->state)) {
  1652. trace_smcr_link_down(lnk, __builtin_return_address(0));
  1653. smcr_link_hold(lnk); /* smcr_link_put in link_down_wrk */
  1654. if (!schedule_work(&lnk->link_down_wrk))
  1655. smcr_link_put(lnk);
  1656. }
  1657. }
  1658. void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
  1659. {
  1660. struct smc_link_group *lgr, *n;
  1661. int i;
  1662. list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
  1663. if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
  1664. SMC_MAX_PNETID_LEN))
  1665. continue; /* lgr is not affected */
  1666. if (list_empty(&lgr->list))
  1667. continue;
  1668. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  1669. struct smc_link *lnk = &lgr->lnk[i];
  1670. if (smc_link_usable(lnk) &&
  1671. lnk->smcibdev == smcibdev && lnk->ibport == ibport)
  1672. smcr_link_down_cond_sched(lnk);
  1673. }
  1674. }
  1675. }
  1676. static void smc_link_down_work(struct work_struct *work)
  1677. {
  1678. struct smc_link *link = container_of(work, struct smc_link,
  1679. link_down_wrk);
  1680. struct smc_link_group *lgr = link->lgr;
  1681. if (list_empty(&lgr->list))
  1682. goto out;
  1683. wake_up_all(&lgr->llc_msg_waiter);
  1684. down_write(&lgr->llc_conf_mutex);
  1685. smcr_link_down(link);
  1686. up_write(&lgr->llc_conf_mutex);
  1687. out:
  1688. smcr_link_put(link); /* smcr_link_hold by schedulers of link_down_work */
  1689. }
  1690. static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
  1691. struct netdev_nested_priv *priv)
  1692. {
  1693. unsigned short *vlan_id = (unsigned short *)priv->data;
  1694. if (is_vlan_dev(lower_dev)) {
  1695. *vlan_id = vlan_dev_vlan_id(lower_dev);
  1696. return 1;
  1697. }
  1698. return 0;
  1699. }
  1700. /* Determine vlan of internal TCP socket. */
  1701. int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
  1702. {
  1703. struct netdev_nested_priv priv;
  1704. struct net_device *ndev;
  1705. struct dst_entry *dst;
  1706. int rc = 0;
  1707. ini->vlan_id = 0;
  1708. rcu_read_lock();
  1709. dst = __sk_dst_get(clcsock->sk);
  1710. ndev = dst ? dst_dev_rcu(dst) : NULL;
  1711. if (!ndev) {
  1712. rc = -ENODEV;
  1713. goto out;
  1714. }
  1715. if (is_vlan_dev(ndev)) {
  1716. ini->vlan_id = vlan_dev_vlan_id(ndev);
  1717. goto out;
  1718. }
  1719. priv.data = (void *)&ini->vlan_id;
  1720. netdev_walk_all_lower_dev_rcu(ndev, smc_vlan_by_tcpsk_walk, &priv);
  1721. out:
  1722. rcu_read_unlock();
  1723. return rc;
  1724. }
  1725. static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
  1726. u8 peer_systemid[],
  1727. u8 peer_gid[],
  1728. u8 peer_mac_v1[],
  1729. enum smc_lgr_role role, u32 clcqpn,
  1730. struct net *net)
  1731. {
  1732. struct smc_link *lnk;
  1733. int i;
  1734. if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
  1735. lgr->role != role)
  1736. return false;
  1737. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  1738. lnk = &lgr->lnk[i];
  1739. if (!smc_link_active(lnk))
  1740. continue;
  1741. /* use verbs API to check netns, instead of lgr->net */
  1742. if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
  1743. return false;
  1744. if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
  1745. !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
  1746. (smcr_version == SMC_V2 ||
  1747. !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
  1748. return true;
  1749. }
  1750. return false;
  1751. }
  1752. static bool smcd_lgr_match(struct smc_link_group *lgr,
  1753. struct smcd_dev *smcismdev,
  1754. struct smcd_gid *peer_gid)
  1755. {
  1756. if (lgr->peer_gid.gid != peer_gid->gid ||
  1757. lgr->smcd != smcismdev)
  1758. return false;
  1759. if (smc_ism_is_emulated(smcismdev) &&
  1760. lgr->peer_gid.gid_ext != peer_gid->gid_ext)
  1761. return false;
  1762. return true;
  1763. }
  1764. /* create a new SMC connection (and a new link group if necessary) */
  1765. int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
  1766. {
  1767. struct smc_connection *conn = &smc->conn;
  1768. struct net *net = sock_net(&smc->sk);
  1769. struct list_head *lgr_list;
  1770. struct smc_link_group *lgr;
  1771. enum smc_lgr_role role;
  1772. spinlock_t *lgr_lock;
  1773. int rc = 0;
  1774. lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
  1775. &smc_lgr_list.list;
  1776. lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
  1777. &smc_lgr_list.lock;
  1778. ini->first_contact_local = 1;
  1779. role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
  1780. if (role == SMC_CLNT && ini->first_contact_peer)
  1781. /* create new link group as well */
  1782. goto create;
  1783. /* determine if an existing link group can be reused */
  1784. spin_lock_bh(lgr_lock);
  1785. list_for_each_entry(lgr, lgr_list, list) {
  1786. write_lock_bh(&lgr->conns_lock);
  1787. if ((ini->is_smcd ?
  1788. smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
  1789. &ini->ism_peer_gid[ini->ism_selected]) :
  1790. smcr_lgr_match(lgr, ini->smcr_version,
  1791. ini->peer_systemid,
  1792. ini->peer_gid, ini->peer_mac, role,
  1793. ini->ib_clcqpn, net)) &&
  1794. !lgr->sync_err &&
  1795. (ini->smcd_version == SMC_V2 ||
  1796. lgr->vlan_id == ini->vlan_id) &&
  1797. (role == SMC_CLNT || ini->is_smcd ||
  1798. (lgr->conns_num < lgr->max_conns &&
  1799. !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
  1800. /* link group found */
  1801. ini->first_contact_local = 0;
  1802. conn->lgr = lgr;
  1803. rc = smc_lgr_register_conn(conn, false);
  1804. write_unlock_bh(&lgr->conns_lock);
  1805. if (!rc && delayed_work_pending(&lgr->free_work))
  1806. cancel_delayed_work(&lgr->free_work);
  1807. break;
  1808. }
  1809. write_unlock_bh(&lgr->conns_lock);
  1810. }
  1811. spin_unlock_bh(lgr_lock);
  1812. if (rc)
  1813. return rc;
  1814. if (role == SMC_CLNT && !ini->first_contact_peer &&
  1815. ini->first_contact_local) {
  1816. /* Server reuses a link group, but Client wants to start
  1817. * a new one
  1818. * send out_of_sync decline, reason synchr. error
  1819. */
  1820. return SMC_CLC_DECL_SYNCERR;
  1821. }
  1822. create:
  1823. if (ini->first_contact_local) {
  1824. rc = smc_lgr_create(smc, ini);
  1825. if (rc)
  1826. goto out;
  1827. lgr = conn->lgr;
  1828. write_lock_bh(&lgr->conns_lock);
  1829. rc = smc_lgr_register_conn(conn, true);
  1830. write_unlock_bh(&lgr->conns_lock);
  1831. if (rc) {
  1832. smc_lgr_cleanup_early(lgr);
  1833. goto out;
  1834. }
  1835. }
  1836. smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
  1837. if (!conn->lgr->is_smcd)
  1838. smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
  1839. conn->freed = 0;
  1840. conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
  1841. conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
  1842. conn->urg_state = SMC_URG_READ;
  1843. init_waitqueue_head(&conn->cdc_pend_tx_wq);
  1844. INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
  1845. if (ini->is_smcd) {
  1846. conn->rx_off = sizeof(struct smcd_cdc_msg);
  1847. smcd_cdc_rx_init(conn); /* init tasklet for this conn */
  1848. } else {
  1849. conn->rx_off = 0;
  1850. }
  1851. #ifndef KERNEL_HAS_ATOMIC64
  1852. spin_lock_init(&conn->acurs_lock);
  1853. #endif
  1854. out:
  1855. return rc;
  1856. }
  1857. #define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
  1858. #define SMCR_RMBE_SIZES 15 /* 0 -> 16KB, 1 -> 32KB, .. 15 -> 512MB */
  1859. /* convert the RMB size into the compressed notation (minimum 16K, see
  1860. * SMCD/R_DMBE_SIZES.
  1861. * In contrast to plain ilog2, this rounds towards the next power of 2,
  1862. * so the socket application gets at least its desired sndbuf / rcvbuf size.
  1863. */
  1864. static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
  1865. {
  1866. u8 compressed;
  1867. if (size <= SMC_BUF_MIN_SIZE)
  1868. return 0;
  1869. size = (size - 1) >> 14; /* convert to 16K multiple */
  1870. compressed = min_t(u8, ilog2(size) + 1,
  1871. is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
  1872. #ifdef CONFIG_ARCH_NO_SG_CHAIN
  1873. if (!is_smcd && is_rmb)
  1874. /* RMBs are backed by & limited to max size of scatterlists */
  1875. compressed = min_t(u8, compressed, ilog2((SG_MAX_SINGLE_ALLOC * PAGE_SIZE) >> 14));
  1876. #endif
  1877. return compressed;
  1878. }
  1879. /* convert the RMB size from compressed notation into integer */
  1880. int smc_uncompress_bufsize(u8 compressed)
  1881. {
  1882. u32 size;
  1883. size = 0x00000001 << (((int)compressed) + 14);
  1884. return (int)size;
  1885. }
  1886. /* try to reuse a sndbuf or rmb description slot for a certain
  1887. * buffer size; if not available, return NULL
  1888. */
  1889. static struct smc_buf_desc *smc_buf_get_slot(struct rw_semaphore *lock,
  1890. struct list_head *buf_list)
  1891. {
  1892. struct smc_buf_desc *buf_slot;
  1893. down_read(lock);
  1894. list_for_each_entry(buf_slot, buf_list, list) {
  1895. if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
  1896. up_read(lock);
  1897. return buf_slot;
  1898. }
  1899. }
  1900. up_read(lock);
  1901. return NULL;
  1902. }
  1903. /* one of the conditions for announcing a receiver's current window size is
  1904. * that it "results in a minimum increase in the window size of 10% of the
  1905. * receive buffer space" [RFC7609]
  1906. */
  1907. static inline int smc_rmb_wnd_update_limit(int rmbe_size)
  1908. {
  1909. return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
  1910. }
  1911. /* map an buf to a link */
  1912. static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
  1913. struct smc_link *lnk)
  1914. {
  1915. int rc, i, nents, offset, buf_size, size, access_flags;
  1916. struct scatterlist *sg;
  1917. void *buf;
  1918. if (buf_desc->is_map_ib[lnk->link_idx])
  1919. return 0;
  1920. if (buf_desc->is_vm) {
  1921. buf = buf_desc->cpu_addr;
  1922. buf_size = buf_desc->len;
  1923. offset = offset_in_page(buf_desc->cpu_addr);
  1924. nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
  1925. } else {
  1926. nents = 1;
  1927. }
  1928. rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
  1929. if (rc)
  1930. return rc;
  1931. if (buf_desc->is_vm) {
  1932. /* virtually contiguous buffer */
  1933. for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
  1934. size = min_t(int, PAGE_SIZE - offset, buf_size);
  1935. sg_set_page(sg, vmalloc_to_page(buf), size, offset);
  1936. buf += size;
  1937. buf_size -= size;
  1938. offset = 0;
  1939. }
  1940. } else {
  1941. /* physically contiguous buffer */
  1942. sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
  1943. buf_desc->cpu_addr, buf_desc->len);
  1944. }
  1945. /* map sg table to DMA address */
  1946. rc = smc_ib_buf_map_sg(lnk, buf_desc,
  1947. is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
  1948. /* SMC protocol depends on mapping to one DMA address only */
  1949. if (rc != nents) {
  1950. rc = -EAGAIN;
  1951. goto free_table;
  1952. }
  1953. buf_desc->is_dma_need_sync |=
  1954. smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
  1955. if (is_rmb || buf_desc->is_vm) {
  1956. /* create a new memory region for the RMB or vzalloced sndbuf */
  1957. access_flags = is_rmb ?
  1958. IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
  1959. IB_ACCESS_LOCAL_WRITE;
  1960. rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
  1961. buf_desc, lnk->link_idx);
  1962. if (rc)
  1963. goto buf_unmap;
  1964. smc_ib_sync_sg_for_device(lnk, buf_desc,
  1965. is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
  1966. }
  1967. buf_desc->is_map_ib[lnk->link_idx] = true;
  1968. return 0;
  1969. buf_unmap:
  1970. smc_ib_buf_unmap_sg(lnk, buf_desc,
  1971. is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
  1972. free_table:
  1973. sg_free_table(&buf_desc->sgt[lnk->link_idx]);
  1974. return rc;
  1975. }
  1976. /* register a new buf on IB device, rmb or vzalloced sndbuf
  1977. * must be called under lgr->llc_conf_mutex lock
  1978. */
  1979. int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
  1980. {
  1981. if (list_empty(&link->lgr->list))
  1982. return -ENOLINK;
  1983. if (!buf_desc->is_reg_mr[link->link_idx]) {
  1984. /* register memory region for new buf */
  1985. if (buf_desc->is_vm)
  1986. buf_desc->mr[link->link_idx]->iova =
  1987. (uintptr_t)buf_desc->cpu_addr;
  1988. if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
  1989. buf_desc->is_reg_err = true;
  1990. return -EFAULT;
  1991. }
  1992. buf_desc->is_reg_mr[link->link_idx] = true;
  1993. }
  1994. return 0;
  1995. }
  1996. static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
  1997. struct list_head *lst, bool is_rmb)
  1998. {
  1999. struct smc_buf_desc *buf_desc, *bf;
  2000. int rc = 0;
  2001. down_write(lock);
  2002. list_for_each_entry_safe(buf_desc, bf, lst, list) {
  2003. if (!buf_desc->used)
  2004. continue;
  2005. rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
  2006. if (rc)
  2007. goto out;
  2008. }
  2009. out:
  2010. up_write(lock);
  2011. return rc;
  2012. }
  2013. /* map all used buffers of lgr for a new link */
  2014. int smcr_buf_map_lgr(struct smc_link *lnk)
  2015. {
  2016. struct smc_link_group *lgr = lnk->lgr;
  2017. int i, rc = 0;
  2018. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  2019. rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
  2020. &lgr->rmbs[i], true);
  2021. if (rc)
  2022. return rc;
  2023. rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
  2024. &lgr->sndbufs[i], false);
  2025. if (rc)
  2026. return rc;
  2027. }
  2028. return 0;
  2029. }
  2030. /* register all used buffers of lgr for a new link,
  2031. * must be called under lgr->llc_conf_mutex lock
  2032. */
  2033. int smcr_buf_reg_lgr(struct smc_link *lnk)
  2034. {
  2035. struct smc_link_group *lgr = lnk->lgr;
  2036. struct smc_buf_desc *buf_desc, *bf;
  2037. int i, rc = 0;
  2038. /* reg all RMBs for a new link */
  2039. down_write(&lgr->rmbs_lock);
  2040. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  2041. list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
  2042. if (!buf_desc->used)
  2043. continue;
  2044. rc = smcr_link_reg_buf(lnk, buf_desc);
  2045. if (rc) {
  2046. up_write(&lgr->rmbs_lock);
  2047. return rc;
  2048. }
  2049. }
  2050. }
  2051. up_write(&lgr->rmbs_lock);
  2052. if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
  2053. return rc;
  2054. /* reg all vzalloced sndbufs for a new link */
  2055. down_write(&lgr->sndbufs_lock);
  2056. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  2057. list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
  2058. if (!buf_desc->used || !buf_desc->is_vm)
  2059. continue;
  2060. rc = smcr_link_reg_buf(lnk, buf_desc);
  2061. if (rc) {
  2062. up_write(&lgr->sndbufs_lock);
  2063. return rc;
  2064. }
  2065. }
  2066. }
  2067. up_write(&lgr->sndbufs_lock);
  2068. return rc;
  2069. }
  2070. static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
  2071. int bufsize)
  2072. {
  2073. struct smc_buf_desc *buf_desc;
  2074. /* try to alloc a new buffer */
  2075. buf_desc = kzalloc_obj(*buf_desc);
  2076. if (!buf_desc)
  2077. return ERR_PTR(-ENOMEM);
  2078. switch (lgr->buf_type) {
  2079. case SMCR_PHYS_CONT_BUFS:
  2080. case SMCR_MIXED_BUFS:
  2081. buf_desc->order = get_order(bufsize);
  2082. buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
  2083. __GFP_NOMEMALLOC | __GFP_COMP |
  2084. __GFP_NORETRY | __GFP_ZERO,
  2085. buf_desc->order);
  2086. if (buf_desc->pages) {
  2087. buf_desc->cpu_addr =
  2088. (void *)page_address(buf_desc->pages);
  2089. buf_desc->len = bufsize;
  2090. buf_desc->is_vm = false;
  2091. break;
  2092. }
  2093. if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
  2094. goto out;
  2095. fallthrough; // try virtually contiguous buf
  2096. case SMCR_VIRT_CONT_BUFS:
  2097. buf_desc->order = get_order(bufsize);
  2098. buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
  2099. if (!buf_desc->cpu_addr)
  2100. goto out;
  2101. buf_desc->pages = NULL;
  2102. buf_desc->len = bufsize;
  2103. buf_desc->is_vm = true;
  2104. break;
  2105. }
  2106. return buf_desc;
  2107. out:
  2108. kfree(buf_desc);
  2109. return ERR_PTR(-EAGAIN);
  2110. }
  2111. /* map buf_desc on all usable links,
  2112. * unused buffers stay mapped as long as the link is up
  2113. */
  2114. static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
  2115. struct smc_buf_desc *buf_desc, bool is_rmb)
  2116. {
  2117. int i, rc = 0, cnt = 0;
  2118. /* protect against parallel link reconfiguration */
  2119. down_read(&lgr->llc_conf_mutex);
  2120. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  2121. struct smc_link *lnk = &lgr->lnk[i];
  2122. if (!smc_link_usable(lnk))
  2123. continue;
  2124. if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
  2125. rc = -ENOMEM;
  2126. goto out;
  2127. }
  2128. cnt++;
  2129. }
  2130. out:
  2131. up_read(&lgr->llc_conf_mutex);
  2132. if (!rc && !cnt)
  2133. rc = -EINVAL;
  2134. return rc;
  2135. }
  2136. static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
  2137. bool is_dmb, int bufsize)
  2138. {
  2139. struct smc_buf_desc *buf_desc;
  2140. int rc;
  2141. /* try to alloc a new DMB */
  2142. buf_desc = kzalloc_obj(*buf_desc);
  2143. if (!buf_desc)
  2144. return ERR_PTR(-ENOMEM);
  2145. if (is_dmb) {
  2146. rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
  2147. if (rc) {
  2148. kfree(buf_desc);
  2149. if (rc == -ENOMEM)
  2150. return ERR_PTR(-EAGAIN);
  2151. if (rc == -ENOSPC)
  2152. return ERR_PTR(-ENOSPC);
  2153. return ERR_PTR(-EIO);
  2154. }
  2155. buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
  2156. /* CDC header stored in buf. So, pretend it was smaller */
  2157. buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
  2158. } else {
  2159. buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
  2160. __GFP_NOWARN | __GFP_NORETRY |
  2161. __GFP_NOMEMALLOC);
  2162. if (!buf_desc->cpu_addr) {
  2163. kfree(buf_desc);
  2164. return ERR_PTR(-EAGAIN);
  2165. }
  2166. buf_desc->len = bufsize;
  2167. }
  2168. return buf_desc;
  2169. }
  2170. static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
  2171. {
  2172. struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
  2173. struct smc_connection *conn = &smc->conn;
  2174. struct smc_link_group *lgr = conn->lgr;
  2175. struct list_head *buf_list;
  2176. int bufsize, bufsize_comp;
  2177. struct rw_semaphore *lock; /* lock buffer list */
  2178. bool is_dgraded = false;
  2179. if (is_rmb)
  2180. /* use socket recv buffer size (w/o overhead) as start value */
  2181. bufsize = smc->sk.sk_rcvbuf / 2;
  2182. else
  2183. /* use socket send buffer size (w/o overhead) as start value */
  2184. bufsize = smc->sk.sk_sndbuf / 2;
  2185. for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
  2186. bufsize_comp >= 0; bufsize_comp--) {
  2187. if (is_rmb) {
  2188. lock = &lgr->rmbs_lock;
  2189. buf_list = &lgr->rmbs[bufsize_comp];
  2190. } else {
  2191. lock = &lgr->sndbufs_lock;
  2192. buf_list = &lgr->sndbufs[bufsize_comp];
  2193. }
  2194. bufsize = smc_uncompress_bufsize(bufsize_comp);
  2195. /* check for reusable slot in the link group */
  2196. buf_desc = smc_buf_get_slot(lock, buf_list);
  2197. if (buf_desc) {
  2198. buf_desc->is_dma_need_sync = 0;
  2199. SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
  2200. SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
  2201. break; /* found reusable slot */
  2202. }
  2203. if (is_smcd)
  2204. buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
  2205. else
  2206. buf_desc = smcr_new_buf_create(lgr, bufsize);
  2207. if (PTR_ERR(buf_desc) == -ENOMEM)
  2208. break;
  2209. if (IS_ERR(buf_desc)) {
  2210. if (!is_dgraded) {
  2211. is_dgraded = true;
  2212. SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
  2213. }
  2214. continue;
  2215. }
  2216. SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
  2217. SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
  2218. buf_desc->used = 1;
  2219. down_write(lock);
  2220. smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
  2221. up_write(lock);
  2222. break; /* found */
  2223. }
  2224. if (IS_ERR(buf_desc))
  2225. return PTR_ERR(buf_desc);
  2226. if (!is_smcd) {
  2227. if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
  2228. smcr_buf_unuse(buf_desc, is_rmb, lgr);
  2229. return -ENOMEM;
  2230. }
  2231. }
  2232. if (is_rmb) {
  2233. conn->rmb_desc = buf_desc;
  2234. conn->rmbe_size_comp = bufsize_comp;
  2235. smc->sk.sk_rcvbuf = bufsize * 2;
  2236. atomic_set(&conn->bytes_to_rcv, 0);
  2237. conn->rmbe_update_limit =
  2238. smc_rmb_wnd_update_limit(buf_desc->len);
  2239. if (is_smcd)
  2240. smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
  2241. } else {
  2242. conn->sndbuf_desc = buf_desc;
  2243. smc->sk.sk_sndbuf = bufsize * 2;
  2244. atomic_set(&conn->sndbuf_space, bufsize);
  2245. }
  2246. return 0;
  2247. }
  2248. void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
  2249. {
  2250. if (!conn->sndbuf_desc->is_dma_need_sync)
  2251. return;
  2252. if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
  2253. !smc_link_active(conn->lnk))
  2254. return;
  2255. smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
  2256. }
  2257. void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
  2258. {
  2259. int i;
  2260. if (!conn->rmb_desc->is_dma_need_sync)
  2261. return;
  2262. if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
  2263. return;
  2264. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  2265. if (!smc_link_active(&conn->lgr->lnk[i]))
  2266. continue;
  2267. smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
  2268. DMA_FROM_DEVICE);
  2269. }
  2270. }
  2271. /* create the send and receive buffer for an SMC socket;
  2272. * receive buffers are called RMBs;
  2273. * (even though the SMC protocol allows more than one RMB-element per RMB,
  2274. * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
  2275. * extra RMB for every connection in a link group
  2276. */
  2277. int smc_buf_create(struct smc_sock *smc, bool is_smcd)
  2278. {
  2279. int rc;
  2280. /* create send buffer */
  2281. if (is_smcd &&
  2282. smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd))
  2283. goto create_rmb;
  2284. rc = __smc_buf_create(smc, is_smcd, false);
  2285. if (rc)
  2286. return rc;
  2287. create_rmb:
  2288. /* create rmb */
  2289. rc = __smc_buf_create(smc, is_smcd, true);
  2290. if (rc && smc->conn.sndbuf_desc) {
  2291. down_write(&smc->conn.lgr->sndbufs_lock);
  2292. smc_lgr_buf_list_del(smc->conn.lgr, false,
  2293. smc->conn.sndbuf_desc);
  2294. up_write(&smc->conn.lgr->sndbufs_lock);
  2295. smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
  2296. smc->conn.sndbuf_desc = NULL;
  2297. }
  2298. return rc;
  2299. }
  2300. int smcd_buf_attach(struct smc_sock *smc)
  2301. {
  2302. struct smc_connection *conn = &smc->conn;
  2303. struct smcd_dev *smcd = conn->lgr->smcd;
  2304. u64 peer_token = conn->peer_token;
  2305. struct smc_buf_desc *buf_desc;
  2306. int rc;
  2307. buf_desc = kzalloc_obj(*buf_desc);
  2308. if (!buf_desc)
  2309. return -ENOMEM;
  2310. /* The ghost sndbuf_desc describes the same memory region as
  2311. * peer RMB. Its lifecycle is consistent with the connection's
  2312. * and it will be freed with the connections instead of the
  2313. * link group.
  2314. */
  2315. rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
  2316. if (rc)
  2317. goto free;
  2318. smc->sk.sk_sndbuf = buf_desc->len;
  2319. buf_desc->cpu_addr =
  2320. (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
  2321. buf_desc->len -= sizeof(struct smcd_cdc_msg);
  2322. conn->sndbuf_desc = buf_desc;
  2323. conn->sndbuf_desc->used = 1;
  2324. atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
  2325. return 0;
  2326. free:
  2327. kfree(buf_desc);
  2328. return rc;
  2329. }
  2330. static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
  2331. {
  2332. int i;
  2333. for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
  2334. if (!test_and_set_bit(i, lgr->rtokens_used_mask))
  2335. return i;
  2336. }
  2337. return -ENOSPC;
  2338. }
  2339. static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
  2340. u32 rkey)
  2341. {
  2342. int i;
  2343. for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
  2344. if (test_bit(i, lgr->rtokens_used_mask) &&
  2345. lgr->rtokens[i][lnk_idx].rkey == rkey)
  2346. return i;
  2347. }
  2348. return -ENOENT;
  2349. }
  2350. /* set rtoken for a new link to an existing rmb */
  2351. void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
  2352. __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
  2353. {
  2354. int rtok_idx;
  2355. rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
  2356. if (rtok_idx == -ENOENT)
  2357. return;
  2358. lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
  2359. lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
  2360. }
  2361. /* set rtoken for a new link whose link_id is given */
  2362. void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
  2363. __be64 nw_vaddr, __be32 nw_rkey)
  2364. {
  2365. u64 dma_addr = be64_to_cpu(nw_vaddr);
  2366. u32 rkey = ntohl(nw_rkey);
  2367. bool found = false;
  2368. int link_idx;
  2369. for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
  2370. if (lgr->lnk[link_idx].link_id == link_id) {
  2371. found = true;
  2372. break;
  2373. }
  2374. }
  2375. if (!found)
  2376. return;
  2377. lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
  2378. lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
  2379. }
  2380. /* add a new rtoken from peer */
  2381. int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
  2382. {
  2383. struct smc_link_group *lgr = smc_get_lgr(lnk);
  2384. u64 dma_addr = be64_to_cpu(nw_vaddr);
  2385. u32 rkey = ntohl(nw_rkey);
  2386. int i;
  2387. for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
  2388. if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
  2389. lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
  2390. test_bit(i, lgr->rtokens_used_mask)) {
  2391. /* already in list */
  2392. return i;
  2393. }
  2394. }
  2395. i = smc_rmb_reserve_rtoken_idx(lgr);
  2396. if (i < 0)
  2397. return i;
  2398. lgr->rtokens[i][lnk->link_idx].rkey = rkey;
  2399. lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
  2400. return i;
  2401. }
  2402. /* delete an rtoken from all links */
  2403. int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
  2404. {
  2405. struct smc_link_group *lgr = smc_get_lgr(lnk);
  2406. u32 rkey = ntohl(nw_rkey);
  2407. int i, j;
  2408. for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
  2409. if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
  2410. test_bit(i, lgr->rtokens_used_mask)) {
  2411. for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
  2412. lgr->rtokens[i][j].rkey = 0;
  2413. lgr->rtokens[i][j].dma_addr = 0;
  2414. }
  2415. clear_bit(i, lgr->rtokens_used_mask);
  2416. return 0;
  2417. }
  2418. }
  2419. return -ENOENT;
  2420. }
  2421. /* save rkey and dma_addr received from peer during clc handshake */
  2422. int smc_rmb_rtoken_handling(struct smc_connection *conn,
  2423. struct smc_link *lnk,
  2424. struct smc_clc_msg_accept_confirm *clc)
  2425. {
  2426. conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
  2427. clc->r0.rmb_rkey);
  2428. if (conn->rtoken_idx < 0)
  2429. return conn->rtoken_idx;
  2430. return 0;
  2431. }
  2432. static void smc_core_going_away(void)
  2433. {
  2434. struct smc_ib_device *smcibdev;
  2435. struct smcd_dev *smcd;
  2436. mutex_lock(&smc_ib_devices.mutex);
  2437. list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
  2438. int i;
  2439. for (i = 0; i < SMC_MAX_PORTS; i++)
  2440. set_bit(i, smcibdev->ports_going_away);
  2441. }
  2442. mutex_unlock(&smc_ib_devices.mutex);
  2443. mutex_lock(&smcd_dev_list.mutex);
  2444. list_for_each_entry(smcd, &smcd_dev_list.list, list) {
  2445. smcd->going_away = 1;
  2446. }
  2447. mutex_unlock(&smcd_dev_list.mutex);
  2448. }
  2449. /* Clean up all SMC link groups */
  2450. static void smc_lgrs_shutdown(void)
  2451. {
  2452. struct smcd_dev *smcd;
  2453. smc_core_going_away();
  2454. smc_smcr_terminate_all(NULL);
  2455. mutex_lock(&smcd_dev_list.mutex);
  2456. list_for_each_entry(smcd, &smcd_dev_list.list, list)
  2457. smc_smcd_terminate_all(smcd);
  2458. mutex_unlock(&smcd_dev_list.mutex);
  2459. }
  /* Callback installed in smc_reboot_notifier: shut down all link groups,
   * then unregister the IB client and run the ISM exit handling.
   * The notifier arguments are not evaluated; always returns 0.
   */
  static int smc_core_reboot_event(struct notifier_block *this,
                                   unsigned long event, void *ptr)
  {
          /* link groups are terminated before the device layers go away */
          smc_lgrs_shutdown();

          smc_ib_unregister_client();
          smc_ism_exit();

          return 0;
  }
  2468. static struct notifier_block smc_reboot_notifier = {
  2469. .notifier_call = smc_core_reboot_event,
  2470. };
  2471. int __init smc_core_init(void)
  2472. {
  2473. return register_reboot_notifier(&smc_reboot_notifier);
  2474. }
  2475. /* Called (from smc_exit) when module is removed */
  2476. void smc_core_exit(void)
  2477. {
  2478. unregister_reboot_notifier(&smc_reboot_notifier);
  2479. smc_lgrs_shutdown();
  2480. }