tc_conntrack.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /****************************************************************************
  3. * Driver for Solarflare network controllers and boards
  4. * Copyright 2023, Advanced Micro Devices, Inc.
  5. *
  6. * This program is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License version 2 as published
  8. * by the Free Software Foundation, incorporated herein by reference.
  9. */
  10. #include "tc_conntrack.h"
  11. #include "tc.h"
  12. #include "mae.h"
  13. static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
  14. void *cb_priv);
  15. static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
  16. .key_len = sizeof_field(struct efx_tc_ct_zone, zone),
  17. .key_offset = 0,
  18. .head_offset = offsetof(struct efx_tc_ct_zone, linkage),
  19. };
  20. static const struct rhashtable_params efx_tc_ct_ht_params = {
  21. .key_len = offsetof(struct efx_tc_ct_entry, linkage),
  22. .key_offset = 0,
  23. .head_offset = offsetof(struct efx_tc_ct_entry, linkage),
  24. };
  25. static void efx_tc_ct_zone_free(void *ptr, void *arg)
  26. {
  27. struct efx_tc_ct_zone *zone = ptr;
  28. struct efx_nic *efx = zone->efx;
  29. netif_err(efx, drv, efx->net_dev,
  30. "tc ct_zone %u still present at teardown, removing\n",
  31. zone->zone);
  32. nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone);
  33. kfree(zone);
  34. }
  35. static void efx_tc_ct_free(void *ptr, void *arg)
  36. {
  37. struct efx_tc_ct_entry *conn = ptr;
  38. struct efx_nic *efx = arg;
  39. netif_err(efx, drv, efx->net_dev,
  40. "tc ct_entry %lx still present at teardown\n",
  41. conn->cookie);
  42. /* We can release the counter, but we can't remove the CT itself
  43. * from hardware because the table meta is already gone.
  44. */
  45. efx_tc_flower_release_counter(efx, conn->cnt);
  46. kfree(conn);
  47. }
  48. int efx_tc_init_conntrack(struct efx_nic *efx)
  49. {
  50. int rc;
  51. rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
  52. if (rc < 0)
  53. goto fail_ct_zone_ht;
  54. rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
  55. if (rc < 0)
  56. goto fail_ct_ht;
  57. return 0;
  58. fail_ct_ht:
  59. rhashtable_destroy(&efx->tc->ct_zone_ht);
  60. fail_ct_zone_ht:
  61. return rc;
  62. }
  63. /* Only call this in init failure teardown.
  64. * Normal exit should fini instead as there may be entries in the table.
  65. */
  66. void efx_tc_destroy_conntrack(struct efx_nic *efx)
  67. {
  68. rhashtable_destroy(&efx->tc->ct_ht);
  69. rhashtable_destroy(&efx->tc->ct_zone_ht);
  70. }
  71. void efx_tc_fini_conntrack(struct efx_nic *efx)
  72. {
  73. rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
  74. rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
  75. }
  76. #define EFX_NF_TCP_FLAG(flg) cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16)
  77. static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
  78. struct efx_tc_ct_entry *conn)
  79. {
  80. struct flow_dissector *dissector = fr->match.dissector;
  81. unsigned char ipv = 0;
  82. bool tcp = false;
  83. if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
  84. struct flow_match_control fm;
  85. flow_rule_match_control(fr, &fm);
  86. if (IS_ALL_ONES(fm.mask->addr_type))
  87. switch (fm.key->addr_type) {
  88. case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
  89. ipv = 4;
  90. break;
  91. case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
  92. ipv = 6;
  93. break;
  94. default:
  95. break;
  96. }
  97. }
  98. if (!ipv) {
  99. netif_dbg(efx, drv, efx->net_dev,
  100. "Conntrack missing ipv specification\n");
  101. return -EOPNOTSUPP;
  102. }
  103. if (dissector->used_keys &
  104. ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
  105. BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
  106. BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
  107. BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
  108. BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
  109. BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
  110. BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
  111. netif_dbg(efx, drv, efx->net_dev,
  112. "Unsupported conntrack keys %#llx\n",
  113. dissector->used_keys);
  114. return -EOPNOTSUPP;
  115. }
  116. if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
  117. struct flow_match_basic fm;
  118. flow_rule_match_basic(fr, &fm);
  119. if (!IS_ALL_ONES(fm.mask->n_proto)) {
  120. netif_dbg(efx, drv, efx->net_dev,
  121. "Conntrack eth_proto is not exact-match; mask %04x\n",
  122. ntohs(fm.mask->n_proto));
  123. return -EOPNOTSUPP;
  124. }
  125. conn->eth_proto = fm.key->n_proto;
  126. if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
  127. : htons(ETH_P_IPV6))) {
  128. netif_dbg(efx, drv, efx->net_dev,
  129. "Conntrack eth_proto is not IPv%u, is %04x\n",
  130. ipv, ntohs(conn->eth_proto));
  131. return -EOPNOTSUPP;
  132. }
  133. if (!IS_ALL_ONES(fm.mask->ip_proto)) {
  134. netif_dbg(efx, drv, efx->net_dev,
  135. "Conntrack ip_proto is not exact-match; mask %02x\n",
  136. fm.mask->ip_proto);
  137. return -EOPNOTSUPP;
  138. }
  139. conn->ip_proto = fm.key->ip_proto;
  140. switch (conn->ip_proto) {
  141. case IPPROTO_TCP:
  142. tcp = true;
  143. break;
  144. case IPPROTO_UDP:
  145. break;
  146. default:
  147. netif_dbg(efx, drv, efx->net_dev,
  148. "Conntrack ip_proto not TCP or UDP, is %02x\n",
  149. conn->ip_proto);
  150. return -EOPNOTSUPP;
  151. }
  152. } else {
  153. netif_dbg(efx, drv, efx->net_dev,
  154. "Conntrack missing eth_proto, ip_proto\n");
  155. return -EOPNOTSUPP;
  156. }
  157. if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
  158. struct flow_match_ipv4_addrs fm;
  159. flow_rule_match_ipv4_addrs(fr, &fm);
  160. if (!IS_ALL_ONES(fm.mask->src)) {
  161. netif_dbg(efx, drv, efx->net_dev,
  162. "Conntrack ipv4.src is not exact-match; mask %08x\n",
  163. ntohl(fm.mask->src));
  164. return -EOPNOTSUPP;
  165. }
  166. conn->src_ip = fm.key->src;
  167. if (!IS_ALL_ONES(fm.mask->dst)) {
  168. netif_dbg(efx, drv, efx->net_dev,
  169. "Conntrack ipv4.dst is not exact-match; mask %08x\n",
  170. ntohl(fm.mask->dst));
  171. return -EOPNOTSUPP;
  172. }
  173. conn->dst_ip = fm.key->dst;
  174. } else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
  175. struct flow_match_ipv6_addrs fm;
  176. flow_rule_match_ipv6_addrs(fr, &fm);
  177. if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
  178. netif_dbg(efx, drv, efx->net_dev,
  179. "Conntrack ipv6.src is not exact-match; mask %pI6\n",
  180. &fm.mask->src);
  181. return -EOPNOTSUPP;
  182. }
  183. conn->src_ip6 = fm.key->src;
  184. if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
  185. netif_dbg(efx, drv, efx->net_dev,
  186. "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
  187. &fm.mask->dst);
  188. return -EOPNOTSUPP;
  189. }
  190. conn->dst_ip6 = fm.key->dst;
  191. } else {
  192. netif_dbg(efx, drv, efx->net_dev,
  193. "Conntrack missing IPv%u addrs\n", ipv);
  194. return -EOPNOTSUPP;
  195. }
  196. if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
  197. struct flow_match_ports fm;
  198. flow_rule_match_ports(fr, &fm);
  199. if (!IS_ALL_ONES(fm.mask->src)) {
  200. netif_dbg(efx, drv, efx->net_dev,
  201. "Conntrack ports.src is not exact-match; mask %04x\n",
  202. ntohs(fm.mask->src));
  203. return -EOPNOTSUPP;
  204. }
  205. conn->l4_sport = fm.key->src;
  206. if (!IS_ALL_ONES(fm.mask->dst)) {
  207. netif_dbg(efx, drv, efx->net_dev,
  208. "Conntrack ports.dst is not exact-match; mask %04x\n",
  209. ntohs(fm.mask->dst));
  210. return -EOPNOTSUPP;
  211. }
  212. conn->l4_dport = fm.key->dst;
  213. } else {
  214. netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
  215. return -EOPNOTSUPP;
  216. }
  217. if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
  218. __be16 tcp_interesting_flags;
  219. struct flow_match_tcp fm;
  220. if (!tcp) {
  221. netif_dbg(efx, drv, efx->net_dev,
  222. "Conntrack matching on TCP keys but ipproto is not tcp\n");
  223. return -EOPNOTSUPP;
  224. }
  225. flow_rule_match_tcp(fr, &fm);
  226. tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
  227. EFX_NF_TCP_FLAG(RST) |
  228. EFX_NF_TCP_FLAG(FIN);
  229. /* If any of the tcp_interesting_flags is set, we always
  230. * inhibit CT lookup in LHS (so SW can update CT table).
  231. */
  232. if (fm.key->flags & tcp_interesting_flags) {
  233. netif_dbg(efx, drv, efx->net_dev,
  234. "Unsupported conntrack tcp.flags %04x/%04x\n",
  235. ntohs(fm.key->flags), ntohs(fm.mask->flags));
  236. return -EOPNOTSUPP;
  237. }
  238. /* Other TCP flags cannot be filtered at CT */
  239. if (fm.mask->flags & ~tcp_interesting_flags) {
  240. netif_dbg(efx, drv, efx->net_dev,
  241. "Unsupported conntrack tcp.flags %04x/%04x\n",
  242. ntohs(fm.key->flags), ntohs(fm.mask->flags));
  243. return -EOPNOTSUPP;
  244. }
  245. }
  246. return 0;
  247. }
  248. /**
  249. * struct efx_tc_ct_mangler_state - tracks which fields have been pedited
  250. *
  251. * @ipv4: IP source or destination addr has been set
  252. * @tcpudp: TCP/UDP source or destination port has been set
  253. */
  254. struct efx_tc_ct_mangler_state {
  255. u8 ipv4:1;
  256. u8 tcpudp:1;
  257. };
  258. static int efx_tc_ct_mangle(struct efx_nic *efx, struct efx_tc_ct_entry *conn,
  259. const struct flow_action_entry *fa,
  260. struct efx_tc_ct_mangler_state *mung)
  261. {
  262. /* Is this the first mangle we've processed for this rule? */
  263. bool first = !(mung->ipv4 || mung->tcpudp);
  264. bool dnat = false;
  265. switch (fa->mangle.htype) {
  266. case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
  267. switch (fa->mangle.offset) {
  268. case offsetof(struct iphdr, daddr):
  269. dnat = true;
  270. fallthrough;
  271. case offsetof(struct iphdr, saddr):
  272. if (fa->mangle.mask)
  273. return -EOPNOTSUPP;
  274. conn->nat_ip = htonl(fa->mangle.val);
  275. mung->ipv4 = 1;
  276. break;
  277. default:
  278. return -EOPNOTSUPP;
  279. }
  280. break;
  281. case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
  282. case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
  283. /* Both struct tcphdr and struct udphdr start with
  284. * __be16 source;
  285. * __be16 dest;
  286. * so we can use the same code for both.
  287. */
  288. switch (fa->mangle.offset) {
  289. case offsetof(struct tcphdr, dest):
  290. BUILD_BUG_ON(offsetof(struct tcphdr, dest) !=
  291. offsetof(struct udphdr, dest));
  292. dnat = true;
  293. fallthrough;
  294. case offsetof(struct tcphdr, source):
  295. BUILD_BUG_ON(offsetof(struct tcphdr, source) !=
  296. offsetof(struct udphdr, source));
  297. if (~fa->mangle.mask != 0xffff)
  298. return -EOPNOTSUPP;
  299. conn->l4_natport = htons(fa->mangle.val);
  300. mung->tcpudp = 1;
  301. break;
  302. default:
  303. return -EOPNOTSUPP;
  304. }
  305. break;
  306. default:
  307. return -EOPNOTSUPP;
  308. }
  309. /* first mangle tells us whether this is SNAT or DNAT;
  310. * subsequent mangles must match that
  311. */
  312. if (first)
  313. conn->dnat = dnat;
  314. else if (conn->dnat != dnat)
  315. return -EOPNOTSUPP;
  316. return 0;
  317. }
  318. static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
  319. struct flow_cls_offload *tc)
  320. {
  321. struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
  322. struct efx_tc_ct_mangler_state mung = {};
  323. struct efx_tc_ct_entry *conn, *old;
  324. struct efx_nic *efx = ct_zone->efx;
  325. const struct flow_action_entry *fa;
  326. struct efx_tc_counter *cnt;
  327. int rc, i;
  328. if (WARN_ON(!efx->tc))
  329. return -ENETDOWN;
  330. if (WARN_ON(!efx->tc->up))
  331. return -ENETDOWN;
  332. conn = kzalloc_obj(*conn, GFP_USER);
  333. if (!conn)
  334. return -ENOMEM;
  335. conn->cookie = tc->cookie;
  336. old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
  337. &conn->linkage,
  338. efx_tc_ct_ht_params);
  339. if (IS_ERR(old)) {
  340. rc = PTR_ERR(old);
  341. goto release;
  342. } else if (old) {
  343. netif_dbg(efx, drv, efx->net_dev,
  344. "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
  345. rc = -EEXIST;
  346. goto release;
  347. }
  348. /* Parse match */
  349. conn->zone = ct_zone;
  350. rc = efx_tc_ct_parse_match(efx, fr, conn);
  351. if (rc)
  352. goto release;
  353. /* Parse actions */
  354. flow_action_for_each(i, fa, &fr->action) {
  355. switch (fa->id) {
  356. case FLOW_ACTION_CT_METADATA:
  357. conn->mark = fa->ct_metadata.mark;
  358. if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
  359. netif_dbg(efx, drv, efx->net_dev,
  360. "Setting CT label not supported\n");
  361. rc = -EOPNOTSUPP;
  362. goto release;
  363. }
  364. break;
  365. case FLOW_ACTION_MANGLE:
  366. if (conn->eth_proto != htons(ETH_P_IP)) {
  367. netif_dbg(efx, drv, efx->net_dev,
  368. "NAT only supported for IPv4\n");
  369. rc = -EOPNOTSUPP;
  370. goto release;
  371. }
  372. rc = efx_tc_ct_mangle(efx, conn, fa, &mung);
  373. if (rc)
  374. goto release;
  375. break;
  376. default:
  377. netif_dbg(efx, drv, efx->net_dev,
  378. "Unhandled action %u for conntrack\n", fa->id);
  379. rc = -EOPNOTSUPP;
  380. goto release;
  381. }
  382. }
  383. /* fill in defaults for unmangled values */
  384. if (!mung.ipv4)
  385. conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
  386. if (!mung.tcpudp)
  387. conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
  388. cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
  389. if (IS_ERR(cnt)) {
  390. rc = PTR_ERR(cnt);
  391. goto release;
  392. }
  393. conn->cnt = cnt;
  394. rc = efx_mae_insert_ct(efx, conn);
  395. if (rc) {
  396. netif_dbg(efx, drv, efx->net_dev,
  397. "Failed to insert conntrack, %d\n", rc);
  398. goto release;
  399. }
  400. mutex_lock(&ct_zone->mutex);
  401. list_add_tail(&conn->list, &ct_zone->cts);
  402. mutex_unlock(&ct_zone->mutex);
  403. return 0;
  404. release:
  405. if (conn->cnt)
  406. efx_tc_flower_release_counter(efx, conn->cnt);
  407. if (!old)
  408. rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
  409. efx_tc_ct_ht_params);
  410. kfree(conn);
  411. return rc;
  412. }
  413. /* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */
  414. static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
  415. {
  416. int rc;
  417. /* Remove it from HW */
  418. rc = efx_mae_remove_ct(efx, conn);
  419. /* Delete it from SW */
  420. rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
  421. efx_tc_ct_ht_params);
  422. if (rc) {
  423. netif_err(efx, drv, efx->net_dev,
  424. "Failed to remove conntrack %lx from hw, rc %d\n",
  425. conn->cookie, rc);
  426. } else {
  427. netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
  428. conn->cookie);
  429. }
  430. }
  431. static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
  432. {
  433. /* Remove related CT counter. This is delayed after the conn object we
  434. * are working with has been successfully removed. This protects the
  435. * counter from being used-after-free inside efx_tc_ct_stats.
  436. */
  437. efx_tc_flower_release_counter(efx, conn->cnt);
  438. kfree(conn);
  439. }
  440. static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
  441. struct flow_cls_offload *tc)
  442. {
  443. struct efx_nic *efx = ct_zone->efx;
  444. struct efx_tc_ct_entry *conn;
  445. conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
  446. efx_tc_ct_ht_params);
  447. if (!conn) {
  448. netif_warn(efx, drv, efx->net_dev,
  449. "Conntrack %lx not found to remove\n", tc->cookie);
  450. return -ENOENT;
  451. }
  452. mutex_lock(&ct_zone->mutex);
  453. list_del(&conn->list);
  454. efx_tc_ct_remove(efx, conn);
  455. mutex_unlock(&ct_zone->mutex);
  456. synchronize_rcu();
  457. efx_tc_ct_remove_finish(efx, conn);
  458. return 0;
  459. }
  460. static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
  461. struct flow_cls_offload *tc)
  462. {
  463. struct efx_nic *efx = ct_zone->efx;
  464. struct efx_tc_ct_entry *conn;
  465. struct efx_tc_counter *cnt;
  466. rcu_read_lock();
  467. conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
  468. efx_tc_ct_ht_params);
  469. if (!conn) {
  470. netif_warn(efx, drv, efx->net_dev,
  471. "Conntrack %lx not found for stats\n", tc->cookie);
  472. rcu_read_unlock();
  473. return -ENOENT;
  474. }
  475. cnt = conn->cnt;
  476. spin_lock_bh(&cnt->lock);
  477. /* Report only last use */
  478. flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
  479. FLOW_ACTION_HW_STATS_DELAYED);
  480. spin_unlock_bh(&cnt->lock);
  481. rcu_read_unlock();
  482. return 0;
  483. }
  484. static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
  485. void *cb_priv)
  486. {
  487. struct flow_cls_offload *tcb = type_data;
  488. struct efx_tc_ct_zone *ct_zone = cb_priv;
  489. if (type != TC_SETUP_CLSFLOWER)
  490. return -EOPNOTSUPP;
  491. switch (tcb->command) {
  492. case FLOW_CLS_REPLACE:
  493. return efx_tc_ct_replace(ct_zone, tcb);
  494. case FLOW_CLS_DESTROY:
  495. return efx_tc_ct_destroy(ct_zone, tcb);
  496. case FLOW_CLS_STATS:
  497. return efx_tc_ct_stats(ct_zone, tcb);
  498. default:
  499. break;
  500. }
  501. return -EOPNOTSUPP;
  502. }
  503. struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
  504. struct nf_flowtable *ct_ft)
  505. {
  506. struct efx_tc_ct_zone *ct_zone, *old;
  507. int rc;
  508. ct_zone = kzalloc_obj(*ct_zone, GFP_USER);
  509. if (!ct_zone)
  510. return ERR_PTR(-ENOMEM);
  511. ct_zone->zone = zone;
  512. old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht,
  513. &ct_zone->linkage,
  514. efx_tc_ct_zone_ht_params);
  515. if (old) {
  516. /* don't need our new entry */
  517. kfree(ct_zone);
  518. if (IS_ERR(old)) /* oh dear, it's actually an error */
  519. return ERR_CAST(old);
  520. if (!refcount_inc_not_zero(&old->ref))
  521. return ERR_PTR(-EAGAIN);
  522. /* existing entry found */
  523. WARN_ON_ONCE(old->nf_ft != ct_ft);
  524. netif_dbg(efx, drv, efx->net_dev,
  525. "Found existing ct_zone for %u\n", zone);
  526. return old;
  527. }
  528. ct_zone->nf_ft = ct_ft;
  529. ct_zone->efx = efx;
  530. INIT_LIST_HEAD(&ct_zone->cts);
  531. mutex_init(&ct_zone->mutex);
  532. rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, ct_zone);
  533. netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
  534. zone, rc);
  535. if (rc < 0)
  536. goto fail;
  537. refcount_set(&ct_zone->ref, 1);
  538. return ct_zone;
  539. fail:
  540. rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
  541. efx_tc_ct_zone_ht_params);
  542. kfree(ct_zone);
  543. return ERR_PTR(rc);
  544. }
  545. void efx_tc_ct_unregister_zone(struct efx_nic *efx,
  546. struct efx_tc_ct_zone *ct_zone)
  547. {
  548. struct efx_tc_ct_entry *conn, *next;
  549. if (!refcount_dec_and_test(&ct_zone->ref))
  550. return; /* still in use */
  551. nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
  552. rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
  553. efx_tc_ct_zone_ht_params);
  554. mutex_lock(&ct_zone->mutex);
  555. list_for_each_entry(conn, &ct_zone->cts, list)
  556. efx_tc_ct_remove(efx, conn);
  557. synchronize_rcu();
  558. /* need to use _safe because efx_tc_ct_remove_finish() frees conn */
  559. list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
  560. efx_tc_ct_remove_finish(efx, conn);
  561. mutex_unlock(&ct_zone->mutex);
  562. mutex_destroy(&ct_zone->mutex);
  563. netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
  564. ct_zone->zone);
  565. kfree(ct_zone);
  566. }