act_police.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * net/sched/act_police.c Input police filter
  4. *
  5. * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  6. * J Hadi Salim (action changes)
  7. */
  8. #include <linux/module.h>
  9. #include <linux/types.h>
  10. #include <linux/kernel.h>
  11. #include <linux/string.h>
  12. #include <linux/errno.h>
  13. #include <linux/skbuff.h>
  14. #include <linux/rtnetlink.h>
  15. #include <linux/init.h>
  16. #include <linux/slab.h>
  17. #include <net/act_api.h>
  18. #include <net/gso.h>
  19. #include <net/netlink.h>
  20. #include <net/pkt_cls.h>
  21. #include <net/tc_act/tc_police.h>
  22. #include <net/tc_wrapper.h>
  23. /* Each policer is serialized by its individual spinlock */
/* Forward declaration: the ops table is defined later in this file but its
 * net_id is referenced by the functions in between.
 */
static struct tc_action_ops act_police_ops;
  25. static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
  26. [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE },
  27. [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
  28. [TCA_POLICE_AVRATE] = { .type = NLA_U32 },
  29. [TCA_POLICE_RESULT] = { .type = NLA_U32 },
  30. [TCA_POLICE_RATE64] = { .type = NLA_U64 },
  31. [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
  32. [TCA_POLICE_PKTRATE64] = { .type = NLA_U64, .min = 1 },
  33. [TCA_POLICE_PKTBURST64] = { .type = NLA_U64, .min = 1 },
  34. };
  35. static int tcf_police_init(struct net *net, struct nlattr *nla,
  36. struct nlattr *est, struct tc_action **a,
  37. struct tcf_proto *tp, u32 flags,
  38. struct netlink_ext_ack *extack)
  39. {
  40. int ret = 0, tcfp_result = TC_ACT_OK, err, size;
  41. bool bind = flags & TCA_ACT_FLAGS_BIND;
  42. struct nlattr *tb[TCA_POLICE_MAX + 1];
  43. struct tcf_chain *goto_ch = NULL;
  44. struct tc_police *parm;
  45. struct tcf_police *police;
  46. struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
  47. struct tc_action_net *tn = net_generic(net, act_police_ops.net_id);
  48. struct tcf_police_params *new;
  49. bool exists = false;
  50. u32 index;
  51. u64 rate64, prate64;
  52. u64 pps, ppsburst;
  53. if (nla == NULL)
  54. return -EINVAL;
  55. err = nla_parse_nested_deprecated(tb, TCA_POLICE_MAX, nla,
  56. police_policy, NULL);
  57. if (err < 0)
  58. return err;
  59. if (tb[TCA_POLICE_TBF] == NULL)
  60. return -EINVAL;
  61. size = nla_len(tb[TCA_POLICE_TBF]);
  62. if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
  63. return -EINVAL;
  64. parm = nla_data(tb[TCA_POLICE_TBF]);
  65. index = parm->index;
  66. err = tcf_idr_check_alloc(tn, &index, a, bind);
  67. if (err < 0)
  68. return err;
  69. exists = err;
  70. if (exists && bind)
  71. return ACT_P_BOUND;
  72. if (!exists) {
  73. ret = tcf_idr_create(tn, index, NULL, a,
  74. &act_police_ops, bind, true, flags);
  75. if (ret) {
  76. tcf_idr_cleanup(tn, index);
  77. return ret;
  78. }
  79. ret = ACT_P_CREATED;
  80. spin_lock_init(&(to_police(*a)->tcfp_lock));
  81. } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
  82. tcf_idr_release(*a, bind);
  83. return -EEXIST;
  84. }
  85. err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
  86. if (err < 0)
  87. goto release_idr;
  88. police = to_police(*a);
  89. if (parm->rate.rate) {
  90. err = -ENOMEM;
  91. R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE], NULL);
  92. if (R_tab == NULL)
  93. goto failure;
  94. if (parm->peakrate.rate) {
  95. P_tab = qdisc_get_rtab(&parm->peakrate,
  96. tb[TCA_POLICE_PEAKRATE], NULL);
  97. if (P_tab == NULL)
  98. goto failure;
  99. }
  100. }
  101. if (est) {
  102. err = gen_replace_estimator(&police->tcf_bstats,
  103. police->common.cpu_bstats,
  104. &police->tcf_rate_est,
  105. &police->tcf_lock,
  106. false, est);
  107. if (err)
  108. goto failure;
  109. } else if (tb[TCA_POLICE_AVRATE] &&
  110. (ret == ACT_P_CREATED ||
  111. !gen_estimator_active(&police->tcf_rate_est))) {
  112. err = -EINVAL;
  113. goto failure;
  114. }
  115. if (tb[TCA_POLICE_RESULT]) {
  116. tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
  117. if (TC_ACT_EXT_CMP(tcfp_result, TC_ACT_GOTO_CHAIN)) {
  118. NL_SET_ERR_MSG(extack,
  119. "goto chain not allowed on fallback");
  120. err = -EINVAL;
  121. goto failure;
  122. }
  123. }
  124. if ((tb[TCA_POLICE_PKTRATE64] && !tb[TCA_POLICE_PKTBURST64]) ||
  125. (!tb[TCA_POLICE_PKTRATE64] && tb[TCA_POLICE_PKTBURST64])) {
  126. NL_SET_ERR_MSG(extack,
  127. "Both or neither packet-per-second burst and rate must be provided");
  128. err = -EINVAL;
  129. goto failure;
  130. }
  131. if (tb[TCA_POLICE_PKTRATE64] && R_tab) {
  132. NL_SET_ERR_MSG(extack,
  133. "packet-per-second and byte-per-second rate limits not allowed in same action");
  134. err = -EINVAL;
  135. goto failure;
  136. }
  137. new = kzalloc_obj(*new);
  138. if (unlikely(!new)) {
  139. err = -ENOMEM;
  140. goto failure;
  141. }
  142. /* No failure allowed after this point */
  143. new->tcfp_result = tcfp_result;
  144. new->tcfp_mtu = parm->mtu;
  145. if (!new->tcfp_mtu) {
  146. new->tcfp_mtu = ~0;
  147. if (R_tab)
  148. new->tcfp_mtu = 255 << R_tab->rate.cell_log;
  149. }
  150. if (R_tab) {
  151. new->rate_present = true;
  152. rate64 = nla_get_u64_default(tb[TCA_POLICE_RATE64], 0);
  153. psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64);
  154. qdisc_put_rtab(R_tab);
  155. } else {
  156. new->rate_present = false;
  157. }
  158. if (P_tab) {
  159. new->peak_present = true;
  160. prate64 = nla_get_u64_default(tb[TCA_POLICE_PEAKRATE64], 0);
  161. psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64);
  162. qdisc_put_rtab(P_tab);
  163. } else {
  164. new->peak_present = false;
  165. }
  166. new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
  167. if (new->peak_present)
  168. new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
  169. new->tcfp_mtu);
  170. if (tb[TCA_POLICE_AVRATE])
  171. new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
  172. if (tb[TCA_POLICE_PKTRATE64]) {
  173. pps = nla_get_u64(tb[TCA_POLICE_PKTRATE64]);
  174. ppsburst = nla_get_u64(tb[TCA_POLICE_PKTBURST64]);
  175. new->pps_present = true;
  176. new->tcfp_pkt_burst = PSCHED_TICKS2NS(ppsburst);
  177. psched_ppscfg_precompute(&new->ppsrate, pps);
  178. }
  179. new->action = parm->action;
  180. spin_lock_bh(&police->tcf_lock);
  181. spin_lock_bh(&police->tcfp_lock);
  182. police->tcfp_t_c = ktime_get_ns();
  183. police->tcfp_toks = new->tcfp_burst;
  184. if (new->peak_present)
  185. police->tcfp_ptoks = new->tcfp_mtu_ptoks;
  186. spin_unlock_bh(&police->tcfp_lock);
  187. goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
  188. new = rcu_replace_pointer(police->params,
  189. new,
  190. lockdep_is_held(&police->tcf_lock));
  191. spin_unlock_bh(&police->tcf_lock);
  192. if (goto_ch)
  193. tcf_chain_put_by_act(goto_ch);
  194. if (new)
  195. kfree_rcu(new, rcu);
  196. return ret;
  197. failure:
  198. qdisc_put_rtab(P_tab);
  199. qdisc_put_rtab(R_tab);
  200. if (goto_ch)
  201. tcf_chain_put_by_act(goto_ch);
  202. release_idr:
  203. tcf_idr_release(*a, bind);
  204. return err;
  205. }
  206. static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
  207. {
  208. u32 len;
  209. if (skb_is_gso(skb))
  210. return skb_gso_validate_mac_len(skb, limit);
  211. len = qdisc_pkt_len(skb);
  212. if (skb_at_tc_ingress(skb))
  213. len += skb->mac_len;
  214. return len <= limit;
  215. }
  216. TC_INDIRECT_SCOPE int tcf_police_act(struct sk_buff *skb,
  217. const struct tc_action *a,
  218. struct tcf_result *res)
  219. {
  220. struct tcf_police *police = to_police(a);
  221. s64 now, toks, ppstoks = 0, ptoks = 0;
  222. struct tcf_police_params *p;
  223. int ret;
  224. tcf_lastuse_update(&police->tcf_tm);
  225. bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
  226. p = rcu_dereference_bh(police->params);
  227. ret = p->action;
  228. if (p->tcfp_ewma_rate) {
  229. struct gnet_stats_rate_est64 sample;
  230. if (!gen_estimator_read(&police->tcf_rate_est, &sample) ||
  231. sample.bps >= p->tcfp_ewma_rate)
  232. goto inc_overlimits;
  233. }
  234. if (tcf_police_mtu_check(skb, p->tcfp_mtu)) {
  235. if (!p->rate_present && !p->pps_present) {
  236. ret = p->tcfp_result;
  237. goto end;
  238. }
  239. now = ktime_get_ns();
  240. spin_lock_bh(&police->tcfp_lock);
  241. toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
  242. if (p->peak_present) {
  243. ptoks = toks + police->tcfp_ptoks;
  244. if (ptoks > p->tcfp_mtu_ptoks)
  245. ptoks = p->tcfp_mtu_ptoks;
  246. ptoks -= (s64)psched_l2t_ns(&p->peak,
  247. qdisc_pkt_len(skb));
  248. }
  249. if (p->rate_present) {
  250. toks += police->tcfp_toks;
  251. if (toks > p->tcfp_burst)
  252. toks = p->tcfp_burst;
  253. toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
  254. } else if (p->pps_present) {
  255. ppstoks = min_t(s64, now - police->tcfp_t_c, p->tcfp_pkt_burst);
  256. ppstoks += police->tcfp_pkttoks;
  257. if (ppstoks > p->tcfp_pkt_burst)
  258. ppstoks = p->tcfp_pkt_burst;
  259. ppstoks -= (s64)psched_pkt2t_ns(&p->ppsrate, 1);
  260. }
  261. if ((toks | ptoks | ppstoks) >= 0) {
  262. police->tcfp_t_c = now;
  263. police->tcfp_toks = toks;
  264. police->tcfp_ptoks = ptoks;
  265. police->tcfp_pkttoks = ppstoks;
  266. spin_unlock_bh(&police->tcfp_lock);
  267. ret = p->tcfp_result;
  268. goto inc_drops;
  269. }
  270. spin_unlock_bh(&police->tcfp_lock);
  271. }
  272. inc_overlimits:
  273. qstats_overlimit_inc(this_cpu_ptr(police->common.cpu_qstats));
  274. inc_drops:
  275. if (ret == TC_ACT_SHOT)
  276. qstats_drop_inc(this_cpu_ptr(police->common.cpu_qstats));
  277. end:
  278. return ret;
  279. }
  280. static void tcf_police_cleanup(struct tc_action *a)
  281. {
  282. struct tcf_police *police = to_police(a);
  283. struct tcf_police_params *p;
  284. p = rcu_dereference_protected(police->params, 1);
  285. if (p)
  286. kfree_rcu(p, rcu);
  287. }
  288. static void tcf_police_stats_update(struct tc_action *a,
  289. u64 bytes, u64 packets, u64 drops,
  290. u64 lastuse, bool hw)
  291. {
  292. struct tcf_police *police = to_police(a);
  293. struct tcf_t *tm = &police->tcf_tm;
  294. tcf_action_update_stats(a, bytes, packets, drops, hw);
  295. tm->lastuse = max_t(u64, tm->lastuse, lastuse);
  296. }
  297. static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
  298. int bind, int ref)
  299. {
  300. const struct tcf_police *police = to_police(a);
  301. unsigned char *b = skb_tail_pointer(skb);
  302. const struct tcf_police_params *p;
  303. struct tc_police opt = {
  304. .index = police->tcf_index,
  305. .refcnt = refcount_read(&police->tcf_refcnt) - ref,
  306. .bindcnt = atomic_read(&police->tcf_bindcnt) - bind,
  307. };
  308. struct tcf_t t;
  309. rcu_read_lock();
  310. p = rcu_dereference(police->params);
  311. opt.action = p->action;
  312. opt.mtu = p->tcfp_mtu;
  313. opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
  314. if (p->rate_present) {
  315. psched_ratecfg_getrate(&opt.rate, &p->rate);
  316. if ((p->rate.rate_bytes_ps >= (1ULL << 32)) &&
  317. nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
  318. p->rate.rate_bytes_ps,
  319. TCA_POLICE_PAD))
  320. goto nla_put_failure;
  321. }
  322. if (p->peak_present) {
  323. psched_ratecfg_getrate(&opt.peakrate, &p->peak);
  324. if ((p->peak.rate_bytes_ps >= (1ULL << 32)) &&
  325. nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
  326. p->peak.rate_bytes_ps,
  327. TCA_POLICE_PAD))
  328. goto nla_put_failure;
  329. }
  330. if (p->pps_present) {
  331. if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
  332. p->ppsrate.rate_pkts_ps,
  333. TCA_POLICE_PAD))
  334. goto nla_put_failure;
  335. if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
  336. PSCHED_NS2TICKS(p->tcfp_pkt_burst),
  337. TCA_POLICE_PAD))
  338. goto nla_put_failure;
  339. }
  340. if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
  341. goto nla_put_failure;
  342. if (p->tcfp_result &&
  343. nla_put_u32(skb, TCA_POLICE_RESULT, p->tcfp_result))
  344. goto nla_put_failure;
  345. if (p->tcfp_ewma_rate &&
  346. nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate))
  347. goto nla_put_failure;
  348. tcf_tm_dump(&t, &police->tcf_tm);
  349. if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
  350. goto nla_put_failure;
  351. rcu_read_unlock();
  352. return skb->len;
  353. nla_put_failure:
  354. rcu_read_unlock();
  355. nlmsg_trim(skb, b);
  356. return -1;
  357. }
  358. static int tcf_police_act_to_flow_act(int tc_act, u32 *extval,
  359. struct netlink_ext_ack *extack)
  360. {
  361. int act_id = -EOPNOTSUPP;
  362. if (!TC_ACT_EXT_OPCODE(tc_act)) {
  363. if (tc_act == TC_ACT_OK)
  364. act_id = FLOW_ACTION_ACCEPT;
  365. else if (tc_act == TC_ACT_SHOT)
  366. act_id = FLOW_ACTION_DROP;
  367. else if (tc_act == TC_ACT_PIPE)
  368. act_id = FLOW_ACTION_PIPE;
  369. else if (tc_act == TC_ACT_RECLASSIFY)
  370. NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"reclassify\"");
  371. else
  372. NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
  373. } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) {
  374. act_id = FLOW_ACTION_GOTO;
  375. *extval = tc_act & TC_ACT_EXT_VAL_MASK;
  376. } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) {
  377. act_id = FLOW_ACTION_JUMP;
  378. *extval = tc_act & TC_ACT_EXT_VAL_MASK;
  379. } else if (tc_act == TC_ACT_UNSPEC) {
  380. act_id = FLOW_ACTION_CONTINUE;
  381. } else {
  382. NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
  383. }
  384. return act_id;
  385. }
  386. static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data,
  387. u32 *index_inc, bool bind,
  388. struct netlink_ext_ack *extack)
  389. {
  390. if (bind) {
  391. struct flow_action_entry *entry = entry_data;
  392. struct tcf_police *police = to_police(act);
  393. struct tcf_police_params *p;
  394. int act_id;
  395. p = rcu_dereference_protected(police->params,
  396. lockdep_is_held(&police->tcf_lock));
  397. entry->id = FLOW_ACTION_POLICE;
  398. entry->police.burst = tcf_police_burst(act);
  399. entry->police.rate_bytes_ps =
  400. tcf_police_rate_bytes_ps(act);
  401. entry->police.peakrate_bytes_ps = tcf_police_peakrate_bytes_ps(act);
  402. entry->police.avrate = tcf_police_tcfp_ewma_rate(act);
  403. entry->police.overhead = tcf_police_rate_overhead(act);
  404. entry->police.burst_pkt = tcf_police_burst_pkt(act);
  405. entry->police.rate_pkt_ps =
  406. tcf_police_rate_pkt_ps(act);
  407. entry->police.mtu = tcf_police_tcfp_mtu(act);
  408. act_id = tcf_police_act_to_flow_act(police->tcf_action,
  409. &entry->police.exceed.extval,
  410. extack);
  411. if (act_id < 0)
  412. return act_id;
  413. entry->police.exceed.act_id = act_id;
  414. act_id = tcf_police_act_to_flow_act(p->tcfp_result,
  415. &entry->police.notexceed.extval,
  416. extack);
  417. if (act_id < 0)
  418. return act_id;
  419. entry->police.notexceed.act_id = act_id;
  420. *index_inc = 1;
  421. } else {
  422. struct flow_offload_action *fl_action = entry_data;
  423. fl_action->id = FLOW_ACTION_POLICE;
  424. }
  425. return 0;
  426. }
/* Module metadata: author, description and license. */
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
  430. static struct tc_action_ops act_police_ops = {
  431. .kind = "police",
  432. .id = TCA_ID_POLICE,
  433. .owner = THIS_MODULE,
  434. .stats_update = tcf_police_stats_update,
  435. .act = tcf_police_act,
  436. .dump = tcf_police_dump,
  437. .init = tcf_police_init,
  438. .cleanup = tcf_police_cleanup,
  439. .offload_act_setup = tcf_police_offload_act_setup,
  440. .size = sizeof(struct tcf_police),
  441. };
/* Module alias for the "police" action kind.
 * NOTE(review): presumably used to autoload this module by action name; confirm.
 */
MODULE_ALIAS_NET_ACT("police");
  443. static __net_init int police_init_net(struct net *net)
  444. {
  445. struct tc_action_net *tn = net_generic(net, act_police_ops.net_id);
  446. return tc_action_net_init(net, tn, &act_police_ops);
  447. }
  448. static void __net_exit police_exit_net(struct list_head *net_list)
  449. {
  450. tc_action_net_exit(net_list, act_police_ops.net_id);
  451. }
  452. static struct pernet_operations police_net_ops = {
  453. .init = police_init_net,
  454. .exit_batch = police_exit_net,
  455. .id = &act_police_ops.net_id,
  456. .size = sizeof(struct tc_action_net),
  457. };
  458. static int __init police_init_module(void)
  459. {
  460. return tcf_register_action(&act_police_ops, &police_net_ops);
  461. }
/* Module exit point: unregister the police action and its per-namespace
 * operations, mirroring police_init_module().
 */
static void __exit police_cleanup_module(void)
{
	tcf_unregister_action(&act_police_ops, &police_net_ops);
}
/* Hook the init and cleanup functions into module load and unload. */
module_init(police_init_module);
module_exit(police_cleanup_module);