ip6_flowlabel.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * ip6_flowlabel.c IPv6 flowlabel manager.
  4. *
  5. * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  6. */
  7. #include <linux/capability.h>
  8. #include <linux/errno.h>
  9. #include <linux/types.h>
  10. #include <linux/socket.h>
  11. #include <linux/net.h>
  12. #include <linux/netdevice.h>
  13. #include <linux/in6.h>
  14. #include <linux/proc_fs.h>
  15. #include <linux/seq_file.h>
  16. #include <linux/slab.h>
  17. #include <linux/export.h>
  18. #include <linux/pid_namespace.h>
  19. #include <linux/jump_label_ratelimit.h>
  20. #include <net/net_namespace.h>
  21. #include <net/sock.h>
  22. #include <net/ipv6.h>
  23. #include <net/rawv6.h>
  24. #include <net/transp_v6.h>
  25. #include <linux/uaccess.h>
/*
 * Minimal linger, in seconds. It is set to the 6 seconds specified in the
 * old IPv6 RFC, which was a reasonable value.
 */
#define FL_MIN_LINGER 6
/*
 * Maximal linger timeout, in seconds. check_linger() refuses larger values
 * unless the caller has CAP_NET_ADMIN.
 */
#define FL_MAX_LINGER 150

/* FL hash table */

/* Per-socket label count at which mem_check() starts refusing requests. */
#define FL_MAX_PER_SOCK 32
/* Global budget of interned labels that mem_check() compares fl_size to. */
#define FL_MAX_SIZE 4096
/* The hash table has FL_HASH_MASK + 1 buckets. */
#define FL_HASH_MASK 255
/* Bucket index: low bits of the label converted to host byte order. */
#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
/* Number of labels currently linked into fl_ht (all namespaces together). */
static atomic_t fl_size = ATOMIC_INIT(0);
/* Hash table of flow labels; each bucket is an RCU-protected singly linked list. */
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];

static void ip6_fl_gc(struct timer_list *unused);
/* Timer that runs ip6_fl_gc() to reap expired, unused labels. */
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);

/*
 * FL hash table lock: it only serializes against the GC and the other
 * updaters in this file; lookups walk the table under RCU instead.
 */
static DEFINE_SPINLOCK(ip6_fl_lock);

/* Big socket lock: one global lock guarding every socket's ipv6_fl_list. */
static DEFINE_SPINLOCK(ip6_sk_fl_lock);

/*
 * Deferred static key, incremented in fl_create() for labels that are
 * exclusive/process/user shared or carry options, and decremented again in
 * fl_free().
 */
DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
/*
 * Walk every label in hash bucket @hash. Uses rcu_dereference(), so the
 * callers in this file invoke it inside an RCU read-side section.
 */
#define for_each_fl_rcu(hash, fl) \
	for (fl = rcu_dereference(fl_ht[(hash)]); \
	     fl != NULL; \
	     fl = rcu_dereference(fl->next))
/* Resume a bucket walk at the entry following @fl (which must be non-NULL). */
#define for_each_fl_continue_rcu(fl) \
	for (fl = rcu_dereference(fl->next); \
	     fl != NULL; \
	     fl = rcu_dereference(fl->next))

/* Walk the list of flow labels attached to socket @sk. */
#define for_each_sk_fl_rcu(sk, sfl) \
	for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list); \
	     sfl != NULL; \
	     sfl = rcu_dereference(sfl->next))
  57. static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
  58. {
  59. struct ip6_flowlabel *fl;
  60. for_each_fl_rcu(FL_HASH(label), fl) {
  61. if (fl->label == label && net_eq(fl->fl_net, net))
  62. return fl;
  63. }
  64. return NULL;
  65. }
  66. static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
  67. {
  68. struct ip6_flowlabel *fl;
  69. rcu_read_lock();
  70. fl = __fl_lookup(net, label);
  71. if (fl && !atomic_inc_not_zero(&fl->users))
  72. fl = NULL;
  73. rcu_read_unlock();
  74. return fl;
  75. }
  76. static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
  77. {
  78. return fl->share == IPV6_FL_S_EXCL ||
  79. fl->share == IPV6_FL_S_PROCESS ||
  80. fl->share == IPV6_FL_S_USER;
  81. }
  82. static void fl_free_rcu(struct rcu_head *head)
  83. {
  84. struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
  85. if (fl->share == IPV6_FL_S_PROCESS)
  86. put_pid(fl->owner.pid);
  87. kfree(fl->opt);
  88. kfree(fl);
  89. }
  90. static void fl_free(struct ip6_flowlabel *fl)
  91. {
  92. if (!fl)
  93. return;
  94. if (fl_shared_exclusive(fl) || fl->opt)
  95. static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);
  96. call_rcu(&fl->rcu, fl_free_rcu);
  97. }
  98. static void fl_release(struct ip6_flowlabel *fl)
  99. {
  100. spin_lock_bh(&ip6_fl_lock);
  101. fl->lastuse = jiffies;
  102. if (atomic_dec_and_test(&fl->users)) {
  103. unsigned long ttd = fl->lastuse + fl->linger;
  104. if (time_after(ttd, fl->expires))
  105. fl->expires = ttd;
  106. ttd = fl->expires;
  107. if (!timer_pending(&ip6_fl_gc_timer) ||
  108. time_after(ip6_fl_gc_timer.expires, ttd))
  109. mod_timer(&ip6_fl_gc_timer, ttd);
  110. }
  111. spin_unlock_bh(&ip6_fl_lock);
  112. }
  113. static void ip6_fl_gc(struct timer_list *unused)
  114. {
  115. int i;
  116. unsigned long now = jiffies;
  117. unsigned long sched = 0;
  118. spin_lock(&ip6_fl_lock);
  119. for (i = 0; i <= FL_HASH_MASK; i++) {
  120. struct ip6_flowlabel *fl;
  121. struct ip6_flowlabel __rcu **flp;
  122. flp = &fl_ht[i];
  123. while ((fl = rcu_dereference_protected(*flp,
  124. lockdep_is_held(&ip6_fl_lock))) != NULL) {
  125. if (atomic_read(&fl->users) == 0) {
  126. unsigned long ttd = fl->lastuse + fl->linger;
  127. if (time_after(ttd, fl->expires))
  128. fl->expires = ttd;
  129. ttd = fl->expires;
  130. if (time_after_eq(now, ttd)) {
  131. *flp = fl->next;
  132. fl_free(fl);
  133. atomic_dec(&fl_size);
  134. continue;
  135. }
  136. if (!sched || time_before(ttd, sched))
  137. sched = ttd;
  138. }
  139. flp = &fl->next;
  140. }
  141. }
  142. if (!sched && atomic_read(&fl_size))
  143. sched = now + FL_MAX_LINGER;
  144. if (sched) {
  145. mod_timer(&ip6_fl_gc_timer, sched);
  146. }
  147. spin_unlock(&ip6_fl_lock);
  148. }
  149. static void __net_exit ip6_fl_purge(struct net *net)
  150. {
  151. int i;
  152. spin_lock_bh(&ip6_fl_lock);
  153. for (i = 0; i <= FL_HASH_MASK; i++) {
  154. struct ip6_flowlabel *fl;
  155. struct ip6_flowlabel __rcu **flp;
  156. flp = &fl_ht[i];
  157. while ((fl = rcu_dereference_protected(*flp,
  158. lockdep_is_held(&ip6_fl_lock))) != NULL) {
  159. if (net_eq(fl->fl_net, net) &&
  160. atomic_read(&fl->users) == 0) {
  161. *flp = fl->next;
  162. fl_free(fl);
  163. atomic_dec(&fl_size);
  164. continue;
  165. }
  166. flp = &fl->next;
  167. }
  168. }
  169. spin_unlock_bh(&ip6_fl_lock);
  170. }
  171. static struct ip6_flowlabel *fl_intern(struct net *net,
  172. struct ip6_flowlabel *fl, __be32 label)
  173. {
  174. struct ip6_flowlabel *lfl;
  175. fl->label = label & IPV6_FLOWLABEL_MASK;
  176. rcu_read_lock();
  177. spin_lock_bh(&ip6_fl_lock);
  178. if (label == 0) {
  179. for (;;) {
  180. fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK;
  181. if (fl->label) {
  182. lfl = __fl_lookup(net, fl->label);
  183. if (!lfl)
  184. break;
  185. }
  186. }
  187. } else {
  188. /*
  189. * we dropper the ip6_fl_lock, so this entry could reappear
  190. * and we need to recheck with it.
  191. *
  192. * OTOH no need to search the active socket first, like it is
  193. * done in ipv6_flowlabel_opt - sock is locked, so new entry
  194. * with the same label can only appear on another sock
  195. */
  196. lfl = __fl_lookup(net, fl->label);
  197. if (lfl) {
  198. atomic_inc(&lfl->users);
  199. spin_unlock_bh(&ip6_fl_lock);
  200. rcu_read_unlock();
  201. return lfl;
  202. }
  203. }
  204. fl->lastuse = jiffies;
  205. fl->next = fl_ht[FL_HASH(fl->label)];
  206. rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
  207. atomic_inc(&fl_size);
  208. spin_unlock_bh(&ip6_fl_lock);
  209. rcu_read_unlock();
  210. return NULL;
  211. }
  212. /* Socket flowlabel lists */
  213. struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
  214. {
  215. struct ipv6_fl_socklist *sfl;
  216. label &= IPV6_FLOWLABEL_MASK;
  217. rcu_read_lock();
  218. for_each_sk_fl_rcu(sk, sfl) {
  219. struct ip6_flowlabel *fl = sfl->fl;
  220. if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
  221. fl->lastuse = jiffies;
  222. rcu_read_unlock();
  223. return fl;
  224. }
  225. }
  226. rcu_read_unlock();
  227. return NULL;
  228. }
  229. EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
  230. void fl6_free_socklist(struct sock *sk)
  231. {
  232. struct inet_sock *inet = inet_sk(sk);
  233. struct ipv6_fl_socklist *sfl;
  234. if (!rcu_access_pointer(inet->ipv6_fl_list))
  235. return;
  236. spin_lock_bh(&ip6_sk_fl_lock);
  237. while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list,
  238. lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
  239. inet->ipv6_fl_list = sfl->next;
  240. spin_unlock_bh(&ip6_sk_fl_lock);
  241. fl_release(sfl->fl);
  242. kfree_rcu(sfl, rcu);
  243. spin_lock_bh(&ip6_sk_fl_lock);
  244. }
  245. spin_unlock_bh(&ip6_sk_fl_lock);
  246. }
  247. /* Service routines */
  248. /*
  249. It is the only difficult place. flowlabel enforces equal headers
  250. before and including routing header, however user may supply options
  251. following rthdr.
  252. */
  253. struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
  254. struct ip6_flowlabel *fl,
  255. struct ipv6_txoptions *fopt)
  256. {
  257. struct ipv6_txoptions *fl_opt = fl->opt;
  258. if (!fopt || fopt->opt_flen == 0)
  259. return fl_opt;
  260. if (fl_opt) {
  261. opt_space->hopopt = fl_opt->hopopt;
  262. opt_space->dst0opt = fl_opt->dst0opt;
  263. opt_space->srcrt = fl_opt->srcrt;
  264. opt_space->opt_nflen = fl_opt->opt_nflen;
  265. } else {
  266. if (fopt->opt_nflen == 0)
  267. return fopt;
  268. opt_space->hopopt = NULL;
  269. opt_space->dst0opt = NULL;
  270. opt_space->srcrt = NULL;
  271. opt_space->opt_nflen = 0;
  272. }
  273. opt_space->dst1opt = fopt->dst1opt;
  274. opt_space->opt_flen = fopt->opt_flen;
  275. opt_space->tot_len = fopt->tot_len;
  276. return opt_space;
  277. }
  278. EXPORT_SYMBOL_GPL(fl6_merge_options);
  279. static unsigned long check_linger(unsigned long ttl)
  280. {
  281. if (ttl < FL_MIN_LINGER)
  282. return FL_MIN_LINGER*HZ;
  283. if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
  284. return 0;
  285. return ttl*HZ;
  286. }
  287. static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
  288. {
  289. linger = check_linger(linger);
  290. if (!linger)
  291. return -EPERM;
  292. expires = check_linger(expires);
  293. if (!expires)
  294. return -EPERM;
  295. spin_lock_bh(&ip6_fl_lock);
  296. fl->lastuse = jiffies;
  297. if (time_before(fl->linger, linger))
  298. fl->linger = linger;
  299. if (time_before(expires, fl->linger))
  300. expires = fl->linger;
  301. if (time_before(fl->expires, fl->lastuse + expires))
  302. fl->expires = fl->lastuse + expires;
  303. spin_unlock_bh(&ip6_fl_lock);
  304. return 0;
  305. }
/*
 * Allocate and initialise a new, not yet interned flow label from @freq.
 *
 * @net:    namespace the label will belong to
 * @sk:     requesting socket
 * @freq:   the request already copied from user space
 * @optval: user buffer holding the request, optionally followed by
 *          control-message formatted options
 * @optlen: total length of @optval
 * @err_p:  receives the negative errno when NULL is returned; it is not
 *          written on success
 *
 * Returns the new label with a user count of 1, or NULL on failure.
 */
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;
	int addr_type;
	int err;

	/* Bytes that follow the (CMSG-aligned) request header. */
	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc_obj(*fl);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		/* One allocation: the txoptions header followed by the raw data. */
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
				CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		/* Only these two msghdr fields are set up before the call. */
		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt+1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		/* NOTE(review): presumably parses the cmsgs into fl->opt - confirm. */
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		/* Options counted in opt_flen are not allowed on a label. */
		if (fl->opt->opt_flen)
			goto done;
		/* Nothing counted in opt_nflen either: drop the buffer. */
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	/* Applies linger/expires; fails with -EPERM on refused values. */
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	/* v4-mapped and unspecified destinations are rejected. */
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	/* Record the owner for the sharing modes that are checked against one. */
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		/* Reference dropped again in fl_free_rcu(). */
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	/* Balanced by the decrement in fl_free(). */
	if (fl_shared_exclusive(fl) || fl->opt) {
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	/* The label was never published, so it can be freed directly. */
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}
  392. static int mem_check(struct sock *sk)
  393. {
  394. int room = FL_MAX_SIZE - atomic_read(&fl_size);
  395. struct ipv6_fl_socklist *sfl;
  396. int count = 0;
  397. if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
  398. return 0;
  399. rcu_read_lock();
  400. for_each_sk_fl_rcu(sk, sfl)
  401. count++;
  402. rcu_read_unlock();
  403. if (room <= 0 ||
  404. ((count >= FL_MAX_PER_SOCK ||
  405. (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
  406. !capable(CAP_NET_ADMIN)))
  407. return -ENOBUFS;
  408. return 0;
  409. }
  410. static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl,
  411. struct ip6_flowlabel *fl)
  412. {
  413. struct inet_sock *inet = inet_sk(sk);
  414. spin_lock_bh(&ip6_sk_fl_lock);
  415. sfl->fl = fl;
  416. sfl->next = inet->ipv6_fl_list;
  417. rcu_assign_pointer(inet->ipv6_fl_list, sfl);
  418. spin_unlock_bh(&ip6_sk_fl_lock);
  419. }
  420. int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
  421. int flags)
  422. {
  423. struct ipv6_pinfo *np = inet6_sk(sk);
  424. struct ipv6_fl_socklist *sfl;
  425. if (flags & IPV6_FL_F_REMOTE) {
  426. freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
  427. return 0;
  428. }
  429. if (inet6_test_bit(REPFLOW, sk)) {
  430. freq->flr_label = np->flow_label;
  431. return 0;
  432. }
  433. rcu_read_lock();
  434. for_each_sk_fl_rcu(sk, sfl) {
  435. if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
  436. spin_lock_bh(&ip6_fl_lock);
  437. freq->flr_label = sfl->fl->label;
  438. freq->flr_dst = sfl->fl->dst;
  439. freq->flr_share = sfl->fl->share;
  440. freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
  441. freq->flr_linger = sfl->fl->linger / HZ;
  442. spin_unlock_bh(&ip6_fl_lock);
  443. rcu_read_unlock();
  444. return 0;
  445. }
  446. }
  447. rcu_read_unlock();
  448. return -ENOENT;
  449. }
/*
 * Dereference a socket flow-label list pointer on the update side; the
 * lockdep expression states that ip6_sk_fl_lock is expected to be held.
 */
#define socklist_dereference(__sflp) \
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))
  452. static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
  453. {
  454. struct ipv6_pinfo *np = inet6_sk(sk);
  455. struct ipv6_fl_socklist __rcu **sflp;
  456. struct ipv6_fl_socklist *sfl;
  457. if (freq->flr_flags & IPV6_FL_F_REFLECT) {
  458. if (sk->sk_protocol != IPPROTO_TCP)
  459. return -ENOPROTOOPT;
  460. if (!inet6_test_bit(REPFLOW, sk))
  461. return -ESRCH;
  462. np->flow_label = 0;
  463. inet6_clear_bit(REPFLOW, sk);
  464. return 0;
  465. }
  466. spin_lock_bh(&ip6_sk_fl_lock);
  467. for (sflp = &inet_sk(sk)->ipv6_fl_list;
  468. (sfl = socklist_dereference(*sflp)) != NULL;
  469. sflp = &sfl->next) {
  470. if (sfl->fl->label == freq->flr_label)
  471. goto found;
  472. }
  473. spin_unlock_bh(&ip6_sk_fl_lock);
  474. return -ESRCH;
  475. found:
  476. if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
  477. np->flow_label &= ~IPV6_FLOWLABEL_MASK;
  478. *sflp = sfl->next;
  479. spin_unlock_bh(&ip6_sk_fl_lock);
  480. fl_release(sfl->fl);
  481. kfree_rcu(sfl, rcu);
  482. return 0;
  483. }
  484. static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
  485. {
  486. struct net *net = sock_net(sk);
  487. struct ipv6_fl_socklist *sfl;
  488. int err;
  489. rcu_read_lock();
  490. for_each_sk_fl_rcu(sk, sfl) {
  491. if (sfl->fl->label == freq->flr_label) {
  492. err = fl6_renew(sfl->fl, freq->flr_linger,
  493. freq->flr_expires);
  494. rcu_read_unlock();
  495. return err;
  496. }
  497. }
  498. rcu_read_unlock();
  499. if (freq->flr_share == IPV6_FL_S_NONE &&
  500. ns_capable(net->user_ns, CAP_NET_ADMIN)) {
  501. struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);
  502. if (fl) {
  503. err = fl6_renew(fl, freq->flr_linger,
  504. freq->flr_expires);
  505. fl_release(fl);
  506. return err;
  507. }
  508. }
  509. return -ESRCH;
  510. }
/*
 * Handle IPV6_FL_A_GET: attach a flow label to @sk, creating it if asked.
 *
 * @sk:     requesting socket
 * @freq:   request already copied from user space
 * @optval: original user buffer (the request, optionally followed by
 *          options); a generated label is written back into it
 * @optlen: length of @optval
 *
 * With IPV6_FL_F_REFLECT only the socket's REPFLOW mode is enabled.
 * Otherwise a candidate label is built with fl_create(). If a label with
 * the requested value already exists it is shared when the sharing rules
 * allow it and the candidate is discarded; else, with IPV6_FL_F_CREATE,
 * the candidate is interned and linked to the socket.
 *
 * Returns 0 on success or a negative errno.
 */
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
			      sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		inet6_set_bit(REPFLOW, sk);
		return 0;
	}

	/* Only the flow-label bits may be set in the request. */
	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	/* Candidate label; freed again below if an existing one is reused. */
	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	/* Allocation failure is only reported once the node is needed. */
	sfl1 = kmalloc_obj(*sfl1);

	if (freq->flr_label) {
		err = -EEXIST;
		/* First look at the labels this socket already holds. */
		rcu_read_lock();
		for_each_sk_fl_rcu(sk, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock();

		/* Then fall back to the global table. */
		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
			/*
			 * Also entered from below when fl_intern() finds that
			 * the label appeared in the meantime; fl1 holds a
			 * reference in both cases.
			 */
recheck:
			err = -EEXIST;
			if (freq->flr_flags&IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			/* Sharing mode and owner must match the existing label. */
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			/* Only ever lengthen the existing label's lifetime. */
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(sk, sfl1, fl1);
			/* The candidate is no longer needed. */
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	err = mem_check(sk);
	if (err != 0)
		goto done;

	/* A non-NULL result is an existing label that won the race. */
	fl1 = fl_intern(net, fl, freq->flr_label);
	if (fl1)
		goto recheck;

	/* Report the generated label back to user space. */
	if (!freq->flr_label) {
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		if (copy_to_sockptr_offset(optval, offset, &fl->label,
					   sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(sk, sfl1, fl);
	return 0;
done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}
  609. int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
  610. {
  611. struct in6_flowlabel_req freq;
  612. if (optlen < sizeof(freq))
  613. return -EINVAL;
  614. if (copy_from_sockptr(&freq, optval, sizeof(freq)))
  615. return -EFAULT;
  616. switch (freq.flr_action) {
  617. case IPV6_FL_A_PUT:
  618. return ipv6_flowlabel_put(sk, &freq);
  619. case IPV6_FL_A_RENEW:
  620. return ipv6_flowlabel_renew(sk, &freq);
  621. case IPV6_FL_A_GET:
  622. return ipv6_flowlabel_get(sk, &freq, optval, optlen);
  623. default:
  624. return -EINVAL;
  625. }
  626. }
  627. #ifdef CONFIG_PROC_FS
/* Per-open iteration state for the "ip6_flowlabel" proc file. */
struct ip6fl_iter_state {
	struct seq_net_private p;
	/* pid namespace used to translate owner pids; set in ip6fl_seq_start() */
	struct pid_namespace *pid_ns;
	/* hash bucket the walk is currently in */
	int bucket;
};

/* Fetch the iteration state stored in the seq_file's private pointer. */
#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private)
  634. static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
  635. {
  636. struct ip6_flowlabel *fl = NULL;
  637. struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
  638. struct net *net = seq_file_net(seq);
  639. for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
  640. for_each_fl_rcu(state->bucket, fl) {
  641. if (net_eq(fl->fl_net, net))
  642. goto out;
  643. }
  644. }
  645. fl = NULL;
  646. out:
  647. return fl;
  648. }
  649. static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
  650. {
  651. struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
  652. struct net *net = seq_file_net(seq);
  653. for_each_fl_continue_rcu(fl) {
  654. if (net_eq(fl->fl_net, net))
  655. goto out;
  656. }
  657. try_again:
  658. if (++state->bucket <= FL_HASH_MASK) {
  659. for_each_fl_rcu(state->bucket, fl) {
  660. if (net_eq(fl->fl_net, net))
  661. goto out;
  662. }
  663. goto try_again;
  664. }
  665. fl = NULL;
  666. out:
  667. return fl;
  668. }
  669. static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
  670. {
  671. struct ip6_flowlabel *fl = ip6fl_get_first(seq);
  672. if (fl)
  673. while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
  674. --pos;
  675. return pos ? NULL : fl;
  676. }
  677. static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
  678. __acquires(RCU)
  679. {
  680. struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
  681. state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);
  682. rcu_read_lock();
  683. return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
  684. }
  685. static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  686. {
  687. struct ip6_flowlabel *fl;
  688. if (v == SEQ_START_TOKEN)
  689. fl = ip6fl_get_first(seq);
  690. else
  691. fl = ip6fl_get_next(seq, v);
  692. ++*pos;
  693. return fl;
  694. }
/* seq_file ->stop: leave the RCU read-side section entered in ip6fl_seq_start(). */
static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
  700. static int ip6fl_seq_show(struct seq_file *seq, void *v)
  701. {
  702. struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
  703. if (v == SEQ_START_TOKEN) {
  704. seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n");
  705. } else {
  706. struct ip6_flowlabel *fl = v;
  707. seq_printf(seq,
  708. "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
  709. (unsigned int)ntohl(fl->label),
  710. fl->share,
  711. ((fl->share == IPV6_FL_S_PROCESS) ?
  712. pid_nr_ns(fl->owner.pid, state->pid_ns) :
  713. ((fl->share == IPV6_FL_S_USER) ?
  714. from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
  715. 0)),
  716. atomic_read(&fl->users),
  717. fl->linger/HZ,
  718. (long)(fl->expires - jiffies)/HZ,
  719. &fl->dst,
  720. fl->opt ? fl->opt->opt_nflen : 0);
  721. }
  722. return 0;
  723. }
/* seq_file callbacks backing the "ip6_flowlabel" proc file. */
static const struct seq_operations ip6fl_seq_ops = {
	.start = ip6fl_seq_start,
	.next = ip6fl_seq_next,
	.stop = ip6fl_seq_stop,
	.show = ip6fl_seq_show,
};
  730. static int __net_init ip6_flowlabel_proc_init(struct net *net)
  731. {
  732. if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
  733. &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
  734. return -ENOMEM;
  735. return 0;
  736. }
/* Per-namespace exit: remove the "ip6_flowlabel" proc entry again. */
static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
  741. #else
/* Without CONFIG_PROC_FS there is no proc entry to create: always succeeds. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
/* Without CONFIG_PROC_FS there is nothing to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
  749. #endif
/*
 * Per-namespace exit hook: free the namespace's unused flow labels, then
 * remove its proc entry.
 */
static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}
/* Per-network-namespace init/exit hooks of the flow label manager. */
static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};
  759. int ip6_flowlabel_init(void)
  760. {
  761. return register_pernet_subsys(&ip6_flowlabel_net_ops);
  762. }
/*
 * Tear down the flow label manager: flush the deferred static key, delete
 * the GC timer and unregister the per-namespace operations, in that order.
 */
void ip6_flowlabel_cleanup(void)
{
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	timer_delete(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}