inet_ecn.h 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef _INET_ECN_H_
  3. #define _INET_ECN_H_
  4. #include <linux/ip.h>
  5. #include <linux/skbuff.h>
  6. #include <linux/if_vlan.h>
  7. #include <net/inet_sock.h>
  8. #include <net/dsfield.h>
  9. #include <net/checksum.h>
  10. enum {
  11. INET_ECN_NOT_ECT = 0,
  12. INET_ECN_ECT_1 = 1,
  13. INET_ECN_ECT_0 = 2,
  14. INET_ECN_CE = 3,
  15. INET_ECN_MASK = 3,
  16. };
  17. extern int sysctl_tunnel_ecn_log;
  18. static inline int INET_ECN_is_ce(__u8 dsfield)
  19. {
  20. return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
  21. }
  22. static inline int INET_ECN_is_not_ect(__u8 dsfield)
  23. {
  24. return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT;
  25. }
  26. static inline int INET_ECN_is_capable(__u8 dsfield)
  27. {
  28. return dsfield & INET_ECN_ECT_0;
  29. }
  30. /*
  31. * RFC 3168 9.1.1
  32. * The full-functionality option for ECN encapsulation is to copy the
  33. * ECN codepoint of the inside header to the outside header on
  34. * encapsulation if the inside header is not-ECT or ECT, and to set the
  35. * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
  36. * the inside header is CE.
  37. */
  38. static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
  39. {
  40. outer &= ~INET_ECN_MASK;
  41. outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) :
  42. INET_ECN_ECT_0;
  43. return outer;
  44. }
  45. /* Apply either ECT(0) or ECT(1) */
  46. static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
  47. {
  48. __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
  49. /* Mask the complete byte in case the connection alternates between
  50. * ECT(0) and ECT(1).
  51. */
  52. inet_sk(sk)->tos &= ~INET_ECN_MASK;
  53. inet_sk(sk)->tos |= ect;
  54. if (inet6_sk(sk)) {
  55. inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
  56. inet6_sk(sk)->tclass |= ect;
  57. }
  58. }
  59. static inline void INET_ECN_xmit(struct sock *sk)
  60. {
  61. __INET_ECN_xmit(sk, false);
  62. }
  63. static inline void INET_ECN_dontxmit(struct sock *sk)
  64. {
  65. inet_sk(sk)->tos &= ~INET_ECN_MASK;
  66. if (inet6_sk(sk) != NULL)
  67. inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
  68. }
  69. #define IP6_ECN_flow_init(label) do { \
  70. (label) &= ~htonl(INET_ECN_MASK << 20); \
  71. } while (0)
  72. #define IP6_ECN_flow_xmit(sk, label) do { \
  73. if (INET_ECN_is_capable(inet6_sk(sk)->tclass)) \
  74. (label) |= htonl(INET_ECN_ECT_0 << 20); \
  75. } while (0)
  76. static inline int IP_ECN_set_ce(struct iphdr *iph)
  77. {
  78. u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
  79. __be16 check_add;
  80. /*
  81. * After the last operation we have (in binary):
  82. * INET_ECN_NOT_ECT => 01
  83. * INET_ECN_ECT_1 => 10
  84. * INET_ECN_ECT_0 => 11
  85. * INET_ECN_CE => 00
  86. */
  87. if (!(ecn & 2))
  88. return !ecn;
  89. /*
  90. * The following gives us:
  91. * INET_ECN_ECT_1 => check += htons(0xFFFD)
  92. * INET_ECN_ECT_0 => check += htons(0xFFFE)
  93. */
  94. check_add = (__force __be16)((__force u16)htons(0xFFFB) +
  95. (__force u16)htons(ecn));
  96. iph->check = csum16_add(iph->check, check_add);
  97. iph->tos |= INET_ECN_CE;
  98. return 1;
  99. }
  100. static inline int IP_ECN_set_ect1(struct iphdr *iph)
  101. {
  102. if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
  103. return 0;
  104. iph->check = csum16_add(iph->check, htons(0x1));
  105. iph->tos ^= INET_ECN_MASK;
  106. return 1;
  107. }
  108. static inline void IP_ECN_clear(struct iphdr *iph)
  109. {
  110. iph->tos &= ~INET_ECN_MASK;
  111. }
  112. static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner)
  113. {
  114. dscp &= ~INET_ECN_MASK;
  115. ipv4_change_dsfield(inner, INET_ECN_MASK, dscp);
  116. }
  117. struct ipv6hdr;
  118. /* Note:
  119. * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE,
  120. * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE
  121. * In IPv6 case, no checksum compensates the change in IPv6 header,
  122. * so we have to update skb->csum.
  123. */
  124. static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
  125. {
  126. __be32 from, to;
  127. if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
  128. return 0;
  129. from = *(__be32 *)iph;
  130. to = from | htonl(INET_ECN_CE << 20);
  131. *(__be32 *)iph = to;
  132. if (skb->ip_summed == CHECKSUM_COMPLETE)
  133. skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
  134. (__force __wsum)to);
  135. return 1;
  136. }
  137. static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
  138. {
  139. __be32 from, to;
  140. if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
  141. return 0;
  142. from = *(__be32 *)iph;
  143. to = from ^ htonl(INET_ECN_MASK << 20);
  144. *(__be32 *)iph = to;
  145. if (skb->ip_summed == CHECKSUM_COMPLETE)
  146. skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
  147. (__force __wsum)to);
  148. return 1;
  149. }
  150. static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
  151. {
  152. dscp &= ~INET_ECN_MASK;
  153. ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
  154. }
  155. static inline int INET_ECN_set_ce(struct sk_buff *skb)
  156. {
  157. switch (skb_protocol(skb, true)) {
  158. case cpu_to_be16(ETH_P_IP):
  159. if (skb_network_header(skb) + sizeof(struct iphdr) <=
  160. skb_tail_pointer(skb))
  161. return IP_ECN_set_ce(ip_hdr(skb));
  162. break;
  163. case cpu_to_be16(ETH_P_IPV6):
  164. if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
  165. skb_tail_pointer(skb))
  166. return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
  167. break;
  168. }
  169. return 0;
  170. }
  171. static inline int skb_get_dsfield(struct sk_buff *skb)
  172. {
  173. switch (skb_protocol(skb, true)) {
  174. case cpu_to_be16(ETH_P_IP):
  175. if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
  176. break;
  177. return ipv4_get_dsfield(ip_hdr(skb));
  178. case cpu_to_be16(ETH_P_IPV6):
  179. if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
  180. break;
  181. return ipv6_get_dsfield(ipv6_hdr(skb));
  182. }
  183. return -1;
  184. }
  185. static inline int INET_ECN_set_ect1(struct sk_buff *skb)
  186. {
  187. switch (skb_protocol(skb, true)) {
  188. case cpu_to_be16(ETH_P_IP):
  189. if (skb_network_header(skb) + sizeof(struct iphdr) <=
  190. skb_tail_pointer(skb))
  191. return IP_ECN_set_ect1(ip_hdr(skb));
  192. break;
  193. case cpu_to_be16(ETH_P_IPV6):
  194. if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
  195. skb_tail_pointer(skb))
  196. return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
  197. break;
  198. }
  199. return 0;
  200. }
  201. /*
  202. * RFC 6040 4.2
  203. * To decapsulate the inner header at the tunnel egress, a compliant
  204. * tunnel egress MUST set the outgoing ECN field to the codepoint at the
  205. * intersection of the appropriate arriving inner header (row) and outer
  206. * header (column) in Figure 4
  207. *
  208. * +---------+------------------------------------------------+
  209. * |Arriving | Arriving Outer Header |
  210. * | Inner +---------+------------+------------+------------+
  211. * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
  212. * +---------+---------+------------+------------+------------+
  213. * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
  214. * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE |
  215. * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE |
  216. * | CE | CE | CE | CE(!!!)| CE |
  217. * +---------+---------+------------+------------+------------+
  218. *
  219. * Figure 4: New IP in IP Decapsulation Behaviour
  220. *
  221. * returns 0 on success
  222. * 1 if something is broken and should be logged (!!! above)
  223. * 2 if packet should be dropped
  224. */
  225. static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
  226. {
  227. if (INET_ECN_is_not_ect(inner)) {
  228. switch (outer & INET_ECN_MASK) {
  229. case INET_ECN_NOT_ECT:
  230. return 0;
  231. case INET_ECN_ECT_0:
  232. case INET_ECN_ECT_1:
  233. return 1;
  234. case INET_ECN_CE:
  235. return 2;
  236. }
  237. }
  238. *set_ce = INET_ECN_is_ce(outer);
  239. return 0;
  240. }
  241. static inline int INET_ECN_decapsulate(struct sk_buff *skb,
  242. __u8 outer, __u8 inner)
  243. {
  244. bool set_ce = false;
  245. int rc;
  246. rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
  247. if (!rc) {
  248. if (set_ce)
  249. INET_ECN_set_ce(skb);
  250. else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
  251. INET_ECN_set_ect1(skb);
  252. }
  253. return rc;
  254. }
  255. static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
  256. struct sk_buff *skb)
  257. {
  258. __u8 inner;
  259. switch (skb_protocol(skb, true)) {
  260. case htons(ETH_P_IP):
  261. inner = ip_hdr(skb)->tos;
  262. break;
  263. case htons(ETH_P_IPV6):
  264. inner = ipv6_get_dsfield(ipv6_hdr(skb));
  265. break;
  266. default:
  267. return 0;
  268. }
  269. return INET_ECN_decapsulate(skb, oiph->tos, inner);
  270. }
  271. static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
  272. struct sk_buff *skb)
  273. {
  274. __u8 inner;
  275. switch (skb_protocol(skb, true)) {
  276. case htons(ETH_P_IP):
  277. inner = ip_hdr(skb)->tos;
  278. break;
  279. case htons(ETH_P_IPV6):
  280. inner = ipv6_get_dsfield(ipv6_hdr(skb));
  281. break;
  282. default:
  283. return 0;
  284. }
  285. return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
  286. }
  287. #endif