sockex3_kern.c 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
  2. *
  3. * This program is free software; you can redistribute it and/or
  4. * modify it under the terms of version 2 of the GNU General Public
  5. * License as published by the Free Software Foundation.
  6. */
  7. #include <uapi/linux/bpf.h>
  8. #include <uapi/linux/in.h>
  9. #include <uapi/linux/if.h>
  10. #include <uapi/linux/if_ether.h>
  11. #include <uapi/linux/ip.h>
  12. #include <uapi/linux/ipv6.h>
  13. #include <uapi/linux/if_tunnel.h>
  14. #include <uapi/linux/mpls.h>
  15. #include <bpf/bpf_helpers.h>
  16. #include "bpf_legacy.h"
  17. #define IP_MF 0x2000
  18. #define IP_OFFSET 0x1FFF
  19. #define PARSE_VLAN 1
  20. #define PARSE_MPLS 2
  21. #define PARSE_IP 3
  22. #define PARSE_IPV6 4
  23. struct vlan_hdr {
  24. __be16 h_vlan_TCI;
  25. __be16 h_vlan_encapsulated_proto;
  26. };
  27. struct flow_key_record {
  28. __be32 src;
  29. __be32 dst;
  30. union {
  31. __be32 ports;
  32. __be16 port16[2];
  33. };
  34. __u32 ip_proto;
  35. };
  36. static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);
  37. static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
  38. {
  39. return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
  40. & (IP_MF | IP_OFFSET);
  41. }
  42. static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
  43. {
  44. __u64 w0 = load_word(ctx, off);
  45. __u64 w1 = load_word(ctx, off + 4);
  46. __u64 w2 = load_word(ctx, off + 8);
  47. __u64 w3 = load_word(ctx, off + 12);
  48. return (__u32)(w0 ^ w1 ^ w2 ^ w3);
  49. }
  50. struct globals {
  51. struct flow_key_record flow;
  52. };
  53. struct {
  54. __uint(type, BPF_MAP_TYPE_ARRAY);
  55. __type(key, __u32);
  56. __type(value, struct globals);
  57. __uint(max_entries, 32);
  58. } percpu_map SEC(".maps");
  59. /* user poor man's per_cpu until native support is ready */
  60. static struct globals *this_cpu_globals(void)
  61. {
  62. u32 key = bpf_get_smp_processor_id();
  63. return bpf_map_lookup_elem(&percpu_map, &key);
  64. }
  65. /* some simple stats for user space consumption */
  66. struct pair {
  67. __u64 packets;
  68. __u64 bytes;
  69. };
  70. struct {
  71. __uint(type, BPF_MAP_TYPE_HASH);
  72. __type(key, struct flow_key_record);
  73. __type(value, struct pair);
  74. __uint(max_entries, 1024);
  75. } hash_map SEC(".maps");
  76. static void update_stats(struct __sk_buff *skb, struct globals *g)
  77. {
  78. struct flow_key_record key = g->flow;
  79. struct pair *value;
  80. value = bpf_map_lookup_elem(&hash_map, &key);
  81. if (value) {
  82. __sync_fetch_and_add(&value->packets, 1);
  83. __sync_fetch_and_add(&value->bytes, skb->len);
  84. } else {
  85. struct pair val = {1, skb->len};
  86. bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
  87. }
  88. }
  89. static __always_inline void parse_ip_proto(struct __sk_buff *skb,
  90. struct globals *g, __u32 ip_proto)
  91. {
  92. __u32 nhoff = skb->cb[0];
  93. int poff;
  94. switch (ip_proto) {
  95. case IPPROTO_GRE: {
  96. struct gre_hdr {
  97. __be16 flags;
  98. __be16 proto;
  99. };
  100. __u32 gre_flags = load_half(skb,
  101. nhoff + offsetof(struct gre_hdr, flags));
  102. __u32 gre_proto = load_half(skb,
  103. nhoff + offsetof(struct gre_hdr, proto));
  104. if (gre_flags & (GRE_VERSION|GRE_ROUTING))
  105. break;
  106. nhoff += 4;
  107. if (gre_flags & GRE_CSUM)
  108. nhoff += 4;
  109. if (gre_flags & GRE_KEY)
  110. nhoff += 4;
  111. if (gre_flags & GRE_SEQ)
  112. nhoff += 4;
  113. skb->cb[0] = nhoff;
  114. parse_eth_proto(skb, gre_proto);
  115. break;
  116. }
  117. case IPPROTO_IPIP:
  118. parse_eth_proto(skb, ETH_P_IP);
  119. break;
  120. case IPPROTO_IPV6:
  121. parse_eth_proto(skb, ETH_P_IPV6);
  122. break;
  123. case IPPROTO_TCP:
  124. case IPPROTO_UDP:
  125. g->flow.ports = load_word(skb, nhoff);
  126. case IPPROTO_ICMP:
  127. g->flow.ip_proto = ip_proto;
  128. update_stats(skb, g);
  129. break;
  130. default:
  131. break;
  132. }
  133. }
  134. SEC("socket")
  135. int bpf_func_ip(struct __sk_buff *skb)
  136. {
  137. struct globals *g = this_cpu_globals();
  138. __u32 nhoff, verlen, ip_proto;
  139. if (!g)
  140. return 0;
  141. nhoff = skb->cb[0];
  142. if (unlikely(ip_is_fragment(skb, nhoff)))
  143. return 0;
  144. ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
  145. if (ip_proto != IPPROTO_GRE) {
  146. g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
  147. g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
  148. }
  149. verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
  150. nhoff += (verlen & 0xF) << 2;
  151. skb->cb[0] = nhoff;
  152. parse_ip_proto(skb, g, ip_proto);
  153. return 0;
  154. }
  155. SEC("socket")
  156. int bpf_func_ipv6(struct __sk_buff *skb)
  157. {
  158. struct globals *g = this_cpu_globals();
  159. __u32 nhoff, ip_proto;
  160. if (!g)
  161. return 0;
  162. nhoff = skb->cb[0];
  163. ip_proto = load_byte(skb,
  164. nhoff + offsetof(struct ipv6hdr, nexthdr));
  165. g->flow.src = ipv6_addr_hash(skb,
  166. nhoff + offsetof(struct ipv6hdr, saddr));
  167. g->flow.dst = ipv6_addr_hash(skb,
  168. nhoff + offsetof(struct ipv6hdr, daddr));
  169. nhoff += sizeof(struct ipv6hdr);
  170. skb->cb[0] = nhoff;
  171. parse_ip_proto(skb, g, ip_proto);
  172. return 0;
  173. }
  174. SEC("socket")
  175. int bpf_func_vlan(struct __sk_buff *skb)
  176. {
  177. __u32 nhoff, proto;
  178. nhoff = skb->cb[0];
  179. proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
  180. h_vlan_encapsulated_proto));
  181. nhoff += sizeof(struct vlan_hdr);
  182. skb->cb[0] = nhoff;
  183. parse_eth_proto(skb, proto);
  184. return 0;
  185. }
  186. SEC("socket")
  187. int bpf_func_mpls(struct __sk_buff *skb)
  188. {
  189. __u32 nhoff, label;
  190. nhoff = skb->cb[0];
  191. label = load_word(skb, nhoff);
  192. nhoff += sizeof(struct mpls_label);
  193. skb->cb[0] = nhoff;
  194. if (label & MPLS_LS_S_MASK) {
  195. __u8 verlen = load_byte(skb, nhoff);
  196. if ((verlen & 0xF0) == 4)
  197. parse_eth_proto(skb, ETH_P_IP);
  198. else
  199. parse_eth_proto(skb, ETH_P_IPV6);
  200. } else {
  201. parse_eth_proto(skb, ETH_P_MPLS_UC);
  202. }
  203. return 0;
  204. }
  205. struct {
  206. __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
  207. __uint(key_size, sizeof(u32));
  208. __uint(max_entries, 8);
  209. __array(values, u32 (void *));
  210. } prog_array_init SEC(".maps") = {
  211. .values = {
  212. [PARSE_VLAN] = (void *)&bpf_func_vlan,
  213. [PARSE_IP] = (void *)&bpf_func_ip,
  214. [PARSE_IPV6] = (void *)&bpf_func_ipv6,
  215. [PARSE_MPLS] = (void *)&bpf_func_mpls,
  216. },
  217. };
  218. /* Protocol dispatch routine. It tail-calls next BPF program depending
  219. * on eth proto. Note, we could have used ...
  220. *
  221. * bpf_tail_call(skb, &prog_array_init, proto);
  222. *
  223. * ... but it would need large prog_array and cannot be optimised given
  224. * the map key is not static.
  225. */
  226. static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
  227. {
  228. switch (proto) {
  229. case ETH_P_8021Q:
  230. case ETH_P_8021AD:
  231. bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
  232. break;
  233. case ETH_P_MPLS_UC:
  234. case ETH_P_MPLS_MC:
  235. bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
  236. break;
  237. case ETH_P_IP:
  238. bpf_tail_call(skb, &prog_array_init, PARSE_IP);
  239. break;
  240. case ETH_P_IPV6:
  241. bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
  242. break;
  243. }
  244. }
  245. SEC("socket")
  246. int main_prog(struct __sk_buff *skb)
  247. {
  248. __u32 nhoff = ETH_HLEN;
  249. __u32 proto = load_half(skb, 12);
  250. skb->cb[0] = nhoff;
  251. parse_eth_proto(skb, proto);
  252. return 0;
  253. }
  254. char _license[] SEC("license") = "GPL";