udpgro_fwd.sh 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. #!/bin/bash
  2. # SPDX-License-Identifier: GPL-2.0
  3. source lib.sh
  # Compiled XDP object that create_ns() attaches to the receiver's veth.
  BPF_FILE="lib/xdp_dummy.bpf.o"

  # Per-run random prefix shared by both namespace names. Assignment and
  # readonly are split so the builtin does not hide the command's exit status.
  BASE="ns-$(mktemp -u XXXXXX)"
  readonly BASE

  # Numeric ids of the sender and the receiver: they are appended to $BASE,
  # to the device names and to the network prefixes below.
  readonly SRC=2
  readonly DST=1
  # Host part of the additional receiver address used as DNAT match.
  readonly DST_NAT=100
  readonly NS_SRC="$BASE$SRC"
  readonly NS_DST="$BASE$DST"

  # "baremetal" network used for raw UDP traffic
  readonly BM_NET_V4=192.168.1.
  readonly BM_NET_V6=2001:db8::

  # "overlay" network used for UDP over UDP tunnel traffic
  readonly OL_NET_V4=172.16.1.
  readonly OL_NET_V6=2001:db8:1::

  # Number of available CPUs; run_bench() needs at least two of them.
  NPROCS="$(nproc)"
  readonly NPROCS
  # Tear down the test environment: signal any background job of this shell
  # that is still around, then delete both test namespaces.
  # Globals: NS_SRC, NS_DST (read)
  # Installed as the EXIT handler right below, so it runs on every exit path.
  cleanup() {
    local ns
    local pids

    pids="$(jobs -p)"
    # ${pids} is left unquoted on purpose: one PID per word. Signal 1 is SIGHUP.
    [ -n "${pids}" ] && kill -1 ${pids} 2>/dev/null

    for ns in "$NS_SRC" "$NS_DST"; do
      # the namespace may not exist (yet, or anymore): errors are ignored
      ip netns del "$ns" 2>/dev/null
    done
  }
  trap cleanup EXIT
  # Create the sender and receiver namespaces and connect them with a veth
  # pair carrying the "baremetal" IPv4 and IPv6 addresses.
  # Globals: NS_SRC, NS_DST, BASE, SRC, DST, BM_NET_V4, BM_NET_V6,
  #          BPF_FILE (all read)
  create_ns() {
    local ns

    for ns in "$NS_SRC" "$NS_DST"; do
      ip netns add "$ns"
      ip -n "$ns" link set dev lo up

      # disable route solicitations to decrease 'noise' traffic
      ip netns exec "$ns" sysctl -qw net.ipv6.conf.default.router_solicitations=0
      ip netns exec "$ns" sysctl -qw net.ipv6.conf.all.router_solicitations=0
    done

    ip link add name "veth$SRC" type veth peer name "veth$DST"

    # here $ns is the numeric id (SRC/DST), not the namespace name
    for ns in "$SRC" "$DST"; do
      ip link set dev "veth$ns" netns "$BASE$ns"
      ip -n "$BASE$ns" link set dev "veth$ns" up
      ip -n "$BASE$ns" addr add dev "veth$ns" "$BM_NET_V4$ns/24"
      ip -n "$BASE$ns" addr add dev "veth$ns" "$BM_NET_V6$ns/64" nodad
    done

    # attach the XDP program on the receiver side; errors are silenced
    ip -n "$NS_DST" link set "veth$DST" xdp object "${BPF_FILE}" section xdp 2>/dev/null
  }
  # Create one end of a VXLAN tunnel on top of an existing device.
  # Arguments:
  #   $1 - namespace name
  #   $2 - lower ("baremetal") device, brought up before the tunnel is created
  #   $3 - address of the remote tunnel endpoint
  #   $4 - name of the VXLAN device to create
  #   $5 - VXLAN id
  create_vxlan_endpoint() {
    local -r netns=$1
    local -r bm_dev=$2
    local -r bm_rem_addr=$3
    local -r vxlan_dev=$4
    local -r vxlan_id=$5
    local -r vxlan_port=4789

    ip -n "$netns" link set dev "$bm_dev" up
    ip -n "$netns" link add dev "$vxlan_dev" type vxlan id "$vxlan_id" \
      dstport "$vxlan_port" remote "$bm_rem_addr"
    ip -n "$netns" link set dev "$vxlan_dev" up
  }
  # Build the full tunnel topology: both namespaces (via create_ns) plus one
  # IPv4 and one IPv6 VXLAN tunnel between them, each end carrying an
  # "overlay" address.
  # Globals: BASE, SRC, DST, BM_NET_V4, BM_NET_V6, OL_NET_V4, OL_NET_V6 (read)
  create_vxlan_pair() {
    local ns
    local addr_dst
    local addr_src

    create_ns

    for ns in "$SRC" "$DST"; do
      # note that 3 - $SRC == $DST and 3 - $DST == $SRC
      create_vxlan_endpoint "$BASE$ns" "veth$ns" "$BM_NET_V4$((3 - ns))" "vxlan$ns" 4
      ip -n "$BASE$ns" addr add dev "vxlan$ns" "$OL_NET_V4$ns/24"
    done

    for ns in "$SRC" "$DST"; do
      create_vxlan_endpoint "$BASE$ns" "veth$ns" "$BM_NET_V6$((3 - ns))" "vxlan6$ns" 6
      # NOTE(review): /24 on an IPv6 address, while the baremetal IPv6
      # addresses in this script use /64 - confirm this is intended.
      ip -n "$BASE$ns" addr add dev "vxlan6$ns" "$OL_NET_V6$ns/24" nodad
    done

    # preload the neighbour cache, to avoid some noisy traffic
    addr_dst=$(ip -j -n "$BASE$DST" link show dev "vxlan6$DST" | jq -r '.[]["address"]')
    addr_src=$(ip -j -n "$BASE$SRC" link show dev "vxlan6$SRC" | jq -r '.[]["address"]')
    ip -n "$BASE$DST" neigh add dev "vxlan6$DST" lladdr "$addr_src" "$OL_NET_V6$SRC"
    ip -n "$BASE$SRC" neigh add dev "vxlan6$SRC" lladdr "$addr_dst" "$OL_NET_V6$DST"
  }
  # Tell whether the given address is an IPv6 one.
  # Arguments: $1 - address to check
  # Returns:   0 if $1 contains a ':', 1 otherwise
  is_ipv6() {
    [[ $1 == *:* ]]
  }
  # Send one GSO packet from the sender to the receiver namespace and check
  # how many packets the receiver's netfilter counters have seen.
  # Globals:   NS_SRC, NS_DST (read), ret (set to 1 on failure)
  # Arguments:
  #   $1 - test description, printed left-aligned on 40 columns
  #   $2 - destination address; a ':' in it selects IPv6 tools and options
  #   $3 - exact number of packets expected on UDP port 8000
  #   $4 - minimum number of packets expected on UDP port 4789 (VXLAN);
  #        up to 3 more are tolerated
  #   $5 - optional address the receiver binds to
  # Outputs:   " ok" or a " fail ..." line on stdout
  run_test() {
    local -r msg=$1
    local -r dst=$2
    local -r pkts=$3
    local -r vxpkts=$4
    local -r bind=$5
    local -a rx_args=(-4)
    local family=-4
    local ipt=iptables
    local spid retc rets
    local rcv vxrcv

    printf "%-40s" "$msg"

    if is_ipv6 "$dst"; then
      # rx program does not support '-6' and implies ipv6 usage by default
      rx_args=()
      family=-6
      ipt=ip6tables
    fi
    [ -n "$bind" ] && rx_args+=(-b "$bind")

    # send a single GSO packet, segmented in 10 UDP frames.
    # Always expect 10 UDP frames on RX side as rx socket does
    # not enable GRO
    # The two rules have no target: they only act as packet counters.
    ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport 4789
    ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport 8000
    ip netns exec "$NS_DST" ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 \
      "${rx_args[@]}" &
    spid=$!
    wait_local_port_listen "$NS_DST" 8000 udp
    ip netns exec "$NS_SRC" ./udpgso_bench_tx "$family" -M 1 -s 13000 -S 1300 -D "$dst"
    retc=$?
    wait "$spid"
    rets=$?
    if [ "${rets}" -ne 0 ] || [ "${retc}" -ne 0 ]; then
      echo " fail client exit code $retc, server $rets"
      ret=1
      return
    fi

    # the save format starts each rule with "[packets:bytes]": keep the packets
    rcv=$(ip netns exec "$NS_DST" "${ipt}-save" -c | grep 'dport 8000' |
      sed -e 's/\[//' -e 's/:.*//')
    # quoted on purpose: a missing counter must be reported as a failure
    # instead of tripping a '[' syntax error that reads as "no mismatch"
    if [ "$rcv" != "$pkts" ]; then
      echo " fail - received $rcv packets, expected $pkts"
      ret=1
      return
    fi

    vxrcv=$(ip netns exec "$NS_DST" "${ipt}-save" -c | grep 'dport 4789' |
      sed -e 's/\[//' -e 's/:.*//')

    # upper net can generate a little noise, allow some tolerance
    # A missing or non-numeric counter is a failure as well.
    if ! [[ "$vxrcv" =~ ^[0-9]+$ ]] ||
       (( vxrcv < vxpkts || vxrcv > vxpkts + 3 )); then
      echo " fail - received $vxrcv vxlan packets, expected $vxpkts"
      ret=1
      return
    fi
    echo " ok"
  }
  # Run an iperf3 transfer from the sender to the receiver namespace and
  # verify that the receiver counted no UDP checksum errors.
  # Globals:   NS_SRC, NS_DST (read), ret (set to 1 on failure)
  # Arguments:
  #   $1 - test description, printed left-aligned on 40 columns
  #   $2 - destination address; a ':' in it selects the IPv6 counter
  # Outputs:   " ok" or a " fail ..." line on stdout
  run_test_csum() {
    local -r msg="$1"
    local -r dst="$2"
    local csum_error_filter=UdpInCsumErrors
    local csum_errors
    local spid retc rets

    printf "%-40s" "$msg"
    is_ipv6 "$dst" && csum_error_filter=Udp6InCsumErrors

    ip netns exec "$NS_DST" iperf3 -s -1 >/dev/null &
    # grab the server PID right away, before any other command gets a chance
    # to start a background job of its own and overwrite $!
    spid=$!
    wait_local_port_listen "$NS_DST" 5201 tcp
    ip netns exec "$NS_SRC" iperf3 -c "$dst" -t 2 >/dev/null
    retc=$?
    wait "$spid"
    rets=$?
    if [ "$rets" -ne 0 ] || [ "$retc" -ne 0 ]; then
      echo " fail client exit code $retc, server $rets"
      ret=1
      return
    fi

    # second column of the matching nstat line is the counter value
    csum_errors=$(ip netns exec "$NS_DST" nstat -as "$csum_error_filter" |
      grep "$csum_error_filter" | awk '{print $2}')
    if [ -n "$csum_errors" ] && [ "$csum_errors" -gt 0 ]; then
      echo " fail - csum error on receive $csum_errors, expected 0"
      ret=1
      return
    fi
    echo " ok"
  }
  # Run a 3 second udpgso_bench_tx transmission towards a receiver pinned to
  # a different CPU. Nothing is printed on success by this function itself.
  # Globals:   NPROCS, NS_SRC, NS_DST, DST (read), ret (set to 1 on failure)
  # Arguments:
  #   $1 - test description, printed left-aligned on 40 columns
  #   $2 - destination address; a ':' in it selects IPv6
  # Outputs:   a " skip ..." line when fewer than 2 CPUs are available,
  #            a " fail ..." line when either program exits non-zero
  run_bench() {
    local -r msg=$1
    local -r dst=$2
    local family=-4
    local spid retc rets

    printf "%-40s" "$msg"
    if [ "$NPROCS" -lt 2 ]; then
      echo " skip - needed 2 CPUs found $NPROCS"
      return
    fi

    is_ipv6 "$dst" && family=-6

    # bind the sender and the receiver to different CPUs to try
    # get reproducible results
    ip netns exec "$NS_DST" bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
    ip netns exec "$NS_DST" taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 &
    spid=$!
    wait_local_port_listen "$NS_DST" 8000 udp
    ip netns exec "$NS_SRC" taskset 0x1 ./udpgso_bench_tx "$family" -l 3 -S 1300 -D "$dst"
    retc=$?
    wait "$spid"
    rets=$?
    if [ "${rets}" -ne 0 ] || [ "${retc}" -ne 0 ]; then
      echo " fail client exit code $retc, server $rets"
      ret=1
      return
    fi
  }
  # Run the whole test matrix once per IP family. Every test case starts from
  # a freshly created topology and ends with cleanup().
  for family in 4 6; do
    BM_NET=$BM_NET_V4
    OL_NET=$OL_NET_V4
    IPT=iptables
    SUFFIX=24
    VXDEV=vxlan
    PING=ping

    if [ "$family" = 6 ]; then
      BM_NET=$BM_NET_V6
      OL_NET=$OL_NET_V6
      # two words on purpose: $SUFFIX is expanded unquoted below so that
      # "nodad" becomes a separate argument of "ip addr add"
      SUFFIX="64 nodad"
      VXDEV=vxlan6
      IPT=ip6tables
      # Use ping6 on systems where ping doesn't handle IPv6
      ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING="ping6"
    fi

    echo "IPv$family"

    create_ns
    run_test "No GRO" "$BM_NET$DST" 10 0
    cleanup

    create_ns
    ip netns exec "$NS_DST" ethtool -K "veth$DST" generic-receive-offload on
    ip netns exec "$NS_DST" ethtool -K "veth$DST" rx-gro-list on
    run_test "GRO frag list" "$BM_NET$DST" 1 0
    cleanup

    # UDP GRO fwd skips aggregation when it finds a UDP socket with the GRO
    # option; if there is a UDP tunnel in the running system, such a lookup
    # takes place.
    # use NAT to circumvent GRO FWD check
    create_ns
    # shellcheck disable=SC2086 # $SUFFIX must be word-split, see above
    ip -n "$NS_DST" addr add dev "veth$DST" "$BM_NET$DST_NAT"/$SUFFIX
    ip netns exec "$NS_DST" ethtool -K "veth$DST" generic-receive-offload on
    ip netns exec "$NS_DST" ethtool -K "veth$DST" rx-udp-gro-forwarding on
    ip netns exec "$NS_DST" "$IPT" -t nat -I PREROUTING -d "$BM_NET$DST_NAT" \
      -j DNAT --to-destination "$BM_NET$DST"
    run_test "GRO fwd" "$BM_NET$DST_NAT" 1 0 "$BM_NET$DST"
    cleanup

    create_ns
    run_bench "UDP fwd perf" "$BM_NET$DST"
    ip netns exec "$NS_DST" ethtool -K "veth$DST" rx-udp-gro-forwarding on
    run_bench "UDP GRO fwd perf" "$BM_NET$DST"
    cleanup

    create_vxlan_pair
    ip netns exec "$NS_DST" ethtool -K "veth$DST" generic-receive-offload on
    ip netns exec "$NS_DST" ethtool -K "veth$DST" rx-gro-list on
    run_test "GRO frag list over UDP tunnel" "$OL_NET$DST" 10 10
    cleanup

    # use NAT to circumvent GRO FWD check
    create_vxlan_pair
    # shellcheck disable=SC2086 # $SUFFIX must be word-split, see above
    ip -n "$NS_DST" addr add dev "$VXDEV$DST" "$OL_NET$DST_NAT"/$SUFFIX
    ip netns exec "$NS_DST" ethtool -K "veth$DST" generic-receive-offload on
    ip netns exec "$NS_DST" ethtool -K "veth$DST" rx-udp-gro-forwarding on
    ip netns exec "$NS_DST" "$IPT" -t nat -I PREROUTING -d "$OL_NET$DST_NAT" \
      -j DNAT --to-destination "$OL_NET$DST"

    # load arp cache before running the test to reduce the amount of
    # stray traffic on top of the UDP tunnel
    ip netns exec "$NS_SRC" "$PING" -q -c 1 "$OL_NET$DST_NAT" >/dev/null
    run_test "GRO fwd over UDP tunnel" "$OL_NET$DST_NAT" 10 10 "$OL_NET$DST"
    cleanup

    # force segmentation and re-aggregation
    create_vxlan_pair
    ip netns exec "$NS_DST" ethtool -K veth"$DST" generic-receive-offload on
    ip netns exec "$NS_SRC" ethtool -K veth"$SRC" tso off
    ip -n "$NS_SRC" link set dev veth"$SRC" mtu 1430

    # forward to a 2nd veth pair
    ip -n "$NS_DST" link add br0 type bridge
    ip -n "$NS_DST" link set dev veth"$DST" master br0

    # segment the aggregated TSO packet, without csum offload
    ip -n "$NS_DST" link add veth_segment type veth peer veth_rx
    for FEATURE in tso tx-udp-segmentation tx-checksumming; do
      ip netns exec "$NS_DST" ethtool -K veth_segment "$FEATURE" off
    done
    ip -n "$NS_DST" link set dev veth_segment master br0 up
    ip -n "$NS_DST" link set dev br0 up
    ip -n "$NS_DST" link set dev veth_rx up

    # move the lower layer IP in the last added veth
    for ADDR in "$BM_NET_V4$DST/24" "$BM_NET_V6$DST/64"; do
      # the nodad argument makes iproute emit a harmless warning
      # with ipv4 addresses, hence the stderr redirection
      ip -n "$NS_DST" addr del dev veth"$DST" "$ADDR"
      ip -n "$NS_DST" addr add dev veth_rx "$ADDR" \
        nodad 2>/dev/null
    done

    run_test_csum "GSO after GRO" "$OL_NET$DST"
    cleanup
  done

  # ret is only ever assigned (to 1) by a failing test: default to success
  # instead of inheriting the status of whatever command ran last.
  exit "${ret:-0}"