398 lines
8.3 KiB
C
398 lines
8.3 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (c) 2022 Meta
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <linux/bpf.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/pkt_cls.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/in.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/udp.h>
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_endian.h>
|
|
|
|
/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
 *           |                                 |
 *  ns_src   |              ns_fwd             |   ns_dst
 *
 * ns_src and ns_dst: ENDHOST namespace
 *            ns_fwd: Forwarding namespace
 */
|
|
|
|
/* Convert an integer-typed context field (e.g. skb->data) into a pointer.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. as a sizeof operand).
 */
#define ctx_ptr(field)		((void *)(long)(field))
|
|
|
|
/* IPv4 source addresses (network byte order) that skb_get_type() uses to
 * tell which namespace sent a packet: ip4_src maps to SRC_NS and ip4_dst
 * maps to DST_NS.
 */
#define ip4_src			__bpf_htonl(0xac100164) /* 172.16.1.100 */
#define ip4_dst			__bpf_htonl(0xac100264) /* 172.16.2.100 */

/* IPv6 counterparts, written as brace initializers so they can be used as
 * "(struct in6_addr)ip6_src" compound literals.
 */
#define ip6_src			{ 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x01, 0xde, 0xad, \
				  0xbe, 0xef, 0xca, 0xfe } /* ::1:dead:beef:cafe */
#define ip6_dst			{ 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x02, 0xde, 0xad, \
				  0xbe, 0xef, 0xca, 0xfe } /* ::2:dead:beef:cafe */
|
|
|
|
/* Compare two IPv6 addresses (objects with an s6_addr32[4] member) one
 * 32-bit word at a time.
 *
 * Both arguments are parenthesized so that expressions such as *ptr can be
 * passed safely. Each argument may be evaluated up to four times, so do not
 * pass expressions with side effects.
 */
#define v6_equal(a, b)		((a).s6_addr32[0] == (b).s6_addr32[0] && \
				 (a).s6_addr32[1] == (b).s6_addr32[1] && \
				 (a).s6_addr32[2] == (b).s6_addr32[2] && \
				 (a).s6_addr32[3] == (b).s6_addr32[3])
|
|
|
|
/* Interface indexes handed to bpf_redirect_neigh() by ingress_fwdns_prio101().
 * They are never assigned in this file.
 * NOTE(review): presumably filled in by the user-space loader before the
 * object is loaded -- confirm against the test runner.
 */
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
|
|
|
|
/* Marker values written into skb->tstamp by one hook so that a later hook
 * can check that the timestamp survived:
 *   EGRESS_ENDHOST_MAGIC - written by egress_host()
 *   INGRESS_FWDNS_MAGIC  - written by ingress_fwdns_prio101()
 *   EGRESS_FWDNS_MAGIC   - written by egress_fwdns_prio101()
 */
#define EGRESS_ENDHOST_MAGIC	0x0b9fbeef
#define INGRESS_FWDNS_MAGIC	0x1b9fbeef
#define EGRESS_FWDNS_MAGIC	0x2b9fbeef
|
|
|
|
/* Column index into dtimes[][] and errs[][]: one counter per tc hook, plus
 * SET_DTIME for unexpected bpf_skb_set_tstamp() results.
 */
enum {
	INGRESS_FWDNS_P100 = 0,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,	/* number of counters, keep last */
};
|
|
|
|
/* Test ids stored in the global "test"; also the row index into dtimes[][]
 * and errs[][].
 *
 * The order matters: bpf_fwd() treats every id below TCP_IP4_RT_FWD as a
 * test that forwards with bpf_redirect_neigh().
 */
enum {
	TCP_IP6_CLEAR_DTIME = 0,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,	/* row used when "test" is out of range */
	__NR_TESTS,	/* number of rows, keep last */
};
|
|
|
|
/* Sender namespace as encoded in the second byte of skb_get_type()'s
 * return value. 0 is left unused so that "no namespace matched" is falsy.
 */
enum {
	SRC_NS = 1,
	DST_NS = 2,
};
|
|
|
|
/* Per-test, per-hook counters:
 *   dtimes - bumped by inc_dtimes() when a hook saw the timestamp it expected
 *   errs   - bumped by inc_errs() when a hook saw something unexpected
 * NOTE(review): presumably read back by the user-space test -- confirm.
 */
__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
__u32 errs[__NR_TESTS][__MAX_CNT] = {};

/* Id of the test currently running (one of the *_IP4 / *_IP6 / ... ids).
 * It selects the counter row, the expected protocol and the test port
 * (50000 + test).
 */
__u32 test = 0;
|
|
|
|
/* Bump the "expected delivery time seen" counter @idx for the current test.
 * An out-of-range test id is accounted in the UKN_TEST row.
 */
static void inc_dtimes(__u32 idx)
{
	__u32 row = test < __NR_TESTS ? test : UKN_TEST;

	dtimes[row][idx]++;
}
|
|
|
|
/* Bump the error counter @idx for the current test.
 * An out-of-range test id is accounted in the UKN_TEST row.
 */
static void inc_errs(__u32 idx)
{
	__u32 row = test < __NR_TESTS ? test : UKN_TEST;

	errs[row][idx]++;
}
|
|
|
|
/* Extract the inet protocol (lowest byte) from a skb_get_type() value. */
static int skb_proto(int type)
{
	const int proto_mask = 0xff;

	return type & proto_mask;
}
|
|
|
|
/* Extract the sender namespace (second byte) from a skb_get_type() value. */
static int skb_ns(int type)
{
	int upper = type >> 8;

	return upper & 0xff;
}
|
|
|
|
static bool fwdns_clear_dtime(void)
|
|
{
|
|
return test == TCP_IP6_CLEAR_DTIME;
|
|
}
|
|
|
|
static bool bpf_fwd(void)
|
|
{
|
|
return test < TCP_IP4_RT_FWD;
|
|
}
|
|
|
|
static __u8 get_proto(void)
|
|
{
|
|
switch (test) {
|
|
case UDP_IP4:
|
|
case UDP_IP6:
|
|
case UDP_IP4_RT_FWD:
|
|
case UDP_IP6_RT_FWD:
|
|
return IPPROTO_UDP;
|
|
default:
|
|
return IPPROTO_TCP;
|
|
}
|
|
}
|
|
|
|
/* -1: parse error: TC_ACT_SHOT
|
|
* 0: not testing traffic: TC_ACT_OK
|
|
* >0: first byte is the inet_proto, second byte has the netns
|
|
* of the sender
|
|
*/
|
|
static int skb_get_type(struct __sk_buff *skb)
|
|
{
|
|
__u16 dst_ns_port = __bpf_htons(50000 + test);
|
|
void *data_end = ctx_ptr(skb->data_end);
|
|
void *data = ctx_ptr(skb->data);
|
|
__u8 inet_proto = 0, ns = 0;
|
|
struct ipv6hdr *ip6h;
|
|
__u16 sport, dport;
|
|
struct iphdr *iph;
|
|
struct tcphdr *th;
|
|
struct udphdr *uh;
|
|
void *trans;
|
|
|
|
switch (skb->protocol) {
|
|
case __bpf_htons(ETH_P_IP):
|
|
iph = data + sizeof(struct ethhdr);
|
|
if (iph + 1 > data_end)
|
|
return -1;
|
|
if (iph->saddr == ip4_src)
|
|
ns = SRC_NS;
|
|
else if (iph->saddr == ip4_dst)
|
|
ns = DST_NS;
|
|
inet_proto = iph->protocol;
|
|
trans = iph + 1;
|
|
break;
|
|
case __bpf_htons(ETH_P_IPV6):
|
|
ip6h = data + sizeof(struct ethhdr);
|
|
if (ip6h + 1 > data_end)
|
|
return -1;
|
|
if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
|
|
ns = SRC_NS;
|
|
else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
|
|
ns = DST_NS;
|
|
inet_proto = ip6h->nexthdr;
|
|
trans = ip6h + 1;
|
|
break;
|
|
default:
|
|
return 0;
|
|
}
|
|
|
|
/* skb is not from src_ns or dst_ns.
|
|
* skb is not the testing IPPROTO.
|
|
*/
|
|
if (!ns || inet_proto != get_proto())
|
|
return 0;
|
|
|
|
switch (inet_proto) {
|
|
case IPPROTO_TCP:
|
|
th = trans;
|
|
if (th + 1 > data_end)
|
|
return -1;
|
|
sport = th->source;
|
|
dport = th->dest;
|
|
break;
|
|
case IPPROTO_UDP:
|
|
uh = trans;
|
|
if (uh + 1 > data_end)
|
|
return -1;
|
|
sport = uh->source;
|
|
dport = uh->dest;
|
|
break;
|
|
default:
|
|
return 0;
|
|
}
|
|
|
|
/* The skb is the testing traffic */
|
|
if ((ns == SRC_NS && dport == dst_ns_port) ||
|
|
(ns == DST_NS && sport == dst_ns_port))
|
|
return (ns << 8 | inet_proto);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* format: direction@iface@netns
|
|
* egress@veth_(src|dst)@ns_(src|dst)
|
|
*/
|
|
SEC("tc")
|
|
int egress_host(struct __sk_buff *skb)
|
|
{
|
|
int skb_type;
|
|
|
|
skb_type = skb_get_type(skb);
|
|
if (skb_type == -1)
|
|
return TC_ACT_SHOT;
|
|
if (!skb_type)
|
|
return TC_ACT_OK;
|
|
|
|
if (skb_proto(skb_type) == IPPROTO_TCP) {
|
|
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
|
|
skb->tstamp)
|
|
inc_dtimes(EGRESS_ENDHOST);
|
|
else
|
|
inc_errs(EGRESS_ENDHOST);
|
|
} else {
|
|
if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
|
|
skb->tstamp)
|
|
inc_dtimes(EGRESS_ENDHOST);
|
|
else
|
|
inc_errs(EGRESS_ENDHOST);
|
|
}
|
|
|
|
skb->tstamp = EGRESS_ENDHOST_MAGIC;
|
|
|
|
return TC_ACT_OK;
|
|
}
|
|
|
|
/* ingress@veth_(src|dst)@ns_(src|dst) */
|
|
SEC("tc")
|
|
int ingress_host(struct __sk_buff *skb)
|
|
{
|
|
int skb_type;
|
|
|
|
skb_type = skb_get_type(skb);
|
|
if (skb_type == -1)
|
|
return TC_ACT_SHOT;
|
|
if (!skb_type)
|
|
return TC_ACT_OK;
|
|
|
|
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
|
|
skb->tstamp == EGRESS_FWDNS_MAGIC)
|
|
inc_dtimes(INGRESS_ENDHOST);
|
|
else
|
|
inc_errs(INGRESS_ENDHOST);
|
|
|
|
return TC_ACT_OK;
|
|
}
|
|
|
|
/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
|
|
SEC("tc")
|
|
int ingress_fwdns_prio100(struct __sk_buff *skb)
|
|
{
|
|
int skb_type;
|
|
|
|
skb_type = skb_get_type(skb);
|
|
if (skb_type == -1)
|
|
return TC_ACT_SHOT;
|
|
if (!skb_type)
|
|
return TC_ACT_OK;
|
|
|
|
/* delivery_time is only available to the ingress
|
|
* if the tc-bpf checks the skb->tstamp_type.
|
|
*/
|
|
if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
|
|
inc_errs(INGRESS_FWDNS_P100);
|
|
|
|
if (fwdns_clear_dtime())
|
|
skb->tstamp = 0;
|
|
|
|
return TC_ACT_UNSPEC;
|
|
}
|
|
|
|
/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
|
|
SEC("tc")
|
|
int egress_fwdns_prio100(struct __sk_buff *skb)
|
|
{
|
|
int skb_type;
|
|
|
|
skb_type = skb_get_type(skb);
|
|
if (skb_type == -1)
|
|
return TC_ACT_SHOT;
|
|
if (!skb_type)
|
|
return TC_ACT_OK;
|
|
|
|
/* delivery_time is always available to egress even
|
|
* the tc-bpf did not use the tstamp_type.
|
|
*/
|
|
if (skb->tstamp == INGRESS_FWDNS_MAGIC)
|
|
inc_dtimes(EGRESS_FWDNS_P100);
|
|
else
|
|
inc_errs(EGRESS_FWDNS_P100);
|
|
|
|
if (fwdns_clear_dtime())
|
|
skb->tstamp = 0;
|
|
|
|
return TC_ACT_UNSPEC;
|
|
}
|
|
|
|
/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
|
|
SEC("tc")
|
|
int ingress_fwdns_prio101(struct __sk_buff *skb)
|
|
{
|
|
__u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
|
|
int skb_type;
|
|
|
|
skb_type = skb_get_type(skb);
|
|
if (skb_type == -1 || !skb_type)
|
|
/* Should have handled in prio100 */
|
|
return TC_ACT_SHOT;
|
|
|
|
if (skb_proto(skb_type) == IPPROTO_UDP)
|
|
expected_dtime = 0;
|
|
|
|
if (skb->tstamp_type) {
|
|
if (fwdns_clear_dtime() ||
|
|
skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
|
|
skb->tstamp != expected_dtime)
|
|
inc_errs(INGRESS_FWDNS_P101);
|
|
else
|
|
inc_dtimes(INGRESS_FWDNS_P101);
|
|
} else {
|
|
if (!fwdns_clear_dtime() && expected_dtime)
|
|
inc_errs(INGRESS_FWDNS_P101);
|
|
}
|
|
|
|
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
|
|
skb->tstamp = INGRESS_FWDNS_MAGIC;
|
|
} else {
|
|
if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
|
|
BPF_SKB_TSTAMP_DELIVERY_MONO))
|
|
inc_errs(SET_DTIME);
|
|
if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
|
|
BPF_SKB_TSTAMP_UNSPEC))
|
|
inc_errs(SET_DTIME);
|
|
}
|
|
|
|
if (skb_ns(skb_type) == SRC_NS)
|
|
return bpf_fwd() ?
|
|
bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
|
|
else
|
|
return bpf_fwd() ?
|
|
bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
|
|
}
|
|
|
|
/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
|
|
SEC("tc")
|
|
int egress_fwdns_prio101(struct __sk_buff *skb)
|
|
{
|
|
int skb_type;
|
|
|
|
skb_type = skb_get_type(skb);
|
|
if (skb_type == -1 || !skb_type)
|
|
/* Should have handled in prio100 */
|
|
return TC_ACT_SHOT;
|
|
|
|
if (skb->tstamp_type) {
|
|
if (fwdns_clear_dtime() ||
|
|
skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
|
|
skb->tstamp != INGRESS_FWDNS_MAGIC)
|
|
inc_errs(EGRESS_FWDNS_P101);
|
|
else
|
|
inc_dtimes(EGRESS_FWDNS_P101);
|
|
} else {
|
|
if (!fwdns_clear_dtime())
|
|
inc_errs(EGRESS_FWDNS_P101);
|
|
}
|
|
|
|
if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
|
|
skb->tstamp = EGRESS_FWDNS_MAGIC;
|
|
} else {
|
|
if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
|
|
BPF_SKB_TSTAMP_DELIVERY_MONO))
|
|
inc_errs(SET_DTIME);
|
|
if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
|
|
BPF_SKB_TSTAMP_UNSPEC))
|
|
inc_errs(SET_DTIME);
|
|
}
|
|
|
|
return TC_ACT_OK;
|
|
}
|
|
|
|
/* License string emitted into the "license" ELF section.
 * NOTE(review): presumably needed so the kernel permits the GPL-only
 * helpers used above -- confirm.
 */
char __license[] SEC("license") = "GPL";
|