$ sudo dnf install clang llvm $ sudo dnf install elfutils-libelf-devel libpcap-devel
$ sudo dnf install kernel-headers
$ sudo dnf install bpftool
$ sudo dnf install perf
$ sudo apt install clang llvm $ sudo apt install libelf-dev libpcap-dev gcc-multilib build-essential
$ sudo apt install linux-headers-$(uname -r)
$ sudo apt install linux-tools-common linux-tools-generic
$ sudo apt install linux-perf
$ sudo apt install linux-tools-$(uname -r)
$ sudo zypper install clang llvm $ sudo zypper install libelf-devel libpcap-devel
$ sudo zypper install kernel-devel
$ sudo zypper install bpftool
$ sudo zypper install perf
$ sudo apt install autoconf automake m4 $ sudo apt install pkg-config $ sudo apt install libtool $ sudo apt install cmake $ sudo apt install kernel-package $ sudo apt install iproute2 $ sudo apt install dwarves ...
sudo add-apt-repository ppa:cappelikan/ppa sudo apt install mainline sudo mainline --list sudo mainline --install <version string>
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Minimal XDP program: returns XDP_PASS for every invocation.
 */
#include <linux/bpf.h>

#if 0 /* change to 1 when the libbpf headers can be included */
#include <bpf/bpf_helpers.h>
#else /* no libbpf headers: provide SEC() locally */
# if !defined(SEC)
#  if defined(__GNUC__) && !defined(__clang__)
/*
 * _Pragma inside a macro expansion is not handled reliably by GCC, so use
 * the bare attribute there (libbpf's bpf_helpers.h uses the same guard).
 */
#   define SEC(name) __attribute__((section(name), used))
#  else
#   define SEC(name) \
	_Pragma("GCC diagnostic push") \
	_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \
	__attribute__((section(name), used)) \
	_Pragma("GCC diagnostic pop")
#  endif
# endif
#endif

/*
 * xdp_pass_func - XDP entry point, emitted into ELF section "prog".
 * @ctx: XDP context pointer (not used by this program).
 *
 * Returns XDP_PASS unconditionally.
 */
SEC("prog")
int xdp_pass_func(struct xdp_md *ctx)
{
	(void)ctx;
	return XDP_PASS;
}

/*
 * License string, emitted into ELF section "license".
 * The helper functions and features available to a program may differ
 * depending on whether the declared license is GPL or not.
 */
char _license[] SEC("license") = "GPL";
$ clang -target bpf -I "<libbpf의 header 위치>" -emit-llvm -c -o xdp-test.ll xdp-test.c $ llc -march bpf -filetype obj -o xdp-test.o xdp-test.ll
$ sudo ip link set dev "<인터페이스명>" xdp object xdp-test.o section "<section명>" verbose 또는 xdp-tools 를 받아서 설치한 환경인 경우 $ sudo xdp-loader load --verbose --mode native --section "<section명>" "<인터페이스명>" xdp-test.o
$ sudo ip link show dev "<인터페이스명>" <ifindex>: <인터페이스명>: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 xdp qdisc fq_codel state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:a8:c3:19 brd ff:ff:ff:ff:ff:ff prog/xdp id 19 tag b8a375b5b20c0074 jited 또는 xdp-tools 를 받아서 설치한 환경인 경우 $ sudo xdp-loader status "<인터페이스명>" CURRENT XDP PROGRAM STATUS: Interface Prio Program name Mode ID Tag Chain actions -------------------------------------------------------------------------------------- <인터페이스명> native 19 b8a375b5b20c0074
$ sudo ip link set dev "<인터페이스명>" xdp off 또는 xdp-tools 를 받아서 설치한 환경인 경우 $ sudo xdp-loader unload --all --verbose "<인터페이스명>"
$ sudo cat /sys/kernel/debug/tracing/trace_pipe
$ readelf -a "<XDP program elf object>" $ llvm-objdump -D "<XDP program elf object>"
32 ~ 63 (32 bits) (MSB) | 16 ~ 31 (16 bits) | 12 ~ 15 (4 bits) | 8 ~ 11 (4 bits) | 0 ~ 7 (8 bits) (LSB) |
immediate (imm: signed immediate constant) | offset (off: signed offset) | source register (src_reg) | destination register (dst_reg) | opcode |
/*
 * Taken from the Linux kernel header "include/uapi/linux/bpf.h".
 *
 * Layout of a single eBPF instruction: an 8-bit opcode, two 4-bit register
 * numbers, a signed 16-bit offset and a signed 32-bit immediate.
 */
struct bpf_insn {
	__u8	code;		/* opcode */
	__u8	dst_reg:4;	/* destination register */
	__u8	src_reg:4;	/* source register */
	__s16	off;		/* signed offset */
	__s32	imm;		/* signed immediate constant */
};
4 bits (MSB) | 1 bit | 3 bits (LSB) |
operation code | source | instruction class |
code | 값 | 설명 | 비고 |
BPF_ADD | 0x00 | dst += src | |
BPF_SUB | 0x10 | dst -= src | |
BPF_MUL | 0x20 | dst *= src | |
BPF_DIV | 0x30 | dst /= src | |
BPF_OR | 0x40 | dst |= src | |
BPF_AND | 0x50 | dst &= src | |
BPF_LSH | 0x60 | dst <<= src | |
BPF_RSH | 0x70 | dst >>= src | |
BPF_NEG | 0x80 | dst = -dst | |
BPF_MOD | 0x90 | dst %= src | |
BPF_XOR | 0xa0 | dst ^= src | |
BPF_MOV | 0xb0 | dst = src | mov reg to reg |
BPF_ARSH | 0xc0 | sign extending shift right | sign extending arithmetic shift right |
BPF_END | 0xd0 | endianness conversion (flags for endianness conversion) | BPF_TO_LE/BPF_FROM_LE (0x00), BPF_TO_BE/BPF_FROM_BE (0x08) |
code | 값 | 설명 | 비고 |
BPF_JA | 0x00 | PC += off | BPF_JMP only |
BPF_JEQ | 0x10 | PC += off if dst == src | |
BPF_JGT | 0x20 | PC += off if dst > src | unsigned |
BPF_JGE | 0x30 | PC += off if dst >= src | unsigned |
BPF_JSET | 0x40 | PC += off if dst & src | |
BPF_JNE | 0x50 | PC += off if dst != src | |
BPF_JSGT | 0x60 | PC += off if dst > src | signed |
BPF_JSGE | 0x70 | PC += off if dst >= src | signed |
BPF_CALL | 0x80 | function call | |
BPF_EXIT | 0x90 | function / program return (return r0) | BPF_JMP only |
BPF_JLT | 0xa0 | PC += off if dst < src | unsigned |
BPF_JLE | 0xb0 | PC += off if dst <= src | unsigned |
BPF_JSLT | 0xc0 | PC += off if dst < src | signed |
BPF_JSLE | 0xd0 | PC += off if dst <= src | signed |
source | 값 | 설명 |
BPF_K | 0x00 | use 32-bit immediate as source operand |
BPF_X | 0x08 | use 'src_reg' register as source operand |
3 bits (MSB) | 2 bits | 3 bits (LSB) |
mode | size | instruction class |
size modifier | 값 | 설명 |
BPF_W | 0x00 | word (4 bytes) |
BPF_H | 0x08 | half word (2 bytes) |
BPF_B | 0x10 | byte (1 byte) |
BPF_DW | 0x18 | double word (8 bytes) |
mode modifier | 값 | 설명 |
BPF_IMM | 0x00 | used for 64-bit mov |
BPF_ABS | 0x20 | legacy BPF packet access |
BPF_IND | 0x40 | legacy BPF packet access |
BPF_MEM | 0x60 | all normal load and store operations |
(reserved) | 0x80 | reserved |
(reserved) | 0xa0 | reserved |
BPF_ATOMIC | 0xc0 | atomic operations (atomic memory ops - op type in immediate) |
class | 값 | 설명 | 비고 |
BPF_LD | 0x00 | non-standard load operations | Load instructions |
BPF_LDX | 0x01 | load into register operations | Load instructions |
BPF_ST | 0x02 | store from immediate operations | Store instructions |
BPF_STX | 0x03 | store from register operations | Store instructions |
BPF_ALU | 0x04 | 32-bit arithmetic operations | Arithmetic instructions |
BPF_JMP | 0x05 | 64-bit jump operations | Jump instructions |
BPF_JMP32 | 0x06 | 32-bit jump operations (Jump mode in word width) | Jump instructions |
BPF_ALU64 | 0x07 | 64-bit arithmetic operations (ALU mode in double word width) | Arithmetic instructions |
/* See the ___bpf_prog_run() implementation in "kernel/bpf/core.c". */

/*
 * BPF_ATOMIC_OP - build one atomic instruction as a struct bpf_insn value.
 * @SIZE: size modifier, BPF_W (32-bit) or BPF_DW (64-bit). The value is
 *        masked with BPF_SIZE(), so a byte count such as sizeof(u64) must
 *        not be passed here (8 would select BPF_H, not BPF_DW).
 * @OP:   atomic operation stored in .imm (e.g. BPF_ADD, BPF_ADD | BPF_FETCH)
 * @DST:  value for .dst_reg (base address register)
 * @SRC:  value for .src_reg (operand register)
 * @OFF:  value for .off (signed 16-bit offset)
 */
#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF)			\
	((struct bpf_insn) {					\
		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = OFF,					\
		.imm   = OP })

/*
 * .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX:
 *     lock xadd *(u32 *)(dst_reg + off16) += src_reg
 *     BPF_ATOMIC_OP(BPF_W, BPF_ADD, <dst_reg>, <src_reg>, <off16>)
 *
 * .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX:
 *     lock xadd *(u64 *)(dst_reg + off16) += src_reg
 *     BPF_ATOMIC_OP(BPF_DW, BPF_ADD, <dst_reg>, <src_reg>, <off16>)
 */
immediate | 값 | 설명 | 비고 |
BPF_ADD | 0x00 | *(uint *) (dst_reg + off16) += src_reg | atomic_add(src_reg, dst_reg + off16) |
BPF_AND | 0x50 | *(uint *) (dst_reg + off16) &= src_reg | atomic_and(src_reg, dst_reg + off16) |
BPF_OR | 0x40 | *(uint *) (dst_reg + off16) |= src_reg | atomic_or(src_reg, dst_reg + off16) |
BPF_XOR | 0xa0 | *(uint *) (dst_reg + off16) ^= src_reg | atomic_xor(src_reg, dst_reg + off16) |
BPF_ADD | BPF_FETCH | 0x01 (0x00 | 0x01) | tmp = *(uint *) (dst_reg + off16), *(uint *) (dst_reg + off16) += src_reg, src_reg = tmp | src_reg = atomic_fetch_add(src_reg, dst_reg + off16) |
BPF_AND | BPF_FETCH | 0x51 (0x50 | 0x01) | tmp = *(uint *) (dst_reg + off16), *(uint *) (dst_reg + off16) &= src_reg, src_reg = tmp | src_reg = atomic_fetch_and(src_reg, dst_reg + off16) |
BPF_OR | BPF_FETCH | 0x41 (0x40 | 0x01) | tmp = *(uint *) (dst_reg + off16), *(uint *) (dst_reg + off16) |= src_reg, src_reg = tmp | src_reg = atomic_fetch_or(src_reg, dst_reg + off16) |
BPF_XOR | BPF_FETCH | 0xa1 (0xa0 | 0x01) | tmp = *(uint *) (dst_reg + off16), *(uint *) (dst_reg + off16) ^= src_reg, src_reg = tmp | src_reg = atomic_fetch_xor(src_reg, dst_reg + off16) |
BPF_XCHG (0xe0 | BPF_FETCH) | 0xe1 (0xe0 | 0x01) | tmp = *(uint *) (dst_reg + off16), *(uint *) (dst_reg + off16) = src_reg, src_reg = tmp | src_reg = atomic_xchg(dst_reg + off16, src_reg) |
BPF_CMPXCHG (0xf0 | BPF_FETCH) | 0xf1 (0xf0 | 0x01) | (*(uint *) (dst_reg + off16) == r0) ? *(uint *) (dst_reg + off16) = src_reg : r0 = *(uint *) (dst_reg + off16) | r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) |
/* r0 = Data in packet, r1 - r5 are clobbered, r6 = struct sk_buff pointer, src_reg + imm32 = struct sk_buff data offset */ r0 = ntohl(*(u32 *) (((struct sk_buff *) r6)->data + src_reg + imm32));
__u32 n_umem_free_chunk; __u32 n_chunk; __u32 pos; __u32 i; n_umem_free_chunk = get_umem_free_chunks(); /* 여기서 get_umem_free_chunk 함수는 사용자가 구현하는 것으로 umem chunk 자원을 할당/해제하는 일련의 구현에서 해제된 사용가능한 chunk 개수를 반환하는 구현에 대응합니다. */ n_chunk = n_umem_free_chunk; /* 처음은 n_chunk 에 free chunk 수만큼을 대입하지만 이후 적절히 대입하는 구현이 고려되어야 합니다. */ for(;;) { n_reserved = xsk_ring_prod__reserve(&xsk_ring_prod, n_chunk, &pos); if(n_reserved == n_chunk) break; /* 채워넣을 Producer 공간의 예약이 되었으면 break */ /* Producer 예약이 요청한 갯수 n_chunk 만큼이 되지 않으면 */ if(xsk_ring_prod__need_wakeup(&xsk_ring_prod)) { /* Producer 가 확보될 때까지 또는 일정시간 지연에 대한 구현 */ } } for(i = 0u;i < n_chunk;i++) { /* CASE : 수신부 fill queue 목적인 경우 */ *xsk_ring_prod__fill_addr(&xsk_ring_prod /* umem_fq */, pos + i) = alloc_umem(); /* 여기서 alloc_umem 함수는 사용자가 구현하는 것으로 free umem chunk 로부터 1개의 chunk 를 할당하여 그 offset을 반환하는 구현에 대응합니다. */ /* CASE : 송신부 tx queue 목적인 경우 */ xsk_ring_prod__tx_desc(&xsk_ring_prod /* txq */, pos + i)->addr = <송신할 데이터가 채워진 chunk 주소>; xsk_ring_prod__tx_desc(&xsk_ring_prod /* txq */, pos + i)->len = <송신할 데이터의 크기>; } xsk_ring_prod__submit(&xsk_ring_prod, n_chunk); /* 이제 Producer 로 제공된 할당된 umem chunk를 처리할 수 있습니다. */ /* CASE : 송신부 tx queue 목적인 경우 */ if(xsk_ring_prod_needs_wakeup(&xsk_ring_prod /* txq */)) { sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); /* txq가 채워졌다고 알아서 Tx 되는 것은 아니며 이 때 Signal 목적의 send 함수를 호출해주어야 Tx 가 Trigger 됩니다. */ }
__u32 n_cons_size; __u32 pos; __u32 n_chunk; __u32 i; n_cons_size = ...; /* 여기서 n_cons_size 는 xsk_ring_cons 가 담을 수 있는 최대 chunk 수 입니다. */ n_chunk = xsk_rin_cons__peek(&xsk_ring_cons, n_cons_size, &pos); for(i = 0u;i < n_chunk;i++) { /* CASE: Completion queue 목적인 경우 */ __u64 addr = *xsk_ring_cons__comp_addr(&xsk_ring_cons, pos + i); /* chunk offset 을 얻어옵니다. */ free_chunk(addr); /* 여기서 free_chunk 함수는 사용자가 구현하는 것으로 할당되었던 umem chunk인 addr을 umem chunk 로 반환하는 구현을 하게 됩니다. */ /* CASE: rx queue 목적인 경우 */ __u64 addr = xsk_ring_cons__rx_desc(&xsk_ring_cons, pos + i)->addr; __u64 len = xsk_ring_cons__rx_desc(&xsk_ring_cons, pos + i)->len; } xsk_ring_cons__release(&xsk_ring_cons, n_chunk); /* Consumer 처리를 n_chunk 만큼 완료로 갱신합니다. */
make CC=clang
BCC is a toolkit for creating efficient kernel tracing and manipulation programs, and includes several useful tools and examples. It makes use of extended BPF (Berkeley Packet Filters), formally known as eBPF, a new feature that was first added to Linux 3.15. Much of what BCC uses requires Linux 4.1 and above.
https://legacy.netdevconf.info/0x14/pub/slides/54/[1]%20XDP%20meta%20data%20acceleration.pdf
: XDP meta-data Acceleration - Saeed Mahameed
struct xdp_buff *xdp { ... void *data_meta; ... } xdp_set_data_meta_invalid(&xdp); int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta); int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta); driver on XDP RX packet : xdp_buff.data_meta = xdp_buff->data - sizeof(meta_data); *xdp_buff.data_meta = meta_data; XDP user program: meta_data = (struct meta_data*)xdp_buff->data_meta; ...
ctx->data_meta
from kernel into user-space? - Stackoverflow(https://stackoverflow.com/questions/60487925/af-xdp-how-do-i-get-ctx-data-meta-from-kernel-into-user-space)
To get the full benefit of all features, libxdp needs to be used with kernel 5.10 or newer, unless the commits mentioned below have been backported. ...
참고 영상 |
Download xdp_building_block.pdf(https://archive.fosdem.org/2019/schedule/event/xdp_overview_and_update/attachments/slides/2877/export/events/attachments/xdp_overview_and_update/slides/2877/xdp_building_block.pdf) |
참고 영상 |
참고 영상 |
Slideshare: BPF Internals (eBPF) by Brendan Gregg(https://www.slideshare.net/brendangregg/bpf-internals-ebpf) |
참고 영상 |
참고 영상 |
참고 영상 |
참고 영상 |
참고 영상 |