1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation.
3 * Copyright 2014 6WIND S.A.
7 * Snippet from dpdk.org rte_mbuf.h.
8 * used to provide BPF programs information about rte_mbuf layout.
15 #include <rte_common.h>
16 #include <rte_memory.h>
23 * Packet Offload Features Flags. It also carries packet type information.
24 * Critical resources. Both RX and TX share these bits. Be cautious on any change
26 * - RX flags start at bit position zero, and get added to the left of previous
28 * - The most-significant 3 bits are reserved for generic mbuf flags
29 * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get
30 * added to the right of the previously defined flags i.e. they should count
31 * downwards, not upwards.
33 * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
34 * rte_get_tx_ol_flag_name().
38 * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
39 * the packet is recognized as a VLAN, but the behavior between PMDs
40 * was not the same. This flag is kept for some time to avoid breaking
41 * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
43 #define PKT_RX_VLAN_PKT (1ULL << 0)
45 #define PKT_RX_RSS_HASH (1ULL << 1)
46 /**< RX packet with RSS hash result. */
47 #define PKT_RX_FDIR (1ULL << 2)
48 /**< RX packet with FDIR match indication. */
52 * Checking this flag alone is deprecated: check the 2 bits of
53 * PKT_RX_L4_CKSUM_MASK.
54 * This flag was set when the L4 checksum of a packet was detected as
55 * wrong by the hardware.
57 #define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
61 * Checking this flag alone is deprecated: check the 2 bits of
62 * PKT_RX_IP_CKSUM_MASK.
63 * This flag was set when the IP checksum of a packet was detected as
64 * wrong by the hardware.
66 #define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
68 #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5)
69 /**< External IP header checksum error. */
72 * A vlan has been stripped by the hardware and its tci is saved in
73 * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
74 * in the RX configuration of the PMD.
76 #define PKT_RX_VLAN_STRIPPED (1ULL << 6)
/**
 * Mask of bits used to determine the status of RX IP checksum.
 * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
 * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
 * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
 * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
 *   data, but the integrity of the IP header is verified.
 *
 * The status is a two-bit value spread over bits 4 and 7, so test it by
 * comparing (flags & PKT_RX_IP_CKSUM_MASK) against one of the four values
 * rather than by checking a single bit.
 */
#define PKT_RX_IP_CKSUM_MASK (PKT_RX_IP_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD)

#define PKT_RX_IP_CKSUM_UNKNOWN 0
#define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
/* Both bits set: same bit pattern as the mask itself. */
#define PKT_RX_IP_CKSUM_NONE (PKT_RX_IP_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD)
/**
 * Mask of bits used to determine the status of RX L4 checksum.
 * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
 * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
 * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
 * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
 *   data, but the integrity of the L4 data is verified.
 *
 * The status is a two-bit value spread over bits 3 and 8, so test it by
 * comparing (flags & PKT_RX_L4_CKSUM_MASK) against one of the four values
 * rather than by checking a single bit.
 */
#define PKT_RX_L4_CKSUM_MASK (PKT_RX_L4_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD)

#define PKT_RX_L4_CKSUM_UNKNOWN 0
#define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
/* Both bits set: same bit pattern as the mask itself. */
#define PKT_RX_L4_CKSUM_NONE (PKT_RX_L4_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD)
108 #define PKT_RX_IEEE1588_PTP (1ULL << 9)
109 /**< RX IEEE1588 L2 Ethernet PTP packet. */
110 #define PKT_RX_IEEE1588_TMST (1ULL << 10)
111 /**< RX IEEE1588 L2/L4 timestamped packet.*/
112 #define PKT_RX_FDIR_ID (1ULL << 13)
113 /**< FD id reported if FDIR match. */
114 #define PKT_RX_FDIR_FLX (1ULL << 14)
115 /**< Flexible bytes reported if FDIR match. */
118 * The 2 vlans have been stripped by the hardware and their tci are
119 * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
120 * This can only happen if vlan stripping is enabled in the RX
121 * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
124 #define PKT_RX_QINQ_STRIPPED (1ULL << 15)
128 * RX packet with double VLAN stripped.
129 * This flag is replaced by PKT_RX_QINQ_STRIPPED.
131 #define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
134 * When packets are coalesced by a hardware or virtual driver, this flag
135 * can be set in the RX mbuf, meaning that the m->tso_segsz field is
136 * valid and is set to the segment size of original packets.
138 #define PKT_RX_LRO (1ULL << 16)
141 * Indicate that the timestamp field in the mbuf is valid.
143 #define PKT_RX_TIMESTAMP (1ULL << 17)
145 /* add new RX flags here */
147 /* add new TX flags here */
150 * Offload the MACsec. This flag must be set by the application to enable
151 * this offload feature for a packet to be transmitted.
153 #define PKT_TX_MACSEC (1ULL << 44)
/**
 * Bits 45:48 used for the tunnel type.
 * When doing Tx offload like TSO or checksum, the HW needs to configure the
 * tunnel type into the HW descriptors.
 *
 * The tunnel type is an enumerated value stored in a 4-bit field, not a set
 * of independent bits (e.g. VXLAN | GRE has the same bit pattern as IPIP).
 * Extract it with (flags & PKT_TX_TUNNEL_MASK) and compare for equality.
 */
#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45)
#define PKT_TX_TUNNEL_GRE (0x2ULL << 45)
#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45)
#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45)
/** TX packet with MPLS-in-UDP RFC 7510 header. */
#define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45)
/* add new TX TUNNEL type here */
#define PKT_TX_TUNNEL_MASK (0xFULL << 45)
170 * Second VLAN insertion (QinQ) flag.
172 #define PKT_TX_QINQ_PKT (1ULL << 49)
173 /**< TX packet with double VLAN inserted. */
176 * TCP segmentation offload. To enable this offload feature for a
177 * packet to be transmitted on hardware supporting TSO:
178 * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
180 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
181 * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
183 * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
184 * calculate the pseudo header checksum without taking ip_len into account,
185 * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
186 * rte_ipv6_phdr_cksum() that can be used as helpers.
188 #define PKT_TX_TCP_SEG (1ULL << 50)
190 #define PKT_TX_IEEE1588_TMST (1ULL << 51)
191 /**< TX IEEE1588 packet to timestamp. */
194 * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
195 * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
196 * L4 checksum offload, the user needs to:
197 * - fill l2_len and l3_len in mbuf
198 * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
199 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
200 * - calculate the pseudo header checksum and set it in the L4 header (only
201 * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
202 * For SCTP, set the crc field to 0.
204 #define PKT_TX_L4_NO_CKSUM (0ULL << 52)
205 /**< Disable L4 cksum of TX pkt. */
206 #define PKT_TX_TCP_CKSUM (1ULL << 52)
207 /**< TCP cksum of TX pkt. computed by NIC. */
208 #define PKT_TX_SCTP_CKSUM (2ULL << 52)
209 /**< SCTP cksum of TX pkt. computed by NIC. */
210 #define PKT_TX_UDP_CKSUM (3ULL << 52)
211 /**< UDP cksum of TX pkt. computed by NIC. */
212 #define PKT_TX_L4_MASK (3ULL << 52)
213 /**< Mask for L4 cksum offload request. */
216 * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
217 * also be set by the application, although a PMD will only check
219 * - set the IP checksum field in the packet to 0
220 * - fill the mbuf offload information: l2_len, l3_len
222 #define PKT_TX_IP_CKSUM (1ULL << 54)
225 * Packet is IPv4. This flag must be set when using any offload feature
226 * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4
227 * packet. If the packet is a tunneled packet, this flag is related to
230 #define PKT_TX_IPV4 (1ULL << 55)
233 * Packet is IPv6. This flag must be set when using an offload feature
234 * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6
235 * packet. If the packet is a tunneled packet, this flag is related to
238 #define PKT_TX_IPV6 (1ULL << 56)
240 #define PKT_TX_VLAN_PKT (1ULL << 57)
241 /**< TX packet is a 802.1q VLAN packet. */
244 * Offload the IP checksum of an external header in the hardware. The
245 * flag PKT_TX_OUTER_IPV4 should also be set by the application, although
246 * a PMD will only check PKT_TX_OUTER_IP_CKSUM. The IP checksum field in the
247 * packet must be set to 0.
248 * - set the outer IP checksum field in the packet to 0
249 * - fill the mbuf offload information: outer_l2_len, outer_l3_len
251 #define PKT_TX_OUTER_IP_CKSUM (1ULL << 58)
254 * Packet outer header is IPv4. This flag must be set when using any
255 * outer offload feature (L3 or L4 checksum) to tell the NIC that the
256 * outer header of the tunneled packet is an IPv4 packet.
258 #define PKT_TX_OUTER_IPV4 (1ULL << 59)
261 * Packet outer header is IPv6. This flag must be set when using any
262 * outer offload feature (L4 checksum) to tell the NIC that the outer
263 * header of the tunneled packet is an IPv6 packet.
265 #define PKT_TX_OUTER_IPV6 (1ULL << 60)
268 * Bitmask of all supported packet Tx offload features flags,
269 * which can be set for packet.
271 #define PKT_TX_OFFLOAD_MASK ( \
274 PKT_TX_OUTER_IP_CKSUM | \
276 PKT_TX_IEEE1588_TMST | \
279 PKT_TX_TUNNEL_MASK | \
282 #define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */
284 #define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */
286 /* Use final bit of flags to indicate a control mbuf */
287 #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
289 /** Alignment constraint of mbuf private area. */
290 #define RTE_MBUF_PRIV_ALIGN 8
/**
 * Get the name of a RX offload flag
 *
 * @param mask
 *   The mask describing the flag.
 * @return
 *   The name of this flag, or NULL if it's not a valid RX flag.
 */
const char *rte_get_rx_ol_flag_name(uint64_t mask);

/**
 * Dump the list of RX offload flags in a buffer
 *
 * @param mask
 *   The mask describing the RX flags.
 * @param buf
 *   The buffer the list is written to.
 * @param buflen
 *   The length of the buffer.
 * @return
 *   0 on success, (-1) on error.
 */
int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);

/**
 * Get the name of a TX offload flag
 *
 * @param mask
 *   The mask describing the flag. Usually only one bit must be set.
 *   Several bits can be given if they belong to the same mask.
 *   Ex: PKT_TX_L4_MASK.
 * @return
 *   The name of this flag, or NULL if it's not a valid TX flag.
 */
const char *rte_get_tx_ol_flag_name(uint64_t mask);

/**
 * Dump the list of TX offload flags in a buffer
 *
 * @param mask
 *   The mask describing the TX flags.
 * @param buf
 *   The buffer the list is written to.
 * @param buflen
 *   The length of the buffer.
 * @return
 *   0 on success, (-1) on error.
 */
int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
/**
 * Some NICs need at least a 2KB buffer to RX a standard Ethernet frame
 * without splitting it into multiple segments.
 * So, for mbufs that are intended to be used for RX/TX, the recommended
 * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM.
 *
 * RTE_PKTMBUF_HEADROOM is not defined here; it must be visible wherever
 * RTE_MBUF_DEFAULT_BUF_SIZE is expanded.
 */
#define RTE_MBUF_DEFAULT_DATAROOM 2048
#define RTE_MBUF_DEFAULT_BUF_SIZE \
	(RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM)
352 /* define a set of marker types that can be used to refer to set points in the
356 typedef void *MARKER[0]; /**< generic marker for a point in a structure */
358 typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */
360 typedef uint64_t MARKER64[0];
361 /**< marker that allows us to overwrite 8 bytes with a single assignment */
364 volatile int16_t cnt; /**< An internal counter value. */
368 * The generic rte_mbuf, containing a packet mbuf.
373 void *buf_addr; /**< Virtual address of segment buffer. */
375 * Physical address of segment buffer.
376 * Force alignment to 8-bytes, so as to ensure we have the exact
377 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
378 * working on vector drivers easier.
380 phys_addr_t buf_physaddr __rte_aligned(sizeof(phys_addr_t));
382 /* next 8 bytes are initialised on RX descriptor rearm */
387 * Reference counter. Its size should be at least equal to the size
388 * of port field (16 bits), to support zero-copy broadcast.
389 * It should only be accessed using the following functions:
390 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
391 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
392 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
397 rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
399 /**< Non-atomically accessed refcnt */
401 uint16_t nb_segs; /**< Number of segments. */
403 /** Input port (16 bits to support more than 256 virtual ports). */
406 uint64_t ol_flags; /**< Offload features. */
408 /* remaining bytes are set on RX when pulling packet from descriptor */
409 MARKER rx_descriptor_fields1;
412 * The packet type, which is the combination of outer/inner L2, L3, L4
413 * and tunnel types. The packet_type is about data really present in the
414 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
415 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
416 * vlan is stripped from the data.
420 uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
422 uint32_t l2_type:4; /**< (Outer) L2 type. */
423 uint32_t l3_type:4; /**< (Outer) L3 type. */
424 uint32_t l4_type:4; /**< (Outer) L4 type. */
425 uint32_t tun_type:4; /**< Tunnel type. */
426 uint32_t inner_l2_type:4; /**< Inner L2 type. */
427 uint32_t inner_l3_type:4; /**< Inner L3 type. */
428 uint32_t inner_l4_type:4; /**< Inner L4 type. */
432 uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
433 uint16_t data_len; /**< Amount of data in segment buffer. */
434 /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
438 uint32_t rss; /**< RSS hash result if RSS enabled */
447 /**< Second 4 flexible bytes */
450 /**< First 4 flexible bytes or FD ID, dependent on
451 * PKT_RX_FDIR_* flag in ol_flags.
453 } fdir; /**< Filter identifier if FDIR enabled */
457 } sched; /**< Hierarchical scheduler */
459 /**< User defined tags. See rte_distributor_process() */
460 } hash; /**< hash information */
462 /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
463 uint16_t vlan_tci_outer;
465 uint16_t buf_len; /**< Length of segment buffer. */
467 /** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
468 * are not normalized but are always the same for a given port.
472 /* second cache line - fields only used in slow path or on TX */
473 MARKER cacheline1 __rte_cache_min_aligned;
477 void *userdata; /**< Can be used for external metadata */
478 uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
481 struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
482 struct rte_mbuf *next; /**< Next segment of scattered packet. */
484 /* fields to support TX offloads */
487 uint64_t tx_offload; /**< combined for easy fetch */
491 /**< L2 (MAC) Header Length for non-tunneling pkt.
492 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
494 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
495 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
496 uint64_t tso_segsz:16; /**< TCP TSO segment size */
498 /* fields for TX offloading of tunnels */
499 uint64_t outer_l3_len:9;
500 /**< Outer L3 (IP) Hdr Length. */
501 uint64_t outer_l2_len:7;
502 /**< Outer L2 (MAC) Hdr Length. */
504 /* uint64_t unused:8; */
508 /** Size of the application private data. In case of an indirect
509 * mbuf, it stores the direct mbuf private data size.
513 /** Timesync flags for use with IEEE1588. */
516 /** Sequence number. See also rte_reorder_insert(). */
519 } __rte_cache_aligned;
/**
 * Returns TRUE if given mbuf is indirect, or FALSE otherwise.
 *
 * An mbuf is reported as indirect when IND_ATTACHED_MBUF is set in its
 * ol_flags.
 *
 * @param mb
 *   Pointer to the mbuf to test.
 * @return
 *   1 if the flag is set, 0 otherwise. The result is normalized to 0/1
 *   because the raw masked value only has a high bit of the 64-bit flags
 *   word set and would become 0 if stored in a narrower integer.
 */
#define RTE_MBUF_INDIRECT(mb) (((mb)->ol_flags & IND_ATTACHED_MBUF) != 0)
/**
 * Returns TRUE if given mbuf is direct, or FALSE otherwise.
 *
 * This is the logical negation of RTE_MBUF_INDIRECT().
 *
 * @param mb
 *   Pointer to the mbuf to test.
 * @return
 *   1 if the mbuf is not indirect, 0 if it is.
 */
#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb))
533 * Private data in case of pktmbuf pool.
535 * A structure that contains some pktmbuf_pool-specific data that are
536 * appended after the mempool structure (in private data).
538 struct rte_pktmbuf_pool_private {
539 uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */
540 uint16_t mbuf_priv_size; /**< Size of private area in each mbuf. */
/**
 * A macro that points to an offset into the data in the mbuf.
 *
 * The returned pointer is cast to type t. Before using this
 * function, the user must ensure that the first segment is large
 * enough to accommodate its data.
 *
 * The address is computed as buf_addr + data_off + o, in bytes.
 * Note that m is expanded twice, so it must not have side effects.
 *
 * @param m
 *   The packet mbuf.
 * @param t
 *   The type to cast the result into.
 * @param o
 *   The offset into the mbuf data.
 */
#define rte_pktmbuf_mtod_offset(m, t, o) \
	((t)((char *)(m)->buf_addr + (m)->data_off + (o)))
/**
 * A macro that points to the start of the data in the mbuf.
 *
 * The returned pointer is cast to type t. Before using this
 * function, the user must ensure that the first segment is large
 * enough to accommodate its data.
 *
 * Equivalent to rte_pktmbuf_mtod_offset() with an offset of 0.
 *
 * @param m
 *   The packet mbuf.
 * @param t
 *   The type to cast the result into.
 */
#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
578 #endif /* _MBUF_H_ */