1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation.
3 * Copyright 2014 6WIND S.A.
7 * Snippet from dpdk.org rte_mbuf.h,
8 * used to provide BPF programs with information about the rte_mbuf layout.
15 #include <rte_common.h>
22 * Packet Offload Features Flags. It also carries packet type information.
23 * Critical resources. Both RX and TX share these bits. Be cautious on any change
25 * - RX flags start at bit position zero, and get added to the left of previous
27 * - The most-significant 3 bits are reserved for generic mbuf flags
28 * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get
29 * added to the right of the previously defined flags i.e. they should count
30 * downwards, not upwards.
32 * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
33 * rte_get_tx_ol_flag_name().
37 * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
38 * the packet is recognized as a VLAN, but the behavior between PMDs
39 * was not the same. This flag is kept for some time to avoid breaking
40 * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
42 #define PKT_RX_VLAN_PKT (1ULL << 0)
44 #define PKT_RX_RSS_HASH (1ULL << 1)
45 /**< RX packet with RSS hash result. */
46 #define PKT_RX_FDIR (1ULL << 2)
47 /**< RX packet with FDIR match indication. */
51 * Checking this flag alone is deprecated: check the 2 bits of
52 * PKT_RX_L4_CKSUM_MASK.
53 * This flag was set when the L4 checksum of a packet was detected as
54 * wrong by the hardware.
56 #define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
60 * Checking this flag alone is deprecated: check the 2 bits of
61 * PKT_RX_IP_CKSUM_MASK.
62 * This flag was set when the IP checksum of a packet was detected as
63 * wrong by the hardware.
65 #define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
67 #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5)
68 /**< External IP header checksum error. */
71 * A vlan has been stripped by the hardware and its tci is saved in
72 * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
73 * in the RX configuration of the PMD.
75 #define PKT_RX_VLAN_STRIPPED (1ULL << 6)
78 * Mask of bits used to determine the status of RX IP checksum.
79 * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
80 * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
81 * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
82 * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
83 * data, but the integrity of the IP header is verified.
85 #define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7))
87 #define PKT_RX_IP_CKSUM_UNKNOWN 0
88 #define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
89 #define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
90 #define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7))
93 * Mask of bits used to determine the status of RX L4 checksum.
94 * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
95 * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
96 * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
97 * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
98 * data, but the integrity of the L4 data is verified.
100 #define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8))
102 #define PKT_RX_L4_CKSUM_UNKNOWN 0
103 #define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
104 #define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
105 #define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8))
107 #define PKT_RX_IEEE1588_PTP (1ULL << 9)
108 /**< RX IEEE1588 L2 Ethernet PTP packet. */
109 #define PKT_RX_IEEE1588_TMST (1ULL << 10)
110 /**< RX IEEE1588 L2/L4 timestamped packet.*/
111 #define PKT_RX_FDIR_ID (1ULL << 13)
112 /**< FD id reported if FDIR match. */
113 #define PKT_RX_FDIR_FLX (1ULL << 14)
114 /**< Flexible bytes reported if FDIR match. */
117 * The 2 vlans have been stripped by the hardware and their tci are
118 * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
119 * This can only happen if vlan stripping is enabled in the RX
120 * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
123 #define PKT_RX_QINQ_STRIPPED (1ULL << 15)
127 * RX packet with double VLAN stripped.
128 * This flag is replaced by PKT_RX_QINQ_STRIPPED.
130 #define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
133 * When packets are coalesced by a hardware or virtual driver, this flag
134 * can be set in the RX mbuf, meaning that the m->tso_segsz field is
135 * valid and is set to the segment size of original packets.
137 #define PKT_RX_LRO (1ULL << 16)
140 * Indicate that the timestamp field in the mbuf is valid.
142 #define PKT_RX_TIMESTAMP (1ULL << 17)
144 /* add new RX flags here */
146 /* add new TX flags here */
149 * Offload the MACsec. This flag must be set by the application to enable
150 * this offload feature for a packet to be transmitted.
152 #define PKT_TX_MACSEC (1ULL << 44)
155 * Bits 45:48 used for the tunnel type.
156 * When doing Tx offload like TSO or checksum, the HW needs to configure the
157 * tunnel type into the HW descriptors.
159 #define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45)
160 #define PKT_TX_TUNNEL_GRE (0x2ULL << 45)
161 #define PKT_TX_TUNNEL_IPIP (0x3ULL << 45)
162 #define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45)
163 /**< TX packet with MPLS-in-UDP RFC 7510 header. */
164 #define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45)
165 /* add new TX TUNNEL type here */
166 #define PKT_TX_TUNNEL_MASK (0xFULL << 45)
169 * Second VLAN insertion (QinQ) flag.
171 #define PKT_TX_QINQ_PKT (1ULL << 49)
172 /**< TX packet with double VLAN inserted. */
175 * TCP segmentation offload. To enable this offload feature for a
176 * packet to be transmitted on hardware supporting TSO:
177 * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
179 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
180 * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
182 * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
183 * - calculate the pseudo header checksum without taking ip_len into account,
184 * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
185 * rte_ipv6_phdr_cksum() that can be used as helpers.
187 #define PKT_TX_TCP_SEG (1ULL << 50)
189 #define PKT_TX_IEEE1588_TMST (1ULL << 51)
190 /**< TX IEEE1588 packet to timestamp. */
193 * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
194 * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
195 * L4 checksum offload, the user needs to:
196 * - fill l2_len and l3_len in mbuf
197 * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
198 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
199 * - calculate the pseudo header checksum and set it in the L4 header (only
200 * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
201 * For SCTP, set the crc field to 0.
203 #define PKT_TX_L4_NO_CKSUM (0ULL << 52)
204 /**< Disable L4 cksum of TX pkt. */
205 #define PKT_TX_TCP_CKSUM (1ULL << 52)
206 /**< TCP cksum of TX pkt. computed by NIC. */
207 #define PKT_TX_SCTP_CKSUM (2ULL << 52)
208 /**< SCTP cksum of TX pkt. computed by NIC. */
209 #define PKT_TX_UDP_CKSUM (3ULL << 52)
210 /**< UDP cksum of TX pkt. computed by NIC. */
211 #define PKT_TX_L4_MASK (3ULL << 52)
212 /**< Mask for L4 cksum offload request. */
215 * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
216 * also be set by the application, although a PMD will only check
218 * - set the IP checksum field in the packet to 0
219 * - fill the mbuf offload information: l2_len, l3_len
221 #define PKT_TX_IP_CKSUM (1ULL << 54)
224 * Packet is IPv4. This flag must be set when using any offload feature
225 * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4
226 * packet. If the packet is a tunneled packet, this flag is related to
229 #define PKT_TX_IPV4 (1ULL << 55)
232 * Packet is IPv6. This flag must be set when using an offload feature
233 * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6
234 * packet. If the packet is a tunneled packet, this flag is related to
237 #define PKT_TX_IPV6 (1ULL << 56)
239 #define PKT_TX_VLAN_PKT (1ULL << 57)
240 /**< TX packet is a 802.1q VLAN packet. */
243 * Offload the IP checksum of an external header in the hardware. The
244 * flag PKT_TX_OUTER_IPV4 should also be set by the application, although
245 * a PMD will only check PKT_TX_OUTER_IP_CKSUM. The IP checksum field in the
246 * packet must be set to 0.
247 * - set the outer IP checksum field in the packet to 0
248 * - fill the mbuf offload information: outer_l2_len, outer_l3_len
250 #define PKT_TX_OUTER_IP_CKSUM (1ULL << 58)
253 * Packet outer header is IPv4. This flag must be set when using any
254 * outer offload feature (L3 or L4 checksum) to tell the NIC that the
255 * outer header of the tunneled packet is an IPv4 packet.
257 #define PKT_TX_OUTER_IPV4 (1ULL << 59)
260 * Packet outer header is IPv6. This flag must be set when using any
261 * outer offload feature (L4 checksum) to tell the NIC that the outer
262 * header of the tunneled packet is an IPv6 packet.
264 #define PKT_TX_OUTER_IPV6 (1ULL << 60)
267 * Bitmask of all supported packet Tx offload features flags,
268 * which can be set for packet.
270 #define PKT_TX_OFFLOAD_MASK ( \
273 PKT_TX_OUTER_IP_CKSUM | \
275 PKT_TX_IEEE1588_TMST | \
278 PKT_TX_TUNNEL_MASK | \
281 #define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */
283 #define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */
285 /* Use final bit of flags to indicate a control mbuf */
286 #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
288 /** Alignment constraint of mbuf private area. */
289 #define RTE_MBUF_PRIV_ALIGN 8
292 * Get the name of a RX offload flag
295 * The mask describing the flag.
297 * The name of this flag, or NULL if it's not a valid RX flag.
299 const char *rte_get_rx_ol_flag_name(uint64_t mask);
302 * Dump the list of RX offload flags in a buffer
305 * The mask describing the RX flags.
309 * The length of the buffer.
311 * 0 on success, (-1) on error.
313 int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
316 * Get the name of a TX offload flag
319 * The mask describing the flag. Usually only one bit must be set.
320 * Several bits can be given if they belong to the same mask.
321 * Ex: PKT_TX_L4_MASK.
323 * The name of this flag, or NULL if it's not a valid TX flag.
325 const char *rte_get_tx_ol_flag_name(uint64_t mask);
328 * Dump the list of TX offload flags in a buffer
331 * The mask describing the TX flags.
335 * The length of the buffer.
337 * 0 on success, (-1) on error.
339 int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
342 * Some NICs need at least 2KB buffer to RX standard Ethernet frame without
343 * splitting it into multiple segments.
344 * So, for mbufs that are planned to be involved in RX/TX, the recommended
345 * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM.
347 #define RTE_MBUF_DEFAULT_DATAROOM 2048
348 #define RTE_MBUF_DEFAULT_BUF_SIZE \
349 (RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM)
351 /* define a set of marker types that can be used to refer to set points in the
355 typedef void *MARKER[0]; /**< generic marker for a point in a structure */
357 typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */
359 typedef uint64_t MARKER64[0];
360 /**< marker that allows us to overwrite 8 bytes with a single assignment */
363 volatile int16_t cnt; /**< An internal counter value. */
366 #define RTE_CACHE_LINE_MIN_SIZE 64 /**< Minimum Cache line size. */
369 * Force minimum cache line alignment.
371 #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
374 * IO virtual address type.
375 * When the physical addressing mode (IOVA as PA) is in use,
376 * the translation from an IO virtual address (IOVA) to a physical address
377 * is a direct mapping, i.e. the same value.
378 * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
380 typedef uint64_t rte_iova_t;
381 #define RTE_BAD_IOVA ((rte_iova_t)-1)
384 * The generic rte_mbuf, containing a packet mbuf.
389 void *buf_addr; /**< Virtual address of segment buffer. */
391 * Physical address of segment buffer.
392 * Force alignment to 8-bytes, so as to ensure we have the exact
393 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
394 * working on vector drivers easier.
399 rte_iova_t buf_physaddr; /**< deprecated */
400 } __rte_aligned(sizeof(rte_iova_t));
402 /* next 8 bytes are initialised on RX descriptor rearm */
407 * Reference counter. Its size should be at least equal to the size
408 * of port field (16 bits), to support zero-copy broadcast.
409 * It should only be accessed using the following functions:
410 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
411 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
412 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
417 rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
419 /**< Non-atomically accessed refcnt */
421 uint16_t nb_segs; /**< Number of segments. */
423 /** Input port (16 bits to support more than 256 virtual ports). */
426 uint64_t ol_flags; /**< Offload features. */
428 /* remaining bytes are set on RX when pulling packet from descriptor */
429 MARKER rx_descriptor_fields1;
432 * The packet type, which is the combination of outer/inner L2, L3, L4
433 * and tunnel types. The packet_type is about data really present in the
434 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
435 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
436 * vlan is stripped from the data.
440 uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
442 uint32_t l2_type:4; /**< (Outer) L2 type. */
443 uint32_t l3_type:4; /**< (Outer) L3 type. */
444 uint32_t l4_type:4; /**< (Outer) L4 type. */
445 uint32_t tun_type:4; /**< Tunnel type. */
446 uint32_t inner_l2_type:4; /**< Inner L2 type. */
447 uint32_t inner_l3_type:4; /**< Inner L3 type. */
448 uint32_t inner_l4_type:4; /**< Inner L4 type. */
452 uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
453 uint16_t data_len; /**< Amount of data in segment buffer. */
454 /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
458 uint32_t rss; /**< RSS hash result if RSS enabled */
467 /**< Second 4 flexible bytes */
470 /**< First 4 flexible bytes or FD ID, dependent on
471 * PKT_RX_FDIR_* flag in ol_flags.
473 } fdir; /**< Filter identifier if FDIR enabled */
477 } sched; /**< Hierarchical scheduler */
479 /**< User defined tags. See rte_distributor_process() */
480 } hash; /**< hash information */
482 /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
483 uint16_t vlan_tci_outer;
485 uint16_t buf_len; /**< Length of segment buffer. */
487 /** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
488 * are not normalized but are always the same for a given port.
492 /* second cache line - fields only used in slow path or on TX */
493 MARKER cacheline1 __rte_cache_min_aligned;
497 void *userdata; /**< Can be used for external metadata */
498 uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
501 struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
502 struct rte_mbuf *next; /**< Next segment of scattered packet. */
504 /* fields to support TX offloads */
507 uint64_t tx_offload; /**< combined for easy fetch */
511 /**< L2 (MAC) Header Length for non-tunneling pkt.
512 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
514 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
515 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
516 uint64_t tso_segsz:16; /**< TCP TSO segment size */
518 /* fields for TX offloading of tunnels */
519 uint64_t outer_l3_len:9;
520 /**< Outer L3 (IP) Hdr Length. */
521 uint64_t outer_l2_len:7;
522 /**< Outer L2 (MAC) Hdr Length. */
524 /* uint64_t unused:8; */
528 /** Size of the application private data. In case of an indirect
529 * mbuf, it stores the direct mbuf private data size.
533 /** Timesync flags for use with IEEE1588. */
536 /** Sequence number. See also rte_reorder_insert(). */
539 } __rte_cache_aligned;
543 * Returns TRUE if given mbuf is cloned by mbuf indirection, or FALSE
546 * If a mbuf has its data in another mbuf and references it by mbuf
547 * indirection, this mbuf can be defined as a cloned mbuf.
/* Yields the raw masked ol_flags value (the IND_ATTACHED_MBUF bit or 0), not a normalized 0/1; test it for non-zero rather than storing it in a narrower integer. */
549 #define RTE_MBUF_CLONED(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF)
552 * Returns TRUE if given mbuf is direct, or FALSE otherwise.
554 * If a mbuf embeds its own data after the rte_mbuf structure, this mbuf
555 * can be defined as a direct mbuf.
/* Logical negation of RTE_MBUF_CLONED(mb), so the result is exactly 0 or 1. */
557 #define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_CLONED(mb))
560 * Private data in case of pktmbuf pool.
562 * A structure that contains some pktmbuf_pool-specific data that are
563 * appended after the mempool structure (in private data).
565 struct rte_pktmbuf_pool_private {
566 uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */
567 uint16_t mbuf_priv_size; /**< Size of private area in each mbuf. */
571 * A macro that points to an offset into the data in the mbuf.
573 * The returned pointer is cast to type t. Before using this
574 * function, the user must ensure that the first segment is large
575 * enough to accommodate its data.
580 * The offset into the mbuf data.
582 * The type to cast the result into.
/*
 * Computes (char *)buf_addr + data_off + o, so the offset is counted in bytes,
 * and casts the resulting address to t. No bounds check is performed here.
 * NOTE: m is expanded twice; pass an expression without side effects.
 */
584 #define rte_pktmbuf_mtod_offset(m, t, o) \
585 ((t)((char *)(m)->buf_addr + (m)->data_off + (o)))
588 * A macro that points to the start of the data in the mbuf.
590 * The returned pointer is cast to type t. Before using this
591 * function, the user must ensure that the first segment is large
592 * enough to accommodate its data.
597 * The type to cast the result into.
/* Shorthand for rte_pktmbuf_mtod_offset(m, t, 0). */
599 #define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
605 #endif /* _MBUF_H_ */