* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright 2014 6WIND S.A.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#include <stdint.h>
#include <rte_mempool.h>
+#include <rte_memory.h>
#include <rte_atomic.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
* - The most-significant 8 bits are reserved for generic mbuf flags
* - TX flags therefore start at bit position 55 (i.e. 63-8), and new flags get
* added to the right of the previously defined flags
+ *
+ * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
+ * rte_get_tx_ol_flag_name().
*/
#define PKT_RX_VLAN_PKT (1ULL << 0) /**< RX packet is a 802.1q VLAN packet. */
#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
-#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR infos. */
+#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */
#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) /**< IP cksum of RX pkt. is not OK. */
#define PKT_RX_EIP_CKSUM_BAD (0ULL << 0) /**< External IP header checksum error. */
#define PKT_RX_IPV6_HDR_EXT (1ULL << 8) /**< RX packet with extended IPv6 header. */
#define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */
#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
+#define PKT_RX_TUNNEL_IPV4_HDR (1ULL << 11) /**< RX tunnel packet with IPv4 header.*/
+#define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
+#define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */
+#define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
+/* add new RX flags here */
-#define PKT_TX_VLAN_PKT (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
-#define PKT_TX_IP_CKSUM (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
-#define PKT_TX_IPV4_CSUM PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
-#define PKT_TX_IPV4 PKT_RX_IPV4_HDR /**< IPv4 with no IP checksum offload. */
-#define PKT_TX_IPV6 PKT_RX_IPV6_HDR /**< IPv6 packet */
+/* add new TX flags here */
-/*
- * Bits 52+53 used for L4 packet type with checksum enabled.
- * 00: Reserved
- * 01: TCP checksum
- * 10: SCTP checksum
- * 11: UDP checksum
+/**
+ * TCP segmentation offload. To enable this offload feature for a
+ * packet to be transmitted on hardware supporting TSO:
+ * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
+ * PKT_TX_TCP_CKSUM)
+ * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
+ * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
+ * to 0 in the packet
+ * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
+ * - calculate the pseudo header checksum without taking ip_len in account,
+ * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
+ * rte_ipv6_phdr_cksum() that can be used as helpers.
+ */
+#define PKT_TX_TCP_SEG (1ULL << 50)
+
+#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */
+
+/**
+ * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
+ * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
+ * L4 checksum offload, the user needs to:
+ * - fill l2_len and l3_len in mbuf
+ * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
+ * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
+ * - calculate the pseudo header checksum and set it in the L4 header (only
+ * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
+ * For SCTP, set the crc field to 0.
*/
#define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */
#define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */
#define PKT_TX_UDP_CKSUM (3ULL << 52) /**< UDP cksum of TX pkt. computed by NIC. */
#define PKT_TX_L4_MASK (3ULL << 52) /**< Mask for L4 cksum offload request. */
-/* Bit 51 - IEEE1588*/
-#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */
+/**
+ * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
+ * also be set by the application, although a PMD will only check
+ * PKT_TX_IP_CKSUM.
+ * - set the IP checksum field in the packet to 0
+ * - fill the mbuf offload information: l2_len, l3_len
+ */
+#define PKT_TX_IP_CKSUM (1ULL << 54)
+
+/**
+ * Packet is IPv4. This flag must be set when using any offload feature
+ * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4
+ * packet. If the packet is a tunneled packet, this flag is related to
+ * the inner headers.
+ */
+#define PKT_TX_IPV4 (1ULL << 55)
+
+/**
+ * Packet is IPv6. This flag must be set when using an offload feature
+ * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6
+ * packet. If the packet is a tunneled packet, this flag is related to
+ * the inner headers.
+ */
+#define PKT_TX_IPV6 (1ULL << 56)
+
+#define PKT_TX_VLAN_PKT (1ULL << 57) /**< TX packet is a 802.1q VLAN packet. */
+
+/**
+ * Offload the IP checksum of an external header in the hardware. The
+ * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh
+ * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the
+ * packet must be set to 0.
+ * - set the outer IP checksum field in the packet to 0
+ * - fill the mbuf offload information: outer_l2_len, outer_l3_len
+ */
+#define PKT_TX_OUTER_IP_CKSUM (1ULL << 58)
+
+/**
+ * Packet outer header is IPv4. This flag must be set when using any
+ * outer offload feature (L3 or L4 checksum) to tell the NIC that the
+ * outer header of the tunneled packet is an IPv4 packet.
+ */
+#define PKT_TX_OUTER_IPV4 (1ULL << 59)
+
+/**
+ * Packet outer header is IPv6. This flag must be set when using any
+ * outer offload feature (L4 checksum) to tell the NIC that the outer
+ * header of the tunneled packet is an IPv6 packet.
+ */
+#define PKT_TX_OUTER_IPV6 (1ULL << 60)
+
+#define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */
/* Use final bit of flags to indicate a control mbuf */
#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
/**
- * Bit Mask to indicate what bits required for building TX context
+ * Get the name of a RX offload flag
+ *
+ * @param mask
+ * The mask describing the flag.
+ * @return
+ * The name of this flag, or NULL if it's not a valid RX flag.
+ */
+const char *rte_get_rx_ol_flag_name(uint64_t mask);
+
+/**
+ * Get the name of a TX offload flag
+ *
+ * @param mask
+ * The mask describing the flag. Usually only one bit must be set.
+ * Several bits can be given if they belong to the same mask.
+ * Ex: PKT_TX_L4_MASK.
+ * @return
+ * The name of this flag, or NULL if it's not a valid TX flag.
*/
-#define PKT_TX_OFFLOAD_MASK (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK)
+const char *rte_get_tx_ol_flag_name(uint64_t mask);
/* define a set of marker types that can be used to refer to set points in the
* mbuf */
typedef void *MARKER[0]; /**< generic marker for a point in a structure */
+typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */
typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
* with a single assignment */
+
/**
* The generic rte_mbuf, containing a packet mbuf.
*/
void *buf_addr; /**< Virtual address of segment buffer. */
phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
- /* next 8 bytes are initialised on RX descriptor rearm */
- MARKER64 rearm_data;
uint16_t buf_len; /**< Length of segment buffer. */
+
+ /* next 6 bytes are initialised on RX descriptor rearm */
+ MARKER8 rearm_data;
uint16_t data_off;
/**
* config option.
*/
union {
-#ifdef RTE_MBUF_REFCNT
rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
uint16_t refcnt; /**< Non-atomically accessed refcnt */
-#endif
- uint16_t refcnt_reserved; /**< Do not use this field */
};
uint8_t nb_segs; /**< Number of segments. */
uint8_t port; /**< Input port. */
/* remaining bytes are set on RX when pulling packet from descriptor */
MARKER rx_descriptor_fields1;
- uint16_t reserved2; /**< Unused field. Required for padding */
+
+ /**
+ * The packet type, which is used to indicate ordinary packet and also
+ * tunneled packet format, i.e. each number is represented a type of
+ * packet.
+ */
+ uint16_t packet_type;
+
uint16_t data_len; /**< Amount of data in segment buffer. */
uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order) */
union {
uint32_t rss; /**< RSS hash result if RSS enabled */
struct {
- uint16_t hash;
- uint16_t id;
+ union {
+ struct {
+ uint16_t hash;
+ uint16_t id;
+ };
+ uint32_t lo;
+ /**< Second 4 flexible bytes */
+ };
+ uint32_t hi;
+ /**< First 4 flexible bytes or FD ID, dependent on
+ PKT_RX_FDIR_* flag in ol_flags. */
} fdir; /**< Filter identifier if FDIR enabled */
uint32_t sched; /**< Hierarchical scheduler */
+ uint32_t usr; /**< User defined tags. See @rte_distributor_process */
} hash; /**< hash information */
+ /* sequence number - field used in distributor and reorder library */
+ uint32_t seqn;
+
/* second cache line - fields only used in slow path or on TX */
MARKER cacheline1 __rte_cache_aligned;
/* fields to support TX offloads */
union {
- uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
+ uint64_t tx_offload; /**< combined for easy fetch */
struct {
- uint16_t l3_len:9; /**< L3 (IP) Header Length. */
- uint16_t l2_len:7; /**< L2 (MAC) Header Length. */
+ uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+ uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+ uint64_t tso_segsz:16; /**< TCP TSO segment size */
+
+ /* fields for TX offloading of tunnels */
+ uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
+ uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */
+
+ /* uint64_t unused:8; */
};
};
} __rte_cache_aligned;
/**
* Returns TRUE if given mbuf is indirect, or FALSE otherwise.
*/
-#define RTE_MBUF_INDIRECT(mb) (RTE_MBUF_FROM_BADDR((mb)->buf_addr) != (mb))
+#define RTE_MBUF_INDIRECT(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF)
/**
* Returns TRUE if given mbuf is direct, or FALSE otherwise.
*/
-#define RTE_MBUF_DIRECT(mb) (RTE_MBUF_FROM_BADDR((mb)->buf_addr) == (mb))
-
+#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb))
/**
* Private data in case of pktmbuf pool.
#endif /* RTE_LIBRTE_MBUF_DEBUG */
-#ifdef RTE_MBUF_REFCNT
#ifdef RTE_MBUF_REFCNT_ATOMIC
/**
rte_prefetch0(m); \
} while (0)
-#else /* ! RTE_MBUF_REFCNT */
-
-/** Mbuf prefetch */
-#define RTE_MBUF_PREFETCH_TO_FREE(m) do { } while(0)
-
-#define rte_mbuf_refcnt_set(m,v) do { } while(0)
-
-#endif /* RTE_MBUF_REFCNT */
-
/**
* Sanity checks on an mbuf.
if (rte_mempool_get(mp, &mb) < 0)
return NULL;
m = (struct rte_mbuf *)mb;
-#ifdef RTE_MBUF_REFCNT
RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0);
rte_mbuf_refcnt_set(m, 1);
-#endif /* RTE_MBUF_REFCNT */
return (m);
}
static inline void __attribute__((always_inline))
__rte_mbuf_raw_free(struct rte_mbuf *m)
{
-#ifdef RTE_MBUF_REFCNT
RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0);
-#endif /* RTE_MBUF_REFCNT */
rte_mempool_put(m->pool, m);
}
{
m->next = NULL;
m->pkt_len = 0;
- m->l2_l3_len = 0;
+ m->tx_offload = 0;
m->vlan_tci = 0;
m->nb_segs = 1;
m->port = 0xff;
m->ol_flags = 0;
+ m->packet_type = 0;
m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
RTE_PKTMBUF_HEADROOM : m->buf_len;
return (m);
}
-#ifdef RTE_MBUF_REFCNT
-
/**
* Attach packet mbuf to another packet mbuf.
* After attachment we refer the mbuf we attached as 'indirect',
mi->data_len = md->data_len;
mi->port = md->port;
mi->vlan_tci = md->vlan_tci;
- mi->l2_l3_len = md->l2_l3_len;
+ mi->tx_offload = md->tx_offload;
mi->hash = md->hash;
mi->next = NULL;
mi->pkt_len = mi->data_len;
mi->nb_segs = 1;
- mi->ol_flags = md->ol_flags;
+ mi->ol_flags = md->ol_flags | IND_ATTACHED_MBUF;
+ mi->packet_type = md->packet_type;
__rte_mbuf_sanity_check(mi, 1);
__rte_mbuf_sanity_check(md, 0);
RTE_PKTMBUF_HEADROOM : m->buf_len;
m->data_len = 0;
-}
-
-#endif /* RTE_MBUF_REFCNT */
+ m->ol_flags = 0;
+}
static inline struct rte_mbuf* __attribute__((always_inline))
__rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
{
__rte_mbuf_sanity_check(m, 0);
-#ifdef RTE_MBUF_REFCNT
if (likely (rte_mbuf_refcnt_read(m) == 1) ||
likely (rte_mbuf_refcnt_update(m, -1) == 0)) {
- struct rte_mbuf *md = RTE_MBUF_FROM_BADDR(m->buf_addr);
rte_mbuf_refcnt_set(m, 0);
* - detach mbuf
* - free attached mbuf segment
*/
- if (unlikely (md != m)) {
+ if (RTE_MBUF_INDIRECT(m)) {
+ struct rte_mbuf *md = RTE_MBUF_FROM_BADDR(m->buf_addr);
rte_pktmbuf_detach(m);
if (rte_mbuf_refcnt_update(md, -1) == 0)
__rte_mbuf_raw_free(md);
}
-#endif
return(m);
-#ifdef RTE_MBUF_REFCNT
}
return (NULL);
-#endif
}
/**
}
}
-#ifdef RTE_MBUF_REFCNT
-
/**
* Creates a "clone" of the given packet mbuf.
*
} while ((m = m->next) != NULL);
}
-#endif /* RTE_MBUF_REFCNT */
-
/**
* Get the headroom in a packet mbuf.
*