Currently, enic completely ignores the requested max Rx packet size
(rxmode.max_rx_pkt_len). The desired behavior is that the NIC hardware
drops packets larger than the requested size, even though they are
still smaller than MTU.
Cisco VIC does not have such a feature. But, we can accomplish a
similar (not same) effect by reducing the size of posted receive
buffers. Packets larger than the posted size get truncated, and the
receive handler drops them. This is also how the kernel enic driver
enforces the Rx side MTU.
This workaround works only when scatter mode is *not* used. When
scatter is used, there is currently no way to support
rxmode.max_rx_pkt_len, as the NIC always receives packets up to MTU.
For posterity, add a copious amount of comments regarding the
hardware's drop/receive behavior with respect to max/current MTU.
Signed-off-by: Hyong Youb Kim <hyonkim@cisco.com>
Reviewed-by: John Daley <johndale@cisco.com>
- Setting of extended VLAN
- UDP RSS hashing
- MTU update only works if Scattered Rx mode is disabled
- Setting of extended VLAN
- UDP RSS hashing
- MTU update only works if Scattered Rx mode is disabled
+- Maximum receive packet length is ignored if Scattered Rx mode is used
Prerequisites
-------------
Prerequisites
-------------
union vnic_rss_cpu rss_cpu;
};
union vnic_rss_cpu rss_cpu;
};
+/* Compute ethdev's max packet size from MTU */
+static inline uint32_t enic_mtu_to_max_rx_pktlen(uint32_t mtu)
+{
+ /* ethdev max size includes eth and crc whereas NIC MTU does not */
+ return mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+}
+
/* Get the CQ index from a Start of Packet(SOP) RQ index */
static inline unsigned int enic_sop_rq_idx_to_cq_idx(unsigned int sop_idx)
{
/* Get the CQ index from a Start of Packet(SOP) RQ index */
static inline unsigned int enic_sop_rq_idx_to_cq_idx(unsigned int sop_idx)
{
device_info->max_rx_queues = enic->conf_rq_count / 2;
device_info->max_tx_queues = enic->conf_wq_count;
device_info->min_rx_bufsize = ENIC_MIN_MTU;
device_info->max_rx_queues = enic->conf_rq_count / 2;
device_info->max_tx_queues = enic->conf_wq_count;
device_info->min_rx_bufsize = ENIC_MIN_MTU;
- device_info->max_rx_pktlen = enic->max_mtu + ETHER_HDR_LEN + 4;
+ /* "Max" mtu is not a typo. HW receives packet sizes up to the
+ * max mtu regardless of the current mtu (vNIC's mtu). vNIC mtu is
+ * a hint to the driver to size receive buffers accordingly so that
+ * larger-than-vnic-mtu packets get truncated.. For DPDK, we let
+ * the user decide the buffer size via rxmode.max_rx_pkt_len, basically
+ * ignoring vNIC mtu.
+ */
+ device_info->max_rx_pktlen = enic_mtu_to_max_rx_pktlen(enic->max_mtu);
device_info->max_mac_addrs = ENIC_MAX_MAC_ADDR;
device_info->rx_offload_capa =
DEV_RX_OFFLOAD_VLAN_STRIP |
device_info->max_mac_addrs = ENIC_MAX_MAC_ADDR;
device_info->rx_offload_capa =
DEV_RX_OFFLOAD_VLAN_STRIP |
struct rq_enet_desc *rqd = rq->ring.descs;
unsigned i;
dma_addr_t dma_addr;
struct rq_enet_desc *rqd = rq->ring.descs;
unsigned i;
dma_addr_t dma_addr;
+ uint32_t max_rx_pkt_len;
+ uint16_t rq_buf_len;
if (!rq->in_use)
return 0;
if (!rq->in_use)
return 0;
dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
rq->ring.desc_count);
dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
rq->ring.desc_count);
+ /*
+ * If *not* using scatter and the mbuf size is smaller than the
+ * requested max packet size (max_rx_pkt_len), then reduce the
+ * posted buffer size to max_rx_pkt_len. HW still receives packets
+ * larger than max_rx_pkt_len, but they will be truncated, which we
+ * drop in the rx handler. Not ideal, but better than returning
+ * large packets when the user is not expecting them.
+ */
+ max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
+ if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
+ rq_buf_len = max_rx_pkt_len;
for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
mb = rte_mbuf_raw_alloc(rq->mp);
if (mb == NULL) {
for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
mb = rte_mbuf_raw_alloc(rq->mp);
if (mb == NULL) {
rq_enet_desc_enc(rqd, dma_addr,
(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
: RQ_ENET_TYPE_NOT_SOP),
rq_enet_desc_enc(rqd, dma_addr,
(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
: RQ_ENET_TYPE_NOT_SOP),
- mb->buf_len - RTE_PKTMBUF_HEADROOM);
unsigned int mbuf_size, mbufs_per_pkt;
unsigned int nb_sop_desc, nb_data_desc;
uint16_t min_sop, max_sop, min_data, max_data;
unsigned int mbuf_size, mbufs_per_pkt;
unsigned int nb_sop_desc, nb_data_desc;
uint16_t min_sop, max_sop, min_data, max_data;
- uint16_t mtu = enic->rte_dev->data->mtu;
+ uint32_t max_rx_pkt_len;
rq_sop->is_sop = 1;
rq_sop->data_queue_idx = data_queue_idx;
rq_sop->is_sop = 1;
rq_sop->data_queue_idx = data_queue_idx;
mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
RTE_PKTMBUF_HEADROOM);
mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
RTE_PKTMBUF_HEADROOM);
+ /* max_rx_pkt_len includes the ethernet header and CRC. */
+ max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
if (enic->rte_dev->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_SCATTER) {
dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
if (enic->rte_dev->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_SCATTER) {
dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
- /* ceil((mtu + ETHER_HDR_LEN + 4)/mbuf_size) */
- mbufs_per_pkt = ((mtu + ETHER_HDR_LEN + 4) +
- (mbuf_size - 1)) / mbuf_size;
+ /* ceil((max pkt len)/mbuf_size) */
+ mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
} else {
dev_info(enic, "Scatter rx mode disabled\n");
mbufs_per_pkt = 1;
} else {
dev_info(enic, "Scatter rx mode disabled\n");
mbufs_per_pkt = 1;
+ if (max_rx_pkt_len > mbuf_size) {
+ dev_warning(enic, "The maximum Rx packet size (%u) is"
+ " larger than the mbuf size (%u), and"
+ " scatter is disabled. Larger packets will"
+ " be truncated.\n",
+ max_rx_pkt_len, mbuf_size);
+ }
}
if (mbufs_per_pkt > 1) {
dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
rq_sop->data_queue_enable = 1;
rq_data->in_use = 1;
}
if (mbufs_per_pkt > 1) {
dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
rq_sop->data_queue_enable = 1;
rq_data->in_use = 1;
+ /*
+ * HW does not directly support rxmode.max_rx_pkt_len. HW always
+ * receives packet sizes up to the "max" MTU.
+ * If not using scatter, we can achieve the effect of dropping
+ * larger packets by reducing the size of posted buffers.
+ * See enic_alloc_rx_queue_mbufs().
+ */
+ if (max_rx_pkt_len <
+ enic_mtu_to_max_rx_pktlen(enic->rte_dev->data->mtu)) {
+ dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
+ " when scatter rx mode is in use.\n");
+ }
} else {
dev_info(enic, "Rq %u Scatter rx mode not being used\n",
queue_idx);
} else {
dev_info(enic, "Rq %u Scatter rx mode not being used\n",
queue_idx);
nb_data_desc = max_data;
}
if (mbufs_per_pkt > 1) {
nb_data_desc = max_data;
}
if (mbufs_per_pkt > 1) {
- dev_info(enic, "For mtu %d and mbuf size %d valid rx descriptor range is %d to %d\n",
- mtu, mbuf_size, min_sop + min_data,
+ dev_info(enic, "For max packet size %u and mbuf size %u valid"
+ " rx descriptor range is %u to %u\n",
+ max_rx_pkt_len, mbuf_size, min_sop + min_data,
max_sop + max_data);
}
dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
max_sop + max_data);
}
dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",