1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
9 #include <rte_ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
13 #include <mlx5_malloc.h>
17 #include "mlx5_rxtx.h"
18 #include "mlx5_utils.h"
19 #include "rte_pmd_mlx5.h"
22 * Stop traffic on Tx queues.
25 * Pointer to Ethernet device structure.
/*
 * Stop Tx traffic: drop the reference held on every configured Tx queue
 * so each queue's resources can be torn down.
 * NOTE(review): this excerpt is non-contiguous (embedded original line
 * numbers jump) — braces and the declaration of `i` are elided here.
 */
28 mlx5_txq_stop(struct rte_eth_dev *dev)
30 struct mlx5_priv *priv = dev->data->dev_private;
33 for (i = 0; i != priv->txqs_n; ++i)
34 mlx5_txq_release(dev, i);
38 * Start traffic on Tx queues.
41 * Pointer to Ethernet device structure.
44 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Start Tx traffic: for each configured Tx queue, allocate SW elements
 * (standard queues only), allocate and create the HW queue object through
 * the per-device ops table, and link the object into the shared list.
 * Returns 0 on success; on failure releases the queues brought up so far
 * and restores rte_errno.
 * NOTE(review): excerpt is non-contiguous — declarations of `i`/`ret`,
 * braces, `continue`s and some error-branch lines are elided.
 */
47 mlx5_txq_start(struct rte_eth_dev *dev)
49 struct mlx5_priv *priv = dev->data->dev_private;
53 for (i = 0; i != priv->txqs_n; ++i) {
54 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
55 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
56 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
/* SW ring elements are only needed for standard (non-hairpin) queues. */
60 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
61 txq_alloc_elts(txq_ctrl);
62 MLX5_ASSERT(!txq_ctrl->obj);
63 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66 DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
67 "memory resources.", dev->data->port_id,
/* Create the backend-specific queue object via the ops table. */
72 ret = priv->obj_ops.txq_obj_new(dev, i);
74 mlx5_free(txq_ctrl->obj);
78 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
/* Completion-tracking array sized by the CQE count (cqe_s entries). */
79 size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
80 txq_data->fcqs = mlx5_malloc(flags, size,
83 if (!txq_data->fcqs) {
84 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
85 "allocate memory (FCQ).",
86 dev->data->port_id, i);
91 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
92 dev->data->port_id, i, (void *)&txq_ctrl->obj);
93 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
/* Error path: unwind created queues while preserving the original errno. */
97 ret = rte_errno; /* Save rte_errno before cleanup. */
99 mlx5_txq_release(dev, i);
101 rte_errno = ret; /* Restore rte_errno. */
106 * Stop traffic on Rx queues.
109 * Pointer to Ethernet device structure.
/*
 * Stop Rx traffic: drop the reference held on every configured Rx queue,
 * mirroring mlx5_txq_stop() on the Tx side.
 * NOTE(review): excerpt is non-contiguous — braces and the declaration of
 * `i` are elided here.
 */
112 mlx5_rxq_stop(struct rte_eth_dev *dev)
114 struct mlx5_priv *priv = dev->data->dev_private;
117 for (i = 0; i != priv->rxqs_n; ++i)
118 mlx5_rxq_release(dev, i);
122 * Start traffic on Rx queues.
125 * Pointer to Ethernet device structure.
128 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Start Rx traffic: prepare the (shared) Multi-Packet RQ mempool, then for
 * each configured Rx queue register its mempool in the MR cache, allocate
 * SW elements, allocate and create the HW queue object through the
 * per-device ops table, and link the object into the shared list.
 * Returns 0 on success; on failure releases the queues brought up so far
 * and restores rte_errno.
 * NOTE(review): excerpt is non-contiguous — declarations of `i`/`ret`,
 * braces, early-return and some error-branch lines are elided.
 */
131 mlx5_rxq_start(struct rte_eth_dev *dev)
133 struct mlx5_priv *priv = dev->data->dev_private;
137 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
138 if (mlx5_mprq_alloc_mp(dev)) {
139 /* Should not release Rx queues but return immediately. */
142 DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
143 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
144 DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
145 dev->data->port_id, priv->sh->device_attr.max_sge);
146 for (i = 0; i != priv->rxqs_n; ++i) {
147 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
148 struct rte_mempool *mp;
/* Only standard queues carry mempools/elements; hairpin queues do not. */
152 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
153 /* Pre-register Rx mempool. */
/* MPRQ-enabled queues use the dedicated mprq_mp; others the user pool. */
154 mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
155 rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
156 DRV_LOG(DEBUG, "Port %u Rx queue %u registering mp %s"
157 " having %u chunks.", dev->data->port_id,
158 rxq_ctrl->rxq.idx, mp->name, mp->nb_mem_chunks);
/* Populate the queue's memory-region cache from the mempool chunks. */
159 mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
160 ret = rxq_alloc_elts(rxq_ctrl);
164 MLX5_ASSERT(!rxq_ctrl->obj);
165 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
166 sizeof(*rxq_ctrl->obj), 0,
168 if (!rxq_ctrl->obj) {
170 "Port %u Rx queue %u can't allocate resources.",
171 dev->data->port_id, (*priv->rxqs)[i]->idx);
/* Create the backend-specific queue object via the ops table. */
175 ret = priv->obj_ops.rxq_obj_new(dev, i);
177 mlx5_free(rxq_ctrl->obj);
180 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
181 dev->data->port_id, i, (void *)&rxq_ctrl->obj);
182 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
/* Error path: unwind created queues while preserving the original errno. */
186 ret = rte_errno; /* Save rte_errno before cleanup. */
188 mlx5_rxq_release(dev, i);
190 rte_errno = ret; /* Restore rte_errno. */
195 * Binds Tx queues to Rx queues for hairpin.
197 * Binds Tx queues to the target Rx queues.
200 * Pointer to Ethernet device structure.
203 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Bind hairpin Tx queues to their peer Rx queues: for each hairpin Tx
 * queue, validate the peer Rx queue (must be hairpin type and point back
 * at this Tx queue), then move the SQ and the RQ from RST to RDY with
 * DevX modify commands, cross-referencing peer ids and the VHCA id.
 * Returns 0 on success; the error path releases both queue references.
 * NOTE(review): excerpt is non-contiguous — declarations of `i`/`ret`,
 * braces, `continue`s, gotos/labels and some error lines are elided.
 */
206 mlx5_hairpin_bind(struct rte_eth_dev *dev)
208 struct mlx5_priv *priv = dev->data->dev_private;
209 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
210 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
211 struct mlx5_txq_ctrl *txq_ctrl;
212 struct mlx5_rxq_ctrl *rxq_ctrl;
213 struct mlx5_devx_obj *sq;
214 struct mlx5_devx_obj *rq;
218 for (i = 0; i != priv->txqs_n; ++i) {
219 txq_ctrl = mlx5_txq_get(dev, i);
/* Only hairpin Tx queues need binding; skip standard queues. */
222 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
223 mlx5_txq_release(dev, i);
226 if (!txq_ctrl->obj) {
228 DRV_LOG(ERR, "port %u no txq object found: %d",
229 dev->data->port_id, i);
230 mlx5_txq_release(dev, i);
233 sq = txq_ctrl->obj->sq;
/* Look up the peer Rx queue recorded in the hairpin configuration. */
234 rxq_ctrl = mlx5_rxq_get(dev,
235 txq_ctrl->hairpin_conf.peers[0].queue);
237 mlx5_txq_release(dev, i);
239 DRV_LOG(ERR, "port %u no rxq object found: %d",
241 txq_ctrl->hairpin_conf.peers[0].queue);
/* The peer must be a hairpin Rx queue that points back at this Tx queue. */
244 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
245 rxq_ctrl->hairpin_conf.peers[0].queue != i) {
247 DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
248 "Rx queue %d", dev->data->port_id,
249 i, txq_ctrl->hairpin_conf.peers[0].queue);
252 rq = rxq_ctrl->obj->rq;
255 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
257 txq_ctrl->hairpin_conf.peers[0].queue);
/* Transition the SQ RST -> RDY, pointing it at the peer RQ/VHCA. */
260 sq_attr.state = MLX5_SQC_STATE_RDY;
261 sq_attr.sq_state = MLX5_SQC_STATE_RST;
262 sq_attr.hairpin_peer_rq = rq->id;
263 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
264 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
/* Transition the RQ RST -> RDY, pointing it at the peer SQ/VHCA. */
267 rq_attr.state = MLX5_SQC_STATE_RDY;
268 rq_attr.rq_state = MLX5_SQC_STATE_RST;
269 rq_attr.hairpin_peer_sq = sq->id;
270 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
271 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
274 mlx5_txq_release(dev, i);
275 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
/* Error path: drop both queue references taken above. */
279 mlx5_txq_release(dev, i);
280 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
285 * DPDK callback to start the device.
287 * Simulate device start by attaching all configured flows.
290 * Pointer to Ethernet device structure.
293 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Device start callback: bring the port up in order — dynamic-flag lookup
 * for fine-grained inline, RSS RETA configuration (when Rx queues exist),
 * Tx packet pacing, Tx/Rx queue creation, hairpin binding, Rx interrupt
 * vector, stats init, default control flows, dynamic metadata/timestamp
 * setup, default flow actions, burst-function selection and secondary
 * process notification; finally publish interrupt handler port ids.
 * On failure the visible cleanup tail stops everything in reverse order
 * and restores rte_errno.
 * NOTE(review): excerpt is non-contiguous — declarations (`ret`,
 * `fine_inline`), braces, `goto error` lines and several error branches
 * are elided.
 */
296 mlx5_dev_start(struct rte_eth_dev *dev)
298 struct mlx5_priv *priv = dev->data->dev_private;
302 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
/* Resolve the registered dynamic mbuf flag for fine-granularity inline. */
303 fine_inline = rte_mbuf_dynflag_lookup
304 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
306 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
308 rte_net_mlx5_dynf_inline_mask = 0;
309 if (dev->data->nb_rx_queues > 0) {
310 ret = mlx5_dev_configure_rss_reta(dev);
312 DRV_LOG(ERR, "port %u reta config failed: %s",
313 dev->data->port_id, strerror(rte_errno));
317 ret = mlx5_txpp_start(dev);
319 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
320 dev->data->port_id, strerror(rte_errno));
323 ret = mlx5_txq_start(dev);
325 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
326 dev->data->port_id, strerror(rte_errno));
329 ret = mlx5_rxq_start(dev);
331 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
332 dev->data->port_id, strerror(rte_errno));
335 ret = mlx5_hairpin_bind(dev);
337 DRV_LOG(ERR, "port %u hairpin binding failed: %s",
338 dev->data->port_id, strerror(rte_errno));
341 /* Set started flag here for the following steps like control flow. */
342 dev->data->dev_started = 1;
343 ret = mlx5_rx_intr_vec_enable(dev);
345 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
349 mlx5_os_stats_init(dev);
350 ret = mlx5_traffic_enable(dev);
352 DRV_LOG(ERR, "port %u failed to set defaults flows",
356 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
357 mlx5_flow_rxq_dynf_metadata_set(dev);
358 /* Set flags and context to convert Rx timestamps. */
359 mlx5_rxq_timestamp_set(dev);
360 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
361 mlx5_txq_dynf_timestamp_set(dev);
/*
 * In non-cached mode, it only needs to start the default mreg copy
 * action and no flow created by application exists anymore.
 * But it is worth wrapping the interface for further usage.
 */
367 ret = mlx5_flow_start_default(dev);
369 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
370 dev->data->port_id, strerror(rte_errno));
/* Datapath is ready: install the real burst functions. */
374 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
375 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
376 /* Enable datapath on secondary process. */
377 mlx5_mp_os_req_start_rxtx(dev);
/* Publish this port id to the shared interrupt handler when it exists. */
378 if (priv->sh->intr_handle.fd >= 0) {
379 priv->sh->port[priv->dev_port - 1].ih_port_id =
380 (uint32_t)dev->data->port_id;
382 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
384 dev->data->dev_conf.intr_conf.lsc = 0;
385 dev->data->dev_conf.intr_conf.rmv = 0;
387 if (priv->sh->intr_handle_devx.fd >= 0)
388 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
389 (uint32_t)dev->data->port_id;
/* Error path: tear everything down in reverse, keeping the errno. */
392 ret = rte_errno; /* Save rte_errno before cleanup. */
394 dev->data->dev_started = 0;
395 mlx5_flow_stop_default(dev);
396 mlx5_traffic_disable(dev);
399 mlx5_txpp_stop(dev); /* Stop last. */
400 rte_errno = ret; /* Restore rte_errno. */
405 * DPDK callback to stop the device.
407 * Simulate device stop by detaching all configured flows.
410 * Pointer to Ethernet device structure.
/*
 * Device stop callback: mark the port stopped, swap in the "removed"
 * burst stubs so the datapath fails safely, tell secondary processes to
 * stop, wait for in-flight bursts to drain, then remove default actions,
 * control flows and application flows, and unpublish the interrupt
 * handler port ids.
 * NOTE(review): excerpt is non-contiguous — braces and the function's
 * trailing lines (beyond publishing RTE_MAX_ETHPORTS) are elided.
 */
413 mlx5_dev_stop(struct rte_eth_dev *dev)
415 struct mlx5_priv *priv = dev->data->dev_private;
417 dev->data->dev_started = 0;
418 /* Prevent crashes when queues are still in use. */
419 dev->rx_pkt_burst = removed_rx_burst;
420 dev->tx_pkt_burst = removed_tx_burst;
422 /* Disable datapath on secondary process. */
423 mlx5_mp_os_req_stop_rxtx(dev);
/* Grace period scaled by queue count so active bursts can finish. */
424 usleep(1000 * priv->rxqs_n);
425 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
426 mlx5_flow_stop_default(dev);
427 /* Control flows for default traffic can be removed firstly. */
428 mlx5_traffic_disable(dev);
429 /* All RX queue flags will be cleared in the flush interface. */
430 mlx5_flow_list_flush(dev, &priv->flows, true);
431 mlx5_rx_intr_vec_disable(dev);
/* RTE_MAX_ETHPORTS marks "no port" for the shared interrupt handlers. */
432 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
433 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
442 * Enable traffic flows configured by control plane
445 * Pointer to Ethernet device private data.
447 * Pointer to Ethernet device structure.
450 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Install the control-plane flow rules that let default traffic reach the
 * port: hairpin Tx default flows, the E-Switch group-0 jump rule, LACP
 * miss handling, promiscuous/all-multicast rules when those modes are on,
 * broadcast/IPv6-multicast rules (per VLAN filter when filters exist),
 * and one unicast rule per configured MAC address (again per VLAN).
 * On any failure the visible tail flushes all control flows and restores
 * rte_errno.
 * NOTE(review): excerpt is non-contiguous — declarations of `i`/`j`/`ret`,
 * braces, `continue`s, `goto error` lines and some struct-initializer
 * closers are elided.
 */
453 mlx5_traffic_enable(struct rte_eth_dev *dev)
455 struct mlx5_priv *priv = dev->data->dev_private;
456 struct rte_flow_item_eth bcast = {
457 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
/* IPv6 multicast destination MACs all start with 33:33. */
459 struct rte_flow_item_eth ipv6_multi_spec = {
460 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
462 struct rte_flow_item_eth ipv6_multi_mask = {
463 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
465 struct rte_flow_item_eth unicast = {
466 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
468 struct rte_flow_item_eth unicast_mask = {
469 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
471 const unsigned int vlan_filter_n = priv->vlan_filter_n;
/* All-zero address used to skip unset MAC table entries below. */
472 const struct rte_ether_addr cmp = {
473 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
/*
 * Hairpin txq default flow should be created no matter if it is
 * isolation mode. Or else all the packets to be sent will be sent
 * out directly without the TX flow actions, e.g. encapsulation.
 */
484 for (i = 0; i != priv->txqs_n; ++i) {
485 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
488 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
489 ret = mlx5_ctrl_flow_source_queue(dev, i);
491 mlx5_txq_release(dev, i);
495 mlx5_txq_release(dev, i);
/* E-Switch default FDB rule — only meaningful on the PF, not VFs. */
497 if (priv->config.dv_esw_en && !priv->config.vf) {
498 if (mlx5_flow_create_esw_table_zero_flow(dev))
499 priv->fdb_def_rule = 1;
501 DRV_LOG(INFO, "port %u FDB default rule cannot be"
502 " configured - only Eswitch group 0 flows are"
503 " supported.", dev->data->port_id);
/* On bonded PFs, steer LACP to the kernel unless the user handles it. */
505 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
506 ret = mlx5_flow_lacp_miss(dev);
508 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
509 "forward LACP to kernel.", dev->data->port_id);
511 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
512 , dev->data->port_id);
/* Promiscuous: an all-zero spec/mask matches every packet. */
516 if (dev->data->promiscuous) {
517 struct rte_flow_item_eth promisc = {
518 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
519 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
523 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
/* All-multicast: match only the group bit of the destination MAC. */
527 if (dev->data->all_multicast) {
528 struct rte_flow_item_eth multicast = {
529 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
530 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
534 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
538 /* Add broadcast/multicast flows. */
539 for (i = 0; i != vlan_filter_n; ++i) {
540 uint16_t vlan = priv->vlan_filter[i];
542 struct rte_flow_item_vlan vlan_spec = {
543 .tci = rte_cpu_to_be_16(vlan),
545 struct rte_flow_item_vlan vlan_mask =
546 rte_flow_item_vlan_mask;
548 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
549 &vlan_spec, &vlan_mask);
552 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
554 &vlan_spec, &vlan_mask);
/* No VLAN filters: install the untagged broadcast/multicast rules. */
558 if (!vlan_filter_n) {
559 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
562 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
568 /* Add MAC address flows. */
569 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
570 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
/* Skip empty (all-zero) entries in the MAC address table. */
572 if (!memcmp(mac, &cmp, sizeof(*mac)))
574 memcpy(&unicast.dst.addr_bytes,
577 for (j = 0; j != vlan_filter_n; ++j) {
578 uint16_t vlan = priv->vlan_filter[j];
580 struct rte_flow_item_vlan vlan_spec = {
581 .tci = rte_cpu_to_be_16(vlan),
583 struct rte_flow_item_vlan vlan_mask =
584 rte_flow_item_vlan_mask;
586 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
593 if (!vlan_filter_n) {
594 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
/* Error path: flush every control flow installed above, keep errno. */
601 ret = rte_errno; /* Save rte_errno before cleanup. */
602 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
603 rte_errno = ret; /* Restore rte_errno. */
609 * Disable traffic flows configured by control plane
612 * Pointer to Ethernet device private data.
/*
 * Remove all control-plane flow rules installed by mlx5_traffic_enable()
 * by flushing the control-flow list (non-active flush).
 * NOTE(review): excerpt is non-contiguous — braces are elided.
 */
615 mlx5_traffic_disable(struct rte_eth_dev *dev)
617 struct mlx5_priv *priv = dev->data->dev_private;
619 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
623 * Restart traffic flows configured by control plane
626 * Pointer to Ethernet device private data.
629 * 0 on success, a negative errno value otherwise and rte_errno is set.
632 mlx5_traffic_restart(struct rte_eth_dev *dev)
634 if (dev->data->dev_started) {
635 mlx5_traffic_disable(dev);
636 return mlx5_traffic_enable(dev);