1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
9 #include <rte_ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
13 #include <mlx5_malloc.h>
17 #include "mlx5_rxtx.h"
18 #include "mlx5_utils.h"
19 #include "rte_pmd_mlx5.h"
22 * Stop traffic on Tx queues.
25 * Pointer to Ethernet device structure.
/*
 * Calls mlx5_txq_release() once for every Tx queue index in the range
 * [0, priv->txqs_n).
 */
28 mlx5_txq_stop(struct rte_eth_dev *dev)
/* The driver private data is reached through dev->data->dev_private. */
30 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per index; the loop stops when i equals txqs_n. */
33 for (i = 0; i != priv->txqs_n; ++i)
34 mlx5_txq_release(dev, i);
38 * Start traffic on Tx queues.
41 * Pointer to Ethernet device structure.
44 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * For every Tx queue index below priv->txqs_n: fetch the queue control
 * structure, allocate a struct mlx5_txq_obj for it, create the queue
 * object through priv->obj_ops.txq_obj_new(), allocate the fcqs array for
 * standard queues and link the object into priv->txqsobj.
 * The tail of the function saves rte_errno, calls mlx5_txq_release() and
 * restores rte_errno.
 */
47 mlx5_txq_start(struct rte_eth_dev *dev)
49 struct mlx5_priv *priv = dev->data->dev_private;
53 for (i = 0; i != priv->txqs_n; ++i) {
54 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
/*
 * NOTE(review): &txq_ctrl->txq is formed directly from the result of
 * mlx5_txq_get(); no NULL check on txq_ctrl is visible before this
 * line - confirm one guards the dereferences below.
 */
55 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
/* The same flags value is passed to both mlx5_malloc() calls below. */
56 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
/* Only queues of standard type go through txq_alloc_elts(). */
60 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
61 txq_alloc_elts(txq_ctrl);
/* The queue must not already have an object attached. */
62 MLX5_ASSERT(!txq_ctrl->obj);
63 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66 DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
67 "memory resources.", dev->data->port_id,
/* Object creation is delegated to the obj_ops callback table. */
72 ret = priv->obj_ops.txq_obj_new(dev, i);
/*
 * Frees the object allocated above; presumably this is the failure
 * path of txq_obj_new() - the guarding condition is not visible here.
 */
74 mlx5_free(txq_ctrl->obj);
/* Standard queues also get an fcqs array with cqe_s entries. */
78 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
79 size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
80 txq_data->fcqs = mlx5_malloc(flags, size,
83 if (!txq_data->fcqs) {
84 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
85 "allocate memory (FCQ).",
86 dev->data->port_id, i);
/*
 * NOTE(review): the %p argument is &txq_ctrl->obj, i.e. the address of
 * the pointer field inside txq_ctrl, not the address of the allocated
 * object - confirm this is what the log is meant to show.
 */
91 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
92 dev->data->port_id, i, (void *)&txq_ctrl->obj);
/* Link the new object into the list headed at priv->txqsobj. */
93 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
/* rte_errno is kept in ret so the release call cannot overwrite it. */
97 ret = rte_errno; /* Save rte_errno before cleanup. */
99 mlx5_txq_release(dev, i);
101 rte_errno = ret; /* Restore rte_errno. */
106 * Stop traffic on Rx queues.
109 * Pointer to Ethernet device structure.
/*
 * Calls mlx5_rxq_release() once for every Rx queue index in the range
 * [0, priv->rxqs_n).
 */
112 mlx5_rxq_stop(struct rte_eth_dev *dev)
/* The driver private data is reached through dev->data->dev_private. */
114 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per index; the loop stops when i equals rxqs_n. */
117 for (i = 0; i != priv->rxqs_n; ++i)
118 mlx5_rxq_release(dev, i);
122 * Start traffic on Rx queues.
125 * Pointer to Ethernet device structure.
128 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Calls mlx5_mprq_alloc_mp() first, logs two device attributes, then for
 * every Rx queue index below priv->rxqs_n: fetches the queue control
 * structure, registers mempools and allocates elements for standard
 * queues, allocates the queue object, creates it through
 * priv->obj_ops.rxq_obj_new() and links it into priv->rxqsobj.
 * The tail of the function saves rte_errno, calls mlx5_rxq_release() and
 * restores rte_errno.
 */
131 mlx5_rxq_start(struct rte_eth_dev *dev)
133 struct mlx5_priv *priv = dev->data->dev_private;
137 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
138 if (mlx5_mprq_alloc_mp(dev)) {
139 /* Should not release Rx queues but return immediately. */
/* Debug trace of max_qp_wr and max_sge from priv->sh->device_attr. */
142 DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
143 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
144 DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
145 dev->data->port_id, priv->sh->device_attr.max_sge);
146 for (i = 0; i != priv->rxqs_n; ++i) {
147 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
/* Mempool registration and element allocation apply to standard queues. */
151 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
152 /* Pre-register Rx mempools. */
/* With MPRQ enabled the single rxq.mprq_mp pool is registered. */
153 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
154 mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
155 rxq_ctrl->rxq.mprq_mp);
/*
 * This loop visits the mempool of each of the rxseg_n segments;
 * presumably it is the non-MPRQ alternative to the call above - the
 * line naming the callee is not visible in this listing.
 */
159 for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
161 (dev, &rxq_ctrl->rxq.mr_ctrl,
162 rxq_ctrl->rxq.rxseg[s].mp);
164 ret = rxq_alloc_elts(rxq_ctrl);
/* The queue must not already have an object attached. */
168 MLX5_ASSERT(!rxq_ctrl->obj);
169 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
170 sizeof(*rxq_ctrl->obj), 0,
172 if (!rxq_ctrl->obj) {
174 "Port %u Rx queue %u can't allocate resources.",
175 dev->data->port_id, (*priv->rxqs)[i]->idx);
/* Object creation is delegated to the obj_ops callback table. */
179 ret = priv->obj_ops.rxq_obj_new(dev, i);
/*
 * Frees the object allocated above; presumably this is the failure
 * path of rxq_obj_new() - the guarding condition is not visible here.
 */
181 mlx5_free(rxq_ctrl->obj);
/*
 * NOTE(review): the %p argument is &rxq_ctrl->obj, i.e. the address of
 * the pointer field inside rxq_ctrl, not the address of the allocated
 * object - confirm this is what the log is meant to show.
 */
184 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
185 dev->data->port_id, i, (void *)&rxq_ctrl->obj);
/* Link the new object into the list headed at priv->rxqsobj. */
186 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
/* rte_errno is kept in ret so the release call cannot overwrite it. */
190 ret = rte_errno; /* Save rte_errno before cleanup. */
192 mlx5_rxq_release(dev, i);
194 rte_errno = ret; /* Restore rte_errno. */
199 * Binds Tx queues to Rx queues for hairpin.
201 * Binds Tx queues to the target Rx queues.
204 * Pointer to Ethernet device structure.
207 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Walks every Tx queue index below priv->txqs_n. Queues whose type is not
 * MLX5_TXQ_TYPE_HAIRPIN are released again. For a hairpin Tx queue the
 * peer Rx queue index is read from hairpin_conf.peers[0].queue, the Rx
 * queue is checked to be a hairpin queue whose own peers[0].queue equals
 * the Tx index, and then the SQ and RQ are modified so that each carries
 * the id of the other together with the local vhca_id.
 */
210 mlx5_hairpin_bind(struct rte_eth_dev *dev)
212 struct mlx5_priv *priv = dev->data->dev_private;
/* Both attribute structures start zeroed and are filled per queue pair. */
213 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
214 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
215 struct mlx5_txq_ctrl *txq_ctrl;
216 struct mlx5_rxq_ctrl *rxq_ctrl;
217 struct mlx5_devx_obj *sq;
218 struct mlx5_devx_obj *rq;
222 for (i = 0; i != priv->txqs_n; ++i) {
223 txq_ctrl = mlx5_txq_get(dev, i);
/* Non-hairpin Tx queues only have the reference from the get dropped. */
226 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
227 mlx5_txq_release(dev, i);
/* A hairpin Tx queue without an object is reported and released. */
230 if (!txq_ctrl->obj) {
232 DRV_LOG(ERR, "port %u no txq object found: %d",
233 dev->data->port_id, i);
234 mlx5_txq_release(dev, i);
237 sq = txq_ctrl->obj->sq;
/* The peer Rx queue index comes from the first hairpin peer entry. */
238 rxq_ctrl = mlx5_rxq_get(dev,
239 txq_ctrl->hairpin_conf.peers[0].queue);
/*
 * NOTE(review): txq_ctrl->hairpin_conf is read for the log message
 * after mlx5_txq_release(dev, i) has been called; whether the release
 * can free txq_ctrl is not visible here - confirm.
 */
241 mlx5_txq_release(dev, i);
243 DRV_LOG(ERR, "port %u no rxq object found: %d",
245 txq_ctrl->hairpin_conf.peers[0].queue);
/* The Rx side must be a hairpin queue that points back at Tx queue i. */
248 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
249 rxq_ctrl->hairpin_conf.peers[0].queue != i) {
251 DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
252 "Rx queue %d", dev->data->port_id,
253 i, txq_ctrl->hairpin_conf.peers[0].queue);
256 rq = rxq_ctrl->obj->rq;
259 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
261 txq_ctrl->hairpin_conf.peers[0].queue);
/* SQ side: requested state RDY, current state RST, peer is the RQ id. */
264 sq_attr.state = MLX5_SQC_STATE_RDY;
265 sq_attr.sq_state = MLX5_SQC_STATE_RST;
266 sq_attr.hairpin_peer_rq = rq->id;
267 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
268 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
/*
 * RQ side: peer is the SQ id.
 * NOTE(review): the RQ attributes are set with MLX5_SQC_STATE_*
 * constants; presumably these have the same values as the RQ
 * counterparts - confirm.
 */
271 rq_attr.state = MLX5_SQC_STATE_RDY;
272 rq_attr.rq_state = MLX5_SQC_STATE_RST;
273 rq_attr.hairpin_peer_sq = sq->id;
274 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
275 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
/*
 * Both queues are released here and again below; presumably one pair
 * belongs to the normal path and the other to the error path - the
 * separating lines are not visible. In both pairs txq_ctrl is read
 * after the Tx release (same review note as above).
 */
278 mlx5_txq_release(dev, i);
279 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
283 mlx5_txq_release(dev, i);
284 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
289 * DPDK callback to start the device.
291 * Simulate device start by attaching all configured flows.
294 * Pointer to Ethernet device structure.
297 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Device start sequence, in the order visible below: dynamic inline flag
 * lookup, RSS RETA configuration (only with Rx queues), Tx packet pacing
 * start, Tx queue start, Rx queue start, hairpin binding, Rx interrupt
 * vector enable, statistics init, control-flow enable, per-queue dynamic
 * metadata/timestamp setup, default flow start, burst function selection,
 * secondary-process notification and interrupt port-id bookkeeping.
 * The tail of the function is a cleanup path that preserves rte_errno.
 */
300 mlx5_dev_start(struct rte_eth_dev *dev)
302 struct mlx5_priv *priv = dev->data->dev_private;
306 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
/*
 * A non-negative lookup result is used as the bit position of the inline
 * mask; the assignment of 0 below is presumably the else branch (its
 * keyword line is not visible).
 */
307 fine_inline = rte_mbuf_dynflag_lookup
308 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
309 if (fine_inline >= 0)
310 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
312 rte_net_mlx5_dynf_inline_mask = 0;
/* RETA configuration is attempted only when Rx queues exist. */
313 if (dev->data->nb_rx_queues > 0) {
314 ret = mlx5_dev_configure_rss_reta(dev);
316 DRV_LOG(ERR, "port %u reta config failed: %s",
317 dev->data->port_id, strerror(rte_errno));
/* Tx packet pacing is started before the queues. */
321 ret = mlx5_txpp_start(dev);
323 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
324 dev->data->port_id, strerror(rte_errno));
327 ret = mlx5_txq_start(dev);
329 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
330 dev->data->port_id, strerror(rte_errno));
333 ret = mlx5_rxq_start(dev);
335 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
336 dev->data->port_id, strerror(rte_errno));
/* Hairpin binding runs after both Tx and Rx queue start calls. */
339 ret = mlx5_hairpin_bind(dev);
341 DRV_LOG(ERR, "port %u hairpin binding failed: %s",
342 dev->data->port_id, strerror(rte_errno));
345 /* Set started flag here for the following steps like control flow. */
346 dev->data->dev_started = 1;
347 ret = mlx5_rx_intr_vec_enable(dev);
349 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
353 mlx5_os_stats_init(dev);
354 ret = mlx5_traffic_enable(dev);
356 DRV_LOG(ERR, "port %u failed to set defaults flows",
360 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
361 mlx5_flow_rxq_dynf_metadata_set(dev);
362 /* Set flags and context to convert Rx timestamps. */
363 mlx5_rxq_timestamp_set(dev);
364 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
365 mlx5_txq_dynf_timestamp_set(dev);
367 * In non-cached mode, it only needs to start the default mreg copy
368 * action and no flow created by application exists anymore.
369 * But it is worth wrapping the interface for further usage.
/* Note that this failure is logged at DEBUG level, unlike the ERR logs above. */
371 ret = mlx5_flow_start_default(dev);
373 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
374 dev->data->port_id, strerror(rte_errno));
/* Install the burst callbacks chosen by the select helpers. */
378 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
379 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
380 /* Enable datapath on secondary process. */
381 mlx5_mp_os_req_start_rxtx(dev);
/*
 * With a valid interrupt handle fd, this port id is recorded in the shared
 * port slot at index dev_port - 1. The INFO log and the clearing of lsc
 * and rmv below are presumably the else branch (its keyword line is not
 * visible).
 */
382 if (priv->sh->intr_handle.fd >= 0) {
383 priv->sh->port[priv->dev_port - 1].ih_port_id =
384 (uint32_t)dev->data->port_id;
386 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
388 dev->data->dev_conf.intr_conf.lsc = 0;
389 dev->data->dev_conf.intr_conf.rmv = 0;
/* Same bookkeeping for the DevX interrupt handle. */
391 if (priv->sh->intr_handle_devx.fd >= 0)
392 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
393 (uint32_t)dev->data->port_id;
/*
 * Cleanup path: the started flag is cleared and default flows and control
 * flows are torn down, with packet pacing stopped last. Lines between
 * the visible calls are missing from this listing.
 */
396 ret = rte_errno; /* Save rte_errno before cleanup. */
398 dev->data->dev_started = 0;
399 mlx5_flow_stop_default(dev);
400 mlx5_traffic_disable(dev);
403 mlx5_txpp_stop(dev); /* Stop last. */
404 rte_errno = ret; /* Restore rte_errno. */
409 * DPDK callback to stop the device.
411 * Simulate device stop by detaching all configured flows.
414 * Pointer to Ethernet device structure.
/*
 * Device stop sequence, in the order visible below: clear the started
 * flag, swap in the removed_* burst callbacks, notify secondary
 * processes, sleep, stop default flows, disable control flows, flush the
 * flow list, disable Rx interrupt vectors and reset the interrupt port-id
 * slots of this port.
 */
417 mlx5_dev_stop(struct rte_eth_dev *dev)
419 struct mlx5_priv *priv = dev->data->dev_private;
421 dev->data->dev_started = 0;
422 /* Prevent crashes when queues are still in use. */
423 dev->rx_pkt_burst = removed_rx_burst;
424 dev->tx_pkt_burst = removed_tx_burst;
426 /* Disable datapath on secondary process. */
427 mlx5_mp_os_req_stop_rxtx(dev);
/*
 * Sleeps 1000 microseconds per Rx queue; presumably this lets bursts that
 * already entered the old callbacks finish - confirm.
 */
428 usleep(1000 * priv->rxqs_n);
429 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
430 mlx5_flow_stop_default(dev);
431 /* Control flows for default traffic can be removed firstly. */
432 mlx5_traffic_disable(dev);
433 /* All RX queue flags will be cleared in the flush interface. */
434 mlx5_flow_list_flush(dev, &priv->flows, true);
435 mlx5_rx_intr_vec_disable(dev);
/*
 * Both interrupt port-id slots at index dev_port - 1 are set to
 * RTE_MAX_ETHPORTS; presumably that value marks "no port" - confirm.
 */
436 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
437 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
446 * Enable traffic flows configured by control plane.
449 * Pointer to Ethernet device private data.
451 * Pointer to Ethernet device structure.
454 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Installs the control flows of the port, in the order visible below:
 * a source-queue flow for each hairpin Tx queue, the E-Switch table-zero
 * flow, the LACP miss flow, the promiscuous flow, the all-multicast flow,
 * broadcast and IPv6 multicast flows (per VLAN filter or untagged) and
 * one unicast flow per non-zero MAC address (per VLAN filter or
 * untagged). The tail flushes priv->ctrl_flows while preserving
 * rte_errno.
 */
457 mlx5_traffic_enable(struct rte_eth_dev *dev)
459 struct mlx5_priv *priv = dev->data->dev_private;
/* Broadcast destination; the same item is used as spec and mask below. */
460 struct rte_flow_item_eth bcast = {
461 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
/* IPv6 multicast: destination prefix 33:33 matched on its first two bytes. */
463 struct rte_flow_item_eth ipv6_multi_spec = {
464 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
466 struct rte_flow_item_eth ipv6_multi_mask = {
467 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
/*
 * NOTE(review): the initializer names .src, while the MAC loop below
 * copies into unicast.dst and the mask covers .dst - confirm that .src
 * is intended here.
 */
469 struct rte_flow_item_eth unicast = {
470 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
472 struct rte_flow_item_eth unicast_mask = {
473 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
/* Snapshot of the VLAN filter count used by all loops below. */
475 const unsigned int vlan_filter_n = priv->vlan_filter_n;
/* All-zero address that MAC table entries are compared against. */
476 const struct rte_ether_addr cmp = {
477 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
484 * Hairpin txq default flow should be created no matter if it is
485 * isolation mode. Or else all the packets to be sent will be sent
486 * out directly without the TX flow actions, e.g. encapsulation.
488 for (i = 0; i != priv->txqs_n; ++i) {
489 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
/* Only hairpin Tx queues get a source-queue control flow. */
492 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
493 ret = mlx5_ctrl_flow_source_queue(dev, i);
495 mlx5_txq_release(dev, i);
499 mlx5_txq_release(dev, i);
/*
 * With dv_esw_en set and vf clear, a non-zero result of the table-zero
 * flow creation sets fdb_def_rule; the INFO log is presumably the else
 * branch (its keyword line is not visible).
 */
501 if (priv->config.dv_esw_en && !priv->config.vf) {
502 if (mlx5_flow_create_esw_table_zero_flow(dev))
503 priv->fdb_def_rule = 1;
505 DRV_LOG(INFO, "port %u FDB default rule cannot be"
506 " configured - only Eswitch group 0 flows are"
507 " supported.", dev->data->port_id);
/* The LACP miss flow is attempted unless lacp_by_user is set; needs pf_bond >= 0. */
509 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
510 ret = mlx5_flow_lacp_miss(dev);
512 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
513 "forward LACP to kernel.", dev->data->port_id);
515 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
516 , dev->data->port_id);
/* Promiscuous mode: an all-zero item is passed as both spec and mask. */
520 if (dev->data->promiscuous) {
521 struct rte_flow_item_eth promisc = {
522 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
523 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
527 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
/* All-multicast: only the lowest bit of the first destination byte is set. */
531 if (dev->data->all_multicast) {
532 struct rte_flow_item_eth multicast = {
533 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
534 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
538 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
542 /* Add broadcast/multicast flows. */
543 for (i = 0; i != vlan_filter_n; ++i) {
544 uint16_t vlan = priv->vlan_filter[i];
/* The VLAN id is converted to big-endian for the TCI field. */
546 struct rte_flow_item_vlan vlan_spec = {
547 .tci = rte_cpu_to_be_16(vlan),
549 struct rte_flow_item_vlan vlan_mask =
550 rte_flow_item_vlan_mask;
552 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
553 &vlan_spec, &vlan_mask);
556 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
558 &vlan_spec, &vlan_mask);
/* Without VLAN filters the same two flows are created untagged. */
562 if (!vlan_filter_n) {
563 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
566 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
572 /* Add MAC address flows. */
573 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
574 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
/*
 * Entries equal to the all-zero address are detected here; presumably
 * they are skipped - the statement under this condition is not visible.
 */
576 if (!memcmp(mac, &cmp, sizeof(*mac)))
/* The current MAC becomes the destination of the unicast item. */
578 memcpy(&unicast.dst.addr_bytes,
581 for (j = 0; j != vlan_filter_n; ++j) {
582 uint16_t vlan = priv->vlan_filter[j];
584 struct rte_flow_item_vlan vlan_spec = {
585 .tci = rte_cpu_to_be_16(vlan),
587 struct rte_flow_item_vlan vlan_mask =
588 rte_flow_item_vlan_mask;
590 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
/* Without VLAN filters a single untagged unicast flow is created. */
597 if (!vlan_filter_n) {
598 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
/* Cleanup: flush the control-flow list while keeping rte_errno intact. */
605 ret = rte_errno; /* Save rte_errno before cleanup. */
606 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
607 rte_errno = ret; /* Restore rte_errno. */
613 * Disable traffic flows configured by control plane.
616 * Pointer to Ethernet device structure.
/*
 * Flushes the control-flow list priv->ctrl_flows of the given device by
 * calling mlx5_flow_list_flush() with false as the last argument.
 */
619 mlx5_traffic_disable(struct rte_eth_dev *dev)
/* The driver private data is reached through dev->data->dev_private. */
621 struct mlx5_priv *priv = dev->data->dev_private;
/* Same flush call as the cleanup tail of mlx5_traffic_enable(). */
623 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
627 * Restart traffic flows configured by control plane.
630 * Pointer to Ethernet device structure.
633 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * When the device is marked started, removes the control flows with
 * mlx5_traffic_disable() and returns the result of re-creating them with
 * mlx5_traffic_enable(). The path taken when the device is not started is
 * not visible in this listing.
 */
636 mlx5_traffic_restart(struct rte_eth_dev *dev)
/* Nothing is torn down or re-created unless dev_started is set. */
638 if (dev->data->dev_started) {
639 mlx5_traffic_disable(dev);
640 return mlx5_traffic_enable(dev);