1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
9 #include <rte_ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
15 #include "mlx5_rxtx.h"
16 #include "mlx5_utils.h"
17 #include "rte_pmd_mlx5.h"
20 * Stop traffic on Tx queues.
23 * Pointer to Ethernet device structure.
/*
 * mlx5_txq_stop: call mlx5_txq_release() once for every Tx queue index
 * below priv->txqs_n.
 * NOTE(review): this listing omits lines (return type, braces, the
 * declaration of `i`); only the visible statements are described.
 */
26 mlx5_txq_stop(struct rte_eth_dev *dev)
/* Driver-private state is reached through the generic ethdev data. */
28 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per index in [0, priv->txqs_n). */
31 for (i = 0; i != priv->txqs_n; ++i)
32 mlx5_txq_release(dev, i);
36 * Start traffic on Tx queues.
39 * Pointer to Ethernet device structure.
42 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_txq_start: walk the Tx queue indexes below priv->txqs_n and create a
 * queue object for each with mlx5_txq_obj_new(), storing the result in
 * txq_ctrl->obj. Per the preceding header comment it returns 0 on success,
 * or a negative errno value with rte_errno set.
 * NOTE(review): this listing omits lines (braces, the `else`, any NULL
 * checks, the error label and the returns), so the branch structure noted
 * below is partly inferred and should be confirmed against the full file.
 */
45 mlx5_txq_start(struct rte_eth_dev *dev)
47 struct mlx5_priv *priv = dev->data->dev_private;
51 for (i = 0; i != priv->txqs_n; ++i) {
/* Look up the control structure of Tx queue i. */
52 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
/* Hairpin queues are created as DevX hairpin objects. */
56 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
57 txq_ctrl->obj = mlx5_txq_obj_new
58 (dev, i, MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN);
/*
 * Presumably the non-hairpin branch (the `else` line is not visible): call
 * txq_alloc_elts(), then create a DevX SQ object when priv->txpp_en is set
 * and an IBV object otherwise.
 */
60 txq_alloc_elts(txq_ctrl);
61 txq_ctrl->obj = mlx5_txq_obj_new
62 (dev, i, priv->txpp_en ?
63 MLX5_TXQ_OBJ_TYPE_DEVX_SQ :
64 MLX5_TXQ_OBJ_TYPE_IBV);
/*
 * Error path: rte_errno is saved around the release call(s) and restored
 * afterwards, so the value the caller sees is the one set before cleanup.
 * NOTE(review): the loop that drives mlx5_txq_release() here is not visible.
 */
73 ret = rte_errno; /* Save rte_errno before cleanup. */
75 mlx5_txq_release(dev, i);
77 rte_errno = ret; /* Restore rte_errno. */
82 * Stop traffic on Rx queues.
85 * Pointer to Ethernet device structure.
/*
 * mlx5_rxq_stop: call mlx5_rxq_release() once for every Rx queue index
 * below priv->rxqs_n.
 * NOTE(review): this listing omits lines (return type, braces, the
 * declaration of `i`); only the visible statements are described.
 */
88 mlx5_rxq_stop(struct rte_eth_dev *dev)
/* Driver-private state is reached through the generic ethdev data. */
90 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per index in [0, priv->rxqs_n). */
93 for (i = 0; i != priv->rxqs_n; ++i)
94 mlx5_rxq_release(dev, i);
98 * Start traffic on Rx queues.
101 * Pointer to Ethernet device structure.
104 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_rxq_start: choose the Rx queue object type, set up the Multi-Packet RQ
 * mempool via mlx5_mprq_alloc_mp(), then for each Rx queue index below
 * priv->rxqs_n register its mempool, call rxq_alloc_elts() and create the
 * queue object with mlx5_rxq_obj_new(). Per the preceding header comment it
 * returns 0 on success, or a negative errno value with rte_errno set.
 * NOTE(review): this listing omits lines (braces, `continue`/`goto`/`return`
 * statements, the DRV_LOG opener, the error label), so control flow between
 * the visible statements is partly inferred.
 */
107 mlx5_rxq_start(struct rte_eth_dev *dev)
109 struct mlx5_priv *priv = dev->data->dev_private;
/* The IBV object type is the default; it may be overridden by the scan below. */
112 enum mlx5_rxq_obj_type obj_type = MLX5_RXQ_OBJ_TYPE_IBV;
113 struct mlx5_rxq_data *rxq = NULL;
/*
 * First pass: a non-NULL queue with rxq->lro set switches obj_type to the
 * DevX RQ type. obj_type is a single variable, so the choice applies to every
 * non-hairpin queue created in the second pass.
 */
115 for (i = 0; i < priv->rxqs_n; ++i) {
116 rxq = (*priv->rxqs)[i];
117 if (rxq && rxq->lro) {
118 obj_type = MLX5_RXQ_OBJ_TYPE_DEVX_RQ;
122 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
123 if (mlx5_mprq_alloc_mp(dev)) {
124 /* Should not release Rx queues but return immediately. */
/* Second pass: create the per-queue objects. */
127 for (i = 0; i != priv->rxqs_n; ++i) {
128 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
129 struct rte_mempool *mp;
/* Hairpin queues are created as DevX hairpin objects. */
133 if (rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN) {
134 rxq_ctrl->obj = mlx5_rxq_obj_new
135 (dev, i, MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN);
140 /* Pre-register Rx mempool. */
/* Use the MPRQ mempool when MPRQ is enabled on this queue, else the regular one. */
141 mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
142 rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
144 "port %u Rx queue %u registering"
145 " mp %s having %u chunks",
146 dev->data->port_id, rxq_ctrl->rxq.idx,
147 mp->name, mp->nb_mem_chunks);
148 mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
149 ret = rxq_alloc_elts(rxq_ctrl);
152 rxq_ctrl->obj = mlx5_rxq_obj_new(dev, i, obj_type);
/*
 * Record the work queue number in rxq_ctrl->wqn; where it comes from depends
 * on the object type (ibv_wq wq_num for IBV, rq->id for DevX RQ).
 * NOTE(review): the assignment target for the IBV case is on a line not shown.
 */
155 if (obj_type == MLX5_RXQ_OBJ_TYPE_IBV)
157 ((struct ibv_wq *)(rxq_ctrl->obj->wq))->wq_num;
158 else if (obj_type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ)
159 rxq_ctrl->wqn = rxq_ctrl->obj->rq->id;
/*
 * Error path: rte_errno is saved around the release call(s) and restored
 * afterwards, so the value the caller sees is the one set before cleanup.
 * NOTE(review): the loop that drives mlx5_rxq_release() here is not visible.
 */
163 ret = rte_errno; /* Save rte_errno before cleanup. */
165 mlx5_rxq_release(dev, i);
167 rte_errno = ret; /* Restore rte_errno. */
172 * Binds Tx queues to Rx queues for hairpin.
174 * Binds Tx queues to the target Rx queues.
177 * Pointer to Ethernet device structure.
180 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_hairpin_bind: for every hairpin Tx queue, look up the Rx queue named
 * by txq_ctrl->hairpin_conf.peers[0].queue, check that the pair refers to
 * each other, and modify both the SQ and the RQ with each other's id.
 * Per the preceding header comment it returns 0 on success, or a negative
 * errno value with rte_errno set.
 * NOTE(review): this listing omits lines (braces, `continue`/`goto`/`return`,
 * rte_errno assignments, some conditions), so which visible statements belong
 * to which error branch is partly inferred.
 */
183 mlx5_hairpin_bind(struct rte_eth_dev *dev)
185 struct mlx5_priv *priv = dev->data->dev_private;
186 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
187 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
188 struct mlx5_txq_ctrl *txq_ctrl;
189 struct mlx5_rxq_ctrl *rxq_ctrl;
190 struct mlx5_devx_obj *sq;
191 struct mlx5_devx_obj *rq;
195 for (i = 0; i != priv->txqs_n; ++i) {
196 txq_ctrl = mlx5_txq_get(dev, i);
/* Non-hairpin Tx queues: release the queue obtained by mlx5_txq_get(). */
199 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
200 mlx5_txq_release(dev, i);
/* A hairpin Tx queue without an object is logged as an error and released. */
203 if (!txq_ctrl->obj) {
205 DRV_LOG(ERR, "port %u no txq object found: %d",
206 dev->data->port_id, i);
207 mlx5_txq_release(dev, i);
210 sq = txq_ctrl->obj->sq;
/* The peer Rx queue index comes from the Tx queue's hairpin configuration. */
211 rxq_ctrl = mlx5_rxq_get(dev,
212 txq_ctrl->hairpin_conf.peers[0].queue);
214 mlx5_txq_release(dev, i);
216 DRV_LOG(ERR, "port %u no rxq object found: %d",
218 txq_ctrl->hairpin_conf.peers[0].queue);
/* The peer must itself be a hairpin Rx queue whose configured peer is Tx queue i. */
221 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
222 rxq_ctrl->hairpin_conf.peers[0].queue != i) {
224 DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
225 "Rx queue %d", dev->data->port_id,
226 i, txq_ctrl->hairpin_conf.peers[0].queue);
229 rq = rxq_ctrl->obj->rq;
232 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
234 txq_ctrl->hairpin_conf.peers[0].queue);
/* Modify the SQ: current state RST, new state RDY, peer set to the RQ id. */
237 sq_attr.state = MLX5_SQC_STATE_RDY;
238 sq_attr.sq_state = MLX5_SQC_STATE_RST;
239 sq_attr.hairpin_peer_rq = rq->id;
240 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
241 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
/*
 * Modify the RQ the same way, with the SQ id as peer.
 * NOTE(review): the RQ attributes are set with MLX5_SQC_STATE_* constants;
 * confirm these have the same numeric values as the RQ state constants.
 */
244 rq_attr.state = MLX5_SQC_STATE_RDY;
245 rq_attr.rq_state = MLX5_SQC_STATE_RST;
246 rq_attr.hairpin_peer_sq = sq->id;
247 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
248 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
/*
 * Both queues obtained by the get calls above are released. The same pair of
 * calls appears twice; presumably once at the end of a loop iteration and
 * once on the error path - TODO confirm against the full file.
 */
251 mlx5_txq_release(dev, i);
252 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
256 mlx5_txq_release(dev, i);
257 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
262 * DPDK callback to start the device.
264 * Simulate device start by attaching all configured flows.
267 * Pointer to Ethernet device structure.
270 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_dev_start: device start callback. The visible steps, in order:
 * look up the fine-granularity inline dynamic flag, configure the RSS
 * redirection table (only when there are Rx queues), start Tx packet pacing,
 * start Tx queues, start Rx queues, bind hairpin queues, mark the device
 * started, enable Rx interrupt vectors, initialize stats, enable control
 * traffic flows, set per-queue metadata/timestamp settings, start default
 * flow actions, select the burst functions, and record this port id for the
 * shared interrupt handlers. Per the preceding header comment it returns 0 on
 * success, or a negative errno value with rte_errno set.
 * NOTE(review): this listing omits lines (the `if (ret)` checks, `goto`s,
 * labels, braces, returns), so the exact rollback entry points are not
 * visible here.
 */
273 mlx5_dev_start(struct rte_eth_dev *dev)
275 struct mlx5_priv *priv = dev->data->dev_private;
279 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
/*
 * The inline mask is either a single bit at position fine_inline or 0.
 * Presumably the choice depends on whether the lookup succeeded; the
 * condition is on a line not shown.
 */
280 fine_inline = rte_mbuf_dynflag_lookup
281 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
283 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
285 rte_net_mlx5_dynf_inline_mask = 0;
/* The RSS redirection table is configured only when Rx queues exist. */
286 if (dev->data->nb_rx_queues > 0) {
287 ret = mlx5_dev_configure_rss_reta(dev);
289 DRV_LOG(ERR, "port %u reta config failed: %s",
290 dev->data->port_id, strerror(rte_errno));
/* Packet pacing is started before the Tx queues (and stopped last on error). */
294 ret = mlx5_txpp_start(dev);
296 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
297 dev->data->port_id, strerror(rte_errno));
300 ret = mlx5_txq_start(dev);
302 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
303 dev->data->port_id, strerror(rte_errno));
306 ret = mlx5_rxq_start(dev);
308 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
309 dev->data->port_id, strerror(rte_errno));
/* Hairpin binding runs after both the Tx and Rx queue start calls. */
312 ret = mlx5_hairpin_bind(dev);
314 DRV_LOG(ERR, "port %u hairpin binding failed: %s",
315 dev->data->port_id, strerror(rte_errno));
318 /* Set started flag here for the following steps like control flow. */
319 dev->data->dev_started = 1;
320 ret = mlx5_rx_intr_vec_enable(dev);
322 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
326 mlx5_os_stats_init(dev);
327 ret = mlx5_traffic_enable(dev);
329 DRV_LOG(ERR, "port %u failed to set defaults flows",
333 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
334 mlx5_flow_rxq_dynf_metadata_set(dev);
335 /* Set flags and context to convert Rx timestamps. */
336 mlx5_rxq_timestamp_set(dev);
337 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
338 mlx5_txq_dynf_timestamp_set(dev);
340 * In non-cached mode, it only needs to start the default mreg copy
341 * action and no flow created by application exists anymore.
342 * But it is worth wrapping the interface for further usage.
344 ret = mlx5_flow_start_default(dev);
346 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
347 dev->data->port_id, strerror(rte_errno));
/* Install the burst functions chosen by the selector helpers. */
351 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
352 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
353 /* Enable datapath on secondary process. */
354 mlx5_mp_os_req_start_rxtx(dev);
/*
 * With a valid shared interrupt fd, record this port id in the shared
 * per-port slot (indexed by dev_port - 1). The other visible branch logs
 * that the port starts without LSC/RMV interrupts and clears both in the
 * device configuration.
 */
355 if (priv->sh->intr_handle.fd >= 0) {
356 priv->sh->port[priv->dev_port - 1].ih_port_id =
357 (uint32_t)dev->data->port_id;
359 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
361 dev->data->dev_conf.intr_conf.lsc = 0;
362 dev->data->dev_conf.intr_conf.rmv = 0;
/* Same bookkeeping for the DevX interrupt handle. */
364 if (priv->sh->intr_handle_devx.fd >= 0)
365 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
366 (uint32_t)dev->data->port_id;
/*
 * Error path: rte_errno is saved, the started flag is cleared, default flows
 * and control traffic are torn down, packet pacing is stopped last, and
 * rte_errno is restored so the caller sees the original failure code.
 * NOTE(review): two lines between traffic disable and txpp stop are not
 * visible here.
 */
369 ret = rte_errno; /* Save rte_errno before cleanup. */
371 dev->data->dev_started = 0;
372 mlx5_flow_stop_default(dev);
373 mlx5_traffic_disable(dev);
376 mlx5_txpp_stop(dev); /* Stop last. */
377 rte_errno = ret; /* Restore rte_errno. */
382 * DPDK callback to stop the device.
384 * Simulate device stop by detaching all configured flows.
387 * Pointer to Ethernet device structure.
/*
 * mlx5_dev_stop: device stop callback. The visible steps: clear the started
 * flag, swap in the "removed" burst functions, ask secondary processes to
 * stop their datapath, sleep, stop default flows, disable control traffic,
 * flush the flows on priv->flows, disable Rx interrupt vectors, and reset the
 * shared per-port interrupt port ids.
 * NOTE(review): this listing omits lines (return type, braces, and the
 * statements after the last visible line), so later teardown steps are not
 * described.
 */
390 mlx5_dev_stop(struct rte_eth_dev *dev)
392 struct mlx5_priv *priv = dev->data->dev_private;
394 dev->data->dev_started = 0;
395 /* Prevent crashes when queues are still in use. */
396 dev->rx_pkt_burst = removed_rx_burst;
397 dev->tx_pkt_burst = removed_tx_burst;
399 /* Disable datapath on secondary process. */
400 mlx5_mp_os_req_stop_rxtx(dev);
/*
 * Sleep for 1000 microseconds per Rx queue; presumably this lets in-flight
 * bursts drain before teardown - TODO confirm intent.
 */
401 usleep(1000 * priv->rxqs_n);
402 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
403 mlx5_flow_stop_default(dev);
404 /* Control flows for default traffic can be removed firstly. */
405 mlx5_traffic_disable(dev);
406 /* All RX queue flags will be cleared in the flush interface. */
407 mlx5_flow_list_flush(dev, &priv->flows, true);
408 mlx5_rx_intr_vec_disable(dev);
/* Reset both shared interrupt port-id slots for this port to RTE_MAX_ETHPORTS. */
409 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
410 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
417 * Enable traffic flows configured by control plane
420 * Pointer to Ethernet device private data.
422 * Pointer to Ethernet device structure.
425 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_traffic_enable: create the control flows. The visible steps: a source
 * queue flow for every hairpin Tx queue, the E-Switch table-zero flow, the
 * LACP miss flow, then promiscuous / all-multicast flows when those modes are
 * set, broadcast and IPv6 multicast flows (per VLAN filter, or untagged when
 * no VLAN filter exists), and one unicast flow per non-zero MAC address
 * (again per VLAN filter or untagged). On failure the flows on
 * priv->ctrl_flows are flushed with rte_errno preserved. Per the preceding
 * header comment it returns 0 on success, or a negative errno value with
 * rte_errno set.
 * NOTE(review): this listing omits lines (closing braces of initializers,
 * `if (ret)` checks, `goto`/`return`/`continue`, some call arguments), so
 * early-exit behaviour between the visible steps is not shown.
 */
428 mlx5_traffic_enable(struct rte_eth_dev *dev)
430 struct mlx5_priv *priv = dev->data->dev_private;
/* Broadcast destination; also passed as its own mask in the calls below. */
431 struct rte_flow_item_eth bcast = {
432 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
/* IPv6 multicast: match on the 33:33 destination prefix only. */
434 struct rte_flow_item_eth ipv6_multi_spec = {
435 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
437 struct rte_flow_item_eth ipv6_multi_mask = {
438 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
/* Unicast template; its destination is filled in per MAC address below. */
440 struct rte_flow_item_eth unicast = {
441 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
443 struct rte_flow_item_eth unicast_mask = {
444 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
446 const unsigned int vlan_filter_n = priv->vlan_filter_n;
/* All-zero address that MAC table entries are compared against. */
447 const struct rte_ether_addr cmp = {
448 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
455 * Hairpin txq default flow should be created no matter if it is
456 * isolation mode. Or else all the packets to be sent will be sent
457 * out directly without the TX flow actions, e.g. encapsulation.
459 for (i = 0; i != priv->txqs_n; ++i) {
460 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
463 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
464 ret = mlx5_ctrl_flow_source_queue(dev, i);
/*
 * The queue obtained by mlx5_txq_get() is released on both visible paths
 * (presumably the failure path inside the branch and the normal path).
 */
466 mlx5_txq_release(dev, i);
470 mlx5_txq_release(dev, i);
/* E-Switch default rule: attempted only with dv_esw_en set and not on a VF. */
472 if (priv->config.dv_esw_en && !priv->config.vf) {
473 if (mlx5_flow_create_esw_table_zero_flow(dev))
474 priv->fdb_def_rule = 1;
476 DRV_LOG(INFO, "port %u FDB default rule cannot be"
477 " configured - only Eswitch group 0 flows are"
478 " supported.", dev->data->port_id);
/* LACP miss rule: only when LACP is not handled by the user and pf_bond >= 0. */
480 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
481 ret = mlx5_flow_lacp_miss(dev);
483 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
484 "forward LACP to kernel.", dev->data->port_id);
486 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
487 , dev->data->port_id);
/* Promiscuous: all-zero spec used as its own mask. */
491 if (dev->data->promiscuous) {
492 struct rte_flow_item_eth promisc = {
493 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
494 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
498 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
/* All-multicast: only the 0x01 bit of the first destination byte is set. */
502 if (dev->data->all_multicast) {
503 struct rte_flow_item_eth multicast = {
504 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
505 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
509 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
513 /* Add broadcast/multicast flows. */
514 for (i = 0; i != vlan_filter_n; ++i) {
515 uint16_t vlan = priv->vlan_filter[i];
/* The VLAN id is converted to big-endian for the flow item. */
517 struct rte_flow_item_vlan vlan_spec = {
518 .tci = rte_cpu_to_be_16(vlan),
520 struct rte_flow_item_vlan vlan_mask =
521 rte_flow_item_vlan_mask;
523 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
524 &vlan_spec, &vlan_mask);
527 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
529 &vlan_spec, &vlan_mask);
/* Without any VLAN filter, the same two flows are created without a VLAN item. */
533 if (!vlan_filter_n) {
534 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
537 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
543 /* Add MAC address flows. */
544 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
545 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
/*
 * Entries equal to the all-zero address are detected here; the action taken
 * is on a line not shown (presumably the entry is skipped).
 */
547 if (!memcmp(mac, &cmp, sizeof(*mac)))
549 memcpy(&unicast.dst.addr_bytes,
552 for (j = 0; j != vlan_filter_n; ++j) {
553 uint16_t vlan = priv->vlan_filter[j];
555 struct rte_flow_item_vlan vlan_spec = {
556 .tci = rte_cpu_to_be_16(vlan),
558 struct rte_flow_item_vlan vlan_mask =
559 rte_flow_item_vlan_mask;
561 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
568 if (!vlan_filter_n) {
569 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
/*
 * Error path: flush the control flows created so far, keeping rte_errno
 * intact across the flush so the caller sees the original failure code.
 */
576 ret = rte_errno; /* Save rte_errno before cleanup. */
577 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
578 rte_errno = ret; /* Restore rte_errno. */
584 * Disable traffic flows configured by control plane
587 * Pointer to Ethernet device structure.
/*
 * mlx5_traffic_disable: flush the flows held on priv->ctrl_flows by calling
 * mlx5_flow_list_flush() with its last argument set to false.
 * NOTE(review): this listing omits lines (return type, braces).
 */
590 mlx5_traffic_disable(struct rte_eth_dev *dev)
592 struct mlx5_priv *priv = dev->data->dev_private;
/* Same flush call that mlx5_traffic_enable() uses on its error path. */
594 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
598 * Restart traffic flows configured by control plane
601 * Pointer to Ethernet device structure.
604 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_traffic_restart: when the device is marked started, disable the
 * control traffic flows and enable them again, returning the result of
 * mlx5_traffic_enable().
 * NOTE(review): this listing omits lines (return type, braces, and the return
 * value used when the device is not started).
 */
607 mlx5_traffic_restart(struct rte_eth_dev *dev)
/* Nothing is re-created on a stopped device. */
609 if (dev->data->dev_started) {
610 mlx5_traffic_disable(dev);
611 return mlx5_traffic_enable(dev);