1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
9 #include <rte_ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
13 #include <mlx5_malloc.h>
17 #include "mlx5_rxtx.h"
18 #include "mlx5_utils.h"
19 #include "rte_pmd_mlx5.h"
22 * Stop traffic on Tx queues.
25 * Pointer to Ethernet device structure.
/*
 * Stop traffic on Tx queues by releasing every Tx queue of the port.
 * dev is the Ethernet device structure; the number of queues to walk
 * comes from its private data (txqs_n).
 */
28 mlx5_txq_stop(struct rte_eth_dev *dev)
30 struct mlx5_priv *priv = dev->data->dev_private;
/* Release each Tx queue index from 0 up to txqs_n (exclusive). */
33 for (i = 0; i != priv->txqs_n; ++i)
34 mlx5_txq_release(dev, i);
38 * Start traffic on Tx queues.
41 * Pointer to Ethernet device structure.
44 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Start traffic on Tx queues: walk every Tx queue index and create the
 * queue object for it.
 * dev is the Ethernet device structure.
 * Documented contract: 0 on success, a negative errno value otherwise
 * and rte_errno is set.
 */
47 mlx5_txq_start(struct rte_eth_dev *dev)
49 struct mlx5_priv *priv = dev->data->dev_private;
53 for (i = 0; i != priv->txqs_n; ++i) {
/* mlx5_txq_get() is called once per index; any NULL check is not shown here. */
54 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
/* Hairpin queues get a DevX hairpin object and no element allocation. */
58 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
59 txq_ctrl->obj = mlx5_txq_obj_new
60 (dev, i, MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN);
/*
 * Presumably the else branch (the line opening it is not shown):
 * allocate the queue elements first, then create a DevX SQ object when
 * priv->txpp_en is set, or an IBV-type object otherwise.
 */
62 txq_alloc_elts(txq_ctrl);
63 txq_ctrl->obj = mlx5_txq_obj_new
64 (dev, i, priv->txpp_en ?
65 MLX5_TXQ_OBJ_TYPE_DEVX_SQ :
66 MLX5_TXQ_OBJ_TYPE_IBV);
/*
 * Error path: rte_errno is saved before the queue release and restored
 * after it, so the cleanup cannot clobber the value reported to the
 * caller. The loop that drives the release is not visible here.
 */
75 ret = rte_errno; /* Save rte_errno before cleanup. */
77 mlx5_txq_release(dev, i);
79 rte_errno = ret; /* Restore rte_errno. */
84 * Stop traffic on Rx queues.
87 * Pointer to Ethernet device structure.
/*
 * Stop traffic on Rx queues by releasing every Rx queue of the port.
 * dev is the Ethernet device structure; the number of queues to walk
 * comes from its private data (rxqs_n).
 */
90 mlx5_rxq_stop(struct rte_eth_dev *dev)
92 struct mlx5_priv *priv = dev->data->dev_private;
/* Release each Rx queue index from 0 up to rxqs_n (exclusive). */
95 for (i = 0; i != priv->rxqs_n; ++i)
96 mlx5_rxq_release(dev, i);
100 * Start traffic on Rx queues.
103 * Pointer to Ethernet device structure.
106 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Start traffic on Rx queues: prepare the Multi-Packet RQ mempool, then
 * for every Rx queue register its mempool, allocate its elements and
 * create its queue object.
 * dev is the Ethernet device structure.
 * Documented contract: 0 on success, a negative errno value otherwise
 * and rte_errno is set.
 */
109 mlx5_rxq_start(struct rte_eth_dev *dev)
111 struct mlx5_priv *priv = dev->data->dev_private;
115 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
116 if (mlx5_mprq_alloc_mp(dev)) {
117 /* Should not release Rx queues but return immediately. */
/* Trace the device limits read from the shared context attributes. */
120 DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
121 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
122 DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
123 dev->data->port_id, priv->sh->device_attr.max_sge);
124 for (i = 0; i != priv->rxqs_n; ++i) {
125 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
126 struct rte_mempool *mp;
/*
 * Only standard (non-hairpin) queues register a mempool and allocate
 * elements. The mempool is the MPRQ one when MPRQ is enabled on the
 * queue, otherwise the regular queue mempool.
 */
130 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
131 /* Pre-register Rx mempool. */
132 mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
133 rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
134 DRV_LOG(DEBUG, "Port %u Rx queue %u registering mp %s"
135 " having %u chunks.", dev->data->port_id,
136 rxq_ctrl->rxq.idx, mp->name, mp->nb_mem_chunks);
137 mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
138 ret = rxq_alloc_elts(rxq_ctrl);
/*
 * The queue must not own an object yet. A zero-initialised one is
 * allocated through mlx5_malloc().
 */
142 MLX5_ASSERT(!rxq_ctrl->obj);
143 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
144 sizeof(*rxq_ctrl->obj), 0,
/* Allocation failure is logged with the queue index taken from priv->rxqs. */
146 if (!rxq_ctrl->obj) {
148 "Port %u Rx queue %u can't allocate resources.",
149 dev->data->port_id, (*priv->rxqs)[i]->idx);
/*
 * Create the queue object through the obj_ops callback. The mlx5_free()
 * below presumably runs only when that callback fails; the guarding
 * condition is not visible here.
 */
153 ret = priv->obj_ops.rxq_obj_new(dev, i);
155 mlx5_free(rxq_ctrl->obj);
/*
 * NOTE(review): the log prints the address of the obj pointer field
 * (&rxq_ctrl->obj), not the object it points to - confirm this is intended.
 */
158 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
159 dev->data->port_id, i, (void *)&rxq_ctrl->obj);
/* Track the new object in the per-port Rx object list. */
160 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
/*
 * Error path: rte_errno is saved before the queue release and restored
 * after it. The loop that drives the release is not visible here.
 */
164 ret = rte_errno; /* Save rte_errno before cleanup. */
166 mlx5_rxq_release(dev, i);
168 rte_errno = ret; /* Restore rte_errno. */
173 * Binds Tx queues to Rx queues for hairpin.
175 * Binds Tx queues to the target Rx queues.
178 * Pointer to Ethernet device structure.
181 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Bind each hairpin Tx queue to its peer Rx queue by programming the
 * peer identifiers into the DevX SQ and RQ objects.
 * dev is the Ethernet device structure.
 * Documented contract: 0 on success, a negative errno value otherwise
 * and rte_errno is set.
 * Several guards and jumps of this function are not visible in this
 * excerpt; the notes below say so where it matters.
 */
184 mlx5_hairpin_bind(struct rte_eth_dev *dev)
186 struct mlx5_priv *priv = dev->data->dev_private;
187 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
188 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
189 struct mlx5_txq_ctrl *txq_ctrl;
190 struct mlx5_rxq_ctrl *rxq_ctrl;
191 struct mlx5_devx_obj *sq;
192 struct mlx5_devx_obj *rq;
196 for (i = 0; i != priv->txqs_n; ++i) {
197 txq_ctrl = mlx5_txq_get(dev, i);
/* Non-hairpin Tx queues are released again without being bound. */
200 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
201 mlx5_txq_release(dev, i);
/* A hairpin Tx queue without a queue object is reported and released. */
204 if (!txq_ctrl->obj) {
206 DRV_LOG(ERR, "port %u no txq object found: %d",
207 dev->data->port_id, i);
208 mlx5_txq_release(dev, i);
/* The peer Rx queue index comes from the first hairpin peer entry. */
211 sq = txq_ctrl->obj->sq;
212 rxq_ctrl = mlx5_rxq_get(dev,
213 txq_ctrl->hairpin_conf.peers[0].queue);
/* Presumably the missing-peer branch; its condition is not visible here. */
215 mlx5_txq_release(dev, i);
217 DRV_LOG(ERR, "port %u no rxq object found: %d",
219 txq_ctrl->hairpin_conf.peers[0].queue);
/* The peer must itself be a hairpin queue that points back at Tx queue i. */
222 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
223 rxq_ctrl->hairpin_conf.peers[0].queue != i) {
225 DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
226 "Rx queue %d", dev->data->port_id,
227 i, txq_ctrl->hairpin_conf.peers[0].queue);
230 rq = rxq_ctrl->obj->rq;
/* Presumably reported when the RQ object is missing; the check is not visible. */
233 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
235 txq_ctrl->hairpin_conf.peers[0].queue);
/* Move the SQ from RST to RDY while recording the peer RQ id and the VHCA id. */
238 sq_attr.state = MLX5_SQC_STATE_RDY;
239 sq_attr.sq_state = MLX5_SQC_STATE_RST;
240 sq_attr.hairpin_peer_rq = rq->id;
241 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
242 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
/*
 * Same transition for the RQ, recording the peer SQ id.
 * NOTE(review): the RQ attributes are filled with MLX5_SQC_STATE_*
 * constants; presumably these match the RQ state values - confirm.
 */
245 rq_attr.state = MLX5_SQC_STATE_RDY;
246 rq_attr.rq_state = MLX5_SQC_STATE_RST;
247 rq_attr.hairpin_peer_sq = sq->id;
248 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
249 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
/*
 * Both queues are released in two places. Presumably one is the end of a
 * successful iteration and the other the error exit; the surrounding
 * lines are not visible here.
 */
252 mlx5_txq_release(dev, i);
253 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
257 mlx5_txq_release(dev, i);
258 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
263 * DPDK callback to start the device.
265 * Simulate device start by attaching all configured flows.
268 * Pointer to Ethernet device structure.
271 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * DPDK callback to start the device.
 * The visible steps run in this order: dynamic inline flag lookup, RSS
 * RETA configuration, Tx packet pacing, Tx queues, Rx queues, hairpin
 * binding, Rx interrupt vector, statistics, control flows, default flow
 * actions, burst function selection, and interrupt port bookkeeping.
 * dev is the Ethernet device structure.
 * Documented contract: 0 on success, a negative errno value otherwise
 * and rte_errno is set.
 * The checks on each ret value and the error labels are not visible in
 * this excerpt.
 */
274 mlx5_dev_start(struct rte_eth_dev *dev)
276 struct mlx5_priv *priv = dev->data->dev_private;
280 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
/*
 * Look up the fine-granularity inline dynamic mbuf flag. The mask is set
 * either to the single bit at the returned position or to 0; the
 * condition selecting between the two is not visible here.
 */
281 fine_inline = rte_mbuf_dynflag_lookup
282 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
284 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
286 rte_net_mlx5_dynf_inline_mask = 0;
/* The RSS redirection table is configured only when Rx queues exist. */
287 if (dev->data->nb_rx_queues > 0) {
288 ret = mlx5_dev_configure_rss_reta(dev);
290 DRV_LOG(ERR, "port %u reta config failed: %s",
291 dev->data->port_id, strerror(rte_errno));
/* Tx packet pacing is started before any queue is created. */
295 ret = mlx5_txpp_start(dev);
297 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
298 dev->data->port_id, strerror(rte_errno));
/* Queue bring-up order: Tx queues, then Rx queues, then hairpin binding. */
301 ret = mlx5_txq_start(dev);
303 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
304 dev->data->port_id, strerror(rte_errno));
307 ret = mlx5_rxq_start(dev);
309 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
310 dev->data->port_id, strerror(rte_errno));
313 ret = mlx5_hairpin_bind(dev);
315 DRV_LOG(ERR, "port %u hairpin binding failed: %s",
316 dev->data->port_id, strerror(rte_errno));
319 /* Set started flag here for the following steps like control flow. */
320 dev->data->dev_started = 1;
321 ret = mlx5_rx_intr_vec_enable(dev);
323 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
/* Statistics are initialised, then the control-plane flows are installed. */
327 mlx5_os_stats_init(dev);
328 ret = mlx5_traffic_enable(dev);
330 DRV_LOG(ERR, "port %u failed to set defaults flows",
334 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
335 mlx5_flow_rxq_dynf_metadata_set(dev);
336 /* Set flags and context to convert Rx timestamps. */
337 mlx5_rxq_timestamp_set(dev);
338 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
339 mlx5_txq_dynf_timestamp_set(dev);
341 * In non-cached mode, it only needs to start the default mreg copy
342 * action and no flow created by application exists anymore.
343 * But it is worth wrapping the interface for further usage.
345 ret = mlx5_flow_start_default(dev);
347 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
348 dev->data->port_id, strerror(rte_errno));
/* Install the real burst functions chosen for this configuration. */
352 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
353 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
354 /* Enable datapath on secondary process. */
355 mlx5_mp_os_req_start_rxtx(dev);
/*
 * With a valid shared interrupt fd, record this port id in the shared
 * per-port table at index dev_port - 1. Otherwise (presumably the else
 * branch, not visible here) LSC and RMV interrupts are turned off in
 * the device configuration.
 */
356 if (priv->sh->intr_handle.fd >= 0) {
357 priv->sh->port[priv->dev_port - 1].ih_port_id =
358 (uint32_t)dev->data->port_id;
360 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
362 dev->data->dev_conf.intr_conf.lsc = 0;
363 dev->data->dev_conf.intr_conf.rmv = 0;
/* Same bookkeeping for the DevX interrupt handle. */
365 if (priv->sh->intr_handle_devx.fd >= 0)
366 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
367 (uint32_t)dev->data->port_id;
/*
 * Error path: save rte_errno, clear the started flag, stop the default
 * flow actions and control flows, stop Tx packet pacing last, then
 * restore rte_errno. Other rollback steps may sit on lines not visible
 * here.
 */
370 ret = rte_errno; /* Save rte_errno before cleanup. */
372 dev->data->dev_started = 0;
373 mlx5_flow_stop_default(dev);
374 mlx5_traffic_disable(dev);
377 mlx5_txpp_stop(dev); /* Stop last. */
378 rte_errno = ret; /* Restore rte_errno. */
383 * DPDK callback to stop the device.
385 * Simulate device stop by detaching all configured flows.
388 * Pointer to Ethernet device structure.
/*
 * DPDK callback to stop the device.
 * Clears the started flag, swaps in the removed_* burst functions, stops
 * the datapath in secondary processes, removes the default, control and
 * application flows, disables the Rx interrupt vector and resets the
 * interrupt port ids.
 * dev is the Ethernet device structure.
 * Any steps after the last line shown are not visible in this excerpt.
 */
391 mlx5_dev_stop(struct rte_eth_dev *dev)
393 struct mlx5_priv *priv = dev->data->dev_private;
395 dev->data->dev_started = 0;
396 /* Prevent crashes when queues are still in use. */
397 dev->rx_pkt_burst = removed_rx_burst;
398 dev->tx_pkt_burst = removed_tx_burst;
400 /* Disable datapath on secondary process. */
401 mlx5_mp_os_req_stop_rxtx(dev);
/*
 * Sleep 1000 microseconds per configured Rx queue. Presumably this lets
 * bursts already in flight finish before teardown - confirm.
 */
402 usleep(1000 * priv->rxqs_n);
403 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
404 mlx5_flow_stop_default(dev);
405 /* Control flows for default traffic can be removed firstly. */
406 mlx5_traffic_disable(dev);
407 /* All RX queue flags will be cleared in the flush interface. */
408 mlx5_flow_list_flush(dev, &priv->flows, true);
409 mlx5_rx_intr_vec_disable(dev);
/* Reset both interrupt port ids for this port to RTE_MAX_ETHPORTS. */
410 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
411 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
418 * Enable traffic flows configured by control plane
421 * Pointer to Ethernet device private data.
423 * Pointer to Ethernet device structure.
426 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Enable the traffic flows configured by the control plane: hairpin Tx
 * default flows, the E-Switch table-zero and LACP rules, promiscuous and
 * all-multicast flows, broadcast and IPv6 multicast flows, and one flow
 * per configured MAC address. Each of the last two groups is installed
 * per VLAN filter when filters exist.
 * dev is the Ethernet device structure.
 * Documented contract: 0 on success, a negative errno value otherwise
 * and rte_errno is set.
 * Many guards and continuation lines are not visible in this excerpt.
 */
429 mlx5_traffic_enable(struct rte_eth_dev *dev)
431 struct mlx5_priv *priv = dev->data->dev_private;
/* Ethernet match items: broadcast destination. */
432 struct rte_flow_item_eth bcast = {
433 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
/* IPv6 multicast destination prefix 33:33 and the two-byte mask for it. */
435 struct rte_flow_item_eth ipv6_multi_spec = {
436 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
438 struct rte_flow_item_eth ipv6_multi_mask = {
439 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
/* Unicast template: the destination is filled in per MAC address below. */
441 struct rte_flow_item_eth unicast = {
442 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
444 struct rte_flow_item_eth unicast_mask = {
445 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
447 const unsigned int vlan_filter_n = priv->vlan_filter_n;
/* All-zero address used to detect unused MAC table entries. */
448 const struct rte_ether_addr cmp = {
449 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
456 * Hairpin txq default flow should be created no matter if it is
457 * isolation mode. Or else all the packets to be sent will be sent
458 * out directly without the TX flow actions, e.g. encapsulation.
460 for (i = 0; i != priv->txqs_n; ++i) {
461 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
/* Only hairpin Tx queues get a source-queue control flow. */
464 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
465 ret = mlx5_ctrl_flow_source_queue(dev, i);
467 mlx5_txq_release(dev, i);
471 mlx5_txq_release(dev, i);
/*
 * With E-Switch enabled on a non-VF port, try to create the table-zero
 * flow. A non-zero result sets fdb_def_rule; the informational log
 * below is presumably the else branch.
 */
473 if (priv->config.dv_esw_en && !priv->config.vf) {
474 if (mlx5_flow_create_esw_table_zero_flow(dev))
475 priv->fdb_def_rule = 1;
477 DRV_LOG(INFO, "port %u FDB default rule cannot be"
478 " configured - only Eswitch group 0 flows are"
479 " supported.", dev->data->port_id);
/* The LACP miss rule is attempted on a bonding PF unless LACP is user-handled. */
481 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
482 ret = mlx5_flow_lacp_miss(dev);
484 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
485 "forward LACP to kernel.", dev->data->port_id);
487 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
488 , dev->data->port_id);
/* Promiscuous mode: an all-zero item is passed as both spec and mask. */
492 if (dev->data->promiscuous) {
493 struct rte_flow_item_eth promisc = {
494 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
495 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
499 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
/* All-multicast mode: spec and mask both select bit 0x01 of the first destination byte. */
503 if (dev->data->all_multicast) {
504 struct rte_flow_item_eth multicast = {
505 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
506 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
510 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
514 /* Add broadcast/multicast flows. */
515 for (i = 0; i != vlan_filter_n; ++i) {
516 uint16_t vlan = priv->vlan_filter[i];
/* The VLAN id is converted to big-endian for the TCI field. */
518 struct rte_flow_item_vlan vlan_spec = {
519 .tci = rte_cpu_to_be_16(vlan),
521 struct rte_flow_item_vlan vlan_mask =
522 rte_flow_item_vlan_mask;
524 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
525 &vlan_spec, &vlan_mask);
528 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
530 &vlan_spec, &vlan_mask);
/* Without VLAN filters the same two flows are installed untagged. */
534 if (!vlan_filter_n) {
535 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
538 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
544 /* Add MAC address flows. */
545 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
546 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
/* All-zero entries are treated as unused; the skip itself is not visible here. */
548 if (!memcmp(mac, &cmp, sizeof(*mac)))
550 memcpy(&unicast.dst.addr_bytes,
553 for (j = 0; j != vlan_filter_n; ++j) {
554 uint16_t vlan = priv->vlan_filter[j];
556 struct rte_flow_item_vlan vlan_spec = {
557 .tci = rte_cpu_to_be_16(vlan),
559 struct rte_flow_item_vlan vlan_mask =
560 rte_flow_item_vlan_mask;
562 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
569 if (!vlan_filter_n) {
570 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
/* Error path: flush the control flows while preserving rte_errno. */
577 ret = rte_errno; /* Save rte_errno before cleanup. */
578 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
579 rte_errno = ret; /* Restore rte_errno. */
585 * Disable traffic flows configured by control plane
588 * Pointer to Ethernet device structure.
/*
 * Disable the traffic flows configured by the control plane by flushing
 * the port's ctrl_flows list.
 * dev is the Ethernet device structure; the list lives in its private data.
 */
591 mlx5_traffic_disable(struct rte_eth_dev *dev)
593 struct mlx5_priv *priv = dev->data->dev_private;
595 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
599 * Restart traffic flows configured by control plane
602 * Pointer to Ethernet device structure.
605 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Restart the traffic flows configured by the control plane.
 * When the device is started, the control flows are disabled and then
 * re-enabled, and the result of mlx5_traffic_enable() is returned.
 * dev is the Ethernet device structure.
 * The return value for a device that is not started is not visible in
 * this excerpt.
 */
608 mlx5_traffic_restart(struct rte_eth_dev *dev)
610 if (dev->data->dev_started) {
611 mlx5_traffic_disable(dev);
612 return mlx5_traffic_enable(dev);