4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * Copyright 2014 6WIND S.A.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 #include <sys/queue.h>
45 #include <rte_compat.h>
46 #include <rte_debug.h>
47 #include <rte_common.h>
49 #include <rte_memory.h>
50 #include <rte_launch.h>
52 #include <rte_per_lcore.h>
53 #include <rte_lcore.h>
54 #include <rte_atomic.h>
55 #include <rte_branch_prediction.h>
56 #include <rte_mempool.h>
58 #include <rte_mbuf_pool_ops.h>
59 #include <rte_string_fns.h>
60 #include <rte_hexdump.h>
61 #include <rte_errno.h>
62 #include <rte_memcpy.h>
65 * ctrlmbuf constructor, given as a callback function to
66 * rte_mempool_obj_iter() or rte_mempool_create()
/*
 * Per-object constructor for control mbufs.
 *
 * Runs the regular packet mbuf constructor rte_pktmbuf_init() on the object
 * and then ORs CTRL_MBUF_FLAG into the ol_flags field of the mbuf.
 *
 * mp          pool the object belongs to, forwarded to rte_pktmbuf_init().
 * opaque_arg  forwarded unchanged to rte_pktmbuf_init().
 * _m          object to initialise; it is used as a struct rte_mbuf.
 * i           forwarded unchanged to rte_pktmbuf_init().
 *
 * NOTE(review): opaque_arg and i carry __attribute__((unused)) although both
 * are passed on in the call below.
 */
69 rte_ctrlmbuf_init(struct rte_mempool *mp,
70 __attribute__((unused)) void *opaque_arg,
72 __attribute__((unused)) unsigned i)
74 struct rte_mbuf *m = _m;
/* Set up all common fields through the packet mbuf constructor. */
75 rte_pktmbuf_init(mp, opaque_arg, _m, i);
/* Tag the object as a control mbuf. */
76 m->ol_flags |= CTRL_MBUF_FLAG;
80 * pktmbuf pool constructor, given as a callback function to
81 * rte_mempool_create(), or called directly if using
82 * rte_mempool_create_empty()/rte_mempool_populate()
/*
 * Pool-level constructor: stores a struct rte_pktmbuf_pool_private in the
 * private area of the mempool.
 *
 * mp          mempool whose private area is written.
 * opaque_arg  pointer to a caller-supplied struct rte_pktmbuf_pool_private,
 *             or NULL to build defaults: private size 0 and a data room
 *             derived from the element size.
 */
85 rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg)
87 struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv;
88 struct rte_pktmbuf_pool_private default_mbp_priv;
/* Every element must be able to hold at least the mbuf header. */
91 RTE_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf));
93 /* if no structure is provided, assume no mbuf private area */
94 user_mbp_priv = opaque_arg;
95 if (user_mbp_priv == NULL) {
96 default_mbp_priv.mbuf_priv_size = 0;
/* Whatever follows the mbuf header inside an element becomes data room. */
97 if (mp->elt_size > sizeof(struct rte_mbuf))
98 roomsz = mp->elt_size - sizeof(struct rte_mbuf);
/*
 * NOTE(review): the declaration of roomsz and the branch taken when
 * elt_size equals sizeof(struct rte_mbuf) are not visible in this listing.
 * Confirm that roomsz is always assigned and that its type is wide enough
 * for the subtraction above.
 */
101 default_mbp_priv.mbuf_data_room_size = roomsz;
102 user_mbp_priv = &default_mbp_priv;
/* An element must fit the mbuf header, the data room and the private area. */
105 RTE_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf) +
106 user_mbp_priv->mbuf_data_room_size +
107 user_mbp_priv->mbuf_priv_size);
/* Copy the chosen settings into the private area of the mempool. */
109 mbp_priv = rte_mempool_get_priv(mp);
110 memcpy(mbp_priv, user_mbp_priv, sizeof(*mbp_priv));
114 * pktmbuf constructor, given as a callback function to
115 * rte_mempool_obj_iter() or rte_mempool_create().
116 * Set the fields of a packet mbuf to their default values.
/*
 * Per-object constructor for packet mbufs.
 *
 * Zeroes the mbuf header together with its private area, then sets the
 * private size, buffer virtual address, buffer IO address, buffer length,
 * data offset, port and reference count.
 *
 * mp          pool the object belongs to; the private area size and the
 *             data room size are read from it.
 * opaque_arg  marked unused.
 * _m          object to initialise; it is used as a struct rte_mbuf.
 * i           marked unused.
 *
 * NOTE(review): some original lines are missing from this listing, so
 * further field assignments may exist that are not documented here.
 */
119 rte_pktmbuf_init(struct rte_mempool *mp,
120 __attribute__((unused)) void *opaque_arg,
122 __attribute__((unused)) unsigned i)
124 struct rte_mbuf *m = _m;
125 uint32_t mbuf_size, buf_len, priv_size;
/* Sizes are pool-wide settings; mbuf_size covers header plus private area. */
127 priv_size = rte_pktmbuf_priv_size(mp);
128 mbuf_size = sizeof(struct rte_mbuf) + priv_size;
129 buf_len = rte_pktmbuf_data_room_size(mp);
/*
 * The private size must already be aligned to RTE_MBUF_PRIV_ALIGN, the
 * element must hold header plus private area, and the data room must fit
 * the 16-bit buf_len field written below.
 */
131 RTE_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == priv_size);
132 RTE_ASSERT(mp->elt_size >= mbuf_size);
133 RTE_ASSERT(buf_len <= UINT16_MAX);
/* Clear the header and the private area in one call. */
135 memset(m, 0, mbuf_size);
136 /* start of buffer is after mbuf structure and priv data */
137 m->priv_size = priv_size;
138 m->buf_addr = (char *)m + mbuf_size;
/* The IO address is derived with the same offset as the virtual address. */
139 m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
140 m->buf_len = (uint16_t)buf_len;
142 /* keep some headroom between start of buffer and data */
/* RTE_MIN caps the headroom at the buffer length. */
143 m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
145 /* init some constant fields */
148 m->port = MBUF_INVALID_PORT;
149 rte_mbuf_refcnt_set(m, 1);
153 /* Helper to create a mbuf pool with the given mempool ops name */
/*
 * Create a packet mbuf pool that uses the mempool ops named ops_name and
 * initialise every object in it with rte_pktmbuf_init().
 *
 * name            forwarded to rte_mempool_create_empty().
 * n               forwarded to rte_mempool_create_empty().
 * cache_size      forwarded to rte_mempool_create_empty().
 * priv_size       size of the per-mbuf private area; it must already be
 *                 aligned to RTE_MBUF_PRIV_ALIGN or an error is logged.
 * data_room_size  size of the per-mbuf data room.
 * socket_id       forwarded to rte_mempool_create_empty().
 * ops_name        mempool ops name, or NULL to use the name returned by
 *                 rte_mbuf_best_mempool_ops().
 *
 * NOTE(review): the return statements and the conditions guarding the error
 * paths are not visible in this listing. The error paths free the pool, so
 * they presumably return NULL; confirm against the full file.
 */
154 struct rte_mempool * __rte_experimental
155 rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
156 unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
157 int socket_id, const char *ops_name)
159 struct rte_mempool *mp;
160 struct rte_pktmbuf_pool_private mbp_priv;
161 const char *mp_ops_name = ops_name;
/* Reject a private size that is not aligned. */
165 if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) {
166 RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n",
/* Element size: mbuf header, then private area, then data room. */
171 elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size +
172 (unsigned)data_room_size;
173 mbp_priv.mbuf_data_room_size = data_room_size;
174 mbp_priv.mbuf_priv_size = priv_size;
/*
 * The pool is created empty first; the ops are selected below before the
 * pool gets populated.
 */
176 mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
177 sizeof(struct rte_pktmbuf_pool_private), socket_id, 0);
/* Fall back to the best ops name when the caller passed none. */
181 if (mp_ops_name == NULL)
182 mp_ops_name = rte_mbuf_best_mempool_ops();
183 ret = rte_mempool_set_ops_byname(mp, mp_ops_name, NULL);
/* Error path: log and release the pool. */
185 RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
186 rte_mempool_free(mp);
/* Store data room and private sizes in the private area of the pool. */
190 rte_pktmbuf_pool_init(mp, &mbp_priv);
192 ret = rte_mempool_populate_default(mp);
/* Error path: release the pool. */
194 rte_mempool_free(mp);
/* Run the packet mbuf constructor on every object of the pool. */
199 rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
204 /* helper to create a mbuf pool */
/*
 * Create a packet mbuf pool without naming the mempool ops.
 *
 * Thin wrapper: all arguments are forwarded to
 * rte_pktmbuf_pool_create_by_ops() with NULL as ops name, and the result of
 * that call is returned unchanged.
 */
206 rte_pktmbuf_pool_create(const char *name, unsigned int n,
207 unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
210 return rte_pktmbuf_pool_create_by_ops(name, n, cache_size, priv_size,
211 data_room_size, socket_id, NULL);
214 /* do some sanity checks on a mbuf: panic if it fails */
/*
 * Check a mbuf for consistency and call rte_panic() with a short message on
 * the first problem found.
 *
 * m          mbuf to check.
 * is_header  NOTE(review): its use is not visible in this listing. Judging
 *            by the comments below it presumably enables the whole-packet
 *            checks on data_len, nb_segs and pkt_len; confirm.
 *
 * NOTE(review): several of the conditions guarding the rte_panic() calls
 * are missing from this listing; only the visible ones are described.
 */
216 rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header)
218 unsigned int nb_segs, pkt_len;
221 rte_panic("mbuf is NULL\n");
225 rte_panic("bad mbuf pool\n");
/* Both the IO address and the virtual address of the buffer must be set. */
226 if (m->buf_iova == 0)
227 rte_panic("bad IO addr\n");
228 if (m->buf_addr == NULL)
229 rte_panic("bad virt addr\n");
/* A reference count of 0 or UINT16_MAX is rejected. */
231 uint16_t cnt = rte_mbuf_refcnt_read(m);
232 if ((cnt == 0) || (cnt == UINT16_MAX))
233 rte_panic("bad ref cnt\n");
235 /* nothing to check for sub-segments */
239 /* data_len is supposed to be not more than pkt_len */
240 if (m->data_len > m->pkt_len)
241 rte_panic("bad data_len\n");
/*
 * Walk the segment chain starting from the values stored in the first mbuf;
 * each segment subtracts its data_len from the remaining pkt_len.
 */
243 nb_segs = m->nb_segs;
244 pkt_len = m->pkt_len;
248 pkt_len -= m->data_len;
249 } while ((m = m->next) != NULL);
252 rte_panic("bad nb_segs\n");
254 rte_panic("bad pkt_len\n");
257 /* dump a mbuf to the given output stream */
/*
 * Print a description of a packet to the stream f.
 *
 * Two lines describe the packet as a whole (address, IO address, buffer
 * length, packet length, offload flags, segment count, input port). Each
 * visited segment then gets one line, and rte_hexdump() is called on its
 * data.
 *
 * f         stream that receives the output.
 * m         first mbuf of the packet.
 * dump_len  NOTE(review): presumably the maximum number of data bytes to
 *           hex-dump; the lines deriving len from it are not visible in
 *           this listing.
 */
259 rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
262 unsigned int nb_segs;
/* Sanity-check the first mbuf before reading its fields. */
264 __rte_mbuf_sanity_check(m, 1);
266 fprintf(f, "dump mbuf at %p, iova=%"PRIx64", buf_len=%u\n",
267 m, (uint64_t)m->buf_iova, (unsigned)m->buf_len);
268 fprintf(f, " pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, "
269 "in_port=%u\n", m->pkt_len, m->ol_flags,
270 (unsigned)m->nb_segs, (unsigned)m->port);
271 nb_segs = m->nb_segs;
/* The loop ends at the end of the chain or when nb_segs reaches zero. */
273 while (m && nb_segs != 0) {
274 __rte_mbuf_sanity_check(m, 0);
276 fprintf(f, " segment at %p, data=%p, data_len=%u\n",
277 m, rte_pktmbuf_mtod(m, void *), (unsigned)m->data_len);
/* len is compared against the data length of the current segment. */
279 if (len > m->data_len)
282 rte_hexdump(f, NULL, rte_pktmbuf_mtod(m, void *), len);
289 /* read len data bytes in a mbuf at specified offset (internal) */
/*
 * Give access to len bytes of packet data starting off bytes into the
 * packet, which may span several segments.
 *
 * m    first mbuf of the packet.
 * off  byte offset from the start of the packet data.
 * len  number of bytes wanted.
 * buf  caller buffer that receives the bytes when they have to be copied.
 *
 * When the requested range lies inside a single segment, a pointer into
 * that segment is returned and buf is not used. Otherwise the data is
 * copied piecewise into buf.
 *
 * NOTE(review): the return statements for the out-of-range case and for the
 * copy case, and the statements advancing seg, are not visible in this
 * listing.
 *
 * NOTE(review): off + len is evaluated in uint32_t, so a very large off can
 * wrap around and pass the range check below; confirm that callers cannot
 * supply such values.
 */
290 const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
291 uint32_t len, void *buf)
293 const struct rte_mbuf *seg = m;
294 uint32_t buf_off = 0, copy_len;
/* Range check against the total packet length. */
296 if (off + len > rte_pktmbuf_pkt_len(m))
/* Skip whole segments that lie before the requested offset. */
299 while (off >= rte_pktmbuf_data_len(seg)) {
300 off -= rte_pktmbuf_data_len(seg);
/* Common case: the range fits in this segment, so no copy is needed. */
304 if (off + len <= rte_pktmbuf_data_len(seg))
305 return rte_pktmbuf_mtod_offset(seg, char *, off);
307 /* rare case: header is split among several segments */
/* Bytes available in the current segment from off onwards. */
309 copy_len = rte_pktmbuf_data_len(seg) - off;
312 rte_memcpy((char *)buf + buf_off,
313 rte_pktmbuf_mtod_offset(seg, char *, off), copy_len);
324 * Get the name of a RX offload flag. Must be kept synchronized with flag
325 * definitions in rte_mbuf.h.
327 const char *rte_get_rx_ol_flag_name(uint64_t mask)
330 case PKT_RX_VLAN: return "PKT_RX_VLAN";
331 case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
332 case PKT_RX_FDIR: return "PKT_RX_FDIR";
333 case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
334 case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD";
335 case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE";
336 case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
337 case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD";
338 case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE";
339 case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD";
340 case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
341 case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
342 case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
343 case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
344 case PKT_RX_LRO: return "PKT_RX_LRO";
345 case PKT_RX_TIMESTAMP: return "PKT_RX_TIMESTAMP";
346 case PKT_RX_SEC_OFFLOAD: return "PKT_RX_SEC_OFFLOAD";
347 case PKT_RX_SEC_OFFLOAD_FAILED: return "PKT_RX_SEC_OFFLOAD_FAILED";
348 default: return NULL;
/*
 * Alternative name for the entry; NULL in table entries that rely on the
 * rte_get_rx_ol_flag_name() or rte_get_tx_ol_flag_name() lookup instead.
 * NOTE(review): the remaining members of this structure are not visible in
 * this listing.
 */
355 const char *default_name;
358 /* write the list of rx ol flags in buffer buf */
/*
 * Format the names of the RX offload flags found in mask into buf.
 *
 * mask    offload flags value to describe.
 * buf     destination buffer.
 * buflen  size of buf in bytes.
 *
 * Each rx_flags entry holds a flag value, the mask selecting the bits to
 * compare, and a default name. An entry matches when the selected bits of
 * the input equal the flag value, which lets multi-bit fields such as the
 * L4 and IP checksum status be decoded, including their all-zero UNKNOWN
 * value.
 *
 * NOTE(review): entries whose default name is NULL depend on
 * rte_get_rx_ol_flag_name() knowing the flag; if it does not, name is NULL
 * when it reaches snprintf(). Keep the table and that function in sync.
 *
 * NOTE(review): the return statements, the guard around the default name
 * assignment and the code advancing buf are not visible in this listing.
 */
360 rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
362 const struct flag_mask rx_flags[] = {
363 { PKT_RX_VLAN, PKT_RX_VLAN, NULL },
364 { PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL },
365 { PKT_RX_FDIR, PKT_RX_FDIR, NULL },
366 { PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL },
367 { PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL },
368 { PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL },
369 { PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK,
370 "PKT_RX_L4_CKSUM_UNKNOWN" },
371 { PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL },
372 { PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL },
373 { PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL },
374 { PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK,
375 "PKT_RX_IP_CKSUM_UNKNOWN" },
376 { PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL },
377 { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL },
378 { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL },
379 { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL },
380 { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
381 { PKT_RX_LRO, PKT_RX_LRO, NULL },
382 { PKT_RX_TIMESTAMP, PKT_RX_TIMESTAMP, NULL },
383 { PKT_RX_SEC_OFFLOAD, PKT_RX_SEC_OFFLOAD, NULL },
384 { PKT_RX_SEC_OFFLOAD_FAILED, PKT_RX_SEC_OFFLOAD_FAILED, NULL },
385 { PKT_RX_QINQ, PKT_RX_QINQ, NULL },
/* Visit every table entry and test it against the input flags. */
395 for (i = 0; i < RTE_DIM(rx_flags); i++) {
396 if ((mask & rx_flags[i].mask) != rx_flags[i].flag)
/* The name comes from the lookup helper or from the table default. */
398 name = rte_get_rx_ol_flag_name(rx_flags[i].flag);
400 name = rx_flags[i].default_name;
/* Each name is written followed by one space. */
401 ret = snprintf(buf, buflen, "%s ", name);
/* A result of buflen or more means the text did not fit into buf. */
404 if ((size_t)ret >= buflen)
414 * Get the name of a TX offload flag. Must be kept synchronized with flag
415 * definitions in rte_mbuf.h.
417 const char *rte_get_tx_ol_flag_name(uint64_t mask)
420 case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT";
421 case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM";
422 case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM";
423 case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
424 case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
425 case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
426 case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
427 case PKT_TX_IPV4: return "PKT_TX_IPV4";
428 case PKT_TX_IPV6: return "PKT_TX_IPV6";
429 case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM";
430 case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4";
431 case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6";
432 case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN";
433 case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE";
434 case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP";
435 case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
436 case PKT_TX_TUNNEL_MPLSINUDP: return "PKT_TX_TUNNEL_MPLSINUDP";
437 case PKT_TX_MACSEC: return "PKT_TX_MACSEC";
438 case PKT_TX_SEC_OFFLOAD: return "PKT_TX_SEC_OFFLOAD";
439 default: return NULL;
443 /* write the list of tx ol flags in buffer buf */
/*
 * Format the names of the TX offload flags found in mask into buf.
 *
 * mask    offload flags value to describe.
 * buf     destination buffer.
 * buflen  size of buf in bytes.
 *
 * Each tx_flags entry holds a flag value, the mask selecting the bits to
 * compare, and a default name. An entry matches when the selected bits of
 * the input equal the flag value, so the multi-bit L4 checksum and tunnel
 * type fields are decoded as a whole.
 *
 * NOTE(review): entries whose default name is NULL depend on
 * rte_get_tx_ol_flag_name() knowing the flag; if it does not, name is NULL
 * when it reaches snprintf(). Keep the table and that function in sync.
 *
 * NOTE(review): the return statements, the guard around the default name
 * assignment, the code advancing buf and the end of the function are not
 * visible in this listing.
 */
445 rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
447 const struct flag_mask tx_flags[] = {
448 { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL },
449 { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL },
450 { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL },
451 { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL },
452 { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL },
453 { PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" },
454 { PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL },
455 { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL },
456 { PKT_TX_IPV4, PKT_TX_IPV4, NULL },
457 { PKT_TX_IPV6, PKT_TX_IPV6, NULL },
458 { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL },
459 { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL },
460 { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL },
461 { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK,
462 "PKT_TX_TUNNEL_NONE" },
463 { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK,
464 "PKT_TX_TUNNEL_NONE" },
465 { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK,
466 "PKT_TX_TUNNEL_NONE" },
467 { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK,
468 "PKT_TX_TUNNEL_NONE" },
469 { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK,
470 "PKT_TX_TUNNEL_NONE" },
471 { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL },
472 { PKT_TX_SEC_OFFLOAD, PKT_TX_SEC_OFFLOAD, NULL },
/* Visit every table entry and test it against the input flags. */
482 for (i = 0; i < RTE_DIM(tx_flags); i++) {
483 if ((mask & tx_flags[i].mask) != tx_flags[i].flag)
/* The name comes from the lookup helper or from the table default. */
485 name = rte_get_tx_ol_flag_name(tx_flags[i].flag);
487 name = tx_flags[i].default_name;
/* Each name is written followed by one space. */
488 ret = snprintf(buf, buflen, "%s ", name);
/* A result of buflen or more means the text did not fit into buf. */
491 if ((size_t)ret >= buflen)