X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmemif%2Frte_eth_memif.c;h=8ed9aebab7468c4c97635c87e354a2a77d567983;hb=cb99500de915040339f294b6ad2fc8edb4085b19;hp=35934c834538485bcca648bfc0329b01305cd5b5;hpb=ed650f3dd83fe4a4000277d1a44a38806c0c7d67;p=dpdk.git diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c index 35934c8345..8ed9aebab7 100644 --- a/drivers/net/memif/rte_eth_memif.c +++ b/drivers/net/memif/rte_eth_memif.c @@ -23,6 +23,10 @@ #include #include #include +#include +#include +#include +#include #include "rte_eth_memif.h" #include "memif_socket.h" @@ -32,6 +36,7 @@ #define ETH_MEMIF_PKT_BUFFER_SIZE_ARG "bsize" #define ETH_MEMIF_RING_SIZE_ARG "rsize" #define ETH_MEMIF_SOCKET_ARG "socket" +#define ETH_MEMIF_SOCKET_ABSTRACT_ARG "socket-abstract" #define ETH_MEMIF_MAC_ARG "mac" #define ETH_MEMIF_ZC_ARG "zero-copy" #define ETH_MEMIF_SECRET_ARG "secret" @@ -42,14 +47,26 @@ static const char * const valid_arguments[] = { ETH_MEMIF_PKT_BUFFER_SIZE_ARG, ETH_MEMIF_RING_SIZE_ARG, ETH_MEMIF_SOCKET_ARG, + ETH_MEMIF_SOCKET_ABSTRACT_ARG, ETH_MEMIF_MAC_ARG, ETH_MEMIF_ZC_ARG, ETH_MEMIF_SECRET_ARG, NULL }; +static const struct rte_eth_link pmd_link = { + .link_speed = ETH_SPEED_NUM_10G, + .link_duplex = ETH_LINK_FULL_DUPLEX, + .link_status = ETH_LINK_DOWN, + .link_autoneg = ETH_LINK_AUTONEG +}; + #define MEMIF_MP_SEND_REGION "memif_mp_send_region" + +static int memif_region_init_zc(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, void *arg); + const char * memif_version(void) { @@ -116,10 +133,14 @@ memif_mp_request_regions(struct rte_eth_dev *dev) struct mp_region_msg *reply_param; struct memif_region *r; struct pmd_process_private *proc_private = dev->process_private; + struct pmd_internals *pmd = dev->data->dev_private; + /* in case of zero-copy client, only request region 0 */ + uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ? + 1 : ETH_MEMIF_MAX_REGION_NUM; MIF_LOG(DEBUG, "Requesting memory regions"); - for (i = 0; i < ETH_MEMIF_MAX_REGION_NUM; i++) { + for (i = 0; i < max_region_num; i++) { /* Prepare the message */ memset(&msg, 0, sizeof(msg)); strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name)); @@ -161,6 +182,12 @@ memif_mp_request_regions(struct rte_eth_dev *dev) free(reply); } + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { + ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private); + if (ret < 0) + return ret; + } + return memif_connect(dev); } @@ -185,7 +212,7 @@ memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_priva int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) * (1 << pmd->run.log2_ring_size); - p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size; + p = (uint8_t *)p + (ring_num + type * pmd->run.num_c2s_rings) * ring_size; return (memif_ring_t *)p; } @@ -220,6 +247,29 @@ memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d) return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset); } +/* Free mbufs received by server */ +static void +memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq) +{ + uint16_t cur_tail; + uint16_t mask = (1 << mq->log2_ring_size) - 1; + memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); + + /* FIXME: improve performance */ + /* The ring->tail acts as a guard variable between Tx and Rx + * threads, so using load-acquire pairs with store-release + * in function eth_memif_rx for C2S queues. + */ + cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); + while (mq->last_tail != cur_tail) { + RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]); + /* Decrement refcnt and free mbuf. (current segment) */ + rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1); + rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]); + mq->last_tail++; + } +} + static int memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail, struct rte_mbuf *tail) @@ -280,7 +330,7 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) ring_size = 1 << mq->log2_ring_size; mask = ring_size - 1; - if (type == MEMIF_RING_S2M) { + if (type == MEMIF_RING_C2S) { cur_slot = mq->last_head; last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); } else { @@ -334,8 +384,8 @@ next_slot: rte_pktmbuf_pkt_len(mbuf_head) += cp_len; memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off), - (uint8_t *)memif_get_buffer(proc_private, d0) + - src_off, cp_len); + (uint8_t *)memif_get_buffer(proc_private, d0) + src_off, + cp_len); src_off += cp_len; dst_off += cp_len; @@ -354,7 +404,7 @@ next_slot: } no_free_bufs: - if (type == MEMIF_RING_S2M) { + if (type == MEMIF_RING_C2S) { __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE); mq->last_head = cur_slot; } else { @@ -362,8 +412,12 @@ no_free_bufs: } refill: - if (type == MEMIF_RING_M2S) { - head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); + if (type == MEMIF_RING_S2C) { + /* ring->head is updated by the receiver and this function + * is called in the context of receiver thread. The loads in + * the receiver do not need to synchronize with its own stores. + */ + head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED); n_slots = ring_size - head + mq->last_tail; while (n_slots--) { @@ -378,6 +432,131 @@ refill: return n_rx_pkts; } +static uint16_t +eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct memif_queue *mq = queue; + struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; + struct pmd_process_private *proc_private = + rte_eth_devices[mq->in_port].process_private; + memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); + uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head; + uint16_t n_rx_pkts = 0; + memif_desc_t *d0; + struct rte_mbuf *mbuf, *mbuf_tail; + struct rte_mbuf *mbuf_head = NULL; + int ret; + struct rte_eth_link link; + + if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) + return 0; + if (unlikely(ring == NULL)) { + /* Secondary process will attempt to request regions. */ + rte_eth_link_get(mq->in_port, &link); + return 0; + } + + /* consume interrupt */ + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { + uint64_t b; + ssize_t size __rte_unused; + size = read(mq->intr_handle.fd, &b, sizeof(b)); + } + + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + cur_slot = mq->last_tail; + /* The ring->tail acts as a guard variable between Tx and Rx + * threads, so using load-acquire pairs with store-release + * to synchronize it between threads. + */ + last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); + if (cur_slot == last_slot) + goto refill; + n_slots = last_slot - cur_slot; + + while (n_slots && n_rx_pkts < nb_pkts) { + s0 = cur_slot & mask; + + d0 = &ring->desc[s0]; + mbuf_head = mq->buffers[s0]; + mbuf = mbuf_head; + +next_slot: + /* prefetch next descriptor */ + if (n_rx_pkts + 1 < nb_pkts) + rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]); + + mbuf->port = mq->in_port; + rte_pktmbuf_data_len(mbuf) = d0->length; + rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf); + + mq->n_bytes += rte_pktmbuf_data_len(mbuf); + + cur_slot++; + n_slots--; + if (d0->flags & MEMIF_DESC_FLAG_NEXT) { + s0 = cur_slot & mask; + d0 = &ring->desc[s0]; + mbuf_tail = mbuf; + mbuf = mq->buffers[s0]; + ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf); + if (unlikely(ret < 0)) { + MIF_LOG(ERR, "number-of-segments-overflow"); + goto refill; + } + goto next_slot; + } + + *bufs++ = mbuf_head; + n_rx_pkts++; + } + + mq->last_tail = cur_slot; + +/* Supply server with new buffers */ +refill: + /* ring->head is updated by the receiver and this function + * is called in the context of receiver thread. The loads in + * the receiver do not need to synchronize with its own stores. + */ + head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED); + n_slots = ring_size - head + mq->last_tail; + + if (n_slots < 32) + goto no_free_mbufs; + + ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots); + if (unlikely(ret < 0)) + goto no_free_mbufs; + + while (n_slots--) { + s0 = head++ & mask; + if (n_slots > 0) + rte_prefetch0(mq->buffers[head & mask]); + d0 = &ring->desc[s0]; + /* store buffer header */ + mbuf = mq->buffers[s0]; + /* populate descriptor */ + d0->length = rte_pktmbuf_data_room_size(mq->mempool) - + RTE_PKTMBUF_HEADROOM; + d0->region = 1; + d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) - + (uint8_t *)proc_private->regions[d0->region]->addr; + } +no_free_mbufs: + /* The ring->head acts as a guard variable between Tx and Rx + * threads, so using store-release pairs with load-acquire + * in function eth_memif_tx. + */ + __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE); + + mq->n_pkts += n_rx_pkts; + + return n_rx_pkts; +} + static uint16_t eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { @@ -412,14 +591,24 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) ring_size = 1 << mq->log2_ring_size; mask = ring_size - 1; - n_free = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE) - mq->last_tail; - mq->last_tail += n_free; - - if (type == MEMIF_RING_S2M) { - slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE); - n_free = ring_size - slot + mq->last_tail; + if (type == MEMIF_RING_C2S) { + /* For C2S queues ring->head is updated by the sender and + * this function is called in the context of sending thread. + * The loads in the sender do not need to synchronize with + * its own stores. Hence, the following load can be a + * relaxed load. + */ + slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED); + n_free = ring_size - slot + + __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); } else { - slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE); + /* For S2C queues ring->tail is updated by the sender and + * this function is called in the context of sending thread. + * The loads in the sender do not need to synchronize with + * its own stores. Hence, the following load can be a + * relaxed load. + */ + slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED); n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot; } @@ -430,7 +619,7 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) saved_slot = slot; d0 = &ring->desc[slot & mask]; dst_off = 0; - dst_len = (type == MEMIF_RING_S2M) ? + dst_len = (type == MEMIF_RING_C2S) ? pmd->run.pkt_buffer_size : d0->length; next_in_chain: @@ -445,7 +634,7 @@ next_in_chain: d0->flags |= MEMIF_DESC_FLAG_NEXT; d0 = &ring->desc[slot & mask]; dst_off = 0; - dst_len = (type == MEMIF_RING_S2M) ? + dst_len = (type == MEMIF_RING_C2S) ? pmd->run.pkt_buffer_size : d0->length; d0->flags = 0; } else { @@ -480,7 +669,7 @@ next_in_chain: } no_free_slots: - if (type == MEMIF_RING_S2M) + if (type == MEMIF_RING_C2S) __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE); else __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE); @@ -498,19 +687,178 @@ no_free_slots: return n_tx_pkts; } + +static int +memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq, + memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask, + uint16_t slot, uint16_t n_free) +{ + memif_desc_t *d0; + int used_slots = 1; + +next_in_chain: + /* store pointer to mbuf to free it later */ + mq->buffers[slot & mask] = mbuf; + /* Increment refcnt to make sure the buffer is not freed before server + * receives it. (current segment) + */ + rte_mbuf_refcnt_update(mbuf, 1); + /* populate descriptor */ + d0 = &ring->desc[slot & mask]; + d0->length = rte_pktmbuf_data_len(mbuf); + /* FIXME: get region index */ + d0->region = 1; + d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) - + (uint8_t *)proc_private->regions[d0->region]->addr; + d0->flags = 0; + + /* check if buffer is chained */ + if (rte_pktmbuf_is_contiguous(mbuf) == 0) { + if (n_free < 2) + return 0; + /* mark buffer as chained */ + d0->flags |= MEMIF_DESC_FLAG_NEXT; + /* advance mbuf */ + mbuf = mbuf->next; + /* update counters */ + used_slots++; + slot++; + n_free--; + goto next_in_chain; + } + return used_slots; +} + +static uint16_t +eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct memif_queue *mq = queue; + struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private; + struct pmd_process_private *proc_private = + rte_eth_devices[mq->in_port].process_private; + memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); + uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0; + struct rte_eth_link link; + + if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) + return 0; + if (unlikely(ring == NULL)) { + /* Secondary process will attempt to request regions. */ + rte_eth_link_get(mq->in_port, &link); + return 0; + } + + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + /* free mbufs received by server */ + memif_free_stored_mbufs(proc_private, mq); + + /* ring type always MEMIF_RING_C2S */ + /* For C2S queues ring->head is updated by the sender and + * this function is called in the context of sending thread. + * The loads in the sender do not need to synchronize with + * its own stores. Hence, the following load can be a + * relaxed load. + */ + slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED); + n_free = ring_size - slot + mq->last_tail; + + int used_slots; + + while (n_free && (n_tx_pkts < nb_pkts)) { + while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) { + if ((nb_pkts - n_tx_pkts) > 8) { + rte_prefetch0(*bufs + 4); + rte_prefetch0(*bufs + 5); + rte_prefetch0(*bufs + 6); + rte_prefetch0(*bufs + 7); + } + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + } + used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++, + mask, slot, n_free); + if (unlikely(used_slots < 1)) + goto no_free_slots; + n_tx_pkts++; + slot += used_slots; + n_free -= used_slots; + } + +no_free_slots: + /* ring type always MEMIF_RING_C2S */ + /* The ring->head acts as a guard variable between Tx and Rx + * threads, so using store-release pairs with load-acquire + * in function eth_memif_rx for C2S rings. + */ + __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE); + + /* Send interrupt, if enabled. */ + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { + uint64_t a = 1; + ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a)); + if (unlikely(size < 0)) { + MIF_LOG(WARNING, + "Failed to send interrupt. %s", strerror(errno)); + } + } + + /* increment queue counters */ + mq->n_pkts += n_tx_pkts; + + return n_tx_pkts; +} + void -memif_free_regions(struct pmd_process_private *proc_private) +memif_free_regions(struct rte_eth_dev *dev) { + struct pmd_process_private *proc_private = dev->process_private; + struct pmd_internals *pmd = dev->data->dev_private; int i; struct memif_region *r; - MIF_LOG(DEBUG, "Free memory regions"); /* regions are allocated contiguously, so it's * enough to loop until 'proc_private->regions_num' */ for (i = 0; i < proc_private->regions_num; i++) { r = proc_private->regions[i]; if (r != NULL) { + /* This is memzone */ + if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) { + r->addr = NULL; + if (r->fd > 0) + close(r->fd); + } if (r->addr != NULL) { munmap(r->addr, r->region_size); if (r->fd > 0) { @@ -525,6 +873,45 @@ memif_free_regions(struct pmd_process_private *proc_private) proc_private->regions_num = 0; } +static int +memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg) +{ + struct pmd_process_private *proc_private = (struct pmd_process_private *)arg; + struct memif_region *r; + + if (proc_private->regions_num < 1) { + MIF_LOG(ERR, "Missing descriptor region"); + return -1; + } + + r = proc_private->regions[proc_private->regions_num - 1]; + + if (r->addr != msl->base_va) + r = proc_private->regions[++proc_private->regions_num - 1]; + + if (r == NULL) { + r = rte_zmalloc("region", sizeof(struct memif_region), 0); + if (r == NULL) { + MIF_LOG(ERR, "Failed to alloc memif region."); + return -ENOMEM; + } + + r->addr = msl->base_va; + r->region_size = ms->len; + r->fd = rte_memseg_get_fd(ms); + if (r->fd < 0) + return -1; + r->pkt_buffer_offset = 0; + + proc_private->regions[proc_private->regions_num - 1] = r; + } else { + r->region_size += ms->len; + } + + return 0; +} + static int memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers) { @@ -546,7 +933,7 @@ memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers) } /* calculate buffer offset */ - r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) * + r->pkt_buffer_offset = (pmd->run.num_c2s_rings + pmd->run.num_s2c_rings) * (sizeof(memif_ring_t) + sizeof(memif_desc_t) * (1 << pmd->run.log2_ring_size)); @@ -555,8 +942,8 @@ memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers) if (has_buffers == 1) r->region_size += (uint32_t)(pmd->run.pkt_buffer_size * (1 << pmd->run.log2_ring_size) * - (pmd->run.num_s2m_rings + - pmd->run.num_m2s_rings)); + (pmd->run.num_c2s_rings + + pmd->run.num_s2c_rings)); memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE); snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d", @@ -605,12 +992,29 @@ error: static int memif_regions_init(struct rte_eth_dev *dev) { + struct pmd_internals *pmd = dev->data->dev_private; int ret; - /* create one buffer region */ - ret = memif_region_init_shm(dev, /* has buffer */ 1); - if (ret < 0) - return ret; + /* + * Zero-copy exposes dpdk memory. + * Each memseg list will be represented by memif region. + * Zero-copy regions indexing: memseg list idx + 1, + * as we already have region 0 reserved for descriptors. + */ + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { + /* create region idx 0 containing descriptors */ + ret = memif_region_init_shm(dev, 0); + if (ret < 0) + return ret; + ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private); + if (ret < 0) + return ret; + } else { + /* create one memory region contaning rings and buffers */ + ret = memif_region_init_shm(dev, /* has buffers */ 1); + if (ret < 0) + return ret; + } return 0; } @@ -624,12 +1028,16 @@ memif_init_rings(struct rte_eth_dev *dev) int i, j; uint16_t slot; - for (i = 0; i < pmd->run.num_s2m_rings; i++) { - ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2M, i); + for (i = 0; i < pmd->run.num_c2s_rings; i++) { + ring = memif_get_ring(pmd, proc_private, MEMIF_RING_C2S, i); __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); ring->cookie = MEMIF_COOKIE; ring->flags = 0; + + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) + continue; + for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) { slot = i * (1 << pmd->run.log2_ring_size) + j; ring->desc[j].region = 0; @@ -640,14 +1048,18 @@ memif_init_rings(struct rte_eth_dev *dev) } } - for (i = 0; i < pmd->run.num_m2s_rings; i++) { - ring = memif_get_ring(pmd, proc_private, MEMIF_RING_M2S, i); + for (i = 0; i < pmd->run.num_s2c_rings; i++) { + ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2C, i); __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED); __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED); ring->cookie = MEMIF_COOKIE; ring->flags = 0; + + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) + continue; + for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) { - slot = (i + pmd->run.num_s2m_rings) * + slot = (i + pmd->run.num_c2s_rings) * (1 << pmd->run.log2_ring_size) + j; ring->desc[j].region = 0; ring->desc[j].offset = @@ -658,20 +1070,20 @@ memif_init_rings(struct rte_eth_dev *dev) } } -/* called only by slave */ -static void +/* called only by client */ +static int memif_init_queues(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; struct memif_queue *mq; int i; - for (i = 0; i < pmd->run.num_s2m_rings; i++) { + for (i = 0; i < pmd->run.num_c2s_rings; i++) { mq = dev->data->tx_queues[i]; mq->log2_ring_size = pmd->run.log2_ring_size; /* queues located only in region 0 */ mq->region = 0; - mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2M, i); + mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_C2S, i); mq->last_head = 0; mq->last_tail = 0; mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK); @@ -680,14 +1092,21 @@ memif_init_queues(struct rte_eth_dev *dev) "Failed to create eventfd for tx queue %d: %s.", i, strerror(errno)); } + mq->buffers = NULL; + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { + mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) * + (1 << mq->log2_ring_size), 0); + if (mq->buffers == NULL) + return -ENOMEM; + } } - for (i = 0; i < pmd->run.num_m2s_rings; i++) { + for (i = 0; i < pmd->run.num_s2c_rings; i++) { mq = dev->data->rx_queues[i]; mq->log2_ring_size = pmd->run.log2_ring_size; /* queues located only in region 0 */ mq->region = 0; - mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_M2S, i); + mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2C, i); mq->last_head = 0; mq->last_tail = 0; mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK); @@ -696,7 +1115,15 @@ memif_init_queues(struct rte_eth_dev *dev) "Failed to create eventfd for rx queue %d: %s.", i, strerror(errno)); } + mq->buffers = NULL; + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { + mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) * + (1 << mq->log2_ring_size), 0); + if (mq->buffers == NULL) + return -ENOMEM; + } } + return 0; } int @@ -710,7 +1137,9 @@ memif_init_regions_and_queues(struct rte_eth_dev *dev) memif_init_rings(dev); - memif_init_queues(dev); + ret = memif_init_queues(dev); + if (ret < 0) + return ret; return 0; } @@ -734,15 +1163,23 @@ memif_connect(struct rte_eth_dev *dev) mr->addr = mmap(NULL, mr->region_size, PROT_READ | PROT_WRITE, MAP_SHARED, mr->fd, 0); - if (mr->addr == NULL) + if (mr->addr == MAP_FAILED) { + MIF_LOG(ERR, "mmap failed: %s\n", + strerror(errno)); return -1; + } + } + if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) { + /* close memseg file */ + close(mr->fd); + mr->fd = -1; } } } if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - for (i = 0; i < pmd->run.num_s2m_rings; i++) { - mq = (pmd->role == MEMIF_ROLE_SLAVE) ? + for (i = 0; i < pmd->run.num_c2s_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] : dev->data->rx_queues[i]; ring = memif_get_ring_from_queue(proc_private, mq); if (ring == NULL || ring->cookie != MEMIF_COOKIE) { @@ -754,11 +1191,11 @@ memif_connect(struct rte_eth_dev *dev) mq->last_head = 0; mq->last_tail = 0; /* enable polling mode */ - if (pmd->role == MEMIF_ROLE_MASTER) + if (pmd->role == MEMIF_ROLE_SERVER) ring->flags = MEMIF_RING_FLAG_MASK_INT; } - for (i = 0; i < pmd->run.num_m2s_rings; i++) { - mq = (pmd->role == MEMIF_ROLE_SLAVE) ? + for (i = 0; i < pmd->run.num_s2c_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] : dev->data->tx_queues[i]; ring = memif_get_ring_from_queue(proc_private, mq); if (ring == NULL || ring->cookie != MEMIF_COOKIE) { @@ -770,7 +1207,7 @@ memif_connect(struct rte_eth_dev *dev) mq->last_head = 0; mq->last_tail = 0; /* enable polling mode */ - if (pmd->role == MEMIF_ROLE_SLAVE) + if (pmd->role == MEMIF_ROLE_CLIENT) ring->flags = MEMIF_RING_FLAG_MASK_INT; } @@ -789,15 +1226,14 @@ memif_dev_start(struct rte_eth_dev *dev) int ret = 0; switch (pmd->role) { - case MEMIF_ROLE_SLAVE: - ret = memif_connect_slave(dev); + case MEMIF_ROLE_CLIENT: + ret = memif_connect_client(dev); break; - case MEMIF_ROLE_MASTER: - ret = memif_connect_master(dev); + case MEMIF_ROLE_SERVER: + ret = memif_connect_server(dev); break; default: - MIF_LOG(ERR, "%s: Unknown role: %d.", - rte_vdev_device_name(pmd->vdev), pmd->role); + MIF_LOG(ERR, "Unknown role: %d.", pmd->role); ret = -1; break; } @@ -805,7 +1241,7 @@ memif_dev_start(struct rte_eth_dev *dev) return ret; } -static void +static int memif_dev_close(struct rte_eth_dev *dev) { struct pmd_internals *pmd = dev->data->dev_private; @@ -826,6 +1262,8 @@ memif_dev_close(struct rte_eth_dev *dev) } rte_free(dev->process_private); + + return 0; } static int @@ -834,17 +1272,17 @@ memif_dev_configure(struct rte_eth_dev *dev) struct pmd_internals *pmd = dev->data->dev_private; /* - * SLAVE - TXQ - * MASTER - RXQ + * CLIENT - TXQ + * SERVER - RXQ */ - pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? + pmd->cfg.num_c2s_rings = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->nb_tx_queues : dev->data->nb_rx_queues; /* - * SLAVE - RXQ - * MASTER - TXQ + * CLIENT - RXQ + * SERVER - TXQ */ - pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? + pmd->cfg.num_s2c_rings = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->nb_rx_queues : dev->data->nb_tx_queues; return 0; @@ -862,13 +1300,12 @@ memif_tx_queue_setup(struct rte_eth_dev *dev, mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0); if (mq == NULL) { - MIF_LOG(ERR, "%s: Failed to allocate tx queue id: %u", - rte_vdev_device_name(pmd->vdev), qid); + MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid); return -ENOMEM; } mq->type = - (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S; + (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_C2S : MEMIF_RING_S2C; mq->n_pkts = 0; mq->n_bytes = 0; mq->intr_handle.fd = -1; @@ -892,12 +1329,11 @@ memif_rx_queue_setup(struct rte_eth_dev *dev, mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0); if (mq == NULL) { - MIF_LOG(ERR, "%s: Failed to allocate rx queue id: %u", - rte_vdev_device_name(pmd->vdev), qid); + MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid); return -ENOMEM; } - mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M; + mq->type = (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_S2C : MEMIF_RING_C2S; mq->n_pkts = 0; mq->n_bytes = 0; mq->intr_handle.fd = -1; @@ -933,7 +1369,7 @@ memif_link_update(struct rte_eth_dev *dev, memif_mp_request_regions(dev); } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN && proc_private->regions_num > 0) { - memif_free_regions(proc_private); + memif_free_regions(dev); } } return 0; @@ -952,8 +1388,8 @@ memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) stats->opackets = 0; stats->obytes = 0; - tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings : - pmd->run.num_m2s_rings; + tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_c2s_rings : + pmd->run.num_s2c_rings; nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : RTE_ETHDEV_QUEUE_STAT_CNTRS; @@ -966,8 +1402,8 @@ memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) stats->ibytes += mq->n_bytes; } - tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings : - pmd->run.num_s2m_rings; + tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_s2c_rings : + pmd->run.num_c2s_rings; nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : RTE_ETHDEV_QUEUE_STAT_CNTRS; @@ -989,14 +1425,14 @@ memif_stats_reset(struct rte_eth_dev *dev) int i; struct memif_queue *mq; - for (i = 0; i < pmd->run.num_s2m_rings; i++) { - mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] : + for (i = 0; i < pmd->run.num_c2s_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] : dev->data->rx_queues[i]; mq->n_pkts = 0; mq->n_bytes = 0; } - for (i = 0; i < pmd->run.num_m2s_rings; i++) { - mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] : + for (i = 0; i < pmd->run.num_s2c_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] : dev->data->tx_queues[i]; mq->n_pkts = 0; mq->n_bytes = 0; @@ -1054,11 +1490,6 @@ memif_create(struct rte_vdev_device *vdev, enum memif_role_t role, const unsigned int numa_node = vdev->device.numa_node; const char *name = rte_vdev_device_name(vdev); - if (flags & ETH_MEMIF_FLAG_ZERO_COPY) { - MIF_LOG(ERR, "Zero-copy slave not supported."); - return -1; - } - eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd)); if (eth_dev == NULL) { MIF_LOG(ERR, "%s: Unable to allocate device struct.", name); @@ -1082,6 +1513,9 @@ memif_create(struct rte_vdev_device *vdev, enum memif_role_t role, pmd->flags = flags; pmd->flags |= ETH_MEMIF_FLAG_DISABLED; pmd->role = role; + /* Zero-copy flag irelevant to server. */ + if (pmd->role == MEMIF_ROLE_SERVER) + pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY; ret = memif_socket_init(eth_dev, socket_filename); if (ret < 0) @@ -1093,22 +1527,29 @@ memif_create(struct rte_vdev_device *vdev, enum memif_role_t role, pmd->cfg.log2_ring_size = log2_ring_size; /* set in .dev_configure() */ - pmd->cfg.num_s2m_rings = 0; - pmd->cfg.num_m2s_rings = 0; + pmd->cfg.num_c2s_rings = 0; + pmd->cfg.num_s2c_rings = 0; pmd->cfg.pkt_buffer_size = pkt_buffer_size; + rte_spinlock_init(&pmd->cc_lock); data = eth_dev->data; data->dev_private = pmd; data->numa_node = numa_node; + data->dev_link = pmd_link; data->mac_addrs = ether_addr; + data->promiscuous = 1; + data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; eth_dev->dev_ops = &ops; eth_dev->device = &vdev->device; - eth_dev->rx_pkt_burst = eth_memif_rx; - eth_dev->tx_pkt_burst = eth_memif_tx; - - eth_dev->data->dev_flags &= RTE_ETH_DEV_CLOSE_REMOVE; + if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) { + eth_dev->rx_pkt_burst = eth_memif_rx_zc; + eth_dev->tx_pkt_burst = eth_memif_tx_zc; + } else { + eth_dev->rx_pkt_burst = eth_memif_rx; + eth_dev->tx_pkt_burst = eth_memif_tx; + } rte_eth_dev_probing_finish(eth_dev); @@ -1121,10 +1562,16 @@ memif_set_role(const char *key __rte_unused, const char *value, { enum memif_role_t *role = (enum memif_role_t *)extra_args; - if (strstr(value, "master") != NULL) { - *role = MEMIF_ROLE_MASTER; + if (strstr(value, "server") != NULL) { + *role = MEMIF_ROLE_SERVER; + } else if (strstr(value, "client") != NULL) { + *role = MEMIF_ROLE_CLIENT; + } else if (strstr(value, "master") != NULL) { + MIF_LOG(NOTICE, "Role argument \"master\" is deprecated, use \"server\""); + *role = MEMIF_ROLE_SERVER; } else if (strstr(value, "slave") != NULL) { - *role = MEMIF_ROLE_SLAVE; + MIF_LOG(NOTICE, "Role argument \"slave\" is deprecated, use \"client\""); + *role = MEMIF_ROLE_CLIENT; } else { MIF_LOG(ERR, "Unknown role: %s.", value); return -EINVAL; @@ -1138,6 +1585,10 @@ memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args) uint32_t *flags = (uint32_t *)extra_args; if (strstr(value, "yes") != NULL) { + if (!rte_mcfg_get_single_file_segments()) { + MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments."); + return -ENOTSUP; + } *flags |= ETH_MEMIF_FLAG_ZERO_COPY; } else if (strstr(value, "no") != NULL) { *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY; @@ -1233,7 +1684,23 @@ memif_set_socket_filename(const char *key __rte_unused, const char *value, const char **socket_filename = (const char **)extra_args; *socket_filename = value; - return memif_check_socket_filename(*socket_filename); + return 0; +} + +static int +memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args) +{ + uint32_t *flags = (uint32_t *)extra_args; + + if (strstr(value, "yes") != NULL) { + *flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT; + } else if (strstr(value, "no") != NULL) { + *flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT; + } else { + MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value); + return -EINVAL; + } + return 0; } static int @@ -1263,7 +1730,7 @@ rte_pmd_memif_probe(struct rte_vdev_device *vdev) int ret = 0; struct rte_kvargs *kvlist; const char *name = rte_vdev_device_name(vdev); - enum memif_role_t role = MEMIF_ROLE_SLAVE; + enum memif_role_t role = MEMIF_ROLE_CLIENT; memif_interface_id_t id = 0; uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE; memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE; @@ -1319,6 +1786,9 @@ rte_pmd_memif_probe(struct rte_vdev_device *vdev) MIF_LOG(WARNING, "Failed to register mp action callback: %s", strerror(rte_errno)); + /* use abstract address by default */ + flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT; + kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments); /* parse parameters */ @@ -1344,6 +1814,10 @@ rte_pmd_memif_probe(struct rte_vdev_device *vdev) (void *)(&socket_filename)); if (ret < 0) goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG, + &memif_set_is_socket_abstract, &flags); + if (ret < 0) + goto exit; ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG, &memif_set_mac, ether_addr); if (ret < 0) @@ -1358,6 +1832,12 @@ rte_pmd_memif_probe(struct rte_vdev_device *vdev) goto exit; } + if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) { + ret = memif_check_socket_filename(socket_filename); + if (ret < 0) + goto exit; + } + /* create interface */ ret = memif_create(vdev, role, id, flags, socket_filename, log2_ring_size, pkt_buffer_size, secret, ether_addr); @@ -1377,9 +1857,7 @@ rte_pmd_memif_remove(struct rte_vdev_device *vdev) if (eth_dev == NULL) return 0; - rte_eth_dev_close(eth_dev->data->port_id); - - return 0; + return rte_eth_dev_close(eth_dev->data->port_id); } static struct rte_vdev_driver pmd_memif_drv = { @@ -1391,19 +1869,13 @@ RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv); RTE_PMD_REGISTER_PARAM_STRING(net_memif, ETH_MEMIF_ID_ARG "=" - ETH_MEMIF_ROLE_ARG "=master|slave" + ETH_MEMIF_ROLE_ARG "=server|client" ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=" ETH_MEMIF_RING_SIZE_ARG "=" ETH_MEMIF_SOCKET_ARG "=" + ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no" ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx" ETH_MEMIF_ZC_ARG "=yes|no" ETH_MEMIF_SECRET_ARG "="); -int memif_logtype; - -RTE_INIT(memif_init_log) -{ - memif_logtype = rte_log_register("pmd.net.memif"); - if (memif_logtype >= 0) - rte_log_set_level(memif_logtype, RTE_LOG_NOTICE); -} +RTE_LOG_REGISTER(memif_logtype, pmd.net.memif, NOTICE);