drivers/net/liquidio/lio_rxtx.c
1 /*
2  *   BSD LICENSE
3  *
 *   Copyright(c) 2017 Cavium, Inc. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Cavium, Inc. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <rte_ethdev.h>
35 #include <rte_cycles.h>
36 #include <rte_malloc.h>
37
38 #include "lio_logs.h"
39 #include "lio_struct.h"
40 #include "lio_ethdev.h"
41 #include "lio_rxtx.h"
42
43 #define LIO_MAX_SG 12
/* Flush the IQ if the number of available tx descriptors falls below LIO_FLUSH_WM */
45 #define LIO_FLUSH_WM(_iq) ((_iq)->max_count / 2)
46 #define LIO_PKT_IN_DONE_CNT_MASK 0x00000000FFFFFFFFULL
47
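/* Step through buffer_size multiples until LIO_MAX_RX_PKTLEN is covered.
 * The computed count is local and is not stored in the droq.
 */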
48 static void
49 lio_droq_compute_max_packet_bufs(struct lio_droq *droq)
50 {
51         uint32_t count = 0;
52
53         do {
54                 count += droq->buffer_size;
55         } while (count < LIO_MAX_RX_PKTLEN);
56 }
57
58 static void
59 lio_droq_reset_indices(struct lio_droq *droq)
60 {
61         droq->read_idx  = 0;
62         droq->write_idx = 0;
63         droq->refill_idx = 0;
64         droq->refill_count = 0;
65         rte_atomic64_set(&droq->pkts_pending, 0);
66 }
67
68 static void
69 lio_droq_destroy_ring_buffers(struct lio_droq *droq)
70 {
71         uint32_t i;
72
73         for (i = 0; i < droq->max_count; i++) {
74                 if (droq->recv_buf_list[i].buffer) {
75                         rte_pktmbuf_free((struct rte_mbuf *)
76                                          droq->recv_buf_list[i].buffer);
77                         droq->recv_buf_list[i].buffer = NULL;
78                 }
79         }
80
81         lio_droq_reset_indices(droq);
82 }
83
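/* Allocate a receive buffer (mbuf) for DROQ q_no from the queue's mempool
 * and reset its metadata so it can be attached to the descriptor ring.
 */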
84 static void *
85 lio_recv_buffer_alloc(struct lio_device *lio_dev, int q_no)
86 {
87         struct lio_droq *droq = lio_dev->droq[q_no];
88         struct rte_mempool *mpool = droq->mpool;
89         struct rte_mbuf *m;
90
91         m = rte_pktmbuf_alloc(mpool);
92         if (m == NULL) {
93                 lio_dev_err(lio_dev, "Cannot allocate\n");
94                 return NULL;
95         }
96
97         rte_mbuf_refcnt_set(m, 1);
98         m->next = NULL;
99         m->data_off = RTE_PKTMBUF_HEADROOM;
100         m->nb_segs = 1;
101         m->pool = mpool;
102
103         return m;
104 }
105
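/* Attach a freshly allocated receive buffer and the DMA address of its info
 * structure to every descriptor in the DROQ ring. On allocation failure all
 * previously attached buffers are released.
 */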
106 static int
107 lio_droq_setup_ring_buffers(struct lio_device *lio_dev,
108                             struct lio_droq *droq)
109 {
110         struct lio_droq_desc *desc_ring = droq->desc_ring;
111         uint32_t i;
112         void *buf;
113
114         for (i = 0; i < droq->max_count; i++) {
115                 buf = lio_recv_buffer_alloc(lio_dev, droq->q_no);
116                 if (buf == NULL) {
117                         lio_dev_err(lio_dev, "buffer alloc failed\n");
118                         lio_droq_destroy_ring_buffers(droq);
119                         return -ENOMEM;
120                 }
121
122                 droq->recv_buf_list[i].buffer = buf;
123                 droq->info_list[i].length = 0;
124
125                 /* map ring buffers into memory */
126                 desc_ring[i].info_ptr = lio_map_ring_info(droq, i);
127                 desc_ring[i].buffer_ptr =
128                         lio_map_ring(droq->recv_buf_list[i].buffer);
129         }
130
131         lio_droq_reset_indices(droq);
132
133         lio_droq_compute_max_packet_bufs(droq);
134
135         return 0;
136 }
137
138 static void
139 lio_dma_zone_free(struct lio_device *lio_dev, const struct rte_memzone *mz)
140 {
141         const struct rte_memzone *mz_tmp;
142         int ret = 0;
143
144         if (mz == NULL) {
145                 lio_dev_err(lio_dev, "Memzone NULL\n");
146                 return;
147         }
148
149         mz_tmp = rte_memzone_lookup(mz->name);
150         if (mz_tmp == NULL) {
151                 lio_dev_err(lio_dev, "Memzone %s Not Found\n", mz->name);
152                 return;
153         }
154
155         ret = rte_memzone_free(mz);
156         if (ret)
157                 lio_dev_err(lio_dev, "Memzone free Failed ret %d\n", ret);
158 }
159
160 /**
161  *  Frees the space for descriptor ring for the droq.
162  *
163  *  @param lio_dev      - pointer to the lio device structure
164  *  @param q_no         - droq no.
165  */
166 static void
167 lio_delete_droq(struct lio_device *lio_dev, uint32_t q_no)
168 {
169         struct lio_droq *droq = lio_dev->droq[q_no];
170
171         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
172
173         lio_droq_destroy_ring_buffers(droq);
174         rte_free(droq->recv_buf_list);
175         droq->recv_buf_list = NULL;
176         lio_dma_zone_free(lio_dev, droq->info_mz);
177         lio_dma_zone_free(lio_dev, droq->desc_ring_mz);
178
179         memset(droq, 0, LIO_DROQ_SIZE);
180 }
181
182 static void *
183 lio_alloc_info_buffer(struct lio_device *lio_dev,
184                       struct lio_droq *droq, unsigned int socket_id)
185 {
186         droq->info_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
187                                                  "info_list", droq->q_no,
188                                                  (droq->max_count *
189                                                         LIO_DROQ_INFO_SIZE),
190                                                  RTE_CACHE_LINE_SIZE,
191                                                  socket_id);
192
193         if (droq->info_mz == NULL)
194                 return NULL;
195
196         droq->info_list_dma = droq->info_mz->phys_addr;
197         droq->info_alloc_size = droq->info_mz->len;
198         droq->info_base_addr = (size_t)droq->info_mz->addr;
199
200         return droq->info_mz->addr;
201 }
202
203 /**
204  *  Allocates space for the descriptor ring for the droq and
205  *  sets the base addr, num desc etc in Octeon registers.
206  *
207  * @param lio_dev       - pointer to the lio device structure
208  * @param q_no          - droq no.
 * @param num_descs     - number of descriptors in the ring
 * @param desc_size     - size of each receive buffer
 * @param mpool         - mempool from which receive buffers are allocated
 * @param socket_id     - NUMA socket for the allocations
210  * @return Success: 0   Failure: -1
211  */
212 static int
213 lio_init_droq(struct lio_device *lio_dev, uint32_t q_no,
214               uint32_t num_descs, uint32_t desc_size,
215               struct rte_mempool *mpool, unsigned int socket_id)
216 {
217         uint32_t c_refill_threshold;
218         uint32_t desc_ring_size;
219         struct lio_droq *droq;
220
221         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
222
223         droq = lio_dev->droq[q_no];
224         droq->lio_dev = lio_dev;
225         droq->q_no = q_no;
226         droq->mpool = mpool;
227
228         c_refill_threshold = LIO_OQ_REFILL_THRESHOLD_CFG(lio_dev);
229
230         droq->max_count = num_descs;
231         droq->buffer_size = desc_size;
232
233         desc_ring_size = droq->max_count * LIO_DROQ_DESC_SIZE;
234         droq->desc_ring_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
235                                                       "droq", q_no,
236                                                       desc_ring_size,
237                                                       RTE_CACHE_LINE_SIZE,
238                                                       socket_id);
239
240         if (droq->desc_ring_mz == NULL) {
241                 lio_dev_err(lio_dev,
242                             "Output queue %d ring alloc failed\n", q_no);
243                 return -1;
244         }
245
246         droq->desc_ring_dma = droq->desc_ring_mz->phys_addr;
247         droq->desc_ring = (struct lio_droq_desc *)droq->desc_ring_mz->addr;
248
249         lio_dev_dbg(lio_dev, "droq[%d]: desc_ring: virt: 0x%p, dma: %lx\n",
250                     q_no, droq->desc_ring, (unsigned long)droq->desc_ring_dma);
251         lio_dev_dbg(lio_dev, "droq[%d]: num_desc: %d\n", q_no,
252                     droq->max_count);
253
254         droq->info_list = lio_alloc_info_buffer(lio_dev, droq, socket_id);
255         if (droq->info_list == NULL) {
256                 lio_dev_err(lio_dev, "Cannot allocate memory for info list.\n");
257                 goto init_droq_fail;
258         }
259
260         droq->recv_buf_list = rte_zmalloc_socket("recv_buf_list",
261                                                  (droq->max_count *
262                                                         LIO_DROQ_RECVBUF_SIZE),
263                                                  RTE_CACHE_LINE_SIZE,
264                                                  socket_id);
265         if (droq->recv_buf_list == NULL) {
266                 lio_dev_err(lio_dev,
267                             "Output queue recv buf list alloc failed\n");
268                 goto init_droq_fail;
269         }
270
271         if (lio_droq_setup_ring_buffers(lio_dev, droq))
272                 goto init_droq_fail;
273
274         droq->refill_threshold = c_refill_threshold;
275
276         rte_spinlock_init(&droq->lock);
277
278         lio_dev->fn_list.setup_oq_regs(lio_dev, q_no);
279
280         lio_dev->io_qmask.oq |= (1ULL << q_no);
281
282         return 0;
283
284 init_droq_fail:
285         lio_delete_droq(lio_dev, q_no);
286
287         return -1;
288 }
289
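/* Allocate and initialize a DROQ (Rx queue) and post the initial buffer
 * credits to the Octeon pkts_credit register. Returns 0 if the queue already
 * exists or was set up successfully, -ENOMEM on allocation failure.
 */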
290 int
291 lio_setup_droq(struct lio_device *lio_dev, int oq_no, int num_descs,
292                int desc_size, struct rte_mempool *mpool, unsigned int socket_id)
293 {
294         struct lio_droq *droq;
295
296         PMD_INIT_FUNC_TRACE();
297
298         if (lio_dev->droq[oq_no]) {
299                 lio_dev_dbg(lio_dev, "Droq %d in use\n", oq_no);
300                 return 0;
301         }
302
303         /* Allocate the DS for the new droq. */
304         droq = rte_zmalloc_socket("ethdev RX queue", sizeof(*droq),
305                                   RTE_CACHE_LINE_SIZE, socket_id);
306         if (droq == NULL)
307                 return -ENOMEM;
308
309         lio_dev->droq[oq_no] = droq;
310
311         /* Initialize the Droq */
312         if (lio_init_droq(lio_dev, oq_no, num_descs, desc_size, mpool,
313                           socket_id)) {
314                 lio_dev_err(lio_dev, "Droq[%u] Initialization Failed\n", oq_no);
315                 rte_free(lio_dev->droq[oq_no]);
316                 lio_dev->droq[oq_no] = NULL;
317                 return -ENOMEM;
318         }
319
320         lio_dev->num_oqs++;
321
322         lio_dev_dbg(lio_dev, "Total number of OQ: %d\n", lio_dev->num_oqs);
323
        /* Send credits for the Octeon output queues. Credits are always
         * sent after the output queue is enabled.
326          */
327         rte_write32(lio_dev->droq[oq_no]->max_count,
328                     lio_dev->droq[oq_no]->pkts_credit_reg);
329         rte_wmb();
330
331         return 0;
332 }
333
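/* Number of ring buffers needed to hold total_len bytes, i.e. the ceiling of
 * total_len / buf_size (e.g. buf_size 2048 and total_len 5000 gives 3).
 */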
334 static inline uint32_t
335 lio_droq_get_bufcount(uint32_t buf_size, uint32_t total_len)
336 {
337         uint32_t buf_cnt = 0;
338
339         while (total_len > (buf_size * buf_cnt))
340                 buf_cnt++;
341
342         return buf_cnt;
343 }
344
345 /* If we were not able to refill all buffers, try to move around
346  * the buffers that were not dispatched.
347  */
348 static inline uint32_t
349 lio_droq_refill_pullup_descs(struct lio_droq *droq,
350                              struct lio_droq_desc *desc_ring)
351 {
352         uint32_t refill_index = droq->refill_idx;
353         uint32_t desc_refilled = 0;
354
355         while (refill_index != droq->read_idx) {
356                 if (droq->recv_buf_list[refill_index].buffer) {
357                         droq->recv_buf_list[droq->refill_idx].buffer =
358                                 droq->recv_buf_list[refill_index].buffer;
359                         desc_ring[droq->refill_idx].buffer_ptr =
360                                 desc_ring[refill_index].buffer_ptr;
361                         droq->recv_buf_list[refill_index].buffer = NULL;
362                         desc_ring[refill_index].buffer_ptr = 0;
363                         do {
364                                 droq->refill_idx = lio_incr_index(
365                                                         droq->refill_idx, 1,
366                                                         droq->max_count);
367                                 desc_refilled++;
368                                 droq->refill_count--;
369                         } while (droq->recv_buf_list[droq->refill_idx].buffer);
370                 }
371                 refill_index = lio_incr_index(refill_index, 1,
372                                               droq->max_count);
373         }       /* while */
374
375         return desc_refilled;
376 }
377
378 /* lio_droq_refill
379  *
380  * @param lio_dev       - pointer to the lio device structure
381  * @param droq          - droq in which descriptors require new buffers.
382  *
383  * Description:
384  *  Called during normal DROQ processing in interrupt mode or by the poll
385  *  thread to refill the descriptors from which buffers were dispatched
386  *  to upper layers. Attempts to allocate new buffers. If that fails, moves
387  *  up buffers (that were not dispatched) to form a contiguous ring.
388  *
389  * Returns:
 *  Number of descriptors refilled.
391  *
392  * Locks:
393  * This routine is called with droq->lock held.
394  */
395 static uint32_t
396 lio_droq_refill(struct lio_device *lio_dev, struct lio_droq *droq)
397 {
398         struct lio_droq_desc *desc_ring;
399         uint32_t desc_refilled = 0;
400         void *buf = NULL;
401
402         desc_ring = droq->desc_ring;
403
404         while (droq->refill_count && (desc_refilled < droq->max_count)) {
405                 /* If a valid buffer exists (happens if there is no dispatch),
406                  * reuse the buffer, else allocate.
407                  */
408                 if (droq->recv_buf_list[droq->refill_idx].buffer == NULL) {
409                         buf = lio_recv_buffer_alloc(lio_dev, droq->q_no);
410                         /* If a buffer could not be allocated, no point in
411                          * continuing
412                          */
413                         if (buf == NULL)
414                                 break;
415
416                         droq->recv_buf_list[droq->refill_idx].buffer = buf;
417                 }
418
419                 desc_ring[droq->refill_idx].buffer_ptr =
420                     lio_map_ring(droq->recv_buf_list[droq->refill_idx].buffer);
421                 /* Reset any previous values in the length field. */
422                 droq->info_list[droq->refill_idx].length = 0;
423
424                 droq->refill_idx = lio_incr_index(droq->refill_idx, 1,
425                                                   droq->max_count);
426                 desc_refilled++;
427                 droq->refill_count--;
428         }
429
430         if (droq->refill_count)
431                 desc_refilled += lio_droq_refill_pullup_descs(droq, desc_ring);
432
        /* If droq->refill_count is still non-zero here, note that it would not
         * change in pass two: buffers were only moved to close the gap in the
         * ring, so the same number of buffers still remains to be refilled.
         */
438         return desc_refilled;
439 }
440
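/* Dequeue one received packet starting at droq->read_idx and convert it into
 * rte_mbufs in rx_pkts: packets that fit in one buffer are handed over
 * directly, larger packets are chained across segments. RSS hash and checksum
 * results from the response header are translated into mbuf ol_flags, and the
 * ring is refilled/credited once refill_count crosses refill_threshold.
 * Returns the number of mbuf heads stored, or -1 on a zero-length info entry.
 */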
441 static int
442 lio_droq_fast_process_packet(struct lio_device *lio_dev,
443                              struct lio_droq *droq,
444                              struct rte_mbuf **rx_pkts)
445 {
446         struct rte_mbuf *nicbuf = NULL;
447         struct lio_droq_info *info;
448         uint32_t total_len = 0;
449         int data_total_len = 0;
450         uint32_t pkt_len = 0;
451         union octeon_rh *rh;
452         int data_pkts = 0;
453
454         info = &droq->info_list[droq->read_idx];
455         lio_swap_8B_data((uint64_t *)info, 2);
456
457         if (!info->length)
458                 return -1;
459
        /* Len of resp hdr is included in the received data len. */
461         info->length -= OCTEON_RH_SIZE;
462         rh = &info->rh;
463
464         total_len += (uint32_t)info->length;
465
466         if (lio_opcode_slow_path(rh)) {
467                 uint32_t buf_cnt;
468
469                 buf_cnt = lio_droq_get_bufcount(droq->buffer_size,
470                                                 (uint32_t)info->length);
471                 droq->read_idx = lio_incr_index(droq->read_idx, buf_cnt,
472                                                 droq->max_count);
473                 droq->refill_count += buf_cnt;
474         } else {
475                 if (info->length <= droq->buffer_size) {
476                         if (rh->r_dh.has_hash)
477                                 pkt_len = (uint32_t)(info->length - 8);
478                         else
479                                 pkt_len = (uint32_t)info->length;
480
481                         nicbuf = droq->recv_buf_list[droq->read_idx].buffer;
482                         droq->recv_buf_list[droq->read_idx].buffer = NULL;
483                         droq->read_idx = lio_incr_index(
484                                                 droq->read_idx, 1,
485                                                 droq->max_count);
486                         droq->refill_count++;
487
488                         if (likely(nicbuf != NULL)) {
489                                 nicbuf->data_off = RTE_PKTMBUF_HEADROOM;
490                                 nicbuf->nb_segs = 1;
491                                 nicbuf->next = NULL;
492                                 /* We don't have a way to pass flags yet */
493                                 nicbuf->ol_flags = 0;
494                                 if (rh->r_dh.has_hash) {
495                                         uint64_t *hash_ptr;
496
497                                         nicbuf->ol_flags |= PKT_RX_RSS_HASH;
498                                         hash_ptr = rte_pktmbuf_mtod(nicbuf,
499                                                                     uint64_t *);
500                                         lio_swap_8B_data(hash_ptr, 1);
501                                         nicbuf->hash.rss = (uint32_t)*hash_ptr;
502                                         nicbuf->data_off += 8;
503                                 }
504
505                                 nicbuf->pkt_len = pkt_len;
506                                 nicbuf->data_len = pkt_len;
507                                 nicbuf->port = lio_dev->port_id;
508                                 /* Store the mbuf */
509                                 rx_pkts[data_pkts++] = nicbuf;
510                                 data_total_len += pkt_len;
511                         }
512
513                         /* Prefetch buffer pointers when on a cache line
514                          * boundary
515                          */
516                         if ((droq->read_idx & 3) == 0) {
517                                 rte_prefetch0(
518                                     &droq->recv_buf_list[droq->read_idx]);
519                                 rte_prefetch0(
520                                     &droq->info_list[droq->read_idx]);
521                         }
522                 } else {
523                         struct rte_mbuf *first_buf = NULL;
524                         struct rte_mbuf *last_buf = NULL;
525
526                         while (pkt_len < info->length) {
527                                 int cpy_len = 0;
528
529                                 cpy_len = ((pkt_len + droq->buffer_size) >
530                                                 info->length)
531                                                 ? ((uint32_t)info->length -
532                                                         pkt_len)
533                                                 : droq->buffer_size;
534
535                                 nicbuf =
536                                     droq->recv_buf_list[droq->read_idx].buffer;
537                                 droq->recv_buf_list[droq->read_idx].buffer =
538                                     NULL;
539
540                                 if (likely(nicbuf != NULL)) {
541                                         /* Note the first seg */
542                                         if (!pkt_len)
543                                                 first_buf = nicbuf;
544
545                                         nicbuf->data_off = RTE_PKTMBUF_HEADROOM;
546                                         nicbuf->nb_segs = 1;
547                                         nicbuf->next = NULL;
548                                         nicbuf->port = lio_dev->port_id;
549                                         /* We don't have a way to pass
550                                          * flags yet
551                                          */
552                                         nicbuf->ol_flags = 0;
553                                         if ((!pkt_len) && (rh->r_dh.has_hash)) {
554                                                 uint64_t *hash_ptr;
555
556                                                 nicbuf->ol_flags |=
557                                                     PKT_RX_RSS_HASH;
558                                                 hash_ptr = rte_pktmbuf_mtod(
559                                                     nicbuf, uint64_t *);
560                                                 lio_swap_8B_data(hash_ptr, 1);
561                                                 nicbuf->hash.rss =
562                                                     (uint32_t)*hash_ptr;
563                                                 nicbuf->data_off += 8;
564                                                 nicbuf->pkt_len = cpy_len - 8;
565                                                 nicbuf->data_len = cpy_len - 8;
566                                         } else {
567                                                 nicbuf->pkt_len = cpy_len;
568                                                 nicbuf->data_len = cpy_len;
569                                         }
570
571                                         if (pkt_len)
572                                                 first_buf->nb_segs++;
573
574                                         if (last_buf)
575                                                 last_buf->next = nicbuf;
576
577                                         last_buf = nicbuf;
578                                 } else {
579                                         PMD_RX_LOG(lio_dev, ERR, "no buf\n");
580                                 }
581
582                                 pkt_len += cpy_len;
583                                 droq->read_idx = lio_incr_index(
584                                                         droq->read_idx,
585                                                         1, droq->max_count);
586                                 droq->refill_count++;
587
588                                 /* Prefetch buffer pointers when on a
589                                  * cache line boundary
590                                  */
591                                 if ((droq->read_idx & 3) == 0) {
592                                         rte_prefetch0(&droq->recv_buf_list
593                                                               [droq->read_idx]);
594
595                                         rte_prefetch0(
596                                             &droq->info_list[droq->read_idx]);
597                                 }
598                         }
599                         rx_pkts[data_pkts++] = first_buf;
600                         if (rh->r_dh.has_hash)
601                                 data_total_len += (pkt_len - 8);
602                         else
603                                 data_total_len += pkt_len;
604                 }
605
606                 /* Inform upper layer about packet checksum verification */
607                 struct rte_mbuf *m = rx_pkts[data_pkts - 1];
608
609                 if (rh->r_dh.csum_verified & LIO_IP_CSUM_VERIFIED)
610                         m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
611
612                 if (rh->r_dh.csum_verified & LIO_L4_CSUM_VERIFIED)
613                         m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
614         }
615
616         if (droq->refill_count >= droq->refill_threshold) {
617                 int desc_refilled = lio_droq_refill(lio_dev, droq);
618
619                 /* Flush the droq descriptor data to memory to be sure
620                  * that when we update the credits the data in memory is
621                  * accurate.
622                  */
623                 rte_wmb();
624                 rte_write32(desc_refilled, droq->pkts_credit_reg);
625                 /* make sure mmio write completes */
626                 rte_wmb();
627         }
628
629         info->length = 0;
630         info->rh.rh64 = 0;
631
632         return data_pkts;
633 }
634
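/* Process up to pkts_to_process packets from the DROQ, stopping early on a
 * zero-length info entry, and decrement pkts_pending by the number of packets
 * examined. Returns how many mbufs were placed in rx_pkts.
 */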
635 static uint32_t
636 lio_droq_fast_process_packets(struct lio_device *lio_dev,
637                               struct lio_droq *droq,
638                               struct rte_mbuf **rx_pkts,
639                               uint32_t pkts_to_process)
640 {
641         int ret, data_pkts = 0;
642         uint32_t pkt;
643
644         for (pkt = 0; pkt < pkts_to_process; pkt++) {
645                 ret = lio_droq_fast_process_packet(lio_dev, droq,
646                                                    &rx_pkts[data_pkts]);
647                 if (ret < 0) {
648                         lio_dev_err(lio_dev, "Port[%d] DROQ[%d] idx: %d len:0, pkt_cnt: %d\n",
649                                     lio_dev->port_id, droq->q_no,
650                                     droq->read_idx, pkts_to_process);
651                         break;
652                 }
653                 data_pkts += ret;
654         }
655
656         rte_atomic64_sub(&droq->pkts_pending, pkt);
657
658         return data_pkts;
659 }
660
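/* Read the pkts_sent register, compute how many packets the hardware has
 * posted since the last check and add that to pkts_pending.
 */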
661 static inline uint32_t
662 lio_droq_check_hw_for_pkts(struct lio_droq *droq)
663 {
664         uint32_t last_count;
665         uint32_t pkt_count;
666
667         pkt_count = rte_read32(droq->pkts_sent_reg);
668
669         last_count = pkt_count - droq->pkt_count;
670         droq->pkt_count = pkt_count;
671
672         if (last_count)
673                 rte_atomic64_add(&droq->pkts_pending, last_count);
674
675         return last_count;
676 }
677
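/* Rx burst function: drains up to 'budget' packets from the DROQ under
 * droq->lock and acknowledges the accumulated packet count back to the
 * pkts_sent register.
 */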
678 uint16_t
679 lio_dev_recv_pkts(void *rx_queue,
680                   struct rte_mbuf **rx_pkts,
681                   uint16_t budget)
682 {
683         struct lio_droq *droq = rx_queue;
684         struct lio_device *lio_dev = droq->lio_dev;
685         uint32_t pkts_processed = 0;
686         uint32_t pkt_count = 0;
687
688         lio_droq_check_hw_for_pkts(droq);
689
690         pkt_count = rte_atomic64_read(&droq->pkts_pending);
691         if (!pkt_count)
692                 return 0;
693
694         if (pkt_count > budget)
695                 pkt_count = budget;
696
697         /* Grab the lock */
698         rte_spinlock_lock(&droq->lock);
699         pkts_processed = lio_droq_fast_process_packets(lio_dev,
700                                                        droq, rx_pkts,
701                                                        pkt_count);
702
703         if (droq->pkt_count) {
704                 rte_write32(droq->pkt_count, droq->pkts_sent_reg);
705                 droq->pkt_count = 0;
706         }
707
708         /* Release the spin lock */
709         rte_spinlock_unlock(&droq->lock);
710
711         return pkts_processed;
712 }
713
714 void
715 lio_delete_droq_queue(struct lio_device *lio_dev,
716                       int oq_no)
717 {
718         lio_delete_droq(lio_dev, oq_no);
719         lio_dev->num_oqs--;
720         rte_free(lio_dev->droq[oq_no]);
721         lio_dev->droq[oq_no] = NULL;
722 }
723
724 /**
725  *  lio_init_instr_queue()
726  *  @param lio_dev      - pointer to the lio device structure.
 *  @param txpciq       - queue to be initialized.
 *  @param num_descs    - number of descriptors for the queue.
 *  @param socket_id    - NUMA socket for the allocations.
 *
 *  Called at driver init time for each input queue. The device configuration
 *  provides the instruction type and other parameters for the queue.
731  *
732  *  @return  Success: 0 Failure: -1
733  */
734 static int
735 lio_init_instr_queue(struct lio_device *lio_dev,
736                      union octeon_txpciq txpciq,
737                      uint32_t num_descs, unsigned int socket_id)
738 {
739         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
740         struct lio_instr_queue *iq;
741         uint32_t instr_type;
742         uint32_t q_size;
743
744         instr_type = LIO_IQ_INSTR_TYPE(lio_dev);
745
746         q_size = instr_type * num_descs;
747         iq = lio_dev->instr_queue[iq_no];
748         iq->iq_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
749                                              "instr_queue", iq_no, q_size,
750                                              RTE_CACHE_LINE_SIZE,
751                                              socket_id);
752         if (iq->iq_mz == NULL) {
753                 lio_dev_err(lio_dev, "Cannot allocate memory for instr queue %d\n",
754                             iq_no);
755                 return -1;
756         }
757
758         iq->base_addr_dma = iq->iq_mz->phys_addr;
759         iq->base_addr = (uint8_t *)iq->iq_mz->addr;
760
761         iq->max_count = num_descs;
762
        /* Initialize a list to hold requests that have been posted to Octeon
         * but have not yet been fetched by Octeon
765          */
766         iq->request_list = rte_zmalloc_socket("request_list",
767                                               sizeof(*iq->request_list) *
768                                                         num_descs,
769                                               RTE_CACHE_LINE_SIZE,
770                                               socket_id);
771         if (iq->request_list == NULL) {
772                 lio_dev_err(lio_dev, "Alloc failed for IQ[%d] nr free list\n",
773                             iq_no);
774                 lio_dma_zone_free(lio_dev, iq->iq_mz);
775                 return -1;
776         }
777
778         lio_dev_dbg(lio_dev, "IQ[%d]: base: %p basedma: %lx count: %d\n",
779                     iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
780                     iq->max_count);
781
782         iq->lio_dev = lio_dev;
783         iq->txpciq.txpciq64 = txpciq.txpciq64;
784         iq->fill_cnt = 0;
785         iq->host_write_index = 0;
786         iq->lio_read_index = 0;
787         iq->flush_index = 0;
788
789         rte_atomic64_set(&iq->instr_pending, 0);
790
791         /* Initialize the spinlock for this instruction queue */
792         rte_spinlock_init(&iq->lock);
793         rte_spinlock_init(&iq->post_lock);
794
795         rte_atomic64_clear(&iq->iq_flush_running);
796
797         lio_dev->io_qmask.iq |= (1ULL << iq_no);
798
799         /* Set the 32B/64B mode for each input queue */
800         lio_dev->io_qmask.iq64B |= ((instr_type == 64) << iq_no);
801         iq->iqcmd_64B = (instr_type == 64);
802
803         lio_dev->fn_list.setup_iq_regs(lio_dev, iq_no);
804
805         return 0;
806 }
807
808 int
809 lio_setup_instr_queue0(struct lio_device *lio_dev)
810 {
811         union octeon_txpciq txpciq;
812         uint32_t num_descs = 0;
813         uint32_t iq_no = 0;
814
815         num_descs = LIO_NUM_DEF_TX_DESCS_CFG(lio_dev);
816
817         lio_dev->num_iqs = 0;
818
819         lio_dev->instr_queue[0] = rte_zmalloc(NULL,
820                                         sizeof(struct lio_instr_queue), 0);
821         if (lio_dev->instr_queue[0] == NULL)
822                 return -ENOMEM;
823
824         lio_dev->instr_queue[0]->q_index = 0;
825         lio_dev->instr_queue[0]->app_ctx = (void *)(size_t)0;
826         txpciq.txpciq64 = 0;
827         txpciq.s.q_no = iq_no;
828         txpciq.s.pkind = lio_dev->pfvf_hsword.pkind;
829         txpciq.s.use_qpg = 0;
830         txpciq.s.qpg = 0;
831         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, SOCKET_ID_ANY)) {
832                 rte_free(lio_dev->instr_queue[0]);
833                 lio_dev->instr_queue[0] = NULL;
834                 return -1;
835         }
836
837         lio_dev->num_iqs++;
838
839         return 0;
840 }
841
842 /**
843  *  lio_delete_instr_queue()
844  *  @param lio_dev      - pointer to the lio device structure.
845  *  @param iq_no        - queue to be deleted.
846  *
847  *  Called at driver unload time for each input queue. Deletes all
848  *  allocated resources for the input queue.
849  */
850 static void
851 lio_delete_instr_queue(struct lio_device *lio_dev, uint32_t iq_no)
852 {
853         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
854
855         rte_free(iq->request_list);
856         iq->request_list = NULL;
857         lio_dma_zone_free(lio_dev, iq->iq_mz);
858 }
859
860 void
861 lio_free_instr_queue0(struct lio_device *lio_dev)
862 {
863         lio_delete_instr_queue(lio_dev, 0);
864         rte_free(lio_dev->instr_queue[0]);
865         lio_dev->instr_queue[0] = NULL;
866         lio_dev->num_iqs--;
867 }
868
869 /* Return 0 on success, -1 on failure */
870 int
871 lio_setup_iq(struct lio_device *lio_dev, int q_index,
872              union octeon_txpciq txpciq, uint32_t num_descs, void *app_ctx,
873              unsigned int socket_id)
874 {
875         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
876
877         if (lio_dev->instr_queue[iq_no]) {
878                 lio_dev_dbg(lio_dev, "IQ is in use. Cannot create the IQ: %d again\n",
879                             iq_no);
880                 lio_dev->instr_queue[iq_no]->txpciq.txpciq64 = txpciq.txpciq64;
881                 lio_dev->instr_queue[iq_no]->app_ctx = app_ctx;
882                 return 0;
883         }
884
885         lio_dev->instr_queue[iq_no] = rte_zmalloc_socket("ethdev TX queue",
886                                                 sizeof(struct lio_instr_queue),
887                                                 RTE_CACHE_LINE_SIZE, socket_id);
888         if (lio_dev->instr_queue[iq_no] == NULL)
889                 return -1;
890
891         lio_dev->instr_queue[iq_no]->q_index = q_index;
892         lio_dev->instr_queue[iq_no]->app_ctx = app_ctx;
893
894         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, socket_id))
895                 goto release_lio_iq;
896
897         lio_dev->num_iqs++;
898         if (lio_dev->fn_list.enable_io_queues(lio_dev))
899                 goto delete_lio_iq;
900
901         return 0;
902
903 delete_lio_iq:
904         lio_delete_instr_queue(lio_dev, iq_no);
905         lio_dev->num_iqs--;
906 release_lio_iq:
907         rte_free(lio_dev->instr_queue[iq_no]);
908         lio_dev->instr_queue[iq_no] = NULL;
909
910         return -1;
911 }
912
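/* Ring the IQ doorbell with the number of commands queued since the last
 * ring, but only while the device is in the RUNNING state.
 */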
913 static inline void
914 lio_ring_doorbell(struct lio_device *lio_dev,
915                   struct lio_instr_queue *iq)
916 {
917         if (rte_atomic64_read(&lio_dev->status) == LIO_DEV_RUNNING) {
918                 rte_write32(iq->fill_cnt, iq->doorbell_reg);
919                 /* make sure doorbell write goes through */
920                 rte_wmb();
921                 iq->fill_cnt = 0;
922         }
923 }
924
925 static inline void
926 copy_cmd_into_iq(struct lio_instr_queue *iq, uint8_t *cmd)
927 {
928         uint8_t *iqptr, cmdsize;
929
930         cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
931         iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
932
933         rte_memcpy(iqptr, cmd, cmdsize);
934 }
935
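/* Copy a command into the IQ ring at host_write_index and advance the index.
 * Reports LIO_IQ_SEND_FAILED when the ring is full and LIO_IQ_SEND_STOP when
 * it is about to become full.
 */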
936 static inline struct lio_iq_post_status
937 post_command2(struct lio_instr_queue *iq, uint8_t *cmd)
938 {
939         struct lio_iq_post_status st;
940
941         st.status = LIO_IQ_SEND_OK;
942
        /* This ensures that the read index does not wrap around to the same
         * position if the queue gets full before Octeon can fetch any instr.
945          */
946         if (rte_atomic64_read(&iq->instr_pending) >=
947                         (int32_t)(iq->max_count - 1)) {
948                 st.status = LIO_IQ_SEND_FAILED;
949                 st.index = -1;
950                 return st;
951         }
952
953         if (rte_atomic64_read(&iq->instr_pending) >=
954                         (int32_t)(iq->max_count - 2))
955                 st.status = LIO_IQ_SEND_STOP;
956
957         copy_cmd_into_iq(iq, cmd);
958
959         /* "index" is returned, host_write_index is modified. */
960         st.index = iq->host_write_index;
961         iq->host_write_index = lio_incr_index(iq->host_write_index, 1,
962                                               iq->max_count);
963         iq->fill_cnt++;
964
965         /* Flush the command into memory. We need to be sure the data is in
966          * memory before indicating that the instruction is pending.
967          */
968         rte_wmb();
969
970         rte_atomic64_inc(&iq->instr_pending);
971
972         return st;
973 }
974
975 static inline void
976 lio_add_to_request_list(struct lio_instr_queue *iq,
977                         int idx, void *buf, int reqtype)
978 {
979         iq->request_list[idx].buf = buf;
980         iq->request_list[idx].reqtype = reqtype;
981 }
982
983 static inline void
984 lio_free_netsgbuf(void *buf)
985 {
986         struct lio_buf_free_info *finfo = buf;
987         struct lio_device *lio_dev = finfo->lio_dev;
988         struct rte_mbuf *m = finfo->mbuf;
989         struct lio_gather *g = finfo->g;
990         uint8_t iq = finfo->iq_no;
991
992         /* This will take care of multiple segments also */
993         rte_pktmbuf_free(m);
994
995         rte_spinlock_lock(&lio_dev->glist_lock[iq]);
996         STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq], &g->list, entries);
997         rte_spinlock_unlock(&lio_dev->glist_lock[iq]);
998         rte_free(finfo);
999 }
1000
1001 /* Can only run in process context */
1002 static int
1003 lio_process_iq_request_list(struct lio_device *lio_dev,
1004                             struct lio_instr_queue *iq)
1005 {
1006         struct octeon_instr_irh *irh = NULL;
1007         uint32_t old = iq->flush_index;
1008         struct lio_soft_command *sc;
1009         uint32_t inst_count = 0;
1010         int reqtype;
1011         void *buf;
1012
1013         while (old != iq->lio_read_index) {
1014                 reqtype = iq->request_list[old].reqtype;
1015                 buf     = iq->request_list[old].buf;
1016
1017                 if (reqtype == LIO_REQTYPE_NONE)
1018                         goto skip_this;
1019
1020                 switch (reqtype) {
1021                 case LIO_REQTYPE_NORESP_NET:
1022                         rte_pktmbuf_free((struct rte_mbuf *)buf);
1023                         break;
1024                 case LIO_REQTYPE_NORESP_NET_SG:
1025                         lio_free_netsgbuf(buf);
1026                         break;
1027                 case LIO_REQTYPE_SOFT_COMMAND:
1028                         sc = buf;
1029                         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1030                         if (irh->rflag) {
1031                                 /* We're expecting a response from Octeon.
1032                                  * It's up to lio_process_ordered_list() to
1033                                  * process sc. Add sc to the ordered soft
1034                                  * command response list because we expect
1035                                  * a response from Octeon.
1036                                  */
1037                                 rte_spinlock_lock(&lio_dev->response_list.lock);
1038                                 rte_atomic64_inc(
1039                                     &lio_dev->response_list.pending_req_count);
1040                                 STAILQ_INSERT_TAIL(
1041                                         &lio_dev->response_list.head,
1042                                         &sc->node, entries);
1043                                 rte_spinlock_unlock(
1044                                                 &lio_dev->response_list.lock);
1045                         } else {
1046                                 if (sc->callback) {
1047                                         /* This callback must not sleep */
1048                                         sc->callback(LIO_REQUEST_DONE,
1049                                                      sc->callback_arg);
1050                                 }
1051                         }
1052                         break;
1053                 default:
1054                         lio_dev_err(lio_dev,
1055                                     "Unknown reqtype: %d buf: %p at idx %d\n",
1056                                     reqtype, buf, old);
1057                 }
1058
1059                 iq->request_list[old].buf = NULL;
1060                 iq->request_list[old].reqtype = 0;
1061
1062 skip_this:
1063                 inst_count++;
1064                 old = lio_incr_index(old, 1, iq->max_count);
1065         }
1066
1067         iq->flush_index = old;
1068
1069         return inst_count;
1070 }
1071
1072 static void
1073 lio_update_read_index(struct lio_instr_queue *iq)
1074 {
1075         uint32_t pkt_in_done = rte_read32(iq->inst_cnt_reg);
1076         uint32_t last_done;
1077
1078         last_done = pkt_in_done - iq->pkt_in_done;
1079         iq->pkt_in_done = pkt_in_done;
1080
1081         /* Add last_done and modulo with the IQ size to get new index */
1082         iq->lio_read_index = (iq->lio_read_index +
1083                         (uint32_t)(last_done & LIO_PKT_IN_DONE_CNT_MASK)) %
1084                         iq->max_count;
1085 }
1086
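/* Reclaim completed IQ slots: advance lio_read_index from the hardware
 * instruction count, release or queue the associated buffers via
 * lio_process_iq_request_list() and decrement instr_pending accordingly.
 * The iq_flush_running flag ensures only one flush runs at a time.
 */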
1087 int
1088 lio_flush_iq(struct lio_device *lio_dev, struct lio_instr_queue *iq)
1089 {
1090         uint32_t tot_inst_processed = 0;
1091         uint32_t inst_processed = 0;
1092         int tx_done = 1;
1093
1094         if (rte_atomic64_test_and_set(&iq->iq_flush_running) == 0)
1095                 return tx_done;
1096
1097         rte_spinlock_lock(&iq->lock);
1098
1099         lio_update_read_index(iq);
1100
1101         do {
1102                 /* Process any outstanding IQ packets. */
1103                 if (iq->flush_index == iq->lio_read_index)
1104                         break;
1105
1106                 inst_processed = lio_process_iq_request_list(lio_dev, iq);
1107
1108                 if (inst_processed)
1109                         rte_atomic64_sub(&iq->instr_pending, inst_processed);
1110
1111                 tot_inst_processed += inst_processed;
1112                 inst_processed = 0;
1113
1114         } while (1);
1115
1116         rte_spinlock_unlock(&iq->lock);
1117
1118         rte_atomic64_clear(&iq->iq_flush_running);
1119
1120         return tx_done;
1121 }
1122
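/* Post a single command to IQ iq_no under post_lock, remember the associated
 * buffer for later cleanup and ring the doorbell unless the post failed.
 * Returns the post status.
 */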
1123 static int
1124 lio_send_command(struct lio_device *lio_dev, uint32_t iq_no, void *cmd,
1125                  void *buf, uint32_t datasize __rte_unused, uint32_t reqtype)
1126 {
1127         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1128         struct lio_iq_post_status st;
1129
1130         rte_spinlock_lock(&iq->post_lock);
1131
1132         st = post_command2(iq, cmd);
1133
1134         if (st.status != LIO_IQ_SEND_FAILED) {
1135                 lio_add_to_request_list(iq, st.index, buf, reqtype);
1136                 lio_ring_doorbell(lio_dev, iq);
1137         }
1138
1139         rte_spinlock_unlock(&iq->post_lock);
1140
1141         return st.status;
1142 }
1143
1144 void
1145 lio_prepare_soft_command(struct lio_device *lio_dev,
1146                          struct lio_soft_command *sc, uint8_t opcode,
1147                          uint8_t subcode, uint32_t irh_ossp, uint64_t ossp0,
1148                          uint64_t ossp1)
1149 {
1150         struct octeon_instr_pki_ih3 *pki_ih3;
1151         struct octeon_instr_ih3 *ih3;
1152         struct octeon_instr_irh *irh;
1153         struct octeon_instr_rdp *rdp;
1154
1155         RTE_ASSERT(opcode <= 15);
1156         RTE_ASSERT(subcode <= 127);
1157
1158         ih3       = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1159
1160         ih3->pkind = lio_dev->instr_queue[sc->iq_no]->txpciq.s.pkind;
1161
1162         pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
1163
1164         pki_ih3->w      = 1;
1165         pki_ih3->raw    = 1;
1166         pki_ih3->utag   = 1;
1167         pki_ih3->uqpg   = lio_dev->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
1168         pki_ih3->utt    = 1;
1169
1170         pki_ih3->tag    = LIO_CONTROL;
1171         pki_ih3->tagtype = OCTEON_ATOMIC_TAG;
1172         pki_ih3->qpg    = lio_dev->instr_queue[sc->iq_no]->txpciq.s.qpg;
1173         pki_ih3->pm     = 0x7;
1174         pki_ih3->sl     = 8;
1175
1176         if (sc->datasize)
1177                 ih3->dlengsz = sc->datasize;
1178
1179         irh             = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1180         irh->opcode     = opcode;
1181         irh->subcode    = subcode;
1182
1183         /* opcode/subcode specific parameters (ossp) */
1184         irh->ossp = irh_ossp;
1185         sc->cmd.cmd3.ossp[0] = ossp0;
1186         sc->cmd.cmd3.ossp[1] = ossp1;
1187
1188         if (sc->rdatasize) {
1189                 rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
1190                 rdp->pcie_port = lio_dev->pcie_port;
1191                 rdp->rlen      = sc->rdatasize;
1192                 irh->rflag = 1;
1193                 /* PKI IH3 */
1194                 ih3->fsz    = OCTEON_SOFT_CMD_RESP_IH3;
1195         } else {
1196                 irh->rflag = 0;
1197                 /* PKI IH3 */
1198                 ih3->fsz    = OCTEON_PCI_CMD_O3;
1199         }
1200 }
1201
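/* Send a fully prepared soft command to the device. A typical flow in this
 * driver (a sketch; the actual callers live outside this file) is:
 *   sc = lio_alloc_soft_command(lio_dev, datasize, rdatasize, ctxsize);
 *   lio_prepare_soft_command(lio_dev, sc, opcode, subcode, ...);
 *   lio_send_soft_command(lio_dev, sc);
 * Responses (irh->rflag set) are completed later by lio_process_ordered_list(),
 * which invokes sc->callback.
 */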
1202 int
1203 lio_send_soft_command(struct lio_device *lio_dev,
1204                       struct lio_soft_command *sc)
1205 {
1206         struct octeon_instr_ih3 *ih3;
1207         struct octeon_instr_irh *irh;
1208         uint32_t len = 0;
1209
1210         ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1211         if (ih3->dlengsz) {
1212                 RTE_ASSERT(sc->dmadptr);
1213                 sc->cmd.cmd3.dptr = sc->dmadptr;
1214         }
1215
1216         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1217         if (irh->rflag) {
1218                 RTE_ASSERT(sc->dmarptr);
1219                 RTE_ASSERT(sc->status_word != NULL);
1220                 *sc->status_word = LIO_COMPLETION_WORD_INIT;
1221                 sc->cmd.cmd3.rptr = sc->dmarptr;
1222         }
1223
1224         len = (uint32_t)ih3->dlengsz;
1225
1226         if (sc->wait_time)
1227                 sc->timeout = lio_uptime + sc->wait_time;
1228
1229         return lio_send_command(lio_dev, sc->iq_no, &sc->cmd, sc, len,
1230                                 LIO_REQTYPE_SOFT_COMMAND);
1231 }
1232
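/* Create the per-port mempool backing soft command buffers
 * (LIO_MAX_SOFT_COMMAND_BUFFERS mbufs of LIO_SOFT_COMMAND_BUFFER_SIZE plus
 * headroom bytes each).
 */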
1233 int
1234 lio_setup_sc_buffer_pool(struct lio_device *lio_dev)
1235 {
1236         char sc_pool_name[RTE_MEMPOOL_NAMESIZE];
1237         uint16_t buf_size;
1238
1239         buf_size = LIO_SOFT_COMMAND_BUFFER_SIZE + RTE_PKTMBUF_HEADROOM;
1240         snprintf(sc_pool_name, sizeof(sc_pool_name),
1241                  "lio_sc_pool_%u", lio_dev->port_id);
1242         lio_dev->sc_buf_pool = rte_pktmbuf_pool_create(sc_pool_name,
1243                                                 LIO_MAX_SOFT_COMMAND_BUFFERS,
1244                                                 0, 0, buf_size, SOCKET_ID_ANY);
        if (lio_dev->sc_buf_pool == NULL)
                return -ENOMEM;

        return 0;
1246 }
1247
1248 void
1249 lio_free_sc_buffer_pool(struct lio_device *lio_dev)
1250 {
1251         rte_mempool_free(lio_dev->sc_buf_pool);
1252 }
1253
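/* Carve a soft command out of a single sc_buf_pool mbuf: the lio_soft_command
 * header comes first, followed by the optional context area, then the DMA'able
 * data and response regions, each starting on a 128-byte boundary. The last
 * 8 bytes of the response region hold the completion status word.
 */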
1254 struct lio_soft_command *
1255 lio_alloc_soft_command(struct lio_device *lio_dev, uint32_t datasize,
1256                        uint32_t rdatasize, uint32_t ctxsize)
1257 {
1258         uint32_t offset = sizeof(struct lio_soft_command);
1259         struct lio_soft_command *sc;
1260         struct rte_mbuf *m;
1261         uint64_t dma_addr;
1262
1263         RTE_ASSERT((offset + datasize + rdatasize + ctxsize) <=
1264                    LIO_SOFT_COMMAND_BUFFER_SIZE);
1265
1266         m = rte_pktmbuf_alloc(lio_dev->sc_buf_pool);
1267         if (m == NULL) {
1268                 lio_dev_err(lio_dev, "Cannot allocate mbuf for sc\n");
1269                 return NULL;
1270         }
1271
1272         /* set rte_mbuf data size and there is only 1 segment */
1273         m->pkt_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1274         m->data_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1275
1276         /* use rte_mbuf buffer for soft command */
1277         sc = rte_pktmbuf_mtod(m, struct lio_soft_command *);
1278         memset(sc, 0, LIO_SOFT_COMMAND_BUFFER_SIZE);
1279         sc->size = LIO_SOFT_COMMAND_BUFFER_SIZE;
1280         sc->dma_addr = rte_mbuf_data_dma_addr(m);
1281         sc->mbuf = m;
1282
1283         dma_addr = sc->dma_addr;
1284
1285         if (ctxsize) {
1286                 sc->ctxptr = (uint8_t *)sc + offset;
1287                 sc->ctxsize = ctxsize;
1288         }
1289
1290         /* Start data at 128 byte boundary */
1291         offset = (offset + ctxsize + 127) & 0xffffff80;
1292
1293         if (datasize) {
1294                 sc->virtdptr = (uint8_t *)sc + offset;
1295                 sc->dmadptr = dma_addr + offset;
1296                 sc->datasize = datasize;
1297         }
1298
1299         /* Start rdata at 128 byte boundary */
1300         offset = (offset + datasize + 127) & 0xffffff80;
1301
1302         if (rdatasize) {
1303                 RTE_ASSERT(rdatasize >= 16);
1304                 sc->virtrptr = (uint8_t *)sc + offset;
1305                 sc->dmarptr = dma_addr + offset;
1306                 sc->rdatasize = rdatasize;
1307                 sc->status_word = (uint64_t *)((uint8_t *)(sc->virtrptr) +
1308                                                rdatasize - 8);
1309         }
1310
1311         return sc;
1312 }
1313
1314 void
1315 lio_free_soft_command(struct lio_soft_command *sc)
1316 {
1317         rte_pktmbuf_free(sc->mbuf);
1318 }
1319
1320 void
1321 lio_setup_response_list(struct lio_device *lio_dev)
1322 {
1323         STAILQ_INIT(&lio_dev->response_list.head);
1324         rte_spinlock_init(&lio_dev->response_list.lock);
1325         rte_atomic64_set(&lio_dev->response_list.pending_req_count, 0);
1326 }
1327
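/* Walk the ordered soft command response list: complete entries whose status
 * word has been written back by Octeon or whose timeout has expired, invoking
 * their callbacks, and stop at the first still-pending entry or after
 * LIO_MAX_ORD_REQS_TO_PROCESS completions. Returns -1 when the list is empty,
 * 0 otherwise.
 */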
1328 int
1329 lio_process_ordered_list(struct lio_device *lio_dev)
1330 {
1331         int resp_to_process = LIO_MAX_ORD_REQS_TO_PROCESS;
1332         struct lio_response_list *ordered_sc_list;
1333         struct lio_soft_command *sc;
1334         int request_complete = 0;
1335         uint64_t status64;
1336         uint32_t status;
1337
1338         ordered_sc_list = &lio_dev->response_list;
1339
1340         do {
1341                 rte_spinlock_lock(&ordered_sc_list->lock);
1342
1343                 if (STAILQ_EMPTY(&ordered_sc_list->head)) {
1344                         /* ordered_sc_list is empty; there is
1345                          * nothing to process
1346                          */
1347                         rte_spinlock_unlock(&ordered_sc_list->lock);
1348                         return -1;
1349                 }
1350
1351                 sc = LIO_STQUEUE_FIRST_ENTRY(&ordered_sc_list->head,
1352                                              struct lio_soft_command, node);
1353
1354                 status = LIO_REQUEST_PENDING;
1355
                /* check if Octeon has finished DMA'ing a response
                 * to where rptr is pointing
1358                  */
1359                 status64 = *sc->status_word;
1360
1361                 if (status64 != LIO_COMPLETION_WORD_INIT) {
1362                         /* This logic ensures that all 64b have been written.
1363                          * 1. check byte 0 for non-FF
1364                          * 2. if non-FF, then swap result from BE to host order
1365                          * 3. check byte 7 (swapped to 0) for non-FF
1366                          * 4. if non-FF, use the low 32-bit status code
1367                          * 5. if either byte 0 or byte 7 is FF, don't use status
1368                          */
1369                         if ((status64 & 0xff) != 0xff) {
1370                                 lio_swap_8B_data(&status64, 1);
1371                                 if (((status64 & 0xff) != 0xff)) {
1372                                         /* retrieve 16-bit firmware status */
1373                                         status = (uint32_t)(status64 &
1374                                                             0xffffULL);
1375                                         if (status) {
1376                                                 status =
1377                                                 LIO_FIRMWARE_STATUS_CODE(
1378                                                                         status);
1379                                         } else {
1380                                                 /* i.e. no error */
1381                                                 status = LIO_REQUEST_DONE;
1382                                         }
1383                                 }
1384                         }
1385                 } else if ((sc->timeout && lio_check_timeout(lio_uptime,
1386                                                              sc->timeout))) {
1387                         lio_dev_err(lio_dev,
1388                                     "cmd failed, timeout (%ld, %ld)\n",
1389                                     (long)lio_uptime, (long)sc->timeout);
1390                         status = LIO_REQUEST_TIMEOUT;
1391                 }
1392
1393                 if (status != LIO_REQUEST_PENDING) {
1394                         /* we have received a response or we have timed out.
1395                          * remove node from linked list
1396                          */
1397                         STAILQ_REMOVE(&ordered_sc_list->head,
1398                                       &sc->node, lio_stailq_node, entries);
1399                         rte_atomic64_dec(
1400                             &lio_dev->response_list.pending_req_count);
1401                         rte_spinlock_unlock(&ordered_sc_list->lock);
1402
1403                         if (sc->callback)
1404                                 sc->callback(status, sc->callback_arg);
1405
1406                         request_complete++;
1407                 } else {
1408                         /* no response yet */
1409                         request_complete = 0;
1410                         rte_spinlock_unlock(&ordered_sc_list->lock);
1411                 }
1412
1413                 /* If we hit the Max Ordered requests to process every loop,
1414                  * we quit and let this function be invoked the next time
1415                  * the poll thread runs to process the remaining requests.
1416                  * This function can take up the entire CPU if there is
1417                  * no upper limit to the requests processed.
1418                  */
1419                 if (request_complete >= resp_to_process)
1420                         break;
1421         } while (request_complete);
1422
1423         return 0;
1424 }
1425
1426 static inline struct lio_stailq_node *
1427 list_delete_first_node(struct lio_stailq_head *head)
1428 {
1429         struct lio_stailq_node *node;
1430
1431         if (STAILQ_EMPTY(head))
1432                 node = NULL;
1433         else
1434                 node = STAILQ_FIRST(head);
1435
1436         if (node)
1437                 STAILQ_REMOVE(head, node, lio_stailq_node, entries);
1438
1439         return node;
1440 }
1441
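/**
 * \brief Free the gather list of an instruction queue
 * @param txq - instruction queue whose scatter-gather list is torn down
 *
 * Pops every lio_gather node queued for this iq and releases both the
 * (alignment-adjusted) sg array and the node itself.
 */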
1442 void
1443 lio_delete_sglist(struct lio_instr_queue *txq)
1444 {
1445         struct lio_device *lio_dev = txq->lio_dev;
1446         int iq_no = txq->q_index;
1447         struct lio_gather *g;
1448
1449         if (lio_dev->glist_head == NULL)
1450                 return;
1451
1452         do {
1453                 g = (struct lio_gather *)list_delete_first_node(
1454                                                 &lio_dev->glist_head[iq_no]);
1455                 if (g) {
1456                         if (g->sg)
1457                                 rte_free(
1458                                     (void *)((unsigned long)g->sg - g->adjust));
1459                         rte_free(g);
1460                 }
1461         } while (g);
1462 }
1463
1464 /**
1465  * \brief Setup gather lists
1466  * @param lio_dev - lio device pointer
1467  */
1468 int
1469 lio_setup_sglists(struct lio_device *lio_dev, int iq_no,
1470                   int fw_mapped_iq, int num_descs, unsigned int socket_id)
1471 {
1472         struct lio_gather *g;
1473         int i;
1474
1475         rte_spinlock_init(&lio_dev->glist_lock[iq_no]);
1476
1477         STAILQ_INIT(&lio_dev->glist_head[iq_no]);
1478
1479         for (i = 0; i < num_descs; i++) {
1480                 g = rte_zmalloc_socket(NULL, sizeof(*g), RTE_CACHE_LINE_SIZE,
1481                                        socket_id);
1482                 if (g == NULL) {
1483                         lio_dev_err(lio_dev,
1484                                     "lio_gather memory allocation failed for qno %d\n",
1485                                     iq_no);
1486                         break;
1487                 }
1488
1489                 g->sg_size =
1490                     ((ROUNDUP4(LIO_MAX_SG) >> 2) * LIO_SG_ENTRY_SIZE);
1491
1492                 g->sg = rte_zmalloc_socket(NULL, g->sg_size + 8,
1493                                            RTE_CACHE_LINE_SIZE, socket_id);
1494                 if (g->sg == NULL) {
1495                         lio_dev_err(lio_dev,
1496                                     "sg list memory allocation failed for qno %d\n",
1497                                     iq_no);
1498                         rte_free(g);
1499                         break;
1500                 }
1501
1502                 /* The gather component should be aligned on a 64-bit boundary */
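                /* Worked example (hypothetical address): if rte_zmalloc_socket()
                 * returned 0x1003, then (0x1003 & 7) == 3, adjust == 5 and the
                 * sg pointer is bumped to the 8B-aligned 0x1008; the extra 8
                 * bytes allocated above leave room for this shift.
                 */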
1503                 if (((unsigned long)g->sg) & 7) {
1504                         g->adjust = 8 - (((unsigned long)g->sg) & 7);
1505                         g->sg =
1506                             (struct lio_sg_entry *)((unsigned long)g->sg +
1507                                                        g->adjust);
1508                 }
1509
1510                 STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq_no], &g->list,
1511                                    entries);
1512         }
1513
1514         if (i != num_descs) {
1515                 lio_delete_sglist(lio_dev->instr_queue[fw_mapped_iq]);
1516                 return -ENOMEM;
1517         }
1518
1519         return 0;
1520 }
1521
1522 void
1523 lio_delete_instruction_queue(struct lio_device *lio_dev, int iq_no)
1524 {
1525         lio_delete_instr_queue(lio_dev, iq_no);
1526         rte_free(lio_dev->instr_queue[iq_no]);
1527         lio_dev->instr_queue[iq_no] = NULL;
1528         lio_dev->num_iqs--;
1529 }
1530
1531 static inline uint32_t
1532 lio_iq_get_available(struct lio_device *lio_dev, uint32_t q_no)
1533 {
1534         return ((lio_dev->instr_queue[q_no]->max_count - 1) -
1535                 (uint32_t)rte_atomic64_read(
1536                                 &lio_dev->instr_queue[q_no]->instr_pending));
1537 }
1538
1539 static inline int
1540 lio_iq_is_full(struct lio_device *lio_dev, uint32_t q_no)
1541 {
1542         return ((uint32_t)rte_atomic64_read(
1543                                 &lio_dev->instr_queue[q_no]->instr_pending) >=
1544                                 (lio_dev->instr_queue[q_no]->max_count - 2));
1545 }
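/* Occupancy arithmetic used by the transmit path: with instr_pending
 * descriptors outstanding, lio_iq_get_available() reports
 * (max_count - 1 - instr_pending) free slots, and lio_iq_is_full() trips
 * once instr_pending reaches (max_count - 2), at which point the transmit
 * path stops queueing.  For example, a 128-entry queue is treated as full
 * with 126 commands outstanding.
 */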
1546
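/* Flush the instruction queue until at least LIO_FLUSH_WM(iq) descriptors
 * are free again or the retry budget (10000 iterations) runs out.
 * Returns 0 once enough space has been reclaimed, 1 otherwise.
 */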
1547 static int
1548 lio_dev_cleanup_iq(struct lio_device *lio_dev, int iq_no)
1549 {
1550         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1551         uint32_t count = 10000;
1552
1553         while ((lio_iq_get_available(lio_dev, iq_no) < LIO_FLUSH_WM(iq)) &&
1554                         --count)
1555                 lio_flush_iq(lio_dev, iq);
1556
1557         return count ? 0 : 1;
1558 }
1559
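/* Completion callback for control-packet soft commands: wake the waiter by
 * setting ctrl_cmd->cond and release the soft command.  The status code
 * itself is not examined here.
 */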
1560 static void
1561 lio_ctrl_cmd_callback(uint32_t status __rte_unused, void *sc_ptr)
1562 {
1563         struct lio_soft_command *sc = sc_ptr;
1564         struct lio_dev_ctrl_cmd *ctrl_cmd;
1565         struct lio_ctrl_pkt *ctrl_pkt;
1566
1567         ctrl_pkt = (struct lio_ctrl_pkt *)sc->ctxptr;
1568         ctrl_cmd = ctrl_pkt->ctrl_cmd;
1569         ctrl_cmd->cond = 1;
1570
1571         lio_free_soft_command(sc);
1572 }
1573
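/* Build the soft command for a control packet: the ncmd is copied into the
 * command buffer and byte-swapped for the firmware, any UDD bytes are
 * appended as provided by the caller, the ctrl_pkt is stashed in ctxptr and
 * lio_ctrl_cmd_callback() is installed as the completion handler.  Returns
 * NULL if the soft command cannot be allocated.
 */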
1574 static inline struct lio_soft_command *
1575 lio_alloc_ctrl_pkt_sc(struct lio_device *lio_dev,
1576                       struct lio_ctrl_pkt *ctrl_pkt)
1577 {
1578         struct lio_soft_command *sc = NULL;
1579         uint32_t uddsize, datasize;
1580         uint32_t rdatasize;
1581         uint8_t *data;
1582
1583         uddsize = (uint32_t)(ctrl_pkt->ncmd.s.more * 8);
1584
1585         datasize = OCTEON_CMD_SIZE + uddsize;
1586         rdatasize = (ctrl_pkt->wait_time) ? 16 : 0;
1587
1588         sc = lio_alloc_soft_command(lio_dev, datasize,
1589                                     rdatasize, sizeof(struct lio_ctrl_pkt));
1590         if (sc == NULL)
1591                 return NULL;
1592
1593         rte_memcpy(sc->ctxptr, ctrl_pkt, sizeof(struct lio_ctrl_pkt));
1594
1595         data = (uint8_t *)sc->virtdptr;
1596
1597         rte_memcpy(data, &ctrl_pkt->ncmd, OCTEON_CMD_SIZE);
1598
1599         lio_swap_8B_data((uint64_t *)data, OCTEON_CMD_SIZE >> 3);
1600
1601         if (uddsize) {
1602                 /* Endian swap for UDD should have been done by the caller. */
1603                 rte_memcpy(data + OCTEON_CMD_SIZE, ctrl_pkt->udd, uddsize);
1604         }
1605
1606         sc->iq_no = (uint32_t)ctrl_pkt->iq_no;
1607
1608         lio_prepare_soft_command(lio_dev, sc,
1609                                  LIO_OPCODE, LIO_OPCODE_CMD,
1610                                  0, 0, 0);
1611
1612         sc->callback = lio_ctrl_cmd_callback;
1613         sc->callback_arg = sc;
1614         sc->wait_time = ctrl_pkt->wait_time;
1615
1616         return sc;
1617 }
1618
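/** Send a control packet to the firmware as a soft command
 *  @param lio_dev  - lio device pointer
 *  @param ctrl_pkt - control packet carrying the command and optional UDD
 *
 *  @returns -1 on allocation or send failure, otherwise the value returned
 *  by lio_send_soft_command() (e.g. LIO_IQ_SEND_OK or LIO_IQ_SEND_STOP).
 */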
1619 int
1620 lio_send_ctrl_pkt(struct lio_device *lio_dev, struct lio_ctrl_pkt *ctrl_pkt)
1621 {
1622         struct lio_soft_command *sc = NULL;
1623         int retval;
1624
1625         sc = lio_alloc_ctrl_pkt_sc(lio_dev, ctrl_pkt);
1626         if (sc == NULL) {
1627                 lio_dev_err(lio_dev, "soft command allocation failed\n");
1628                 return -1;
1629         }
1630
1631         retval = lio_send_soft_command(lio_dev, sc);
1632         if (retval == LIO_IQ_SEND_FAILED) {
1633                 lio_free_soft_command(sc);
1634                 lio_dev_err(lio_dev, "Port: %d soft command: %d send failed status: %x\n",
1635                             lio_dev->port_id, ctrl_pkt->ncmd.s.cmd, retval);
1636                 return -1;
1637         }
1638
1639         return retval;
1640 }
1641
1642 /** Send data packet to the device
1643  *  @param lio_dev - lio device pointer
1644  *  @param ndata   - control structure with queueing and buffer information
1645  *
1646  *  @returns LIO_IQ_SEND_FAILED if it failed to add to the input queue,
1647  *  LIO_IQ_SEND_STOP if the queue should be stopped, LIO_IQ_SEND_OK on success.
1648  */
1649 static inline int
1650 lio_send_data_pkt(struct lio_device *lio_dev, struct lio_data_pkt *ndata)
1651 {
1652         return lio_send_command(lio_dev, ndata->q_no, &ndata->cmd,
1653                                 ndata->buf, ndata->datasize, ndata->reqtype);
1654 }
1655
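/**
 * \brief Burst transmit handler for the liquidio PMD
 * @param tx_queue - lio_instr_queue the burst is submitted on
 * @param pkts     - array of mbufs to transmit
 * @param nb_pkts  - number of mbufs in the array
 *
 * @returns the number of packets actually queued; this can be less than
 * nb_pkts when the link is down or the instruction queue fills up.
 */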
1656 uint16_t
1657 lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
1658 {
1659         struct lio_instr_queue *txq = tx_queue;
1660         union lio_cmd_setup cmdsetup;
1661         struct lio_device *lio_dev;
1662         struct lio_data_pkt ndata;
1663         int i, processed = 0;
1664         struct rte_mbuf *m;
1665         uint32_t tag = 0;
1666         int status = 0;
1667         int iq_no;
1668
1669         lio_dev = txq->lio_dev;
1670         iq_no = txq->txpciq.s.q_no;
1671
1672         if (!lio_dev->intf_open || !lio_dev->linfo.link.s.link_up) {
1673                 PMD_TX_LOG(lio_dev, ERR, "Transmit failed, link_status: %d\n",
1674                            lio_dev->linfo.link.s.link_up);
1675                 goto xmit_failed;
1676         }
1677
1678         lio_dev_cleanup_iq(lio_dev, iq_no);
1679
1680         for (i = 0; i < nb_pkts; i++) {
1681                 uint32_t pkt_len = 0;
1682
1683                 m = pkts[i];
1684
1685                 /* Prepare the attributes for the data to be passed to BASE. */
1686                 memset(&ndata, 0, sizeof(struct lio_data_pkt));
1687
1688                 ndata.buf = m;
1689
1690                 ndata.q_no = iq_no;
1691                 if (lio_iq_is_full(lio_dev, ndata.q_no)) {
1692                         if (lio_dev_cleanup_iq(lio_dev, iq_no)) {
1693                                 PMD_TX_LOG(lio_dev, ERR,
1694                                            "Transmit failed iq:%d full\n",
1695                                            ndata.q_no);
1696                                 break;
1697                         }
1698                 }
1699
1700                 cmdsetup.cmd_setup64 = 0;
1701                 cmdsetup.s.iq_no = iq_no;
1702
1703                 /* check checksum offload flags to form cmd */
1704                 if (m->ol_flags & PKT_TX_IP_CKSUM)
1705                         cmdsetup.s.ip_csum = 1;
1706
1707                 if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
1708                                 (m->ol_flags & PKT_TX_UDP_CKSUM))
1709                         cmdsetup.s.transport_csum = 1;
1710
1711                 if (m->nb_segs == 1) {
1712                         pkt_len = rte_pktmbuf_data_len(m);
1713                         cmdsetup.s.u.datasize = pkt_len;
1714                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1715                                             &cmdsetup, tag);
1716                         ndata.cmd.cmd3.dptr = rte_mbuf_data_dma_addr(m);
1717                         ndata.reqtype = LIO_REQTYPE_NORESP_NET;
1718                 } else {
1719                         struct lio_buf_free_info *finfo;
1720                         struct lio_gather *g;
1721                         phys_addr_t phyaddr;
1722                         int i, frags;
1723
1724                         finfo = (struct lio_buf_free_info *)rte_malloc(NULL,
1725                                                         sizeof(*finfo), 0);
1726                         if (finfo == NULL) {
1727                                 PMD_TX_LOG(lio_dev, ERR,
1728                                            "free buffer alloc failed\n");
1729                                 goto xmit_failed;
1730                         }
1731
1732                         rte_spinlock_lock(&lio_dev->glist_lock[iq_no]);
1733                         g = (struct lio_gather *)list_delete_first_node(
1734                                                 &lio_dev->glist_head[iq_no]);
1735                         rte_spinlock_unlock(&lio_dev->glist_lock[iq_no]);
1736                         if (g == NULL) {
1737                                 PMD_TX_LOG(lio_dev, ERR,
1738                                            "Transmit scatter gather: glist null!\n");
                                /* free finfo allocated above to avoid a leak */
                                rte_free(finfo);
1739                                 goto xmit_failed;
1740                         }
1741
1742                         cmdsetup.s.gather = 1;
1743                         cmdsetup.s.u.gatherptrs = m->nb_segs;
1744                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1745                                             &cmdsetup, tag);
1746
1747                         memset(g->sg, 0, g->sg_size);
1748                         g->sg[0].ptr[0] = rte_mbuf_data_dma_addr(m);
1749                         lio_add_sg_size(&g->sg[0], m->data_len, 0);
1750                         pkt_len = m->data_len;
1751                         finfo->mbuf = m;
1752
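                        /* Each lio_sg_entry holds four buffer pointers, so
                         * segment i below lands in sg[i >> 2], slot (i & 3);
                         * e.g. segments 1-3 fill the rest of sg[0] and
                         * segment 4 starts sg[1].
                         */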
1753                         /* First segment is handled above */
1754                         frags = m->nb_segs - 1;
1755                         i = 1;
1756                         m = m->next;
1757                         while (frags--) {
1758                                 g->sg[(i >> 2)].ptr[(i & 3)] =
1759                                                 rte_mbuf_data_dma_addr(m);
1760                                 lio_add_sg_size(&g->sg[(i >> 2)],
1761                                                 m->data_len, (i & 3));
1762                                 pkt_len += m->data_len;
1763                                 i++;
1764                                 m = m->next;
1765                         }
1766
1767                         phyaddr = rte_mem_virt2phy(g->sg);
1768                         if (phyaddr == RTE_BAD_PHYS_ADDR) {
1769                                 PMD_TX_LOG(lio_dev, ERR, "bad phys addr\n");
                                /* free finfo allocated above to avoid a leak */
                                rte_free(finfo);
1770                                 goto xmit_failed;
1771                         }
1772
1773                         ndata.cmd.cmd3.dptr = phyaddr;
1774                         ndata.reqtype = LIO_REQTYPE_NORESP_NET_SG;
1775
1776                         finfo->g = g;
1777                         finfo->lio_dev = lio_dev;
1778                         finfo->iq_no = (uint64_t)iq_no;
1779                         ndata.buf = finfo;
1780                 }
1781
1782                 ndata.datasize = pkt_len;
1783
1784                 status = lio_send_data_pkt(lio_dev, &ndata);
1785
1786                 if (unlikely(status == LIO_IQ_SEND_FAILED)) {
1787                         PMD_TX_LOG(lio_dev, ERR, "send failed\n");
1788                         break;
1789                 }
1790
1791                 if (unlikely(status == LIO_IQ_SEND_STOP)) {
1792                         PMD_TX_LOG(lio_dev, DEBUG, "iq full\n");
1793                         /* create space as iq is full */
1794                         lio_dev_cleanup_iq(lio_dev, iq_no);
1795                 }
1796
1797                 processed++;
1798         }
1799
1800 xmit_failed:
1801
1802         return processed;
1803 }
1804
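/*
 * Note: lio_dev_xmit_pkts() is not called directly by applications; it is
 * presumably installed as eth_dev->tx_pkt_burst during device setup (see
 * lio_ethdev.c), so traffic reaches it through the generic burst API.  A
 * minimal sketch of that call path, for illustration only:
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, mbufs, nb_mbufs);
 *     // mbufs[0..sent-1] were queued; the caller still owns the remainder
 *     // and is expected to retry or drop them.
 */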