[dpdk.git] / drivers / net / liquidio / lio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Cavium, Inc
3  */
4
5 #include <rte_ethdev.h>
6 #include <rte_cycles.h>
7 #include <rte_malloc.h>
8
9 #include "lio_logs.h"
10 #include "lio_struct.h"
11 #include "lio_ethdev.h"
12 #include "lio_rxtx.h"
13
14 #define LIO_MAX_SG 12
15 /* Flush the iq if the number of available tx descriptors falls below LIO_FLUSH_WM */
16 #define LIO_FLUSH_WM(_iq) ((_iq)->max_count / 2)
17 #define LIO_PKT_IN_DONE_CNT_MASK 0x00000000FFFFFFFFULL
18
19 static void
20 lio_droq_compute_max_packet_bufs(struct lio_droq *droq)
21 {
22         uint32_t count = 0;
23
24         do {
25                 count += droq->buffer_size;
26         } while (count < LIO_MAX_RX_PKTLEN);
27 }
28
29 static void
30 lio_droq_reset_indices(struct lio_droq *droq)
31 {
32         droq->read_idx  = 0;
33         droq->write_idx = 0;
34         droq->refill_idx = 0;
35         droq->refill_count = 0;
36         rte_atomic64_set(&droq->pkts_pending, 0);
37 }
38
39 static void
40 lio_droq_destroy_ring_buffers(struct lio_droq *droq)
41 {
42         uint32_t i;
43
44         for (i = 0; i < droq->max_count; i++) {
45                 if (droq->recv_buf_list[i].buffer) {
46                         rte_pktmbuf_free((struct rte_mbuf *)
47                                          droq->recv_buf_list[i].buffer);
48                         droq->recv_buf_list[i].buffer = NULL;
49                 }
50         }
51
52         lio_droq_reset_indices(droq);
53 }
54
55 static int
56 lio_droq_setup_ring_buffers(struct lio_device *lio_dev,
57                             struct lio_droq *droq)
58 {
59         struct lio_droq_desc *desc_ring = droq->desc_ring;
60         uint32_t i;
61         void *buf;
62
63         for (i = 0; i < droq->max_count; i++) {
64                 buf = rte_pktmbuf_alloc(droq->mpool);
65                 if (buf == NULL) {
66                         lio_dev_err(lio_dev, "buffer alloc failed\n");
67                         droq->stats.rx_alloc_failure++;
68                         lio_droq_destroy_ring_buffers(droq);
69                         return -ENOMEM;
70                 }
71
72                 droq->recv_buf_list[i].buffer = buf;
73                 droq->info_list[i].length = 0;
74
75                 /* map ring buffers into memory */
76                 desc_ring[i].info_ptr = lio_map_ring_info(droq, i);
77                 desc_ring[i].buffer_ptr =
78                         lio_map_ring(droq->recv_buf_list[i].buffer);
79         }
80
81         lio_droq_reset_indices(droq);
82
83         lio_droq_compute_max_packet_bufs(droq);
84
85         return 0;
86 }
87
88 static void
89 lio_dma_zone_free(struct lio_device *lio_dev, const struct rte_memzone *mz)
90 {
91         const struct rte_memzone *mz_tmp;
92         int ret = 0;
93
94         if (mz == NULL) {
95                 lio_dev_err(lio_dev, "Memzone NULL\n");
96                 return;
97         }
98
99         mz_tmp = rte_memzone_lookup(mz->name);
100         if (mz_tmp == NULL) {
101                 lio_dev_err(lio_dev, "Memzone %s Not Found\n", mz->name);
102                 return;
103         }
104
105         ret = rte_memzone_free(mz);
106         if (ret)
107                 lio_dev_err(lio_dev, "Memzone free Failed ret %d\n", ret);
108 }
109
110 /**
111  *  Frees the space for descriptor ring for the droq.
112  *
113  *  @param lio_dev      - pointer to the lio device structure
114  *  @param q_no         - droq no.
115  */
116 static void
117 lio_delete_droq(struct lio_device *lio_dev, uint32_t q_no)
118 {
119         struct lio_droq *droq = lio_dev->droq[q_no];
120
121         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
122
123         lio_droq_destroy_ring_buffers(droq);
124         rte_free(droq->recv_buf_list);
125         droq->recv_buf_list = NULL;
126         lio_dma_zone_free(lio_dev, droq->info_mz);
127         lio_dma_zone_free(lio_dev, droq->desc_ring_mz);
128
129         memset(droq, 0, LIO_DROQ_SIZE);
130 }
131
132 static void *
133 lio_alloc_info_buffer(struct lio_device *lio_dev,
134                       struct lio_droq *droq, unsigned int socket_id)
135 {
136         droq->info_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
137                                                  "info_list", droq->q_no,
138                                                  (droq->max_count *
139                                                         LIO_DROQ_INFO_SIZE),
140                                                  RTE_CACHE_LINE_SIZE,
141                                                  socket_id);
142
143         if (droq->info_mz == NULL)
144                 return NULL;
145
146         droq->info_list_dma = droq->info_mz->iova;
147         droq->info_alloc_size = droq->info_mz->len;
148         droq->info_base_addr = (size_t)droq->info_mz->addr;
149
150         return droq->info_mz->addr;
151 }
152
153 /**
154  *  Allocates space for the descriptor ring for the droq and
155  *  sets the base addr, num desc etc in Octeon registers.
156  *
157  * @param lio_dev       - pointer to the lio device structure
158  * @param q_no          - droq no.
159  * @param num_descs, desc_size, mpool, socket_id - ring size, buffer size, mbuf pool and NUMA socket
160  * @return Success: 0   Failure: -1
161  */
162 static int
163 lio_init_droq(struct lio_device *lio_dev, uint32_t q_no,
164               uint32_t num_descs, uint32_t desc_size,
165               struct rte_mempool *mpool, unsigned int socket_id)
166 {
167         uint32_t c_refill_threshold;
168         uint32_t desc_ring_size;
169         struct lio_droq *droq;
170
171         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
172
173         droq = lio_dev->droq[q_no];
174         droq->lio_dev = lio_dev;
175         droq->q_no = q_no;
176         droq->mpool = mpool;
177
178         c_refill_threshold = LIO_OQ_REFILL_THRESHOLD_CFG(lio_dev);
179
180         droq->max_count = num_descs;
181         droq->buffer_size = desc_size;
182
183         desc_ring_size = droq->max_count * LIO_DROQ_DESC_SIZE;
184         droq->desc_ring_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
185                                                       "droq", q_no,
186                                                       desc_ring_size,
187                                                       RTE_CACHE_LINE_SIZE,
188                                                       socket_id);
189
190         if (droq->desc_ring_mz == NULL) {
191                 lio_dev_err(lio_dev,
192                             "Output queue %d ring alloc failed\n", q_no);
193                 return -1;
194         }
195
196         droq->desc_ring_dma = droq->desc_ring_mz->iova;
197         droq->desc_ring = (struct lio_droq_desc *)droq->desc_ring_mz->addr;
198
199         lio_dev_dbg(lio_dev, "droq[%d]: desc_ring: virt: 0x%p, dma: %lx\n",
200                     q_no, droq->desc_ring, (unsigned long)droq->desc_ring_dma);
201         lio_dev_dbg(lio_dev, "droq[%d]: num_desc: %d\n", q_no,
202                     droq->max_count);
203
204         droq->info_list = lio_alloc_info_buffer(lio_dev, droq, socket_id);
205         if (droq->info_list == NULL) {
206                 lio_dev_err(lio_dev, "Cannot allocate memory for info list.\n");
207                 goto init_droq_fail;
208         }
209
210         droq->recv_buf_list = rte_zmalloc_socket("recv_buf_list",
211                                                  (droq->max_count *
212                                                         LIO_DROQ_RECVBUF_SIZE),
213                                                  RTE_CACHE_LINE_SIZE,
214                                                  socket_id);
215         if (droq->recv_buf_list == NULL) {
216                 lio_dev_err(lio_dev,
217                             "Output queue recv buf list alloc failed\n");
218                 goto init_droq_fail;
219         }
220
221         if (lio_droq_setup_ring_buffers(lio_dev, droq))
222                 goto init_droq_fail;
223
224         droq->refill_threshold = c_refill_threshold;
225
226         rte_spinlock_init(&droq->lock);
227
228         lio_dev->fn_list.setup_oq_regs(lio_dev, q_no);
229
230         lio_dev->io_qmask.oq |= (1ULL << q_no);
231
232         return 0;
233
234 init_droq_fail:
235         lio_delete_droq(lio_dev, q_no);
236
237         return -1;
238 }
239
240 int
241 lio_setup_droq(struct lio_device *lio_dev, int oq_no, int num_descs,
242                int desc_size, struct rte_mempool *mpool, unsigned int socket_id)
243 {
244         struct lio_droq *droq;
245
246         PMD_INIT_FUNC_TRACE();
247
248         if (lio_dev->droq[oq_no]) {
249                 lio_dev_dbg(lio_dev, "Droq %d in use\n", oq_no);
250                 return 0;
251         }
252
253         /* Allocate the DS for the new droq. */
254         droq = rte_zmalloc_socket("ethdev RX queue", sizeof(*droq),
255                                   RTE_CACHE_LINE_SIZE, socket_id);
256         if (droq == NULL)
257                 return -ENOMEM;
258
259         lio_dev->droq[oq_no] = droq;
260
261         /* Initialize the Droq */
262         if (lio_init_droq(lio_dev, oq_no, num_descs, desc_size, mpool,
263                           socket_id)) {
264                 lio_dev_err(lio_dev, "Droq[%u] Initialization Failed\n", oq_no);
265                 rte_free(lio_dev->droq[oq_no]);
266                 lio_dev->droq[oq_no] = NULL;
267                 return -ENOMEM;
268         }
269
270         lio_dev->num_oqs++;
271
272         lio_dev_dbg(lio_dev, "Total number of OQ: %d\n", lio_dev->num_oqs);
273
274         /* Send credits for the Octeon output queue. Credits are always
275          * sent after the output queue is enabled.
276          */
277         rte_write32(lio_dev->droq[oq_no]->max_count,
278                     lio_dev->droq[oq_no]->pkts_credit_reg);
279         rte_wmb();
280
281         return 0;
282 }
283
284 static inline uint32_t
285 lio_droq_get_bufcount(uint32_t buf_size, uint32_t total_len)
286 {
287         uint32_t buf_cnt = 0;
288
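        /* Ceiling division: the smallest number of ring buffers of size
         * buf_size needed to hold total_len bytes.
         */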
289         while (total_len > (buf_size * buf_cnt))
290                 buf_cnt++;
291
292         return buf_cnt;
293 }
294
295 /* If we were not able to refill all buffers, try to move around
296  * the buffers that were not dispatched.
297  */
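/* Buffers left between refill_idx and read_idx (i.e. descriptors whose
 * mbufs were never handed to the application) are compacted towards
 * refill_idx so the refilled region of the ring stays contiguous.
 */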
298 static inline uint32_t
299 lio_droq_refill_pullup_descs(struct lio_droq *droq,
300                              struct lio_droq_desc *desc_ring)
301 {
302         uint32_t refill_index = droq->refill_idx;
303         uint32_t desc_refilled = 0;
304
305         while (refill_index != droq->read_idx) {
306                 if (droq->recv_buf_list[refill_index].buffer) {
307                         droq->recv_buf_list[droq->refill_idx].buffer =
308                                 droq->recv_buf_list[refill_index].buffer;
309                         desc_ring[droq->refill_idx].buffer_ptr =
310                                 desc_ring[refill_index].buffer_ptr;
311                         droq->recv_buf_list[refill_index].buffer = NULL;
312                         desc_ring[refill_index].buffer_ptr = 0;
313                         do {
314                                 droq->refill_idx = lio_incr_index(
315                                                         droq->refill_idx, 1,
316                                                         droq->max_count);
317                                 desc_refilled++;
318                                 droq->refill_count--;
319                         } while (droq->recv_buf_list[droq->refill_idx].buffer);
320                 }
321                 refill_index = lio_incr_index(refill_index, 1,
322                                               droq->max_count);
323         }       /* while */
324
325         return desc_refilled;
326 }
327
328 /* lio_droq_refill
329  *
330  * @param droq          - droq in which descriptors require new buffers.
331  *
332  * Description:
333  *  Called from the Rx path to refill the descriptors from which buffers
334  *  were dispatched to upper layers. Attempts to allocate new buffers;
335  *  if that fails, moves up buffers (that were not dispatched) to form a
336  *  contiguous ring.
337  *
338  * Returns:
339  *  No of descriptors refilled.
340  *
341  * Locks:
342  * This routine is called with droq->lock held.
343  */
344 static uint32_t
345 lio_droq_refill(struct lio_droq *droq)
346 {
347         struct lio_droq_desc *desc_ring;
348         uint32_t desc_refilled = 0;
349         void *buf = NULL;
350
351         desc_ring = droq->desc_ring;
352
353         while (droq->refill_count && (desc_refilled < droq->max_count)) {
354                 /* If a valid buffer exists (happens if there is no dispatch),
355                  * reuse the buffer, else allocate.
356                  */
357                 if (droq->recv_buf_list[droq->refill_idx].buffer == NULL) {
358                         buf = rte_pktmbuf_alloc(droq->mpool);
359                         /* If a buffer could not be allocated, no point in
360                          * continuing
361                          */
362                         if (buf == NULL) {
363                                 droq->stats.rx_alloc_failure++;
364                                 break;
365                         }
366
367                         droq->recv_buf_list[droq->refill_idx].buffer = buf;
368                 }
369
370                 desc_ring[droq->refill_idx].buffer_ptr =
371                     lio_map_ring(droq->recv_buf_list[droq->refill_idx].buffer);
372                 /* Reset any previous values in the length field. */
373                 droq->info_list[droq->refill_idx].length = 0;
374
375                 droq->refill_idx = lio_incr_index(droq->refill_idx, 1,
376                                                   droq->max_count);
377                 desc_refilled++;
378                 droq->refill_count--;
379         }
380
381         if (droq->refill_count)
382                 desc_refilled += lio_droq_refill_pullup_descs(droq, desc_ring);
383
384         /* If droq->refill_count is still non-zero here, pass two could
385          * not have changed it: we only moved buffers to close the gap in
386          * the ring, so the same number of buffers still needs to be
387          * refilled later.
388          */
389         return desc_refilled;
390 }
391
392 static int
393 lio_droq_fast_process_packet(struct lio_device *lio_dev,
394                              struct lio_droq *droq,
395                              struct rte_mbuf **rx_pkts)
396 {
397         struct rte_mbuf *nicbuf = NULL;
398         struct lio_droq_info *info;
399         uint32_t total_len = 0;
400         int data_total_len = 0;
401         uint32_t pkt_len = 0;
402         union octeon_rh *rh;
403         int data_pkts = 0;
404
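        /* The info entry for this descriptor is filled in by the device;
         * byte-swap its first two 8-byte words (length and response header)
         * before interpreting them.
         */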
405         info = &droq->info_list[droq->read_idx];
406         lio_swap_8B_data((uint64_t *)info, 2);
407
408         if (!info->length)
409                 return -1;
410
411         /* Len of resp hdr is included in the received data len. */
412         info->length -= OCTEON_RH_SIZE;
413         rh = &info->rh;
414
415         total_len += (uint32_t)info->length;
416
417         if (lio_opcode_slow_path(rh)) {
418                 uint32_t buf_cnt;
419
420                 buf_cnt = lio_droq_get_bufcount(droq->buffer_size,
421                                                 (uint32_t)info->length);
422                 droq->read_idx = lio_incr_index(droq->read_idx, buf_cnt,
423                                                 droq->max_count);
424                 droq->refill_count += buf_cnt;
425         } else {
426                 if (info->length <= droq->buffer_size) {
427                         if (rh->r_dh.has_hash)
428                                 pkt_len = (uint32_t)(info->length - 8);
429                         else
430                                 pkt_len = (uint32_t)info->length;
431
432                         nicbuf = droq->recv_buf_list[droq->read_idx].buffer;
433                         droq->recv_buf_list[droq->read_idx].buffer = NULL;
434                         droq->read_idx = lio_incr_index(
435                                                 droq->read_idx, 1,
436                                                 droq->max_count);
437                         droq->refill_count++;
438
439                         if (likely(nicbuf != NULL)) {
440                                 /* We don't have a way to pass flags yet */
441                                 nicbuf->ol_flags = 0;
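                                /* A leading 8-byte RSS hash, when present,
                                 * is recorded in the mbuf and then stripped
                                 * by advancing data_off past it.
                                 */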
442                                 if (rh->r_dh.has_hash) {
443                                         uint64_t *hash_ptr;
444
445                                         nicbuf->ol_flags |= PKT_RX_RSS_HASH;
446                                         hash_ptr = rte_pktmbuf_mtod(nicbuf,
447                                                                     uint64_t *);
448                                         lio_swap_8B_data(hash_ptr, 1);
449                                         nicbuf->hash.rss = (uint32_t)*hash_ptr;
450                                         nicbuf->data_off += 8;
451                                 }
452
453                                 nicbuf->pkt_len = pkt_len;
454                                 nicbuf->data_len = pkt_len;
455                                 nicbuf->port = lio_dev->port_id;
456                                 /* Store the mbuf */
457                                 rx_pkts[data_pkts++] = nicbuf;
458                                 data_total_len += pkt_len;
459                         }
460
461                         /* Prefetch buffer pointers when on a cache line
462                          * boundary
463                          */
464                         if ((droq->read_idx & 3) == 0) {
465                                 rte_prefetch0(
466                                     &droq->recv_buf_list[droq->read_idx]);
467                                 rte_prefetch0(
468                                     &droq->info_list[droq->read_idx]);
469                         }
470                 } else {
471                         struct rte_mbuf *first_buf = NULL;
472                         struct rte_mbuf *last_buf = NULL;
473
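                        /* The packet spans several ring buffers: chain one
                         * mbuf per buffer into a multi-segment packet with
                         * first_buf as the head.
                         */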
474                         while (pkt_len < info->length) {
475                                 int cpy_len = 0;
476
477                                 cpy_len = ((pkt_len + droq->buffer_size) >
478                                                 info->length)
479                                                 ? ((uint32_t)info->length -
480                                                         pkt_len)
481                                                 : droq->buffer_size;
482
483                                 nicbuf =
484                                     droq->recv_buf_list[droq->read_idx].buffer;
485                                 droq->recv_buf_list[droq->read_idx].buffer =
486                                     NULL;
487
488                                 if (likely(nicbuf != NULL)) {
489                                         /* Note the first seg */
490                                         if (!pkt_len)
491                                                 first_buf = nicbuf;
492
493                                         nicbuf->port = lio_dev->port_id;
494                                         /* We don't have a way to pass
495                                          * flags yet
496                                          */
497                                         nicbuf->ol_flags = 0;
498                                         if ((!pkt_len) && (rh->r_dh.has_hash)) {
499                                                 uint64_t *hash_ptr;
500
501                                                 nicbuf->ol_flags |=
502                                                     PKT_RX_RSS_HASH;
503                                                 hash_ptr = rte_pktmbuf_mtod(
504                                                     nicbuf, uint64_t *);
505                                                 lio_swap_8B_data(hash_ptr, 1);
506                                                 nicbuf->hash.rss =
507                                                     (uint32_t)*hash_ptr;
508                                                 nicbuf->data_off += 8;
509                                                 nicbuf->pkt_len = cpy_len - 8;
510                                                 nicbuf->data_len = cpy_len - 8;
511                                         } else {
512                                                 nicbuf->pkt_len = cpy_len;
513                                                 nicbuf->data_len = cpy_len;
514                                         }
515
516                                         if (pkt_len)
517                                                 first_buf->nb_segs++;
518
519                                         if (last_buf)
520                                                 last_buf->next = nicbuf;
521
522                                         last_buf = nicbuf;
523                                 } else {
524                                         PMD_RX_LOG(lio_dev, ERR, "no buf\n");
525                                 }
526
527                                 pkt_len += cpy_len;
528                                 droq->read_idx = lio_incr_index(
529                                                         droq->read_idx,
530                                                         1, droq->max_count);
531                                 droq->refill_count++;
532
533                                 /* Prefetch buffer pointers when on a
534                                  * cache line boundary
535                                  */
536                                 if ((droq->read_idx & 3) == 0) {
537                                         rte_prefetch0(&droq->recv_buf_list
538                                                               [droq->read_idx]);
539
540                                         rte_prefetch0(
541                                             &droq->info_list[droq->read_idx]);
542                                 }
543                         }
544                         rx_pkts[data_pkts++] = first_buf;
545                         if (rh->r_dh.has_hash)
546                                 data_total_len += (pkt_len - 8);
547                         else
548                                 data_total_len += pkt_len;
549                 }
550
551                 /* Inform upper layer about packet checksum verification */
552                 struct rte_mbuf *m = rx_pkts[data_pkts - 1];
553
554                 if (rh->r_dh.csum_verified & LIO_IP_CSUM_VERIFIED)
555                         m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
556
557                 if (rh->r_dh.csum_verified & LIO_L4_CSUM_VERIFIED)
558                         m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
559         }
560
561         if (droq->refill_count >= droq->refill_threshold) {
562                 int desc_refilled = lio_droq_refill(droq);
563
564                 /* Flush the droq descriptor data to memory to be sure
565                  * that when we update the credits the data in memory is
566                  * accurate.
567                  */
568                 rte_wmb();
569                 rte_write32(desc_refilled, droq->pkts_credit_reg);
570                 /* make sure mmio write completes */
571                 rte_wmb();
572         }
573
574         info->length = 0;
575         info->rh.rh64 = 0;
576
577         droq->stats.pkts_received++;
578         droq->stats.rx_pkts_received += data_pkts;
579         droq->stats.rx_bytes_received += data_total_len;
580         droq->stats.bytes_received += total_len;
581
582         return data_pkts;
583 }
584
585 static uint32_t
586 lio_droq_fast_process_packets(struct lio_device *lio_dev,
587                               struct lio_droq *droq,
588                               struct rte_mbuf **rx_pkts,
589                               uint32_t pkts_to_process)
590 {
591         int ret, data_pkts = 0;
592         uint32_t pkt;
593
594         for (pkt = 0; pkt < pkts_to_process; pkt++) {
595                 ret = lio_droq_fast_process_packet(lio_dev, droq,
596                                                    &rx_pkts[data_pkts]);
597                 if (ret < 0) {
598                         lio_dev_err(lio_dev, "Port[%d] DROQ[%d] idx: %d len:0, pkt_cnt: %d\n",
599                                     lio_dev->port_id, droq->q_no,
600                                     droq->read_idx, pkts_to_process);
601                         break;
602                 }
603                 data_pkts += ret;
604         }
605
606         rte_atomic64_sub(&droq->pkts_pending, pkt);
607
608         return data_pkts;
609 }
610
611 static inline uint32_t
612 lio_droq_check_hw_for_pkts(struct lio_droq *droq)
613 {
614         uint32_t last_count;
615         uint32_t pkt_count;
616
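        /* pkts_sent_reg is a running count of packets the device has placed
         * in this DROQ; the delta since the last read is the number of new
         * packets waiting to be processed.
         */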
617         pkt_count = rte_read32(droq->pkts_sent_reg);
618
619         last_count = pkt_count - droq->pkt_count;
620         droq->pkt_count = pkt_count;
621
622         if (last_count)
623                 rte_atomic64_add(&droq->pkts_pending, last_count);
624
625         return last_count;
626 }
627
628 uint16_t
629 lio_dev_recv_pkts(void *rx_queue,
630                   struct rte_mbuf **rx_pkts,
631                   uint16_t budget)
632 {
633         struct lio_droq *droq = rx_queue;
634         struct lio_device *lio_dev = droq->lio_dev;
635         uint32_t pkts_processed = 0;
636         uint32_t pkt_count = 0;
637
638         lio_droq_check_hw_for_pkts(droq);
639
640         pkt_count = rte_atomic64_read(&droq->pkts_pending);
641         if (!pkt_count)
642                 return 0;
643
644         if (pkt_count > budget)
645                 pkt_count = budget;
646
647         /* Grab the lock */
648         rte_spinlock_lock(&droq->lock);
649         pkts_processed = lio_droq_fast_process_packets(lio_dev,
650                                                        droq, rx_pkts,
651                                                        pkt_count);
652
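        /* Acknowledge the packets accounted for so far by writing the count
         * back to the sent register, then restart the local count.
         */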
653         if (droq->pkt_count) {
654                 rte_write32(droq->pkt_count, droq->pkts_sent_reg);
655                 droq->pkt_count = 0;
656         }
657
658         /* Release the spin lock */
659         rte_spinlock_unlock(&droq->lock);
660
661         return pkts_processed;
662 }
663
664 void
665 lio_delete_droq_queue(struct lio_device *lio_dev,
666                       int oq_no)
667 {
668         lio_delete_droq(lio_dev, oq_no);
669         lio_dev->num_oqs--;
670         rte_free(lio_dev->droq[oq_no]);
671         lio_dev->droq[oq_no] = NULL;
672 }
673
674 /**
675  *  lio_init_instr_queue()
676  *  @param lio_dev      - pointer to the lio device structure.
677  *  @param txpciq       - queue to be initialized.
678  *
679  *  Called at driver init time for each input queue. The descriptor count
680  *  is passed in; the command format (32B/64B) comes from the device config.
681  *
682  *  @return  Success: 0 Failure: -1
683  */
684 static int
685 lio_init_instr_queue(struct lio_device *lio_dev,
686                      union octeon_txpciq txpciq,
687                      uint32_t num_descs, unsigned int socket_id)
688 {
689         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
690         struct lio_instr_queue *iq;
691         uint32_t instr_type;
692         uint32_t q_size;
693
694         instr_type = LIO_IQ_INSTR_TYPE(lio_dev);
695
696         q_size = instr_type * num_descs;
697         iq = lio_dev->instr_queue[iq_no];
698         iq->iq_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
699                                              "instr_queue", iq_no, q_size,
700                                              RTE_CACHE_LINE_SIZE,
701                                              socket_id);
702         if (iq->iq_mz == NULL) {
703                 lio_dev_err(lio_dev, "Cannot allocate memory for instr queue %d\n",
704                             iq_no);
705                 return -1;
706         }
707
708         iq->base_addr_dma = iq->iq_mz->iova;
709         iq->base_addr = (uint8_t *)iq->iq_mz->addr;
710
711         iq->max_count = num_descs;
712
713         /* Initialize a list to hold requests that have been posted to Octeon
714          * but have not yet been fetched by it
715          */
716         iq->request_list = rte_zmalloc_socket("request_list",
717                                               sizeof(*iq->request_list) *
718                                                         num_descs,
719                                               RTE_CACHE_LINE_SIZE,
720                                               socket_id);
721         if (iq->request_list == NULL) {
722                 lio_dev_err(lio_dev, "Alloc failed for IQ[%d] nr free list\n",
723                             iq_no);
724                 lio_dma_zone_free(lio_dev, iq->iq_mz);
725                 return -1;
726         }
727
728         lio_dev_dbg(lio_dev, "IQ[%d]: base: %p basedma: %lx count: %d\n",
729                     iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
730                     iq->max_count);
731
732         iq->lio_dev = lio_dev;
733         iq->txpciq.txpciq64 = txpciq.txpciq64;
734         iq->fill_cnt = 0;
735         iq->host_write_index = 0;
736         iq->lio_read_index = 0;
737         iq->flush_index = 0;
738
739         rte_atomic64_set(&iq->instr_pending, 0);
740
741         /* Initialize the spinlock for this instruction queue */
742         rte_spinlock_init(&iq->lock);
743         rte_spinlock_init(&iq->post_lock);
744
745         rte_atomic64_clear(&iq->iq_flush_running);
746
747         lio_dev->io_qmask.iq |= (1ULL << iq_no);
748
749         /* Set the 32B/64B mode for each input queue */
750         lio_dev->io_qmask.iq64B |= ((instr_type == 64) << iq_no);
751         iq->iqcmd_64B = (instr_type == 64);
752
753         lio_dev->fn_list.setup_iq_regs(lio_dev, iq_no);
754
755         return 0;
756 }
757
758 int
759 lio_setup_instr_queue0(struct lio_device *lio_dev)
760 {
761         union octeon_txpciq txpciq;
762         uint32_t num_descs = 0;
763         uint32_t iq_no = 0;
764
765         num_descs = LIO_NUM_DEF_TX_DESCS_CFG(lio_dev);
766
767         lio_dev->num_iqs = 0;
768
769         lio_dev->instr_queue[0] = rte_zmalloc(NULL,
770                                         sizeof(struct lio_instr_queue), 0);
771         if (lio_dev->instr_queue[0] == NULL)
772                 return -ENOMEM;
773
774         lio_dev->instr_queue[0]->q_index = 0;
775         lio_dev->instr_queue[0]->app_ctx = (void *)(size_t)0;
776         txpciq.txpciq64 = 0;
777         txpciq.s.q_no = iq_no;
778         txpciq.s.pkind = lio_dev->pfvf_hsword.pkind;
779         txpciq.s.use_qpg = 0;
780         txpciq.s.qpg = 0;
781         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, SOCKET_ID_ANY)) {
782                 rte_free(lio_dev->instr_queue[0]);
783                 lio_dev->instr_queue[0] = NULL;
784                 return -1;
785         }
786
787         lio_dev->num_iqs++;
788
789         return 0;
790 }
791
792 /**
793  *  lio_delete_instr_queue()
794  *  @param lio_dev      - pointer to the lio device structure.
795  *  @param iq_no        - queue to be deleted.
796  *
797  *  Called at driver unload time for each input queue. Deletes all
798  *  allocated resources for the input queue.
799  */
800 static void
801 lio_delete_instr_queue(struct lio_device *lio_dev, uint32_t iq_no)
802 {
803         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
804
805         rte_free(iq->request_list);
806         iq->request_list = NULL;
807         lio_dma_zone_free(lio_dev, iq->iq_mz);
808 }
809
810 void
811 lio_free_instr_queue0(struct lio_device *lio_dev)
812 {
813         lio_delete_instr_queue(lio_dev, 0);
814         rte_free(lio_dev->instr_queue[0]);
815         lio_dev->instr_queue[0] = NULL;
816         lio_dev->num_iqs--;
817 }
818
819 /* Return 0 on success, -1 on failure */
820 int
821 lio_setup_iq(struct lio_device *lio_dev, int q_index,
822              union octeon_txpciq txpciq, uint32_t num_descs, void *app_ctx,
823              unsigned int socket_id)
824 {
825         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
826
827         if (lio_dev->instr_queue[iq_no]) {
828                 lio_dev_dbg(lio_dev, "IQ is in use. Cannot create the IQ: %d again\n",
829                             iq_no);
830                 lio_dev->instr_queue[iq_no]->txpciq.txpciq64 = txpciq.txpciq64;
831                 lio_dev->instr_queue[iq_no]->app_ctx = app_ctx;
832                 return 0;
833         }
834
835         lio_dev->instr_queue[iq_no] = rte_zmalloc_socket("ethdev TX queue",
836                                                 sizeof(struct lio_instr_queue),
837                                                 RTE_CACHE_LINE_SIZE, socket_id);
838         if (lio_dev->instr_queue[iq_no] == NULL)
839                 return -1;
840
841         lio_dev->instr_queue[iq_no]->q_index = q_index;
842         lio_dev->instr_queue[iq_no]->app_ctx = app_ctx;
843
844         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, socket_id))
845                 goto release_lio_iq;
846
847         lio_dev->num_iqs++;
848         if (lio_dev->fn_list.enable_io_queues(lio_dev))
849                 goto delete_lio_iq;
850
851         return 0;
852
853 delete_lio_iq:
854         lio_delete_instr_queue(lio_dev, iq_no);
855         lio_dev->num_iqs--;
856 release_lio_iq:
857         rte_free(lio_dev->instr_queue[iq_no]);
858         lio_dev->instr_queue[iq_no] = NULL;
859
860         return -1;
861 }
862
863 int
864 lio_wait_for_instr_fetch(struct lio_device *lio_dev)
865 {
866         int pending, instr_cnt;
867         int i, retry = 1000;
868
869         do {
870                 instr_cnt = 0;
871
872                 for (i = 0; i < LIO_MAX_INSTR_QUEUES(lio_dev); i++) {
873                         if (!(lio_dev->io_qmask.iq & (1ULL << i)))
874                                 continue;
875
876                         if (lio_dev->instr_queue[i] == NULL)
877                                 break;
878
879                         pending = rte_atomic64_read(
880                             &lio_dev->instr_queue[i]->instr_pending);
881                         if (pending)
882                                 lio_flush_iq(lio_dev, lio_dev->instr_queue[i]);
883
884                         instr_cnt += pending;
885                 }
886
887                 if (instr_cnt == 0)
888                         break;
889
890                 rte_delay_ms(1);
891
892         } while (retry-- && instr_cnt);
893
894         return instr_cnt;
895 }
896
897 static inline void
898 lio_ring_doorbell(struct lio_device *lio_dev,
899                   struct lio_instr_queue *iq)
900 {
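        /* iq->fill_cnt counts commands copied into the ring since the last
         * doorbell; writing it tells the device how many new commands are
         * ready to be fetched.
         */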
901         if (rte_atomic64_read(&lio_dev->status) == LIO_DEV_RUNNING) {
902                 rte_write32(iq->fill_cnt, iq->doorbell_reg);
903                 /* make sure doorbell write goes through */
904                 rte_wmb();
905                 iq->fill_cnt = 0;
906         }
907 }
908
909 static inline void
910 copy_cmd_into_iq(struct lio_instr_queue *iq, uint8_t *cmd)
911 {
912         uint8_t *iqptr, cmdsize;
913
914         cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
915         iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
916
917         rte_memcpy(iqptr, cmd, cmdsize);
918 }
919
920 static inline struct lio_iq_post_status
921 post_command2(struct lio_instr_queue *iq, uint8_t *cmd)
922 {
923         struct lio_iq_post_status st;
924
925         st.status = LIO_IQ_SEND_OK;
926
927         /* This ensures that the read index does not wrap around to the same
928          * position if the queue gets full before Octeon has fetched any instr.
929          */
930         if (rte_atomic64_read(&iq->instr_pending) >=
931                         (int32_t)(iq->max_count - 1)) {
932                 st.status = LIO_IQ_SEND_FAILED;
933                 st.index = -1;
934                 return st;
935         }
936
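        /* One slot short of full: the command is still accepted, but the
         * caller is told to stop posting until Octeon drains the queue.
         */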
937         if (rte_atomic64_read(&iq->instr_pending) >=
938                         (int32_t)(iq->max_count - 2))
939                 st.status = LIO_IQ_SEND_STOP;
940
941         copy_cmd_into_iq(iq, cmd);
942
943         /* "index" is returned, host_write_index is modified. */
944         st.index = iq->host_write_index;
945         iq->host_write_index = lio_incr_index(iq->host_write_index, 1,
946                                               iq->max_count);
947         iq->fill_cnt++;
948
949         /* Flush the command into memory. We need to be sure the data is in
950          * memory before indicating that the instruction is pending.
951          */
952         rte_wmb();
953
954         rte_atomic64_inc(&iq->instr_pending);
955
956         return st;
957 }
958
959 static inline void
960 lio_add_to_request_list(struct lio_instr_queue *iq,
961                         int idx, void *buf, int reqtype)
962 {
963         iq->request_list[idx].buf = buf;
964         iq->request_list[idx].reqtype = reqtype;
965 }
966
967 static inline void
968 lio_free_netsgbuf(void *buf)
969 {
970         struct lio_buf_free_info *finfo = buf;
971         struct lio_device *lio_dev = finfo->lio_dev;
972         struct rte_mbuf *m = finfo->mbuf;
973         struct lio_gather *g = finfo->g;
974         uint8_t iq = finfo->iq_no;
975
976         /* This will take care of multiple segments also */
977         rte_pktmbuf_free(m);
978
979         rte_spinlock_lock(&lio_dev->glist_lock[iq]);
980         STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq], &g->list, entries);
981         rte_spinlock_unlock(&lio_dev->glist_lock[iq]);
982         rte_free(finfo);
983 }
984
985 /* Can only run in process context */
986 static int
987 lio_process_iq_request_list(struct lio_device *lio_dev,
988                             struct lio_instr_queue *iq)
989 {
990         struct octeon_instr_irh *irh = NULL;
991         uint32_t old = iq->flush_index;
992         struct lio_soft_command *sc;
993         uint32_t inst_count = 0;
994         int reqtype;
995         void *buf;
996
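        /* Walk the request list from the last flushed entry up to the point
         * Octeon has read; those instructions have been fetched, so their
         * host-side buffers can be released or, for soft commands expecting
         * a response, moved to the ordered response list.
         */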
997         while (old != iq->lio_read_index) {
998                 reqtype = iq->request_list[old].reqtype;
999                 buf     = iq->request_list[old].buf;
1000
1001                 if (reqtype == LIO_REQTYPE_NONE)
1002                         goto skip_this;
1003
1004                 switch (reqtype) {
1005                 case LIO_REQTYPE_NORESP_NET:
1006                         rte_pktmbuf_free((struct rte_mbuf *)buf);
1007                         break;
1008                 case LIO_REQTYPE_NORESP_NET_SG:
1009                         lio_free_netsgbuf(buf);
1010                         break;
1011                 case LIO_REQTYPE_SOFT_COMMAND:
1012                         sc = buf;
1013                         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1014                         if (irh->rflag) {
1015                                 /* We're expecting a response from Octeon.
1016                                  * It's up to lio_process_ordered_list() to
1017                                  * process sc. Add sc to the ordered soft
1018                                  * command response list because we expect
1019                                  * a response from Octeon.
1020                                  */
1021                                 rte_spinlock_lock(&lio_dev->response_list.lock);
1022                                 rte_atomic64_inc(
1023                                     &lio_dev->response_list.pending_req_count);
1024                                 STAILQ_INSERT_TAIL(
1025                                         &lio_dev->response_list.head,
1026                                         &sc->node, entries);
1027                                 rte_spinlock_unlock(
1028                                                 &lio_dev->response_list.lock);
1029                         } else {
1030                                 if (sc->callback) {
1031                                         /* This callback must not sleep */
1032                                         sc->callback(LIO_REQUEST_DONE,
1033                                                      sc->callback_arg);
1034                                 }
1035                         }
1036                         break;
1037                 default:
1038                         lio_dev_err(lio_dev,
1039                                     "Unknown reqtype: %d buf: %p at idx %d\n",
1040                                     reqtype, buf, old);
1041                 }
1042
1043                 iq->request_list[old].buf = NULL;
1044                 iq->request_list[old].reqtype = 0;
1045
1046 skip_this:
1047                 inst_count++;
1048                 old = lio_incr_index(old, 1, iq->max_count);
1049         }
1050
1051         iq->flush_index = old;
1052
1053         return inst_count;
1054 }
1055
1056 static void
1057 lio_update_read_index(struct lio_instr_queue *iq)
1058 {
1059         uint32_t pkt_in_done = rte_read32(iq->inst_cnt_reg);
1060         uint32_t last_done;
1061
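        /* inst_cnt_reg counts instructions fetched by the device; the delta
         * since the last read tells how far Octeon's read pointer advanced.
         */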
1062         last_done = pkt_in_done - iq->pkt_in_done;
1063         iq->pkt_in_done = pkt_in_done;
1064
1065         /* Add last_done and modulo with the IQ size to get new index */
1066         iq->lio_read_index = (iq->lio_read_index +
1067                         (uint32_t)(last_done & LIO_PKT_IN_DONE_CNT_MASK)) %
1068                         iq->max_count;
1069 }
1070
1071 int
1072 lio_flush_iq(struct lio_device *lio_dev, struct lio_instr_queue *iq)
1073 {
1074         uint32_t tot_inst_processed = 0;
1075         uint32_t inst_processed = 0;
1076         int tx_done = 1;
1077
1078         if (rte_atomic64_test_and_set(&iq->iq_flush_running) == 0)
1079                 return tx_done;
1080
1081         rte_spinlock_lock(&iq->lock);
1082
1083         lio_update_read_index(iq);
1084
1085         do {
1086                 /* Process any outstanding IQ packets. */
1087                 if (iq->flush_index == iq->lio_read_index)
1088                         break;
1089
1090                 inst_processed = lio_process_iq_request_list(lio_dev, iq);
1091
1092                 if (inst_processed) {
1093                         rte_atomic64_sub(&iq->instr_pending, inst_processed);
1094                         iq->stats.instr_processed += inst_processed;
1095                 }
1096
1097                 tot_inst_processed += inst_processed;
1098                 inst_processed = 0;
1099
1100         } while (1);
1101
1102         rte_spinlock_unlock(&iq->lock);
1103
1104         rte_atomic64_clear(&iq->iq_flush_running);
1105
1106         return tx_done;
1107 }
1108
1109 static int
1110 lio_send_command(struct lio_device *lio_dev, uint32_t iq_no, void *cmd,
1111                  void *buf, uint32_t datasize, uint32_t reqtype)
1112 {
1113         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1114         struct lio_iq_post_status st;
1115
1116         rte_spinlock_lock(&iq->post_lock);
1117
1118         st = post_command2(iq, cmd);
1119
1120         if (st.status != LIO_IQ_SEND_FAILED) {
1121                 lio_add_to_request_list(iq, st.index, buf, reqtype);
1122                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, bytes_sent,
1123                                               datasize);
1124                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, instr_posted, 1);
1125
1126                 lio_ring_doorbell(lio_dev, iq);
1127         } else {
1128                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, instr_dropped, 1);
1129         }
1130
1131         rte_spinlock_unlock(&iq->post_lock);
1132
1133         return st.status;
1134 }
1135
1136 void
1137 lio_prepare_soft_command(struct lio_device *lio_dev,
1138                          struct lio_soft_command *sc, uint8_t opcode,
1139                          uint8_t subcode, uint32_t irh_ossp, uint64_t ossp0,
1140                          uint64_t ossp1)
1141 {
1142         struct octeon_instr_pki_ih3 *pki_ih3;
1143         struct octeon_instr_ih3 *ih3;
1144         struct octeon_instr_irh *irh;
1145         struct octeon_instr_rdp *rdp;
1146
1147         RTE_ASSERT(opcode <= 15);
1148         RTE_ASSERT(subcode <= 127);
1149
1150         ih3       = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1151
1152         ih3->pkind = lio_dev->instr_queue[sc->iq_no]->txpciq.s.pkind;
1153
1154         pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
1155
1156         pki_ih3->w      = 1;
1157         pki_ih3->raw    = 1;
1158         pki_ih3->utag   = 1;
1159         pki_ih3->uqpg   = lio_dev->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
1160         pki_ih3->utt    = 1;
1161
1162         pki_ih3->tag    = LIO_CONTROL;
1163         pki_ih3->tagtype = OCTEON_ATOMIC_TAG;
1164         pki_ih3->qpg    = lio_dev->instr_queue[sc->iq_no]->txpciq.s.qpg;
1165         pki_ih3->pm     = 0x7;
1166         pki_ih3->sl     = 8;
1167
1168         if (sc->datasize)
1169                 ih3->dlengsz = sc->datasize;
1170
1171         irh             = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1172         irh->opcode     = opcode;
1173         irh->subcode    = subcode;
1174
1175         /* opcode/subcode specific parameters (ossp) */
1176         irh->ossp = irh_ossp;
1177         sc->cmd.cmd3.ossp[0] = ossp0;
1178         sc->cmd.cmd3.ossp[1] = ossp1;
1179
1180         if (sc->rdatasize) {
1181                 rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
1182                 rdp->pcie_port = lio_dev->pcie_port;
1183                 rdp->rlen      = sc->rdatasize;
1184                 irh->rflag = 1;
1185                 /* PKI IH3 */
1186                 ih3->fsz    = OCTEON_SOFT_CMD_RESP_IH3;
1187         } else {
1188                 irh->rflag = 0;
1189                 /* PKI IH3 */
1190                 ih3->fsz    = OCTEON_PCI_CMD_O3;
1191         }
1192 }
1193
1194 int
1195 lio_send_soft_command(struct lio_device *lio_dev,
1196                       struct lio_soft_command *sc)
1197 {
1198         struct octeon_instr_ih3 *ih3;
1199         struct octeon_instr_irh *irh;
1200         uint32_t len = 0;
1201
1202         ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1203         if (ih3->dlengsz) {
1204                 RTE_ASSERT(sc->dmadptr);
1205                 sc->cmd.cmd3.dptr = sc->dmadptr;
1206         }
1207
1208         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1209         if (irh->rflag) {
1210                 RTE_ASSERT(sc->dmarptr);
1211                 RTE_ASSERT(sc->status_word != NULL);
1212                 *sc->status_word = LIO_COMPLETION_WORD_INIT;
1213                 sc->cmd.cmd3.rptr = sc->dmarptr;
1214         }
1215
1216         len = (uint32_t)ih3->dlengsz;
1217
1218         if (sc->wait_time)
1219                 sc->timeout = lio_uptime + sc->wait_time;
1220
1221         return lio_send_command(lio_dev, sc->iq_no, &sc->cmd, sc, len,
1222                                 LIO_REQTYPE_SOFT_COMMAND);
1223 }
1224
1225 int
1226 lio_setup_sc_buffer_pool(struct lio_device *lio_dev)
1227 {
1228         char sc_pool_name[RTE_MEMPOOL_NAMESIZE];
1229         uint16_t buf_size;
1230
1231         buf_size = LIO_SOFT_COMMAND_BUFFER_SIZE + RTE_PKTMBUF_HEADROOM;
1232         snprintf(sc_pool_name, sizeof(sc_pool_name),
1233                  "lio_sc_pool_%u", lio_dev->port_id);
1234         lio_dev->sc_buf_pool = rte_pktmbuf_pool_create(sc_pool_name,
1235                                                 LIO_MAX_SOFT_COMMAND_BUFFERS,
1236                                                 0, 0, buf_size, SOCKET_ID_ANY);
1237         return 0;
1238 }
1239
1240 void
1241 lio_free_sc_buffer_pool(struct lio_device *lio_dev)
1242 {
1243         rte_mempool_free(lio_dev->sc_buf_pool);
1244 }
1245
1246 struct lio_soft_command *
1247 lio_alloc_soft_command(struct lio_device *lio_dev, uint32_t datasize,
1248                        uint32_t rdatasize, uint32_t ctxsize)
1249 {
1250         uint32_t offset = sizeof(struct lio_soft_command);
1251         struct lio_soft_command *sc;
1252         struct rte_mbuf *m;
1253         uint64_t dma_addr;
1254
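        /* A single mbuf data buffer holds everything:
         * [lio_soft_command][ctx][pad to 128B][data][pad to 128B][rdata],
         * with the completion status word in the last 8 bytes of rdata.
         */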
1255         RTE_ASSERT((offset + datasize + rdatasize + ctxsize) <=
1256                    LIO_SOFT_COMMAND_BUFFER_SIZE);
1257
1258         m = rte_pktmbuf_alloc(lio_dev->sc_buf_pool);
1259         if (m == NULL) {
1260                 lio_dev_err(lio_dev, "Cannot allocate mbuf for sc\n");
1261                 return NULL;
1262         }
1263
1264         /* set the rte_mbuf data size; there is only one segment */
1265         m->pkt_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1266         m->data_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1267
1268         /* use rte_mbuf buffer for soft command */
1269         sc = rte_pktmbuf_mtod(m, struct lio_soft_command *);
1270         memset(sc, 0, LIO_SOFT_COMMAND_BUFFER_SIZE);
1271         sc->size = LIO_SOFT_COMMAND_BUFFER_SIZE;
1272         sc->dma_addr = rte_mbuf_data_iova(m);
1273         sc->mbuf = m;
1274
1275         dma_addr = sc->dma_addr;
1276
1277         if (ctxsize) {
1278                 sc->ctxptr = (uint8_t *)sc + offset;
1279                 sc->ctxsize = ctxsize;
1280         }
1281
1282         /* Start data at 128 byte boundary */
1283         offset = (offset + ctxsize + 127) & 0xffffff80;
1284
1285         if (datasize) {
1286                 sc->virtdptr = (uint8_t *)sc + offset;
1287                 sc->dmadptr = dma_addr + offset;
1288                 sc->datasize = datasize;
1289         }
1290
1291         /* Start rdata at 128 byte boundary */
1292         offset = (offset + datasize + 127) & 0xffffff80;
1293
1294         if (rdatasize) {
1295                 RTE_ASSERT(rdatasize >= 16);
1296                 sc->virtrptr = (uint8_t *)sc + offset;
1297                 sc->dmarptr = dma_addr + offset;
1298                 sc->rdatasize = rdatasize;
1299                 sc->status_word = (uint64_t *)((uint8_t *)(sc->virtrptr) +
1300                                                rdatasize - 8);
1301         }
1302
1303         return sc;
1304 }
1305
1306 void
1307 lio_free_soft_command(struct lio_soft_command *sc)
1308 {
1309         rte_pktmbuf_free(sc->mbuf);
1310 }
1311
1312 void
1313 lio_setup_response_list(struct lio_device *lio_dev)
1314 {
1315         STAILQ_INIT(&lio_dev->response_list.head);
1316         rte_spinlock_init(&lio_dev->response_list.lock);
1317         rte_atomic64_set(&lio_dev->response_list.pending_req_count, 0);
1318 }
1319
1320 int
1321 lio_process_ordered_list(struct lio_device *lio_dev)
1322 {
1323         int resp_to_process = LIO_MAX_ORD_REQS_TO_PROCESS;
1324         struct lio_response_list *ordered_sc_list;
1325         struct lio_soft_command *sc;
1326         int request_complete = 0;
1327         uint64_t status64;
1328         uint32_t status;
1329
1330         ordered_sc_list = &lio_dev->response_list;
1331
1332         do {
1333                 rte_spinlock_lock(&ordered_sc_list->lock);
1334
1335                 if (STAILQ_EMPTY(&ordered_sc_list->head)) {
1336                         /* ordered_sc_list is empty; there is
1337                          * nothing to process
1338                          */
1339                         rte_spinlock_unlock(&ordered_sc_list->lock);
1340                         return -1;
1341                 }
1342
1343                 sc = LIO_STQUEUE_FIRST_ENTRY(&ordered_sc_list->head,
1344                                              struct lio_soft_command, node);
1345
1346                 status = LIO_REQUEST_PENDING;
1347
1348                 /* check if Octeon has finished DMA'ing a response
1349                  * to the location rptr points to
1350                  */
1351                 status64 = *sc->status_word;
1352
1353                 if (status64 != LIO_COMPLETION_WORD_INIT) {
1354                         /* This logic ensures that all 64b have been written.
1355                          * 1. check byte 0 for non-FF
1356                          * 2. if non-FF, then swap result from BE to host order
1357                          * 3. check byte 7 (swapped to 0) for non-FF
1358                          * 4. if non-FF, use the low 16-bit status code
1359                          * 5. if either byte 0 or byte 7 is FF, don't use status
1360                          */
1361                         if ((status64 & 0xff) != 0xff) {
1362                                 lio_swap_8B_data(&status64, 1);
1363                                 if (((status64 & 0xff) != 0xff)) {
1364                                         /* retrieve 16-bit firmware status */
1365                                         status = (uint32_t)(status64 &
1366                                                             0xffffULL);
1367                                         if (status) {
1368                                                 status =
1369                                                 LIO_FIRMWARE_STATUS_CODE(
1370                                                                         status);
1371                                         } else {
1372                                                 /* i.e. no error */
1373                                                 status = LIO_REQUEST_DONE;
1374                                         }
1375                                 }
1376                         }
1377                 } else if ((sc->timeout && lio_check_timeout(lio_uptime,
1378                                                              sc->timeout))) {
1379                         lio_dev_err(lio_dev,
1380                                     "cmd failed, timeout (%ld, %ld)\n",
1381                                     (long)lio_uptime, (long)sc->timeout);
1382                         status = LIO_REQUEST_TIMEOUT;
1383                 }
1384
1385                 if (status != LIO_REQUEST_PENDING) {
1386                         /* we have received a response or we have timed out.
1387                          * remove node from linked list
1388                          */
1389                         STAILQ_REMOVE(&ordered_sc_list->head,
1390                                       &sc->node, lio_stailq_node, entries);
1391                         rte_atomic64_dec(
1392                             &lio_dev->response_list.pending_req_count);
1393                         rte_spinlock_unlock(&ordered_sc_list->lock);
1394
1395                         if (sc->callback)
1396                                 sc->callback(status, sc->callback_arg);
1397
1398                         request_complete++;
1399                 } else {
1400                         /* no response yet */
1401                         request_complete = 0;
1402                         rte_spinlock_unlock(&ordered_sc_list->lock);
1403                 }
1404
1405                 /* If we hit the max number of ordered requests to process
1406                  * per call, quit and let the remaining requests be handled
1407                  * the next time this function is invoked.
1408                  * Without such an upper limit, this function could take up
1409                  * the entire CPU.
1410                  */
1411                 if (request_complete >= resp_to_process)
1412                         break;
1413         } while (request_complete);
1414
1415         return 0;
1416 }
1417
1418 static inline struct lio_stailq_node *
1419 list_delete_first_node(struct lio_stailq_head *head)
1420 {
1421         struct lio_stailq_node *node;
1422
1423         if (STAILQ_EMPTY(head))
1424                 node = NULL;
1425         else
1426                 node = STAILQ_FIRST(head);
1427
1428         if (node)
1429                 STAILQ_REMOVE(head, node, lio_stailq_node, entries);
1430
1431         return node;
1432 }
1433
1434 void
1435 lio_delete_sglist(struct lio_instr_queue *txq)
1436 {
1437         struct lio_device *lio_dev = txq->lio_dev;
1438         int iq_no = txq->q_index;
1439         struct lio_gather *g;
1440
1441         if (lio_dev->glist_head == NULL)
1442                 return;
1443
1444         do {
1445                 g = (struct lio_gather *)list_delete_first_node(
1446                                                 &lio_dev->glist_head[iq_no]);
1447                 if (g) {
1448                         if (g->sg)
1449                                 rte_free(
1450                                     (void *)((unsigned long)g->sg - g->adjust));
1451                         rte_free(g);
1452                 }
1453         } while (g);
1454 }
1455
1456 /**
1457  * \brief Setup gather lists
1458  * @param lio_dev - pointer to the lio device structure
      * @param iq_no - instruction queue number
      * @param fw_mapped_iq - firmware mapped iq index, used to clean up on failure
      * @param num_descs - number of descriptors (one gather list per descriptor)
      * @param socket_id - socket to allocate memory from
1459  */
1460 int
1461 lio_setup_sglists(struct lio_device *lio_dev, int iq_no,
1462                   int fw_mapped_iq, int num_descs, unsigned int socket_id)
1463 {
1464         struct lio_gather *g;
1465         int i;
1466
1467         rte_spinlock_init(&lio_dev->glist_lock[iq_no]);
1468
1469         STAILQ_INIT(&lio_dev->glist_head[iq_no]);
1470
1471         for (i = 0; i < num_descs; i++) {
1472                 g = rte_zmalloc_socket(NULL, sizeof(*g), RTE_CACHE_LINE_SIZE,
1473                                        socket_id);
1474                 if (g == NULL) {
1475                         lio_dev_err(lio_dev,
1476                                     "lio_gather memory allocation failed for qno %d\n",
1477                                     iq_no);
1478                         break;
1479                 }
1480
1481                 g->sg_size =
1482                     ((ROUNDUP4(LIO_MAX_SG) >> 2) * LIO_SG_ENTRY_SIZE);
1483
1484                 g->sg = rte_zmalloc_socket(NULL, g->sg_size + 8,
1485                                            RTE_CACHE_LINE_SIZE, socket_id);
1486                 if (g->sg == NULL) {
1487                         lio_dev_err(lio_dev,
1488                                     "sg list memory allocation failed for qno %d\n",
1489                                     iq_no);
1490                         rte_free(g);
1491                         break;
1492                 }
1493
1494                 /* The gather component should be aligned on 64-bit boundary */
1495                 if (((unsigned long)g->sg) & 7) {
1496                         g->adjust = 8 - (((unsigned long)g->sg) & 7);
1497                         g->sg =
1498                             (struct lio_sg_entry *)((unsigned long)g->sg +
1499                                                        g->adjust);
1500                 }
1501
1502                 STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq_no], &g->list,
1503                                    entries);
1504         }
1505
1506         if (i != num_descs) {
1507                 lio_delete_sglist(lio_dev->instr_queue[fw_mapped_iq]);
1508                 return -ENOMEM;
1509         }
1510
1511         return 0;
1512 }
1513
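     /**
      *  Release an instruction queue: tear down the queue, free the queue
      *  structure and decrement the device's iq count.
      *
      *  @param lio_dev - pointer to the lio device structure
      *  @param iq_no   - instruction queue number
      */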
1514 void
1515 lio_delete_instruction_queue(struct lio_device *lio_dev, int iq_no)
1516 {
1517         lio_delete_instr_queue(lio_dev, iq_no);
1518         rte_free(lio_dev->instr_queue[iq_no]);
1519         lio_dev->instr_queue[iq_no] = NULL;
1520         lio_dev->num_iqs--;
1521 }
1522
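     /** Number of free descriptor slots in the iq (one slot is always kept unused). */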
1523 static inline uint32_t
1524 lio_iq_get_available(struct lio_device *lio_dev, uint32_t q_no)
1525 {
1526         return ((lio_dev->instr_queue[q_no]->max_count - 1) -
1527                 (uint32_t)rte_atomic64_read(
1528                                 &lio_dev->instr_queue[q_no]->instr_pending));
1529 }
1530
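     /** The iq is considered full once at most one descriptor slot remains free. */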
1531 static inline int
1532 lio_iq_is_full(struct lio_device *lio_dev, uint32_t q_no)
1533 {
1534         return ((uint32_t)rte_atomic64_read(
1535                                 &lio_dev->instr_queue[q_no]->instr_pending) >=
1536                                 (lio_dev->instr_queue[q_no]->max_count - 2));
1537 }
1538
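     /**
      *  Flush the instruction queue until at least LIO_FLUSH_WM(iq) descriptors
      *  are free, retrying a bounded number of times.
      *
      *  @return 0 when enough space was reclaimed, 1 if the queue is still busy.
      */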
1539 static int
1540 lio_dev_cleanup_iq(struct lio_device *lio_dev, int iq_no)
1541 {
1542         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1543         uint32_t count = 10000;
1544
1545         while ((lio_iq_get_available(lio_dev, iq_no) < LIO_FLUSH_WM(iq)) &&
1546                         --count)
1547                 lio_flush_iq(lio_dev, iq);
1548
1549         return count ? 0 : 1;
1550 }
1551
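     /**
      *  Completion callback for control packet soft commands: signal the
      *  originator through ctrl_cmd->cond and free the soft command.
      */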
1552 static void
1553 lio_ctrl_cmd_callback(uint32_t status __rte_unused, void *sc_ptr)
1554 {
1555         struct lio_soft_command *sc = sc_ptr;
1556         struct lio_dev_ctrl_cmd *ctrl_cmd;
1557         struct lio_ctrl_pkt *ctrl_pkt;
1558
1559         ctrl_pkt = (struct lio_ctrl_pkt *)sc->ctxptr;
1560         ctrl_cmd = ctrl_pkt->ctrl_cmd;
1561         ctrl_cmd->cond = 1;
1562
1563         lio_free_soft_command(sc);
1564 }
1565
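     /**
      *  Build a soft command from a control packet: the command is copied into
      *  the data buffer and byte-swapped, any UDD bytes are appended as-is, and
      *  the control command completion callback is installed.
      */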
1566 static inline struct lio_soft_command *
1567 lio_alloc_ctrl_pkt_sc(struct lio_device *lio_dev,
1568                       struct lio_ctrl_pkt *ctrl_pkt)
1569 {
1570         struct lio_soft_command *sc = NULL;
1571         uint32_t uddsize, datasize;
1572         uint32_t rdatasize;
1573         uint8_t *data;
1574
1575         uddsize = (uint32_t)(ctrl_pkt->ncmd.s.more * 8);
1576
1577         datasize = OCTEON_CMD_SIZE + uddsize;
1578         rdatasize = (ctrl_pkt->wait_time) ? 16 : 0;
1579
1580         sc = lio_alloc_soft_command(lio_dev, datasize,
1581                                     rdatasize, sizeof(struct lio_ctrl_pkt));
1582         if (sc == NULL)
1583                 return NULL;
1584
1585         rte_memcpy(sc->ctxptr, ctrl_pkt, sizeof(struct lio_ctrl_pkt));
1586
1587         data = (uint8_t *)sc->virtdptr;
1588
1589         rte_memcpy(data, &ctrl_pkt->ncmd, OCTEON_CMD_SIZE);
1590
1591         lio_swap_8B_data((uint64_t *)data, OCTEON_CMD_SIZE >> 3);
1592
1593         if (uddsize) {
1594                 /* Endian-Swap for UDD should have been done by caller. */
1595                 rte_memcpy(data + OCTEON_CMD_SIZE, ctrl_pkt->udd, uddsize);
1596         }
1597
1598         sc->iq_no = (uint32_t)ctrl_pkt->iq_no;
1599
1600         lio_prepare_soft_command(lio_dev, sc,
1601                                  LIO_OPCODE, LIO_OPCODE_CMD,
1602                                  0, 0, 0);
1603
1604         sc->callback = lio_ctrl_cmd_callback;
1605         sc->callback_arg = sc;
1606         sc->wait_time = ctrl_pkt->wait_time;
1607
1608         return sc;
1609 }
1610
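     /**
      *  Allocate a soft command for the control packet and post it to the
      *  device.
      *
      *  @return the send status on success, -1 on allocation or send failure.
      */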
1611 int
1612 lio_send_ctrl_pkt(struct lio_device *lio_dev, struct lio_ctrl_pkt *ctrl_pkt)
1613 {
1614         struct lio_soft_command *sc = NULL;
1615         int retval;
1616
1617         sc = lio_alloc_ctrl_pkt_sc(lio_dev, ctrl_pkt);
1618         if (sc == NULL) {
1619                 lio_dev_err(lio_dev, "soft command allocation failed\n");
1620                 return -1;
1621         }
1622
1623         retval = lio_send_soft_command(lio_dev, sc);
1624         if (retval == LIO_IQ_SEND_FAILED) {
1625                 lio_free_soft_command(sc);
1626                 lio_dev_err(lio_dev, "Port: %d soft command: %d send failed status: %x\n",
1627                             lio_dev->port_id, ctrl_pkt->ncmd.s.cmd, retval);
1628                 return -1;
1629         }
1630
1631         return retval;
1632 }
1633
1634 /** Send data packet to the device
1635  *  @param lio_dev - lio device pointer
1636  *  @param ndata   - control structure with queueing and buffer information
1637  *
1638  *  @returns LIO_IQ_SEND_FAILED if it failed to add to the input queue,
1639  *  LIO_IQ_SEND_STOP if the queue should be stopped, and LIO_IQ_SEND_OK if
      *  it was sent okay.
1640  */
1641 static inline int
1642 lio_send_data_pkt(struct lio_device *lio_dev, struct lio_data_pkt *ndata)
1643 {
1644         return lio_send_command(lio_dev, ndata->q_no, &ndata->cmd,
1645                                 ndata->buf, ndata->datasize, ndata->reqtype);
1646 }
1647
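     /**
      *  Burst Tx handler. For each mbuf a PCI command is built: single-segment
      *  packets use a direct data pointer, multi-segment packets a gather list.
      *
      *  @return number of packets actually queued to the instruction queue.
      */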
1648 uint16_t
1649 lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
1650 {
1651         struct lio_instr_queue *txq = tx_queue;
1652         union lio_cmd_setup cmdsetup;
1653         struct lio_device *lio_dev;
1654         struct lio_iq_stats *stats;
1655         struct lio_data_pkt ndata;
1656         int i, processed = 0;
1657         struct rte_mbuf *m;
1658         uint32_t tag = 0;
1659         int status = 0;
1660         int iq_no;
1661
1662         lio_dev = txq->lio_dev;
1663         iq_no = txq->txpciq.s.q_no;
1664         stats = &lio_dev->instr_queue[iq_no]->stats;
1665
1666         if (!lio_dev->intf_open || !lio_dev->linfo.link.s.link_up) {
1667                 PMD_TX_LOG(lio_dev, ERR, "Transmit failed link_status : %d\n",
1668                            lio_dev->linfo.link.s.link_up);
1669                 goto xmit_failed;
1670         }
1671
1672         lio_dev_cleanup_iq(lio_dev, iq_no);
1673
1674         for (i = 0; i < nb_pkts; i++) {
1675                 uint32_t pkt_len = 0;
1676
1677                 m = pkts[i];
1678
1679                 /* Prepare the attributes for the data to be passed to BASE. */
1680                 memset(&ndata, 0, sizeof(struct lio_data_pkt));
1681
1682                 ndata.buf = m;
1683
1684                 ndata.q_no = iq_no;
1685                 if (lio_iq_is_full(lio_dev, ndata.q_no)) {
1686                         stats->tx_iq_busy++;
1687                         if (lio_dev_cleanup_iq(lio_dev, iq_no)) {
1688                                 PMD_TX_LOG(lio_dev, ERR,
1689                                            "Transmit failed iq:%d full\n",
1690                                            ndata.q_no);
1691                                 break;
1692                         }
1693                 }
1694
1695                 cmdsetup.cmd_setup64 = 0;
1696                 cmdsetup.s.iq_no = iq_no;
1697
1698                 /* check checksum offload flags to form cmd */
1699                 if (m->ol_flags & PKT_TX_IP_CKSUM)
1700                         cmdsetup.s.ip_csum = 1;
1701
1702                 if (m->ol_flags & PKT_TX_OUTER_IP_CKSUM)
1703                         cmdsetup.s.tnl_csum = 1;
1704                 else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
1705                                 (m->ol_flags & PKT_TX_UDP_CKSUM))
1706                         cmdsetup.s.transport_csum = 1;
1707
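                     /* Single-segment packets are sent with a direct data
                      * pointer; multi-segment packets are described by a
                      * gather list taken from the pool built in
                      * lio_setup_sglists().
                      */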
1708                 if (m->nb_segs == 1) {
1709                         pkt_len = rte_pktmbuf_data_len(m);
1710                         cmdsetup.s.u.datasize = pkt_len;
1711                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1712                                             &cmdsetup, tag);
1713                         ndata.cmd.cmd3.dptr = rte_mbuf_data_iova(m);
1714                         ndata.reqtype = LIO_REQTYPE_NORESP_NET;
1715                 } else {
1716                         struct lio_buf_free_info *finfo;
1717                         struct lio_gather *g;
1718                         rte_iova_t phyaddr;
1719                         int i, frags;
1720
1721                         finfo = (struct lio_buf_free_info *)rte_malloc(NULL,
1722                                                         sizeof(*finfo), 0);
1723                         if (finfo == NULL) {
1724                                 PMD_TX_LOG(lio_dev, ERR,
1725                                            "free buffer alloc failed\n");
1726                                 goto xmit_failed;
1727                         }
1728
1729                         rte_spinlock_lock(&lio_dev->glist_lock[iq_no]);
1730                         g = (struct lio_gather *)list_delete_first_node(
1731                                                 &lio_dev->glist_head[iq_no]);
1732                         rte_spinlock_unlock(&lio_dev->glist_lock[iq_no]);
1733                         if (g == NULL) {
1734                                 PMD_TX_LOG(lio_dev, ERR,
1735                                            "Transmit scatter gather: glist null!\n");
                                     /* avoid leaking finfo on this error path */
                                     rte_free(finfo);
1736                                 goto xmit_failed;
1737                         }
1738
1739                         cmdsetup.s.gather = 1;
1740                         cmdsetup.s.u.gatherptrs = m->nb_segs;
1741                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1742                                             &cmdsetup, tag);
1743
1744                         memset(g->sg, 0, g->sg_size);
1745                         g->sg[0].ptr[0] = rte_mbuf_data_iova(m);
1746                         lio_add_sg_size(&g->sg[0], m->data_len, 0);
1747                         pkt_len = m->data_len;
1748                         finfo->mbuf = m;
1749
1750                         /* First segment handled above; add the remaining ones */
1751                         frags = m->nb_segs - 1;
1752                         i = 1;
1753                         m = m->next;
1754                         while (frags--) {
1755                                 g->sg[(i >> 2)].ptr[(i & 3)] =
1756                                                 rte_mbuf_data_iova(m);
1757                                 lio_add_sg_size(&g->sg[(i >> 2)],
1758                                                 m->data_len, (i & 3));
1759                                 pkt_len += m->data_len;
1760                                 i++;
1761                                 m = m->next;
1762                         }
1763
1764                         phyaddr = rte_mem_virt2iova(g->sg);
1765                         if (phyaddr == RTE_BAD_IOVA) {
1766                                 PMD_TX_LOG(lio_dev, ERR, "bad phys addr\n");
                                     /* return the gather list and free finfo so
                                      * this error path does not leak them
                                      */
                                     rte_spinlock_lock(&lio_dev->glist_lock[iq_no]);
                                     STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq_no],
                                                        &g->list, entries);
                                     rte_spinlock_unlock(&lio_dev->glist_lock[iq_no]);
                                     rte_free(finfo);
1767                                 goto xmit_failed;
1768                         }
1769
1770                         ndata.cmd.cmd3.dptr = phyaddr;
1771                         ndata.reqtype = LIO_REQTYPE_NORESP_NET_SG;
1772
1773                         finfo->g = g;
1774                         finfo->lio_dev = lio_dev;
1775                         finfo->iq_no = (uint64_t)iq_no;
1776                         ndata.buf = finfo;
1777                 }
1778
1779                 ndata.datasize = pkt_len;
1780
1781                 status = lio_send_data_pkt(lio_dev, &ndata);
1782
1783                 if (unlikely(status == LIO_IQ_SEND_FAILED)) {
1784                         PMD_TX_LOG(lio_dev, ERR, "send failed\n");
1785                         break;
1786                 }
1787
1788                 if (unlikely(status == LIO_IQ_SEND_STOP)) {
1789                         PMD_TX_LOG(lio_dev, DEBUG, "iq full\n");
1790                         /* create space as iq is full */
1791                         lio_dev_cleanup_iq(lio_dev, iq_no);
1792                 }
1793
1794                 stats->tx_done++;
1795                 stats->tx_tot_bytes += pkt_len;
1796                 processed++;
1797         }
1798
1799 xmit_failed:
1800         stats->tx_dropped += (nb_pkts - processed);
1801
1802         return processed;
1803 }
1804
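     /** Release all configured Tx and Rx queues of the ethdev. */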
1805 void
1806 lio_dev_clear_queues(struct rte_eth_dev *eth_dev)
1807 {
1808         struct lio_instr_queue *txq;
1809         struct lio_droq *rxq;
1810         uint16_t i;
1811
1812         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
1813                 txq = eth_dev->data->tx_queues[i];
1814                 if (txq != NULL) {
1815                         lio_dev_tx_queue_release(txq);
1816                         eth_dev->data->tx_queues[i] = NULL;
1817                 }
1818         }
1819
1820         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
1821                 rxq = eth_dev->data->rx_queues[i];
1822                 if (rxq != NULL) {
1823                         lio_dev_rx_queue_release(rxq);
1824                         eth_dev->data->rx_queues[i] = NULL;
1825                 }
1826         }
1827 }